SEP Solver Library  1.0
 All Classes Namespaces Files Functions Variables Typedefs Macros
inCoreFloat.cpp
Go to the documentation of this file.
1 #include "inCoreFloat.h"
2 #include "vectorize.h"
3 #include "DataInCoreFloat.h"
4 #include <iostream> //****just to test
5 #include <cstdlib> //****just to test
6 
7 
8 void SEP::inCoreFloat::linearCombo(float a, dataContainer *x, float b, dataContainer *y) const{
13  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
14  float *__restrict__ my=s->getFloatPtr();
15  float *__restrict__ your=t->getFloatPtr();
16  for(long long i=0; i < b_omp->at(0); i++) {
17  my[i]=my[i]*a+your[i]*b;
18  }
19  int ith=0;
20  #pragma omp parallel
21  ith=omp_get_thread_num();
22  long long j=0;
23  SIMD_constant c0,c1;
24  SIMD_Float16::setConstant(c0,a);
25  SIMD_Float16::setConstant(c1,b);
26  SIMD_Float16 mine,yours;
27  for(long long i=b_omp->at(ith); j< n_omp->at(ith); j++, i+=BLOCK_SIZE){
28  mine.load(&my[i]);
29  yours.loadu(&your[i]); //Can't guarantee the same allignment
30  mine*=c0;
31  yours*=c1;
32  mine+=yours;
33  mine.stream(&my[i]);
34  }
35 
36  //Finish it up
37  int nth=s->get_nth()-1;
38  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]=my[i]*a+b*your[i];
39 }
40 
41 
46  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
47  float *__restrict__ my=s->getFloatPtr();
48  int ith=0;
49  #pragma omp parallel
50  ith=omp_get_thread_num();
51  for(long long i=0; i < b_omp->at(0); i++) my[i]=my[i]*-1;;
52  long long j=0;
53  SIMD_constant c0;
54  SIMD_Float16::setConstant(c0,-1.);
55  SIMD_Float16 mine;
56  for(long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=BLOCK_SIZE){
57  mine.load(&my[i]);
58  mine*=c0;
59  mine.stream(&my[i]);
60  }
61  //Finish it up
62  int nth=s->get_nth()-1;
63 
64  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]=-my[i];
65 
66 }
72  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
73  float *__restrict__ my=t->getFloatPtr();
74  float *__restrict__ you=s->getFloatPtr();
75  int ith=0;
76  #pragma omp parallel
77  ith=omp_get_thread_num();
78  for(long long i=0; i < b_omp->at(0) ; i++) my[i]=you[i]*-1;;
79 
80  long long j=0;
81  SIMD_constant c0;
82  SIMD_Float16::setConstant(c0,-1.);
83  SIMD_Float16 mine;
84  for(long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=BLOCK_SIZE){
85  mine.load(&you[i]);
86  mine*=c0;
87  mine.stream(&my[i]);
88  }
89  //Finish it up
90  int nth=s->get_nth()-1;
91 
92  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]=-you[i];
93 }
94 
100  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
101  float *__restrict__ my=t->getFloatPtr();
102  float *__restrict__ their=s->getFloatPtr();
103  for(long long i=0; i < b_omp->at(0); i++) my[i]=their[i]*a;
104  int ith;
105  #pragma omp parallel
106  ith=omp_get_thread_num();
107  long long j=0;
108  SIMD_constant c0;
109  SIMD_Float16::setConstant(c0,a);
110  SIMD_Float16 mine;
111  for(long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=BLOCK_SIZE){
112  mine.loadu(&their[i]);
113  mine*=c0;
114  mine.stream(&my[i]);
115  }
116  //Finish it up
117  int nth=s->get_nth()-1;
118 
119  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]=their[i]*a;
120  // added *a in line above here, made no difference
121 }
122 
123 void SEP::inCoreFloat::scale(float a, dataContainer *x) const {
127  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
128  float *__restrict__ my=s->getFloatPtr();
129  for(long long i=0; i < b_omp->at(0); i++) my[i]=my[i]*a;
130  int ith;
131 
132  #pragma omp parallel
133  ith=omp_get_thread_num();
134  long long j=0;
135  SIMD_constant c0;
136  SIMD_Float16::setConstant(c0,a);
137  SIMD_Float16 mine;
138  for(long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=BLOCK_SIZE){
139  mine.load(&my[i]);
140  mine*=c0;
141  mine.stream(&my[i]);
142  }
143  //Finish it up
144  int nth=s->get_nth()-1;
145 
146  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]*=a;
147 }
152  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
153  float *__restrict__ my=s->getFloatPtr();
154  int ith;
155  #pragma omp parallel
156  ith=omp_get_thread_num();
157  for(long long i=0; i < b_omp->at(0); i++) my[i]=0;
158  long long j=0;
159  SIMD_constant c0;
160  SIMD_Float16 mine;
161  mine.load(&my[0]);
162  SIMD_Float16::setConstant(c0,0);
163  mine*=c0;
164  for(long long i=b_omp->at(ith); j <n_omp->at(ith); j++, i+=BLOCK_SIZE){
165  mine.stream(&my[i]);
166  }
167  //Finish it up
168 
169 
170  int nth=s->get_nth()-1;
171 
172  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]=0.;
173 }
178  std::vector<long long> *b_omp=s->get_b_omp(),*n_omp=s->get_n_omp();
179  float *__restrict__ my=s->getFloatPtr();
180  int ith;
181  #pragma omp parallel
182  ith=omp_get_thread_num();
183  for(long long i=0; i < b_omp->at(0); i++) my[i]=(float)rand()/(float)RAND_MAX-.5;
184  long long j=0;
185  SIMD_constant c0;
186  SIMD_Float16 mine;
187  SIMD_Float16::setConstant(c0,0);
188  mine*=c0;
189  for(long long i=b_omp->at(ith); j <n_omp->at(ith); j++, i+=BLOCK_SIZE){
190  for(long long k=0; k < BLOCK_SIZE; k++) my[i+k]=(float)rand()/(float)RAND_MAX-.5;
191  }
192  //Finish it up
193  int nth=s->get_nth()-1;
194 
195  for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) my[i]=(float)rand()/(float)RAND_MAX-.5;
196 }
197 
198 
204  std::vector<long long> *b_omp=s->get_b_omp(), *n_omp=s->get_n_omp();
205  float *__restrict__ my=s->getFloatPtr();
206  float *__restrict__ your=t->getFloatPtr();
207  double ip = 0.0; // total sum
208  double dip = 0.0; // keep track of double sub sum
209  int floatGroup = 0; // keep track of number of entries added in double
210  int maxFloatGroup = 500;
211  //std::cout << "maxFloatGroup is " << maxFloatGroup << "\n"; //******test
212 /* */ for(long long i=0; i<b_omp->at(0); i++){
213  floatGroup = floatGroup + 1;
214  //if (i == 0) std::cout << "In first loop \n"; //***just for test
215  dip = dip + double(my[i]*your[i]);
216  if(floatGroup >= maxFloatGroup){
217  ip = ip + float(dip);
218  dip = 0.0;
219  floatGroup = 0;
220  }
221  }
222  int ith=0;
223  #pragma omp parallel
224  ith=omp_get_thread_num();
225  long long j=0;
226  SIMD_Float16 mine, yours, prod;
227  float *__restrict__ prodv = new float[n123];
228  //std::cout << "Past first for loop \n"; //******just for test
229  for(long long i=b_omp->at(ith); j< n_omp->at(ith); j++, i+=BLOCK_SIZE){
230  //std::cout << "In the middle for loop \n"; // ***** just for test
231  mine.load(&my[i]);
232  yours.loadu(&your[i]);
233  prod.mult(mine,yours);
234  prod.stream(&prodv[i]);
235  }
236  for(long long i=b_omp->at(ith); i<(b_omp->at(ith))+(n_omp->at(ith))*BLOCK_SIZE; i++){
237  floatGroup += 1;
238  dip += prodv[i];
239  if(floatGroup >= maxFloatGroup){
240  ip+=dip;
241  floatGroup = 0;
242  dip=0;
243  }
244  }
245  delete[] prodv;
246 /* */
247  //std::cout << "Past second loop \n"; //***just for test
248  // Finish it up
249  int nth=s->get_nth()-1;
250 /* */ for(long long i=b_omp->at(nth)+(n_omp->at(nth))*BLOCK_SIZE; i< n123; i++) {
251  //for(long long i=0; i<n123; i++){
252  floatGroup = floatGroup + 1;
253  dip = dip + double(my[i]*your[i]);
254  if(floatGroup >= maxFloatGroup){
255  ip = ip + dip;
256  dip = 0.0;
257  floatGroup = 0;
258 
259  }
260  }
261  ip = ip + dip; // add on any additional terms in sum
262 
263  return ip;
264 }
265 
267  set_n123((long long) n1);
268  push_back_n(n1);
269 }
270 namespace SEP{
272  long long a=get_n(0);
273  data1DFloat *x=(data1DFloat*) new data1DFloat(a);
274  std::cout << "built from 1d\n";
275  return x;
276 
277  }
278 }
279 void SEP::inCoreFloat2D::base_2d(int n1,int n2){
280  long long n123=n1;push_back_n(n1);
281  n123*=(long long) n2; push_back_n(n2);
282  std::cout << "built from 2d\n";
283  set_n123(n123);
284 }
285 void SEP::inCoreFloat3D::base_3d(int n1,int n2,int n3){
286  long long n123=n1;push_back_n(n1);
287  n123*=(long long) n2; push_back_n(n2);
288  n123*=(long long) n3; push_back_n(n3);
289  set_n123(n123);
290 }
291 void SEP::inCoreFloat4D::base_4d(int n1,int n2,int n3, int n4){
292  long long n123=n1;push_back_n(n1);
293  n123*=(long long) n2; push_back_n(n2);
294  n123*=(long long) n3; push_back_n(n3);
295  n123*=(long long) n4; push_back_n(n4);
296  set_n123(n123);
297 }
298 void SEP::inCoreFloat5D::base_5d(int n1,int n2,int n3, int n4,int n5){
299  long long n123=n1;push_back_n(n1);
300  n123*=(long long) n2; push_back_n(n2);
301  n123*=(long long) n3; push_back_n(n3);
302  n123*=(long long) n4;push_back_n(n4);
303  n123*=(long long) n5; push_back_n(n5);
304  set_n123(n123);
305 }
306 void SEP::inCoreFloat6D::base_6d(int n1,int n2,int n3, int n4,int n5,int n6){
307  long long n123=n1; push_back_n(n1);
308  n123*=(long long) n2;push_back_n(n2);
309  n123*=(long long) n3; push_back_n(n3);
310  n123*=(long long) n4; push_back_n(n4);
311  n123*=(long long) n5;push_back_n(n5);
312  n123*=(long long) n6; push_back_n(n6);
313  set_n123(n123);
314 }
315 void SEP::inCoreFloat7D::base_7d(int n1,int n2,int n3, int n4,int n5,int n6, int n7){
316  long long n123=n1; push_back_n(n1);
317  n123*=(long long) n2;push_back_n(n2);
318  n123*=(long long) n3; push_back_n(n3);
319  n123*=(long long) n4; push_back_n(n4);
320  n123*=(long long) n5; push_back_n(n5);
321  n123*=(long long) n6; push_back_n(n6);
322  n123*=(long long) n7; push_back_n(n7);
323  push_back_n(n1);
324 
325  set_n123(n123);
326 }
329  std::vector<long long> n2; n2= y->get_ns();
330  std::vector<long long> n3=this->get_ns();
331 
332  if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] &&
333  n3[4]==n2[4] && n3[5]==n2[5] && n3[6]==n2[6]) return true;
334  return false;
335 
336  }
337 
338  bool SEP::inCoreFloat7D::isCompatible(Space *otherSpace, bool makeSame) const{
339 // *******should actually check the type? *********
340  inCoreFloat7D *os=(inCoreFloat7D*)otherSpace;
341  std::vector<long long> n2 = os->get_ns();
342  std::vector<long long> n3 = this->get_ns();
343  if(this == os) return true;
344  if(get_key() == os->get_key()) return true;
345 
346  if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3] && n3[4]==n2[4] && n3[5]==n2[5] && n3[6]==n2[6]){
347  // If you choose makeSame to be true, the key for these spaces will be the same
348  if(makeSame == true) os->set_key(get_key());
349  return true;
350  }
351  return false;
352  }
353 
356  std::vector<long long> n2; n2= y->get_ns();
357  std::vector<long long> n3=get_ns();
358 
359  if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] &&
360  n3[4]==n2[4] && n3[5]==n2[5] ) return true;
361  return false;
362  }
363 
364  bool SEP::inCoreFloat6D::isCompatible(Space *otherSpace, bool makeSame) const{
365 // *******should actually check the type? *********
366  inCoreFloat6D *os=(inCoreFloat6D*)otherSpace;
367  std::vector<long long> n2 = os->get_ns();
368  std::vector<long long> n3 = this->get_ns();
369  if(this == os) return true;
370  if(get_key() == os->get_key()) return true;
371  if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3] && n3[4]==n2[4] && n3[5]==n2[5]){
372  // If you choose makeSame to be true, the key for these spaces will be the same
373  if(makeSame == true) os->set_key(get_key());
374  return true;
375  }
376  return false;
377  }
378 
381  std::vector<long long> n3=get_ns();
382  std::vector<long long> n2; n2= y->get_ns();
383  if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] &&
384  n3[4]==n2[4] ) return true;
385  return false;
386  }
387 
388  bool SEP::inCoreFloat5D::isCompatible(Space *otherSpace, bool makeSame) const{
389 // *******should actually check the type? *********
390  inCoreFloat5D *os=(inCoreFloat5D*)otherSpace;
391  std::vector<long long> n2 = os->get_ns();
392  std::vector<long long> n3 = this->get_ns();
393  if(this == os) return true;
394  if(get_key() == os->get_key()) return true;
395  if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3] && n3[4]==n2[4]){
396  // If you choose makeSame to be true, the key for these spaces will be the same
397  if(makeSame == true) os->set_key(get_key());
398  return true;
399  }
400  return false;
401  }
402 
405  std::vector<long long> n3=get_ns();
406 
407  std::vector<long long> n2; n2= y->get_ns();
408  if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] ) return true;
409  return false;
410  }
411 
412  bool SEP::inCoreFloat4D::isCompatible(Space *otherSpace, bool makeSame) const{
413 // *******should actually check the type? *********
414  inCoreFloat4D *os=(inCoreFloat4D*)otherSpace;
415  std::vector<long long> n2 = os->get_ns();
416  std::vector<long long> n3 = this->get_ns();
417  if(this == os) return true;
418  if(get_key() == os->get_key()) return true;
419  if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3]){
420  // If you choose makeSame to be true, the key for these spaces will be the same
421  if(makeSame == true) os->set_key(get_key());
422  return true;
423  }
424  return false;
425  }
426 
429  std::vector<long long> n3=get_ns();
430 
431  std::vector<long long> n2; n2= y->get_ns();
432  if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] ) return true;
433  return false;
434  }
435 
436  bool SEP::inCoreFloat3D::isCompatible(Space *otherSpace, bool makeSame) const{
437 // *******should actually check the type? *********
438  inCoreFloat3D *os=(inCoreFloat3D*)otherSpace;
439  std::vector<long long> n2 = os->get_ns();
440  std::vector<long long> n3 = this->get_ns();
441  if(this == os) return true;
442  if(get_key() == os->get_key()) return true;
443  if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2]){
444  // If you choose makeSame to be true, the key for these spaces will be the same
445  if(makeSame == true) os->set_key(get_key());
446  return true;
447  }
448  return false;
449  }
450 
453  std::vector<long long> n3=get_ns();
454  std::vector<long long> n2; n2= y->get_ns();
455  if(n3[0]==n2[0] && n3[1]==n2[1] ) return true;
456  return false;
457  }
458 
459  bool SEP::inCoreFloat2D::isCompatible(SEP::Space *otherSpace, bool makeSame) const{
460 // *******should actually check the type? *********
461  inCoreFloat2D *os=(inCoreFloat2D*)otherSpace;
462  std::vector<long long> n2 = os->get_ns();
463  std::vector<long long> n3 = this->get_ns();
464  if(this == os) return true;
465  if(get_key() == os->get_key()) return true;
466  if(n3[0]==n2[0] && n3[1]==n2[1]){
467  // If you choose makeSame to be true, the key for these spaces will be the same
468  if(makeSame == true) os->set_key(get_key());
469  return true;
470  }
471  return false;
472  }
473 
476  std::vector<long long> n3=get_ns();
477 
478  std::vector<long long> n2; n2= y->get_ns();
479  if(n3[0]==n2[0] ) return true;
480  return false;
481  }
482 
483  bool SEP::inCoreFloat1D::isCompatible(SEP::Space *otherSpace, bool makeSame) const{
484 // *******should actually check the type? *********
485  inCoreFloat1D *os=(inCoreFloat1D*)otherSpace;
486  std::vector<long long> n2 = os->get_ns();
487  std::vector<long long> n3 = this->get_ns();
488  if(this == os) return true;
489  if(get_key() == os->get_key()) return true;
490  if(n3[0]==n2[0]){
491  // If you choose makeSame to be true, the key for these spaces will be the same
492  if(makeSame == true) os->set_key(get_key());
493  return true;
494  }
495  return false;
496  }