16 for(
long long i=0; i < b_omp->at(0); i++) {
17 my[i]=my[i]*a+your[i]*b;
24 SIMD_Float16::setConstant(c0,a);
25 SIMD_Float16::setConstant(c1,b);
27 for(
long long i=b_omp->at(ith); j< n_omp->at(ith); j++, i+=
BLOCK_SIZE){
29 yours.loadu(&your[i]);
38 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]=my[i]*a+b*your[i];
51 for(
long long i=0; i < b_omp->at(0); i++) my[i]=my[i]*-1;;
54 SIMD_Float16::setConstant(c0,-1.);
56 for(
long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=
BLOCK_SIZE){
64 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]=-my[i];
78 for(
long long i=0; i < b_omp->at(0) ; i++) my[i]=you[i]*-1;;
82 SIMD_Float16::setConstant(c0,-1.);
84 for(
long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=
BLOCK_SIZE){
92 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]=-you[i];
103 for(
long long i=0; i < b_omp->at(0); i++) my[i]=their[i]*a;
109 SIMD_Float16::setConstant(c0,a);
111 for(
long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=
BLOCK_SIZE){
112 mine.loadu(&their[i]);
119 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]=their[i]*a;
129 for(
long long i=0; i < b_omp->at(0); i++) my[i]=my[i]*a;
136 SIMD_Float16::setConstant(c0,a);
138 for(
long long i=b_omp->at(ith); j < n_omp->at(ith); j++, i+=
BLOCK_SIZE){
146 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]*=a;
157 for(
long long i=0; i < b_omp->at(0); i++) my[i]=0;
162 SIMD_Float16::setConstant(c0,0);
164 for(
long long i=b_omp->at(ith); j <n_omp->at(ith); j++, i+=
BLOCK_SIZE){
172 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]=0.;
183 for(
long long i=0; i < b_omp->at(0); i++) my[i]=(
float)rand()/(float)RAND_MAX-.5;
187 SIMD_Float16::setConstant(c0,0);
189 for(
long long i=b_omp->at(ith); j <n_omp->at(ith); j++, i+=
BLOCK_SIZE){
190 for(
long long k=0; k <
BLOCK_SIZE; k++) my[i+k]=(
float)rand()/(float)RAND_MAX-.5;
195 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) my[i]=(
float)rand()/(float)RAND_MAX-.5;
210 int maxFloatGroup = 500;
212 for(
long long i=0; i<b_omp->at(0); i++){
213 floatGroup = floatGroup + 1;
215 dip = dip + double(my[i]*your[i]);
216 if(floatGroup >= maxFloatGroup){
217 ip = ip + float(dip);
227 float *__restrict__ prodv =
new float[n123];
229 for(
long long i=b_omp->at(ith); j< n_omp->at(ith); j++, i+=
BLOCK_SIZE){
232 yours.loadu(&your[i]);
233 prod.mult(mine,yours);
234 prod.stream(&prodv[i]);
236 for(
long long i=b_omp->at(ith); i<(b_omp->at(ith))+(n_omp->at(ith))*
BLOCK_SIZE; i++){
239 if(floatGroup >= maxFloatGroup){
250 for(
long long i=b_omp->at(nth)+(n_omp->at(nth))*
BLOCK_SIZE; i< n123; i++) {
252 floatGroup = floatGroup + 1;
253 dip = dip + double(my[i]*your[i]);
254 if(floatGroup >= maxFloatGroup){
267 set_n123((
long long) n1);
272 long long a=get_n(0);
274 std::cout <<
"built from 1d\n";
279 void SEP::inCoreFloat2D::base_2d(
int n1,
int n2){
280 long long n123=n1;push_back_n(n1);
281 n123*=(
long long) n2; push_back_n(n2);
282 std::cout <<
"built from 2d\n";
285 void SEP::inCoreFloat3D::base_3d(
int n1,
int n2,
int n3){
286 long long n123=n1;push_back_n(n1);
287 n123*=(
long long) n2; push_back_n(n2);
288 n123*=(
long long) n3; push_back_n(n3);
291 void SEP::inCoreFloat4D::base_4d(
int n1,
int n2,
int n3,
int n4){
292 long long n123=n1;push_back_n(n1);
293 n123*=(
long long) n2; push_back_n(n2);
294 n123*=(
long long) n3; push_back_n(n3);
295 n123*=(
long long) n4; push_back_n(n4);
298 void SEP::inCoreFloat5D::base_5d(
int n1,
int n2,
int n3,
int n4,
int n5){
299 long long n123=n1;push_back_n(n1);
300 n123*=(
long long) n2; push_back_n(n2);
301 n123*=(
long long) n3; push_back_n(n3);
302 n123*=(
long long) n4;push_back_n(n4);
303 n123*=(
long long) n5; push_back_n(n5);
306 void SEP::inCoreFloat6D::base_6d(
int n1,
int n2,
int n3,
int n4,
int n5,
int n6){
307 long long n123=n1; push_back_n(n1);
308 n123*=(
long long) n2;push_back_n(n2);
309 n123*=(
long long) n3; push_back_n(n3);
310 n123*=(
long long) n4; push_back_n(n4);
311 n123*=(
long long) n5;push_back_n(n5);
312 n123*=(
long long) n6; push_back_n(n6);
315 void SEP::inCoreFloat7D::base_7d(
int n1,
int n2,
int n3,
int n4,
int n5,
int n6,
int n7){
316 long long n123=n1; push_back_n(n1);
317 n123*=(
long long) n2;push_back_n(n2);
318 n123*=(
long long) n3; push_back_n(n3);
319 n123*=(
long long) n4; push_back_n(n4);
320 n123*=(
long long) n5; push_back_n(n5);
321 n123*=(
long long) n6; push_back_n(n6);
322 n123*=(
long long) n7; push_back_n(n7);
329 std::vector<long long> n2; n2= y->
get_ns();
330 std::vector<long long> n3=this->get_ns();
332 if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] &&
333 n3[4]==n2[4] && n3[5]==n2[5] && n3[6]==n2[6])
return true;
341 std::vector<long long> n2 = os->
get_ns();
342 std::vector<long long> n3 = this->get_ns();
343 if(
this == os)
return true;
344 if(get_key() == os->
get_key())
return true;
346 if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3] && n3[4]==n2[4] && n3[5]==n2[5] && n3[6]==n2[6]){
348 if(makeSame ==
true) os->
set_key(get_key());
356 std::vector<long long> n2; n2= y->
get_ns();
357 std::vector<long long> n3=get_ns();
359 if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] &&
360 n3[4]==n2[4] && n3[5]==n2[5] )
return true;
367 std::vector<long long> n2 = os->
get_ns();
368 std::vector<long long> n3 = this->get_ns();
369 if(
this == os)
return true;
370 if(get_key() == os->
get_key())
return true;
371 if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3] && n3[4]==n2[4] && n3[5]==n2[5]){
373 if(makeSame ==
true) os->
set_key(get_key());
381 std::vector<long long> n3=get_ns();
382 std::vector<long long> n2; n2= y->
get_ns();
383 if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] &&
384 n3[4]==n2[4] )
return true;
391 std::vector<long long> n2 = os->
get_ns();
392 std::vector<long long> n3 = this->get_ns();
393 if(
this == os)
return true;
394 if(get_key() == os->
get_key())
return true;
395 if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3] && n3[4]==n2[4]){
397 if(makeSame ==
true) os->
set_key(get_key());
405 std::vector<long long> n3=get_ns();
407 std::vector<long long> n2; n2= y->
get_ns();
408 if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] && n2[3]==n3[3] )
return true;
415 std::vector<long long> n2 = os->
get_ns();
416 std::vector<long long> n3 = this->get_ns();
417 if(
this == os)
return true;
418 if(get_key() == os->
get_key())
return true;
419 if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2] && n3[3]==n2[3]){
421 if(makeSame ==
true) os->
set_key(get_key());
429 std::vector<long long> n3=get_ns();
431 std::vector<long long> n2; n2= y->
get_ns();
432 if(n3[0]==n2[0] && n3[1]==n2[1] && n2[2]==n3[2] )
return true;
439 std::vector<long long> n2 = os->
get_ns();
440 std::vector<long long> n3 = this->get_ns();
441 if(
this == os)
return true;
442 if(get_key() == os->
get_key())
return true;
443 if(n3[0]==n2[0] && n3[1]==n2[1] && n3[2]==n2[2]){
445 if(makeSame ==
true) os->
set_key(get_key());
453 std::vector<long long> n3=get_ns();
454 std::vector<long long> n2; n2= y->
get_ns();
455 if(n3[0]==n2[0] && n3[1]==n2[1] )
return true;
462 std::vector<long long> n2 = os->
get_ns();
463 std::vector<long long> n3 = this->get_ns();
464 if(
this == os)
return true;
465 if(get_key() == os->
get_key())
return true;
466 if(n3[0]==n2[0] && n3[1]==n2[1]){
468 if(makeSame ==
true) os->
set_key(get_key());
476 std::vector<long long> n3=get_ns();
478 std::vector<long long> n2; n2= y->
get_ns();
479 if(n3[0]==n2[0] )
return true;
486 std::vector<long long> n2 = os->
get_ns();
487 std::vector<long long> n3 = this->get_ns();
488 if(
this == os)
return true;
489 if(get_key() == os->
get_key())
return true;
492 if(makeSame ==
true) os->
set_key(get_key());