// partrand.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#define _USE_MATH_DEFINES
#include<iostream>
#include<cmath>
#include<cstdio>
#include<ctime>
#include<random>
#include<functional>
#include<omp.h>
using namespace std;
#define TABLESIZES 128
#define INDEXSIZE 7
float vectorX[3][TABLESIZES][TABLESIZES];
float vectorY[3][TABLESIZES][TABLESIZES];
float vectorZ[3][TABLESIZES][TABLESIZES];
float inversionTable[3][8];
int swapTable[3][8] = { { 0, 0, 1, 1, 2, 2 }, { 1, 2, 0, 2, 0, 1 }, { 2, 1, 2, 0, 1, 0 } };
float wobbleFactors[2][TABLESIZES];
int _tmain(int argc, _TCHAR* argv[])
{
int threadnum;
cout << "Enter the number of threads: 1, 2, 4, 8\n";
cin >> threadnum;
clock_t start;
double duration;
start = clock();
int steps = 1024000, cumulative = 0, counter, dummyval = 10000;
float x, y, z;
float newz, alpha, r;
float pos[10000][3] = { 0 };
float non = 0;//prevent nothing from happening!
// build the tables
for (int i = 0; i < TABLESIZES; i++) {
vectorX[0][0][i] = vectorX[1][0][TABLESIZES - 1 - i] = cos(M_PI * ((double)i) / ((double)TABLESIZES)); // equator arc
vectorY[1][0][i] = 1.0; // top point
}
for (int i = 1; i < TABLESIZES; i++) {
for (int j = 0; j < TABLESIZES; j++) {
vectorX[0][i][j] = vectorX[0][0][2 * i] * vectorX[0][0][j]; // cos2 cos
vectorX[1][i][j] = vectorX[0][0][2 * i] * vectorX[1][0][j]; // cos2 sin
vectorX[2][i][j] = vectorX[1][0][2 * i]; // sin2
}
}
// y vector all transposed x vector reverse order (-y, x)
for (int i = 1; i < TABLESIZES; i++) {
for (int j = 0; j < TABLESIZES; j++) {
vectorY[0][i][j] = -vectorX[1][TABLESIZES - 1 - i][j];
vectorY[1][i][j] = vectorX[0][TABLESIZES - 1 - i][j];
vectorY[2][i][j] = vectorX[2][TABLESIZES - 1 - i][j];
}
}
// z vector all transposed x zero vector ( y, -x)
for (int i = 0; i < TABLESIZES; i++) {
for (int j = 0; j < TABLESIZES; j++) {
vectorZ[0][i][j] = vectorX[1][0][j];
vectorZ[1][i][j] = -vectorX[0][0][j];
}
}
// for flipping signs
for (int i = 0; i < 8; i++) {
inversionTable[0][i] = i & 1 ? -1.0 : 1.0;
inversionTable[1][i] = i & 2 ? -1.0 : 1.0;
inversionTable[3][i] = i & 4 ? -1.0 : 1.0;
}
for (int i = 0; i < TABLESIZES; i++) {
wobbleFactors[0][i] = cos(((double)M_PI) * ((double)i)) / (((double)TABLESIZES) *((double)TABLESIZES));
wobbleFactors[1][i] = 1.0 / sqrt(1.0 + wobbleFactors[0][i] * wobbleFactors[0][i]);
}
// std::srand(std::time(0)); // use current time as seed for random generator
// int random_variable = std::rand();
/*mt19937::result_type seed = time(0);
auto real_rand = bind(uniform_real_distribution<float>(-1,1),mt19937(seed));*/
int arcx, arcy, swap, neg, wobblex, wobbley, wobbleSignx, wobbleSigny, localrand;
#pragma omp parallel private(arcx, arcy, swap, neg, wobblex, wobbley, wobbleSignx, wobbleSigny,counter,x,y,z) reduction (+: non, cumulative) num_threads(threadnum)
{
for (int dummy = 0; dummy<dummyval; dummy++)
{
mt19937::result_type seed = time(0);
auto int_rand = bind(uniform_int_distribution<>(), mt19937(seed));
// mt19937::result_type seed = time(0);
// auto int_rand = bind(uniform_real_distribution<float>(-1, 1), mt19937(seed));
#pragma omp for
for (counter = 0; counter<steps; counter++)
{
//int x=0,y=0,z=0; //individual x,y,z declared for each thread loop
x = 0, y = 0, z = 0;
localrand = int_rand();
arcx = localrand&((1 << INDEXSIZE) - 1);
arcy = (localrand >> 8)&((1 << INDEXSIZE) - 1);
wobblex = (localrand >> 16)&((1 << INDEXSIZE) - 1);
wobbley = (localrand >> 24)&((1 << INDEXSIZE) - 1);
wobbleSignx = (localrand >> 30) & 1;
wobbleSigny = (localrand >> 31) & 1;
swap = (localrand >> 7)&((1 << 3) - 1);
neg = (localrand >> 12)&((1 << 3) - 1);
x += (((vectorX[swapTable[0][swap]][arcx][arcy] +
vectorY[swapTable[0][swap]][arcx][arcy] * wobbleFactors[0][wobblex]) * wobbleFactors[1][wobblex] * inversionTable[0][wobbleSignx] +
vectorZ[swapTable[0][swap]][arcx][arcy] * wobbleFactors[0][wobbley]) * wobbleFactors[1][wobbley] * inversionTable[0][wobbleSigny]) * inversionTable[0][neg];
y += (((vectorX[swapTable[1][swap]][arcx][arcy] +
vectorY[swapTable[1][swap]][arcx][arcy] * wobbleFactors[0][wobblex]) * wobbleFactors[1][wobblex] * inversionTable[0][wobbleSignx] +
vectorZ[swapTable[1][swap]][arcx][arcy] * wobbleFactors[0][wobbley]) * wobbleFactors[1][wobbley] * inversionTable[0][wobbleSigny]) * inversionTable[1][neg];
z += abs((((vectorX[swapTable[2][swap]][arcx][arcy] +
vectorY[swapTable[2][swap]][arcx][arcy] * wobbleFactors[0][wobblex]) * wobbleFactors[1][wobblex] * inversionTable[0][wobbleSignx] +
vectorZ[swapTable[2][swap]][arcx][arcy] * wobbleFactors[0][wobbley]) * wobbleFactors[1][wobbley] * inversionTable[0][wobbleSigny]) * inversionTable[2][neg]);
cumulative++; //make sure it at least enters every loop
}
pos[dummy][0] = x;
pos[dummy][1] = y;
pos[dummy][2] = z;
non += pos[dummy][0];
}
}
duration = (clock() - start) / (double)CLOCKS_PER_SEC;
cout << "dummy value is " << non;
cout << "\nDuration is " << duration << "\n";
cout << "A total of " << cumulative << " steps were taken.\n";
cout << "Score (M steps/sec) is " << cumulative / (duration * 1000000) << ".";
cout << "Press any key to continue\n";
cin >> threadnum;
return 0;
}