// 3DPM v2.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include "math.h"
#include <string>
#include "stdio.h"
#include <iostream>
#include <iomanip>
#include "omp.h"
#include "windows.h"
#include "ran.h"
using namespace std;
#ifdef _DEBUG
static long particles = 20;
int steps = 2000;
float timeperloop = 5.0f;
#else
static long particles = 1e5;
int steps = 1000;
float timeperloop = 20.0f;
#endif
#define twopi 6.28318531f
Simul Sim13DTrig(Simul partic, Ranq2 &ran); // 3DTrig
Simul Sim2BiPy(Simul partic, Ranq2 &ran); // BiPy
Simul Sim3PolarReject(Simul partic, Ranq2 &ran); // PolarReject
Simul Sim4Cosine(Simul partic, Ranq2 &ran); // Cosine
Simul Sim5Hypercube(Simul partic, Ranq2 &ran); // Hypercube
Simul Sim6NormDev(Simul partic, Ranq2 &ran); // NormDev
Simul(*functptr[])(Simul parti, Ranq2 &ran) = { Sim13DTrig, Sim2BiPy, Sim3PolarReject, Sim4Cosine, Sim5Hypercube, Sim6NormDev };
char* names[] = { " 3DTrig", " BiPy", "PlrRjct", " Cosine", "HypCube", "NormDev" };
float outputFunc(double* x, double* y, double* z, double start, double end, int threads, int looptimer, int s);
void printCPUName();
char *trim(char *str);
int main()
{
//set high priority
SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
//get system info
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
MEMORYSTATUSEX statex;
statex.dwLength = sizeof(statex);
GlobalMemoryStatusEx(&statex);
// thread detect and display
int threads = omp_get_max_threads();
cout << " 3DPM v2.0 beta-1" << endl;
cout << " (c) Ian Cutress 2016" << endl;
cout << "/------------------------------------------------------------------------------\\";
cout << "|--- CPU ---/ "; printCPUName(); cout << endl;
cout << "|--- Cores / Threads ---/ " << sysInfo.dwNumberOfProcessors << " / " << threads << endl;
cout << "|--- Memory ---/ " << (statex.ullTotalPhys / 1024) / 1024 << "MB" << endl;
cout << "|------------------------------------------------------------------------------|";
cout << "|--- Points / Steps ---/ " << particles << " / " << steps << endl;
cout << "\\------------------------------------------------------------------------------/";
cout << endl;
cout << "Initializing six algorithms of >20 seconds each." << endl;
cout << "Stage 0";
for (int s = 0; s < 6; s++) // simulations
{
Simul(*functptr2)(Simul parti, Ranq2 &ran) = *functptr[s];
for (int i = 0; i < particles; i++) {
Simul partic;
Ranq2 ran(i);
for (int j = 0; j < 100; j++) {
partic = functptr2(partic, ran);
}
}
cout << "\rStage " << s+1 << " ";
for (int i = 0; i < s+1; i++) { cout << "."; }
}
cout << "\rInitialization Complete" << endl << endl;
// cout << "Max Number of Threads = " << threads << endl;
// cout << "Max Number of Particles = " << particles << endl;
// cout << "Max Number of Steps = " << steps << endl;
// final score counter
double totalScore = 0.0;
// initial final position calculation
double* sumx = new double[threads * 64];
double* sumy = new double[threads * 64];
double* sumz = new double[threads * 64];
for (int i = 0; i < threads; i++) {
int a = 64 * i;
sumx[a] = sumy[a] = sumz[a] = 0.0;
}
for (int s = 0; s < 6; s++) // simulations
{
// get correct function
Simul(*functptr2)(Simul parti, Ranq2 &ran) = *functptr[s];
// start timer
double start = omp_get_wtime();
// start looptimer counter
int looptimer = 0;
do {
looptimer++;
#pragma omp parallel for ordered
for (int i = 0; i < particles; i++) {
Simul partic;
Ranq2 ran(i);
for (int j = 0; j < steps; j++) {
partic = functptr2(partic, ran);
} // Simulation loop
#pragma omp atomic
sumx[omp_get_thread_num() * 64] += partic.x; // calc location from 0,0,0
#pragma omp atomic
sumy[omp_get_thread_num() * 64] += partic.y; // calc location from 0,0,0
#pragma omp atomic
sumz[omp_get_thread_num() * 64] += partic.z; // calc location from 0,0,0
}
double diff = omp_get_wtime() - start; // finish time
cout << "\r" << names[s] << " stage " << s+1 << ": " << looptimer << " loops @ " << diff;
} while (omp_get_wtime() - start < timeperloop); // end OpenMP loop
double end = omp_get_wtime(); // finish time
totalScore += outputFunc(sumx, sumy, sumz, start, end, threads, looptimer, s); // calculate final position
looptimer = 0;
for (int i = 0; i < threads; i++) {
int a = 64 * i;
sumx[a] = sumy[a] = sumz[a] = 0.0;
}
//write results files
}
cout << "|------------------------------------------------------------------------------|";
cout << "|--- Total Score --- | " << setprecision(3) << fixed << totalScore /1000 /1000 << " Mops/sec" << endl;
cout << "|------------------------------------------------------------------------------|";
system("pause");
return 0;
}
float outputFunc(double* x, double* y, double* z, double start, double end, int threads, int looptimer, int s) {
cout.precision(2);
for (int i = 1; i < threads; i++) {
x[0] += x[64 * i];
y[0] += y[64 * i];
z[0] += z[64 * i];
}
x[0] /= particles;
x[0] /= looptimer;
y[0] /= particles;
y[0] /= looptimer;
z[0] /= particles;
z[0] /= looptimer;
float magnitude = sqrtf(x[0]*x[0] + y[0]*y[0] + z[0]*z[0]);
float average = (double)looptimer * (double)steps * (double)(particles) / (end - start);
cout << setprecision(3) << fixed;
cout << "\r" << names[s] << " " << looptimer << "*(" << magnitude << ") ";
cout << setprecision(4) << fixed;
cout << "in " << end - start << " sec : " << (float)average / 1000 / 1000 << " Mops/sec" << endl;
return average;
}
Simul Sim13DTrig(Simul partic, Ranq2 &ran) {
// 3DTrig
float newz = 2.0f * ran.doub() - 1.0f;
float alpha = ran.doub() * twopi;
float r = sqrtf(1 - newz*newz);
partic.x += r*cosf(alpha);
partic.y += r*sinf(alpha);
partic.z += newz;
return partic;
}
Simul Sim2BiPy(Simul partic, Ranq2 &ran) {
// BiPy
int k = (int)floor(6 * ran.doub());
if (k == 0) { partic.x += 1; }
else if (k == 1) { partic.x -= 1; }
else if (k == 2) { partic.y += 1; }
else if (k == 3) { partic.y -= 1; }
else if (k == 4) { partic.z += 1; }
else { partic.z -= 1; }
return partic;
}
Simul Sim3PolarReject(Simul partic, Ranq2 &ran) {
//Polar Reject
float u, v, s;
do {
u = 2.0f * ran.doub() - 1.0f;
v = 2.0f * ran.doub() - 1.0f;
s = sqrtf(u*u + v*v);
} while (s > 1.0f);
float a = 2 * sqrt(1.0f - s);
partic.x += a * u;
partic.y += a * v;
partic.z += 2.0f * ran.doub() - 1.0f;
return partic;
}
Simul Sim4Cosine(Simul partic, Ranq2 &ran) {
// Cosine
float phi = twopi * ran.doub();
float theta = acosf(2 * ran.doub() - 1);
partic.x += sinf(phi) * sinf(theta);
partic.y += cosf(phi) * sinf(theta);
partic.z += cosf(theta);
return partic;
}
Simul Sim5Hypercube(Simul partic, Ranq2 &ran) {
//Hypercube
float u, v, w, s;
do {
u = 2.0f * ran.doub() - 1.0f;
v = 2.0f * ran.doub() - 1.0f;
w = 2.0f * ran.doub() - 1.0f;
s = sqrtf(u*u + v*v + w*w);
} while (s > 1.0f);
float invs = 1.0f / s;
partic.x += u * invs;
partic.y += v * invs;
partic.z += w * invs;
return partic;
}
Simul Sim6NormDev(Simul partic, Ranq2 &ran) {
// NormDev
float u = ran.normal(1);
float v = ran.normal(1);
float w = ran.normal(1);
float gamma = 1 / sqrtf(u*u + v*v + w*w);
partic.x += gamma * u;
partic.y += gamma * v;
partic.z += gamma * w;
return partic;
}
void printCPUName() {
int CPUInfo[4] = { -1 };
unsigned nExIds, i = 0;
char CPUBrandString[0x40];
// Get the information associated with each extended ID.
__cpuid(CPUInfo, 0x80000000);
nExIds = CPUInfo[0];
for (i = 0x80000000; i <= nExIds; ++i)
{
__cpuid(CPUInfo, i);
// Interpret CPU brand string
if (i == 0x80000002)
memcpy(CPUBrandString, CPUInfo, sizeof(CPUInfo));
else if (i == 0x80000003)
memcpy(CPUBrandString + 16, CPUInfo, sizeof(CPUInfo));
else if (i == 0x80000004)
memcpy(CPUBrandString + 32, CPUInfo, sizeof(CPUInfo));
}
//string includes manufacturer, model and clockspeed
cout << trim(CPUBrandString);
}
char *trim(char *str)
{
size_t len = 0;
char *frontp = str;
char *endp = NULL;
if (str == NULL) { return NULL; }
if (str[0] == '\0') { return str; }
len = strlen(str);
endp = str + len;
/* Move the front and back pointers to address the first non-whitespace
* characters from each end.
*/
while (isspace(*frontp)) { ++frontp; }
if (endp != frontp)
{
while (isspace(*(--endp)) && endp != frontp) {}
}
if (str + len - 1 != endp)
*(endp + 1) = '\0';
else if (frontp != str && endp == frontp)
*str = '\0';
/* Shift the string so that it starts at str so that if it's dynamically
* allocated, we can still free it on the returned pointer. Note the reuse
* of endp to mean the front of the string buffer now.
*/
endp = str;
if (frontp != str)
{
while (*frontp) { *endp++ = *frontp++; }
*endp = '\0';
}
return str;
}