; Mixed source/disassembly listing (32-bit x86, SSE2 + MMX) showing the tail of
; a two-level loop nest. The outer counter i lives in ecx; the inner counter j
; is tracked through eax, which holds j+1. 1389h == 5001, the bound of both loops.
; The compiler has interleaved the b and c computations, so an instruction does
; not always implement the source line printed directly above it.
; NOTE(review): the prologue and the statements that initialise i and `a` are
; outside this excerpt.
; NOTE(review): mm0 is used throughout and no EMMS is visible in this range -
; confirm one is executed before any x87 code runs.
for(int j=i;j<5001;j+=2)
; Inner-loop pre-test (j starts at i): the flags set here are consumed by the
; jge at 1371044h; the lea/mov/SSE instructions in between leave EFLAGS alone.
01371020 cmp ecx,1389h
; eax = i+1; inside the inner loop eax always holds j+1.
01371026 lea eax,[ecx+1]
; Build the integer pair (i, i+1) at [esp+10h], convert both lanes to double
; and square them. xmm2 keeps the squared pair for the whole inner loop; it is
; the addend at both addpd instructions, where the source adds a.m_vec128d.
01371029 mov dword ptr [esp+10h],ecx
0137102D mov dword ptr [esp+14h],eax
01371031 movq mm0,mmword ptr [esp+10h]
01371036 cvtpi2pd xmm2,mm0
0137103A mulpd xmm2,xmm2
; NOTE(review): movapd needs a 16-byte aligned address, so esp is assumed to be
; 16-byte aligned here - confirm against the prologue.
0137103E movapd xmmword ptr [esp+10h],xmm2
; i >= 5001: skip the inner loop and go to the outer-loop increment.
01371044 jge Pythsse+0BAh (13710BAh)
; Otherwise enter the inner-loop body.
01371046 jmp Pythsse+50h (1371050h)
; Filler bytes: lea esp,[esp] leaves esp unchanged; with the nop it occupies
; 1371048h..137104Fh so that the loop top sits at 1371050h. No visible branch
; targets these two instructions.
01371048 lea esp,[esp]
0137104F nop
{
sSimdScalar b,c,d,e;
#ifdef _OUTPUTENABLE
sSimdScalarInt bInt, cInt;
#endif
b.m_ints[0]=j; // load
; Inner-loop top. edx = eax-1 = j, stored as the low int of b at [esp+20h].
01371050 lea edx,[eax-1]
01371053 mov dword ptr [esp+20h],edx
b.m_ints[1]=j+1;
; High int of b = j+1.
01371057 mov dword ptr [esp+24h],eax
c.m_ints[0]=j+1;
c.m_ints[1]=j+2;
b.m_vec128d=_mm_cvtpi32_pd(b.m_vec64[0]); // convert double
; Load b's two ints as one 64-bit value and convert them to two doubles in
; xmm0. edx = eax+1 = j+2 is prepared for c's high int.
0137105B movq mm0,mmword ptr [esp+20h]
01371060 lea edx,[eax+1]
01371063 cvtpi2pd xmm0,mm0
b.m_vec128d=_mm_mul_pd(b.m_vec128d,b.m_vec128d); // square
; xmm0 = b squared, lane by lane.
01371067 mulpd xmm0,xmm0
; Stores for the two c.m_ints assignments above: [esp+30h] = j+1, [esp+34h] = j+2.
0137106B mov dword ptr [esp+30h],eax
0137106F mov dword ptr [esp+34h],edx
c.m_vec128d=_mm_cvtpi32_pd(c.m_vec64[0]);
; Load c's two ints into mm0; the conversion to double follows at 1371080h.
01371073 movq mm0,mmword ptr [esp+30h]
c.m_vec128d=_mm_mul_pd(c.m_vec128d,c.m_vec128d);
b.m_vec128d=_mm_add_pd(a.m_vec128d,b.m_vec128d); // add
; xmm0 = b squared + xmm2 (the squared pair computed before the loop).
01371078 addpd xmm0,xmm2
c.m_vec128d=_mm_add_pd(a.m_vec128d,c.m_vec128d);
d.m_vec128d=_mm_sqrt_pd(b.m_vec128d); // sqrt
; xmm0 = sqrt of the b sums; xmm1 = c's ints converted to doubles.
0137107C sqrtpd xmm0,xmm0
01371080 cvtpi2pd xmm1,mm0
e.m_vec128d=_mm_sqrt_pd(c.m_vec128d);
b.m_vec64[0]=_mm_cvtpd_pi32(d.m_vec128d); // convert int
; Convert the two square roots to int32 in mm0. cvtpd2pi rounds according to
; the MXCSR rounding mode; it is not the truncating form.
01371084 cvtpd2pi mm0,xmm0
c.m_vec64[0]=_mm_cvtpd_pi32(e.m_vec128d);
#ifdef _OUTPUTENABLE
bInt.m_vec64=b.m_vec64[0];
cInt.m_vec64=c.m_vec64[0];
#endif
b.m_vec128d=_mm_cvtpi32_pd(b.m_vec64[0]); // convert back double
; Rounded ints back to doubles (xmm0), later stored as b at [esp+20h].
01371088 cvtpi2pd xmm0,mm0
; j += 2: eax now holds the next j+1.
0137108C add eax,2
; Same square / add / sqrt sequence for c, using xmm1.
0137108F mulpd xmm1,xmm1
01371093 movapd xmmword ptr [esp+20h],xmm0
01371099 addpd xmm1,xmm2
0137109D sqrtpd xmm0,xmm1
; edx = new j; loop test j < 5001. The flags are consumed by the jl at
; 13710B8h; the conversions and the store in between do not touch EFLAGS.
013710A1 lea edx,[eax-1]
013710A4 cmp edx,1389h
013710AA cvtpd2pi mm0,xmm0
c.m_vec128d=_mm_cvtpi32_pd(c.m_vec64[0]);
; Rounded ints of the c square roots back to doubles, stored as c at [esp+30h].
013710AE cvtpi2pd xmm0,mm0
013710B2 movapd xmmword ptr [esp+30h],xmm0
; Back-edge of the inner loop.
013710B8 jl Pythsse+50h (1371050h)
for(int i=1;i<5001;i+=2)
; Outer-loop step: i += 2, then repeat from 1371020h while i < 5001.
013710BA add ecx,2
013710BD cmp ecx,1389h
013710C3 jl Pythsse+20h (1371020h)
d.m_vec128d=_mm_cmpeq_pd(b.m_vec128d, d.m_vec128d); // compare int to double
e.m_vec128d=_mm_cmpeq_pd(c.m_vec128d, e.m_vec128d);