AnTuTu and Intel

Page 7 - Seeking answers? Join the AnandTech community: where nearly half-a-million members share solutions and discuss the latest tech.

Schmide

Diamond Member
Mar 7, 2002
5,745
1,036
126
I had some free time so I decided to play with icc 13.

First, a handy site: http://gcc.godbolt.org/. It lets you enter code and see the resulting assembly after compilation with gcc, icc, clang and gcc-arm.

Nice tool.

Halving the loop count and unrolling the body twice breaks the optimization.

Code:
void r(unsigned *bitmap)  // sets bits 0..nb-1 of bitmap, two bits per loop pass
{
  unsigned baddr = 0;  // index of the next bit to set
  unsigned nb = 32;  // total number of bits to set
  unsigned overun = nb & 1;  // 1 when nb is odd: one bit is left over after the pairs

  nb>>=1;  // from here on nb counts pairs of bits
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // word = baddr / 32, bit = baddr % 32
    baddr++;
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // second bit of the pair
    baddr++;
  }
  if(overun)
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // leftover bit when nb was odd
}

-O3 -m32

Code:
# icc -O3 -m32 output for the paired-loop r() (32-bit x86, AT&T syntax).
# The loop is kept as a real loop: 16 passes, two single-bit ORs per pass.
# Loop registers: %esi = bitmap pointer, %eax = baddr, %edx = passes left - 1.
L__routine_start__Z1rPj_0:
r(unsigned int*):
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        pushl     %ebx                                          #4.1
        xorl      %eax, %eax                                    #
        # Three pushes plus the return address put the argument at 16(%esp).
        movl      16(%esp), %ecx                                #3.6
        movl      $15, %edx                                     #
        movl      %ecx, %esi                                    #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First bit of the pair: %ebx = baddr >> 5 (word index), %edi = 1 << baddr.
        # No explicit "& 0x1f" is emitted: shll by %cl only uses the low 5 bits of %cl.
        movl      %eax, %ebx                                    #11.21
        movl      %eax, %ecx                                    #11.41
        shrl      $5, %ebx                                      #11.21
        movl      $1, %edi                                      #11.41
        shll      %cl, %edi                                     #11.41
        # %ecx = baddr + 1 for the second bit; baddr itself advances by 2 per pass.
        lea       1(%eax), %ecx                                 #14.5
        decl      %edx                                          #10.10
        addl      $2, %eax                                      #14.5
        orl       %edi, (%esi,%ebx,4)                           #11.5
        # Second bit of the pair, same word-index/mask computation on baddr + 1.
        movl      %ecx, %ebx                                    #13.21
        shrl      $5, %ebx                                      #13.21
        movl      $1, %edi                                      #13.41
        shll      %cl, %edi                                     #13.41
        orl       %edi, (%esi,%ebx,4)                           #13.5
        # Leave once the counter has stepped from 0 to -1 (mirrors `while (nb--)`).
        cmpl      $-1, %edx                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # No code for the odd-count tail follows; nb = 32 is even in the source.
        popl      %ebx                                          #18.1
        popl      %edi                                          #18.1
        popl      %esi                                          #18.1
        ret                                                     #18.1



Edit: This one is funny.

Count halved, followed by two consecutive loops that are identical except for their counters, plus an overrun step if necessary.

Code:
// Type your code here, or load an example.

void r(unsigned *bitmap)  // sets bits 0..nb-1 of bitmap using two back-to-back half-length loops
{
  unsigned baddr = 0;  // index of the next bit to set
  unsigned nb = 32;  // total number of bits to set
  unsigned overun = nb & 1;  // 1 when nb is odd: one bit is left over after both loops
  nb>>=1;  // each loop handles half of the bits
  unsigned nb2=nb; // separate counter for the second loop
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // word = baddr / 32, bit = baddr % 32
    baddr++;
  }
  while (nb2--) {  // same body as the first loop; continues from where baddr stopped
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  if(overun)
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // leftover bit when nb was odd
}

-O3 -m32

Code:
# icc -O3 -m32 output for the two-loop r() (32-bit x86, AT&T syntax).
# The first loop stays a 16-pass bit-at-a-time loop (..B1.2).
# The second loop is replaced by mask-based code that ORs a whole run of bits:
# a single-word path (..L12) and a multi-word path (fall-through, ..L14, ..L13).
L__routine_start__Z1rPj_0:
r(unsigned int*):
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        # Third push only reserves a stack slot: %ebp is stored into it below,
        # reloaded after the first loop, and the slot is discarded by "popl %ecx".
        pushl     %esi                                          #4.1
        xorl      %edx, %edx                                    #
        movl      $15, %esi                                     #
        movl      %edx, %ecx                                    #
        # Three pushes plus the return address put the argument at 16(%esp).
        movl      16(%esp), %eax                                #3.6
        # %edi keeps a copy of the initial counter (15) for the second phase.
        movl      %esi, %edi                                    #
        movl      %ebp, (%esp)                                  #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First loop: %ecx = baddr, %edx = word index, %ebp = 1 << baddr,
        # %esi = passes left - 1; exits when the counter steps from 0 to -1.
        movl      %ecx, %edx                                    #11.21
        movl      $1, %ebp                                      #11.41
        shrl      $5, %edx                                      #11.21
        decl      %esi                                          #10.10
        shll      %cl, %ebp                                     #11.41
        incl      %ecx                                          #12.5
        orl       %ebp, (%eax,%edx,4)                           #11.5
        cmpl      $-1, %esi                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Second phase setup:
        #   %edx = baddr & 31 (bit offset inside the first word)
        #   %eax = address of word baddr >> 5
        #   %esi = bit count (%edi + 1), %ecx = offset + count
        movl      %ecx, %edx                                    #
        lea       1(%edi), %ecx                                 #14.10
        movl      (%esp), %ebp                                  #
        movl      %edx, %esi                                    #14.10
        andl      $31, %edx                                     #14.10
        shrl      $5, %esi                                      #14.10
        lea       (%eax,%esi,4), %eax                           #14.10
        movl      %ecx, %esi                                    #14.10
        addl      %edx, %ecx                                    #14.10
        # offset + count <= 32: the whole run fits in one word.
        cmpl      $32, %ecx                                     #14.10
        jbe       ..L12         # Prob 50%                      #14.10
        # Multi-word run. First word: OR in (-1 << offset).
        # %esi then tracks offset + count minus the bits already covered.
        movl      %ecx, %esi                                    #14.10
        movl      %edx, %ecx                                    #14.10
        movl      $-1, %edi                                     #14.10
        shll      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
        subl      $32, %esi                                     #14.10
        addl      $4, %eax                                      #14.10
        movl      $-1, %edi                                     #14.10
        cmpl      $32, %esi                                     #14.10
        jbe       ..L13         # Prob 50%                      #14.10
..L14:                                                          #
        # Middle words: store all ones while more than 32 bits remain.
        movl      %edi, (%eax)                                  #14.10
        addl      $4, %eax                                      #14.10
        subl      $32, %esi                                     #14.10
        cmpl      $32, %esi                                     #14.10
        ja        ..L14         # Prob 50%                      #14.10
..L13:                                                          #
        # Last word: OR in (-1 >> (32 - remaining)).
        movl      $32, %ecx                                     #14.10
        subl      %esi, %ecx                                    #14.10
        shrl      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
        jmp       ..L15         # Prob 100%                     #14.10
..L12:                                                          #
        # Single-word run: mask = (-1 >> (32 - count)) << offset, ORed into one word.
        movl      $-1, %edi                                     #14.10
        movl      $32, %ecx                                     #14.10
        subl      %esi, %ecx                                    #14.10
        shrl      %cl, %edi                                     #14.10
        movl      %edx, %ecx                                    #14.10
        shll      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
..L15:                                                          #
        # "popl %ecx" drops the reserved slot; then restore %edi and %esi.
        popl      %ecx                                          #20.1
        popl      %edi                                          #20.1
        popl      %esi                                          #20.1
        ret                                                     #20.1

icc skips the optimization for the first loop but applies it to the second.

Changing nb to an odd number breaks the optimization for the second loop as well!

Code:
void r(unsigned *bitmap)  // sets bits 0..nb-1 of bitmap using two half-length loops plus one leftover bit
{
  unsigned baddr = 0;  // index of the next bit to set
  unsigned nb = 65;  // total number of bits to set (odd in this variant)
  unsigned overun = nb & 1;  // 1 here because nb is odd: one bit is left over after both loops
  nb>>=1;  // each loop handles nb / 2 bits (rounded down)
  unsigned nb2=nb; // separate counter for the second loop
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // word = baddr / 32, bit = baddr % 32
    baddr++;
  }
  while (nb2--) {  // same body as the first loop; continues from where baddr stopped
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  if(overun)
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);  // leftover bit for the odd count
}

-O3 -m32

Code:
        # icc -O3 -m32 listing for the nb = 65 variant (function labels omitted in this paste).
        # Both source loops remain bit-at-a-time loops here, followed by one leftover bit set.
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        # Extra slot: pushed here and discarded by "popl %ecx" at the end;
        # nothing is stored in it in this listing.
        pushl     %esi                                          #4.1
        xorl      %ecx, %ecx                                    #
        # Three pushes plus the return address put the argument at 16(%esp).
        movl      16(%esp), %edx                                #
        movl      $31, %eax                                     #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First loop: %ecx = baddr, %esi = word index, %edi = 1 << baddr,
        # %eax = passes left - 1 (starts at 31); exits when it steps from 0 to -1.
        movl      %ecx, %esi                                    #11.21
        movl      $1, %edi                                      #11.41
        shrl      $5, %esi                                      #11.21
        decl      %eax                                          #10.10
        shll      %cl, %edi                                     #11.41
        incl      %ecx                                          #12.5
        orl       %edi, (%edx,%esi,4)                           #11.5
        cmpl      $-1, %eax                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Second loop setup: reload the bitmap pointer into %edi and reset the counter.
        movl      16(%esp), %edi                                #14.10
        movl      $31, %eax                                     #14.10
..B1.4:                         # Preds ..B1.4 ..B1.3
        # Second loop: same shape as the first, continuing from the current baddr in %ecx.
        movl      %ecx, %edx                                    #15.21
        movl      $1, %esi                                      #15.41
        shrl      $5, %edx                                      #15.21
        decl      %eax                                          #14.10
        shll      %cl, %esi                                     #15.41
        incl      %ecx                                          #16.5
        orl       %esi, (%edi,%edx,4)                           #15.5
        cmpl      $-1, %eax                                     #14.10
        jne       ..B1.4        # Prob 82%                      #14.10
        # Leftover bit: %eax = baddr >> 5, %edx = 1 << baddr, ORed into the bitmap word.
        movl      %ecx, %eax                                    #19.21
        movl      $1, %edx                                      #19.41
        shrl      $5, %eax                                      #19.21
        shll      %cl, %edx                                     #19.41
        movl      16(%esp), %ecx                                #19.5
        orl       %edx, (%ecx,%eax,4)                           #19.5
        popl      %ecx                                          #20.1
        popl      %edi                                          #20.1
        popl      %esi                                          #20.1
        ret                                                     #20.1

The optimization is skipped because of the overrun.
 
Last edited:

Idontcare

Elite Member
Oct 10, 1999
21,110
64
91
Thanks guys, got it. So then, this means Intel really is cheating again, huh.

Not Intel, just anyone using ICC in a way that results in the creatively optimized code ;)

Remember gun manufacturers make guns, but that doesn't mean they kill people or are responsible for firearm related deaths. Same with planes, trains, and automobiles.