AnTuTu and Intel

Schmide · Jul 23, 2013

Nothingness said:
I had some free time so I decided to play with icc 13.

First a handy site: http://gcc.godbolt.org/. This allows you to enter code and see the resulting assembly after compilation with gcc, icc, clang and gcc-arm.

Nice tool.

Halving the loop count and unrolling the body by hand (two bit-sets per iteration) breaks the optimization.

Code:

// Sets the first `nb` bits of `bitmap` (32-bit words, bit i lives in word i >> 5
// at position i & 0x1f) using a hand-unrolled loop: the count is halved and each
// iteration sets two consecutive bits. With nb fixed at 32 this is 16 iterations
// covering bits 0..31, so only bitmap[0] is written.
// NOTE(review): `1` is a signed int and the shift count reaches 31 here; `1u`
// would be the strictly portable spelling -- confirm before reusing this code.
void r(unsigned *bitmap)
{
  unsigned baddr = 0;                 // index of the next bit to set
  unsigned nb = 32;                   // total number of bits to set
  unsigned overun = nb & 1;           // 1 when nb is odd: one bit is left over after pairing

  nb>>=1;                             // number of two-bit iterations
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  // Leftover bit for an odd nb (never taken with nb == 32).
  if(overun)
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
}

-O3 -m32

Code:

# icc output (-O3 -m32, AT&T syntax) for the hand-unrolled r(). The loop stays a
# scalar loop that sets two bits per iteration.
# Register roles inside the loop:
#   %esi = bitmap, %eax = baddr, %edx = down-counter, %ebx/%ecx/%edi = scratch.
# No code follows the loop for `if (overun)`; presumably it was folded away
# because nb is the constant 32.
L__routine_start__Z1rPj_0:
r(unsigned int*):
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        pushl     %ebx                                          #4.1
        # baddr = 0
        xorl      %eax, %eax                                    #
        # Three pushes plus the return address put the `bitmap` argument at 16(%esp).
        movl      16(%esp), %ecx                                #3.6
        # Counter starts at 15 and the loop exits when it reaches -1: 16 iterations.
        movl      $15, %edx                                     #
        movl      %ecx, %esi                                    #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First bit of the pair: word index in %ebx, mask 1 << baddr in %edi.
        # There is no explicit `andl $31`: a 32-bit shift by %cl only uses the
        # low five bits of the count.
        movl      %eax, %ebx                                    #11.21
        movl      %eax, %ecx                                    #11.41
        shrl      $5, %ebx                                      #11.21
        movl      $1, %edi                                      #11.41
        shll      %cl, %edi                                     #11.41
        # %ecx = baddr + 1 (index of the second bit); baddr itself advances by 2.
        lea       1(%eax), %ecx                                 #14.5
        decl      %edx                                          #10.10
        addl      $2, %eax                                      #14.5
        orl       %edi, (%esi,%ebx,4)                           #11.5
        # Second bit of the pair, same sequence using baddr + 1.
        movl      %ecx, %ebx                                    #13.21
        shrl      $5, %ebx                                      #13.21
        movl      $1, %edi                                      #13.41
        shll      %cl, %edi                                     #13.41
        orl       %edi, (%esi,%ebx,4)                           #13.5
        cmpl      $-1, %edx                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Restore the saved registers in reverse push order.
        popl      %ebx                                          #18.1
        popl      %edi                                          #18.1
        popl      %esi                                          #18.1
        ret                                                     #18.1

Edit: This one is funny.

The count is halved and split across two consecutive loops that are identical except for their counters, followed by a single overrun step if the count is odd.

Code:

// Type your code here, or load an example.

// Sets the first `nb` bits of `bitmap` (32-bit words, bit i lives in word i >> 5
// at position i & 0x1f) using two back-to-back loops of nb / 2 iterations each,
// both advancing the same `baddr`. With nb fixed at 32 the first loop sets bits
// 0..15 and the second sets bits 16..31, so only bitmap[0] is written.
// NOTE(review): `1` is a signed int and the shift count reaches 31 here; `1u`
// would be the strictly portable spelling -- confirm before reusing this code.
void r(unsigned *bitmap)
{
  unsigned baddr = 0;                 // index of the next bit to set
  unsigned nb = 32;                   // total number of bits to set
  unsigned overun = nb & 1;           // 1 when nb is odd: one bit is left over after halving
  nb>>=1;                             // iterations of the first loop
  unsigned nb2=nb; 
  // First half of the bits.
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  // Second half; continues from where the first loop left baddr.
  while (nb2--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  // Leftover bit for an odd nb (never taken with nb == 32).
  if(overun)
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
}

-O3 -m32

Code:

# icc output (-O3 -m32, AT&T syntax) for the two-loop r() with nb = 32.
# The first source loop is emitted as a scalar one-bit-per-iteration loop
# (..B1.2). The second source loop is not emitted as a loop over bits at all:
# it is replaced by a "set a run of consecutive bits" sequence that ORs masks
# into the first and last words and stores all-ones into any whole words in
# between (..L12 / ..L13 / ..L14).
L__routine_start__Z1rPj_0:
r(unsigned int*):
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        # This third push only reserves a 4-byte stack slot; %ebp is saved into
        # it below and the slot is discarded by `popl %ecx` at the end.
        pushl     %esi                                          #4.1
        xorl      %edx, %edx                                    #
        # %esi = first-loop down-counter (15 .. -1, i.e. 16 iterations).
        movl      $15, %esi                                     #
        # %ecx = baddr = 0
        movl      %edx, %ecx                                    #
        # %eax = bitmap (three pushes plus the return address = 16 bytes).
        movl      16(%esp), %eax                                #3.6
        # %edi keeps a copy of the initial counter for the second loop's count.
        movl      %esi, %edi                                    #
        # Save %ebp in the reserved slot so it can be used as scratch.
        movl      %ebp, (%esp)                                  #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First source loop: bitmap[baddr >> 5] |= 1 << baddr; baddr++.
        movl      %ecx, %edx                                    #11.21
        movl      $1, %ebp                                      #11.41
        shrl      $5, %edx                                      #11.21
        decl      %esi                                          #10.10
        shll      %cl, %ebp                                     #11.41
        incl      %ecx                                          #12.5
        orl       %ebp, (%eax,%edx,4)                           #11.5
        cmpl      $-1, %esi                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Set-up for the bit-run fill that replaces the second source loop:
        #   %edx = baddr & 31           (starting bit offset within the word)
        #   %eax = &bitmap[baddr >> 5]  (first word touched)
        #   %esi = %edi + 1             (number of bits to set)
        #   %ecx = offset + count
        movl      %ecx, %edx                                    #
        lea       1(%edi), %ecx                                 #14.10
        # %ebp is no longer needed as scratch; restore it.
        movl      (%esp), %ebp                                  #
        movl      %edx, %esi                                    #14.10
        andl      $31, %edx                                     #14.10
        shrl      $5, %esi                                      #14.10
        lea       (%eax,%esi,4), %eax                           #14.10
        movl      %ecx, %esi                                    #14.10
        addl      %edx, %ecx                                    #14.10
        # If offset + count <= 32 the whole run fits in one word: go to ..L12.
        cmpl      $32, %ecx                                     #14.10
        jbe       ..L12         # Prob 50%                      #14.10
        # Multi-word run. First word: OR in (~0 << offset).
        movl      %ecx, %esi                                    #14.10
        movl      %edx, %ecx                                    #14.10
        movl      $-1, %edi                                     #14.10
        shll      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
        # %esi = bits still to set after the first word; advance to the next word.
        subl      $32, %esi                                     #14.10
        addl      $4, %eax                                      #14.10
        movl      $-1, %edi                                     #14.10
        cmpl      $32, %esi                                     #14.10
        jbe       ..L13         # Prob 50%                      #14.10
..L14:                                                          #
        # Whole words in the middle of the run: plain store of all-ones.
        movl      %edi, (%eax)                                  #14.10
        addl      $4, %eax                                      #14.10
        subl      $32, %esi                                     #14.10
        cmpl      $32, %esi                                     #14.10
        ja        ..L14         # Prob 50%                      #14.10
..L13:                                                          #
        # Last word of a multi-word run: OR in (~0 >> (32 - remaining)).
        movl      $32, %ecx                                     #14.10
        subl      %esi, %ecx                                    #14.10
        shrl      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
        jmp       ..L15         # Prob 100%                     #14.10
..L12:                                                          #
        # Single-word run: mask = (~0 >> (32 - count)) << offset, OR'ed in once.
        movl      $-1, %edi                                     #14.10
        movl      $32, %ecx                                     #14.10
        subl      %esi, %ecx                                    #14.10
        shrl      %cl, %edi                                     #14.10
        movl      %edx, %ecx                                    #14.10
        shll      %cl, %edi                                     #14.10
        orl       %edi, (%eax)                                  #14.10
..L15:                                                          #
        # Drop the reserved slot (value popped into %ecx is unused), then
        # restore %edi and %esi.
        popl      %ecx                                          #20.1
        popl      %edi                                          #20.1
        popl      %esi                                          #20.1
        ret                                                     #20.1

The compiler skips the optimization for the first loop but applies it to the second.

Changing nb to an odd number breaks the optimization for the second loop as well!

Code:

// Same two-loop bit-setting routine as the nb = 32 variant, but with an odd
// count. With nb fixed at 65: overun is 1, each loop runs 32 times (bits 0..31
// and 32..63), and the trailing `if` sets bit 64, so bitmap[0..2] are written.
// NOTE(review): `1` is a signed int and the shift count reaches 31 here; `1u`
// would be the strictly portable spelling -- confirm before reusing this code.
void r(unsigned *bitmap)
{
  unsigned baddr = 0;                 // index of the next bit to set
  unsigned nb = 65;                   // total number of bits to set (odd)
  unsigned overun = nb & 1;           // 1 here: one bit is left over after halving
  nb>>=1;                             // iterations of the first loop
  unsigned nb2=nb; 
  // First half of the bits.
  while (nb--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  // Second half; continues from where the first loop left baddr.
  while (nb2--) {
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
    baddr++;
  }
  // Leftover bit, taken because nb was odd.
  if(overun)
    bitmap[baddr >> 5] |= 1 << (baddr & 0x1f);
}

-O3 -m32

Code:

        # icc output (-O3 -m32, AT&T syntax) for the two-loop r() with nb = 65.
        # Both source loops are emitted as scalar one-bit-per-iteration loops
        # (..B1.2 and ..B1.4), followed by straight-line code for the single
        # overrun bit. %ecx holds baddr throughout and doubles as the shift
        # count; each loop's counter in %eax runs 31 .. -1 (32 iterations).
        pushl     %esi                                          #4.1
        pushl     %edi                                          #4.1
        # Third push reserves a stack slot that nothing in this listing reads;
        # it is discarded by `popl %ecx` at the end.
        pushl     %esi                                          #4.1
        xorl      %ecx, %ecx                                    #
        # %edx = bitmap (three pushes plus the return address = 16 bytes).
        movl      16(%esp), %edx                                #
        movl      $31, %eax                                     #
..B1.2:                         # Preds ..B1.2 ..B1.1
        # First source loop: bitmap[baddr >> 5] |= 1 << baddr; baddr++.
        movl      %ecx, %esi                                    #11.21
        movl      $1, %edi                                      #11.41
        shrl      $5, %esi                                      #11.21
        decl      %eax                                          #10.10
        shll      %cl, %edi                                     #11.41
        incl      %ecx                                          #12.5
        orl       %edi, (%edx,%esi,4)                           #11.5
        cmpl      $-1, %eax                                     #10.10
        jne       ..B1.2        # Prob 82%                      #10.10
        # Reload bitmap into %edi and reset the counter for the second loop.
        movl      16(%esp), %edi                                #14.10
        movl      $31, %eax                                     #14.10
..B1.4:                         # Preds ..B1.4 ..B1.3
        # Second source loop: same body, with %edx/%esi/%edi in swapped roles.
        movl      %ecx, %edx                                    #15.21
        movl      $1, %esi                                      #15.41
        shrl      $5, %edx                                      #15.21
        decl      %eax                                          #14.10
        shll      %cl, %esi                                     #15.41
        incl      %ecx                                          #16.5
        orl       %esi, (%edi,%edx,4)                           #15.5
        cmpl      $-1, %eax                                     #14.10
        jne       ..B1.4        # Prob 82%                      #14.10
        # Overrun bit: bitmap[baddr >> 5] |= 1 << baddr, emitted unconditionally.
        movl      %ecx, %eax                                    #19.21
        movl      $1, %edx                                      #19.41
        shrl      $5, %eax                                      #19.21
        shll      %cl, %edx                                     #19.41
        movl      16(%esp), %ecx                                #19.5
        orl       %edx, (%ecx,%eax,4)                           #19.5
        # Drop the reserved slot, then restore %edi and %esi.
        popl      %ecx                                          #20.1
        popl      %edi                                          #20.1
        popl      %esi                                          #20.1
        ret                                                     #20.1

The optimization is skipped because of the overrun step.

Idontcare · Jul 23, 2013

ashetos said:
Thanks guys, got it. So then, this means intel really is cheating again, huh.

Not Intel, just anyone using ICC in a way that results in the creatively optimized code.

Remember gun manufacturers make guns, but that doesn't mean they kill people or are responsible for firearm-related deaths. Same with planes, trains, and automobiles.

Search

AnTuTu and Intel

Schmide

Diamond Member

Idontcare

Elite Member

TRENDING THREADS