Code optimization <LEA Instruction>

Richard B. Johnson root en chaos.analogic.com
Vie Ene 28 17:42:43 CST 2000


On Fri, 28 Jan 2000, Jesse Pollard wrote:
[SNIPPED..]
> 
> Just curious, what happens to the time if they are mixed (addl between
> the leal using different registers)? Doesn't the address calculation overlap
> the arithmetic operation? The example looks rather artificial to demonstrate
> the worst case by filling the hardware pipeline without the option of
> re-ordering.

This was not an example of how to do pipe-lining.

> 
> >Unfortunately, I see that much of the indexed address generation
> >done in new versions of the C compiler arbitrarily uses LEA, followed
> >immediately by the memory fetch using the newly generated address.
> >This will be slower than simple math to obtain the address.
> 

> Doesn't that sound more like a lack of proper optimization? If it were
> optimized I would expect to see some addl on index registers mixed with
> leal on other registers.

The output of recent versions of the GCC compiler shows that, where simple
offsets were previously calculated by early versions, there is now
excessive use of LEA, with the calculated address immediately being used.

This is as though somebody just changed the code generator to use LEA
wherever possible. Here's an example, the dumped output from sched.o.
I show the multiple times where LEA is used to calculate an address
without any intervening instruction(s) before this address is used.

This is from GCC version 2.7.2.3, the exact utility that
./linux/Documentation/Changes requires.

In most cases LEA is used where an offset is already known at compile-
time. Note the stack operations using LEA and that the "pop" instruction
uses the stack-pointer that has just been changed in value using LEA. This
is a 3-clock penalty.

     333:	8d 65 e0             	lea    0xffffffe0(%ebp),%esp
     336:	5b                   	pop    %ebx

0000033c <process_timeout>:

     667:	8d 65 e0             	lea    0xffffffe0(%ebp),%esp
     66a:	5b                   	pop    %ebx

00000670 <schedule_timeout>:
     6da:	8d 5d ec             	lea    0xffffffec(%ebp),%ebx
     6dd:	53                   	push   %ebx 

00000708 <schedule_tail>:

     9fe:	8d 65 e4             	lea    0xffffffe4(%ebp),%esp
     a01:	5b                   	pop    %ebx

00000a08 <schedule>:
     d78:	8d 1c 9d 0d 00 00 00 	lea    0xd(,%ebx,4),%ebx
     d7f:	8d 0c dd 00 00 00 00 	lea    0x0(,%ebx,8),%ecx

     dad:	8d 76 00             	lea    0x0(%esi),%esi ! NOP

     fda:	8d 5c 1e 01          	lea    0x1(%esi,%ebx,1),%ebx
     fde:	89 5d cc             	mov    %ebx,0xffffffcc(%ebp)

    1005:	8d 76 00             	lea    0x0(%esi),%esi ! NOP

    10a2:	8d 44 02 01          	lea    0x1(%edx,%eax,1),%eax
    10a6:	89 45 d4             	mov    %eax,0xffffffd4(%ebp)

    1115:	8d 76 00             	lea    0x0(%esi),%esi
    1118:	8b 71 38             	mov    0x38(%ecx),%esi


    1335:	8d 76 00             	lea    0x0(%esi),%esi ! NOP

    146c:	8d 65 c0             	lea    0xffffffc0(%ebp),%esp


00001474 <__wake_up>:

 
    19ee:	8d 65 c4             	lea    0xffffffc4(%ebp),%esp
    19f1:	5b                   	pop    %ebx

    21a3:	8d 65 e0             	lea    0xffffffe0(%ebp),%esp
    21a6:	5b                   	pop    %ebx

000021ac <interruptible_sleep_on_timeout>:
    23e4:	8d 65 dc             	lea    0xffffffdc(%ebp),%esp
    23e7:	5b                   	pop    %ebx

000023ec <sleep_on>:
    261b:	8d 65 e0             	lea    0xffffffe0(%ebp),%esp
    261e:	5b                   	pop    %ebx

00002624 <sleep_on_timeout>:

    2656:	8d 4d f8             	lea    0xfffffff8(%ebp),%ecx
    2659:	89 4d f8             	mov    %ecx,0xfffffff8(%ebp)


    2745:	8d 4d f0             	lea    0xfffffff0(%ebp),%ecx
    2748:	89 48 04             	mov    %ecx,0x4(%eax)

    274e:	8d 4b 08             	lea    0x8(%ebx),%ecx
    2751:	89 4d f4             	mov    %ecx,0xfffffff4(%ebp)

    2754:	8d 4d f0             	lea    0xfffffff0(%ebp),%ecx
    2757:	89 4b 08             	mov    %ecx,0x8(%ebx)


    285c:	8d 65 dc             	lea    0xffffffdc(%ebp),%esp
    285f:	5b                   	pop    %ebx

    2b69:	8d 65 e8             	lea    0xffffffe8(%ebp),%esp
    2b6c:	5b                   	pop    %ebx

00002c28 <sys_sched_getparam>:
    2d08:	8d 65 ec             	lea    0xffffffec(%ebp),%esp
    2d0b:	5b                   	pop    %ebx


00002e30 <sys_sched_rr_get_interval>:
    2e81:	8d 65 ec             	lea    0xffffffec(%ebp),%esp
    2e84:	5b                   	pop    %ebx

00002e8c <show_task>:
    2e98:	8d 87 1a 03 00 00    	lea    0x31a(%edi),%eax
    2e9e:	50                   	push   %eax

    2f24:	8d 90 08 fb ff ff    	lea    0xfffffb08(%eax),%edx
    2f2a:	29 fa                	sub    %edi,%edx

    2fe5:	8d 75 ec             	lea    0xffffffec(%ebp),%esi
    2fe8:	56                   	push   %esi
    2fe9:	8d 87 ac 04 00 00    	lea    0x4ac(%edi),%eax
    2fef:	50                   	push   %eax

    2ff5:	8d 5d d8             	lea    0xffffffd8(%ebp),%ebx
    2ff8:	53                   	push   %ebx

    2ff9:	8d 87 b4 04 00 00    	lea    0x4b4(%edi),%eax
    2fff:	50                   	push   %eax

    304c:	8d 65 cc             	lea    0xffffffcc(%ebp),%esp
    304f:	5b                   	pop    %ebx

00003054 <render_sigset_t>:
    3079:	8d 79 02             	lea    0x2(%ecx),%edi
    307c:	89 7d fc             	mov    %edi,0xfffffffc(%ebp)

    30d8:	8d 65 f0             	lea    0xfffffff0(%ebp),%esp
    30db:	5b                   	pop    %ebx


00000000 <init_idle>:
  5e:	8d 65 f8             	lea    0xfffffff8(%ebp),%esp
  62:	5e                   	pop    %esi

Cheers,
Dick Johnson

Penguin : Linux version 2.3.39 on an i686 machine (800.63 BogoMips).


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo en vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



Más información sobre la lista de distribución Ayuda