public inbox for devel@edk2.groups.io
 help / color / mirror / Atom feed
* [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
@ 2018-11-07  4:03 Ruiyu Ni
  2018-11-07  8:46 ` Gao, Liming
  2018-11-07 14:31 ` Laszlo Ersek
  0 siblings, 2 replies; 9+ messages in thread
From: Ruiyu Ni @ 2018-11-07  4:03 UTC (permalink / raw)
  To: edk2-devel
  Cc: Liming Gao, Michael Kinney, Laszlo Ersek,
	Philippe Mathieu-Daudé

REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303

XCODE disassembly code of InternalSyncDecrement with today's code is:

  __asm__ __volatile__ (
    "movl    $1, %%eax  \n\t"
    "lock               \n\t"
    "xadd    %%eax, %1  \n\t"
    "inc     %%eax      \n\t"
    : "=a" (Result),          // %0
      "+m" (*Value)           // %1
    :                         // no inputs that aren't also outputs
    : "memory",
      "cc"
    );

 0:       55      pushl   %ebp
 1:       89 e5   movl    %esp, %ebp
 3:       8b 45 08        movl    8(%ebp), %eax
 6:       b8 01 00 00 00  movl    $1, %eax
 b:       f0      lock
 c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
 f:       40      incl    %eax
10:       5d      popl    %ebp
11:       c3      retl

%EAX value retrieved in line #3 is overwritten in line #6.

The patch uses the clobber list to tell GCC that EAX is used in ASM.

Contributed-under: TianoCore Contribution Agreement 1.1
Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Michael Kinney <michael.d.kinney@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
---
 MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
index af39bdeb51..0a985529fd 100644
--- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
+++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
@@ -40,11 +40,13 @@ InternalSyncIncrement (
     "lock               \n\t"
     "xadd    %%eax, %1  \n\t"
     "inc     %%eax      \n\t"
-    : "=a" (Result),          // %0
+    "mov     %%eax, %0  \n\t"
+    : "=r" (Result),          // %0
       "+m" (*Value)           // %1
     :                         // no inputs that aren't also outputs
     : "memory",
-      "cc"
+      "cc",
+      "eax"
     );
 
   return Result;
@@ -76,11 +78,13 @@ InternalSyncDecrement (
     "lock                \n\t"
     "xadd    %%eax, %1   \n\t"
     "dec     %%eax       \n\t"
-    : "=a" (Result),           // %0
+    "mov     %%eax, %0  \n\t"
+    : "=r" (Result),           // %0
       "+m" (*Value)            // %1
     :                          // no inputs that aren't also outputs
     : "memory",
-      "cc"
+      "cc",
+      "eax"
     );
 
   return Result;
-- 
2.16.1.windows.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07  4:03 [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement Ruiyu Ni
@ 2018-11-07  8:46 ` Gao, Liming
  2018-11-07 14:31 ` Laszlo Ersek
  1 sibling, 0 replies; 9+ messages in thread
From: Gao, Liming @ 2018-11-07  8:46 UTC (permalink / raw)
  To: Ni, Ruiyu, edk2-devel@lists.01.org; +Cc: Kinney, Michael D, Laszlo Ersek

Ray:
  The patch is good. Reviewed-by: Liming Gao <liming.gao@intel.com>

Thanks
Liming
> -----Original Message-----
> From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Ruiyu Ni
> Sent: Wednesday, November 7, 2018 12:04 PM
> To: edk2-devel@lists.01.org
> Cc: Kinney, Michael D <michael.d.kinney@intel.com>; Laszlo Ersek <lersek@redhat.com>; Gao, Liming <liming.gao@intel.com>
> Subject: [edk2] [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
> 
> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
> 
> XCODE disassembly code of InternalSyncDecrement with today's code is:
> 
>   __asm__ __volatile__ (
>     "movl    $1, %%eax  \n\t"
>     "lock               \n\t"
>     "xadd    %%eax, %1  \n\t"
>     "inc     %%eax      \n\t"
>     : "=a" (Result),          // %0
>       "+m" (*Value)           // %1
>     :                         // no inputs that aren't also outputs
>     : "memory",
>       "cc"
>     );
> 
>  0:       55      pushl   %ebp
>  1:       89 e5   movl    %esp, %ebp
>  3:       8b 45 08        movl    8(%ebp), %eax
>  6:       b8 01 00 00 00  movl    $1, %eax
>  b:       f0      lock
>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
>  f:       40      incl    %eax
> 10:       5d      popl    %ebp
> 11:       c3      retl
> 
> %EAX value retrieved in line #3 is overwritten in line #6.
> 
> The patch uses the clobber list to tell GCC that EAX is used in ASM.
> 
> Contributed-under: TianoCore Contribution Agreement 1.1
> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
> Cc: Liming Gao <liming.gao@intel.com>
> Cc: Michael Kinney <michael.d.kinney@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
> ---
>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> index af39bdeb51..0a985529fd 100644
> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> @@ -40,11 +40,13 @@ InternalSyncIncrement (
>      "lock               \n\t"
>      "xadd    %%eax, %1  \n\t"
>      "inc     %%eax      \n\t"
> -    : "=a" (Result),          // %0
> +    "mov     %%eax, %0  \n\t"
> +    : "=r" (Result),          // %0
>        "+m" (*Value)           // %1
>      :                         // no inputs that aren't also outputs
>      : "memory",
> -      "cc"
> +      "cc",
> +      "eax"
>      );
> 
>    return Result;
> @@ -76,11 +78,13 @@ InternalSyncDecrement (
>      "lock                \n\t"
>      "xadd    %%eax, %1   \n\t"
>      "dec     %%eax       \n\t"
> -    : "=a" (Result),           // %0
> +    "mov     %%eax, %0  \n\t"
> +    : "=r" (Result),           // %0
>        "+m" (*Value)            // %1
>      :                          // no inputs that aren't also outputs
>      : "memory",
> -      "cc"
> +      "cc",
> +      "eax"
>      );
> 
>    return Result;
> --
> 2.16.1.windows.1
> 
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07  4:03 [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement Ruiyu Ni
  2018-11-07  8:46 ` Gao, Liming
@ 2018-11-07 14:31 ` Laszlo Ersek
  2018-11-07 14:41   ` Laszlo Ersek
                     ` (2 more replies)
  1 sibling, 3 replies; 9+ messages in thread
From: Laszlo Ersek @ 2018-11-07 14:31 UTC (permalink / raw)
  To: Ruiyu Ni, edk2-devel
  Cc: Liming Gao, Michael Kinney, Philippe Mathieu-Daudé,
	Andrew Fish

(+Andrew)

Hi Ray,

On 11/07/18 05:03, Ruiyu Ni wrote:
> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
> 
> XCODE disassembly code of InternalSyncDecrement with today's code is:
> 
>   __asm__ __volatile__ (
>     "movl    $1, %%eax  \n\t"
>     "lock               \n\t"
>     "xadd    %%eax, %1  \n\t"
>     "inc     %%eax      \n\t"
>     : "=a" (Result),          // %0
>       "+m" (*Value)           // %1
>     :                         // no inputs that aren't also outputs
>     : "memory",
>       "cc"
>     );
> 
>  0:       55      pushl   %ebp
>  1:       89 e5   movl    %esp, %ebp
>  3:       8b 45 08        movl    8(%ebp), %eax
>  6:       b8 01 00 00 00  movl    $1, %eax
>  b:       f0      lock
>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
>  f:       40      incl    %eax
> 10:       5d      popl    %ebp
> 11:       c3      retl
> 
> %EAX value retrieved in line #3 is overwritten in line #6.

(a) This looks like an XCODE bug to me. The "=a" constraint on operand
%0 means that Result should be set from eax/rax, and that this operand
is "write only". Here's the gcc documentation:

"The ordinary output operands must be write-only; GCC assumes that the
values in these operands before the instruction are dead and need not be
generated."

Furthermore, if a register is named in any operand constraint (such as
input, output, input-output), then it cannot, by definition, be listed
in the "clobber list" -- it is *obvious* that the inline assembly will
work with that register. In case of an output-only operand, it's obvious
that the inline assembly will *overwrite* that register, and the
compiler cannot assume *when* that overwrite will happen. Here's the gcc
documentation:

"You may not write a clobber description in a way that overlaps with an
input or output operand.  For example, you may not have an operand
describing a register class with one member if you mention that register
in the clobber list.  Variables declared to live in specific registers
[...] and used as asm input or output operands must have no part
mentioned in the clobber description." [1]

However, XCODE generates such code that depends on EAX as an *input*
operand. That's clearly wrong and violates the constraints in the
operand list. This is an XCODE bug.

In fact: the situation is worse than that. In the commit message you
spell out that the MOV instruction at offset 6 overwrites the value in
EAX that was just loaded at offset 3. But this is just the small
problem; the *large* problem is the generated XADD instruction itself,
at offset 0xb and 0xc. The binary encoding is, from your commit message:

  f0 0f c1 00

and this is *exactly* the problem that my commit 8a94eb9283fa fixed for
gcc! From my commit message:

    >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)

Because, it makes *no sense* for XADD to use the AX register for *both*
pointer-to-memory (i.e. address of the destination location that
receives the sum) *and* as the other addend!

In other words, regardless of how we fill the AX register up-front, an
XADD instruction generated like this *cannot* be right.

> The patch uses the clobber list to tell GCC that EAX is used in ASM.
> 
> Contributed-under: TianoCore Contribution Agreement 1.1
> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
> Cc: Liming Gao <liming.gao@intel.com>
> Cc: Michael Kinney <michael.d.kinney@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
> ---
>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> index af39bdeb51..0a985529fd 100644
> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> @@ -40,11 +40,13 @@ InternalSyncIncrement (
>      "lock               \n\t"
>      "xadd    %%eax, %1  \n\t"
>      "inc     %%eax      \n\t"
> -    : "=a" (Result),          // %0
> +    "mov     %%eax, %0  \n\t"
> +    : "=r" (Result),          // %0
>        "+m" (*Value)           // %1
>      :                         // no inputs that aren't also outputs
>      : "memory",
> -      "cc"
> +      "cc",
> +      "eax"
>      );
>  
>    return Result;

(b) This change is invalid, for two separate reasons.

Reason #1: on the clobber list, the AX register should be listed as "a",
not as "eax".

Reason #2:

- For operand %0, we say "use any register in the 'r' class as
write-only". (The class "r" means "general register".) And, at the end
of the inline assembly, we store EAX manually to that 'r' class
register. And we rely on the compiler to store that other general
register into Result.

- We append the AX register (which should be spelled as "a") to the
clobber list. However, the "a" register is itself in the "general
register" class, and therefore this clobber list breaks the passage that
I quoted above, marked as [1]!


> @@ -76,11 +78,13 @@ InternalSyncDecrement (
>      "lock                \n\t"
>      "xadd    %%eax, %1   \n\t"
>      "dec     %%eax       \n\t"
> -    : "=a" (Result),           // %0
> +    "mov     %%eax, %0  \n\t"
> +    : "=r" (Result),           // %0
>        "+m" (*Value)            // %1
>      :                          // no inputs that aren't also outputs
>      : "memory",
> -      "cc"
> +      "cc",
> +      "eax"
>      );
>  
>    return Result;
> 

(c) The patch doesn't update the X64 variant.

I think we should be clear here that we are working around an XCODE bug.
Commit 8a94eb9283fa was different, because the code before that violated
the gcc documentation, and so the issue was in the code.


I'll comment more on your second email.

Thanks,
Laszlo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07 14:31 ` Laszlo Ersek
@ 2018-11-07 14:41   ` Laszlo Ersek
  2018-11-07 14:57     ` Ni, Ruiyu
  2018-11-07 14:45   ` Ard Biesheuvel
  2018-11-07 14:53   ` Ni, Ruiyu
  2 siblings, 1 reply; 9+ messages in thread
From: Laszlo Ersek @ 2018-11-07 14:41 UTC (permalink / raw)
  To: Ruiyu Ni, edk2-devel
  Cc: Liming Gao, Michael Kinney, Philippe Mathieu-Daudé,
	Andrew Fish

On 11/07/18 15:31, Laszlo Ersek wrote:
> (+Andrew)
> 
> Hi Ray,
> 
> On 11/07/18 05:03, Ruiyu Ni wrote:
>> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
>>
>> XCODE disassembly code of InternalSyncDecrement with today's code is:
>>
>>   __asm__ __volatile__ (
>>     "movl    $1, %%eax  \n\t"
>>     "lock               \n\t"
>>     "xadd    %%eax, %1  \n\t"
>>     "inc     %%eax      \n\t"
>>     : "=a" (Result),          // %0
>>       "+m" (*Value)           // %1
>>     :                         // no inputs that aren't also outputs
>>     : "memory",
>>       "cc"
>>     );
>>
>>  0:       55      pushl   %ebp
>>  1:       89 e5   movl    %esp, %ebp
>>  3:       8b 45 08        movl    8(%ebp), %eax
>>  6:       b8 01 00 00 00  movl    $1, %eax
>>  b:       f0      lock
>>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
>>  f:       40      incl    %eax
>> 10:       5d      popl    %ebp
>> 11:       c3      retl
>>
>> %EAX value retrieved in line #3 is overwritten in line #6.
> 
> (a) This looks like an XCODE bug to me. The "=a" constraint on operand
> %0 means that Result should be set from eax/rax, and that this operand
> is "write only". Here's the gcc documentation:
> 
> "The ordinary output operands must be write-only; GCC assumes that the
> values in these operands before the instruction are dead and need not be
> generated."
> 
> Furthermore, if a register is named in any operand constraint (such as
> input, output, input-output), then it cannot, by definition, be listed
> in the "clobber list" -- it is *obvious* that the inline assembly will
> work with that register. In case of an output-only operand, it's obvious
> that the inline assembly will *overwrite* that register, and the
> compiler cannot assume *when* that overwrite will happen. Here's the gcc
> documentation:
> 
> "You may not write a clobber description in a way that overlaps with an
> input or output operand.  For example, you may not have an operand
> describing a register class with one member if you mention that register
> in the clobber list.  Variables declared to live in specific registers
> [...] and used as asm input or output operands must have no part
> mentioned in the clobber description." [1]
> 
> However, XCODE generates such code that depends on EAX as an *input*
> operand. That's clearly wrong and violates the constraints in the
> operand list. This is an XCODE bug.
> 
> In fact: the situation is worse than that. In the commit message you
> spell out that the MOV instruction at offset 6 overwrites the value in
> EAX that was just loaded at offset 3. But this is just the small
> problem; the *large* problem is the generated XADD instruction itself,
> at offset 0xb and 0xc. The binary encoding is, from your commit message:
> 
>   f0 0f c1 00
> 
> and this is *exactly* the problem that my commit 8a94eb9283fa fixed for
> gcc! From my commit message:
> 
>     >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
> 
> Because, it makes *no sense* for XADD to use the AX register for *both*
> pointer-to-memory (i.e. address of the destination location that
> receives the sum) *and* as the other addend!
> 
> In other words, regardless of how we fill the AX register up-front, an
> XADD instruction generated like this *cannot* be right.
> 
>> The patch uses the clobber list to tell GCC that EAX is used in ASM.
>>
>> Contributed-under: TianoCore Contribution Agreement 1.1
>> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
>> Cc: Liming Gao <liming.gao@intel.com>
>> Cc: Michael Kinney <michael.d.kinney@intel.com>
>> Cc: Laszlo Ersek <lersek@redhat.com>
>> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
>> ---
>>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++++----
>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>> index af39bdeb51..0a985529fd 100644
>> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>> @@ -40,11 +40,13 @@ InternalSyncIncrement (
>>      "lock               \n\t"
>>      "xadd    %%eax, %1  \n\t"
>>      "inc     %%eax      \n\t"
>> -    : "=a" (Result),          // %0
>> +    "mov     %%eax, %0  \n\t"
>> +    : "=r" (Result),          // %0
>>        "+m" (*Value)           // %1
>>      :                         // no inputs that aren't also outputs
>>      : "memory",
>> -      "cc"
>> +      "cc",
>> +      "eax"
>>      );
>>  
>>    return Result;
> 
> (b) This change is invalid, for two separate reasons.
> 
> Reason #1: on the clobber list, the AX register should be listed as "a",
> not as "eax".
> 
> Reason #2:
> 
> - For operand %0, we say "use any register in the 'r' class as
> write-only". (The class "r" means "general register".) And, at the end
> of the inline assembly, we store EAX manually to that 'r' class
> register. And we rely on the compiler to store that other general
> register into Result.
> 
> - We append the AX register (which should be spelled as "a") to the
> clobber list. However, the "a" register is itself in the "general
> register" class, and therefore this clobber list breaks the passage that
> I quoted above, marked as [1]!
> 
> 
>> @@ -76,11 +78,13 @@ InternalSyncDecrement (
>>      "lock                \n\t"
>>      "xadd    %%eax, %1   \n\t"
>>      "dec     %%eax       \n\t"
>> -    : "=a" (Result),           // %0
>> +    "mov     %%eax, %0  \n\t"
>> +    : "=r" (Result),           // %0
>>        "+m" (*Value)            // %1
>>      :                          // no inputs that aren't also outputs
>>      : "memory",
>> -      "cc"
>> +      "cc",
>> +      "eax"
>>      );
>>  
>>    return Result;
>>
> 
> (c) The patch doesn't update the X64 variant.
> 
> I think we should be clear here that we are working around an XCODE bug.
> Commit 8a94eb9283fa was different, because the code before that violated
> the gcc documentation, and so the issue was in the code.
> 
> 
> I'll comment more on your second email.

Actually I'd like to continue here.

In my view:

- Commit 17634d026f96 ("MdePkg/SynchronizationLib: fix
Interlocked[De|In]crement return value", 2018-09-25) was the right idea,
because it improved internal edk2 API conformance (regarding the return
values). However, the assembly template was not correct, according to
the GCC documentation.

- Commit 8a94eb9283fa ("MdePkg/BaseSynchronizationLib: fix XADD operands
in GCC IA32/X64 assembly", 2018-09-26) fixed the assembly templates, so
that they would conform to the GCC documentation.

- Now the current issue is that XCODE does not implement the GCC
documentation correctly, from the compiler / assembler side. So the bug
is not in the edk2 code, but the toolchain.


My first preference would be if we could split the code between XCODE
and genuine GCC. This is now the n'th time when XCODE does not live up
to its promised GCC compatibility. I think it's time to address that
with actual code separation in edk2, like we do for MSVC and ICC
already. Whatever we do for XCODE is basically "shotgun debugging" at
this point, and I wouldn't like that to affect source code that genuine
GCC compiles.

My second preference would be to roll back both commits 8a94eb9283fa and
17634d026f96.

Disclaimer: obviously, if someone can refute my statements about the
correctness of the current code, that's 100% welcome and appreciated.
The above is how I understand the status, but if I'm wrong, I *should*
be corrected.

Thanks,
Laszlo


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07 14:31 ` Laszlo Ersek
  2018-11-07 14:41   ` Laszlo Ersek
@ 2018-11-07 14:45   ` Ard Biesheuvel
  2018-11-07 15:27     ` Ni, Ruiyu
  2018-11-07 14:53   ` Ni, Ruiyu
  2 siblings, 1 reply; 9+ messages in thread
From: Ard Biesheuvel @ 2018-11-07 14:45 UTC (permalink / raw)
  To: Laszlo Ersek
  Cc: Ruiyu Ni, edk2-devel@lists.01.org, Michael Kinney, Liming Gao

On 7 November 2018 at 15:31, Laszlo Ersek <lersek@redhat.com> wrote:
> (+Andrew)
>
> Hi Ray,
>
> On 11/07/18 05:03, Ruiyu Ni wrote:
>> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
>>
>> XCODE disassembly code of InternalSyncDecrement with today's code is:
>>
>>   __asm__ __volatile__ (
>>     "movl    $1, %%eax  \n\t"
>>     "lock               \n\t"
>>     "xadd    %%eax, %1  \n\t"
>>     "inc     %%eax      \n\t"
>>     : "=a" (Result),          // %0
>>       "+m" (*Value)           // %1
>>     :                         // no inputs that aren't also outputs
>>     : "memory",
>>       "cc"
>>     );
>>
>>  0:       55      pushl   %ebp
>>  1:       89 e5   movl    %esp, %ebp
>>  3:       8b 45 08        movl    8(%ebp), %eax
>>  6:       b8 01 00 00 00  movl    $1, %eax
>>  b:       f0      lock
>>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
>>  f:       40      incl    %eax
>> 10:       5d      popl    %ebp
>> 11:       c3      retl
>>
>> %EAX value retrieved in line #3 is overwritten in line #6.
>
> (a) This looks like an XCODE bug to me. The "=a" constraint on operand
> %0 means that Result should be set from eax/rax, and that this operand
> is "write only". Here's the gcc documentation:
>
> "The ordinary output operands must be write-only; GCC assumes that the
> values in these operands before the instruction are dead and need not be
> generated."
>

Do we need to use early clobber here? "=&a"?

It seems to me that without that, the compiler is free to use eax as
input, since it only assumes that eax will be set by the last
instruction.


> Furthermore, if a register is named in any operand constraint (such as
> input, output, input-output), then it cannot, by definition, be listed
> in the "clobber list" -- it is *obvious* that the inline assembly will
> work with that register. In case of an output-only operand, it's obvious
> that the inline assembly will *overwrite* that register, and the
> compiler cannot assume *when* that overwrite will happen. Here's the gcc
> documentation:
>
> "You may not write a clobber description in a way that overlaps with an
> input or output operand.  For example, you may not have an operand
> describing a register class with one member if you mention that register
> in the clobber list.  Variables declared to live in specific registers
> [...] and used as asm input or output operands must have no part
> mentioned in the clobber description." [1]
>
> However, XCODE generates such code that depends on EAX as an *input*
> operand. That's clearly wrong and violates the constraints in the
> operand list. This is an XCODE bug.
>
> In fact: the situation is worse than that. In the commit message you
> spell out that the MOV instruction at offset 6 overwrites the value in
> EAX that was just loaded at offset 3. But this is just the small
> problem; the *large* problem is the generated XADD instruction itself,
> at offset 0xb and 0xc. The binary encoding is, from your commit message:
>
>   f0 0f c1 00
>
> and this is *exactly* the problem that my commit 8a94eb9283fa fixed for
> gcc! From my commit message:
>
>     >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
>
> Because, it makes *no sense* for XADD to use the AX register for *both*
> pointer-to-memory (i.e. address of the destination location that
> receives the sum) *and* as the other addend!
>
> In other words, regardless of how we fill the AX register up-front, an
> XADD instruction generated like this *cannot* be right.
>
>> The patch uses the clobber list to tell GCC that EAX is used in ASM.
>>
>> Contributed-under: TianoCore Contribution Agreement 1.1
>> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
>> Cc: Liming Gao <liming.gao@intel.com>
>> Cc: Michael Kinney <michael.d.kinney@intel.com>
>> Cc: Laszlo Ersek <lersek@redhat.com>
>> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
>> ---
>>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++++----
>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>
>> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>> index af39bdeb51..0a985529fd 100644
>> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>> @@ -40,11 +40,13 @@ InternalSyncIncrement (
>>      "lock               \n\t"
>>      "xadd    %%eax, %1  \n\t"
>>      "inc     %%eax      \n\t"
>> -    : "=a" (Result),          // %0
>> +    "mov     %%eax, %0  \n\t"
>> +    : "=r" (Result),          // %0
>>        "+m" (*Value)           // %1
>>      :                         // no inputs that aren't also outputs
>>      : "memory",
>> -      "cc"
>> +      "cc",
>> +      "eax"
>>      );
>>
>>    return Result;
>
> (b) This change is invalid, for two separate reasons.
>
> Reason #1: on the clobber list, the AX register should be listed as "a",
> not as "eax".
>
> Reason #2:
>
> - For operand %0, we say "use any register in the 'r' class as
> write-only". (The class "r" means "general register".) And, at the end
> of the inline assembly, we store EAX manually to that 'r' class
> register. And we rely on the compiler to store that other general
> register into Result.
>
> - We append the AX register (which should be spelled as "a") to the
> clobber list. However, the "a" register is itself in the "general
> register" class, and therefore this clobber list breaks the passage that
> I quoted above, marked as [1]!
>
>
>> @@ -76,11 +78,13 @@ InternalSyncDecrement (
>>      "lock                \n\t"
>>      "xadd    %%eax, %1   \n\t"
>>      "dec     %%eax       \n\t"
>> -    : "=a" (Result),           // %0
>> +    "mov     %%eax, %0  \n\t"
>> +    : "=r" (Result),           // %0
>>        "+m" (*Value)            // %1
>>      :                          // no inputs that aren't also outputs
>>      : "memory",
>> -      "cc"
>> +      "cc",
>> +      "eax"
>>      );
>>
>>    return Result;
>>
>
> (c) The patch doesn't update the X64 variant.
>
> I think we should be clear here that we are working around an XCODE bug.
> Commit 8a94eb9283fa was different, because the code before that violated
> the gcc documentation, and so the issue was in the code.
>
>
> I'll comment more on your second email.
>
> Thanks,
> Laszlo
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07 14:31 ` Laszlo Ersek
  2018-11-07 14:41   ` Laszlo Ersek
  2018-11-07 14:45   ` Ard Biesheuvel
@ 2018-11-07 14:53   ` Ni, Ruiyu
  2 siblings, 0 replies; 9+ messages in thread
From: Ni, Ruiyu @ 2018-11-07 14:53 UTC (permalink / raw)
  To: Laszlo Ersek, edk2-devel@lists.01.org
  Cc: Gao, Liming, Kinney, Michael D, Philippe Mathieu-Daudé,
	Andrew Fish



> -----Original Message-----
> From: Laszlo Ersek [mailto:lersek@redhat.com]
> Sent: Wednesday, November 7, 2018 10:32 PM
> To: Ni, Ruiyu <ruiyu.ni@intel.com>; edk2-devel@lists.01.org
> Cc: Gao, Liming <liming.gao@intel.com>; Kinney, Michael D
> <michael.d.kinney@intel.com>; Philippe Mathieu-Daudé <philmd@redhat.com>;
> Andrew Fish <afish@apple.com>
> Subject: Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix
> InternalSync[De|In]crement
> 
> (+Andrew)
> 
> Hi Ray,
> 
> On 11/07/18 05:03, Ruiyu Ni wrote:
> > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
> >
> > XCODE disassembly code of InternalSyncDecrement with today's code is:
> >
> >   __asm__ __volatile__ (
> >     "movl    $1, %%eax  \n\t"
> >     "lock               \n\t"
> >     "xadd    %%eax, %1  \n\t"
> >     "inc     %%eax      \n\t"
> >     : "=a" (Result),          // %0
> >       "+m" (*Value)           // %1
> >     :                         // no inputs that aren't also outputs
> >     : "memory",
> >       "cc"
> >     );
> >
> >  0:       55      pushl   %ebp
> >  1:       89 e5   movl    %esp, %ebp
> >  3:       8b 45 08        movl    8(%ebp), %eax
> >  6:       b8 01 00 00 00  movl    $1, %eax
> >  b:       f0      lock
> >  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
> >  f:       40      incl    %eax
> > 10:       5d      popl    %ebp
> > 11:       c3      retl
> >
> > %EAX value retrieved in line #3 is overwritten in line #6.
> 
> (a) This looks like an XCODE bug to me. The "=a" constraint on operand
> %0 means that Result should be set from eax/rax, and that this operand is "write
> only". Here's the gcc documentation:
> 
> "The ordinary output operands must be write-only; GCC assumes that the values
> in these operands before the instruction are dead and need not be generated."

Actually I am a bit confused about "=a" here.
"=a"(Result) tells GCC that Result should be set from eax. But in the final generated
assembly code I cannot see the instruction "mov %eax, Result".

> 
> Furthermore, if a register is named in any operand constraint (such as input,
> output, input-output), then it cannot, by definition, be listed in the "clobber list"
> -- it is *obvious* that the inline assembly will work with that register. In case of
> an output-only operand, it's obvious that the inline assembly will *overwrite*
> that register, and the compiler cannot assume *when* that overwrite will
> happen. Here's the gcc
> documentation:
> 
> "You may not write a clobber description in a way that overlaps with an input or
> output operand.  For example, you may not have an operand describing a
> register class with one member if you mention that register in the clobber list.
> Variables declared to live in specific registers [...] and used as asm input or
> output operands must have no part mentioned in the clobber description." [1]
> 
> However, XCODE generates such code that depends on EAX as an *input*
> operand. That's clearly wrong and violates the constraints in the operand list.
> This is an XCODE bug.

Agree. But I thought I misunderstood the GCC documents.

> 
> In fact: the situation is worse than that. In the commit message you spell out
> that the MOV instruction at offset 6 overwrites the value in EAX that was just
> loaded at offset 3. But this is just the small problem; the *large* problem is the
> generated XADD instruction itself, at offset 0xb and 0xc. The binary encoding is,
> from your commit message:
> 
>   f0 0f c1 00
> 
> and this is *exactly* the problem that my commit 8a94eb9283fa fixed for gcc!
> From my commit message:
> 
>     >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
> 
> Because, it makes *no sense* for XADD to use the AX register for *both*
> pointer-to-memory (i.e. address of the destination location that receives the
> sum) *and* as the other addend!

Agree. I just realized that. But with my patch the issue was gone.

I saw your second mail. Let me check that.

> 
> In other words, regardless of how we fill the AX register up-front, an XADD
> instruction generated like this *cannot* be right.
> 
> > The patch uses the clobber list to tell GCC that EAX is used in ASM.
> >
> > Contributed-under: TianoCore Contribution Agreement 1.1
> > Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
> > Cc: Liming Gao <liming.gao@intel.com>
> > Cc: Michael Kinney <michael.d.kinney@intel.com>
> > Cc: Laszlo Ersek <lersek@redhat.com>
> > Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
> > ---
> >  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12
> > ++++++++----
> >  1 file changed, 8 insertions(+), 4 deletions(-)
> >
> > diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> > b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> > index af39bdeb51..0a985529fd 100644
> > --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> > +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> > @@ -40,11 +40,13 @@ InternalSyncIncrement (
> >      "lock               \n\t"
> >      "xadd    %%eax, %1  \n\t"
> >      "inc     %%eax      \n\t"
> > -    : "=a" (Result),          // %0
> > +    "mov     %%eax, %0  \n\t"
> > +    : "=r" (Result),          // %0
> >        "+m" (*Value)           // %1
> >      :                         // no inputs that aren't also outputs
> >      : "memory",
> > -      "cc"
> > +      "cc",
> > +      "eax"
> >      );
> >
> >    return Result;
> 
> (b) This change is invalid, for two separate reasons.
> 
> Reason #1: on the clobber list, the AX register should be listed as "a", not as
> "eax".
> 
> Reason #2:
> 
> - For operand %0, we say "use any register in the 'r' class as write-only". (The
> class "r" means "general register".) And, at the end of the inline assembly, we
> store EAX manually to that 'r' class register. And we rely on the compiler to store
> that other general register into Result.
> 
> - We append the AX register (which should be spelled as "a") to the clobber list.
> However, the "a" register is itself in the "general register" class, and therefore
> this clobber list breaks the passage that I quoted above, marked as [1]!
> 
> 
> > @@ -76,11 +78,13 @@ InternalSyncDecrement (
> >      "lock                \n\t"
> >      "xadd    %%eax, %1   \n\t"
> >      "dec     %%eax       \n\t"
> > -    : "=a" (Result),           // %0
> > +    "mov     %%eax, %0  \n\t"
> > +    : "=r" (Result),           // %0
> >        "+m" (*Value)            // %1
> >      :                          // no inputs that aren't also outputs
> >      : "memory",
> > -      "cc"
> > +      "cc",
> > +      "eax"
> >      );
> >
> >    return Result;
> >
> 
> (c) The patch doesn't update the X64 variant.
> 
> I think we should be clear here that we are working around an XCODE bug.
> Commit 8a94eb9283fa was different, because the code before that violated the
> gcc documentation, and so the issue was in the code.
> 
> 
> I'll comment more on your second email.
> 
> Thanks,
> Laszlo

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07 14:41   ` Laszlo Ersek
@ 2018-11-07 14:57     ` Ni, Ruiyu
  0 siblings, 0 replies; 9+ messages in thread
From: Ni, Ruiyu @ 2018-11-07 14:57 UTC (permalink / raw)
  To: Laszlo Ersek, edk2-devel@lists.01.org
  Cc: Kinney, Michael D, Gao, Liming, Yao, Jiewen



> -----Original Message-----
> From: edk2-devel [mailto:edk2-devel-bounces@lists.01.org] On Behalf Of Laszlo
> Ersek
> Sent: Wednesday, November 7, 2018 10:42 PM
> To: Ni, Ruiyu <ruiyu.ni@intel.com>; edk2-devel@lists.01.org
> Cc: Kinney, Michael D <michael.d.kinney@intel.com>; Gao, Liming
> <liming.gao@intel.com>
> Subject: Re: [edk2] [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix
> InternalSync[De|In]crement
> 
> On 11/07/18 15:31, Laszlo Ersek wrote:
> > (+Andrew)
> >
> > Hi Ray,
> >
> > On 11/07/18 05:03, Ruiyu Ni wrote:
> >> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
> >>
> >> XCODE disassembly code of InternalSyncDecrement with today's code is:
> >>
> >>   __asm__ __volatile__ (
> >>     "movl    $1, %%eax  \n\t"
> >>     "lock               \n\t"
> >>     "xadd    %%eax, %1  \n\t"
> >>     "inc     %%eax      \n\t"
> >>     : "=a" (Result),          // %0
> >>       "+m" (*Value)           // %1
> >>     :                         // no inputs that aren't also outputs
> >>     : "memory",
> >>       "cc"
> >>     );
> >>
> >>  0:       55      pushl   %ebp
> >>  1:       89 e5   movl    %esp, %ebp
> >>  3:       8b 45 08        movl    8(%ebp), %eax
> >>  6:       b8 01 00 00 00  movl    $1, %eax
> >>  b:       f0      lock
> >>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
> >>  f:       40      incl    %eax
> >> 10:       5d      popl    %ebp
> >> 11:       c3      retl
> >>
> >> %EAX value retrieved in line #3 is overwritten in line #6.
> >
> > (a) This looks like an XCODE bug to me. The "=a" constraint on operand
> > %0 means that Result should be set from eax/rax, and that this operand
> > is "write only". Here's the gcc documentation:
> >
> > "The ordinary output operands must be write-only; GCC assumes that the
> > values in these operands before the instruction are dead and need not
> > be generated."
> >
> > Furthermore, if a register is named in any operand constraint (such as
> > input, output, input-output), then it cannot, by definition, be listed
> > in the "clobber list" -- it is *obvious* that the inline assembly will
> > work with that register. In case of an output-only operand, it's
> > obvious that the inline assembly will *overwrite* that register, and
> > the compiler cannot assume *when* that overwrite will happen. Here's
> > the gcc
> > documentation:
> >
> > "You may not write a clobber description in a way that overlaps with
> > an input or output operand.  For example, you may not have an operand
> > describing a register class with one member if you mention that
> > register in the clobber list.  Variables declared to live in specific
> > registers [...] and used as asm input or output operands must have no
> > part mentioned in the clobber description." [1]
> >
> > However, XCODE generates such code that depends on EAX as an *input*
> > operand. That's clearly wrong and violates the constraints in the
> > operand list. This is an XCODE bug.
> >
> > In fact: the situation is worse than that. In the commit message you
> > spell out that the MOV instruction at offset 6 overwrites the value in
> > EAX that was just loaded at offset 3. But this is just the small
> > problem; the *large* problem is the generated XADD instruction itself,
> > at offset 0xb and 0xc. The binary encoding is, from your commit message:
> >
> >   f0 0f c1 00
> >
> > and this is *exactly* the problem that my commit 8a94eb9283fa fixed
> > for gcc! From my commit message:
> >
> >     >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
> >
> > Because, it makes *no sense* for XADD to use the AX register for
> > *both* pointer-to-memory (i.e. address of the destination location
> > that receives the sum) *and* as the other addend!
> >
> > In other words, regardless of how we fill the AX register up-front, an
> > XADD instruction generated like this *cannot* be right.
> >
> >> The patch uses the clobber list to tell GCC that EAX is used in ASM.
> >>
> >> Contributed-under: TianoCore Contribution Agreement 1.1
> >> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
> >> Cc: Liming Gao <liming.gao@intel.com>
> >> Cc: Michael Kinney <michael.d.kinney@intel.com>
> >> Cc: Laszlo Ersek <lersek@redhat.com>
> >> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
> >> ---
> >>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12
> >> ++++++++----
> >>  1 file changed, 8 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> index af39bdeb51..0a985529fd 100644
> >> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> @@ -40,11 +40,13 @@ InternalSyncIncrement (
> >>      "lock               \n\t"
> >>      "xadd    %%eax, %1  \n\t"
> >>      "inc     %%eax      \n\t"
> >> -    : "=a" (Result),          // %0
> >> +    "mov     %%eax, %0  \n\t"
> >> +    : "=r" (Result),          // %0
> >>        "+m" (*Value)           // %1
> >>      :                         // no inputs that aren't also outputs
> >>      : "memory",
> >> -      "cc"
> >> +      "cc",
> >> +      "eax"
> >>      );
> >>
> >>    return Result;
> >
> > (b) This change is invalid, for two separate reasons.
> >
> > Reason #1: on the clobber list, the AX register should be listed as
> > "a", not as "eax".
> >
> > Reason #2:
> >
> > - For operand %0, we say "use any register in the 'r' class as
> > write-only". (The class "r" means "general register".) And, at the end
> > of the inline assembly, we store EAX manually to that 'r' class
> > register. And we rely on the compiler to store that other general
> > register into Result.
> >
> > - We append the AX register (which should be spelled as "a") to the
> > clobber list. However, the "a" register is itself in the "general
> > register" class, and therefore this clobber list breaks the passage
> > that I quoted above, marked as [1]!
> >
> >
> >> @@ -76,11 +78,13 @@ InternalSyncDecrement (
> >>      "lock                \n\t"
> >>      "xadd    %%eax, %1   \n\t"
> >>      "dec     %%eax       \n\t"
> >> -    : "=a" (Result),           // %0
> >> +    "mov     %%eax, %0  \n\t"
> >> +    : "=r" (Result),           // %0
> >>        "+m" (*Value)            // %1
> >>      :                          // no inputs that aren't also outputs
> >>      : "memory",
> >> -      "cc"
> >> +      "cc",
> >> +      "eax"
> >>      );
> >>
> >>    return Result;
> >>
> >
> > (c) The patch doesn't update the X64 variant.
> >
> > I think we should be clear here that we are working around an XCODE bug.
> > Commit 8a94eb9283fa was different, because the code before that
> > violated the gcc documentation, and so the issue was in the code.
> >
> >
> > I'll comment more on your second email.
> 
> Actually I'd like to continue here.
> 
> In my view:
> 
> - Commit 17634d026f96 ("MdePkg/SynchronizationLib: fix
> Interlocked[De|In]crement return value", 2018-09-25) was the right idea,
> because it improved internal edk2 API conformance (regarding the return values).
> However, the assembly template was not correct, according to the GCC
> documentation.
> 
> - Commit 8a94eb9283fa ("MdePkg/BaseSynchronizationLib: fix XADD operands
> in GCC IA32/X64 assembly", 2018-09-26) fixed the assembly templates, so that
> they would conform to the GCC documentation.
> 
> - Now the current issue is that XCODE does not implement the GCC
> documentation correctly, from the compiler / assembler side. So the bug is not
> in the edk2 code, but the toolchain.
> 
> 
> My first preference would be if we could split the code between XCODE and
> genuine GCC. This is now the n'th time when XCODE does not live up to its
> promised GCC compatibility. I think it's time to address that with actual code
> separation in edk2, like we do for MSVC and ICC already. Whatever we do for
> XCODE is basically "shotgun debugging" at this point, and I wouldn't like that to
> affect source code that genuine GCC compiles.
> 
> My second preference would be to roll back both commits 8a94eb9283fa and
> 17634d026f96.

I agree. But need Jiewen's comments on rolling back the 2 commits.
Because he raised the InterlockedIncrement() issue internally to me.

> 
> Disclaimer: obviously, if someone can refute my statements about the
> correctness of the current code, that's 100% welcome and appreciated.
> The above is how I understand the status, but if I'm wrong, I *should* be
> corrected.
> 
> Thanks,
> Laszlo
> _______________________________________________
> edk2-devel mailing list
> edk2-devel@lists.01.org
> https://lists.01.org/mailman/listinfo/edk2-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07 14:45   ` Ard Biesheuvel
@ 2018-11-07 15:27     ` Ni, Ruiyu
  2018-11-07 19:02       ` Laszlo Ersek
  0 siblings, 1 reply; 9+ messages in thread
From: Ni, Ruiyu @ 2018-11-07 15:27 UTC (permalink / raw)
  To: Ard Biesheuvel, Laszlo Ersek
  Cc: edk2-devel@lists.01.org, Kinney, Michael D, Gao, Liming



> -----Original Message-----
> From: Ard Biesheuvel [mailto:ard.biesheuvel@linaro.org]
> Sent: Wednesday, November 7, 2018 10:46 PM
> To: Laszlo Ersek <lersek@redhat.com>
> Cc: Ni, Ruiyu <ruiyu.ni@intel.com>; edk2-devel@lists.01.org; Kinney, Michael D
> <michael.d.kinney@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2] [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix
> InternalSync[De|In]crement
> 
> On 7 November 2018 at 15:31, Laszlo Ersek <lersek@redhat.com> wrote:
> > (+Andrew)
> >
> > Hi Ray,
> >
> > On 11/07/18 05:03, Ruiyu Ni wrote:
> >> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
> >>
> >> XCODE disassembly code of InternalSyncDecrement with today's code is:
> >>
> >>   __asm__ __volatile__ (
> >>     "movl    $1, %%eax  \n\t"
> >>     "lock               \n\t"
> >>     "xadd    %%eax, %1  \n\t"
> >>     "inc     %%eax      \n\t"
> >>     : "=a" (Result),          // %0
> >>       "+m" (*Value)           // %1
> >>     :                         // no inputs that aren't also outputs
> >>     : "memory",
> >>       "cc"
> >>     );
> >>
> >>  0:       55      pushl   %ebp
> >>  1:       89 e5   movl    %esp, %ebp
> >>  3:       8b 45 08        movl    8(%ebp), %eax
> >>  6:       b8 01 00 00 00  movl    $1, %eax
> >>  b:       f0      lock
> >>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
> >>  f:       40      incl    %eax
> >> 10:       5d      popl    %ebp
> >> 11:       c3      retl
> >>
> >> %EAX value retrieved in line #3 is overwritten in line #6.
> >
> > (a) This looks like an XCODE bug to me. The "=a" constraint on operand
> > %0 means that Result should be set from eax/rax, and that this operand
> > is "write only". Here's the gcc documentation:
> >
> > "The ordinary output operands must be write-only; GCC assumes that the
> > values in these operands before the instruction are dead and need not
> > be generated."
> >
> 
> Do we need to use early clobber here? "=&a"?
> 
> It seems to me that without that, the compiler is free to use eax as input, since it
> only assumes that eax will be set by the last instruction.

Ard,
Thanks for pointing that.
I found below wordings from GCC documents:
"Use the ‘&’ constraint modifier (see Modifiers) on all output operands that must not
 overlap an input. Otherwise, GCC may allocate the output operand in the same
 register as an unrelated input operand, on the assumption that the assembler code
 consumes its inputs before producing outputs. This assumption may be false if the
 assembler code actually consists of more than one instruction."

I think '&' is just for this case.

@Laszlo,
Do you agree that a simple patch to add '&' to both Ia32 and X64 version of
InternalSync[De|In]crement is good enough?

I actually tried in XCODE5 toolchain and it works good.
movl    8(%ebp), %eax
becomes
movl    8(%ebp), %ecx

> 
> 
> > Furthermore, if a register is named in any operand constraint (such as
> > input, output, input-output), then it cannot, by definition, be listed
> > in the "clobber list" -- it is *obvious* that the inline assembly will
> > work with that register. In case of an output-only operand, it's
> > obvious that the inline assembly will *overwrite* that register, and
> > the compiler cannot assume *when* that overwrite will happen. Here's
> > the gcc
> > documentation:
> >
> > "You may not write a clobber description in a way that overlaps with
> > an input or output operand.  For example, you may not have an operand
> > describing a register class with one member if you mention that
> > register in the clobber list.  Variables declared to live in specific
> > registers [...] and used as asm input or output operands must have no
> > part mentioned in the clobber description." [1]
> >
> > However, XCODE generates such code that depends on EAX as an *input*
> > operand. That's clearly wrong and violates the constraints in the
> > operand list. This is an XCODE bug.
> >
> > In fact: the situation is worse than that. In the commit message you
> > spell out that the MOV instruction at offset 6 overwrites the value in
> > EAX that was just loaded at offset 3. But this is just the small
> > problem; the *large* problem is the generated XADD instruction itself,
> > at offset 0xb and 0xc. The binary encoding is, from your commit message:
> >
> >   f0 0f c1 00
> >
> > and this is *exactly* the problem that my commit 8a94eb9283fa fixed
> > for gcc! From my commit message:
> >
> >     >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
> >
> > Because, it makes *no sense* for XADD to use the AX register for
> > *both* pointer-to-memory (i.e. address of the destination location
> > that receives the sum) *and* as the other addend!
> >
> > In other words, regardless of how we fill the AX register up-front, an
> > XADD instruction generated like this *cannot* be right.
> >
> >> The patch uses the clobber list to tell GCC that EAX is used in ASM.
> >>
> >> Contributed-under: TianoCore Contribution Agreement 1.1
> >> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
> >> Cc: Liming Gao <liming.gao@intel.com>
> >> Cc: Michael Kinney <michael.d.kinney@intel.com>
> >> Cc: Laszlo Ersek <lersek@redhat.com>
> >> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
> >> ---
> >>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12
> >> ++++++++----
> >>  1 file changed, 8 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> index af39bdeb51..0a985529fd 100644
> >> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
> >> @@ -40,11 +40,13 @@ InternalSyncIncrement (
> >>      "lock               \n\t"
> >>      "xadd    %%eax, %1  \n\t"
> >>      "inc     %%eax      \n\t"
> >> -    : "=a" (Result),          // %0
> >> +    "mov     %%eax, %0  \n\t"
> >> +    : "=r" (Result),          // %0
> >>        "+m" (*Value)           // %1
> >>      :                         // no inputs that aren't also outputs
> >>      : "memory",
> >> -      "cc"
> >> +      "cc",
> >> +      "eax"
> >>      );
> >>
> >>    return Result;
> >
> > (b) This change is invalid, for two separate reasons.
> >
> > Reason #1: on the clobber list, the AX register should be listed as
> > "a", not as "eax".
> >
> > Reason #2:
> >
> > - For operand %0, we say "use any register in the 'r' class as
> > write-only". (The class "r" means "general register".) And, at the end
> > of the inline assembly, we store EAX manually to that 'r' class
> > register. And we rely on the compiler to store that other general
> > register into Result.
> >
> > - We append the AX register (which should be spelled as "a") to the
> > clobber list. However, the "a" register is itself in the "general
> > register" class, and therefore this clobber list breaks the passage
> > that I quoted above, marked as [1]!
> >
> >
> >> @@ -76,11 +78,13 @@ InternalSyncDecrement (
> >>      "lock                \n\t"
> >>      "xadd    %%eax, %1   \n\t"
> >>      "dec     %%eax       \n\t"
> >> -    : "=a" (Result),           // %0
> >> +    "mov     %%eax, %0  \n\t"
> >> +    : "=r" (Result),           // %0
> >>        "+m" (*Value)            // %1
> >>      :                          // no inputs that aren't also outputs
> >>      : "memory",
> >> -      "cc"
> >> +      "cc",
> >> +      "eax"
> >>      );
> >>
> >>    return Result;
> >>
> >
> > (c) The patch doesn't update the X64 variant.
> >
> > I think we should be clear here that we are working around an XCODE bug.
> > Commit 8a94eb9283fa was different, because the code before that
> > violated the gcc documentation, and so the issue was in the code.
> >
> >
> > I'll comment more on your second email.
> >
> > Thanks,
> > Laszlo
> > _______________________________________________
> > edk2-devel mailing list
> > edk2-devel@lists.01.org
> > https://lists.01.org/mailman/listinfo/edk2-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement
  2018-11-07 15:27     ` Ni, Ruiyu
@ 2018-11-07 19:02       ` Laszlo Ersek
  0 siblings, 0 replies; 9+ messages in thread
From: Laszlo Ersek @ 2018-11-07 19:02 UTC (permalink / raw)
  To: Ni, Ruiyu, Ard Biesheuvel
  Cc: edk2-devel@lists.01.org, Kinney, Michael D, Gao, Liming

On 11/07/18 16:27, Ni, Ruiyu wrote:
> 
> 
>> -----Original Message-----
>> From: Ard Biesheuvel [mailto:ard.biesheuvel@linaro.org]
>> Sent: Wednesday, November 7, 2018 10:46 PM
>> To: Laszlo Ersek <lersek@redhat.com>
>> Cc: Ni, Ruiyu <ruiyu.ni@intel.com>; edk2-devel@lists.01.org; Kinney, Michael D
>> <michael.d.kinney@intel.com>; Gao, Liming <liming.gao@intel.com>
>> Subject: Re: [edk2] [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix
>> InternalSync[De|In]crement
>>
>> On 7 November 2018 at 15:31, Laszlo Ersek <lersek@redhat.com> wrote:
>>> (+Andrew)
>>>
>>> Hi Ray,
>>>
>>> On 11/07/18 05:03, Ruiyu Ni wrote:
>>>> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=1303
>>>>
>>>> XCODE disassembly code of InternalSyncDecrement with today's code is:
>>>>
>>>>   __asm__ __volatile__ (
>>>>     "movl    $1, %%eax  \n\t"
>>>>     "lock               \n\t"
>>>>     "xadd    %%eax, %1  \n\t"
>>>>     "inc     %%eax      \n\t"
>>>>     : "=a" (Result),          // %0
>>>>       "+m" (*Value)           // %1
>>>>     :                         // no inputs that aren't also outputs
>>>>     : "memory",
>>>>       "cc"
>>>>     );
>>>>
>>>>  0:       55      pushl   %ebp
>>>>  1:       89 e5   movl    %esp, %ebp
>>>>  3:       8b 45 08        movl    8(%ebp), %eax
>>>>  6:       b8 01 00 00 00  movl    $1, %eax
>>>>  b:       f0      lock
>>>>  c:       0f c1 00        xaddl   %eax, _InternalSyncIncrement(%eax)
>>>>  f:       40      incl    %eax
>>>> 10:       5d      popl    %ebp
>>>> 11:       c3      retl
>>>>
>>>> %EAX value retrieved in line #3 is overwritten in line #6.
>>>
>>> (a) This looks like an XCODE bug to me. The "=a" constraint on operand
>>> %0 means that Result should be set from eax/rax, and that this operand
>>> is "write only". Here's the gcc documentation:
>>>
>>> "The ordinary output operands must be write-only; GCC assumes that the
>>> values in these operands before the instruction are dead and need not
>>> be generated."
>>>
>>
>> Do we need to use early clobber here? "=&a"?
>>
>> It seems to me that without that, the compiler is free to use eax as input, since it
>> only assumes that eax will be set by the last instruction.
> 
> Ard,
> Thanks for pointing that.
> I found below wordings from GCC documents:
> "Use the ‘&’ constraint modifier (see Modifiers) on all output operands that must not
>  overlap an input. Otherwise, GCC may allocate the output operand in the same
>  register as an unrelated input operand, on the assumption that the assembler code
>  consumes its inputs before producing outputs. This assumption may be false if the
>  assembler code actually consists of more than one instruction."
> 
> I think '&' is just for this case.
> 
> @Laszlo,
> Do you agree that a simple patch to add '&' to both Ia32 and X64 version of
> InternalSync[De|In]crement is good enough?

I think so, and I applaud Ard for finding this in the documentation! It
seems like the exact thing we need.

I agree that my commit 8a94eb9283fa ("MdePkg/BaseSynchronizationLib: fix
XADD operands in GCC IA32/X64 assembly", 2018-09-26) was not complete,
due to missing the "&" constraint modifier. Please submit a patch that
adds it.

And, for the commit message, I also recommend:

Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: 8a94eb9283fa09a30f5f06f0c12cf0ee4e14fbcf

I'll test the patch on my end.

(This is a prime example for a change (= a bugfix) that is permitted
during the hard feature freeze, BTW.)

Thanks!
Laszlo


> 
> I actually tried in XCODE5 toolchain and it works good.
> movl    8(%ebp), %eax
> becomes
> movl    8(%ebp), %ecx
> 
>>
>>
>>> Furthermore, if a register is named in any operand constraint (such as
>>> input, output, input-output), then it cannot, by definition, be listed
>>> in the "clobber list" -- it is *obvious* that the inline assembly will
>>> work with that register. In case of an output-only operand, it's
>>> obvious that the inline assembly will *overwrite* that register, and
>>> the compiler cannot assume *when* that overwrite will happen. Here's
>>> the gcc
>>> documentation:
>>>
>>> "You may not write a clobber description in a way that overlaps with
>>> an input or output operand.  For example, you may not have an operand
>>> describing a register class with one member if you mention that
>>> register in the clobber list.  Variables declared to live in specific
>>> registers [...] and used as asm input or output operands must have no
>>> part mentioned in the clobber description." [1]
>>>
>>> However, XCODE generates such code that depends on EAX as an *input*
>>> operand. That's clearly wrong and violates the constraints in the
>>> operand list. This is an XCODE bug.
>>>
>>> In fact: the situation is worse than that. In the commit message you
>>> spell out that the MOV instruction at offset 6 overwrites the value in
>>> EAX that was just loaded at offset 3. But this is just the small
>>> problem; the *large* problem is the generated XADD instruction itself,
>>> at offset 0xb and 0xc. The binary encoding is, from your commit message:
>>>
>>>   f0 0f c1 00
>>>
>>> and this is *exactly* the problem that my commit 8a94eb9283fa fixed
>>> for gcc! From my commit message:
>>>
>>>     >     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
>>>
>>> Because, it makes *no sense* for XADD to use the AX register for
>>> *both* pointer-to-memory (i.e. address of the destination location
>>> that receives the sum) *and* as the other addend!
>>>
>>> In other words, regardless of how we fill the AX register up-front, an
>>> XADD instruction generated like this *cannot* be right.
>>>
>>>> The patch uses the clobber list to tell GCC that EAX is used in ASM.
>>>>
>>>> Contributed-under: TianoCore Contribution Agreement 1.1
>>>> Signed-off-by: Ruiyu Ni <ruiyu.ni@intel.com>
>>>> Cc: Liming Gao <liming.gao@intel.com>
>>>> Cc: Michael Kinney <michael.d.kinney@intel.com>
>>>> Cc: Laszlo Ersek <lersek@redhat.com>
>>>> Cc: Philippe Mathieu-Daudé <philmd@redhat.com>
>>>> ---
>>>>  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12
>>>> ++++++++----
>>>>  1 file changed, 8 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>>>> b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>>>> index af39bdeb51..0a985529fd 100644
>>>> --- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>>>> +++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
>>>> @@ -40,11 +40,13 @@ InternalSyncIncrement (
>>>>      "lock               \n\t"
>>>>      "xadd    %%eax, %1  \n\t"
>>>>      "inc     %%eax      \n\t"
>>>> -    : "=a" (Result),          // %0
>>>> +    "mov     %%eax, %0  \n\t"
>>>> +    : "=r" (Result),          // %0
>>>>        "+m" (*Value)           // %1
>>>>      :                         // no inputs that aren't also outputs
>>>>      : "memory",
>>>> -      "cc"
>>>> +      "cc",
>>>> +      "eax"
>>>>      );
>>>>
>>>>    return Result;
>>>
>>> (b) This change is invalid, for two separate reasons.
>>>
>>> Reason #1: on the clobber list, the AX register should be listed as
>>> "a", not as "eax".
>>>
>>> Reason #2:
>>>
>>> - For operand %0, we say "use any register in the 'r' class as
>>> write-only". (The class "r" means "general register".) And, at the end
>>> of the inline assembly, we store EAX manually to that 'r' class
>>> register. And we rely on the compiler to store that other general
>>> register into Result.
>>>
>>> - We append the AX register (which should be spelled as "a") to the
>>> clobber list. However, the "a" register is itself in the "general
>>> register" class, and therefore this clobber list breaks the passage
>>> that I quoted above, marked as [1]!
>>>
>>>
>>>> @@ -76,11 +78,13 @@ InternalSyncDecrement (
>>>>      "lock                \n\t"
>>>>      "xadd    %%eax, %1   \n\t"
>>>>      "dec     %%eax       \n\t"
>>>> -    : "=a" (Result),           // %0
>>>> +    "mov     %%eax, %0  \n\t"
>>>> +    : "=r" (Result),           // %0
>>>>        "+m" (*Value)            // %1
>>>>      :                          // no inputs that aren't also outputs
>>>>      : "memory",
>>>> -      "cc"
>>>> +      "cc",
>>>> +      "eax"
>>>>      );
>>>>
>>>>    return Result;
>>>>
>>>
>>> (c) The patch doesn't update the X64 variant.
>>>
>>> I think we should be clear here that we are working around an XCODE bug.
>>> Commit 8a94eb9283fa was different, because the code before that
>>> violated the gcc documentation, and so the issue was in the code.
>>>
>>>
>>> I'll comment more on your second email.
>>>
>>> Thanks,
>>> Laszlo
>>> _______________________________________________
>>> edk2-devel mailing list
>>> edk2-devel@lists.01.org
>>> https://lists.01.org/mailman/listinfo/edk2-devel



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-11-07 19:02 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-11-07  4:03 [PATCH] MdePkg/BaseSynchronizationLib XCODE: fix InternalSync[De|In]crement Ruiyu Ni
2018-11-07  8:46 ` Gao, Liming
2018-11-07 14:31 ` Laszlo Ersek
2018-11-07 14:41   ` Laszlo Ersek
2018-11-07 14:57     ` Ni, Ruiyu
2018-11-07 14:45   ` Ard Biesheuvel
2018-11-07 15:27     ` Ni, Ruiyu
2018-11-07 19:02       ` Laszlo Ersek
2018-11-07 14:53   ` Ni, Ruiyu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox