public inbox for devel@edk2.groups.io
 help / color / mirror / Atom feed
* [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly
@ 2018-09-25 19:48 Laszlo Ersek
  2018-09-26  9:05 ` Laszlo Ersek
       [not found] ` <8ecbcc60-8e0f-e418-614e-666aa7fb007b@Intel.com>
  0 siblings, 2 replies; 6+ messages in thread
From: Laszlo Ersek @ 2018-09-25 19:48 UTC (permalink / raw)
  To: edk2-devel-01; +Cc: Jiewen Yao, Liming Gao, Michael Kinney, Ruiyu Ni

Currently, "gcc-4.8.5-28.el7_5.1.x86_64" generates the following code for
me, from the XADD inline assembly added to "X64/GccInline.c" in commit
17634d026f96:

> 0000000000004383 <InternalSyncIncrement>:
> UINT32
> EFIAPI
> InternalSyncIncrement (
>   IN      volatile UINT32    *Value
>   )
> {
>     4383:       55                      push   %rbp
>     4384:       48 89 e5                mov    %rsp,%rbp
>     4387:       48 83 ec 10             sub    $0x10,%rsp
>     438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)
>   UINT32  Result;
>
>   __asm__ __volatile__ (
>     438f:       48 8b 55 10             mov    0x10(%rbp),%rdx
>     4393:       48 8b 45 10             mov    0x10(%rbp),%rax
>     4397:       b8 01 00 00 00          mov    $0x1,%eax
>     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
>     43a0:       ff c0                   inc    %eax
>     43a2:       89 45 fc                mov    %eax,-0x4(%rbp)
>     : "m"  (*Value)           // %2
>     : "memory",
>       "cc"
>     );
>
>   return Result;
>     43a5:       8b 45 fc                mov    -0x4(%rbp),%eax
> }
>     43a8:       c9                      leaveq
>     43a9:       c3                      retq
>

The MOV $0X1,%EAX instruction corrupts the address of Value in %RAX before
we reach the XADD instruction. In fact, it makes no sense for XADD to use
%EAX as source operand and (%RAX) as destination operand at the same time.

The XADD instruction's destination operand is a read-write operand. The
GCC documentation states:

> The ordinary output operands must be write-only; GCC will assume that
> the values in these operands before the instruction are dead and need
> not be generated. Extended asm supports input-output or read-write
> operands. Use the constraint character `+' to indicate such an operand
> and list it with the output operands. You should only use read-write
> operands when the constraints for the operand (or the operand in which
> only some of the bits are to be changed) allow a register.

(The above is intentionally quoted from the oldest GCC release that edk2
supports, namely gcc-4.4:
<https://gcc.gnu.org/onlinedocs/gcc-4.4.7/gcc/Extended-Asm.html>.)

Fix the operand list accordingly.

With the patch applied, I get:

> 0000000000004383 <InternalSyncIncrement>:
> UINT32
> EFIAPI
> InternalSyncIncrement (
>   IN      volatile UINT32    *Value
>   )
> {
>     4383:       55                      push   %rbp
>     4384:       48 89 e5                mov    %rsp,%rbp
>     4387:       48 83 ec 10             sub    $0x10,%rsp
>     438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)
>   UINT32  Result;
>
>   __asm__ __volatile__ (
>     438f:       48 8b 55 10             mov    0x10(%rbp),%rdx
>     4393:       48 8b 45 10             mov    0x10(%rbp),%rax
>     4397:       b8 01 00 00 00          mov    $0x1,%eax
>     439c:       f0 0f c1 02             lock xadd %eax,(%rdx)
>     43a0:       ff c0                   inc    %eax
>     43a2:       89 45 fc                mov    %eax,-0x4(%rbp)
>     :                         // no inputs that aren't also outputs
>     : "memory",
>       "cc"
>     );
>
>   return Result;
>     43a5:       8b 45 fc                mov    -0x4(%rbp),%eax
> }
>     43a8:       c9                      leaveq
>     43a9:       c3                      retq

Note that some other bugs remain in
"BaseSynchronizationLib/*/GccInline.c"; those should be addressed later,
under <https://bugzilla.tianocore.org/show_bug.cgi?id=1208>.

Cc: Jiewen Yao <jiewen.yao@intel.com>
Cc: Liming Gao <liming.gao@intel.com>
Cc: Michael Kinney <michael.d.kinney@intel.com>
Cc: Ruiyu Ni <ruiyu.ni@intel.com>
Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=1207
Fixes: 17634d026f968c404b039a8d8431b6389dd396ea
Contributed-under: TianoCore Contribution Agreement 1.1
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
---

Notes:
    Repo:   https://github.com/lersek/edk2.git
    Branch: xadd_rw

 MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++------
 MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c  | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
index d82e0205f553..fa2be7f4b35c 100644
--- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
+++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c
@@ -38,11 +38,11 @@ InternalSyncIncrement (
   __asm__ __volatile__ (
     "movl    $1, %%eax  \n\t"
     "lock               \n\t"
-    "xadd    %%eax, %2  \n\t"
+    "xadd    %%eax, %1  \n\t"
     "inc     %%eax          "
     : "=a" (Result),          // %0
-      "=m" (*Value)           // %1
-    : "m"  (*Value)           // %2
+      "+m" (*Value)           // %1
+    :                         // no inputs that aren't also outputs
     : "memory",
       "cc"
     );
@@ -75,11 +75,11 @@ InternalSyncDecrement (
   __asm__ __volatile__ (
     "movl    $-1, %%eax  \n\t"
     "lock                \n\t"
-    "xadd    %%eax, %2   \n\t"
+    "xadd    %%eax, %1   \n\t"
     "dec     %%eax                  "
     : "=a" (Result),          // %0
-      "=m" (*Value)           // %1
-    : "m"  (*Value)           // %2
+      "+m" (*Value)           // %1
+    :                         // no inputs that aren't also outputs
     : "memory",
       "cc"
     );
diff --git a/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c b/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c
index 4c4d6e3fc712..ab7efe23c4db 100644
--- a/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c
+++ b/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c
@@ -38,11 +38,11 @@ InternalSyncIncrement (
   __asm__ __volatile__ (
     "movl    $1, %%eax  \n\t"
     "lock               \n\t"
-    "xadd    %%eax, %2  \n\t"
+    "xadd    %%eax, %1  \n\t"
     "inc     %%eax          "
     : "=a" (Result),          // %0
-      "=m" (*Value)           // %1
-    : "m"  (*Value)           // %2
+      "+m" (*Value)           // %1
+    :                         // no inputs that aren't also outputs
     : "memory",
       "cc"
     );
@@ -74,11 +74,11 @@ InternalSyncDecrement (
   __asm__ __volatile__ (
     "movl    $-1, %%eax  \n\t"
     "lock                \n\t"
-    "xadd    %%eax, %2   \n\t"
+    "xadd    %%eax, %1   \n\t"
     "dec     %%eax                  "
     : "=a" (Result),          // %0
-      "=m" (*Value)           // %1
-    : "m"  (*Value)           // %2
+      "+m" (*Value)           // %1
+    :                         // no inputs that aren't also outputs
     : "memory",
       "cc"
     );
-- 
2.14.1.3.gb7cf6e02401b



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly
  2018-09-25 19:48 [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly Laszlo Ersek
@ 2018-09-26  9:05 ` Laszlo Ersek
  2018-09-26  9:34   ` Ni, Ruiyu
       [not found] ` <8ecbcc60-8e0f-e418-614e-666aa7fb007b@Intel.com>
  1 sibling, 1 reply; 6+ messages in thread
From: Laszlo Ersek @ 2018-09-26  9:05 UTC (permalink / raw)
  To: edk2-devel-01; +Cc: Michael Kinney, Ruiyu Ni, Jiewen Yao, Liming Gao

Hi,

On 09/25/18 21:48, Laszlo Ersek wrote:
> Currently, "gcc-4.8.5-28.el7_5.1.x86_64" generates the following code for
> me, from the XADD inline assembly added to "X64/GccInline.c" in commit
> 17634d026f96:
> 
>> 0000000000004383 <InternalSyncIncrement>:
>> UINT32
>> EFIAPI
>> InternalSyncIncrement (
>>   IN      volatile UINT32    *Value
>>   )
>> {
>>     4383:       55                      push   %rbp
>>     4384:       48 89 e5                mov    %rsp,%rbp
>>     4387:       48 83 ec 10             sub    $0x10,%rsp
>>     438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)
>>   UINT32  Result;
>>
>>   __asm__ __volatile__ (
>>     438f:       48 8b 55 10             mov    0x10(%rbp),%rdx
>>     4393:       48 8b 45 10             mov    0x10(%rbp),%rax
>>     4397:       b8 01 00 00 00          mov    $0x1,%eax
>>     439c:       f0 0f c1 00             lock xadd %eax,(%rax)
>>     43a0:       ff c0                   inc    %eax
>>     43a2:       89 45 fc                mov    %eax,-0x4(%rbp)
>>     : "m"  (*Value)           // %2
>>     : "memory",
>>       "cc"
>>     );
>>
>>   return Result;
>>     43a5:       8b 45 fc                mov    -0x4(%rbp),%eax
>> }
>>     43a8:       c9                      leaveq
>>     43a9:       c3                      retq
>>
> 
> The MOV $0X1,%EAX instruction corrupts the address of Value in %RAX before
> we reach the XADD instruction. In fact, it makes no sense for XADD to use
> %EAX as source operand and (%RAX) as destination operand at the same time.

may I get a fast review for this patch, please? The regression from
commit 17634d026f96 prevents OVMF from booting.

Thanks
Laszlo


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly
  2018-09-26  9:05 ` Laszlo Ersek
@ 2018-09-26  9:34   ` Ni, Ruiyu
  2018-09-26 12:04     ` Laszlo Ersek
  0 siblings, 1 reply; 6+ messages in thread
From: Ni, Ruiyu @ 2018-09-26  9:34 UTC (permalink / raw)
  To: Laszlo Ersek, edk2-devel-01; +Cc: Michael Kinney, Jiewen Yao, Liming Gao

On 9/26/2018 5:05 PM, Laszlo Ersek wrote:
> Hi,
> 
> On 09/25/18 21:48, Laszlo Ersek wrote:
>> Currently, "gcc-4.8.5-28.el7_5.1.x86_64" generates the following code for
>> me, from the XADD inline assembly added to "X64/GccInline.c" in commit
>> 17634d026f96:
>>
>>> 0000000000004383 <InternalSyncIncrement>:
>>> UINT32
>>> EFIAPI
>>> InternalSyncIncrement (
>>>    IN      volatile UINT32    *Value
>>>    )
>>> {
>>>      4383:       55                      push   %rbp
>>>      4384:       48 89 e5                mov    %rsp,%rbp
>>>      4387:       48 83 ec 10             sub    $0x10,%rsp
>>>      438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)
>>>    UINT32  Result;
>>>
>>>    __asm__ __volatile__ (
>>>      438f:       48 8b 55 10             mov    0x10(%rbp),%rdx
>>>      4393:       48 8b 45 10             mov    0x10(%rbp),%rax
>>>      4397:       b8 01 00 00 00          mov    $0x1,%eax
>>>      439c:       f0 0f c1 00             lock xadd %eax,(%rax)
>>>      43a0:       ff c0                   inc    %eax
>>>      43a2:       89 45 fc                mov    %eax,-0x4(%rbp)
>>>      : "m"  (*Value)           // %2
>>>      : "memory",
>>>        "cc"
>>>      );
>>>
>>>    return Result;
>>>      43a5:       8b 45 fc                mov    -0x4(%rbp),%eax
>>> }
>>>      43a8:       c9                      leaveq
>>>      43a9:       c3                      retq
>>>
>>
>> The MOV $0X1,%EAX instruction corrupts the address of Value in %RAX before
>> we reach the XADD instruction. In fact, it makes no sense for XADD to use
>> %EAX as source operand and (%RAX) as destination operand at the same time.
> 
> may I get a fast review for this patch, please? The regression from
> commit 17634d026f96 prevents OVMF from booting.

Sure. Reviewed-by: Ruiyu Ni <ruiyu.ni@intel.com>
> 
> Thanks
> Laszlo
> 


-- 
Thanks,
Ray


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly
  2018-09-26  9:34   ` Ni, Ruiyu
@ 2018-09-26 12:04     ` Laszlo Ersek
  0 siblings, 0 replies; 6+ messages in thread
From: Laszlo Ersek @ 2018-09-26 12:04 UTC (permalink / raw)
  To: Ni, Ruiyu, edk2-devel-01; +Cc: Michael Kinney, Jiewen Yao, Liming Gao

On 09/26/18 11:34, Ni, Ruiyu wrote:
> On 9/26/2018 5:05 PM, Laszlo Ersek wrote:
>> Hi,
>>
>> On 09/25/18 21:48, Laszlo Ersek wrote:
>>> Currently, "gcc-4.8.5-28.el7_5.1.x86_64" generates the following code
>>> for
>>> me, from the XADD inline assembly added to "X64/GccInline.c" in commit
>>> 17634d026f96:
>>>
>>>> 0000000000004383 <InternalSyncIncrement>:
>>>> UINT32
>>>> EFIAPI
>>>> InternalSyncIncrement (
>>>>    IN      volatile UINT32    *Value
>>>>    )
>>>> {
>>>>      4383:       55                      push   %rbp
>>>>      4384:       48 89 e5                mov    %rsp,%rbp
>>>>      4387:       48 83 ec 10             sub    $0x10,%rsp
>>>>      438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)
>>>>    UINT32  Result;
>>>>
>>>>    __asm__ __volatile__ (
>>>>      438f:       48 8b 55 10             mov    0x10(%rbp),%rdx
>>>>      4393:       48 8b 45 10             mov    0x10(%rbp),%rax
>>>>      4397:       b8 01 00 00 00          mov    $0x1,%eax
>>>>      439c:       f0 0f c1 00             lock xadd %eax,(%rax)
>>>>      43a0:       ff c0                   inc    %eax
>>>>      43a2:       89 45 fc                mov    %eax,-0x4(%rbp)
>>>>      : "m"  (*Value)           // %2
>>>>      : "memory",
>>>>        "cc"
>>>>      );
>>>>
>>>>    return Result;
>>>>      43a5:       8b 45 fc                mov    -0x4(%rbp),%eax
>>>> }
>>>>      43a8:       c9                      leaveq
>>>>      43a9:       c3                      retq
>>>>
>>>
>>> The MOV $0X1,%EAX instruction corrupts the address of Value in %RAX
>>> before
>>> we reach the XADD instruction. In fact, it makes no sense for XADD to
>>> use
>>> %EAX as source operand and (%RAX) as destination operand at the same
>>> time.
>>
>> may I get a fast review for this patch, please? The regression from
>> commit 17634d026f96 prevents OVMF from booting.
> 
> Sure. Reviewed-by: Ruiyu Ni <ruiyu.ni@intel.com>

Thanks, Ray! Commit 8a94eb9283fa.
Laszlo


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly
       [not found] ` <8ecbcc60-8e0f-e418-614e-666aa7fb007b@Intel.com>
@ 2018-09-27  9:46   ` Shao, Ming
       [not found]   ` <0D32B2537B667F42AD320D616D521AF738B92170@shsmsx102.ccr.corp.intel.com>
  1 sibling, 0 replies; 6+ messages in thread
From: Shao, Ming @ 2018-09-27  9:46 UTC (permalink / raw)
  To: lersek@redhat.com; +Cc: edk2-devel@lists.01.org, Ni, Ruiyu, Shao, Ming

Hi Laszlo,



I build Ruiyu's code with gcc 4.8.5 as X64. And got below disassembled code:



[cid:image001.jpg@01D45689.F8A3F530]



So I didn't see register used as both destination and source of xadd instruction.



Then I build your patch, and got exactly the same disassembled code.



Both Ruiyu's patch and yours can pass my test.



Could you provide more details about your environment so I can reproduce this issue? Thanks.



My environment:

l  Ubuntu 18.04.1 LTS

l  gcc (Ubuntu 4.8.5-4ubuntu8) 4.8.5





-Ming


The content of this message is my personal opinion only and although I am an employee of Intel, the statements I make here in no way represent Intel's position on the issue, nor am I authorized to speak on behalf of Intel on this matter.





-----Original Message-----
From: Ni, Ruiyu
Sent: Wednesday, September 26, 2018 5:35 PM
To: Shao, Ming <ming.shao@intel.com>
Subject: Fwd: [edk2] [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly









-------- Forwarded Message --------

Subject: [edk2] [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly

Date: Tue, 25 Sep 2018 21:48:57 +0200

From: Laszlo Ersek <lersek@redhat.com<mailto:lersek@redhat.com>>

To: edk2-devel-01 <edk2-devel@lists.01.org<mailto:edk2-devel@lists.01.org>>

CC: Michael Kinney <michael.d.kinney@intel.com<mailto:michael.d.kinney@intel.com>>, Ruiyu Ni <ruiyu.ni@intel.com<mailto:ruiyu.ni@intel.com>>, Jiewen Yao <jiewen.yao@intel.com<mailto:jiewen.yao@intel.com>>, Liming Gao <liming.gao@intel.com<mailto:liming.gao@intel.com>>



Currently, "gcc-4.8.5-28.el7_5.1.x86_64" generates the following code for me, from the XADD inline assembly added to "X64/GccInline.c" in commit

17634d026f96:



> 0000000000004383 <InternalSyncIncrement>:

> UINT32

> EFIAPI

> InternalSyncIncrement (

>   IN      volatile UINT32    *Value

>   )

> {

>     4383:       55                      push   %rbp

>     4384:       48 89 e5                mov    %rsp,%rbp

>     4387:       48 83 ec 10             sub    $0x10,%rsp

>     438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)

>   UINT32  Result;

>

>   __asm__ __volatile__ (

>     438f:       48 8b 55 10             mov    0x10(%rbp),%rdx

>     4393:       48 8b 45 10             mov    0x10(%rbp),%rax

>     4397:       b8 01 00 00 00          mov    $0x1,%eax

>     439c:       f0 0f c1 00             lock xadd %eax,(%rax)

>     43a0:       ff c0                   inc    %eax

>     43a2:       89 45 fc                mov    %eax,-0x4(%rbp)

>     : "m"  (*Value)           // %2

>     : "memory",

>       "cc"

>     );

>

>   return Result;

>     43a5:       8b 45 fc                mov    -0x4(%rbp),%eax

> }

>     43a8:       c9                      leaveq

>     43a9:       c3                      retq

>



The MOV $0X1,%EAX instruction corrupts the address of Value in %RAX before we reach the XADD instruction. In fact, it makes no sense for XADD to use %EAX as source operand and (%RAX) as destination operand at the same time.



The XADD instruction's destination operand is a read-write operand. The GCC documentation states:



> The ordinary output operands must be write-only; GCC will assume that

> the values in these operands before the instruction are dead and need

> not be generated. Extended asm supports input-output or read-write

> operands. Use the constraint character `+' to indicate such an operand

> and list it with the output operands. You should only use read-write

> operands when the constraints for the operand (or the operand in which

> only some of the bits are to be changed) allow a register.



(The above is intentionally quoted from the oldest GCC release that edk2 supports, namely gcc-4.4:

<https://gcc.gnu.org/onlinedocs/gcc-4.4.7/gcc/Extended-Asm.html>.)



Fix the operand list accordingly.



With the patch applied, I get:



> 0000000000004383 <InternalSyncIncrement>:

> UINT32

> EFIAPI

> InternalSyncIncrement (

>   IN      volatile UINT32    *Value

>   )

> {

>     4383:       55                      push   %rbp

>     4384:       48 89 e5                mov    %rsp,%rbp

>     4387:       48 83 ec 10             sub    $0x10,%rsp

>     438b:       48 89 4d 10             mov    %rcx,0x10(%rbp)

>   UINT32  Result;

>

>   __asm__ __volatile__ (

>     438f:       48 8b 55 10             mov    0x10(%rbp),%rdx

>     4393:       48 8b 45 10             mov    0x10(%rbp),%rax

>     4397:       b8 01 00 00 00          mov    $0x1,%eax

>     439c:       f0 0f c1 02             lock xadd %eax,(%rdx)

>     43a0:       ff c0                   inc    %eax

>     43a2:       89 45 fc                mov    %eax,-0x4(%rbp)

>     :                         // no inputs that aren't also outputs

>     : "memory",

>       "cc"

>     );

>

>   return Result;

>     43a5:       8b 45 fc                mov    -0x4(%rbp),%eax

> }

>     43a8:       c9                      leaveq

>     43a9:       c3                      retq



Note that some other bugs remain in

"BaseSynchronizationLib/*/GccInline.c"; those should be addressed later, under <https://bugzilla.tianocore.org/show_bug.cgi?id=1208>.



Cc: Jiewen Yao <jiewen.yao@intel.com<mailto:jiewen.yao@intel.com>>

Cc: Liming Gao <liming.gao@intel.com<mailto:liming.gao@intel.com>>

Cc: Michael Kinney <michael.d.kinney@intel.com<mailto:michael.d.kinney@intel.com>>

Cc: Ruiyu Ni <ruiyu.ni@intel.com<mailto:ruiyu.ni@intel.com>>

Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=1207

Fixes: 17634d026f968c404b039a8d8431b6389dd396ea

Contributed-under: TianoCore Contribution Agreement 1.1

Signed-off-by: Laszlo Ersek <lersek@redhat.com<mailto:lersek@redhat.com>>

---



Notes:

     Repo:   https://github.com/lersek/edk2.git

     Branch: xadd_rw



  MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c | 12 ++++++------

  MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c  | 12 ++++++------

  2 files changed, 12 insertions(+), 12 deletions(-)



diff --git a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c

b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c

index d82e0205f553..fa2be7f4b35c 100644

--- a/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c

+++ b/MdePkg/Library/BaseSynchronizationLib/Ia32/GccInline.c

@@ -38,11 +38,11 @@ InternalSyncIncrement (

    __asm__ __volatile__ (

      "movl    $1, %%eax  \n\t"

      "lock               \n\t"

-    "xadd    %%eax, %2  \n\t"

+    "xadd    %%eax, %1  \n\t"

      "inc     %%eax          "

      : "=a" (Result),          // %0

-      "=m" (*Value)           // %1

-    : "m"  (*Value)           // %2

+      "+m" (*Value)           // %1

+    :                         // no inputs that aren't also outputs

      : "memory",

        "cc"

      );

@@ -75,11 +75,11 @@ InternalSyncDecrement (

    __asm__ __volatile__ (

      "movl    $-1, %%eax  \n\t"

      "lock                \n\t"

-    "xadd    %%eax, %2   \n\t"

+    "xadd    %%eax, %1   \n\t"

      "dec     %%eax                  "

      : "=a" (Result),          // %0

-      "=m" (*Value)           // %1

-    : "m"  (*Value)           // %2

+      "+m" (*Value)           // %1

+    :                         // no inputs that aren't also outputs

      : "memory",

        "cc"

      );

diff --git a/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c

b/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c

index 4c4d6e3fc712..ab7efe23c4db 100644

--- a/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c

+++ b/MdePkg/Library/BaseSynchronizationLib/X64/GccInline.c

@@ -38,11 +38,11 @@ InternalSyncIncrement (

    __asm__ __volatile__ (

      "movl    $1, %%eax  \n\t"

      "lock               \n\t"

-    "xadd    %%eax, %2  \n\t"

+    "xadd    %%eax, %1  \n\t"

      "inc     %%eax          "

      : "=a" (Result),          // %0

-      "=m" (*Value)           // %1

-    : "m"  (*Value)           // %2

+      "+m" (*Value)           // %1

+    :                         // no inputs that aren't also outputs

      : "memory",

        "cc"

      );

@@ -74,11 +74,11 @@ InternalSyncDecrement (

    __asm__ __volatile__ (

      "movl    $-1, %%eax  \n\t"

      "lock                \n\t"

-    "xadd    %%eax, %2   \n\t"

+    "xadd    %%eax, %1   \n\t"

      "dec     %%eax                  "

      : "=a" (Result),          // %0

-      "=m" (*Value)           // %1

-    : "m"  (*Value)           // %2

+      "+m" (*Value)           // %1

+    :                         // no inputs that aren't also outputs

      : "memory",

        "cc"

      );

--

2.14.1.3.gb7cf6e02401b



_______________________________________________

edk2-devel mailing list

edk2-devel@lists.01.org<mailto:edk2-devel@lists.01.org>

https://lists.01.org/mailman/listinfo/edk2-devel


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly
       [not found]   ` <0D32B2537B667F42AD320D616D521AF738B92170@shsmsx102.ccr.corp.intel.com>
@ 2018-09-27 10:19     ` Laszlo Ersek
  0 siblings, 0 replies; 6+ messages in thread
From: Laszlo Ersek @ 2018-09-27 10:19 UTC (permalink / raw)
  To: Shao, Ming; +Cc: edk2-devel@lists.01.org

On 09/27/18 11:28, Shao, Ming wrote:
> Hi Laszlo,
>
> I build Ruiyu's code with gcc 4.8.5 as X64. And got below disassembled
> code:
>
> [cid:image002.jpg@01D45686.AFEDF140]
>
> So I didn't see register used as both destination and source of xadd
> instruction.
>
> Then I build your patch, and got exactly the same disassembled code.
>
> Both Ray's patch and yours can pass my test.

That's not surprising: the gcc documentation says that not marking
input-output operands as such may, or may not, break the code. Such a
bug in the operand list of the inline assembly is not *required* to
break the code, it just *can*. The only guarantee given is that, if the
input-output operands are all marked properly as such, then the code
will not break.

> Could you provide more details about your environment so I can
> reproduce this issue? Thanks.

Sure. My compiler version is the following (on RHEL-7.5 Workstation):

- gcc-4.8.5-28.el7_5.1.x86_64

Binutils (not that I think it matters in this case, but anyway):

- binutils-2.27-28.base.el7_5.1.x86_64

The OVMF build command line was:

  build \
    -a X64 \
    -p OvmfPkg/OvmfPkgX64.dsc \
    -t GCC48 \
    -b NOOPT \
    -D SECURE_BOOT_ENABLE \
    -D TLS_ENABLE \
    -D NETWORK_IP6_ENABLE \
    -D HTTP_BOOT_ENABLE \
    -n 4 \
    --report-file=.../build.ovmf.report \
    --log=.../build.ovmf.log

It's very likely that the gcc version in RHEL-7.5 has a large number of
backports from later upstream changes. The base version is indeed 4.8.5,
but the internal build number is "-28.el7_5.1". That could mean a huge
number of patches. I'll append the relevant part of the RPM changelog at
the bottom of my email.

I'd just like to point out that this is not a gcc bug; the compiler
behaves according to its documentation.

Thanks,
Laszlo

> * Tue Mar 27 2018 Jeff Law <law@redhat.com> 4.8.5-29
> - s390 retpoline support for spectre mitigation (#1552021)
>
> * Fri Jan 19 2018 Marek Polacek <polacek@redhat.com> 4.8.5-28
> - Minor testsuite fixes to clean up test results (#1469697)
> - retpoline support for spectre mitigation (#1535655)
>
> * Thu Jan 11 2018 Marek Polacek <polacek@redhat.com> 4.8.5-27
> - bump for rebuild with RELRO enabled even for ppc64/ppc64le
>
> * Thu Jan 04 2018 Jeff Law <law@redhat.com> 4.8.5-26
> - Avoid red zone probing for zero residual dynamic allocation (#1469697)
> - Avoid bogus CFIs for probes in noreturn fucntions on x86/x86_64 (#1469697)
>
> * Tue Nov 14 2017 Jeff Law <law@redhat.com> 4.8.5-25
> - Avoid red zone probe on aarch64 (#1469697)
>
> * Mon Nov 06 2017 Jeff Law <law@redhat.com> 4.8.5-24
> - Sync gcc48-rh1469697-13 patch to upstream (#1469697)
> - Avoid probing in the red zone for noreturn functions (#1507980, #1469697)
> - Avoid infinite loop if probing interval is less than guard size (#1469697)
> - Fix debug information for large probing interval on aarch64 (#1469697)
> - Fix ICE on ppc port with large probing interval (#1469697)
>
> - rebuild to remove static relocations not known to older linkers (#1508968)
>
> * Thu Nov 02 2017 Marek Polacek <polacek@redhat.com> 4.8.5-23
> - rebuild to remove static relocations not known to older linkers (#1508968)
>
> * Thu Oct 19 2017 Marek Polacek <polacek@redhat.com> 4.8.5-22
> - fix gcc.c-torture/execute/pr80692.x
> - fix divmod expansion (PR middle-end/78416)
>
> * Wed Oct 18 2017 Marek Polacek <polacek@redhat.com> 4.8.5-21
> - fix 27_io/basic_fstream/53984.cc
> - fix for classes with bases with mutable members (PR c++/77375)
> - fix handling side-effects of parameters (PR c/77767)
> - fix combine's make_extraction (PR rtl-optimization/78378)
> - fix gimplification of const var initialization from COND_EXPR (PR c++/80129)
> - fix -A / -B to A / B folding (PR middle-end/80362)
> - fix comparison of decimal float zeroes (PR middle-end/80692)
> - fix __mulv[dt]i3 and expand_mul_overflow (PR target/82274)
>
> * Thu Oct 12 2017 Marek Polacek <polacek@redhat.com> 4.8.5-20
> - handle exceptions in basic_istream::sentry (#1469384)
> - don't run pr63354.c on ppc (#1468546)
> - ensure proxy privatization safety (#1491395)
> - fix incorrect codegen from rdseed intrinsic use (#1482762, CVE-2017-11671)
> - on aarch64, remove libatomic.so (#1465510)
>
> * Sun Oct 08 2017 Jeff Law <law@redhat.com> 4.8.5-19
> - Backport stack clash protection from upstream (#1469697)
>
> * Fri Oct 06 2017 Marek Polacek <polacek@redhat.com> 4.8.5-18
> - backport several -mprofile-kernel fixes (#1468546)
>
> * Thu Sep 14 2017 Marek Polacek <polacek@redhat.com> 4.8.5-17
> - fix -mcpu=power8 atomic expansion (#1437220, PR target/69644)
> - fix .toc alignment (#1487434)
>
> * Thu Jun 01 2017 Marek Polacek <polacek@redhat.com> 4.8.5-16
> - disable emitting profiling in functions marked with a special
>   attribute (#1457969)
>
> * Wed May 31 2017 Marek Polacek <polacek@redhat.com> 4.8.5-15
> - properly apply the PR70549 patch (#1349067)
>
> * Mon Mar 13 2017 Marek Polacek <polacek@redhat.com> 4.8.5-14
> - promote reloads of a PLUS to RELOAD_OTHER (#1402585)
>
> * Fri Mar 10 2017 Jakub Jelinek <jakub@redhat.com> 4.8.5-13
> - add -mstack-protector-guard={tls,global}, -mstack-protector-guard-reg=
>   and -mstack-protector-guard-offset= options on ppc* (#1415952,
>   PR target/78875)
>
> * Tue Mar 07 2017 Jakub Jelinek <jakub@redhat.com> 4.8.5-12
> - fix up std::rethrow_exception (#1375711, PR libstdc++/62258)
> - use _Unwind_GetIPInfo in __gcc_personality_v0 (#1387402, PR libgcc/78064)
> - fix vec_vsx_ld/st on ppc64le (#1389801, PR target/72863, PR target/78084)
> - fix ICE in gfc_compare_derived_types (#1369183)
> - fix EH from C++ thunks (#1427412, PR ipa/68184)
> - on ppc64{,le} emit nop after recursive call if the current function
>   is replaceable (#1420723, PR target/79439)
> - on aarch64 with -frounding-math use fnmul only with -(a*b) and not
>   with (-a)*b (#1418446, PR target/66731)
> - constrain std::valarray functions and operators (#1416214,
>   PR libstdc++/69116)
> - fix gimplification of aggregate assignments when lhs is used
>   (#1396298, PR middle-end/72747, PR c/78408)
> - fix aarch64 TLS with -mcmodel=large (#1389276, PR target/78796)
> - fix aarch64 reloading of floating point constants into general purpose
>   registers (#1349067, PR target/70549)
> - fix DW_AT_decl_line on DW_TAG_enumeration_type for C enumeration
>   definitions following forward declarations (#1423460, PR c/79969)
>
> * Wed Aug 31 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-11
> - on aarch64 emit scheduling barriers before stack deallocation in
>   function epilogues (#1362635, PR target/63293)
>
> * Wed Aug 10 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-10
> - include vecintrin.h intrinsic header on s390 (#1182152)
>
> * Fri Jul 15 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-9
> - backport OpenMP 4.5 support to libgomp (library only; #1357060,
>   PRs libgomp/68579, libgomp/64625)
>
> * Wed Jun 15 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-8
> - fix a bug in C++ ref-to-ptr conversion (#1344807)
> - fix combiner handling of jumps on aarch64 (#1344672,
>   PR rtl-optimization/52714)
>
> * Thu Jun 09 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-7
> - ensure the timestamp on printers.py is always the same (#1344291)
>
> * Mon Jun 06 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-6
> - backport s390 z13 support (#1182152)
> - fix up -fsanitize=address on powerpc64 with 46-bit virtual address space
>   (#1312850)
> - throw exception on std::random_device::_M_getval() failure (#1262846,
>   PR libstdc++/65142, CVE-2015-5276)
>
> * Tue May 10 2016 Jakub Jelinek <jakub@redhat.com> 4.8.5-5
> - fix up libitm HTM fastpath (#1180633)
> - on ppc64le default to -mcpu=power8 instead of -mcpu=power7 (#1213268)
> - fix up size in .debug_pubnames (#1278872)
> - turn powerpc* HTM insns into memory barriers (#1282755, PR target/67281)
> - make sure to handle __builtin_alloca_with_align like alloca in
>   -fstack-protector* (#1289022, PR tree-optimization/68680)
> - improve DW_AT_abstract_origin of DW_TAG_GNU_call_site on s390 with -fPIC
>   (#1312436)
> - fix up libstdc++ pretty-printers (#1076690, PR libstdc++/53477)
> - don't pass explicit --oformat option to ld on powerpc* (#1296211)
> - backport Intel Memory Protection Keys ISA support - -mpku (#1304449)
>
> * Wed Jul 15 2015 Jakub Jelinek <jakub@redhat.com> 4.8.5-4
> - fix up basic_streambuf copy constructor and assignment operator
>   (#1243366)
>
> * Thu Jul 02 2015 Jakub Jelinek <jakub@redhat.com> 4.8.5-3
> - backport aarch64 crc enablement - -mcpu=generic+crc (#1179935)
> - rebuild against fixed binutils to fix up systemtap markers on aarch64
>   (#1238462)
>
> * Wed Jul 01 2015 Jakub Jelinek <jakub@redhat.com> 4.8.5-2
> - add --enable-targets=powerpcle-linux on ppc64le (#1237363)
>
> * Tue Jun 23 2015 Jakub Jelinek <jakub@redhat.com> 4.8.5-1
> - update from the 4.8 branch (#1230103)
>   - GCC 4.8.5 release
>   - fix -imacros handling (#1004526, PR c/57653)
>   - fix up IPA type handling (#1217267, PRs ipa/63551, ipa/64153)
>   - add PowerPC analyze swaps optimization pass (#1208103, #1200336)
>   - fix PowerPC vsx_extract_<mode>* patterns (#1206341)
>   - fix PowerPC -mcrypto handling (#1200335)
>   - Power8 unaligned vectorization improvements (#1199221, PR target/65456)
>   - PRs ada/47500, ada/63225, bootstrap/64213, c++/56710, c++/58624,
> 	c++/63415, c++/63455, c++/64251, c++/64297, c++/64487, c++/65721,
> 	c++/65727, c/52769, c/61553, c/64766, debug/63342, debug/65549,
> 	fortran/56674, fortran/56867, fortran/57023, fortran/58813,
> 	fortran/59016, fortran/59024, fortran/59488, fortran/60898,
> 	fortran/61138, fortran/61407, fortran/63733, fortran/63744,
> 	fortran/63938, fortran/64244, fortran/64528, fortran/65024,
> 	gcov-profile/64634, inline-asm/63282, ipa/59626, ipa/63838,
> 	libfortran/63589, libgfortran/59513, libgfortran/60956, libgomp/61200,
> 	libstdc++/57440, libstdc++/59603, libstdc++/61947, libstdc++/63449,
> 	libstdc++/63840, libstdc++/65279, libstdc++/65543, lto/65015,
> 	lto/65193, middle-end/43631, middle-end/56917, middle-end/57748,
> 	middle-end/58624, middle-end/59990, middle-end/63608,
> 	middle-end/63665, middle-end/63704, middle-end/64067,
> 	middle-end/64111, middle-end/64199, middle-end/64225,
> 	middle-end/65409, middle-end/65680, middle-end/66133,
> 	middle-end/66251, pch/65550, preprocessor/60436,
> 	rtl-optimization/61058, rtl-optimization/63475,
> 	rtl-optimization/63483, rtl-optimization/63659,
> 	rtl-optimization/64037, rtl-optimization/64557, sanitizer/64265,
> 	target/49423, target/52941, target/53988, target/55351, target/56846,
> 	target/59593, target/60111, target/61413, target/62218, target/62642,
> 	target/63335, target/63428, target/63673, target/63947, target/64113,
> 	target/64115, target/64304, target/64358, target/64387, target/64409,
> 	target/64452, target/64453, target/64479, target/64513, target/64579,
> 	target/64580, target/64795, target/64882, target/64979, target/65138,
> 	target/65163, target/65196, target/65286, target/65368, target/65787,
> 	target/65849, target/66140, target/66148, target/66215, target/66275,
> 	target/66470, target/66474, tree-optimization/59124,
> 	tree-optimization/60656, tree-optimization/61634,
> 	tree-optimization/61686, tree-optimization/61969,
> 	tree-optimization/62031, tree-optimization/62167,
> 	tree-optimization/63375, tree-optimization/63379,
> 	tree-optimization/63551, tree-optimization/63593,
> 	tree-optimization/63605, tree-optimization/63841,
> 	tree-optimization/63844, tree-optimization/64269,
> 	tree-optimization/64277, tree-optimization/64493,
> 	tree-optimization/64495, tree-optimization/64563,
> 	tree-optimization/65063, tree-optimization/65388,
> 	tree-optimization/65518, tree-optimization/66123,
> 	tree-optimization/66233, tree-optimization/66251,
> 	tree-optimization/66272


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-09-27 10:19 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-09-25 19:48 [PATCH] MdePkg/BaseSynchronizationLib: fix XADD operands in GCC IA32/X64 assembly Laszlo Ersek
2018-09-26  9:05 ` Laszlo Ersek
2018-09-26  9:34   ` Ni, Ruiyu
2018-09-26 12:04     ` Laszlo Ersek
     [not found] ` <8ecbcc60-8e0f-e418-614e-666aa7fb007b@Intel.com>
2018-09-27  9:46   ` Shao, Ming
     [not found]   ` <0D32B2537B667F42AD320D616D521AF738B92170@shsmsx102.ccr.corp.intel.com>
2018-09-27 10:19     ` Laszlo Ersek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox