* [PATCH 0/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
@ 2021-02-04 2:59 Ni, Ray
2021-02-04 2:59 ` [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset Ni, Ray
2021-02-04 2:59 ` [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
0 siblings, 2 replies; 11+ messages in thread
From: Ni, Ray @ 2021-02-04 2:59 UTC (permalink / raw)
To: devel
Ray Ni (2):
UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset
UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 54 +++++++++--------
.../Library/MpInitLib/Ia32/MpFuncs.nasm | 52 +++++++---------
UefiCpuPkg/Library/MpInitLib/MpLib.c | 3 +-
UefiCpuPkg/Library/MpInitLib/MpLib.h | 3 +-
UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 56 ++++++++---------
UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 60 ++++++++-----------
6 files changed, 103 insertions(+), 125 deletions(-)
--
2.27.0.windows.1
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset
2021-02-04 2:59 [PATCH 0/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
@ 2021-02-04 2:59 ` Ni, Ray
2021-02-04 9:32 ` [edk2-devel] " Laszlo Ersek
2021-02-04 2:59 ` [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
1 sibling, 1 reply; 11+ messages in thread
From: Ni, Ray @ 2021-02-04 2:59 UTC (permalink / raw)
To: devel; +Cc: Eric Dong, Laszlo Ersek, Rahul Kumar
In Windows environment, "dumpbin /disasm" is used to verify the
disassembly before and after using NASM struc doesn't change.
Signed-off-by: Ray Ni <ray.ni@intel.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Rahul Kumar <rahul1.kumar@intel.com>
---
UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 52 ++++++++++--------
.../Library/MpInitLib/Ia32/MpFuncs.nasm | 35 ++++++------
UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 54 ++++++++++---------
UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 46 ++++++++--------
4 files changed, 98 insertions(+), 89 deletions(-)
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
index 4f5a7c859a..244c1e72b7 100644
--- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;
-; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
@@ -19,25 +19,31 @@ CPU_SWITCH_STATE_IDLE equ 0
CPU_SWITCH_STATE_STORED equ 1
CPU_SWITCH_STATE_LOADED equ 2
-LockLocation equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
-StackStartAddressLocation equ LockLocation + 04h
-StackSizeLocation equ LockLocation + 08h
-ApProcedureLocation equ LockLocation + 0Ch
-GdtrLocation equ LockLocation + 10h
-IdtrLocation equ LockLocation + 16h
-BufferStartLocation equ LockLocation + 1Ch
-ModeOffsetLocation equ LockLocation + 20h
-ApIndexLocation equ LockLocation + 24h
-CodeSegmentLocation equ LockLocation + 28h
-DataSegmentLocation equ LockLocation + 2Ch
-EnableExecuteDisableLocation equ LockLocation + 30h
-Cr3Location equ LockLocation + 34h
-InitFlagLocation equ LockLocation + 38h
-CpuInfoLocation equ LockLocation + 3Ch
-NumApsExecutingLocation equ LockLocation + 40h
-InitializeFloatingPointUnitsAddress equ LockLocation + 48h
-ModeTransitionMemoryLocation equ LockLocation + 4Ch
-ModeTransitionSegmentLocation equ LockLocation + 50h
-ModeHighMemoryLocation equ LockLocation + 52h
-ModeHighSegmentLocation equ LockLocation + 56h
-
+MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
+struc MP_CPU_EXCHANGE_INFO
+ .Lock: resd 1
+ .StackStart: resd 1
+ .StackSize: resd 1
+ .CFunction: resd 1
+ .GdtrProfile: resb 6
+ .IdtrProfile: resb 6
+ .BufferStart: resd 1
+ .ModeOffset: resd 1
+ .ApIndex: resd 1
+ .CodeSegment: resd 1
+ .DataSegment: resd 1
+ .EnableExecuteDisable: resd 1
+ .Cr3: resd 1
+ .InitFlag: resd 1
+ .CpuInfo: resd 1
+ .NumApsExecuting: resd 1
+ .CpuMpData: resd 1
+ .InitializeFloatingPointUnits: resd 1
+ .ModeTransitionMemory: resd 1
+ .ModeTransitionSegment:resw 1
+ .ModeHighMemory: resd 1
+ .ModeHighSegment: resw 1
+ .Enable5LevelPaging: resb 1
+ .SevEsIsEnabled: resb 1
+ .GhcbBase: resd 1
+endstruc
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
index 7e81d24aa6..908c2eb447 100644
--- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;
-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
@@ -39,21 +39,21 @@ BITS 16
mov fs, ax
mov gs, ax
- mov si, BufferStartLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
mov ebx, [si]
- mov si, DataSegmentLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.DataSegment
mov edx, [si]
;
; Get start address of 32-bit code in low memory (<1MB)
;
- mov edi, ModeTransitionMemoryLocation
+ mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeTransitionMemory
- mov si, GdtrLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GdtrProfile
o32 lgdt [cs:si]
- mov si, IdtrLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.IdtrProfile
o32 lidt [cs:si]
;
@@ -82,7 +82,7 @@ Flat32Start: ; protected mode entry point
mov esi, ebx
mov edi, esi
- add edi, EnableExecuteDisableLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.EnableExecuteDisable
cmp byte [edi], 0
jz SkipEnableExecuteDisable
@@ -96,7 +96,7 @@ Flat32Start: ; protected mode entry point
wrmsr
mov edi, esi
- add edi, Cr3Location
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Cr3
mov eax, dword [edi]
mov cr3, eax
@@ -110,19 +110,19 @@ Flat32Start: ; protected mode entry point
SkipEnableExecuteDisable:
mov edi, esi
- add edi, InitFlagLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitFlag
cmp dword [edi], 1 ; 1 == ApInitConfig
jnz GetApicId
; Increment the number of APs executing here as early as possible
; This is decremented in C code when AP is finished executing
mov edi, esi
- add edi, NumApsExecutingLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.NumApsExecuting
lock inc dword [edi]
; AP init
mov edi, esi
- add edi, LockLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET
mov eax, NotVacantFlag
TestLock:
@@ -131,7 +131,7 @@ TestLock:
jz TestLock
mov ecx, esi
- add ecx, ApIndexLocation
+ add ecx, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
inc dword [ecx]
mov ebx, [ecx]
@@ -140,13 +140,13 @@ Releaselock:
xchg [edi], eax
mov edi, esi
- add edi, StackSizeLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
mov eax, [edi]
mov ecx, ebx
inc ecx
mul ecx ; EAX = StackSize * (CpuNumber + 1)
mov edi, esi
- add edi, StackStartAddressLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackStart
add eax, [edi]
mov esp, eax
jmp CProcedureInvoke
@@ -179,7 +179,7 @@ GetProcessorNumber:
; Note that BSP may become an AP due to SwitchBsp()
;
xor ebx, ebx
- lea eax, [esi + CpuInfoLocation]
+ lea eax, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CpuInfo]
mov edi, [eax]
GetNextProcNumber:
@@ -203,13 +203,12 @@ CProcedureInvoke:
push ebx ; Push ApIndex
mov eax, esi
- add eax, LockLocation
+ add eax, MP_CPU_EXCHANGE_INFO_OFFSET
push eax ; push address of exchange info data buffer
mov edi, esi
- add edi, ApProcedureLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CFunction
mov eax, [edi]
-
call eax ; Invoke C function
jmp $ ; Never reach here
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
index c92daaaffd..3974330991 100644
--- a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;
-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
@@ -19,27 +19,31 @@ CPU_SWITCH_STATE_IDLE equ 0
CPU_SWITCH_STATE_STORED equ 1
CPU_SWITCH_STATE_LOADED equ 2
-LockLocation equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
-StackStartAddressLocation equ LockLocation + 08h
-StackSizeLocation equ LockLocation + 10h
-ApProcedureLocation equ LockLocation + 18h
-GdtrLocation equ LockLocation + 20h
-IdtrLocation equ LockLocation + 2Ah
-BufferStartLocation equ LockLocation + 34h
-ModeOffsetLocation equ LockLocation + 3Ch
-ApIndexLocation equ LockLocation + 44h
-CodeSegmentLocation equ LockLocation + 4Ch
-DataSegmentLocation equ LockLocation + 54h
-EnableExecuteDisableLocation equ LockLocation + 5Ch
-Cr3Location equ LockLocation + 64h
-InitFlagLocation equ LockLocation + 6Ch
-CpuInfoLocation equ LockLocation + 74h
-NumApsExecutingLocation equ LockLocation + 7Ch
-InitializeFloatingPointUnitsAddress equ LockLocation + 8Ch
-ModeTransitionMemoryLocation equ LockLocation + 94h
-ModeTransitionSegmentLocation equ LockLocation + 98h
-ModeHighMemoryLocation equ LockLocation + 9Ah
-ModeHighSegmentLocation equ LockLocation + 9Eh
-Enable5LevelPagingLocation equ LockLocation + 0A0h
-SevEsIsEnabledLocation equ LockLocation + 0A1h
-GhcbBaseLocation equ LockLocation + 0A2h
+MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
+struc MP_CPU_EXCHANGE_INFO
+ .Lock: resq 1
+ .StackStart: resq 1
+ .StackSize: resq 1
+ .CFunction: resq 1
+ .GdtrProfile: resb 10
+ .IdtrProfile: resb 10
+ .BufferStart: resq 1
+ .ModeOffset: resq 1
+ .ApIndex: resq 1
+ .CodeSegment: resq 1
+ .DataSegment: resq 1
+ .EnableExecuteDisable: resq 1
+ .Cr3: resq 1
+ .InitFlag: resq 1
+ .CpuInfo: resq 1
+ .NumApsExecuting: resq 1
+ .CpuMpData: resq 1
+ .InitializeFloatingPointUnits: resq 1
+ .ModeTransitionMemory: resd 1
+ .ModeTransitionSegment:resw 1
+ .ModeHighMemory: resd 1
+ .ModeHighSegment: resw 1
+ .Enable5LevelPaging: resb 1
+ .SevEsIsEnabled: resb 1
+ .GhcbBase: resq 1
+endstruc
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
index aecfd07bc0..423beb2cca 100644
--- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
@@ -1,5 +1,5 @@
;------------------------------------------------------------------------------ ;
-; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
+; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
; SPDX-License-Identifier: BSD-2-Clause-Patent
;
; Module Name:
@@ -43,21 +43,21 @@ BITS 16
mov fs, ax
mov gs, ax
- mov si, BufferStartLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
mov ebx, [si]
- mov si, DataSegmentLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.DataSegment
mov edx, [si]
;
; Get start address of 32-bit code in low memory (<1MB)
;
- mov edi, ModeTransitionMemoryLocation
+ mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeTransitionMemory
- mov si, GdtrLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GdtrProfile
o32 lgdt [cs:si]
- mov si, IdtrLocation
+ mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.IdtrProfile
o32 lidt [cs:si]
;
@@ -85,7 +85,7 @@ Flat32Start: ; protected mode entry point
;
; Enable execute disable bit
;
- mov esi, EnableExecuteDisableLocation
+ mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.EnableExecuteDisable
cmp byte [ebx + esi], 0
jz SkipEnableExecuteDisableBit
@@ -101,7 +101,7 @@ SkipEnableExecuteDisableBit:
mov eax, cr4
bts eax, 5
- mov esi, Enable5LevelPagingLocation
+ mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Enable5LevelPaging
cmp byte [ebx + esi], 0
jz SkipEnable5LevelPaging
@@ -117,7 +117,7 @@ SkipEnable5LevelPaging:
;
; Load page table
;
- mov esi, Cr3Location ; Save CR3 in ecx
+ mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Cr3 ; Save CR3 in ecx
mov ecx, [ebx + esi]
mov cr3, ecx ; Load CR3
@@ -139,26 +139,26 @@ SkipEnable5LevelPaging:
;
; Far jump to 64-bit code
;
- mov edi, ModeHighMemoryLocation
+ mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeHighMemory
add edi, ebx
jmp far [edi]
BITS 64
LongModeStart:
mov esi, ebx
- lea edi, [esi + InitFlagLocation]
+ lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitFlag]
cmp qword [edi], 1 ; ApInitConfig
jnz GetApicId
; Increment the number of APs executing here as early as possible
; This is decremented in C code when AP is finished executing
mov edi, esi
- add edi, NumApsExecutingLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.NumApsExecuting
lock inc dword [edi]
; AP init
mov edi, esi
- add edi, LockLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Lock
mov rax, NotVacantFlag
TestLock:
@@ -166,7 +166,7 @@ TestLock:
cmp rax, NotVacantFlag
jz TestLock
- lea ecx, [esi + ApIndexLocation]
+ lea ecx, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex]
inc dword [ecx]
mov ebx, [ecx]
@@ -175,17 +175,17 @@ Releaselock:
xchg qword [edi], rax
; program stack
mov edi, esi
- add edi, StackSizeLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
mov eax, dword [edi]
mov ecx, ebx
inc ecx
mul ecx ; EAX = StackSize * (CpuNumber + 1)
mov edi, esi
- add edi, StackStartAddressLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackStart
add rax, qword [edi]
mov rsp, rax
- lea edi, [esi + SevEsIsEnabledLocation]
+ lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.SevEsIsEnabled]
cmp byte [edi], 1 ; SevEsIsEnabled
jne CProcedureInvoke
@@ -199,7 +199,7 @@ Releaselock:
mov ecx, ebx
mul ecx ; EAX = SIZE_4K * 2 * CpuNumber
mov edi, esi
- add edi, GhcbBaseLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GhcbBase
add rax, qword [edi]
mov rdx, rax
shr rdx, 32
@@ -208,7 +208,7 @@ Releaselock:
jmp CProcedureInvoke
GetApicId:
- lea edi, [esi + SevEsIsEnabledLocation]
+ lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.SevEsIsEnabled]
cmp byte [edi], 1 ; SevEsIsEnabled
jne DoCpuid
@@ -302,7 +302,7 @@ GetProcessorNumber:
; Note that BSP may become an AP due to SwitchBsp()
;
xor ebx, ebx
- lea eax, [esi + CpuInfoLocation]
+ lea eax, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CpuInfo]
mov rdi, [eax]
GetNextProcNumber:
@@ -321,17 +321,17 @@ CProcedureInvoke:
push rbp
mov rbp, rsp
- mov rax, qword [esi + InitializeFloatingPointUnitsAddress]
+ mov rax, qword [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitializeFloatingPointUnits]
sub rsp, 20h
call rax ; Call assembly function to initialize FPU per UEFI spec
add rsp, 20h
mov edx, ebx ; edx is ApIndex
mov ecx, esi
- add ecx, LockLocation ; rcx is address of exchange info data buffer
+ add ecx, MP_CPU_EXCHANGE_INFO_OFFSET ; rcx is address of exchange info data buffer
mov edi, esi
- add edi, ApProcedureLocation
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CFunction
mov rax, qword [edi]
sub rsp, 20h
--
2.27.0.windows.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
2021-02-04 2:59 [PATCH 0/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
2021-02-04 2:59 ` [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset Ni, Ray
@ 2021-02-04 2:59 ` Ni, Ray
2021-02-04 9:51 ` [edk2-devel] " Laszlo Ersek
2021-02-04 11:24 ` Zeng, Star
1 sibling, 2 replies; 11+ messages in thread
From: Ni, Ray @ 2021-02-04 2:59 UTC (permalink / raw)
To: devel; +Cc: Laszlo Ersek, Eric Dong, Rahul1 Kumar
When AP firstly wakes up, MpFuncs.nasm contains below logic to assign
an unique ApIndex to each AP according to who comes first:
---ASM---
TestLock:
xchg [edi], eax
cmp eax, NotVacantFlag
jz TestLock
mov ecx, esi
add ecx, ApIndexLocation
inc dword [ecx]
mov ebx, [ecx]
Releaselock:
mov eax, VacantFlag
xchg [edi], eax
---ASM END---
"lock inc" cannot be used to increase ApIndex because not only the
global ApIndex should be increased, but also the result should be
stored to a local general purpose register EBX.
This patch learns from the NASM implementation of
InternalSyncIncrement() to use "XADD" instruction which can increase
the global ApIndex and store the original ApIndex to EBX in one
instruction.
With this patch, OVMF when running in a 255 threads QEMU spends about
one second to wakeup all APs. Original implementation needs more than
10 seconds.
Signed-off-by: Ray Ni <ray.ni@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Rahul1 Kumar <rahul1.kumar@intel.com>
---
UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 4 ----
.../Library/MpInitLib/Ia32/MpFuncs.nasm | 21 +++++--------------
UefiCpuPkg/Library/MpInitLib/MpLib.c | 3 +--
UefiCpuPkg/Library/MpInitLib/MpLib.h | 3 +--
UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 4 ----
UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++------------
6 files changed, 11 insertions(+), 42 deletions(-)
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
index 244c1e72b7..5f1f0351d2 100644
--- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
@@ -12,16 +12,12 @@
;
;-------------------------------------------------------------------------------
-VacantFlag equ 00h
-NotVacantFlag equ 0ffh
-
CPU_SWITCH_STATE_IDLE equ 0
CPU_SWITCH_STATE_STORED equ 1
CPU_SWITCH_STATE_LOADED equ 2
MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
struc MP_CPU_EXCHANGE_INFO
- .Lock: resd 1
.StackStart: resd 1
.StackSize: resd 1
.CFunction: resd 1
diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
index 908c2eb447..b7267393db 100644
--- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
@@ -122,23 +122,12 @@ SkipEnableExecuteDisable:
; AP init
mov edi, esi
- add edi, MP_CPU_EXCHANGE_INFO_OFFSET
- mov eax, NotVacantFlag
-
-TestLock:
- xchg [edi], eax
- cmp eax, NotVacantFlag
- jz TestLock
-
- mov ecx, esi
- add ecx, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
- inc dword [ecx]
- mov ebx, [ecx]
-
-Releaselock:
- mov eax, VacantFlag
- xchg [edi], eax
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
+ mov ebx, 1
+ lock xadd [edi], ebx ; EBX = ApIndex++
+ inc ebx ; EBX is CpuNumber
+ ; program stack
mov edi, esi
add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
mov eax, [edi]
diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.c b/UefiCpuPkg/Library/MpInitLib/MpLib.c
index 8b1f7f84ba..32a3951742 100644
--- a/UefiCpuPkg/Library/MpInitLib/MpLib.c
+++ b/UefiCpuPkg/Library/MpInitLib/MpLib.c
@@ -1,7 +1,7 @@
/** @file
CPU MP Initialize Library common functions.
- Copyright (c) 2016 - 2020, Intel Corporation. All rights reserved.<BR>
+ Copyright (c) 2016 - 2021, Intel Corporation. All rights reserved.<BR>
Copyright (c) 2020, AMD Inc. All rights reserved.<BR>
SPDX-License-Identifier: BSD-2-Clause-Patent
@@ -1012,7 +1012,6 @@ FillExchangeInfoData (
IA32_CR4 Cr4;
ExchangeInfo = CpuMpData->MpCpuExchangeInfo;
- ExchangeInfo->Lock = 0;
ExchangeInfo->StackStart = CpuMpData->Buffer;
ExchangeInfo->StackSize = CpuMpData->CpuApStackSize;
ExchangeInfo->BufferStart = CpuMpData->WakeupBuffer;
diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.h b/UefiCpuPkg/Library/MpInitLib/MpLib.h
index 02652eaae1..0bd60388b1 100644
--- a/UefiCpuPkg/Library/MpInitLib/MpLib.h
+++ b/UefiCpuPkg/Library/MpInitLib/MpLib.h
@@ -1,7 +1,7 @@
/** @file
Common header file for MP Initialize Library.
- Copyright (c) 2016 - 2020, Intel Corporation. All rights reserved.<BR>
+ Copyright (c) 2016 - 2021, Intel Corporation. All rights reserved.<BR>
Copyright (c) 2020, AMD Inc. All rights reserved.<BR>
SPDX-License-Identifier: BSD-2-Clause-Patent
@@ -190,7 +190,6 @@ typedef struct _CPU_MP_DATA CPU_MP_DATA;
// into this structure are used in assembly code in this module
//
typedef struct {
- UINTN Lock;
UINTN StackStart;
UINTN StackSize;
UINTN CFunction;
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
index 3974330991..32e9a262bc 100644
--- a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
@@ -12,16 +12,12 @@
;
;-------------------------------------------------------------------------------
-VacantFlag equ 00h
-NotVacantFlag equ 0ffh
-
CPU_SWITCH_STATE_IDLE equ 0
CPU_SWITCH_STATE_STORED equ 1
CPU_SWITCH_STATE_LOADED equ 2
MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
struc MP_CPU_EXCHANGE_INFO
- .Lock: resq 1
.StackStart: resq 1
.StackSize: resq 1
.CFunction: resq 1
diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
index 423beb2cca..383b4974f8 100644
--- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
+++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
@@ -158,21 +158,11 @@ LongModeStart:
; AP init
mov edi, esi
- add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Lock
- mov rax, NotVacantFlag
+ add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
+ mov ebx, 1
+ lock xadd [edi], ebx ; EBX = ApIndex++
+ inc ebx ; EBX is CpuNumber
-TestLock:
- xchg qword [edi], rax
- cmp rax, NotVacantFlag
- jz TestLock
-
- lea ecx, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex]
- inc dword [ecx]
- mov ebx, [ecx]
-
-Releaselock:
- mov rax, VacantFlag
- xchg qword [edi], rax
; program stack
mov edi, esi
add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
--
2.27.0.windows.1
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset
2021-02-04 2:59 ` [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset Ni, Ray
@ 2021-02-04 9:32 ` Laszlo Ersek
2021-02-04 9:44 ` Laszlo Ersek
0 siblings, 1 reply; 11+ messages in thread
From: Laszlo Ersek @ 2021-02-04 9:32 UTC (permalink / raw)
To: devel, ray.ni; +Cc: Eric Dong, Rahul Kumar
On 02/04/21 03:59, Ni, Ray wrote:
> In Windows environment, "dumpbin /disasm" is used to verify the
> disassembly before and after using NASM struc doesn't change.
>
> Signed-off-by: Ray Ni <ray.ni@intel.com>
> Cc: Eric Dong <eric.dong@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Rahul Kumar <rahul1.kumar@intel.com>
> ---
> UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 52 ++++++++++--------
> .../Library/MpInitLib/Ia32/MpFuncs.nasm | 35 ++++++------
> UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 54 ++++++++++---------
> UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 46 ++++++++--------
> 4 files changed, 98 insertions(+), 89 deletions(-)
>
> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> index 4f5a7c859a..244c1e72b7 100644
> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> @@ -1,5 +1,5 @@
> ;------------------------------------------------------------------------------ ;
> -; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
> ; SPDX-License-Identifier: BSD-2-Clause-Patent
> ;
> ; Module Name:
> @@ -19,25 +19,31 @@ CPU_SWITCH_STATE_IDLE equ 0
> CPU_SWITCH_STATE_STORED equ 1
> CPU_SWITCH_STATE_LOADED equ 2
>
> -LockLocation equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
> -StackStartAddressLocation equ LockLocation + 04h
> -StackSizeLocation equ LockLocation + 08h
> -ApProcedureLocation equ LockLocation + 0Ch
> -GdtrLocation equ LockLocation + 10h
> -IdtrLocation equ LockLocation + 16h
> -BufferStartLocation equ LockLocation + 1Ch
> -ModeOffsetLocation equ LockLocation + 20h
> -ApIndexLocation equ LockLocation + 24h
> -CodeSegmentLocation equ LockLocation + 28h
> -DataSegmentLocation equ LockLocation + 2Ch
> -EnableExecuteDisableLocation equ LockLocation + 30h
> -Cr3Location equ LockLocation + 34h
> -InitFlagLocation equ LockLocation + 38h
> -CpuInfoLocation equ LockLocation + 3Ch
> -NumApsExecutingLocation equ LockLocation + 40h
> -InitializeFloatingPointUnitsAddress equ LockLocation + 48h
> -ModeTransitionMemoryLocation equ LockLocation + 4Ch
> -ModeTransitionSegmentLocation equ LockLocation + 50h
> -ModeHighMemoryLocation equ LockLocation + 52h
> -ModeHighSegmentLocation equ LockLocation + 56h
> -
> +MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
> +struc MP_CPU_EXCHANGE_INFO
> + .Lock: resd 1
> + .StackStart: resd 1
> + .StackSize: resd 1
> + .CFunction: resd 1
> + .GdtrProfile: resb 6
> + .IdtrProfile: resb 6
> + .BufferStart: resd 1
> + .ModeOffset: resd 1
> + .ApIndex: resd 1
> + .CodeSegment: resd 1
> + .DataSegment: resd 1
> + .EnableExecuteDisable: resd 1
> + .Cr3: resd 1
> + .InitFlag: resd 1
> + .CpuInfo: resd 1
> + .NumApsExecuting: resd 1
> + .CpuMpData: resd 1
> + .InitializeFloatingPointUnits: resd 1
(1) please align the "res*" on the other lines with this
(in the X64 file too)
> + .ModeTransitionMemory: resd 1
> + .ModeTransitionSegment:resw 1
> + .ModeHighMemory: resd 1
> + .ModeHighSegment: resw 1
> + .Enable5LevelPaging: resb 1
> + .SevEsIsEnabled: resb 1
> + .GhcbBase: resd 1
> +endstruc
> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> index 7e81d24aa6..908c2eb447 100644
> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> @@ -1,5 +1,5 @@
> ;------------------------------------------------------------------------------ ;
> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
> ; SPDX-License-Identifier: BSD-2-Clause-Patent
> ;
> ; Module Name:
> @@ -39,21 +39,21 @@ BITS 16
> mov fs, ax
> mov gs, ax
>
> - mov si, BufferStartLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
(2) please introduce a macro for this; in my opinion, with the currently
proposed change, the code is *harder* to read and modify than before.
Now we have a lot of fluff to spell out, for every single field reference.
Thanks
Laszlo
> mov ebx, [si]
>
> - mov si, DataSegmentLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.DataSegment
> mov edx, [si]
>
> ;
> ; Get start address of 32-bit code in low memory (<1MB)
> ;
> - mov edi, ModeTransitionMemoryLocation
> + mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeTransitionMemory
>
> - mov si, GdtrLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GdtrProfile
> o32 lgdt [cs:si]
>
> - mov si, IdtrLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.IdtrProfile
> o32 lidt [cs:si]
>
> ;
> @@ -82,7 +82,7 @@ Flat32Start: ; protected mode entry point
> mov esi, ebx
>
> mov edi, esi
> - add edi, EnableExecuteDisableLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.EnableExecuteDisable
> cmp byte [edi], 0
> jz SkipEnableExecuteDisable
>
> @@ -96,7 +96,7 @@ Flat32Start: ; protected mode entry point
> wrmsr
>
> mov edi, esi
> - add edi, Cr3Location
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Cr3
> mov eax, dword [edi]
> mov cr3, eax
>
> @@ -110,19 +110,19 @@ Flat32Start: ; protected mode entry point
>
> SkipEnableExecuteDisable:
> mov edi, esi
> - add edi, InitFlagLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitFlag
> cmp dword [edi], 1 ; 1 == ApInitConfig
> jnz GetApicId
>
> ; Increment the number of APs executing here as early as possible
> ; This is decremented in C code when AP is finished executing
> mov edi, esi
> - add edi, NumApsExecutingLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.NumApsExecuting
> lock inc dword [edi]
>
> ; AP init
> mov edi, esi
> - add edi, LockLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET
> mov eax, NotVacantFlag
>
> TestLock:
> @@ -131,7 +131,7 @@ TestLock:
> jz TestLock
>
> mov ecx, esi
> - add ecx, ApIndexLocation
> + add ecx, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
> inc dword [ecx]
> mov ebx, [ecx]
>
> @@ -140,13 +140,13 @@ Releaselock:
> xchg [edi], eax
>
> mov edi, esi
> - add edi, StackSizeLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
> mov eax, [edi]
> mov ecx, ebx
> inc ecx
> mul ecx ; EAX = StackSize * (CpuNumber + 1)
> mov edi, esi
> - add edi, StackStartAddressLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackStart
> add eax, [edi]
> mov esp, eax
> jmp CProcedureInvoke
> @@ -179,7 +179,7 @@ GetProcessorNumber:
> ; Note that BSP may become an AP due to SwitchBsp()
> ;
> xor ebx, ebx
> - lea eax, [esi + CpuInfoLocation]
> + lea eax, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CpuInfo]
> mov edi, [eax]
>
> GetNextProcNumber:
> @@ -203,13 +203,12 @@ CProcedureInvoke:
>
> push ebx ; Push ApIndex
> mov eax, esi
> - add eax, LockLocation
> + add eax, MP_CPU_EXCHANGE_INFO_OFFSET
> push eax ; push address of exchange info data buffer
>
> mov edi, esi
> - add edi, ApProcedureLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CFunction
> mov eax, [edi]
> -
> call eax ; Invoke C function
>
> jmp $ ; Never reach here
> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> index c92daaaffd..3974330991 100644
> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> @@ -1,5 +1,5 @@
> ;------------------------------------------------------------------------------ ;
> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
> ; SPDX-License-Identifier: BSD-2-Clause-Patent
> ;
> ; Module Name:
> @@ -19,27 +19,31 @@ CPU_SWITCH_STATE_IDLE equ 0
> CPU_SWITCH_STATE_STORED equ 1
> CPU_SWITCH_STATE_LOADED equ 2
>
> -LockLocation equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
> -StackStartAddressLocation equ LockLocation + 08h
> -StackSizeLocation equ LockLocation + 10h
> -ApProcedureLocation equ LockLocation + 18h
> -GdtrLocation equ LockLocation + 20h
> -IdtrLocation equ LockLocation + 2Ah
> -BufferStartLocation equ LockLocation + 34h
> -ModeOffsetLocation equ LockLocation + 3Ch
> -ApIndexLocation equ LockLocation + 44h
> -CodeSegmentLocation equ LockLocation + 4Ch
> -DataSegmentLocation equ LockLocation + 54h
> -EnableExecuteDisableLocation equ LockLocation + 5Ch
> -Cr3Location equ LockLocation + 64h
> -InitFlagLocation equ LockLocation + 6Ch
> -CpuInfoLocation equ LockLocation + 74h
> -NumApsExecutingLocation equ LockLocation + 7Ch
> -InitializeFloatingPointUnitsAddress equ LockLocation + 8Ch
> -ModeTransitionMemoryLocation equ LockLocation + 94h
> -ModeTransitionSegmentLocation equ LockLocation + 98h
> -ModeHighMemoryLocation equ LockLocation + 9Ah
> -ModeHighSegmentLocation equ LockLocation + 9Eh
> -Enable5LevelPagingLocation equ LockLocation + 0A0h
> -SevEsIsEnabledLocation equ LockLocation + 0A1h
> -GhcbBaseLocation equ LockLocation + 0A2h
> +MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
> +struc MP_CPU_EXCHANGE_INFO
> + .Lock: resq 1
> + .StackStart: resq 1
> + .StackSize: resq 1
> + .CFunction: resq 1
> + .GdtrProfile: resb 10
> + .IdtrProfile: resb 10
> + .BufferStart: resq 1
> + .ModeOffset: resq 1
> + .ApIndex: resq 1
> + .CodeSegment: resq 1
> + .DataSegment: resq 1
> + .EnableExecuteDisable: resq 1
> + .Cr3: resq 1
> + .InitFlag: resq 1
> + .CpuInfo: resq 1
> + .NumApsExecuting: resq 1
> + .CpuMpData: resq 1
> + .InitializeFloatingPointUnits: resq 1
> + .ModeTransitionMemory: resd 1
> + .ModeTransitionSegment:resw 1
> + .ModeHighMemory: resd 1
> + .ModeHighSegment: resw 1
> + .Enable5LevelPaging: resb 1
> + .SevEsIsEnabled: resb 1
> + .GhcbBase: resq 1
> +endstruc
> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> index aecfd07bc0..423beb2cca 100644
> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> @@ -1,5 +1,5 @@
> ;------------------------------------------------------------------------------ ;
> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
> ; SPDX-License-Identifier: BSD-2-Clause-Patent
> ;
> ; Module Name:
> @@ -43,21 +43,21 @@ BITS 16
> mov fs, ax
> mov gs, ax
>
> - mov si, BufferStartLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
> mov ebx, [si]
>
> - mov si, DataSegmentLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.DataSegment
> mov edx, [si]
>
> ;
> ; Get start address of 32-bit code in low memory (<1MB)
> ;
> - mov edi, ModeTransitionMemoryLocation
> + mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeTransitionMemory
>
> - mov si, GdtrLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GdtrProfile
> o32 lgdt [cs:si]
>
> - mov si, IdtrLocation
> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.IdtrProfile
> o32 lidt [cs:si]
>
> ;
> @@ -85,7 +85,7 @@ Flat32Start: ; protected mode entry point
> ;
> ; Enable execute disable bit
> ;
> - mov esi, EnableExecuteDisableLocation
> + mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.EnableExecuteDisable
> cmp byte [ebx + esi], 0
> jz SkipEnableExecuteDisableBit
>
> @@ -101,7 +101,7 @@ SkipEnableExecuteDisableBit:
> mov eax, cr4
> bts eax, 5
>
> - mov esi, Enable5LevelPagingLocation
> + mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Enable5LevelPaging
> cmp byte [ebx + esi], 0
> jz SkipEnable5LevelPaging
>
> @@ -117,7 +117,7 @@ SkipEnable5LevelPaging:
> ;
> ; Load page table
> ;
> - mov esi, Cr3Location ; Save CR3 in ecx
> + mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Cr3 ; Save CR3 in ecx
> mov ecx, [ebx + esi]
> mov cr3, ecx ; Load CR3
>
> @@ -139,26 +139,26 @@ SkipEnable5LevelPaging:
> ;
> ; Far jump to 64-bit code
> ;
> - mov edi, ModeHighMemoryLocation
> + mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeHighMemory
> add edi, ebx
> jmp far [edi]
>
> BITS 64
> LongModeStart:
> mov esi, ebx
> - lea edi, [esi + InitFlagLocation]
> + lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitFlag]
> cmp qword [edi], 1 ; ApInitConfig
> jnz GetApicId
>
> ; Increment the number of APs executing here as early as possible
> ; This is decremented in C code when AP is finished executing
> mov edi, esi
> - add edi, NumApsExecutingLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.NumApsExecuting
> lock inc dword [edi]
>
> ; AP init
> mov edi, esi
> - add edi, LockLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Lock
> mov rax, NotVacantFlag
>
> TestLock:
> @@ -166,7 +166,7 @@ TestLock:
> cmp rax, NotVacantFlag
> jz TestLock
>
> - lea ecx, [esi + ApIndexLocation]
> + lea ecx, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex]
> inc dword [ecx]
> mov ebx, [ecx]
>
> @@ -175,17 +175,17 @@ Releaselock:
> xchg qword [edi], rax
> ; program stack
> mov edi, esi
> - add edi, StackSizeLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
> mov eax, dword [edi]
> mov ecx, ebx
> inc ecx
> mul ecx ; EAX = StackSize * (CpuNumber + 1)
> mov edi, esi
> - add edi, StackStartAddressLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackStart
> add rax, qword [edi]
> mov rsp, rax
>
> - lea edi, [esi + SevEsIsEnabledLocation]
> + lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.SevEsIsEnabled]
> cmp byte [edi], 1 ; SevEsIsEnabled
> jne CProcedureInvoke
>
> @@ -199,7 +199,7 @@ Releaselock:
> mov ecx, ebx
> mul ecx ; EAX = SIZE_4K * 2 * CpuNumber
> mov edi, esi
> - add edi, GhcbBaseLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GhcbBase
> add rax, qword [edi]
> mov rdx, rax
> shr rdx, 32
> @@ -208,7 +208,7 @@ Releaselock:
> jmp CProcedureInvoke
>
> GetApicId:
> - lea edi, [esi + SevEsIsEnabledLocation]
> + lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.SevEsIsEnabled]
> cmp byte [edi], 1 ; SevEsIsEnabled
> jne DoCpuid
>
> @@ -302,7 +302,7 @@ GetProcessorNumber:
> ; Note that BSP may become an AP due to SwitchBsp()
> ;
> xor ebx, ebx
> - lea eax, [esi + CpuInfoLocation]
> + lea eax, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CpuInfo]
> mov rdi, [eax]
>
> GetNextProcNumber:
> @@ -321,17 +321,17 @@ CProcedureInvoke:
> push rbp
> mov rbp, rsp
>
> - mov rax, qword [esi + InitializeFloatingPointUnitsAddress]
> + mov rax, qword [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitializeFloatingPointUnits]
> sub rsp, 20h
> call rax ; Call assembly function to initialize FPU per UEFI spec
> add rsp, 20h
>
> mov edx, ebx ; edx is ApIndex
> mov ecx, esi
> - add ecx, LockLocation ; rcx is address of exchange info data buffer
> + add ecx, MP_CPU_EXCHANGE_INFO_OFFSET ; rcx is address of exchange info data buffer
>
> mov edi, esi
> - add edi, ApProcedureLocation
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CFunction
> mov rax, qword [edi]
>
> sub rsp, 20h
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset
2021-02-04 9:32 ` [edk2-devel] " Laszlo Ersek
@ 2021-02-04 9:44 ` Laszlo Ersek
2021-02-04 14:27 ` Ni, Ray
0 siblings, 1 reply; 11+ messages in thread
From: Laszlo Ersek @ 2021-02-04 9:44 UTC (permalink / raw)
To: devel, ray.ni; +Cc: Eric Dong, Rahul Kumar
On 02/04/21 10:32, Laszlo Ersek wrote:
> On 02/04/21 03:59, Ni, Ray wrote:
>> In Windows environment, "dumpbin /disasm" is used to verify the
>> disassembly before and after using NASM struc doesn't change.
>>
>> Signed-off-by: Ray Ni <ray.ni@intel.com>
>> Cc: Eric Dong <eric.dong@intel.com>
>> Cc: Laszlo Ersek <lersek@redhat.com>
>> Cc: Rahul Kumar <rahul1.kumar@intel.com>
>> ---
>> UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 52 ++++++++++--------
>> .../Library/MpInitLib/Ia32/MpFuncs.nasm | 35 ++++++------
>> UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 54 ++++++++++---------
>> UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 46 ++++++++--------
>> 4 files changed, 98 insertions(+), 89 deletions(-)
>>
>> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
>> index 4f5a7c859a..244c1e72b7 100644
>> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
>> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
>> @@ -1,5 +1,5 @@
>> ;------------------------------------------------------------------------------ ;
>> -; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
>> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
>> ; SPDX-License-Identifier: BSD-2-Clause-Patent
>> ;
>> ; Module Name:
>> @@ -19,25 +19,31 @@ CPU_SWITCH_STATE_IDLE equ 0
>> CPU_SWITCH_STATE_STORED equ 1
>> CPU_SWITCH_STATE_LOADED equ 2
>>
>> -LockLocation equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
>> -StackStartAddressLocation equ LockLocation + 04h
>> -StackSizeLocation equ LockLocation + 08h
>> -ApProcedureLocation equ LockLocation + 0Ch
>> -GdtrLocation equ LockLocation + 10h
>> -IdtrLocation equ LockLocation + 16h
>> -BufferStartLocation equ LockLocation + 1Ch
>> -ModeOffsetLocation equ LockLocation + 20h
>> -ApIndexLocation equ LockLocation + 24h
>> -CodeSegmentLocation equ LockLocation + 28h
>> -DataSegmentLocation equ LockLocation + 2Ch
>> -EnableExecuteDisableLocation equ LockLocation + 30h
>> -Cr3Location equ LockLocation + 34h
>> -InitFlagLocation equ LockLocation + 38h
>> -CpuInfoLocation equ LockLocation + 3Ch
>> -NumApsExecutingLocation equ LockLocation + 40h
>> -InitializeFloatingPointUnitsAddress equ LockLocation + 48h
>> -ModeTransitionMemoryLocation equ LockLocation + 4Ch
>> -ModeTransitionSegmentLocation equ LockLocation + 50h
>> -ModeHighMemoryLocation equ LockLocation + 52h
>> -ModeHighSegmentLocation equ LockLocation + 56h
>> -
>> +MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
>> +struc MP_CPU_EXCHANGE_INFO
>> + .Lock: resd 1
>> + .StackStart: resd 1
>> + .StackSize: resd 1
>> + .CFunction: resd 1
>> + .GdtrProfile: resb 6
>> + .IdtrProfile: resb 6
>> + .BufferStart: resd 1
>> + .ModeOffset: resd 1
>> + .ApIndex: resd 1
>> + .CodeSegment: resd 1
>> + .DataSegment: resd 1
>> + .EnableExecuteDisable: resd 1
>> + .Cr3: resd 1
>> + .InitFlag: resd 1
>> + .CpuInfo: resd 1
>> + .NumApsExecuting: resd 1
>> + .CpuMpData: resd 1
>> + .InitializeFloatingPointUnits: resd 1
>
> (1) please align the "res*" on the other lines with this
>
> (in the X64 file too)
>
>> + .ModeTransitionMemory: resd 1
>> + .ModeTransitionSegment:resw 1
>> + .ModeHighMemory: resd 1
>> + .ModeHighSegment: resw 1
>> + .Enable5LevelPaging: resb 1
>> + .SevEsIsEnabled: resb 1
>> + .GhcbBase: resd 1
>> +endstruc
>> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
>> index 7e81d24aa6..908c2eb447 100644
>> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
>> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
>> @@ -1,5 +1,5 @@
>> ;------------------------------------------------------------------------------ ;
>> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
>> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
>> ; SPDX-License-Identifier: BSD-2-Clause-Patent
>> ;
>> ; Module Name:
>> @@ -39,21 +39,21 @@ BITS 16
>> mov fs, ax
>> mov gs, ax
>>
>> - mov si, BufferStartLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
>
> (2) please introduce a macro for this; in my opinion, with the currently
> proposed change, the code is *harder* to read and modify than before.
> Now we have a lot of fluff to spell out, for every single field reference.
(3) I have a further request / suggestion:
(3a) We should extend the following files:
MdePkg/Include/Ia32/Nasm.inc
MdePkg/Include/X64/Nasm.inc
with a macro that maps UINTN to "resd" versus "resq", as appropriate,
(3b) we should reserve space for the IA32_DESCRIPTOR fields in terms of
UINT16 + UINTN -- you can use a separate "struc IA32_DESCRIPTOR" for
this that uses the UINTN trick from step (3a)
(3c) use a common definition for "struc MP_CPU_EXCHANGE_INFO", hiding
the UINTN and IA32_DESCRIPTOR size differences through the above steps.
Thanks
Laszlo
>
> Thanks
> Laszlo
>
>> mov ebx, [si]
>>
>> - mov si, DataSegmentLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.DataSegment
>> mov edx, [si]
>>
>> ;
>> ; Get start address of 32-bit code in low memory (<1MB)
>> ;
>> - mov edi, ModeTransitionMemoryLocation
>> + mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeTransitionMemory
>>
>> - mov si, GdtrLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GdtrProfile
>> o32 lgdt [cs:si]
>>
>> - mov si, IdtrLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.IdtrProfile
>> o32 lidt [cs:si]
>>
>> ;
>> @@ -82,7 +82,7 @@ Flat32Start: ; protected mode entry point
>> mov esi, ebx
>>
>> mov edi, esi
>> - add edi, EnableExecuteDisableLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.EnableExecuteDisable
>> cmp byte [edi], 0
>> jz SkipEnableExecuteDisable
>>
>> @@ -96,7 +96,7 @@ Flat32Start: ; protected mode entry point
>> wrmsr
>>
>> mov edi, esi
>> - add edi, Cr3Location
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Cr3
>> mov eax, dword [edi]
>> mov cr3, eax
>>
>> @@ -110,19 +110,19 @@ Flat32Start: ; protected mode entry point
>>
>> SkipEnableExecuteDisable:
>> mov edi, esi
>> - add edi, InitFlagLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitFlag
>> cmp dword [edi], 1 ; 1 == ApInitConfig
>> jnz GetApicId
>>
>> ; Increment the number of APs executing here as early as possible
>> ; This is decremented in C code when AP is finished executing
>> mov edi, esi
>> - add edi, NumApsExecutingLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.NumApsExecuting
>> lock inc dword [edi]
>>
>> ; AP init
>> mov edi, esi
>> - add edi, LockLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET
>> mov eax, NotVacantFlag
>>
>> TestLock:
>> @@ -131,7 +131,7 @@ TestLock:
>> jz TestLock
>>
>> mov ecx, esi
>> - add ecx, ApIndexLocation
>> + add ecx, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
>> inc dword [ecx]
>> mov ebx, [ecx]
>>
>> @@ -140,13 +140,13 @@ Releaselock:
>> xchg [edi], eax
>>
>> mov edi, esi
>> - add edi, StackSizeLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
>> mov eax, [edi]
>> mov ecx, ebx
>> inc ecx
>> mul ecx ; EAX = StackSize * (CpuNumber + 1)
>> mov edi, esi
>> - add edi, StackStartAddressLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackStart
>> add eax, [edi]
>> mov esp, eax
>> jmp CProcedureInvoke
>> @@ -179,7 +179,7 @@ GetProcessorNumber:
>> ; Note that BSP may become an AP due to SwitchBsp()
>> ;
>> xor ebx, ebx
>> - lea eax, [esi + CpuInfoLocation]
>> + lea eax, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CpuInfo]
>> mov edi, [eax]
>>
>> GetNextProcNumber:
>> @@ -203,13 +203,12 @@ CProcedureInvoke:
>>
>> push ebx ; Push ApIndex
>> mov eax, esi
>> - add eax, LockLocation
>> + add eax, MP_CPU_EXCHANGE_INFO_OFFSET
>> push eax ; push address of exchange info data buffer
>>
>> mov edi, esi
>> - add edi, ApProcedureLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CFunction
>> mov eax, [edi]
>> -
>> call eax ; Invoke C function
>>
>> jmp $ ; Never reach here
>> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
>> index c92daaaffd..3974330991 100644
>> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
>> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
>> @@ -1,5 +1,5 @@
>> ;------------------------------------------------------------------------------ ;
>> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
>> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
>> ; SPDX-License-Identifier: BSD-2-Clause-Patent
>> ;
>> ; Module Name:
>> @@ -19,27 +19,31 @@ CPU_SWITCH_STATE_IDLE equ 0
>> CPU_SWITCH_STATE_STORED equ 1
>> CPU_SWITCH_STATE_LOADED equ 2
>>
>> -LockLocation equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
>> -StackStartAddressLocation equ LockLocation + 08h
>> -StackSizeLocation equ LockLocation + 10h
>> -ApProcedureLocation equ LockLocation + 18h
>> -GdtrLocation equ LockLocation + 20h
>> -IdtrLocation equ LockLocation + 2Ah
>> -BufferStartLocation equ LockLocation + 34h
>> -ModeOffsetLocation equ LockLocation + 3Ch
>> -ApIndexLocation equ LockLocation + 44h
>> -CodeSegmentLocation equ LockLocation + 4Ch
>> -DataSegmentLocation equ LockLocation + 54h
>> -EnableExecuteDisableLocation equ LockLocation + 5Ch
>> -Cr3Location equ LockLocation + 64h
>> -InitFlagLocation equ LockLocation + 6Ch
>> -CpuInfoLocation equ LockLocation + 74h
>> -NumApsExecutingLocation equ LockLocation + 7Ch
>> -InitializeFloatingPointUnitsAddress equ LockLocation + 8Ch
>> -ModeTransitionMemoryLocation equ LockLocation + 94h
>> -ModeTransitionSegmentLocation equ LockLocation + 98h
>> -ModeHighMemoryLocation equ LockLocation + 9Ah
>> -ModeHighSegmentLocation equ LockLocation + 9Eh
>> -Enable5LevelPagingLocation equ LockLocation + 0A0h
>> -SevEsIsEnabledLocation equ LockLocation + 0A1h
>> -GhcbBaseLocation equ LockLocation + 0A2h
>> +MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
>> +struc MP_CPU_EXCHANGE_INFO
>> + .Lock: resq 1
>> + .StackStart: resq 1
>> + .StackSize: resq 1
>> + .CFunction: resq 1
>> + .GdtrProfile: resb 10
>> + .IdtrProfile: resb 10
>> + .BufferStart: resq 1
>> + .ModeOffset: resq 1
>> + .ApIndex: resq 1
>> + .CodeSegment: resq 1
>> + .DataSegment: resq 1
>> + .EnableExecuteDisable: resq 1
>> + .Cr3: resq 1
>> + .InitFlag: resq 1
>> + .CpuInfo: resq 1
>> + .NumApsExecuting: resq 1
>> + .CpuMpData: resq 1
>> + .InitializeFloatingPointUnits: resq 1
>> + .ModeTransitionMemory: resd 1
>> + .ModeTransitionSegment:resw 1
>> + .ModeHighMemory: resd 1
>> + .ModeHighSegment: resw 1
>> + .Enable5LevelPaging: resb 1
>> + .SevEsIsEnabled: resb 1
>> + .GhcbBase: resq 1
>> +endstruc
>> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
>> index aecfd07bc0..423beb2cca 100644
>> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
>> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
>> @@ -1,5 +1,5 @@
>> ;------------------------------------------------------------------------------ ;
>> -; Copyright (c) 2015 - 2019, Intel Corporation. All rights reserved.<BR>
>> +; Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.<BR>
>> ; SPDX-License-Identifier: BSD-2-Clause-Patent
>> ;
>> ; Module Name:
>> @@ -43,21 +43,21 @@ BITS 16
>> mov fs, ax
>> mov gs, ax
>>
>> - mov si, BufferStartLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
>> mov ebx, [si]
>>
>> - mov si, DataSegmentLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.DataSegment
>> mov edx, [si]
>>
>> ;
>> ; Get start address of 32-bit code in low memory (<1MB)
>> ;
>> - mov edi, ModeTransitionMemoryLocation
>> + mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeTransitionMemory
>>
>> - mov si, GdtrLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GdtrProfile
>> o32 lgdt [cs:si]
>>
>> - mov si, IdtrLocation
>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.IdtrProfile
>> o32 lidt [cs:si]
>>
>> ;
>> @@ -85,7 +85,7 @@ Flat32Start: ; protected mode entry point
>> ;
>> ; Enable execute disable bit
>> ;
>> - mov esi, EnableExecuteDisableLocation
>> + mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.EnableExecuteDisable
>> cmp byte [ebx + esi], 0
>> jz SkipEnableExecuteDisableBit
>>
>> @@ -101,7 +101,7 @@ SkipEnableExecuteDisableBit:
>> mov eax, cr4
>> bts eax, 5
>>
>> - mov esi, Enable5LevelPagingLocation
>> + mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Enable5LevelPaging
>> cmp byte [ebx + esi], 0
>> jz SkipEnable5LevelPaging
>>
>> @@ -117,7 +117,7 @@ SkipEnable5LevelPaging:
>> ;
>> ; Load page table
>> ;
>> - mov esi, Cr3Location ; Save CR3 in ecx
>> + mov esi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Cr3 ; Save CR3 in ecx
>> mov ecx, [ebx + esi]
>> mov cr3, ecx ; Load CR3
>>
>> @@ -139,26 +139,26 @@ SkipEnable5LevelPaging:
>> ;
>> ; Far jump to 64-bit code
>> ;
>> - mov edi, ModeHighMemoryLocation
>> + mov edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ModeHighMemory
>> add edi, ebx
>> jmp far [edi]
>>
>> BITS 64
>> LongModeStart:
>> mov esi, ebx
>> - lea edi, [esi + InitFlagLocation]
>> + lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitFlag]
>> cmp qword [edi], 1 ; ApInitConfig
>> jnz GetApicId
>>
>> ; Increment the number of APs executing here as early as possible
>> ; This is decremented in C code when AP is finished executing
>> mov edi, esi
>> - add edi, NumApsExecutingLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.NumApsExecuting
>> lock inc dword [edi]
>>
>> ; AP init
>> mov edi, esi
>> - add edi, LockLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Lock
>> mov rax, NotVacantFlag
>>
>> TestLock:
>> @@ -166,7 +166,7 @@ TestLock:
>> cmp rax, NotVacantFlag
>> jz TestLock
>>
>> - lea ecx, [esi + ApIndexLocation]
>> + lea ecx, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex]
>> inc dword [ecx]
>> mov ebx, [ecx]
>>
>> @@ -175,17 +175,17 @@ Releaselock:
>> xchg qword [edi], rax
>> ; program stack
>> mov edi, esi
>> - add edi, StackSizeLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
>> mov eax, dword [edi]
>> mov ecx, ebx
>> inc ecx
>> mul ecx ; EAX = StackSize * (CpuNumber + 1)
>> mov edi, esi
>> - add edi, StackStartAddressLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackStart
>> add rax, qword [edi]
>> mov rsp, rax
>>
>> - lea edi, [esi + SevEsIsEnabledLocation]
>> + lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.SevEsIsEnabled]
>> cmp byte [edi], 1 ; SevEsIsEnabled
>> jne CProcedureInvoke
>>
>> @@ -199,7 +199,7 @@ Releaselock:
>> mov ecx, ebx
>> mul ecx ; EAX = SIZE_4K * 2 * CpuNumber
>> mov edi, esi
>> - add edi, GhcbBaseLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.GhcbBase
>> add rax, qword [edi]
>> mov rdx, rax
>> shr rdx, 32
>> @@ -208,7 +208,7 @@ Releaselock:
>> jmp CProcedureInvoke
>>
>> GetApicId:
>> - lea edi, [esi + SevEsIsEnabledLocation]
>> + lea edi, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.SevEsIsEnabled]
>> cmp byte [edi], 1 ; SevEsIsEnabled
>> jne DoCpuid
>>
>> @@ -302,7 +302,7 @@ GetProcessorNumber:
>> ; Note that BSP may become an AP due to SwitchBsp()
>> ;
>> xor ebx, ebx
>> - lea eax, [esi + CpuInfoLocation]
>> + lea eax, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CpuInfo]
>> mov rdi, [eax]
>>
>> GetNextProcNumber:
>> @@ -321,17 +321,17 @@ CProcedureInvoke:
>> push rbp
>> mov rbp, rsp
>>
>> - mov rax, qword [esi + InitializeFloatingPointUnitsAddress]
>> + mov rax, qword [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.InitializeFloatingPointUnits]
>> sub rsp, 20h
>> call rax ; Call assembly function to initialize FPU per UEFI spec
>> add rsp, 20h
>>
>> mov edx, ebx ; edx is ApIndex
>> mov ecx, esi
>> - add ecx, LockLocation ; rcx is address of exchange info data buffer
>> + add ecx, MP_CPU_EXCHANGE_INFO_OFFSET ; rcx is address of exchange info data buffer
>>
>> mov edi, esi
>> - add edi, ApProcedureLocation
>> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.CFunction
>> mov rax, qword [edi]
>>
>> sub rsp, 20h
>>
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
2021-02-04 2:59 ` [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
@ 2021-02-04 9:51 ` Laszlo Ersek
2021-02-04 13:43 ` Ni, Ray
2021-02-04 11:24 ` Zeng, Star
1 sibling, 1 reply; 11+ messages in thread
From: Laszlo Ersek @ 2021-02-04 9:51 UTC (permalink / raw)
To: devel, ray.ni; +Cc: Eric Dong, Rahul1 Kumar
On 02/04/21 03:59, Ni, Ray wrote:
> When AP firstly wakes up, MpFuncs.nasm contains below logic to assign
> an unique ApIndex to each AP according to who comes first:
> ---ASM---
> TestLock:
> xchg [edi], eax
> cmp eax, NotVacantFlag
> jz TestLock
>
> mov ecx, esi
> add ecx, ApIndexLocation
> inc dword [ecx]
> mov ebx, [ecx]
>
> Releaselock:
> mov eax, VacantFlag
> xchg [edi], eax
> ---ASM END---
>
> "lock inc" cannot be used to increase ApIndex because not only the
> global ApIndex should be increased, but also the result should be
> stored to a local general purpose register EBX.
>
> This patch learns from the NASM implementation of
> InternalSyncIncrement() to use "XADD" instruction which can increase
> the global ApIndex and store the original ApIndex to EBX in one
> instruction.
>
> With this patch, OVMF when running in a 255 threads QEMU spends about
> one second to wakeup all APs. Original implementation needs more than
> 10 seconds.
>
> Signed-off-by: Ray Ni <ray.ni@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Eric Dong <eric.dong@intel.com>
> Cc: Rahul1 Kumar <rahul1.kumar@intel.com>
> ---
> UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 4 ----
> .../Library/MpInitLib/Ia32/MpFuncs.nasm | 21 +++++--------------
> UefiCpuPkg/Library/MpInitLib/MpLib.c | 3 +--
> UefiCpuPkg/Library/MpInitLib/MpLib.h | 3 +--
> UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 4 ----
> UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++------------
> 6 files changed, 11 insertions(+), 42 deletions(-)
>
> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> index 244c1e72b7..5f1f0351d2 100644
> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> @@ -12,16 +12,12 @@
> ;
> ;-------------------------------------------------------------------------------
>
> -VacantFlag equ 00h
> -NotVacantFlag equ 0ffh
> -
> CPU_SWITCH_STATE_IDLE equ 0
> CPU_SWITCH_STATE_STORED equ 1
> CPU_SWITCH_STATE_LOADED equ 2
>
> MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
> struc MP_CPU_EXCHANGE_INFO
> - .Lock: resd 1
> .StackStart: resd 1
> .StackSize: resd 1
> .CFunction: resd 1
> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> index 908c2eb447..b7267393db 100644
> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> @@ -122,23 +122,12 @@ SkipEnableExecuteDisable:
>
> ; AP init
> mov edi, esi
> - add edi, MP_CPU_EXCHANGE_INFO_OFFSET
> - mov eax, NotVacantFlag
> -
> -TestLock:
> - xchg [edi], eax
> - cmp eax, NotVacantFlag
> - jz TestLock
> -
> - mov ecx, esi
> - add ecx, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
> - inc dword [ecx]
> - mov ebx, [ecx]
> -
> -Releaselock:
> - mov eax, VacantFlag
> - xchg [edi], eax
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
> + mov ebx, 1
> + lock xadd [edi], ebx ; EBX = ApIndex++
(1) For more clarity, I would suggest
lock xadd dword [edi], ebx
(even though "ebx" already specifies the width, yes)
Applies to the X64 version too.
> + inc ebx ; EBX is CpuNumber
>
> + ; program stack
(2) This change belongs in a separate patch. The X64 version already has
this comment, and making both versions makes sense. But touching
anything "stack-related" in the current patch is very confusing to me.
Thanks
Laszlo
> mov edi, esi
> add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
> mov eax, [edi]
> diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.c b/UefiCpuPkg/Library/MpInitLib/MpLib.c
> index 8b1f7f84ba..32a3951742 100644
> --- a/UefiCpuPkg/Library/MpInitLib/MpLib.c
> +++ b/UefiCpuPkg/Library/MpInitLib/MpLib.c
> @@ -1,7 +1,7 @@
> /** @file
> CPU MP Initialize Library common functions.
>
> - Copyright (c) 2016 - 2020, Intel Corporation. All rights reserved.<BR>
> + Copyright (c) 2016 - 2021, Intel Corporation. All rights reserved.<BR>
> Copyright (c) 2020, AMD Inc. All rights reserved.<BR>
>
> SPDX-License-Identifier: BSD-2-Clause-Patent
> @@ -1012,7 +1012,6 @@ FillExchangeInfoData (
> IA32_CR4 Cr4;
>
> ExchangeInfo = CpuMpData->MpCpuExchangeInfo;
> - ExchangeInfo->Lock = 0;
> ExchangeInfo->StackStart = CpuMpData->Buffer;
> ExchangeInfo->StackSize = CpuMpData->CpuApStackSize;
> ExchangeInfo->BufferStart = CpuMpData->WakeupBuffer;
> diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.h b/UefiCpuPkg/Library/MpInitLib/MpLib.h
> index 02652eaae1..0bd60388b1 100644
> --- a/UefiCpuPkg/Library/MpInitLib/MpLib.h
> +++ b/UefiCpuPkg/Library/MpInitLib/MpLib.h
> @@ -1,7 +1,7 @@
> /** @file
> Common header file for MP Initialize Library.
>
> - Copyright (c) 2016 - 2020, Intel Corporation. All rights reserved.<BR>
> + Copyright (c) 2016 - 2021, Intel Corporation. All rights reserved.<BR>
> Copyright (c) 2020, AMD Inc. All rights reserved.<BR>
>
> SPDX-License-Identifier: BSD-2-Clause-Patent
> @@ -190,7 +190,6 @@ typedef struct _CPU_MP_DATA CPU_MP_DATA;
> // into this structure are used in assembly code in this module
> //
> typedef struct {
> - UINTN Lock;
> UINTN StackStart;
> UINTN StackSize;
> UINTN CFunction;
> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> index 3974330991..32e9a262bc 100644
> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> @@ -12,16 +12,12 @@
> ;
> ;-------------------------------------------------------------------------------
>
> -VacantFlag equ 00h
> -NotVacantFlag equ 0ffh
> -
> CPU_SWITCH_STATE_IDLE equ 0
> CPU_SWITCH_STATE_STORED equ 1
> CPU_SWITCH_STATE_LOADED equ 2
>
> MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd - RendezvousFunnelProcStart)
> struc MP_CPU_EXCHANGE_INFO
> - .Lock: resq 1
> .StackStart: resq 1
> .StackSize: resq 1
> .CFunction: resq 1
> diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> index 423beb2cca..383b4974f8 100644
> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> @@ -158,21 +158,11 @@ LongModeStart:
>
> ; AP init
> mov edi, esi
> - add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Lock
> - mov rax, NotVacantFlag
> + add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex
> + mov ebx, 1
> + lock xadd [edi], ebx ; EBX = ApIndex++
> + inc ebx ; EBX is CpuNumber
>
> -TestLock:
> - xchg qword [edi], rax
> - cmp rax, NotVacantFlag
> - jz TestLock
> -
> - lea ecx, [esi + MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex]
> - inc dword [ecx]
> - mov ebx, [ecx]
> -
> -Releaselock:
> - mov rax, VacantFlag
> - xchg qword [edi], rax
> ; program stack
> mov edi, esi
> add edi, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
2021-02-04 2:59 ` [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
2021-02-04 9:51 ` [edk2-devel] " Laszlo Ersek
@ 2021-02-04 11:24 ` Zeng, Star
2021-02-04 11:58 ` Laszlo Ersek
1 sibling, 1 reply; 11+ messages in thread
From: Zeng, Star @ 2021-02-04 11:24 UTC (permalink / raw)
To: devel@edk2.groups.io, Ni, Ray
Cc: Laszlo Ersek, Dong, Eric, Kumar, Rahul1, Zeng, Star
Hi All,
Do you think it worth or not to also update MpFuncs.nasm in Edk2\UefiCpuPkg\PiSmmCpuDxeSmm?
Thanks,
Star
> -----Original Message-----
> From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Ni, Ray
> Sent: Thursday, February 4, 2021 10:59 AM
> To: devel@edk2.groups.io
> Cc: Laszlo Ersek <lersek@redhat.com>; Dong, Eric <eric.dong@intel.com>;
> Kumar, Rahul1 <rahul1.kumar@intel.com>
> Subject: [edk2-devel] [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid
> lock acquire/release
>
> When AP firstly wakes up, MpFuncs.nasm contains below logic to assign an
> unique ApIndex to each AP according to who comes first:
> ---ASM---
> TestLock:
> xchg [edi], eax
> cmp eax, NotVacantFlag
> jz TestLock
>
> mov ecx, esi
> add ecx, ApIndexLocation
> inc dword [ecx]
> mov ebx, [ecx]
>
> Releaselock:
> mov eax, VacantFlag
> xchg [edi], eax
> ---ASM END---
>
> "lock inc" cannot be used to increase ApIndex because not only the global
> ApIndex should be increased, but also the result should be stored to a local
> general purpose register EBX.
>
> This patch learns from the NASM implementation of
> InternalSyncIncrement() to use "XADD" instruction which can increase the
> global ApIndex and store the original ApIndex to EBX in one instruction.
>
> With this patch, OVMF when running in a 255 threads QEMU spends about
> one second to wakeup all APs. Original implementation needs more than
> 10 seconds.
>
> Signed-off-by: Ray Ni <ray.ni@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: Eric Dong <eric.dong@intel.com>
> Cc: Rahul1 Kumar <rahul1.kumar@intel.com>
> ---
> UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc | 4 ----
> .../Library/MpInitLib/Ia32/MpFuncs.nasm | 21 +++++--------------
> UefiCpuPkg/Library/MpInitLib/MpLib.c | 3 +--
> UefiCpuPkg/Library/MpInitLib/MpLib.h | 3 +--
> UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc | 4 ----
> UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm | 18 ++++------------
> 6 files changed, 11 insertions(+), 42 deletions(-)
>
> diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> index 244c1e72b7..5f1f0351d2 100644
> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpEqu.inc
> @@ -12,16 +12,12 @@
> ; ;------------------------------------------------------------------------------- -
> VacantFlag equ 00h-NotVacantFlag equ 0ffh-
> CPU_SWITCH_STATE_IDLE equ 0 CPU_SWITCH_STATE_STORED
> equ 1 CPU_SWITCH_STATE_LOADED equ 2
> MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd -
> RendezvousFunnelProcStart) struc MP_CPU_EXCHANGE_INFO- .Lock:
> resd 1 .StackStart: resd 1 .StackSize: resd 1 .CFunction:
> resd 1diff --git a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> index 908c2eb447..b7267393db 100644
> --- a/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> +++ b/UefiCpuPkg/Library/MpInitLib/Ia32/MpFuncs.nasm
> @@ -122,23 +122,12 @@ SkipEnableExecuteDisable:
> ; AP init mov edi, esi- add edi,
> MP_CPU_EXCHANGE_INFO_OFFSET- mov eax, NotVacantFlag--
> TestLock:- xchg [edi], eax- cmp eax, NotVacantFlag- jz
> TestLock-- mov ecx, esi- add ecx,
> MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.ApIndex-
> inc dword [ecx]- mov ebx, [ecx]--Releaselock:- mov eax,
> VacantFlag- xchg [edi], eax+ add edi,
> MP_CPU_EXCHANGE_INFO_OFFSET +
> MP_CPU_EXCHANGE_INFO.ApIndex+ mov ebx, 1+ lock xadd [edi],
> ebx ; EBX = ApIndex+++ inc ebx ; EBX is
> CpuNumber + ; program stack mov edi, esi add edi,
> MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.StackSize
> mov eax, [edi]diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.c
> b/UefiCpuPkg/Library/MpInitLib/MpLib.c
> index 8b1f7f84ba..32a3951742 100644
> --- a/UefiCpuPkg/Library/MpInitLib/MpLib.c
> +++ b/UefiCpuPkg/Library/MpInitLib/MpLib.c
> @@ -1,7 +1,7 @@
> /** @file CPU MP Initialize Library common functions. - Copyright (c) 2016 -
> 2020, Intel Corporation. All rights reserved.<BR>+ Copyright (c) 2016 - 2021,
> Intel Corporation. All rights reserved.<BR> Copyright (c) 2020, AMD Inc. All
> rights reserved.<BR> SPDX-License-Identifier: BSD-2-Clause-Patent@@ -
> 1012,7 +1012,6 @@ FillExchangeInfoData (
> IA32_CR4 Cr4; ExchangeInfo = CpuMpData-
> >MpCpuExchangeInfo;- ExchangeInfo->Lock = 0; ExchangeInfo-
> >StackStart = CpuMpData->Buffer; ExchangeInfo->StackSize =
> CpuMpData->CpuApStackSize; ExchangeInfo->BufferStart = CpuMpData-
> >WakeupBuffer;diff --git a/UefiCpuPkg/Library/MpInitLib/MpLib.h
> b/UefiCpuPkg/Library/MpInitLib/MpLib.h
> index 02652eaae1..0bd60388b1 100644
> --- a/UefiCpuPkg/Library/MpInitLib/MpLib.h
> +++ b/UefiCpuPkg/Library/MpInitLib/MpLib.h
> @@ -1,7 +1,7 @@
> /** @file Common header file for MP Initialize Library. - Copyright (c) 2016
> - 2020, Intel Corporation. All rights reserved.<BR>+ Copyright (c) 2016 - 2021,
> Intel Corporation. All rights reserved.<BR> Copyright (c) 2020, AMD Inc. All
> rights reserved.<BR> SPDX-License-Identifier: BSD-2-Clause-Patent@@ -
> 190,7 +190,6 @@ typedef struct _CPU_MP_DATA CPU_MP_DATA;
> // into this structure are used in assembly code in this module // typedef
> struct {- UINTN Lock; UINTN StackStart; UINTN
> StackSize; UINTN CFunction;diff --git
> a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> index 3974330991..32e9a262bc 100644
> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpEqu.inc
> @@ -12,16 +12,12 @@
> ; ;------------------------------------------------------------------------------- -
> VacantFlag equ 00h-NotVacantFlag equ 0ffh-
> CPU_SWITCH_STATE_IDLE equ 0 CPU_SWITCH_STATE_STORED
> equ 1 CPU_SWITCH_STATE_LOADED equ 2
> MP_CPU_EXCHANGE_INFO_OFFSET equ (SwitchToRealProcEnd -
> RendezvousFunnelProcStart) struc MP_CPU_EXCHANGE_INFO- .Lock:
> resq 1 .StackStart: resq 1 .StackSize: resq 1 .CFunction:
> resq 1diff --git a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> index 423beb2cca..383b4974f8 100644
> --- a/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> +++ b/UefiCpuPkg/Library/MpInitLib/X64/MpFuncs.nasm
> @@ -158,21 +158,11 @@ LongModeStart:
> ; AP init mov edi, esi- add edi,
> MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.Lock-
> mov rax, NotVacantFlag+ add edi,
> MP_CPU_EXCHANGE_INFO_OFFSET +
> MP_CPU_EXCHANGE_INFO.ApIndex+ mov ebx, 1+ lock xadd [edi],
> ebx ; EBX = ApIndex+++ inc ebx ; EBX is
> CpuNumber -TestLock:- xchg qword [edi], rax- cmp rax,
> NotVacantFlag- jz TestLock-- lea ecx, [esi +
> MP_CPU_EXCHANGE_INFO_OFFSET +
> MP_CPU_EXCHANGE_INFO.ApIndex]- inc dword [ecx]- mov ebx,
> [ecx]--Releaselock:- mov rax, VacantFlag- xchg qword [edi], rax ;
> program stack mov edi, esi add edi,
> MP_CPU_EXCHANGE_INFO_OFFSET +
> MP_CPU_EXCHANGE_INFO.StackSize--
> 2.27.0.windows.1
>
>
>
> -=-=-=-=-=-=
> Groups.io Links: You receive all messages sent to this group.
> View/Reply Online (#71132): https://edk2.groups.io/g/devel/message/71132
> Mute This Topic: https://groups.io/mt/80372087/1779220
> Group Owner: devel+owner@edk2.groups.io
> Unsubscribe: https://edk2.groups.io/g/devel/unsub [star.zeng@intel.com] -
> =-=-=-=-=-=
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
2021-02-04 11:24 ` Zeng, Star
@ 2021-02-04 11:58 ` Laszlo Ersek
0 siblings, 0 replies; 11+ messages in thread
From: Laszlo Ersek @ 2021-02-04 11:58 UTC (permalink / raw)
To: Zeng, Star, devel@edk2.groups.io, Ni, Ray; +Cc: Dong, Eric, Kumar, Rahul1
On 02/04/21 12:24, Zeng, Star wrote:
> Hi All,
>
> Do you think it worth or not to also update MpFuncs.nasm in Edk2\UefiCpuPkg\PiSmmCpuDxeSmm?
I haven't done any measurements, so I couldn't say...
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release
2021-02-04 9:51 ` [edk2-devel] " Laszlo Ersek
@ 2021-02-04 13:43 ` Ni, Ray
0 siblings, 0 replies; 11+ messages in thread
From: Ni, Ray @ 2021-02-04 13:43 UTC (permalink / raw)
To: devel@edk2.groups.io, lersek@redhat.com; +Cc: Dong, Eric, Kumar, Rahul1
>
> (1) For more clarity, I would suggest
>
> lock xadd dword [edi], ebx
>
> (even though "ebx" already specifies the width, yes)
>
> Applies to the X64 version too.
OK.
>
>
> > + inc ebx ; EBX is CpuNumber
> >
> > + ; program stack
>
> (2) This change belongs in a separate patch. The X64 version already has
> this comment, and making both versions makes sense. But touching
> anything "stack-related" in the current patch is very confusing to me.
OK.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset
2021-02-04 9:44 ` Laszlo Ersek
@ 2021-02-04 14:27 ` Ni, Ray
2021-02-04 15:51 ` Laszlo Ersek
0 siblings, 1 reply; 11+ messages in thread
From: Ni, Ray @ 2021-02-04 14:27 UTC (permalink / raw)
To: devel@edk2.groups.io, lersek@redhat.com; +Cc: Dong, Eric, Kumar, Rahul1
> >
> > (1) please align the "res*" on the other lines with this
> >
> > (in the X64 file too)
> >
OK.
> >> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
> >
> > (2) please introduce a macro for this; in my opinion, with the currently
> > proposed change, the code is *harder* to read and modify than before.
> > Now we have a lot of fluff to spell out, for every single field reference.
I want to use the struc instead of original hardcode offset because
I have headache when removing the Lock field from the C structure.
All the hardcode value should be changed carefully.
Using struc, I can simply remove that field Lock from the struc.
I originally tried to supply the second parameter to struc for the initial offset
following https://www.nasm.us/doc/nasmdoc5.html#section-5.9.1
struc MP_CPU_EXCHANGE_INFO (SwitchToRealProcEnd - RendezvousFunnelProcStart)
So that
mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
can be:
mov si, MP_CPU_EXCHANGE_INFO.BufferStart
But somehow NASM compiler doesn't like it.
>
> (3) I have a further request / suggestion:
>
> (3a) We should extend the following files:
>
> MdePkg/Include/Ia32/Nasm.inc
> MdePkg/Include/X64/Nasm.inc
>
> with a macro that maps UINTN to "resd" versus "resq", as appropriate,
I am not an expert of NASM or ASM.
Do you mean to use %define as below in Ia32/Nasm.inc?
%define CTYPE_UINTN resd 1
%define CTYPE_UINT32 resd 1
%define CTYPE_UINT64 resq 1
%define CTYPE_UINT8 resb 1
%define CTYPE_BOOLEAN resb 1
%define CTYPE_UINT16 resw 1
And define below in X64/Nasm.inc?
%define CTYPE_UINTN resq 1
%define CTYPE_UINT32 resd 1
%define CTYPE_UINT64 resq 1
%define CTYPE_UINT8 resb 1
%define CTYPE_BOOLEAN resb 1
%define CTYPE_UINT16 resw 1
So, the struc definition will be as below?
.StackStart: CTYPE_UINTN
I intend to use CTYPE_xxx as prefix because simply using UINTN may cause
people think that UINTN is the C keyword UINTN.
Using CTYPE_UINTN so people can at least search this string to understand
the magic.
Anyway, I just want to use a different name other than UINTN.
Do you agree? Any suggestions on the name?
>
> (3b) we should reserve space for the IA32_DESCRIPTOR fields in terms of
> UINT16 + UINTN -- you can use a separate "struc IA32_DESCRIPTOR" for
> this that uses the UINTN trick from step (3a)
Do you mean this?
struc IA32_DESCRIPTOR
.Limit CTYPE_UINT16
.Base CTYPE_UINTN
endstruc
struc MP_CPU_EXCHANGE_INFO
...
.IdtrProfile: resb IA32_DESCRIPTOR_size
>
> (3c) use a common definition for "struc MP_CPU_EXCHANGE_INFO", hiding
> the UINTN and IA32_DESCRIPTOR size differences through the above steps.
I think it's doable.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [edk2-devel] [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset
2021-02-04 14:27 ` Ni, Ray
@ 2021-02-04 15:51 ` Laszlo Ersek
0 siblings, 0 replies; 11+ messages in thread
From: Laszlo Ersek @ 2021-02-04 15:51 UTC (permalink / raw)
To: Ni, Ray, devel@edk2.groups.io; +Cc: Dong, Eric, Kumar, Rahul1
On 02/04/21 15:27, Ni, Ray wrote:
>>>
>>> (1) please align the "res*" on the other lines with this
>>>
>>> (in the X64 file too)
>>>
>
> OK.
>
>>>> + mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
>>>
>>> (2) please introduce a macro for this; in my opinion, with the currently
>>> proposed change, the code is *harder* to read and modify than before.
>>> Now we have a lot of fluff to spell out, for every single field reference.
>
> I want to use the struc instead of original hardcode offset because
> I have headache when removing the Lock field from the C structure.
> All the hardcode value should be changed carefully.
> Using struc, I can simply remove that field Lock from the struc.
Yes, absolutely.
>
> I originally tried to supply the second parameter to struc for the initial offset
> following https://www.nasm.us/doc/nasmdoc5.html#section-5.9.1
> struc MP_CPU_EXCHANGE_INFO (SwitchToRealProcEnd - RendezvousFunnelProcStart)
>
> So that
> mov si, MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
> can be:
> mov si, MP_CPU_EXCHANGE_INFO.BufferStart
> But somehow NASM compiler doesn't like it.
Right, but that's not what I was trying to suggest. Instead, I'm
suggesting a very simple macro like
FIELD_OFS (BufferStart)
that expands to
MP_CPU_EXCHANGE_INFO_OFFSET + MP_CPU_EXCHANGE_INFO.BufferStart
That's all, really.
>
>>
>> (3) I have a further request / suggestion:
>>
>> (3a) We should extend the following files:
>>
>> MdePkg/Include/Ia32/Nasm.inc
>> MdePkg/Include/X64/Nasm.inc
>>
>> with a macro that maps UINTN to "resd" versus "resq", as appropriate,
>
> I am not an expert of NASM or ASM.
> Do you mean to use %define as below in Ia32/Nasm.inc?
> %define CTYPE_UINTN resd 1
> %define CTYPE_UINT32 resd 1
> %define CTYPE_UINT64 resq 1
> %define CTYPE_UINT8 resb 1
> %define CTYPE_BOOLEAN resb 1
> %define CTYPE_UINT16 resw 1
>
> And define below in X64/Nasm.inc?
> %define CTYPE_UINTN resq 1
> %define CTYPE_UINT32 resd 1
> %define CTYPE_UINT64 resq 1
> %define CTYPE_UINT8 resb 1
> %define CTYPE_BOOLEAN resb 1
> %define CTYPE_UINT16 resw 1
>
> So, the struc definition will be as below?
> .StackStart: CTYPE_UINTN
>
> I intend to use CTYPE_xxx as prefix because simply using UINTN may cause
> people think that UINTN is the C keyword UINTN.
> Using CTYPE_UINTN so people can at least search this string to understand
> the magic.
>
> Anyway, I just want to use a different name other than UINTN.
> Do you agree? Any suggestions on the name?
Yes, this is totally what I meant -- I didn't even intend to ask for
CTYPE_UINT32 and friends, given that they directly translate to "resd 1"
and similar. The only variable size type is UINTN, so I only asked for
CTYPE_UINTN.
But if you can add all the CTYPE_* macros, that's best!
>> (3b) we should reserve space for the IA32_DESCRIPTOR fields in terms of
>> UINT16 + UINTN -- you can use a separate "struc IA32_DESCRIPTOR" for
>> this that uses the UINTN trick from step (3a)
>
> Do you mean this?
>
> struc IA32_DESCRIPTOR
> .Limit CTYPE_UINT16
> .Base CTYPE_UINTN
> endstruc
>
> struc MP_CPU_EXCHANGE_INFO
> ...
> .IdtrProfile: resb IA32_DESCRIPTOR_size
More or less, yes.
If it's possible to embed IA32_DESCRIPTOR into MP_CPU_EXCHANGE_INFO
somehow, so that we could even refer to the individual fields in it (if
necessary), that would be even nicer.
But it's not really a requirement -- the above should work OK too.
>> (3c) use a common definition for "struc MP_CPU_EXCHANGE_INFO", hiding
>> the UINTN and IA32_DESCRIPTOR size differences through the above steps.
>
> I think it's doable.
>
Thank you!
Laszlo
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2021-02-04 15:51 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-02-04 2:59 [PATCH 0/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
2021-02-04 2:59 ` [PATCH 1/2] UefiCpuPkg/MpInitLib: Use NASM struc to avoid hardcode offset Ni, Ray
2021-02-04 9:32 ` [edk2-devel] " Laszlo Ersek
2021-02-04 9:44 ` Laszlo Ersek
2021-02-04 14:27 ` Ni, Ray
2021-02-04 15:51 ` Laszlo Ersek
2021-02-04 2:59 ` [PATCH 2/2] UefiCpuPkg/MpInitLib: Use XADD to avoid lock acquire/release Ni, Ray
2021-02-04 9:51 ` [edk2-devel] " Laszlo Ersek
2021-02-04 13:43 ` Ni, Ray
2021-02-04 11:24 ` Zeng, Star
2021-02-04 11:58 ` Laszlo Ersek
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox