* [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. @ 2019-11-28 6:17 Dong, Eric 2019-11-28 6:37 ` [edk2-devel] " Ni, Ray ` (2 more replies) 0 siblings, 3 replies; 12+ messages in thread From: Dong, Eric @ 2019-11-28 6:17 UTC (permalink / raw) To: devel; +Cc: Ray Ni, Laszlo Ersek v2 changes: Minor update based on comments. v1 changes: REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388 Current logic allocate Token every time when need to use it. The logic caused SMI latency raised to very high. Update logic to allocate Token buffer at driver's entry point. Later use the token from the allocated token buffer. Only when all the buffer have been used, then need to allocate new buffer. Signed-off-by: Eric Dong <eric.dong@intel.com> Cc: Ray Ni <ray.ni@intel.com> Cc: Laszlo Ersek <lersek@redhat.com> --- UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 ++++++++++++++++++++-- UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c index d8d2b6f444..4632e5b0c2 100644 --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c @@ -492,6 +492,23 @@ FreeTokens ( { LIST_ENTRY *Link; PROCEDURE_TOKEN *ProcToken; + TOKEN_BUFFER *TokenBuf; + + // + // Not free the buffer, just clear the UsedTokenNum. In order to + // avoid later trig allcate action again when need to use token. + // + gSmmCpuPrivate->UsedTokenNum = 0; + + Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList); + while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) { + TokenBuf = TOKEN_BUFFER_FROM_LINK (Link); + + Link = RemoveEntryList (&TokenBuf->Link); + + FreePool (TokenBuf->Buffer); + FreePool (TokenBuf); + } while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) { Link = GetFirstNode (&gSmmCpuPrivate->TokenList); @@ -499,7 +516,6 @@ FreeTokens ( RemoveEntryList (&ProcToken->Link); - FreePool ((VOID *)ProcToken->ProcedureToken); FreePool (ProcToken); } } @@ -1115,13 +1131,35 @@ CreateToken ( VOID ) { - PROCEDURE_TOKEN *ProcToken; + PROCEDURE_TOKEN *ProcToken; SPIN_LOCK *CpuToken; UINTN SpinLockSize; + TOKEN_BUFFER *TokenBuf; SpinLockSize = GetSpinLockProperties (); - CpuToken = AllocatePool (SpinLockSize); - ASSERT (CpuToken != NULL); + + if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) { + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n")); + + // + // Record current token buffer for later free action usage. + // Current used token buffer not in this list. + // + TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER)); + ASSERT (TokenBuf != NULL); + TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE; + TokenBuf->Buffer = gSmmCpuPrivate->CurrentTokenBuf; + + InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link); + + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); + gSmmCpuPrivate->UsedTokenNum = 0; + } + + CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum); + gSmmCpuPrivate->UsedTokenNum++; + InitializeSpinLock (CpuToken); AcquireSpinLock (CpuToken); @@ -1737,10 +1775,20 @@ InitializeDataForMmMp ( VOID ) { + UINTN SpinLockSize; + + SpinLockSize = GetSpinLockProperties (); + DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize)); + + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); + gSmmCpuPrivate->UsedTokenNum = 0; + gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus); ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL); InitializeListHead (&gSmmCpuPrivate->TokenList); + InitializeListHead (&gSmmCpuPrivate->OldTokenBufList); } /** diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h index 8c29f1a558..36fd0dae92 100644 --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h @@ -198,6 +198,7 @@ typedef UINT32 SMM_CPU_ARRIVAL_EXCEPTIONS; #define ARRIVAL_EXCEPTION_DELAYED 0x2 #define ARRIVAL_EXCEPTION_SMI_DISABLED 0x4 +#define MAX_PRE_RESERVE_TOKEN_COUNT 0x512 // // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE. // @@ -217,6 +218,17 @@ typedef struct { #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE) +#define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') + +typedef struct { + UINTN Signature; + LIST_ENTRY Link; + + UINT8 *Buffer; +} TOKEN_BUFFER; + +#define TOKEN_BUFFER_FROM_LINK(a) CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE) + // // Private structure for the SMM CPU module that is stored in DXE Runtime memory // Contains the SMM Configuration Protocols that is produced. @@ -243,6 +255,10 @@ typedef struct { PROCEDURE_WRAPPER *ApWrapperFunc; LIST_ENTRY TokenList; + LIST_ENTRY OldTokenBufList; + + UINT8 *CurrentTokenBuf; + UINTN UsedTokenNum; // Only record tokens used in CurrentTokenBuf. } SMM_CPU_PRIVATE_DATA; extern SMM_CPU_PRIVATE_DATA *gSmmCpuPrivate; -- 2.23.0.windows.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-28 6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric @ 2019-11-28 6:37 ` Ni, Ray 2019-11-28 13:15 ` Philippe Mathieu-Daudé 2019-11-28 13:57 ` Laszlo Ersek 2 siblings, 0 replies; 12+ messages in thread From: Ni, Ray @ 2019-11-28 6:37 UTC (permalink / raw) To: devel@edk2.groups.io, Dong, Eric; +Cc: Laszlo Ersek Reviewed-by: Ray Ni <ray.ni@intel.com> > -----Original Message----- > From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Dong, > Eric > Sent: Thursday, November 28, 2019 2:17 PM > To: devel@edk2.groups.io > Cc: Ni, Ray <ray.ni@intel.com>; Laszlo Ersek <lersek@redhat.com> > Subject: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid > allocate Token every time. > > v2 changes: > > Minor update based on comments. > > > > v1 changes: > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388 > > Current logic allocate Token every time when need to use it. The logic > caused SMI latency raised to very high. Update logic to allocate Token > buffer at driver's entry point. Later use the token from the allocated > token buffer. Only when all the buffer have been used, then need to > allocate new buffer. > > Signed-off-by: Eric Dong <eric.dong@intel.com> > Cc: Ray Ni <ray.ni@intel.com> > Cc: Laszlo Ersek <lersek@redhat.com> > --- > UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 > ++++++++++++++++++++-- > UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ > 2 files changed, 68 insertions(+), 4 deletions(-) > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > index d8d2b6f444..4632e5b0c2 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > @@ -492,6 +492,23 @@ FreeTokens ( > { > > LIST_ENTRY *Link; > > PROCEDURE_TOKEN *ProcToken; > > + TOKEN_BUFFER *TokenBuf; > > + > > + // > > + // Not free the buffer, just clear the UsedTokenNum. In order to > > + // avoid later trig allcate action again when need to use token. > > + // > > + gSmmCpuPrivate->UsedTokenNum = 0; > > + > > + Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList); > > + while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) { > > + TokenBuf = TOKEN_BUFFER_FROM_LINK (Link); > > + > > + Link = RemoveEntryList (&TokenBuf->Link); > > + > > + FreePool (TokenBuf->Buffer); > > + FreePool (TokenBuf); > > + } > > > > while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) { > > Link = GetFirstNode (&gSmmCpuPrivate->TokenList); > > @@ -499,7 +516,6 @@ FreeTokens ( > > > RemoveEntryList (&ProcToken->Link); > > > > - FreePool ((VOID *)ProcToken->ProcedureToken); > > FreePool (ProcToken); > > } > > } > > @@ -1115,13 +1131,35 @@ CreateToken ( > VOID > > ) > > { > > - PROCEDURE_TOKEN *ProcToken; > > + PROCEDURE_TOKEN *ProcToken; > > SPIN_LOCK *CpuToken; > > UINTN SpinLockSize; > > + TOKEN_BUFFER *TokenBuf; > > > > SpinLockSize = GetSpinLockProperties (); > > - CpuToken = AllocatePool (SpinLockSize); > > - ASSERT (CpuToken != NULL); > > + > > + if (gSmmCpuPrivate->UsedTokenNum == > MAX_PRE_RESERVE_TOKEN_COUNT) { > > + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new > buffer!\n")); > > + > > + // > > + // Record current token buffer for later free action usage. > > + // Current used token buffer not in this list. > > + // > > + TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER)); > > + ASSERT (TokenBuf != NULL); > > + TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE; > > + TokenBuf->Buffer = gSmmCpuPrivate->CurrentTokenBuf; > > + > > + InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link); > > + > > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * > MAX_PRE_RESERVE_TOKEN_COUNT); > > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > > + gSmmCpuPrivate->UsedTokenNum = 0; > > + } > > + > > + CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + > SpinLockSize * gSmmCpuPrivate->UsedTokenNum); > > + gSmmCpuPrivate->UsedTokenNum++; > > + > > InitializeSpinLock (CpuToken); > > AcquireSpinLock (CpuToken); > > > > @@ -1737,10 +1775,20 @@ InitializeDataForMmMp ( > VOID > > ) > > { > > + UINTN SpinLockSize; > > + > > + SpinLockSize = GetSpinLockProperties (); > > + DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize)); > > + > > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * > MAX_PRE_RESERVE_TOKEN_COUNT); > > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > > + gSmmCpuPrivate->UsedTokenNum = 0; > > + > > gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof > (PROCEDURE_WRAPPER) * gSmmCpuPrivate- > >SmmCoreEntryContext.NumberOfCpus); > > ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL); > > > > InitializeListHead (&gSmmCpuPrivate->TokenList); > > + InitializeListHead (&gSmmCpuPrivate->OldTokenBufList); > > } > > > > /** > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > index 8c29f1a558..36fd0dae92 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > @@ -198,6 +198,7 @@ typedef UINT32 > SMM_CPU_ARRIVAL_EXCEPTIONS; > #define ARRIVAL_EXCEPTION_DELAYED 0x2 > > #define ARRIVAL_EXCEPTION_SMI_DISABLED 0x4 > > > > +#define MAX_PRE_RESERVE_TOKEN_COUNT 0x512 > > // > > // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE. > > // > > @@ -217,6 +218,17 @@ typedef struct { > > > #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, > Link, PROCEDURE_TOKEN_SIGNATURE) > > > > +#define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') > > + > > +typedef struct { > > + UINTN Signature; > > + LIST_ENTRY Link; > > + > > + UINT8 *Buffer; > > +} TOKEN_BUFFER; > > + > > +#define TOKEN_BUFFER_FROM_LINK(a) CR (a, TOKEN_BUFFER, Link, > TOKEN_BUFFER_SIGNATURE) > > + > > // > > // Private structure for the SMM CPU module that is stored in DXE Runtime > memory > > // Contains the SMM Configuration Protocols that is produced. > > @@ -243,6 +255,10 @@ typedef struct { > PROCEDURE_WRAPPER *ApWrapperFunc; > > LIST_ENTRY TokenList; > > > > + LIST_ENTRY OldTokenBufList; > > + > > + UINT8 *CurrentTokenBuf; > > + UINTN UsedTokenNum; // Only record tokens used in > CurrentTokenBuf. > > } SMM_CPU_PRIVATE_DATA; > > > > extern SMM_CPU_PRIVATE_DATA *gSmmCpuPrivate; > > -- > 2.23.0.windows.1 > > > -=-=-=-=-=-= > Groups.io Links: You receive all messages sent to this group. > > View/Reply Online (#51442): https://edk2.groups.io/g/devel/message/51442 > Mute This Topic: https://groups.io/mt/63850169/1712937 > Group Owner: devel+owner@edk2.groups.io > Unsubscribe: https://edk2.groups.io/g/devel/unsub [ray.ni@intel.com] > -=-=-=-=-=-= ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-28 6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric 2019-11-28 6:37 ` [edk2-devel] " Ni, Ray @ 2019-11-28 13:15 ` Philippe Mathieu-Daudé 2019-12-04 5:44 ` Dong, Eric 2019-11-28 13:57 ` Laszlo Ersek 2 siblings, 1 reply; 12+ messages in thread From: Philippe Mathieu-Daudé @ 2019-11-28 13:15 UTC (permalink / raw) To: devel, eric.dong; +Cc: Ray Ni, Laszlo Ersek Hi Eric, On 11/28/19 7:17 AM, Dong, Eric via Groups.Io wrote: > v2 changes: > Minor update based on comments. > > v1 changes: Nitpick, previous comments should not go into the commit description, either on a cover letter, or after the '---' marker so they get stripped out by git-am. Another nitpick, remove the trailing dot in commit subject. > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388 > > Current logic allocate Token every time when need to use it. The logic > caused SMI latency raised to very high. Update logic to allocate Token > buffer at driver's entry point. Later use the token from the allocated > token buffer. Only when all the buffer have been used, then need to > allocate new buffer. > > Signed-off-by: Eric Dong <eric.dong@intel.com> > Cc: Ray Ni <ray.ni@intel.com> > Cc: Laszlo Ersek <lersek@redhat.com> > --- > UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 ++++++++++++++++++++-- > UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ > 2 files changed, 68 insertions(+), 4 deletions(-) > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > index d8d2b6f444..4632e5b0c2 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > @@ -492,6 +492,23 @@ FreeTokens ( > { > LIST_ENTRY *Link; > PROCEDURE_TOKEN *ProcToken; > + TOKEN_BUFFER *TokenBuf; > + > + // > + // Not free the buffer, just clear the UsedTokenNum. In order to > + // avoid later trig allcate action again when need to use token. Maybe: "In order to avoid triggering allocate action when we need to use the token, do not free the buffer." Patch looks good otherwise. > + // > + gSmmCpuPrivate->UsedTokenNum = 0; > + > + Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList); > + while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) { > + TokenBuf = TOKEN_BUFFER_FROM_LINK (Link); > + > + Link = RemoveEntryList (&TokenBuf->Link); > + > + FreePool (TokenBuf->Buffer); > + FreePool (TokenBuf); > + } > > while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) { > Link = GetFirstNode (&gSmmCpuPrivate->TokenList); > @@ -499,7 +516,6 @@ FreeTokens ( > > RemoveEntryList (&ProcToken->Link); > > - FreePool ((VOID *)ProcToken->ProcedureToken); > FreePool (ProcToken); > } > } > @@ -1115,13 +1131,35 @@ CreateToken ( > VOID > ) > { > - PROCEDURE_TOKEN *ProcToken; > + PROCEDURE_TOKEN *ProcToken; > SPIN_LOCK *CpuToken; > UINTN SpinLockSize; > + TOKEN_BUFFER *TokenBuf; > > SpinLockSize = GetSpinLockProperties (); > - CpuToken = AllocatePool (SpinLockSize); > - ASSERT (CpuToken != NULL); > + > + if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) { > + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n")); > + > + // > + // Record current token buffer for later free action usage. > + // Current used token buffer not in this list. > + // > + TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER)); > + ASSERT (TokenBuf != NULL); > + TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE; > + TokenBuf->Buffer = gSmmCpuPrivate->CurrentTokenBuf; > + > + InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link); > + > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > + gSmmCpuPrivate->UsedTokenNum = 0; > + } > + > + CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum); > + gSmmCpuPrivate->UsedTokenNum++; > + > InitializeSpinLock (CpuToken); > AcquireSpinLock (CpuToken); > > @@ -1737,10 +1775,20 @@ InitializeDataForMmMp ( > VOID > ) > { > + UINTN SpinLockSize; > + > + SpinLockSize = GetSpinLockProperties (); > + DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize)); > + > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > + gSmmCpuPrivate->UsedTokenNum = 0; > + > gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus); > ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL); > > InitializeListHead (&gSmmCpuPrivate->TokenList); > + InitializeListHead (&gSmmCpuPrivate->OldTokenBufList); > } > > /** > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > index 8c29f1a558..36fd0dae92 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > @@ -198,6 +198,7 @@ typedef UINT32 SMM_CPU_ARRIVAL_EXCEPTIONS; > #define ARRIVAL_EXCEPTION_DELAYED 0x2 > #define ARRIVAL_EXCEPTION_SMI_DISABLED 0x4 > > +#define MAX_PRE_RESERVE_TOKEN_COUNT 0x512 > // > // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE. > // > @@ -217,6 +218,17 @@ typedef struct { > > #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE) > > +#define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') > + > +typedef struct { > + UINTN Signature; > + LIST_ENTRY Link; > + > + UINT8 *Buffer; > +} TOKEN_BUFFER; > + > +#define TOKEN_BUFFER_FROM_LINK(a) CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE) > + > // > // Private structure for the SMM CPU module that is stored in DXE Runtime memory > // Contains the SMM Configuration Protocols that is produced. > @@ -243,6 +255,10 @@ typedef struct { > PROCEDURE_WRAPPER *ApWrapperFunc; > LIST_ENTRY TokenList; > > + LIST_ENTRY OldTokenBufList; > + > + UINT8 *CurrentTokenBuf; > + UINTN UsedTokenNum; // Only record tokens used in CurrentTokenBuf. > } SMM_CPU_PRIVATE_DATA; > > extern SMM_CPU_PRIVATE_DATA *gSmmCpuPrivate; > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-28 13:15 ` Philippe Mathieu-Daudé @ 2019-12-04 5:44 ` Dong, Eric 0 siblings, 0 replies; 12+ messages in thread From: Dong, Eric @ 2019-12-04 5:44 UTC (permalink / raw) To: Philippe Mathieu-Daudé, devel@edk2.groups.io; +Cc: Ni, Ray, Laszlo Ersek Hi Philippe, > -----Original Message----- > From: Philippe Mathieu-Daudé [mailto:philmd@redhat.com] > Sent: Thursday, November 28, 2019 9:16 PM > To: devel@edk2.groups.io; Dong, Eric <eric.dong@intel.com> > Cc: Ni, Ray <ray.ni@intel.com>; Laszlo Ersek <lersek@redhat.com> > Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid > allocate Token every time. > > Hi Eric, > > On 11/28/19 7:17 AM, Dong, Eric via Groups.Io wrote: > > v2 changes: > > Minor update based on comments. > > > > v1 changes: > > Nitpick, previous comments should not go into the commit description, > either on a cover letter, or after the '---' marker so they get stripped out by > git-am. [[Eric]] got it, will use this format in my next version change. Thanks. > > Another nitpick, remove the trailing dot in commit subject. [[Eric]] Update in my next version change. > > > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388 > > > > Current logic allocate Token every time when need to use it. The logic > > caused SMI latency raised to very high. Update logic to allocate Token > > buffer at driver's entry point. Later use the token from the allocated > > token buffer. Only when all the buffer have been used, then need to > > allocate new buffer. > > > > Signed-off-by: Eric Dong <eric.dong@intel.com> > > Cc: Ray Ni <ray.ni@intel.com> > > Cc: Laszlo Ersek <lersek@redhat.com> > > --- > > UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 > ++++++++++++++++++++-- > > UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ > > 2 files changed, 68 insertions(+), 4 deletions(-) > > > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > > b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > > index d8d2b6f444..4632e5b0c2 100644 > > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > > @@ -492,6 +492,23 @@ FreeTokens ( > > { > > LIST_ENTRY *Link; > > PROCEDURE_TOKEN *ProcToken; > > + TOKEN_BUFFER *TokenBuf; > > + > > + // > > + // Not free the buffer, just clear the UsedTokenNum. In order to > > + // avoid later trig allcate action again when need to use token. > > Maybe: "In order to avoid triggering allocate action when we need to use the > token, do not free the buffer." [[Eric]] Update it in my next version change. Thanks, Eric > > Patch looks good otherwise. > > > + // > > + gSmmCpuPrivate->UsedTokenNum = 0; > > + > > + Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList); > > + while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) { > > + TokenBuf = TOKEN_BUFFER_FROM_LINK (Link); > > + > > + Link = RemoveEntryList (&TokenBuf->Link); > > + > > + FreePool (TokenBuf->Buffer); > > + FreePool (TokenBuf); > > + } > > > > while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) { > > Link = GetFirstNode (&gSmmCpuPrivate->TokenList); @@ -499,7 > > +516,6 @@ FreeTokens ( > > > > RemoveEntryList (&ProcToken->Link); > > > > - FreePool ((VOID *)ProcToken->ProcedureToken); > > FreePool (ProcToken); > > } > > } > > @@ -1115,13 +1131,35 @@ CreateToken ( > > VOID > > ) > > { > > - PROCEDURE_TOKEN *ProcToken; > > + PROCEDURE_TOKEN *ProcToken; > > SPIN_LOCK *CpuToken; > > UINTN SpinLockSize; > > + TOKEN_BUFFER *TokenBuf; > > > > SpinLockSize = GetSpinLockProperties (); > > - CpuToken = AllocatePool (SpinLockSize); > > - ASSERT (CpuToken != NULL); > > + > > + if (gSmmCpuPrivate->UsedTokenNum == > MAX_PRE_RESERVE_TOKEN_COUNT) { > > + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new > > + buffer!\n")); > > + > > + // > > + // Record current token buffer for later free action usage. > > + // Current used token buffer not in this list. > > + // > > + TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER)); > > + ASSERT (TokenBuf != NULL); > > + TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE; > > + TokenBuf->Buffer = gSmmCpuPrivate->CurrentTokenBuf; > > + > > + InsertTailList (&gSmmCpuPrivate->OldTokenBufList, > > + &TokenBuf->Link); > > + > > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize > * MAX_PRE_RESERVE_TOKEN_COUNT); > > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > > + gSmmCpuPrivate->UsedTokenNum = 0; } > > + > > + CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + > > + SpinLockSize * gSmmCpuPrivate->UsedTokenNum); > > + gSmmCpuPrivate->UsedTokenNum++; > > + > > InitializeSpinLock (CpuToken); > > AcquireSpinLock (CpuToken); > > > > @@ -1737,10 +1775,20 @@ InitializeDataForMmMp ( > > VOID > > ) > > { > > + UINTN SpinLockSize; > > + > > + SpinLockSize = GetSpinLockProperties (); DEBUG((DEBUG_INFO, > > + "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize)); > > + > > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * > > + MAX_PRE_RESERVE_TOKEN_COUNT); ASSERT > > + (gSmmCpuPrivate->CurrentTokenBuf != NULL); > > + gSmmCpuPrivate->UsedTokenNum = 0; > > + > > gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof > (PROCEDURE_WRAPPER) * gSmmCpuPrivate- > >SmmCoreEntryContext.NumberOfCpus); > > ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL); > > > > InitializeListHead (&gSmmCpuPrivate->TokenList); > > + InitializeListHead (&gSmmCpuPrivate->OldTokenBufList); > > } > > > > /** > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > > b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > > index 8c29f1a558..36fd0dae92 100644 > > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > > @@ -198,6 +198,7 @@ typedef UINT32 > SMM_CPU_ARRIVAL_EXCEPTIONS; > > #define ARRIVAL_EXCEPTION_DELAYED 0x2 > > #define ARRIVAL_EXCEPTION_SMI_DISABLED 0x4 > > > > +#define MAX_PRE_RESERVE_TOKEN_COUNT 0x512 > > // > > // Wrapper used to convert EFI_AP_PROCEDURE2 and > EFI_AP_PROCEDURE. > > // > > @@ -217,6 +218,17 @@ typedef struct { > > > > #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, > Link, > > PROCEDURE_TOKEN_SIGNATURE) > > > > +#define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') > > + > > +typedef struct { > > + UINTN Signature; > > + LIST_ENTRY Link; > > + > > + UINT8 *Buffer; > > +} TOKEN_BUFFER; > > + > > +#define TOKEN_BUFFER_FROM_LINK(a) CR (a, TOKEN_BUFFER, Link, > > +TOKEN_BUFFER_SIGNATURE) > > + > > // > > // Private structure for the SMM CPU module that is stored in DXE > Runtime memory > > // Contains the SMM Configuration Protocols that is produced. > > @@ -243,6 +255,10 @@ typedef struct { > > PROCEDURE_WRAPPER *ApWrapperFunc; > > LIST_ENTRY TokenList; > > > > + LIST_ENTRY OldTokenBufList; > > + > > + UINT8 *CurrentTokenBuf; > > + UINTN UsedTokenNum; // Only record tokens used in > CurrentTokenBuf. > > } SMM_CPU_PRIVATE_DATA; > > > > extern SMM_CPU_PRIVATE_DATA *gSmmCpuPrivate; > > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-28 6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric 2019-11-28 6:37 ` [edk2-devel] " Ni, Ray 2019-11-28 13:15 ` Philippe Mathieu-Daudé @ 2019-11-28 13:57 ` Laszlo Ersek 2019-11-29 1:22 ` Ni, Ray 2019-11-29 3:02 ` [edk2-devel] " Dong, Eric 2 siblings, 2 replies; 12+ messages in thread From: Laszlo Ersek @ 2019-11-28 13:57 UTC (permalink / raw) To: Eric Dong, devel; +Cc: Ray Ni, Liming Gao On 11/28/19 07:17, Eric Dong wrote: > v2 changes: > Minor update based on comments. > > v1 changes: > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388 > > Current logic allocate Token every time when need to use it. (1) Can you please clarify, in the *commit message*, what service the token allocation is needed for? To me it seems to be related to the SMM MP protocol, which was added for <https://bugzilla.tianocore.org/show_bug.cgi?id=1937>. Is that correct? (For example, the CreateToken() function was introduced in commit 51dd408ae102, "UefiCpuPkg/PiSmmCpuDxeSmm: Enable MM MP Protocol", 2019-07-16.) (2) If the problem indeed depends on MM MP protocol usage, then please update Bugzilla 2388 too, with that information ("performance regression from BZ#1937"). If a platform does not use the MM MP protocol, then highlighting this information helps platforms determine that they are not affected. (I read Bugzilla 2388 soon after it was filed, and I was surprised, because I didn't remember any "human visible" SMI handling slowdown. Maybe it's not on the "human visible" scale; I'm not sure.) > The logic > caused SMI latency raised to very high. Update logic to allocate Token > buffer at driver's entry point. Later use the token from the allocated > token buffer. Only when all the buffer have been used, then need to > allocate new buffer. > > Signed-off-by: Eric Dong <eric.dong@intel.com> > Cc: Ray Ni <ray.ni@intel.com> > Cc: Laszlo Ersek <lersek@redhat.com> > --- > UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 ++++++++++++++++++++-- > UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ > 2 files changed, 68 insertions(+), 4 deletions(-) commenting on the header file changes first: > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > index 8c29f1a558..36fd0dae92 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > @@ -198,6 +198,7 @@ typedef UINT32 SMM_CPU_ARRIVAL_EXCEPTIONS; > #define ARRIVAL_EXCEPTION_DELAYED 0x2 > #define ARRIVAL_EXCEPTION_SMI_DISABLED 0x4 > > +#define MAX_PRE_RESERVE_TOKEN_COUNT 0x512 (3) This is a very strange constant, 0x512: it's decimal 1298. Is that your intent? (4) Should we make this a global (SMRAM) variable instead, and initialize it from a PCD? I'm not sure how large tokens are, but if a platform has no use for the MM MP protocol, then pre-allocating tokens for the MM MP protocol just wastes SMRAM. If I grep edk2 (at bd85bf54c268), and edk2-platforms (at 04889ec1198b), for "gEfiMmMpProtocolGuid", the only reported module is "UefiCpuPkg/PiSmmCpuDxeSmm" itself. In other words, there is no open source consumer for the new protocol. I don't think we should waste SMRAM on platforms that don't consume this protocol. Furthermore, I've applied the following patch, and rebuilt OVMF: > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > index 0685637c2b51..5874195ba96e 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > @@ -1119,6 +1119,8 @@ CreateToken ( > SPIN_LOCK *CpuToken; > UINTN SpinLockSize; > > + ASSERT (FALSE); > + > SpinLockSize = GetSpinLockProperties (); > CpuToken = AllocatePool (SpinLockSize); > ASSERT (CpuToken != NULL); then I've re-run my usual tests. The ASSERT() has not been reached, and so I think that the new SMRAM allocation would be pure loss (= unused) for OVMF. I suggest that we introduce a new PCD for "token count per chunk", and set the default value to 1, in the DEC file. (And we should copy the PCD into a global variable, in InitializeDataForMmMp().) This way, if a platform suddenly starts consuming tokens, it will work (there will always be a single available token). If they want to improve performance (using a chunked allocation style), they can set the PCD as they see fit. Back to the patch: On 11/28/19 07:17, Eric Dong wrote: > // > // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE. > // > @@ -217,6 +218,17 @@ typedef struct { > > #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE) > > +#define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') > + > +typedef struct { > + UINTN Signature; > + LIST_ENTRY Link; > + > + UINT8 *Buffer; > +} TOKEN_BUFFER; > + > +#define TOKEN_BUFFER_FROM_LINK(a) CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE) > + > // > // Private structure for the SMM CPU module that is stored in DXE Runtime memory > // Contains the SMM Configuration Protocols that is produced. > @@ -243,6 +255,10 @@ typedef struct { > PROCEDURE_WRAPPER *ApWrapperFunc; > LIST_ENTRY TokenList; > > + LIST_ENTRY OldTokenBufList; > + > + UINT8 *CurrentTokenBuf; > + UINTN UsedTokenNum; // Only record tokens used in CurrentTokenBuf. > } SMM_CPU_PRIVATE_DATA; > > extern SMM_CPU_PRIVATE_DATA *gSmmCpuPrivate; > (5) There are two new lines above that are overlong. Did "PatchCheck.py" not complain? > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > index d8d2b6f444..4632e5b0c2 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > @@ -492,6 +492,23 @@ FreeTokens ( > { > LIST_ENTRY *Link; > PROCEDURE_TOKEN *ProcToken; > + TOKEN_BUFFER *TokenBuf; > + > + // > + // Not free the buffer, just clear the UsedTokenNum. In order to > + // avoid later trig allcate action again when need to use token. > + // > + gSmmCpuPrivate->UsedTokenNum = 0; (6) Here we do not zero out the current token buffer, but in CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool(). This is an inconsistency, we should call either ZeroMem() here (if zeroing matters), or AllocatePool() in the other two places (if zeroing does not matter). > + > + Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList); > + while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) { > + TokenBuf = TOKEN_BUFFER_FROM_LINK (Link); > + > + Link = RemoveEntryList (&TokenBuf->Link); > + > + FreePool (TokenBuf->Buffer); > + FreePool (TokenBuf); > + } > > while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) { > Link = GetFirstNode (&gSmmCpuPrivate->TokenList); > @@ -499,7 +516,6 @@ FreeTokens ( > > RemoveEntryList (&ProcToken->Link); > > - FreePool ((VOID *)ProcToken->ProcedureToken); > FreePool (ProcToken); > } > } > @@ -1115,13 +1131,35 @@ CreateToken ( > VOID > ) > { > - PROCEDURE_TOKEN *ProcToken; > + PROCEDURE_TOKEN *ProcToken; > SPIN_LOCK *CpuToken; > UINTN SpinLockSize; > + TOKEN_BUFFER *TokenBuf; > > SpinLockSize = GetSpinLockProperties (); > - CpuToken = AllocatePool (SpinLockSize); > - ASSERT (CpuToken != NULL); > + > + if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) { > + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n")); (7) This is an expected case, and not too much a corner case at that. Furthermore, the DEBUG message is in a performance-sensitive path. Therefore, the message should be logged with the DEBUG_VERBOSE mask, and not with the DEBUG_INFO mask. (8) In addition, the DEBUG line is too long. (9) The DEBUG line also misses a space character. > + > + // > + // Record current token buffer for later free action usage. > + // Current used token buffer not in this list. > + // > + TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER)); > + ASSERT (TokenBuf != NULL); > + TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE; > + TokenBuf->Buffer = gSmmCpuPrivate->CurrentTokenBuf; > + > + InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link); > + > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > + gSmmCpuPrivate->UsedTokenNum = 0; > + } > + > + CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum); (10) Two lines are overlong. > + gSmmCpuPrivate->UsedTokenNum++; > + > InitializeSpinLock (CpuToken); > AcquireSpinLock (CpuToken); > > @@ -1737,10 +1775,20 @@ InitializeDataForMmMp ( > VOID > ) > { > + UINTN SpinLockSize; > + > + SpinLockSize = GetSpinLockProperties (); > + DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize)); (11) For consistency with (7), you might want to downgrade this to DEBUG_VERBOSE too. But, in this case, I won't insist. (12) Space character missing after "DEBUG". (13) Are you proposing this patch for edk2-stable201911? (CC Liming) Thanks Laszlo > + > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > + gSmmCpuPrivate->UsedTokenNum = 0; > + > gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus); > ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL); > > InitializeListHead (&gSmmCpuPrivate->TokenList); > + InitializeListHead (&gSmmCpuPrivate->OldTokenBufList); > } > > /** ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-28 13:57 ` Laszlo Ersek @ 2019-11-29 1:22 ` Ni, Ray 2019-11-29 7:17 ` Laszlo Ersek 2019-11-29 3:02 ` [edk2-devel] " Dong, Eric 1 sibling, 1 reply; 12+ messages in thread From: Ni, Ray @ 2019-11-29 1:22 UTC (permalink / raw) To: Laszlo Ersek, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming > > I suggest that we introduce a new PCD for "token count per chunk", and > set the default value to 1, in the DEC file. (And we should copy the PCD > into a global variable, in InitializeDataForMmMp().) > > This way, if a platform suddenly starts consuming tokens, it will work > (there will always be a single available token). If they want to improve > performance (using a chunked allocation style), they can set the PCD as > they see fit. > Laszlo, I agree with you to use to PCD for "token count per chunk". But I suggest the default value of that PCD can be at least 64. Reasons: 1. MM_MP is the new MP protocol in SMM environment and we expect all SMM code start to use the new protocol instead of the original StartupThisAp service exposed through SMM System table. 2. A value of 1 is totally to disable the pre-allocating. I prefer this pre-allocating is activated by default given the reason #1. 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size. Thanks, Ray ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-29 1:22 ` Ni, Ray @ 2019-11-29 7:17 ` Laszlo Ersek 2019-12-02 5:14 ` Ni, Ray 0 siblings, 1 reply; 12+ messages in thread From: Laszlo Ersek @ 2019-11-29 7:17 UTC (permalink / raw) To: Ni, Ray, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming On 11/29/19 02:22, Ni, Ray wrote: >> >> I suggest that we introduce a new PCD for "token count per chunk", and >> set the default value to 1, in the DEC file. (And we should copy the PCD >> into a global variable, in InitializeDataForMmMp().) >> >> This way, if a platform suddenly starts consuming tokens, it will work >> (there will always be a single available token). If they want to improve >> performance (using a chunked allocation style), they can set the PCD as >> they see fit. >> > > Laszlo, > I agree with you to use to PCD for "token count per chunk". > But I suggest the default value of that PCD can be at least 64. > Reasons: > 1. MM_MP is the new MP protocol in SMM environment and we expect > all SMM code start to use the new protocol instead of the original StartupThisAp > service exposed through SMM System table. If we consider the older SmmStartupThisAp() / MmStartupThisAp() member functions, the situation is almost the same -- (almost) no callers. In edk2, only SmmPeriodicSmiLib seems to call this SMST (MMST) API. In edk2-platforms, MinPlatformPkg uses the lib class, and also calls SmmStartupThisAp(). I'm just saying that "all SMM code" using "the original StartupThisAp service" in fact means a single open source platform. > 2. A value of 1 is totally to disable the pre-allocating. I disagree; with value 1, the InitializeDataForMmMp() function allocates a new chunk, with room for 1 token. That occurs ahead of actually needing the token, therefore it is a preallocation. What you mean is that the chunking will not make a difference relative to the current behavior, because every token creation/use will require a new chunk allocation. Indeed. But that doesn't change the fact that the first token will be allocated prior to actually needing it, and if a platform never needs a token, then the first chunk will just be there doing nothing. > I prefer this > pre-allocating is activated by default given the reason #1. > > 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size. Would it be possible to *not* produce the MM MP protocol at all, if the PCD were 0? Because in that case, the DEC default for the PCD could be 64 (or any other value), and in OVMF we could set the PCD in the DSC file to zero. Thanks, Laszlo ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-29 7:17 ` Laszlo Ersek @ 2019-12-02 5:14 ` Ni, Ray 2019-12-02 12:56 ` Laszlo Ersek 0 siblings, 1 reply; 12+ messages in thread From: Ni, Ray @ 2019-12-02 5:14 UTC (permalink / raw) To: Laszlo Ersek, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming > > Laszlo, > > I agree with you to use to PCD for "token count per chunk". > > But I suggest the default value of that PCD can be at least 64. > > Reasons: > > 1. MM_MP is the new MP protocol in SMM environment and we expect > > all SMM code start to use the new protocol instead of the original > StartupThisAp > > service exposed through SMM System table. > > If we consider the older SmmStartupThisAp() / MmStartupThisAp() member > functions, the situation is almost the same -- (almost) no callers. In > edk2, only SmmPeriodicSmiLib seems to call this SMST (MMST) API. In > edk2-platforms, MinPlatformPkg uses the lib class, and also calls > SmmStartupThisAp(). > > I'm just saying that "all SMM code" using "the original StartupThisAp > service" in fact means a single open source platform. The benefit of MmMp is it introduces StartupAllAps service which is very useful in many core system. There are needs to call the new services in close source code. PI spec contains a more detailed description on the differences: * It is possible to invoke a procedure on multiple processors. * Supports blocking and non-blocking modes of operation. > > > 2. A value of 1 is totally to disable the pre-allocating. > > I disagree; with value 1, the InitializeDataForMmMp() function allocates > a new chunk, with room for 1 token. That occurs ahead of actually > needing the token, therefore it is a preallocation. > > What you mean is that the chunking will not make a difference relative > to the current behavior, because every token creation/use will require a > new chunk allocation. Indeed. > > But that doesn't change the fact that the first token will be allocated > prior to actually needing it, and if a platform never needs a token, > then the first chunk will just be there doing nothing. I agree 1 is different from disabling the pre-allocating. But 1 is not enough in a system that needs to use MmMp multiple times in a single SMI. > > > I prefer this > > pre-allocating is activated by default given the reason #1. > > > > 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size. > > Would it be possible to *not* produce the MM MP protocol at all, if the > PCD were 0? Because in that case, the DEC default for the PCD could be > 64 (or any other value), and in OVMF we could set the PCD in the DSC > file to zero. Back to your suggestion, I am a bit curious why you prefer to have a PCD to turn on/off MmMp instead of pre-allocating 2KB/4KB memory. Any PCD introduces a cost that future developers needs to understand the meaning of the PCD and know how to set the PCD. Thanks, Ray ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-12-02 5:14 ` Ni, Ray @ 2019-12-02 12:56 ` Laszlo Ersek 0 siblings, 0 replies; 12+ messages in thread From: Laszlo Ersek @ 2019-12-02 12:56 UTC (permalink / raw) To: Ni, Ray, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming On 12/02/19 06:14, Ni, Ray wrote: >>> Laszlo, >>> I agree with you to use to PCD for "token count per chunk". >>> But I suggest the default value of that PCD can be at least 64. >>> Reasons: >>> 1. MM_MP is the new MP protocol in SMM environment and we expect >>> all SMM code start to use the new protocol instead of the original >> StartupThisAp >>> service exposed through SMM System table. >> >> If we consider the older SmmStartupThisAp() / MmStartupThisAp() member >> functions, the situation is almost the same -- (almost) no callers. In >> edk2, only SmmPeriodicSmiLib seems to call this SMST (MMST) API. In >> edk2-platforms, MinPlatformPkg uses the lib class, and also calls >> SmmStartupThisAp(). >> >> I'm just saying that "all SMM code" using "the original StartupThisAp >> service" in fact means a single open source platform. > > The benefit of MmMp is it introduces StartupAllAps service which > is very useful in many core system. > There are needs to call the new services in close source code. > > PI spec contains a more detailed description on the differences: > * It is possible to invoke a procedure on multiple processors. > * Supports blocking and non-blocking modes of operation. > >> >>> 2. A value of 1 is totally to disable the pre-allocating. >> >> I disagree; with value 1, the InitializeDataForMmMp() function allocates >> a new chunk, with room for 1 token. That occurs ahead of actually >> needing the token, therefore it is a preallocation. >> >> What you mean is that the chunking will not make a difference relative >> to the current behavior, because every token creation/use will require a >> new chunk allocation. Indeed. >> >> But that doesn't change the fact that the first token will be allocated >> prior to actually needing it, and if a platform never needs a token, >> then the first chunk will just be there doing nothing. > > I agree 1 is different from disabling the pre-allocating. > But 1 is not enough in a system that needs to use MmMp multiple times > in a single SMI. > >> >>> I prefer this >>> pre-allocating is activated by default given the reason #1. >>> >>> 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size. >> >> Would it be possible to *not* produce the MM MP protocol at all, if the >> PCD were 0? Because in that case, the DEC default for the PCD could be >> 64 (or any other value), and in OVMF we could set the PCD in the DSC >> file to zero. > > Back to your suggestion, I am a bit curious why you prefer to have a > PCD to turn on/off MmMp instead of pre-allocating 2KB/4KB memory. I tend to perceive SMRAM as a "premium" (scarce) resource. I could be wrong. > Any PCD introduces a cost that future developers needs to understand > the meaning of the PCD and know how to set the PCD. Yes, I agree about that. But, at least, a PCD is a well-documented and explicit artifact. An allocated, but never used memory (SMRAM) block may similarly raise questions, *if* someone still remembers it later (or stumbles upon it). Documenting the SMRAM block (in the right place) can be difficult too. If we don't document it, then there seems to be no maintenance burden, but that's actually the worst solution (obscure behavior). Anyway I've said enough in this thread; please go ahead with the solution you deem best. Thanks Laszlo ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-28 13:57 ` Laszlo Ersek 2019-11-29 1:22 ` Ni, Ray @ 2019-11-29 3:02 ` Dong, Eric 2019-11-29 7:39 ` Laszlo Ersek 1 sibling, 1 reply; 12+ messages in thread From: Dong, Eric @ 2019-11-29 3:02 UTC (permalink / raw) To: devel@edk2.groups.io, lersek@redhat.com; +Cc: Ni, Ray, Gao, Liming [-- Attachment #1: Type: text/plain, Size: 11994 bytes --] Hi Laszlo, From: devel@edk2.groups.io [mailto:devel@edk2.groups.io] On Behalf Of Laszlo Ersek Sent: Thursday, November 28, 2019 9:57 PM To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com> Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. On 11/28/19 07:17, Eric Dong wrote: > v2 changes: > Minor update based on comments. > > v1 changes: > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388 > > Current logic allocate Token every time when need to use it. (1) Can you please clarify, in the *commit message*, what service the token allocation is needed for? To me it seems to be related to the SMM MP protocol, which was added for <https://bugzilla.tianocore.org/show_bug.cgi?id=1937>. Is that correct? [Eric] yes, it's a SMI latency issue occurred only in some special case. It's a regression issue caused by SMM MP protocol change. (For example, the CreateToken() function was introduced in commit 51dd408ae102, "UefiCpuPkg/PiSmmCpuDxeSmm: Enable MM MP Protocol", 2019-07-16.) (2) If the problem indeed depends on MM MP protocol usage, then please update Bugzilla 2388 too, with that information ("performance regression from BZ#1937"). If a platform does not use the MM MP protocol, then highlighting this information helps platforms determine that they are not affected. (I read Bugzilla 2388 soon after it was filed, and I was surprised, because I didn't remember any "human visible" SMI handling slowdown. Maybe it's not on the "human visible" scale; I'm not sure.) [Eric] Yes, it's a special case. I check the basic SMI (write data to B2 and collect the performance data) latency data and not found this issue. I found you already add note in this bugz. Thanks. > The logic > caused SMI latency raised to very high. Update logic to allocate Token > buffer at driver's entry point. Later use the token from the allocated > token buffer. Only when all the buffer have been used, then need to > allocate new buffer. > > Signed-off-by: Eric Dong <eric.dong@intel.com<mailto:eric.dong@intel.com>> > Cc: Ray Ni <ray.ni@intel.com<mailto:ray.ni@intel.com>> > Cc: Laszlo Ersek <lersek@redhat.com<mailto:lersek@redhat.com>> > --- > UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 ++++++++++++++++++++-- > UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ > 2 files changed, 68 insertions(+), 4 deletions(-) commenting on the header file changes first: [Eric] what's this sentence means? Follow above comments to update the comment message? > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > index 8c29f1a558..36fd0dae92 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h > @@ -198,6 +198,7 @@ typedef UINT32 SMM_CPU_ARRIVAL_EXCEPTIONS; > #define ARRIVAL_EXCEPTION_DELAYED 0x2 > #define ARRIVAL_EXCEPTION_SMI_DISABLED 0x4 > > +#define MAX_PRE_RESERVE_TOKEN_COUNT 0x512 (3) This is a very strange constant, 0x512: it's decimal 1298. Is that your intent? [Eric] add "0x" by mistake, will use PCD and remove this code. (4) Should we make this a global (SMRAM) variable instead, and initialize it from a PCD? [Eric] yes, will use PCD for this value. I'm not sure how large tokens are, but if a platform has no use for the [Eric] The value is 0x40 for the platform I used. MM MP protocol, then pre-allocating tokens for the MM MP protocol just wastes SMRAM. If I grep edk2 (at bd85bf54c268), and edk2-platforms (at 04889ec1198b), for "gEfiMmMpProtocolGuid", the only reported module is "UefiCpuPkg/PiSmmCpuDxeSmm" itself. In other words, there is no open source consumer for the new protocol. I don't think we should waste SMRAM on platforms that don't consume this protocol. Furthermore, I've applied the following patch, and rebuilt OVMF: > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > index 0685637c2b51..5874195ba96e 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > @@ -1119,6 +1119,8 @@ CreateToken ( > SPIN_LOCK *CpuToken; > UINTN SpinLockSize; > > + ASSERT (FALSE); > + > SpinLockSize = GetSpinLockProperties (); > CpuToken = AllocatePool (SpinLockSize); > ASSERT (CpuToken != NULL); then I've re-run my usual tests. The ASSERT() has not been reached, and so I think that the new SMRAM allocation would be pure loss (= unused) for OVMF. I suggest that we introduce a new PCD for "token count per chunk", and set the default value to 1, in the DEC file. (And we should copy the PCD into a global variable, in InitializeDataForMmMp().) This way, if a platform suddenly starts consuming tokens, it will work (there will always be a single available token). If they want to improve performance (using a chunked allocation style), they can set the PCD as they see fit. Back to the patch: On 11/28/19 07:17, Eric Dong wrote: > // > // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE. > // > @@ -217,6 +218,17 @@ typedef struct { > > #define PROCEDURE_TOKEN_FROM_LINK(a) CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE) > > +#define TOKEN_BUFFER_SIGNATURE SIGNATURE_32 ('T', 'K', 'B', 'S') > + > +typedef struct { > + UINTN Signature; > + LIST_ENTRY Link; > + > + UINT8 *Buffer; > +} TOKEN_BUFFER; > + > +#define TOKEN_BUFFER_FROM_LINK(a) CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE) > + > // > // Private structure for the SMM CPU module that is stored in DXE Runtime memory > // Contains the SMM Configuration Protocols that is produced. > @@ -243,6 +255,10 @@ typedef struct { > PROCEDURE_WRAPPER *ApWrapperFunc; > LIST_ENTRY TokenList; > > + LIST_ENTRY OldTokenBufList; > + > + UINT8 *CurrentTokenBuf; > + UINTN UsedTokenNum; // Only record tokens used in CurrentTokenBuf. > } SMM_CPU_PRIVATE_DATA; > > extern SMM_CPU_PRIVATE_DATA *gSmmCpuPrivate; > (5) There are two new lines above that are overlong. Did "PatchCheck.py" not complain? [Eric] I think PatchCheck tool not check the code width, only check the commit message width. Also edk2 coding style spec not has restrict rule for it, just a general rule. Content like below: 5.1 General Rules 5.1.1 Lines shall be 120 columns, or less https://github.com/tianocore/tianocore.github.io/wiki/EDK-II-Specifications > > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > index d8d2b6f444..4632e5b0c2 100644 > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > @@ -492,6 +492,23 @@ FreeTokens ( > { > LIST_ENTRY *Link; > PROCEDURE_TOKEN *ProcToken; > + TOKEN_BUFFER *TokenBuf; > + > + // > + // Not free the buffer, just clear the UsedTokenNum. In order to > + // avoid later trig allcate action again when need to use token. > + // > + gSmmCpuPrivate->UsedTokenNum = 0; (6) Here we do not zero out the current token buffer, but in CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool(). This is an inconsistency, we should call either ZeroMem() here (if zeroing matters), or AllocatePool() in the other two places (if zeroing does not matter). [Eric] Not catch your meaning here? Why can't use "=0" here? > + > + Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList); > + while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) { > + TokenBuf = TOKEN_BUFFER_FROM_LINK (Link); > + > + Link = RemoveEntryList (&TokenBuf->Link); > + > + FreePool (TokenBuf->Buffer); > + FreePool (TokenBuf); > + } > > while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) { > Link = GetFirstNode (&gSmmCpuPrivate->TokenList); > @@ -499,7 +516,6 @@ FreeTokens ( > > RemoveEntryList (&ProcToken->Link); > > - FreePool ((VOID *)ProcToken->ProcedureToken); > FreePool (ProcToken); > } > } > @@ -1115,13 +1131,35 @@ CreateToken ( > VOID > ) > { > - PROCEDURE_TOKEN *ProcToken; > + PROCEDURE_TOKEN *ProcToken; > SPIN_LOCK *CpuToken; > UINTN SpinLockSize; > + TOKEN_BUFFER *TokenBuf; > > SpinLockSize = GetSpinLockProperties (); > - CpuToken = AllocatePool (SpinLockSize); > - ASSERT (CpuToken != NULL); > + > + if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) { > + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n")); (7) This is an expected case, and not too much a corner case at that. Furthermore, the DEBUG message is in a performance-sensitive path. [Eric] this code is called by the caller. I don't think it's performance sensitive. What's your rule for "performance-sensitive path" ? I add this debug message because I want to know how often the pre allocate buffer is not enough. We can enlarge the buffer size to get better performance. Therefore, the message should be logged with the DEBUG_VERBOSE mask, and not with the DEBUG_INFO mask. (8) In addition, the DEBUG line is too long. [Eric] Same with [5], I think not break edk2 rules. (9) The DEBUG line also misses a space character. [Eric] will update in next version. > + > + // > + // Record current token buffer for later free action usage. > + // Current used token buffer not in this list. > + // > + TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER)); > + ASSERT (TokenBuf != NULL); > + TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE; > + TokenBuf->Buffer = gSmmCpuPrivate->CurrentTokenBuf; > + > + InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link); > + > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > + gSmmCpuPrivate->UsedTokenNum = 0; > + } > + > + CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum); (10) Two lines are overlong. [Eric] Same with [5], I think not break edk2 rules. > + gSmmCpuPrivate->UsedTokenNum++; > + > InitializeSpinLock (CpuToken); > AcquireSpinLock (CpuToken); > > @@ -1737,10 +1775,20 @@ InitializeDataForMmMp ( > VOID > ) > { > + UINTN SpinLockSize; > + > + SpinLockSize = GetSpinLockProperties (); > + DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize)); (11) For consistency with (7), you might want to downgrade this to DEBUG_VERBOSE too. But, in this case, I won't insist. [Eric] This value impact the final used buffer size, want to know this info in normal boot. So I use INFO level. (12) Space character missing after "DEBUG". [Eric] Will update it in next patch. (13) Are you proposing this patch for edk2-stable201911? (CC Liming) [Eric] it's too later, will not propose in this stable tag. Thanks Laszlo > + > + gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT); > + ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL); > + gSmmCpuPrivate->UsedTokenNum = 0; > + > gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus); > ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL); > > InitializeListHead (&gSmmCpuPrivate->TokenList); > + InitializeListHead (&gSmmCpuPrivate->OldTokenBufList); > } > > /** [-- Attachment #2: Type: text/html, Size: 41392 bytes --] ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-29 3:02 ` [edk2-devel] " Dong, Eric @ 2019-11-29 7:39 ` Laszlo Ersek 2019-12-04 5:33 ` Dong, Eric 0 siblings, 1 reply; 12+ messages in thread From: Laszlo Ersek @ 2019-11-29 7:39 UTC (permalink / raw) To: Dong, Eric, devel@edk2.groups.io; +Cc: Ni, Ray, Gao, Liming On 11/29/19 04:02, Dong, Eric wrote: > Hi Laszlo, > > From: devel@edk2.groups.io [mailto:devel@edk2.groups.io] On Behalf Of Laszlo Ersek > Sent: Thursday, November 28, 2019 9:57 PM > To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io > Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com> > Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. >> UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 ++++++++++++++++++++-- >> UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ >> 2 files changed, 68 insertions(+), 4 deletions(-) > > commenting on the header file changes first: > [Eric] what's this sentence means? Follow above comments to update the comment message? Your patch email included the C source file changes first, and the header file changes second. I find that more difficult to reason about than the opposite order (header first, C source second). Therefore, I split your email in two parts, and moved the H file changes to the top. And, I commented on those H file changes first. >> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c >> index d8d2b6f444..4632e5b0c2 100644 >> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c >> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c >> @@ -492,6 +492,23 @@ FreeTokens ( >> { >> LIST_ENTRY *Link; >> PROCEDURE_TOKEN *ProcToken; >> + TOKEN_BUFFER *TokenBuf; >> + >> + // >> + // Not free the buffer, just clear the UsedTokenNum. In order to >> + // avoid later trig allcate action again when need to use token. >> + // >> + gSmmCpuPrivate->UsedTokenNum = 0; > > (6) Here we do not zero out the current token buffer, but in > CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool(). > > This is an inconsistency, we should call either ZeroMem() here (if > zeroing matters), or AllocatePool() in the other two places (if zeroing > does not matter). > [Eric] Not catch your meaning here? Why can't use "=0" here? In both CreateToken() and InitializeDataForMmMp(), we perform *three* actions: (a) ensure CurrentTokenBuf is allocated, (b) clear CurrentTokenBuf, (c) set UsedTokenNum to zero. In FreeTokens(), we perform *two* actions: (a) ensure CurrentTokenBuf is allocated (it needs no explicit action, but it is an invariant nonetheless), (c) set UsedTokenNum to zero. Step (b) is missing from FreeTokens(). That's inconsistent with CreateToken() and InitializeDataForMmMp(). The question is whether the following predicate is important or not: - "all unused tokens in the current token buffer must be all-bits-zero" If this predicate is important, then you should add step (b) to FreeTokens(): ZeroMem ( gSmmCpuPrivate->CurrentTokenBuf, SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT ); If the predicate is not important, then you should replace the AllocateZeroPool() calls with AllocatePool(), in CreateToken() and InitializeDataForMmMp(). It is not consistent to clear CurrentTokenBuf in only *some* cases when UsedTokenNum is set to zero. >> @@ -1115,13 +1131,35 @@ CreateToken ( >> VOID >> ) >> { >> - PROCEDURE_TOKEN *ProcToken; >> + PROCEDURE_TOKEN *ProcToken; >> SPIN_LOCK *CpuToken; >> UINTN SpinLockSize; >> + TOKEN_BUFFER *TokenBuf; >> >> SpinLockSize = GetSpinLockProperties (); >> - CpuToken = AllocatePool (SpinLockSize); >> - ASSERT (CpuToken != NULL); >> + >> + if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) { >> + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n")); > > (7) This is an expected case, and not too much a corner case at that. > Furthermore, the DEBUG message is in a performance-sensitive path. > [Eric] this code is called by the caller. I don't think it's performance sensitive. What's your > rule for "performance-sensitive path" ? I add this debug message because I want to know > how often the pre allocate buffer is not enough. We can enlarge the buffer size to get > better performance. The patch is about making CreateToken() faster. It's done by allocating SMRAM less frequently (not on every call to CreateToken()). In some cases however, CreateToken() will still allocate SMRAM, and that's going to be a "spike" in latency, I expect. Adding a DEBUG_INFO to that code path makes the spike worse. It does not affect the throughput of CreateToken() much (the spike is amortized over MAX_PRE_RESERVE_TOKEN_COUNT calls), but it makes the distribution less even. I would use DEBUG_VERBOSE to avoid worsening the spike when a platform build does not ask for DEBUG_VERBOSE explicitly. If you disagree, I can live with DEBUG_INFO. Thanks Laszlo ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time. 2019-11-29 7:39 ` Laszlo Ersek @ 2019-12-04 5:33 ` Dong, Eric 0 siblings, 0 replies; 12+ messages in thread From: Dong, Eric @ 2019-12-04 5:33 UTC (permalink / raw) To: Laszlo Ersek, devel@edk2.groups.io; +Cc: Ni, Ray, Gao, Liming Hi Laszlo, > -----Original Message----- > From: Laszlo Ersek [mailto:lersek@redhat.com] > Sent: Friday, November 29, 2019 3:39 PM > To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io > Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com> > Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid > allocate Token every time. > > On 11/29/19 04:02, Dong, Eric wrote: > > Hi Laszlo, > > > > From: devel@edk2.groups.io [mailto:devel@edk2.groups.io] On Behalf Of > > Laszlo Ersek > > Sent: Thursday, November 28, 2019 9:57 PM > > To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io > > Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com> > > Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: > Avoid allocate Token every time. > > >> UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c | 56 > ++++++++++++++++++++-- > >> UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++ > >> 2 files changed, 68 insertions(+), 4 deletions(-) > > > > commenting on the header file changes first: > > [Eric] what's this sentence means? Follow above comments to update the > comment message? > > Your patch email included the C source file changes first, and the header file > changes second. I find that more difficult to reason about than the opposite > order (header first, C source second). > > Therefore, I split your email in two parts, and moved the H file changes to the > top. And, I commented on those H file changes first. > > >> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > >> b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > >> index d8d2b6f444..4632e5b0c2 100644 > >> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > >> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c > >> @@ -492,6 +492,23 @@ FreeTokens ( > >> { > >> LIST_ENTRY *Link; > >> PROCEDURE_TOKEN *ProcToken; > >> + TOKEN_BUFFER *TokenBuf; > >> + > >> + // > >> + // Not free the buffer, just clear the UsedTokenNum. In order to > >> + // avoid later trig allcate action again when need to use token. > >> + // > >> + gSmmCpuPrivate->UsedTokenNum = 0; > > > > (6) Here we do not zero out the current token buffer, but in > > CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool(). > > > > This is an inconsistency, we should call either ZeroMem() here (if > > zeroing matters), or AllocatePool() in the other two places (if > > zeroing does not matter). > > [Eric] Not catch your meaning here? Why can't use "=0" here? > > In both CreateToken() and InitializeDataForMmMp(), we perform *three* > actions: > (a) ensure CurrentTokenBuf is allocated, > (b) clear CurrentTokenBuf, > (c) set UsedTokenNum to zero. > > In FreeTokens(), we perform *two* actions: > (a) ensure CurrentTokenBuf is allocated (it needs no explicit action, but it is > an invariant nonetheless), > (c) set UsedTokenNum to zero. > > Step (b) is missing from FreeTokens(). That's inconsistent with > CreateToken() and InitializeDataForMmMp(). > > The question is whether the following predicate is important or not: > > - "all unused tokens in the current token buffer must be all-bits-zero" > > If this predicate is important, then you should add step (b) to > FreeTokens(): > > ZeroMem ( > gSmmCpuPrivate->CurrentTokenBuf, > SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT > ); > > If the predicate is not important, then you should replace the > AllocateZeroPool() calls with AllocatePool(), in CreateToken() and > InitializeDataForMmMp(). > > It is not consistent to clear CurrentTokenBuf in only *some* cases when > UsedTokenNum is set to zero. [[Eric]] In CreateToken function, I add code InitializeSpinLock() to initialize the Token space first, so "Clear CurrentTokenBuf" action is not must have item. I will remove the "Zero Memory" action in my next version changes. > > >> @@ -1115,13 +1131,35 @@ CreateToken ( > >> VOID > >> ) > >> { > >> - PROCEDURE_TOKEN *ProcToken; > >> + PROCEDURE_TOKEN *ProcToken; > >> SPIN_LOCK *CpuToken; > >> UINTN SpinLockSize; > >> + TOKEN_BUFFER *TokenBuf; > >> > >> SpinLockSize = GetSpinLockProperties (); > >> - CpuToken = AllocatePool (SpinLockSize); > >> - ASSERT (CpuToken != NULL); > >> + > >> + if (gSmmCpuPrivate->UsedTokenNum == > MAX_PRE_RESERVE_TOKEN_COUNT) { > >> + DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new > >> + buffer!\n")); > > > > (7) This is an expected case, and not too much a corner case at that. > > Furthermore, the DEBUG message is in a performance-sensitive path. > > [Eric] this code is called by the caller. I don't think it's > > performance sensitive. What's your rule for "performance-sensitive > > path" ? I add this debug message because I want to know how often the > > pre allocate buffer is not enough. We can enlarge the buffer size to get > better performance. > > The patch is about making CreateToken() faster. It's done by allocating > SMRAM less frequently (not on every call to CreateToken()). In some cases > however, CreateToken() will still allocate SMRAM, and that's going to be a > "spike" in latency, I expect. > > Adding a DEBUG_INFO to that code path makes the spike worse. It does not > affect the throughput of CreateToken() much (the spike is amortized over > MAX_PRE_RESERVE_TOKEN_COUNT calls), but it makes the distribution less > even. I would use DEBUG_VERBOSE to avoid worsening the spike when a > platform build does not ask for DEBUG_VERBOSE explicitly. > > If you disagree, I can live with DEBUG_INFO. > [[Eric]] I add debug message to let us know the frequency of the allocation action. It make sense to change the level to VERBOSE. I will update it in my next version changes. Thanks, Eric > Thanks > Laszlo ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2019-12-04 5:44 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2019-11-28 6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric 2019-11-28 6:37 ` [edk2-devel] " Ni, Ray 2019-11-28 13:15 ` Philippe Mathieu-Daudé 2019-12-04 5:44 ` Dong, Eric 2019-11-28 13:57 ` Laszlo Ersek 2019-11-29 1:22 ` Ni, Ray 2019-11-29 7:17 ` Laszlo Ersek 2019-12-02 5:14 ` Ni, Ray 2019-12-02 12:56 ` Laszlo Ersek 2019-11-29 3:02 ` [edk2-devel] " Dong, Eric 2019-11-29 7:39 ` Laszlo Ersek 2019-12-04 5:33 ` Dong, Eric
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox