[PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.

public inbox for devel@edk2.groups.io
 help / color / mirror / Atom feed

* [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
@ 2019-11-28  6:17 Dong, Eric
  2019-11-28  6:37 ` [edk2-devel] " Ni, Ray
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Dong, Eric @ 2019-11-28  6:17 UTC (permalink / raw)
  To: devel; +Cc: Ray Ni, Laszlo Ersek

v2 changes:
  Minor update based on comments.

v1 changes:
REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388

Current logic allocate Token every time when need to use it. The logic
caused SMI latency raised to very high. Update logic to allocate Token
buffer at driver's entry point. Later use the token from the allocated
token buffer. Only when all the buffer have been used, then need to
allocate new buffer.

Signed-off-by: Eric Dong <eric.dong@intel.com>
Cc: Ray Ni <ray.ni@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
---
 UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56 ++++++++++++++++++++--
 UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
index d8d2b6f444..4632e5b0c2 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
@@ -492,6 +492,23 @@ FreeTokens (
 {
   LIST_ENTRY            *Link;
   PROCEDURE_TOKEN       *ProcToken;
+  TOKEN_BUFFER          *TokenBuf;
+
+  //
+  // Not free the buffer, just clear the UsedTokenNum. In order to
+  // avoid later trig allcate action again when need to use token.
+  //
+  gSmmCpuPrivate->UsedTokenNum = 0;
+
+  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
+  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
+    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
+
+    Link = RemoveEntryList (&TokenBuf->Link);
+    
+    FreePool (TokenBuf->Buffer);
+    FreePool (TokenBuf);
+  }
 
   while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
     Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
@@ -499,7 +516,6 @@ FreeTokens (
 
     RemoveEntryList (&ProcToken->Link);
 
-    FreePool ((VOID *)ProcToken->ProcedureToken);
     FreePool (ProcToken);
   }
 }
@@ -1115,13 +1131,35 @@ CreateToken (
   VOID
   )
 {
-  PROCEDURE_TOKEN    *ProcToken;
+  PROCEDURE_TOKEN     *ProcToken;
   SPIN_LOCK           *CpuToken;
   UINTN               SpinLockSize;
+  TOKEN_BUFFER        *TokenBuf;
 
   SpinLockSize = GetSpinLockProperties ();
-  CpuToken = AllocatePool (SpinLockSize);
-  ASSERT (CpuToken != NULL);
+
+  if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) {
+    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n"));
+
+    //
+    // Record current token buffer for later free action usage.
+    // Current used token buffer not in this list.
+    //
+    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
+    ASSERT (TokenBuf != NULL);
+    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
+    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
+
+    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
+
+    gSmmCpuPrivate->CurrentTokenBuf   = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
+    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
+    gSmmCpuPrivate->UsedTokenNum = 0;
+  }
+
+  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
+  gSmmCpuPrivate->UsedTokenNum++;
+
   InitializeSpinLock (CpuToken);
   AcquireSpinLock (CpuToken);
 
@@ -1737,10 +1775,20 @@ InitializeDataForMmMp (
   VOID
   )
 {
+  UINTN              SpinLockSize;
+
+  SpinLockSize = GetSpinLockProperties ();
+  DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize));
+
+  gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
+  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
+  gSmmCpuPrivate->UsedTokenNum = 0;
+
   gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus);
   ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
 
   InitializeListHead (&gSmmCpuPrivate->TokenList);
+  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
 }
 
 /**
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
index 8c29f1a558..36fd0dae92 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
@@ -198,6 +198,7 @@ typedef UINT32                              SMM_CPU_ARRIVAL_EXCEPTIONS;
 #define ARRIVAL_EXCEPTION_DELAYED           0x2
 #define ARRIVAL_EXCEPTION_SMI_DISABLED      0x4
 
+#define MAX_PRE_RESERVE_TOKEN_COUNT                     0x512
 //
 // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE.
 //
@@ -217,6 +218,17 @@ typedef struct {
 
 #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
 
+#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
+
+typedef struct {
+  UINTN                   Signature;
+  LIST_ENTRY              Link;
+
+  UINT8                   *Buffer;
+} TOKEN_BUFFER;
+
+#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE)
+
 //
 // Private structure for the SMM CPU module that is stored in DXE Runtime memory
 // Contains the SMM Configuration Protocols that is produced.
@@ -243,6 +255,10 @@ typedef struct {
   PROCEDURE_WRAPPER               *ApWrapperFunc;
   LIST_ENTRY                      TokenList;
 
+  LIST_ENTRY                      OldTokenBufList;
+
+  UINT8                           *CurrentTokenBuf;
+  UINTN                           UsedTokenNum;     // Only record tokens used in CurrentTokenBuf.
 } SMM_CPU_PRIVATE_DATA;
 
 extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
-- 
2.23.0.windows.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-28  6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric
@ 2019-11-28  6:37 ` Ni, Ray
  2019-11-28 13:15 ` Philippe Mathieu-Daudé
  2019-11-28 13:57 ` Laszlo Ersek
  2 siblings, 0 replies; 12+ messages in thread
From: Ni, Ray @ 2019-11-28  6:37 UTC (permalink / raw)
  To: devel@edk2.groups.io, Dong, Eric; +Cc: Laszlo Ersek

Reviewed-by: Ray Ni <ray.ni@intel.com>

> -----Original Message-----
> From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Dong,
> Eric
> Sent: Thursday, November 28, 2019 2:17 PM
> To: devel@edk2.groups.io
> Cc: Ni, Ray <ray.ni@intel.com>; Laszlo Ersek <lersek@redhat.com>
> Subject: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid
> allocate Token every time.
> 
> v2 changes:
> 
>   Minor update based on comments.
> 
> 
> 
> v1 changes:
> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
> 
> Current logic allocate Token every time when need to use it. The logic
> caused SMI latency raised to very high. Update logic to allocate Token
> buffer at driver's entry point. Later use the token from the allocated
> token buffer. Only when all the buffer have been used, then need to
> allocate new buffer.
> 
> Signed-off-by: Eric Dong <eric.dong@intel.com>
> Cc: Ray Ni <ray.ni@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> ---
>  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56
> ++++++++++++++++++++--
>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
>  2 files changed, 68 insertions(+), 4 deletions(-)
> 
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index d8d2b6f444..4632e5b0c2 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -492,6 +492,23 @@ FreeTokens (
>  {
> 
>    LIST_ENTRY            *Link;
> 
>    PROCEDURE_TOKEN       *ProcToken;
> 
> +  TOKEN_BUFFER          *TokenBuf;
> 
> +
> 
> +  //
> 
> +  // Not free the buffer, just clear the UsedTokenNum. In order to
> 
> +  // avoid later trig allcate action again when need to use token.
> 
> +  //
> 
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> 
> +
> 
> +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> 
> +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> 
> +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> 
> +
> 
> +    Link = RemoveEntryList (&TokenBuf->Link);
> 
> +
> 
> +    FreePool (TokenBuf->Buffer);
> 
> +    FreePool (TokenBuf);
> 
> +  }
> 
> 
> 
>    while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
> 
>      Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
> 
> @@ -499,7 +516,6 @@ FreeTokens (
> 
> 
>      RemoveEntryList (&ProcToken->Link);
> 
> 
> 
> -    FreePool ((VOID *)ProcToken->ProcedureToken);
> 
>      FreePool (ProcToken);
> 
>    }
> 
>  }
> 
> @@ -1115,13 +1131,35 @@ CreateToken (
>    VOID
> 
>    )
> 
>  {
> 
> -  PROCEDURE_TOKEN    *ProcToken;
> 
> +  PROCEDURE_TOKEN     *ProcToken;
> 
>    SPIN_LOCK           *CpuToken;
> 
>    UINTN               SpinLockSize;
> 
> +  TOKEN_BUFFER        *TokenBuf;
> 
> 
> 
>    SpinLockSize = GetSpinLockProperties ();
> 
> -  CpuToken = AllocatePool (SpinLockSize);
> 
> -  ASSERT (CpuToken != NULL);
> 
> +
> 
> +  if (gSmmCpuPrivate->UsedTokenNum ==
> MAX_PRE_RESERVE_TOKEN_COUNT) {
> 
> +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new
> buffer!\n"));
> 
> +
> 
> +    //
> 
> +    // Record current token buffer for later free action usage.
> 
> +    // Current used token buffer not in this list.
> 
> +    //
> 
> +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> 
> +    ASSERT (TokenBuf != NULL);
> 
> +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> 
> +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> 
> +
> 
> +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
> 
> +
> 
> +    gSmmCpuPrivate->CurrentTokenBuf   = AllocateZeroPool (SpinLockSize *
> MAX_PRE_RESERVE_TOKEN_COUNT);
> 
> +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> 
> +    gSmmCpuPrivate->UsedTokenNum = 0;
> 
> +  }
> 
> +
> 
> +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf +
> SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
> 
> +  gSmmCpuPrivate->UsedTokenNum++;
> 
> +
> 
>    InitializeSpinLock (CpuToken);
> 
>    AcquireSpinLock (CpuToken);
> 
> 
> 
> @@ -1737,10 +1775,20 @@ InitializeDataForMmMp (
>    VOID
> 
>    )
> 
>  {
> 
> +  UINTN              SpinLockSize;
> 
> +
> 
> +  SpinLockSize = GetSpinLockProperties ();
> 
> +  DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize));
> 
> +
> 
> +  gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize *
> MAX_PRE_RESERVE_TOKEN_COUNT);
> 
> +  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> 
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> 
> +
> 
>    gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof
> (PROCEDURE_WRAPPER) * gSmmCpuPrivate-
> >SmmCoreEntryContext.NumberOfCpus);
> 
>    ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
> 
> 
> 
>    InitializeListHead (&gSmmCpuPrivate->TokenList);
> 
> +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
> 
>  }
> 
> 
> 
>  /**
> 
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> index 8c29f1a558..36fd0dae92 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> @@ -198,6 +198,7 @@ typedef UINT32
> SMM_CPU_ARRIVAL_EXCEPTIONS;
>  #define ARRIVAL_EXCEPTION_DELAYED           0x2
> 
>  #define ARRIVAL_EXCEPTION_SMI_DISABLED      0x4
> 
> 
> 
> +#define MAX_PRE_RESERVE_TOKEN_COUNT                     0x512
> 
>  //
> 
>  // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE.
> 
>  //
> 
> @@ -217,6 +218,17 @@ typedef struct {
> 
> 
>  #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN,
> Link, PROCEDURE_TOKEN_SIGNATURE)
> 
> 
> 
> +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> 
> +
> 
> +typedef struct {
> 
> +  UINTN                   Signature;
> 
> +  LIST_ENTRY              Link;
> 
> +
> 
> +  UINT8                   *Buffer;
> 
> +} TOKEN_BUFFER;
> 
> +
> 
> +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link,
> TOKEN_BUFFER_SIGNATURE)
> 
> +
> 
>  //
> 
>  // Private structure for the SMM CPU module that is stored in DXE Runtime
> memory
> 
>  // Contains the SMM Configuration Protocols that is produced.
> 
> @@ -243,6 +255,10 @@ typedef struct {
>    PROCEDURE_WRAPPER               *ApWrapperFunc;
> 
>    LIST_ENTRY                      TokenList;
> 
> 
> 
> +  LIST_ENTRY                      OldTokenBufList;
> 
> +
> 
> +  UINT8                           *CurrentTokenBuf;
> 
> +  UINTN                           UsedTokenNum;     // Only record tokens used in
> CurrentTokenBuf.
> 
>  } SMM_CPU_PRIVATE_DATA;
> 
> 
> 
>  extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
> 
> --
> 2.23.0.windows.1
> 
> 
> -=-=-=-=-=-=
> Groups.io Links: You receive all messages sent to this group.
> 
> View/Reply Online (#51442): https://edk2.groups.io/g/devel/message/51442
> Mute This Topic: https://groups.io/mt/63850169/1712937
> Group Owner: devel+owner@edk2.groups.io
> Unsubscribe: https://edk2.groups.io/g/devel/unsub  [ray.ni@intel.com]
> -=-=-=-=-=-=


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-28  6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric
  2019-11-28  6:37 ` [edk2-devel] " Ni, Ray
@ 2019-11-28 13:15 ` Philippe Mathieu-Daudé
  2019-12-04  5:44   ` Dong, Eric
  2019-11-28 13:57 ` Laszlo Ersek
  2 siblings, 1 reply; 12+ messages in thread
From: Philippe Mathieu-Daudé @ 2019-11-28 13:15 UTC (permalink / raw)
  To: devel, eric.dong; +Cc: Ray Ni, Laszlo Ersek

Hi Eric,

On 11/28/19 7:17 AM, Dong, Eric via Groups.Io wrote:
> v2 changes:
>    Minor update based on comments.
> 
> v1 changes:

Nitpick, previous comments should not go into the commit description, 
either on a cover letter, or after the '---' marker so they get stripped 
out by git-am.

Another nitpick, remove the trailing dot in commit subject.

> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
> 
> Current logic allocate Token every time when need to use it. The logic
> caused SMI latency raised to very high. Update logic to allocate Token
> buffer at driver's entry point. Later use the token from the allocated
> token buffer. Only when all the buffer have been used, then need to
> allocate new buffer.
> 
> Signed-off-by: Eric Dong <eric.dong@intel.com>
> Cc: Ray Ni <ray.ni@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> ---
>   UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56 ++++++++++++++++++++--
>   UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
>   2 files changed, 68 insertions(+), 4 deletions(-)
> 
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index d8d2b6f444..4632e5b0c2 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -492,6 +492,23 @@ FreeTokens (
>   {
>     LIST_ENTRY            *Link;
>     PROCEDURE_TOKEN       *ProcToken;
> +  TOKEN_BUFFER          *TokenBuf;
> +
> +  //
> +  // Not free the buffer, just clear the UsedTokenNum. In order to
> +  // avoid later trig allcate action again when need to use token.

Maybe: "In order to avoid triggering allocate action when we need to use 
the token, do not free the buffer."

Patch looks good otherwise.

> +  //
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> +
> +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> +
> +    Link = RemoveEntryList (&TokenBuf->Link);
> +
> +    FreePool (TokenBuf->Buffer);
> +    FreePool (TokenBuf);
> +  }
>   
>     while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
>       Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
> @@ -499,7 +516,6 @@ FreeTokens (
>   
>       RemoveEntryList (&ProcToken->Link);
>   
> -    FreePool ((VOID *)ProcToken->ProcedureToken);
>       FreePool (ProcToken);
>     }
>   }
> @@ -1115,13 +1131,35 @@ CreateToken (
>     VOID
>     )
>   {
> -  PROCEDURE_TOKEN    *ProcToken;
> +  PROCEDURE_TOKEN     *ProcToken;
>     SPIN_LOCK           *CpuToken;
>     UINTN               SpinLockSize;
> +  TOKEN_BUFFER        *TokenBuf;
>   
>     SpinLockSize = GetSpinLockProperties ();
> -  CpuToken = AllocatePool (SpinLockSize);
> -  ASSERT (CpuToken != NULL);
> +
> +  if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) {
> +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n"));
> +
> +    //
> +    // Record current token buffer for later free action usage.
> +    // Current used token buffer not in this list.
> +    //
> +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> +    ASSERT (TokenBuf != NULL);
> +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> +
> +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
> +
> +    gSmmCpuPrivate->CurrentTokenBuf   = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
> +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +    gSmmCpuPrivate->UsedTokenNum = 0;
> +  }
> +
> +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
> +  gSmmCpuPrivate->UsedTokenNum++;
> +
>     InitializeSpinLock (CpuToken);
>     AcquireSpinLock (CpuToken);
>   
> @@ -1737,10 +1775,20 @@ InitializeDataForMmMp (
>     VOID
>     )
>   {
> +  UINTN              SpinLockSize;
> +
> +  SpinLockSize = GetSpinLockProperties ();
> +  DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize));
> +
> +  gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
> +  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> +
>     gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus);
>     ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
>   
>     InitializeListHead (&gSmmCpuPrivate->TokenList);
> +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
>   }
>   
>   /**
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> index 8c29f1a558..36fd0dae92 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> @@ -198,6 +198,7 @@ typedef UINT32                              SMM_CPU_ARRIVAL_EXCEPTIONS;
>   #define ARRIVAL_EXCEPTION_DELAYED           0x2
>   #define ARRIVAL_EXCEPTION_SMI_DISABLED      0x4
>   
> +#define MAX_PRE_RESERVE_TOKEN_COUNT                     0x512
>   //
>   // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE.
>   //
> @@ -217,6 +218,17 @@ typedef struct {
>   
>   #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
>   
> +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> +
> +typedef struct {
> +  UINTN                   Signature;
> +  LIST_ENTRY              Link;
> +
> +  UINT8                   *Buffer;
> +} TOKEN_BUFFER;
> +
> +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE)
> +
>   //
>   // Private structure for the SMM CPU module that is stored in DXE Runtime memory
>   // Contains the SMM Configuration Protocols that is produced.
> @@ -243,6 +255,10 @@ typedef struct {
>     PROCEDURE_WRAPPER               *ApWrapperFunc;
>     LIST_ENTRY                      TokenList;
>   
> +  LIST_ENTRY                      OldTokenBufList;
> +
> +  UINT8                           *CurrentTokenBuf;
> +  UINTN                           UsedTokenNum;     // Only record tokens used in CurrentTokenBuf.
>   } SMM_CPU_PRIVATE_DATA;
>   
>   extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-28 13:15 ` Philippe Mathieu-Daudé
@ 2019-12-04  5:44   ` Dong, Eric
  0 siblings, 0 replies; 12+ messages in thread
From: Dong, Eric @ 2019-12-04  5:44 UTC (permalink / raw)
  To: Philippe Mathieu-Daudé, devel@edk2.groups.io; +Cc: Ni, Ray, Laszlo Ersek

Hi Philippe,

> -----Original Message-----
> From: Philippe Mathieu-Daudé [mailto:philmd@redhat.com]
> Sent: Thursday, November 28, 2019 9:16 PM
> To: devel@edk2.groups.io; Dong, Eric <eric.dong@intel.com>
> Cc: Ni, Ray <ray.ni@intel.com>; Laszlo Ersek <lersek@redhat.com>
> Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid
> allocate Token every time.
> 
> Hi Eric,
> 
> On 11/28/19 7:17 AM, Dong, Eric via Groups.Io wrote:
> > v2 changes:
> >    Minor update based on comments.
> >
> > v1 changes:
> 
> Nitpick, previous comments should not go into the commit description,
> either on a cover letter, or after the '---' marker so they get stripped out by
> git-am.
[[Eric]] got it, will use this format in my next version change. Thanks.

> 
> Another nitpick, remove the trailing dot in commit subject.
[[Eric]] Update in my next version change.

> 
> > REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
> >
> > Current logic allocate Token every time when need to use it. The logic
> > caused SMI latency raised to very high. Update logic to allocate Token
> > buffer at driver's entry point. Later use the token from the allocated
> > token buffer. Only when all the buffer have been used, then need to
> > allocate new buffer.
> >
> > Signed-off-by: Eric Dong <eric.dong@intel.com>
> > Cc: Ray Ni <ray.ni@intel.com>
> > Cc: Laszlo Ersek <lersek@redhat.com>
> > ---
> >   UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56
> ++++++++++++++++++++--
> >   UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
> >   2 files changed, 68 insertions(+), 4 deletions(-)
> >
> > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > index d8d2b6f444..4632e5b0c2 100644
> > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> > @@ -492,6 +492,23 @@ FreeTokens (
> >   {
> >     LIST_ENTRY            *Link;
> >     PROCEDURE_TOKEN       *ProcToken;
> > +  TOKEN_BUFFER          *TokenBuf;
> > +
> > +  //
> > +  // Not free the buffer, just clear the UsedTokenNum. In order to
> > + // avoid later trig allcate action again when need to use token.
> 
> Maybe: "In order to avoid triggering allocate action when we need to use the
> token, do not free the buffer."
[[Eric]] Update it in my next version change.

Thanks,
Eric
> 
> Patch looks good otherwise.
> 
> > +  //
> > +  gSmmCpuPrivate->UsedTokenNum = 0;
> > +
> > +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> > +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> > +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> > +
> > +    Link = RemoveEntryList (&TokenBuf->Link);
> > +
> > +    FreePool (TokenBuf->Buffer);
> > +    FreePool (TokenBuf);
> > +  }
> >
> >     while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
> >       Link = GetFirstNode (&gSmmCpuPrivate->TokenList); @@ -499,7
> > +516,6 @@ FreeTokens (
> >
> >       RemoveEntryList (&ProcToken->Link);
> >
> > -    FreePool ((VOID *)ProcToken->ProcedureToken);
> >       FreePool (ProcToken);
> >     }
> >   }
> > @@ -1115,13 +1131,35 @@ CreateToken (
> >     VOID
> >     )
> >   {
> > -  PROCEDURE_TOKEN    *ProcToken;
> > +  PROCEDURE_TOKEN     *ProcToken;
> >     SPIN_LOCK           *CpuToken;
> >     UINTN               SpinLockSize;
> > +  TOKEN_BUFFER        *TokenBuf;
> >
> >     SpinLockSize = GetSpinLockProperties ();
> > -  CpuToken = AllocatePool (SpinLockSize);
> > -  ASSERT (CpuToken != NULL);
> > +
> > +  if (gSmmCpuPrivate->UsedTokenNum ==
> MAX_PRE_RESERVE_TOKEN_COUNT) {
> > +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new
> > + buffer!\n"));
> > +
> > +    //
> > +    // Record current token buffer for later free action usage.
> > +    // Current used token buffer not in this list.
> > +    //
> > +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> > +    ASSERT (TokenBuf != NULL);
> > +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> > +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> > +
> > +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList,
> > + &TokenBuf->Link);
> > +
> > +    gSmmCpuPrivate->CurrentTokenBuf   = AllocateZeroPool (SpinLockSize
> * MAX_PRE_RESERVE_TOKEN_COUNT);
> > +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> > +    gSmmCpuPrivate->UsedTokenNum = 0;  }
> > +
> > +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf +
> > + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);
> > + gSmmCpuPrivate->UsedTokenNum++;
> > +
> >     InitializeSpinLock (CpuToken);
> >     AcquireSpinLock (CpuToken);
> >
> > @@ -1737,10 +1775,20 @@ InitializeDataForMmMp (
> >     VOID
> >     )
> >   {
> > +  UINTN              SpinLockSize;
> > +
> > +  SpinLockSize = GetSpinLockProperties ();  DEBUG((DEBUG_INFO,
> > + "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize));
> > +
> > +  gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize *
> > + MAX_PRE_RESERVE_TOKEN_COUNT);  ASSERT
> > + (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> > + gSmmCpuPrivate->UsedTokenNum = 0;
> > +
> >     gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof
> (PROCEDURE_WRAPPER) * gSmmCpuPrivate-
> >SmmCoreEntryContext.NumberOfCpus);
> >     ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
> >
> >     InitializeListHead (&gSmmCpuPrivate->TokenList);
> > +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
> >   }
> >
> >   /**
> > diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > index 8c29f1a558..36fd0dae92 100644
> > --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> > @@ -198,6 +198,7 @@ typedef UINT32
> SMM_CPU_ARRIVAL_EXCEPTIONS;
> >   #define ARRIVAL_EXCEPTION_DELAYED           0x2
> >   #define ARRIVAL_EXCEPTION_SMI_DISABLED      0x4
> >
> > +#define MAX_PRE_RESERVE_TOKEN_COUNT                     0x512
> >   //
> >   // Wrapper used to convert EFI_AP_PROCEDURE2 and
> EFI_AP_PROCEDURE.
> >   //
> > @@ -217,6 +218,17 @@ typedef struct {
> >
> >   #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN,
> Link,
> > PROCEDURE_TOKEN_SIGNATURE)
> >
> > +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> > +
> > +typedef struct {
> > +  UINTN                   Signature;
> > +  LIST_ENTRY              Link;
> > +
> > +  UINT8                   *Buffer;
> > +} TOKEN_BUFFER;
> > +
> > +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link,
> > +TOKEN_BUFFER_SIGNATURE)
> > +
> >   //
> >   // Private structure for the SMM CPU module that is stored in DXE
> Runtime memory
> >   // Contains the SMM Configuration Protocols that is produced.
> > @@ -243,6 +255,10 @@ typedef struct {
> >     PROCEDURE_WRAPPER               *ApWrapperFunc;
> >     LIST_ENTRY                      TokenList;
> >
> > +  LIST_ENTRY                      OldTokenBufList;
> > +
> > +  UINT8                           *CurrentTokenBuf;
> > +  UINTN                           UsedTokenNum;     // Only record tokens used in
> CurrentTokenBuf.
> >   } SMM_CPU_PRIVATE_DATA;
> >
> >   extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
> >

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-28  6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric
  2019-11-28  6:37 ` [edk2-devel] " Ni, Ray
  2019-11-28 13:15 ` Philippe Mathieu-Daudé
@ 2019-11-28 13:57 ` Laszlo Ersek
  2019-11-29  1:22   ` Ni, Ray
  2019-11-29  3:02   ` [edk2-devel] " Dong, Eric
  2 siblings, 2 replies; 12+ messages in thread
From: Laszlo Ersek @ 2019-11-28 13:57 UTC (permalink / raw)
  To: Eric Dong, devel; +Cc: Ray Ni, Liming Gao

On 11/28/19 07:17, Eric Dong wrote:
> v2 changes:
>   Minor update based on comments.
>
> v1 changes:
> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
>
> Current logic allocate Token every time when need to use it.

(1) Can you please clarify, in the *commit message*, what service the
token allocation is needed for?

To me it seems to be related to the SMM MP protocol, which was added for
<https://bugzilla.tianocore.org/show_bug.cgi?id=1937>. Is that correct?

(For example, the CreateToken() function was introduced in commit
51dd408ae102, "UefiCpuPkg/PiSmmCpuDxeSmm: Enable MM MP Protocol",
2019-07-16.)


(2) If the problem indeed depends on MM MP protocol usage, then please
update Bugzilla 2388 too, with that information ("performance regression
from BZ#1937"). If a platform does not use the MM MP protocol, then
highlighting this information helps platforms determine that they are
not affected.

(I read Bugzilla 2388 soon after it was filed, and I was surprised,
because I didn't remember any "human visible" SMI handling slowdown.

Maybe it's not on the "human visible" scale; I'm not sure.)


>                                                              The logic
> caused SMI latency raised to very high. Update logic to allocate Token
> buffer at driver's entry point. Later use the token from the allocated
> token buffer. Only when all the buffer have been used, then need to
> allocate new buffer.
>
> Signed-off-by: Eric Dong <eric.dong@intel.com>
> Cc: Ray Ni <ray.ni@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> ---
>  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56 ++++++++++++++++++++--
>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
>  2 files changed, 68 insertions(+), 4 deletions(-)

commenting on the header file changes first:

> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> index 8c29f1a558..36fd0dae92 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> @@ -198,6 +198,7 @@ typedef UINT32                              SMM_CPU_ARRIVAL_EXCEPTIONS;
>  #define ARRIVAL_EXCEPTION_DELAYED           0x2
>  #define ARRIVAL_EXCEPTION_SMI_DISABLED      0x4
>
> +#define MAX_PRE_RESERVE_TOKEN_COUNT                     0x512

(3) This is a very strange constant, 0x512: it's decimal 1298. Is that
your intent?


(4) Should we make this a global (SMRAM) variable instead, and
initialize it from a PCD?

I'm not sure how large tokens are, but if a platform has no use for the
MM MP protocol, then pre-allocating tokens for the MM MP protocol just
wastes SMRAM.

If I grep edk2 (at bd85bf54c268), and edk2-platforms (at 04889ec1198b),
for "gEfiMmMpProtocolGuid", the only reported module is
"UefiCpuPkg/PiSmmCpuDxeSmm" itself. In other words, there is no open
source consumer for the new protocol. I don't think we should waste
SMRAM on platforms that don't consume this protocol.

Furthermore, I've applied the following patch, and rebuilt OVMF:

> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index 0685637c2b51..5874195ba96e 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -1119,6 +1119,8 @@ CreateToken (
>    SPIN_LOCK           *CpuToken;
>    UINTN               SpinLockSize;
>
> +  ASSERT (FALSE);
> +
>    SpinLockSize = GetSpinLockProperties ();
>    CpuToken = AllocatePool (SpinLockSize);
>    ASSERT (CpuToken != NULL);

then I've re-run my usual tests. The ASSERT() has not been reached, and
so I think that the new SMRAM allocation would be pure loss (= unused)
for OVMF.

I suggest that we introduce a new PCD for "token count per chunk", and
set the default value to 1, in the DEC file. (And we should copy the PCD
into a global variable, in InitializeDataForMmMp().)

This way, if a platform suddenly starts consuming tokens, it will work
(there will always be a single available token). If they want to improve
performance (using a chunked allocation style), they can set the PCD as
they see fit.

Back to the patch:

On 11/28/19 07:17, Eric Dong wrote:
>  //
>  // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE.
>  //
> @@ -217,6 +218,17 @@ typedef struct {
>
>  #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
>
> +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> +
> +typedef struct {
> +  UINTN                   Signature;
> +  LIST_ENTRY              Link;
> +
> +  UINT8                   *Buffer;
> +} TOKEN_BUFFER;
> +
> +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE)
> +
>  //
>  // Private structure for the SMM CPU module that is stored in DXE Runtime memory
>  // Contains the SMM Configuration Protocols that is produced.
> @@ -243,6 +255,10 @@ typedef struct {
>    PROCEDURE_WRAPPER               *ApWrapperFunc;
>    LIST_ENTRY                      TokenList;
>
> +  LIST_ENTRY                      OldTokenBufList;
> +
> +  UINT8                           *CurrentTokenBuf;
> +  UINTN                           UsedTokenNum;     // Only record tokens used in CurrentTokenBuf.
>  } SMM_CPU_PRIVATE_DATA;
>
>  extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
>

(5) There are two new lines above that are overlong.

Did "PatchCheck.py" not complain?

>
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index d8d2b6f444..4632e5b0c2 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -492,6 +492,23 @@ FreeTokens (
>  {
>    LIST_ENTRY            *Link;
>    PROCEDURE_TOKEN       *ProcToken;
> +  TOKEN_BUFFER          *TokenBuf;
> +
> +  //
> +  // Not free the buffer, just clear the UsedTokenNum. In order to
> +  // avoid later trig allcate action again when need to use token.
> +  //
> +  gSmmCpuPrivate->UsedTokenNum = 0;

(6) Here we do not zero out the current token buffer, but in
CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool().

This is an inconsistency, we should call either ZeroMem() here (if
zeroing matters), or AllocatePool() in the other two places (if zeroing
does not matter).


> +
> +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> +
> +    Link = RemoveEntryList (&TokenBuf->Link);
> +
> +    FreePool (TokenBuf->Buffer);
> +    FreePool (TokenBuf);
> +  }
>
>    while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
>      Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
> @@ -499,7 +516,6 @@ FreeTokens (
>
>      RemoveEntryList (&ProcToken->Link);
>
> -    FreePool ((VOID *)ProcToken->ProcedureToken);
>      FreePool (ProcToken);
>    }
>  }
> @@ -1115,13 +1131,35 @@ CreateToken (
>    VOID
>    )
>  {
> -  PROCEDURE_TOKEN    *ProcToken;
> +  PROCEDURE_TOKEN     *ProcToken;
>    SPIN_LOCK           *CpuToken;
>    UINTN               SpinLockSize;
> +  TOKEN_BUFFER        *TokenBuf;
>
>    SpinLockSize = GetSpinLockProperties ();
> -  CpuToken = AllocatePool (SpinLockSize);
> -  ASSERT (CpuToken != NULL);
> +
> +  if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) {
> +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n"));

(7) This is an expected case, and not too much a corner case at that.
Furthermore, the DEBUG message is in a performance-sensitive path.
Therefore, the message should be logged with the DEBUG_VERBOSE mask, and
not with the DEBUG_INFO mask.

(8) In addition, the DEBUG line is too long.

(9) The DEBUG line also misses a space character.


> +
> +    //
> +    // Record current token buffer for later free action usage.
> +    // Current used token buffer not in this list.
> +    //
> +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> +    ASSERT (TokenBuf != NULL);
> +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> +
> +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
> +
> +    gSmmCpuPrivate->CurrentTokenBuf   = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
> +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +    gSmmCpuPrivate->UsedTokenNum = 0;
> +  }
> +
> +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);

(10) Two lines are overlong.


> +  gSmmCpuPrivate->UsedTokenNum++;
> +
>    InitializeSpinLock (CpuToken);
>    AcquireSpinLock (CpuToken);
>
> @@ -1737,10 +1775,20 @@ InitializeDataForMmMp (
>    VOID
>    )
>  {
> +  UINTN              SpinLockSize;
> +
> +  SpinLockSize = GetSpinLockProperties ();
> +  DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize));

(11) For consistency with (7), you might want to downgrade this to
DEBUG_VERBOSE too. But, in this case, I won't insist.

(12) Space character missing after "DEBUG".

(13) Are you proposing this patch for edk2-stable201911? (CC Liming)

Thanks
Laszlo


> +
> +  gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
> +  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> +
>    gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus);
>    ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
>
>    InitializeListHead (&gSmmCpuPrivate->TokenList);
> +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
>  }
>
>  /**


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-28 13:57 ` Laszlo Ersek
@ 2019-11-29  1:22   ` Ni, Ray
  2019-11-29  7:17     ` Laszlo Ersek
  2019-11-29  3:02   ` [edk2-devel] " Dong, Eric
  1 sibling, 1 reply; 12+ messages in thread
From: Ni, Ray @ 2019-11-29  1:22 UTC (permalink / raw)
  To: Laszlo Ersek, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming

> 
> I suggest that we introduce a new PCD for "token count per chunk", and
> set the default value to 1, in the DEC file. (And we should copy the PCD
> into a global variable, in InitializeDataForMmMp().)
> 
> This way, if a platform suddenly starts consuming tokens, it will work
> (there will always be a single available token). If they want to improve
> performance (using a chunked allocation style), they can set the PCD as
> they see fit.
> 

Laszlo,
I agree with you to use to PCD for "token count per chunk".
But I suggest the default value of that PCD can be at least 64.
Reasons:
1. MM_MP is the new MP protocol in SMM environment and we expect
    all SMM code start to use the new protocol instead of the original StartupThisAp
    service exposed through SMM System table.
2. A value of 1 is totally to disable the pre-allocating. I prefer this
    pre-allocating is activated by default given the reason #1.

64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size.

Thanks,
Ray

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-29  1:22   ` Ni, Ray
@ 2019-11-29  7:17     ` Laszlo Ersek
  2019-12-02  5:14       ` Ni, Ray
  0 siblings, 1 reply; 12+ messages in thread
From: Laszlo Ersek @ 2019-11-29  7:17 UTC (permalink / raw)
  To: Ni, Ray, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming

On 11/29/19 02:22, Ni, Ray wrote:
>>
>> I suggest that we introduce a new PCD for "token count per chunk", and
>> set the default value to 1, in the DEC file. (And we should copy the PCD
>> into a global variable, in InitializeDataForMmMp().)
>>
>> This way, if a platform suddenly starts consuming tokens, it will work
>> (there will always be a single available token). If they want to improve
>> performance (using a chunked allocation style), they can set the PCD as
>> they see fit.
>>
> 
> Laszlo,
> I agree with you to use to PCD for "token count per chunk".
> But I suggest the default value of that PCD can be at least 64.
> Reasons:
> 1. MM_MP is the new MP protocol in SMM environment and we expect
>     all SMM code start to use the new protocol instead of the original StartupThisAp
>     service exposed through SMM System table.

If we consider the older SmmStartupThisAp() / MmStartupThisAp() member
functions, the situation is almost the same -- (almost) no callers. In
edk2, only SmmPeriodicSmiLib seems to call this SMST (MMST) API. In
edk2-platforms, MinPlatformPkg uses the lib class, and also calls
SmmStartupThisAp().

I'm just saying that "all SMM code" using "the original StartupThisAp
service" in fact means a single open source platform.

> 2. A value of 1 is totally to disable the pre-allocating.

I disagree; with value 1, the InitializeDataForMmMp() function allocates
a new chunk, with room for 1 token. That occurs ahead of actually
needing the token, therefore it is a preallocation.

What you mean is that the chunking will not make a difference relative
to the current behavior, because every token creation/use will require a
new chunk allocation. Indeed.

But that doesn't change the fact that the first token will be allocated
prior to actually needing it, and if a platform never needs a token,
then the first chunk will just be there doing nothing.

>     I prefer this
>     pre-allocating is activated by default given the reason #1.
> 
> 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size.

Would it be possible to *not* produce the MM MP protocol at all, if the
PCD were 0? Because in that case, the DEC default for the PCD could be
64 (or any other value), and in OVMF we could set the PCD in the DSC
file to zero.

Thanks,
Laszlo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-29  7:17     ` Laszlo Ersek
@ 2019-12-02  5:14       ` Ni, Ray
  2019-12-02 12:56         ` Laszlo Ersek
  0 siblings, 1 reply; 12+ messages in thread
From: Ni, Ray @ 2019-12-02  5:14 UTC (permalink / raw)
  To: Laszlo Ersek, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming

> > Laszlo,
> > I agree with you to use to PCD for "token count per chunk".
> > But I suggest the default value of that PCD can be at least 64.
> > Reasons:
> > 1. MM_MP is the new MP protocol in SMM environment and we expect
> >     all SMM code start to use the new protocol instead of the original
> StartupThisAp
> >     service exposed through SMM System table.
> 
> If we consider the older SmmStartupThisAp() / MmStartupThisAp() member
> functions, the situation is almost the same -- (almost) no callers. In
> edk2, only SmmPeriodicSmiLib seems to call this SMST (MMST) API. In
> edk2-platforms, MinPlatformPkg uses the lib class, and also calls
> SmmStartupThisAp().
> 
> I'm just saying that "all SMM code" using "the original StartupThisAp
> service" in fact means a single open source platform.

The benefit of MmMp is it introduces StartupAllAps service which
is very useful in many core system.
There are needs to call the new services in close source code.

PI spec contains a more detailed description on the differences:
* It is possible to invoke a procedure on multiple processors.
* Supports blocking and non-blocking modes of operation.

> 
> > 2. A value of 1 is totally to disable the pre-allocating.
> 
> I disagree; with value 1, the InitializeDataForMmMp() function allocates
> a new chunk, with room for 1 token. That occurs ahead of actually
> needing the token, therefore it is a preallocation.
> 
> What you mean is that the chunking will not make a difference relative
> to the current behavior, because every token creation/use will require a
> new chunk allocation. Indeed.
> 
> But that doesn't change the fact that the first token will be allocated
> prior to actually needing it, and if a platform never needs a token,
> then the first chunk will just be there doing nothing.

I agree 1 is different  from disabling the pre-allocating.
But 1 is not enough in a system that needs to use MmMp multiple times
in a single SMI.

> 
> >     I prefer this
> >     pre-allocating is activated by default given the reason #1.
> >
> > 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size.
> 
> Would it be possible to *not* produce the MM MP protocol at all, if the
> PCD were 0? Because in that case, the DEC default for the PCD could be
> 64 (or any other value), and in OVMF we could set the PCD in the DSC
> file to zero. 

Back to your suggestion, I am a bit curious why you prefer to have a
PCD to turn on/off MmMp instead of pre-allocating 2KB/4KB memory.

Any PCD introduces a cost that future developers needs to understand
the meaning of the PCD and know how to set the PCD.

Thanks,
Ray


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-12-02  5:14       ` Ni, Ray
@ 2019-12-02 12:56         ` Laszlo Ersek
  0 siblings, 0 replies; 12+ messages in thread
From: Laszlo Ersek @ 2019-12-02 12:56 UTC (permalink / raw)
  To: Ni, Ray, Dong, Eric, devel@edk2.groups.io; +Cc: Gao, Liming

On 12/02/19 06:14, Ni, Ray wrote:
>>> Laszlo,
>>> I agree with you to use to PCD for "token count per chunk".
>>> But I suggest the default value of that PCD can be at least 64.
>>> Reasons:
>>> 1. MM_MP is the new MP protocol in SMM environment and we expect
>>>     all SMM code start to use the new protocol instead of the original
>> StartupThisAp
>>>     service exposed through SMM System table.
>>
>> If we consider the older SmmStartupThisAp() / MmStartupThisAp() member
>> functions, the situation is almost the same -- (almost) no callers. In
>> edk2, only SmmPeriodicSmiLib seems to call this SMST (MMST) API. In
>> edk2-platforms, MinPlatformPkg uses the lib class, and also calls
>> SmmStartupThisAp().
>>
>> I'm just saying that "all SMM code" using "the original StartupThisAp
>> service" in fact means a single open source platform.
> 
> The benefit of MmMp is it introduces StartupAllAps service which
> is very useful in many core system.
> There are needs to call the new services in close source code.
> 
> PI spec contains a more detailed description on the differences:
> * It is possible to invoke a procedure on multiple processors.
> * Supports blocking and non-blocking modes of operation.
> 
>>
>>> 2. A value of 1 is totally to disable the pre-allocating.
>>
>> I disagree; with value 1, the InitializeDataForMmMp() function allocates
>> a new chunk, with room for 1 token. That occurs ahead of actually
>> needing the token, therefore it is a preallocation.
>>
>> What you mean is that the chunking will not make a difference relative
>> to the current behavior, because every token creation/use will require a
>> new chunk allocation. Indeed.
>>
>> But that doesn't change the fact that the first token will be allocated
>> prior to actually needing it, and if a platform never needs a token,
>> then the first chunk will just be there doing nothing.
> 
> I agree 1 is different  from disabling the pre-allocating.
> But 1 is not enough in a system that needs to use MmMp multiple times
> in a single SMI.
> 
>>
>>>     I prefer this
>>>     pre-allocating is activated by default given the reason #1.
>>>
>>> 64 * 32/64 (SPIN LOCK size) = 2KB/4KB is not a big size.
>>
>> Would it be possible to *not* produce the MM MP protocol at all, if the
>> PCD were 0? Because in that case, the DEC default for the PCD could be
>> 64 (or any other value), and in OVMF we could set the PCD in the DSC
>> file to zero. 
> 
> Back to your suggestion, I am a bit curious why you prefer to have a
> PCD to turn on/off MmMp instead of pre-allocating 2KB/4KB memory.

I tend to perceive SMRAM as a "premium" (scarce) resource. I could be wrong.

> Any PCD introduces a cost that future developers needs to understand
> the meaning of the PCD and know how to set the PCD.

Yes, I agree about that.

But, at least, a PCD is a well-documented and explicit artifact. An
allocated, but never used memory (SMRAM) block may similarly raise
questions, *if* someone still remembers it later (or stumbles upon it).
Documenting the SMRAM block (in the right place) can be difficult too.
If we don't document it, then there seems to be no maintenance burden,
but that's actually the worst solution (obscure behavior).

Anyway I've said enough in this thread; please go ahead with the
solution you deem best.

Thanks
Laszlo


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-28 13:57 ` Laszlo Ersek
  2019-11-29  1:22   ` Ni, Ray
@ 2019-11-29  3:02   ` Dong, Eric
  2019-11-29  7:39     ` Laszlo Ersek
  1 sibling, 1 reply; 12+ messages in thread
From: Dong, Eric @ 2019-11-29  3:02 UTC (permalink / raw)
  To: devel@edk2.groups.io, lersek@redhat.com; +Cc: Ni, Ray, Gao, Liming

[-- Attachment #1: Type: text/plain, Size: 11994 bytes --]

Hi Laszlo,

From: devel@edk2.groups.io [mailto:devel@edk2.groups.io] On Behalf Of Laszlo Ersek
Sent: Thursday, November 28, 2019 9:57 PM
To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io
Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.

On 11/28/19 07:17, Eric Dong wrote:
> v2 changes:
>   Minor update based on comments.
>
> v1 changes:
> REF: https://bugzilla.tianocore.org/show_bug.cgi?id=2388
>
> Current logic allocate Token every time when need to use it.

(1) Can you please clarify, in the *commit message*, what service the
token allocation is needed for?

To me it seems to be related to the SMM MP protocol, which was added for
<https://bugzilla.tianocore.org/show_bug.cgi?id=1937>. Is that correct?
[Eric] yes, it's a SMI latency issue occurred only in some special case.  It's a regression issue caused by SMM MP protocol change.

(For example, the CreateToken() function was introduced in commit
51dd408ae102, "UefiCpuPkg/PiSmmCpuDxeSmm: Enable MM MP Protocol",
2019-07-16.)

(2) If the problem indeed depends on MM MP protocol usage, then please
update Bugzilla 2388 too, with that information ("performance regression
from BZ#1937"). If a platform does not use the MM MP protocol, then
highlighting this information helps platforms determine that they are
not affected.

(I read Bugzilla 2388 soon after it was filed, and I was surprised,
because I didn't remember any "human visible" SMI handling slowdown.

Maybe it's not on the "human visible" scale; I'm not sure.)
[Eric] Yes, it's a special case. I check the basic SMI (write data to B2 and collect the performance data) latency data and not found this issue.
I found you already add note in this bugz. Thanks.

>                                                              The logic
> caused SMI latency raised to very high. Update logic to allocate Token
> buffer at driver's entry point. Later use the token from the allocated
> token buffer. Only when all the buffer have been used, then need to
> allocate new buffer.
>
> Signed-off-by: Eric Dong <eric.dong@intel.com<mailto:eric.dong@intel.com>>
> Cc: Ray Ni <ray.ni@intel.com<mailto:ray.ni@intel.com>>
> Cc: Laszlo Ersek <lersek@redhat.com<mailto:lersek@redhat.com>>
> ---
>  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56 ++++++++++++++++++++--
>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
>  2 files changed, 68 insertions(+), 4 deletions(-)

commenting on the header file changes first:
[Eric] what's this sentence means? Follow above comments to update the comment message?

> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> index 8c29f1a558..36fd0dae92 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
> @@ -198,6 +198,7 @@ typedef UINT32                              SMM_CPU_ARRIVAL_EXCEPTIONS;
>  #define ARRIVAL_EXCEPTION_DELAYED           0x2
>  #define ARRIVAL_EXCEPTION_SMI_DISABLED      0x4
>
> +#define MAX_PRE_RESERVE_TOKEN_COUNT                     0x512

(3) This is a very strange constant, 0x512: it's decimal 1298. Is that
your intent?
[Eric] add "0x" by mistake, will use PCD and remove this code.

(4) Should we make this a global (SMRAM) variable instead, and
initialize it from a PCD?
[Eric] yes, will use PCD for this value.

I'm not sure how large tokens are, but if a platform has no use for the
[Eric] The value is 0x40 for the platform I used.

MM MP protocol, then pre-allocating tokens for the MM MP protocol just
wastes SMRAM.

If I grep edk2 (at bd85bf54c268), and edk2-platforms (at 04889ec1198b),
for "gEfiMmMpProtocolGuid", the only reported module is
"UefiCpuPkg/PiSmmCpuDxeSmm" itself. In other words, there is no open
source consumer for the new protocol. I don't think we should waste
SMRAM on platforms that don't consume this protocol.

Furthermore, I've applied the following patch, and rebuilt OVMF:

> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index 0685637c2b51..5874195ba96e 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -1119,6 +1119,8 @@ CreateToken (
>    SPIN_LOCK           *CpuToken;
>    UINTN               SpinLockSize;
>
> +  ASSERT (FALSE);
> +
>    SpinLockSize = GetSpinLockProperties ();
>    CpuToken = AllocatePool (SpinLockSize);
>    ASSERT (CpuToken != NULL);

then I've re-run my usual tests. The ASSERT() has not been reached, and
so I think that the new SMRAM allocation would be pure loss (= unused)
for OVMF.

I suggest that we introduce a new PCD for "token count per chunk", and
set the default value to 1, in the DEC file. (And we should copy the PCD
into a global variable, in InitializeDataForMmMp().)

This way, if a platform suddenly starts consuming tokens, it will work
(there will always be a single available token). If they want to improve
performance (using a chunked allocation style), they can set the PCD as
they see fit.

Back to the patch:

On 11/28/19 07:17, Eric Dong wrote:
>  //
>  // Wrapper used to convert EFI_AP_PROCEDURE2 and EFI_AP_PROCEDURE.
>  //
> @@ -217,6 +218,17 @@ typedef struct {
>
>  #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
>
> +#define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
> +
> +typedef struct {
> +  UINTN                   Signature;
> +  LIST_ENTRY              Link;
> +
> +  UINT8                   *Buffer;
> +} TOKEN_BUFFER;
> +
> +#define TOKEN_BUFFER_FROM_LINK(a)  CR (a, TOKEN_BUFFER, Link, TOKEN_BUFFER_SIGNATURE)
> +
>  //
>  // Private structure for the SMM CPU module that is stored in DXE Runtime memory
>  // Contains the SMM Configuration Protocols that is produced.
> @@ -243,6 +255,10 @@ typedef struct {
>    PROCEDURE_WRAPPER               *ApWrapperFunc;
>    LIST_ENTRY                      TokenList;
>
> +  LIST_ENTRY                      OldTokenBufList;
> +
> +  UINT8                           *CurrentTokenBuf;
> +  UINTN                           UsedTokenNum;     // Only record tokens used in CurrentTokenBuf.
>  } SMM_CPU_PRIVATE_DATA;
>
>  extern SMM_CPU_PRIVATE_DATA  *gSmmCpuPrivate;
>

(5) There are two new lines above that are overlong.
Did "PatchCheck.py" not complain?
[Eric] I think PatchCheck tool not check the code width, only check the commit message width.
Also edk2 coding style spec not has restrict rule for it, just a general rule. Content like below:
5.1 General Rules
5.1.1 Lines shall be 120 columns, or less
https://github.com/tianocore/tianocore.github.io/wiki/EDK-II-Specifications

>
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> index d8d2b6f444..4632e5b0c2 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> @@ -492,6 +492,23 @@ FreeTokens (
>  {
>    LIST_ENTRY            *Link;
>    PROCEDURE_TOKEN       *ProcToken;
> +  TOKEN_BUFFER          *TokenBuf;
> +
> +  //
> +  // Not free the buffer, just clear the UsedTokenNum. In order to
> +  // avoid later trig allcate action again when need to use token.
> +  //
> +  gSmmCpuPrivate->UsedTokenNum = 0;

(6) Here we do not zero out the current token buffer, but in
CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool().

This is an inconsistency, we should call either ZeroMem() here (if
zeroing matters), or AllocatePool() in the other two places (if zeroing
does not matter).
[Eric] Not catch your meaning here? Why can't use  "=0" here?

> +
> +  Link = GetFirstNode (&gSmmCpuPrivate->OldTokenBufList);
> +  while (!IsNull (&gSmmCpuPrivate->OldTokenBufList, Link)) {
> +    TokenBuf = TOKEN_BUFFER_FROM_LINK (Link);
> +
> +    Link = RemoveEntryList (&TokenBuf->Link);
> +
> +    FreePool (TokenBuf->Buffer);
> +    FreePool (TokenBuf);
> +  }
>
>    while (!IsListEmpty (&gSmmCpuPrivate->TokenList)) {
>      Link = GetFirstNode (&gSmmCpuPrivate->TokenList);
> @@ -499,7 +516,6 @@ FreeTokens (
>
>      RemoveEntryList (&ProcToken->Link);
>
> -    FreePool ((VOID *)ProcToken->ProcedureToken);
>      FreePool (ProcToken);
>    }
>  }
> @@ -1115,13 +1131,35 @@ CreateToken (
>    VOID
>    )
>  {
> -  PROCEDURE_TOKEN    *ProcToken;
> +  PROCEDURE_TOKEN     *ProcToken;
>    SPIN_LOCK           *CpuToken;
>    UINTN               SpinLockSize;
> +  TOKEN_BUFFER        *TokenBuf;
>
>    SpinLockSize = GetSpinLockProperties ();
> -  CpuToken = AllocatePool (SpinLockSize);
> -  ASSERT (CpuToken != NULL);
> +
> +  if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) {
> +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n"));

(7) This is an expected case, and not too much a corner case at that.
Furthermore, the DEBUG message is in a performance-sensitive path.
[Eric] this code is called by the caller.  I don't think it's performance sensitive. What's your
rule for "performance-sensitive path" ? I add this debug message because I want to know
how often the pre allocate buffer is not enough.  We can enlarge the buffer size to get
better performance.

Therefore, the message should be logged with the DEBUG_VERBOSE mask, and
not with the DEBUG_INFO mask.

(8) In addition, the DEBUG line is too long.
[Eric] Same with [5], I think not break edk2 rules.

(9) The DEBUG line also misses a space character.
[Eric] will update  in next version.

> +
> +    //
> +    // Record current token buffer for later free action usage.
> +    // Current used token buffer not in this list.
> +    //
> +    TokenBuf = AllocatePool (sizeof (TOKEN_BUFFER));
> +    ASSERT (TokenBuf != NULL);
> +    TokenBuf->Signature = TOKEN_BUFFER_SIGNATURE;
> +    TokenBuf->Buffer  = gSmmCpuPrivate->CurrentTokenBuf;
> +
> +    InsertTailList (&gSmmCpuPrivate->OldTokenBufList, &TokenBuf->Link);
> +
> +    gSmmCpuPrivate->CurrentTokenBuf   = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
> +    ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +    gSmmCpuPrivate->UsedTokenNum = 0;
> +  }
> +
> +  CpuToken = (SPIN_LOCK *)(gSmmCpuPrivate->CurrentTokenBuf + SpinLockSize * gSmmCpuPrivate->UsedTokenNum);

(10) Two lines are overlong.
[Eric] Same with [5], I think not break edk2 rules.

> +  gSmmCpuPrivate->UsedTokenNum++;
> +
>    InitializeSpinLock (CpuToken);
>    AcquireSpinLock (CpuToken);
>
> @@ -1737,10 +1775,20 @@ InitializeDataForMmMp (
>    VOID
>    )
>  {
> +  UINTN              SpinLockSize;
> +
> +  SpinLockSize = GetSpinLockProperties ();
> +  DEBUG((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x\n", SpinLockSize));

(11) For consistency with (7), you might want to downgrade this to
DEBUG_VERBOSE too. But, in this case, I won't insist.
[Eric] This value impact the final used buffer size, want to know this info in normal boot. So I use INFO level.

(12) Space character missing after "DEBUG".
[Eric] Will update it in next patch.

(13) Are you proposing this patch for edk2-stable201911? (CC Liming)
[Eric] it's too later, will not propose in this stable tag.

Thanks
Laszlo

> +
> +  gSmmCpuPrivate->CurrentTokenBuf = AllocateZeroPool (SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT);
> +  ASSERT (gSmmCpuPrivate->CurrentTokenBuf != NULL);
> +  gSmmCpuPrivate->UsedTokenNum = 0;
> +
>    gSmmCpuPrivate->ApWrapperFunc = AllocatePool (sizeof (PROCEDURE_WRAPPER) * gSmmCpuPrivate->SmmCoreEntryContext.NumberOfCpus);
>    ASSERT (gSmmCpuPrivate->ApWrapperFunc != NULL);
>
>    InitializeListHead (&gSmmCpuPrivate->TokenList);
> +  InitializeListHead (&gSmmCpuPrivate->OldTokenBufList);
>  }
>
>  /**

[-- Attachment #2: Type: text/html, Size: 41392 bytes --]

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-29  3:02   ` [edk2-devel] " Dong, Eric
@ 2019-11-29  7:39     ` Laszlo Ersek
  2019-12-04  5:33       ` Dong, Eric
  0 siblings, 1 reply; 12+ messages in thread
From: Laszlo Ersek @ 2019-11-29  7:39 UTC (permalink / raw)
  To: Dong, Eric, devel@edk2.groups.io; +Cc: Ni, Ray, Gao, Liming

On 11/29/19 04:02, Dong, Eric wrote:
> Hi Laszlo,
> 
> From: devel@edk2.groups.io [mailto:devel@edk2.groups.io] On Behalf Of Laszlo Ersek
> Sent: Thursday, November 28, 2019 9:57 PM
> To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io
> Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.

>>  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56 ++++++++++++++++++++--
>>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
>>  2 files changed, 68 insertions(+), 4 deletions(-)
> 
> commenting on the header file changes first:
> [Eric] what's this sentence means? Follow above comments to update the comment message?

Your patch email included the C source file changes first, and the
header file changes second. I find that more difficult to reason about
than the opposite order (header first, C source second).

Therefore, I split your email in two parts, and moved the H file changes
to the top. And, I commented on those H file changes first.

>> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
>> index d8d2b6f444..4632e5b0c2 100644
>> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
>> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
>> @@ -492,6 +492,23 @@ FreeTokens (
>>  {
>>    LIST_ENTRY            *Link;
>>    PROCEDURE_TOKEN       *ProcToken;
>> +  TOKEN_BUFFER          *TokenBuf;
>> +
>> +  //
>> +  // Not free the buffer, just clear the UsedTokenNum. In order to
>> +  // avoid later trig allcate action again when need to use token.
>> +  //
>> +  gSmmCpuPrivate->UsedTokenNum = 0;
> 
> (6) Here we do not zero out the current token buffer, but in
> CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool().
> 
> This is an inconsistency, we should call either ZeroMem() here (if
> zeroing matters), or AllocatePool() in the other two places (if zeroing
> does not matter).
> [Eric] Not catch your meaning here? Why can't use  "=0" here?

In both CreateToken() and InitializeDataForMmMp(), we perform *three*
actions:
(a) ensure CurrentTokenBuf is allocated,
(b) clear CurrentTokenBuf,
(c) set UsedTokenNum to zero.

In FreeTokens(), we perform *two* actions:
(a) ensure CurrentTokenBuf is allocated (it needs no explicit action,
but it is an invariant nonetheless),
(c) set UsedTokenNum to zero.

Step (b) is missing from FreeTokens(). That's inconsistent with
CreateToken() and InitializeDataForMmMp().

The question is whether the following predicate is important or not:

- "all unused tokens in the current token buffer must be all-bits-zero"

If this predicate is important, then you should add step (b) to
FreeTokens():

  ZeroMem (
    gSmmCpuPrivate->CurrentTokenBuf,
    SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT
    );

If the predicate is not important, then you should replace the
AllocateZeroPool() calls with AllocatePool(), in CreateToken() and
InitializeDataForMmMp().

It is not consistent to clear CurrentTokenBuf in only *some* cases when
UsedTokenNum is set to zero.

>> @@ -1115,13 +1131,35 @@ CreateToken (
>>    VOID
>>    )
>>  {
>> -  PROCEDURE_TOKEN    *ProcToken;
>> +  PROCEDURE_TOKEN     *ProcToken;
>>    SPIN_LOCK           *CpuToken;
>>    UINTN               SpinLockSize;
>> +  TOKEN_BUFFER        *TokenBuf;
>>
>>    SpinLockSize = GetSpinLockProperties ();
>> -  CpuToken = AllocatePool (SpinLockSize);
>> -  ASSERT (CpuToken != NULL);
>> +
>> +  if (gSmmCpuPrivate->UsedTokenNum == MAX_PRE_RESERVE_TOKEN_COUNT) {
>> +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new buffer!\n"));
> 
> (7) This is an expected case, and not too much a corner case at that.
> Furthermore, the DEBUG message is in a performance-sensitive path.
> [Eric] this code is called by the caller.  I don't think it's performance sensitive. What's your
> rule for "performance-sensitive path" ? I add this debug message because I want to know
> how often the pre allocate buffer is not enough.  We can enlarge the buffer size to get
> better performance.

The patch is about making CreateToken() faster. It's done by allocating
SMRAM less frequently (not on every call to CreateToken()). In some
cases however, CreateToken() will still allocate SMRAM, and that's going
to be a "spike" in latency, I expect.

Adding a DEBUG_INFO to that code path makes the spike worse. It does not
affect the throughput of CreateToken() much (the spike is amortized over
MAX_PRE_RESERVE_TOKEN_COUNT calls), but it makes the distribution less
even. I would use DEBUG_VERBOSE to avoid worsening the spike when a
platform build does not ask for DEBUG_VERBOSE explicitly.

If you disagree, I can live with DEBUG_INFO.

Thanks
Laszlo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time.
  2019-11-29  7:39     ` Laszlo Ersek
@ 2019-12-04  5:33       ` Dong, Eric
  0 siblings, 0 replies; 12+ messages in thread
From: Dong, Eric @ 2019-12-04  5:33 UTC (permalink / raw)
  To: Laszlo Ersek, devel@edk2.groups.io; +Cc: Ni, Ray, Gao, Liming

Hi Laszlo,

> -----Original Message-----
> From: Laszlo Ersek [mailto:lersek@redhat.com]
> Sent: Friday, November 29, 2019 3:39 PM
> To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io
> Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid
> allocate Token every time.
> 
> On 11/29/19 04:02, Dong, Eric wrote:
> > Hi Laszlo,
> >
> > From: devel@edk2.groups.io [mailto:devel@edk2.groups.io] On Behalf Of
> > Laszlo Ersek
> > Sent: Thursday, November 28, 2019 9:57 PM
> > To: Dong, Eric <eric.dong@intel.com>; devel@edk2.groups.io
> > Cc: Ni, Ray <ray.ni@intel.com>; Gao, Liming <liming.gao@intel.com>
> > Subject: Re: [edk2-devel] [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm:
> Avoid allocate Token every time.
> 
> >>  UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 56
> ++++++++++++++++++++--
> >>  UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h | 16 +++++++
> >>  2 files changed, 68 insertions(+), 4 deletions(-)
> >
> > commenting on the header file changes first:
> > [Eric] what's this sentence means? Follow above comments to update the
> comment message?
> 
> Your patch email included the C source file changes first, and the header file
> changes second. I find that more difficult to reason about than the opposite
> order (header first, C source second).
> 
> Therefore, I split your email in two parts, and moved the H file changes to the
> top. And, I commented on those H file changes first.
> 
> >> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> >> b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> >> index d8d2b6f444..4632e5b0c2 100644
> >> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> >> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
> >> @@ -492,6 +492,23 @@ FreeTokens (
> >>  {
> >>    LIST_ENTRY            *Link;
> >>    PROCEDURE_TOKEN       *ProcToken;
> >> +  TOKEN_BUFFER          *TokenBuf;
> >> +
> >> +  //
> >> +  // Not free the buffer, just clear the UsedTokenNum. In order to
> >> + // avoid later trig allcate action again when need to use token.
> >> +  //
> >> +  gSmmCpuPrivate->UsedTokenNum = 0;
> >
> > (6) Here we do not zero out the current token buffer, but in
> > CreateToken() and InitializeDataForMmMp(), we use AllocateZeroPool().
> >
> > This is an inconsistency, we should call either ZeroMem() here (if
> > zeroing matters), or AllocatePool() in the other two places (if
> > zeroing does not matter).
> > [Eric] Not catch your meaning here? Why can't use  "=0" here?
> 
> In both CreateToken() and InitializeDataForMmMp(), we perform *three*
> actions:
> (a) ensure CurrentTokenBuf is allocated,
> (b) clear CurrentTokenBuf,
> (c) set UsedTokenNum to zero.
> 
> In FreeTokens(), we perform *two* actions:
> (a) ensure CurrentTokenBuf is allocated (it needs no explicit action, but it is
> an invariant nonetheless),
> (c) set UsedTokenNum to zero.
> 
> Step (b) is missing from FreeTokens(). That's inconsistent with
> CreateToken() and InitializeDataForMmMp().
> 
> The question is whether the following predicate is important or not:
> 
> - "all unused tokens in the current token buffer must be all-bits-zero"
> 
> If this predicate is important, then you should add step (b) to
> FreeTokens():
> 
>   ZeroMem (
>     gSmmCpuPrivate->CurrentTokenBuf,
>     SpinLockSize * MAX_PRE_RESERVE_TOKEN_COUNT
>     );
> 
> If the predicate is not important, then you should replace the
> AllocateZeroPool() calls with AllocatePool(), in CreateToken() and
> InitializeDataForMmMp().
> 
> It is not consistent to clear CurrentTokenBuf in only *some* cases when
> UsedTokenNum is set to zero.

[[Eric]] In CreateToken function, I add code InitializeSpinLock() to initialize
the Token space first, so "Clear CurrentTokenBuf" action is not must have item.
I will remove the "Zero Memory" action in my next version changes.

> 
> >> @@ -1115,13 +1131,35 @@ CreateToken (
> >>    VOID
> >>    )
> >>  {
> >> -  PROCEDURE_TOKEN    *ProcToken;
> >> +  PROCEDURE_TOKEN     *ProcToken;
> >>    SPIN_LOCK           *CpuToken;
> >>    UINTN               SpinLockSize;
> >> +  TOKEN_BUFFER        *TokenBuf;
> >>
> >>    SpinLockSize = GetSpinLockProperties ();
> >> -  CpuToken = AllocatePool (SpinLockSize);
> >> -  ASSERT (CpuToken != NULL);
> >> +
> >> +  if (gSmmCpuPrivate->UsedTokenNum ==
> MAX_PRE_RESERVE_TOKEN_COUNT) {
> >> +    DEBUG((DEBUG_INFO, "CpuSmm: No free token buffer, allocate new
> >> + buffer!\n"));
> >
> > (7) This is an expected case, and not too much a corner case at that.
> > Furthermore, the DEBUG message is in a performance-sensitive path.
> > [Eric] this code is called by the caller.  I don't think it's
> > performance sensitive. What's your rule for "performance-sensitive
> > path" ? I add this debug message because I want to know how often the
> > pre allocate buffer is not enough.  We can enlarge the buffer size to get
> better performance.
> 
> The patch is about making CreateToken() faster. It's done by allocating
> SMRAM less frequently (not on every call to CreateToken()). In some cases
> however, CreateToken() will still allocate SMRAM, and that's going to be a
> "spike" in latency, I expect.
> 
> Adding a DEBUG_INFO to that code path makes the spike worse. It does not
> affect the throughput of CreateToken() much (the spike is amortized over
> MAX_PRE_RESERVE_TOKEN_COUNT calls), but it makes the distribution less
> even. I would use DEBUG_VERBOSE to avoid worsening the spike when a
> platform build does not ask for DEBUG_VERBOSE explicitly.
> 
> If you disagree, I can live with DEBUG_INFO.
> 
[[Eric]] I add debug message to let us know the frequency of the allocation action. 
It make sense to change the level to VERBOSE. I will update it in my next version changes.

Thanks,
Eric

> Thanks
> Laszlo

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2019-12-04  5:44 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-11-28  6:17 [PATCH v2] UefiCpuPkg/PiSmmCpuDxeSmm: Avoid allocate Token every time Dong, Eric
2019-11-28  6:37 ` [edk2-devel] " Ni, Ray
2019-11-28 13:15 ` Philippe Mathieu-Daudé
2019-12-04  5:44   ` Dong, Eric
2019-11-28 13:57 ` Laszlo Ersek
2019-11-29  1:22   ` Ni, Ray
2019-11-29  7:17     ` Laszlo Ersek
2019-12-02  5:14       ` Ni, Ray
2019-12-02 12:56         ` Laszlo Ersek
2019-11-29  3:02   ` [edk2-devel] " Dong, Eric
2019-11-29  7:39     ` Laszlo Ersek
2019-12-04  5:33       ` Dong, Eric

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox