public inbox for devel@edk2.groups.io
 help / color / mirror / Atom feed
From: "Wu, Jiaxin" <jiaxin.wu@intel.com>
To: devel@edk2.groups.io
Cc: Ray Ni <ray.ni@intel.com>, Laszlo Ersek <lersek@redhat.com>,
	Eric Dong <eric.dong@intel.com>, Zeng Star <star.zeng@intel.com>,
	Gerd Hoffmann <kraxel@redhat.com>,
	Rahul Kumar <rahul1.kumar@intel.com>
Subject: [edk2-devel] [PATCH v1 2/2] UefiCpuPkg/PiSmmCpuDxeSmm: Make RunningApCount on exclusive cacheline
Date: Wed, 06 Mar 2024 04:11:13 -0800	[thread overview]
Message-ID: <20240306121103.356-3-jiaxin.wu@intel.com> (raw)
In-Reply-To: <20240306121103.356-1-jiaxin.wu@intel.com>

For non blocking mode during SmmMpBroadcastProcedure, multiple APs might
contended access the RunningApCount in the PROCEDURE_TOKEN:

Step1: RunningApCount is initialized to the mMaxNumberOfCpus
(See GetFreeToken).
Step2: Decrease RunningApCount if the AP is not present
(See InterlockedDecrement in InternalSmmStartupAllAPs).
Step3: multiple APs are contended to decrease RunningApCount in the
same token (See ReleaseToken in APHandler).

So, Contended lock case happen during Step3. For multiple APs access
the shared memory (RunningApCount), we shall use exclusive cache line
with WB attribute for SMM Performance Tuning.

This patch makes RunningApCount on exclusive cacheline.

Cc: Ray Ni <ray.ni@intel.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Eric Dong <eric.dong@intel.com>
Cc: Zeng Star <star.zeng@intel.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Rahul Kumar <rahul1.kumar@intel.com>
Signed-off-by: Jiaxin Wu <jiaxin.wu@intel.com>
---
 UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c      | 35 +++++++++++++++++++-----------
 UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h |  2 +-
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
index 9790b4f888..05fa6854fe 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/MpService.c
@@ -421,11 +421,11 @@ ReleaseToken (
 {
   PROCEDURE_TOKEN  *Token;
 
   Token = mSmmMpSyncData->CpuData[CpuIndex].Token;
 
-  if (InterlockedDecrement (&Token->RunningApCount) == 0) {
+  if (InterlockedDecrement (Token->RunningApCount) == 0) {
     ReleaseSpinLock (Token->SpinLock);
   }
 
   mSmmMpSyncData->CpuData[CpuIndex].Token = NULL;
 }
@@ -970,12 +970,12 @@ AllocateTokenBuffer (
   )
 {
   UINTN            SpinLockSize;
   UINT32           TokenCountPerChunk;
   UINTN            Index;
-  SPIN_LOCK        *SpinLock;
-  UINT8            *SpinLockBuffer;
+  UINTN            BufferAddr;
+  VOID             *Buffer;
   PROCEDURE_TOKEN  *ProcTokens;
 
   SpinLockSize = GetSpinLockProperties ();
 
   TokenCountPerChunk = FixedPcdGet32 (PcdCpuSmmMpTokenCountPerChunk);
@@ -986,25 +986,34 @@ AllocateTokenBuffer (
   }
 
   DEBUG ((DEBUG_INFO, "CpuSmm: SpinLock Size = 0x%x, PcdCpuSmmMpTokenCountPerChunk = 0x%x\n", SpinLockSize, TokenCountPerChunk));
 
   //
-  // Separate the Spin_lock and Proc_token because the alignment requires by Spin_Lock.
+  // Allocate the buffer for SpinLock and RunningApCount to meet the alignment requirement.
   //
-  SpinLockBuffer = AllocatePool (SpinLockSize * TokenCountPerChunk);
-  ASSERT (SpinLockBuffer != NULL);
+  Buffer =  AllocatePages (EFI_SIZE_TO_PAGES (SpinLockSize * TokenCountPerChunk * 2));
+  if (Buffer == NULL) {
+    DEBUG ((DEBUG_ERROR, "AllocateTokenBuffer: Failed to allocate the buffer for SpinLock and RunningApCount!\n"));
+    CpuDeadLoop ();
+  }
 
   ProcTokens = AllocatePool (sizeof (PROCEDURE_TOKEN) * TokenCountPerChunk);
   ASSERT (ProcTokens != NULL);
 
+  BufferAddr = (UINTN)Buffer;
   for (Index = 0; Index < TokenCountPerChunk; Index++) {
-    SpinLock = (SPIN_LOCK *)(SpinLockBuffer + SpinLockSize * Index);
-    InitializeSpinLock (SpinLock);
+    ProcTokens[Index].Signature = PROCEDURE_TOKEN_SIGNATURE;
+
+    ProcTokens[Index].SpinLock = (SPIN_LOCK *)BufferAddr;
+    InitializeSpinLock (ProcTokens[Index].SpinLock);
+
+    BufferAddr += SpinLockSize;
+
+    ProcTokens[Index].RunningApCount  = (volatile UINT32 *)BufferAddr;
+    *ProcTokens[Index].RunningApCount = 0;
 
-    ProcTokens[Index].Signature      = PROCEDURE_TOKEN_SIGNATURE;
-    ProcTokens[Index].SpinLock       = SpinLock;
-    ProcTokens[Index].RunningApCount = 0;
+    BufferAddr += SpinLockSize;
 
     InsertTailList (&gSmmCpuPrivate->TokenList, &ProcTokens[Index].Link);
   }
 
   return &ProcTokens[0].Link;
@@ -1036,11 +1045,11 @@ GetFreeToken (
   }
 
   NewToken                       = PROCEDURE_TOKEN_FROM_LINK (gSmmCpuPrivate->FirstFreeToken);
   gSmmCpuPrivate->FirstFreeToken = GetNextNode (&gSmmCpuPrivate->TokenList, gSmmCpuPrivate->FirstFreeToken);
 
-  NewToken->RunningApCount = RunningApsCount;
+  *NewToken->RunningApCount = RunningApsCount;
   AcquireSpinLock (NewToken->SpinLock);
 
   return NewToken;
 }
 
@@ -1298,11 +1307,11 @@ InternalSmmStartupAllAPs (
 
       //
       // Decrease the count to mark this processor(AP or BSP) as finished.
       //
       if (ProcToken != NULL) {
-        InterlockedDecrement (&ProcToken->RunningApCount);
+        InterlockedDecrement (ProcToken->RunningApCount);
       }
     }
   }
 
   ReleaseAllAPs ();
diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
index 7f244ea803..07473208fd 100644
--- a/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
+++ b/UefiCpuPkg/PiSmmCpuDxeSmm/PiSmmCpuDxeSmm.h
@@ -213,11 +213,11 @@ typedef struct {
 typedef struct {
   UINTN              Signature;
   LIST_ENTRY         Link;
 
   SPIN_LOCK          *SpinLock;
-  volatile UINT32    RunningApCount;
+  volatile UINT32    *RunningApCount;
 } PROCEDURE_TOKEN;
 
 #define PROCEDURE_TOKEN_FROM_LINK(a)  CR (a, PROCEDURE_TOKEN, Link, PROCEDURE_TOKEN_SIGNATURE)
 
 #define TOKEN_BUFFER_SIGNATURE  SIGNATURE_32 ('T', 'K', 'B', 'S')
-- 
2.16.2.windows.1



-=-=-=-=-=-=-=-=-=-=-=-
Groups.io Links: You receive all messages sent to this group.
View/Reply Online (#116435): https://edk2.groups.io/g/devel/message/116435
Mute This Topic: https://groups.io/mt/104764105/7686176
Group Owner: devel+owner@edk2.groups.io
Unsubscribe: https://edk2.groups.io/g/devel/unsub [rebecca@openfw.io]
-=-=-=-=-=-=-=-=-=-=-=-



      parent reply	other threads:[~2024-03-06 12:11 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-06 12:11 [edk2-devel] [PATCH v1 0/2] SMM MP service performance Improvement Wu, Jiaxin
2024-03-06 12:11 ` [edk2-devel] [PATCH v1 1/2] UefiCpuPkg/PiSmmCpuDxeSmm: Remove one round of Busy spinlock Wu, Jiaxin
2024-03-06 12:11 ` Wu, Jiaxin [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-list from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240306121103.356-3-jiaxin.wu@intel.com \
    --to=devel@edk2.groups.io \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox