From: "Laszlo Ersek" <lersek@redhat.com>
To: Ray Ni <ray.ni@intel.com>, devel@edk2.groups.io
Cc: Eric Dong <eric.dong@intel.com>, Yun Lou <yun.lou@intel.com>
Subject: Re: [PATCH] UefiCpuPkg/Feature: Support different thread count per core
Date: Thu, 3 Dec 2020 11:12:19 +0100 [thread overview]
Message-ID: <c5c29bfe-fcc4-fac9-3b50-b52d2ed321c3@redhat.com> (raw)
In-Reply-To: <20201202115505.664-1-ray.ni@intel.com>
On 12/02/20 12:55, Ray Ni wrote:
> Today's code assumes every core contains the same number of threads.
> It's not always TRUE for certain model.
> Such assumption causes system hang when thread count per core
> is different and there is core or package dependency between CPU
> features (using CPU_FEATURE_CORE_BEFORE/AFTER,
> CPU_FEATURE_PACKAGE_BEFORE/AFTER).
>
> The change removes such assumption by calculating the actual thread
> count per package and per core.
>
> Signed-off-by: Ray Ni <ray.ni@intel.com>
> Cc: Eric Dong <eric.dong@intel.com>
> Cc: Yun Lou <yun.lou@intel.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> ---
> UefiCpuPkg/Include/AcpiCpuData.h | 16 ++-
> .../CpuFeaturesInitialize.c | 113 ++++++++++--------
> UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c | 73 ++++++-----
> 3 files changed, 119 insertions(+), 83 deletions(-)
Acked-by: Laszlo Ersek <lersek@redhat.com>
Thanks
Laszlo
> diff --git a/UefiCpuPkg/Include/AcpiCpuData.h b/UefiCpuPkg/Include/AcpiCpuData.h
> index 77da5d4455..b5a69ad80c 100644
> --- a/UefiCpuPkg/Include/AcpiCpuData.h
> +++ b/UefiCpuPkg/Include/AcpiCpuData.h
> @@ -1,7 +1,7 @@
> /** @file
> Definitions for CPU S3 data.
>
> -Copyright (c) 2013 - 2018, Intel Corporation. All rights reserved.<BR>
> +Copyright (c) 2013 - 2020, Intel Corporation. All rights reserved.<BR>
> SPDX-License-Identifier: BSD-2-Clause-Patent
>
> **/
> @@ -60,14 +60,24 @@ typedef struct {
> UINT32 MaxThreadCount;
> //
> // This field points to an array.
> - // This array saves valid core count (type UINT32) of each package.
> + // This array saves thread count (type UINT32) of each package.
> // The array has PackageCount elements.
> //
> // If the platform does not support MSR setting at S3 resume, and
> // therefore it doesn't need the dependency semaphores, it should set
> // this field to 0.
> //
> - EFI_PHYSICAL_ADDRESS ValidCoreCountPerPackage;
> + EFI_PHYSICAL_ADDRESS ThreadCountPerPackage;
> + //
> + // This field points to an array.
> + // This array saves thread count (type UINT8) of each core.
> + // The array has PackageCount * MaxCoreCount elements.
> + //
> + // If the platform does not support MSR setting at S3 resume, and
> + // therefore it doesn't need the dependency semaphores, it should set
> + // this field to 0.
> + //
> + EFI_PHYSICAL_ADDRESS ThreadCountPerCore;
> } CPU_STATUS_INFORMATION;
>
> //
> diff --git a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> index 5c673fa8cf..0cce909cc0 100644
> --- a/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> +++ b/UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
> @@ -103,14 +103,13 @@ CpuInitDataInitialize (
> UINT32 Package;
> UINT32 Thread;
> EFI_CPU_PHYSICAL_LOCATION *Location;
> - BOOLEAN *CoresVisited;
> - UINTN Index;
> UINT32 PackageIndex;
> UINT32 CoreIndex;
> UINT32 First;
> ACPI_CPU_DATA *AcpiCpuData;
> CPU_STATUS_INFORMATION *CpuStatus;
> - UINT32 *ValidCoreCountPerPackage;
> + UINT32 *ThreadCountPerPackage;
> + UINT8 *ThreadCountPerCore;
> UINTN NumberOfCpus;
> UINTN NumberOfEnabledProcessors;
>
> @@ -202,35 +201,32 @@ CpuInitDataInitialize (
> //
> // Collect valid core count in each package because not all cores are valid.
> //
> - ValidCoreCountPerPackage= AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
> - ASSERT (ValidCoreCountPerPackage != 0);
> - CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ValidCoreCountPerPackage;
> - CoresVisited = AllocatePool (sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
> - ASSERT (CoresVisited != NULL);
> -
> - for (Index = 0; Index < CpuStatus->PackageCount; Index ++ ) {
> - ZeroMem (CoresVisited, sizeof (BOOLEAN) * CpuStatus->MaxCoreCount);
> - //
> - // Collect valid cores in Current package.
> - //
> - for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
> - Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
> - if (Location->Package == Index && !CoresVisited[Location->Core] ) {
> - //
> - // The ValidCores position for Location->Core is valid.
> - // The possible values in ValidCores[Index] are 0 or 1.
> - // FALSE means no valid threads in this Core.
> - // TRUE means have valid threads in this core, no matter the thead count is 1 or more.
> - //
> - CoresVisited[Location->Core] = TRUE;
> - ValidCoreCountPerPackage[Index]++;
> - }
> - }
> + ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);
> + ASSERT (ThreadCountPerPackage != NULL);
> + CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;
> +
> + ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);
> + ASSERT (ThreadCountPerCore != NULL);
> + CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;
> +
> + for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {
> + Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;
> + ThreadCountPerPackage[Location->Package]++;
> + ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;
> }
> - FreePool (CoresVisited);
>
> - for (Index = 0; Index <= Package; Index++) {
> - DEBUG ((DEBUG_INFO, "Package: %d, Valid Core : %d\n", Index, ValidCoreCountPerPackage[Index]));
> + for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {
> + if (ThreadCountPerPackage[PackageIndex] != 0) {
> + DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));
> + for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {
> + if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {
> + DEBUG ((
> + DEBUG_INFO, " P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex,
> + ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]
> + ));
> + }
> + }
> + }
> }
>
> CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);
> @@ -894,11 +890,11 @@ ProgramProcessorRegister (
> CPU_REGISTER_TABLE_ENTRY *RegisterTableEntryHead;
> volatile UINT32 *SemaphorePtr;
> UINT32 FirstThread;
> - UINT32 PackageThreadsCount;
> UINT32 CurrentThread;
> + UINT32 CurrentCore;
> UINTN ProcessorIndex;
> - UINTN ValidThreadCount;
> - UINT32 *ValidCoreCountPerPackage;
> + UINT32 *ThreadCountPerPackage;
> + UINT8 *ThreadCountPerCore;
> EFI_STATUS Status;
> UINT64 CurrentValue;
>
> @@ -1029,28 +1025,44 @@ ProgramProcessorRegister (
> switch (RegisterTableEntry->Value) {
> case CoreDepType:
> SemaphorePtr = CpuFlags->CoreSemaphoreCount;
> + ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
> +
> + CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
> //
> // Get Offset info for the first thread in the core which current thread belongs to.
> //
> - FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
> + FirstThread = CurrentCore * CpuStatus->MaxThreadCount;
> CurrentThread = FirstThread + ApLocation->Thread;
> +
> //
> - // First Notify all threads in current Core that this thread has ready.
> + // Different cores may have different valid threads in them. If driver maintail clearly
> + // thread index in different cores, the logic will be much complicated.
> + // Here driver just simply records the max thread number in all cores and use it as expect
> + // thread number for all cores.
> + // In below two steps logic, first current thread will Release semaphore for each thread
> + // in current core. Maybe some threads are not valid in this core, but driver don't
> + // care. Second, driver will let current thread wait semaphore for all valid threads in
> + // current core. Because only the valid threads will do release semaphore for this
> + // thread, driver here only need to wait the valid thread count.
> + //
> +
> + //
> + // First Notify ALL THREADs in current Core that this thread is ready.
> //
> for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> - LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
> + LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
> }
> //
> - // Second, check whether all valid threads in current core have ready.
> + // Second, check whether all VALID THREADs (not all threads) in current core are ready.
> //
> - for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> + for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
> LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
> }
> break;
>
> case PackageDepType:
> SemaphorePtr = CpuFlags->PackageSemaphoreCount;
> - ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
> + ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
> //
> // Get Offset info for the first thread in the package which current thread belongs to.
> //
> @@ -1058,18 +1070,13 @@ ProgramProcessorRegister (
> //
> // Get the possible threads count for current package.
> //
> - PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
> CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
> - //
> - // Get the valid thread count for current package.
> - //
> - ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
>
> //
> - // Different packages may have different valid cores in them. If driver maintail clearly
> - // cores number in different packages, the logic will be much complicated.
> - // Here driver just simply records the max core number in all packages and use it as expect
> - // core number for all packages.
> + // Different packages may have different valid threads in them. If driver maintail clearly
> + // thread index in different packages, the logic will be much complicated.
> + // Here driver just simply records the max thread number in all packages and use it as expect
> + // thread number for all packages.
> // In below two steps logic, first current thread will Release semaphore for each thread
> // in current package. Maybe some threads are not valid in this package, but driver don't
> // care. Second, driver will let current thread wait semaphore for all valid threads in
> @@ -1078,15 +1085,15 @@ ProgramProcessorRegister (
> //
>
> //
> - // First Notify ALL THREADS in current package that this thread has ready.
> + // First Notify ALL THREADS in current package that this thread is ready.
> //
> - for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
> - LibReleaseSemaphore ((UINT32 *) &SemaphorePtr[FirstThread + ProcessorIndex]);
> + for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
> + LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
> }
> //
> - // Second, check whether VALID THREADS (not all threads) in current package have ready.
> + // Second, check whether VALID THREADS (not all threads) in current package are ready.
> //
> - for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
> + for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
> LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);
> }
> break;
> diff --git a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> index 29e9ba92b4..9592430636 100644
> --- a/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> +++ b/UefiCpuPkg/PiSmmCpuDxeSmm/CpuS3.c
> @@ -1,7 +1,7 @@
> /** @file
> Code for Processor S3 restoration
>
> -Copyright (c) 2006 - 2019, Intel Corporation. All rights reserved.<BR>
> +Copyright (c) 2006 - 2020, Intel Corporation. All rights reserved.<BR>
> SPDX-License-Identifier: BSD-2-Clause-Patent
>
> **/
> @@ -235,11 +235,11 @@ ProgramProcessorRegister (
> CPU_REGISTER_TABLE_ENTRY *RegisterTableEntryHead;
> volatile UINT32 *SemaphorePtr;
> UINT32 FirstThread;
> - UINT32 PackageThreadsCount;
> UINT32 CurrentThread;
> + UINT32 CurrentCore;
> UINTN ProcessorIndex;
> - UINTN ValidThreadCount;
> - UINT32 *ValidCoreCountPerPackage;
> + UINT32 *ThreadCountPerPackage;
> + UINT8 *ThreadCountPerCore;
> EFI_STATUS Status;
> UINT64 CurrentValue;
>
> @@ -372,35 +372,52 @@ ProgramProcessorRegister (
> //
> ASSERT (
> (ApLocation != NULL) &&
> - (CpuStatus->ValidCoreCountPerPackage != 0) &&
> + (CpuStatus->ThreadCountPerPackage != 0) &&
> + (CpuStatus->ThreadCountPerCore != 0) &&
> (CpuFlags->CoreSemaphoreCount != NULL) &&
> (CpuFlags->PackageSemaphoreCount != NULL)
> );
> switch (RegisterTableEntry->Value) {
> case CoreDepType:
> SemaphorePtr = CpuFlags->CoreSemaphoreCount;
> + ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;
> +
> + CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;
> //
> // Get Offset info for the first thread in the core which current thread belongs to.
> //
> - FirstThread = (ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core) * CpuStatus->MaxThreadCount;
> + FirstThread = CurrentCore * CpuStatus->MaxThreadCount;
> CurrentThread = FirstThread + ApLocation->Thread;
> +
> //
> - // First Notify all threads in current Core that this thread has ready.
> + // Different cores may have different valid threads in them. If driver maintail clearly
> + // thread index in different cores, the logic will be much complicated.
> + // Here driver just simply records the max thread number in all cores and use it as expect
> + // thread number for all cores.
> + // In below two steps logic, first current thread will Release semaphore for each thread
> + // in current core. Maybe some threads are not valid in this core, but driver don't
> + // care. Second, driver will let current thread wait semaphore for all valid threads in
> + // current core. Because only the valid threads will do release semaphore for this
> + // thread, driver here only need to wait the valid thread count.
> + //
> +
> + //
> + // First Notify ALL THREADs in current Core that this thread is ready.
> //
> for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
> }
> //
> - // Second, check whether all valid threads in current core have ready.
> + // Second, check whether all VALID THREADs (not all threads) in current core are ready.
> //
> - for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {
> + for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {
> S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
> }
> break;
>
> case PackageDepType:
> SemaphorePtr = CpuFlags->PackageSemaphoreCount;
> - ValidCoreCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ValidCoreCountPerPackage;
> + ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;
> //
> // Get Offset info for the first thread in the package which current thread belongs to.
> //
> @@ -408,18 +425,13 @@ ProgramProcessorRegister (
> //
> // Get the possible threads count for current package.
> //
> - PackageThreadsCount = CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount;
> CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;
> - //
> - // Get the valid thread count for current package.
> - //
> - ValidThreadCount = CpuStatus->MaxThreadCount * ValidCoreCountPerPackage[ApLocation->Package];
>
> //
> - // Different packages may have different valid cores in them. If driver maintail clearly
> - // cores number in different packages, the logic will be much complicated.
> - // Here driver just simply records the max core number in all packages and use it as expect
> - // core number for all packages.
> + // Different packages may have different valid threads in them. If driver maintail clearly
> + // thread index in different packages, the logic will be much complicated.
> + // Here driver just simply records the max thread number in all packages and use it as expect
> + // thread number for all packages.
> // In below two steps logic, first current thread will Release semaphore for each thread
> // in current package. Maybe some threads are not valid in this package, but driver don't
> // care. Second, driver will let current thread wait semaphore for all valid threads in
> @@ -428,15 +440,15 @@ ProgramProcessorRegister (
> //
>
> //
> - // First Notify all threads in current package that this thread has ready.
> + // First Notify ALL THREADS in current package that this thread is ready.
> //
> - for (ProcessorIndex = 0; ProcessorIndex < PackageThreadsCount ; ProcessorIndex ++) {
> + for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {
> S3ReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);
> }
> //
> - // Second, check whether all valid threads in current package have ready.
> + // Second, check whether VALID THREADS (not all threads) in current package are ready.
> //
> - for (ProcessorIndex = 0; ProcessorIndex < ValidThreadCount; ProcessorIndex ++) {
> + for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {
> S3WaitForSemaphore (&SemaphorePtr[CurrentThread]);
> }
> break;
> @@ -1059,12 +1071,19 @@ GetAcpiCpuData (
>
> CpuStatus = &mAcpiCpuData.CpuStatus;
> CopyMem (CpuStatus, &AcpiCpuData->CpuStatus, sizeof (CPU_STATUS_INFORMATION));
> - if (AcpiCpuData->CpuStatus.ValidCoreCountPerPackage != 0) {
> - CpuStatus->ValidCoreCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> + if (AcpiCpuData->CpuStatus.ThreadCountPerPackage != 0) {
> + CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> sizeof (UINT32) * CpuStatus->PackageCount,
> - (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ValidCoreCountPerPackage
> + (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerPackage
> + );
> + ASSERT (CpuStatus->ThreadCountPerPackage != 0);
> + }
> + if (AcpiCpuData->CpuStatus.ThreadCountPerCore != 0) {
> + CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
> + sizeof (UINT8) * (CpuStatus->PackageCount * CpuStatus->MaxCoreCount),
> + (UINT32 *)(UINTN)AcpiCpuData->CpuStatus.ThreadCountPerCore
> );
> - ASSERT (CpuStatus->ValidCoreCountPerPackage != 0);
> + ASSERT (CpuStatus->ThreadCountPerCore != 0);
> }
> if (AcpiCpuData->ApLocation != 0) {
> mAcpiCpuData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)AllocateCopyPool (
>
prev parent reply other threads:[~2020-12-03 10:12 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-02 11:55 [PATCH] UefiCpuPkg/Feature: Support different thread count per core Ni, Ray
2020-12-03 7:26 ` Dong, Eric
2020-12-03 10:12 ` Laszlo Ersek [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-list from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c5c29bfe-fcc4-fac9-3b50-b52d2ed321c3@redhat.com \
--to=devel@edk2.groups.io \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox