From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by mx.groups.io with SMTP id smtpd.web11.43666.1635772685932348170 for ; Mon, 01 Nov 2021 06:18:12 -0700 Authentication-Results: mx.groups.io; dkim=missing; spf=pass (domain: intel.com, ip: 192.55.52.120, mailfrom: min.m.xu@intel.com) X-IronPort-AV: E=McAfee;i="6200,9189,10154"; a="229737643" X-IronPort-AV: E=Sophos;i="5.87,199,1631602800"; d="scan'208";a="229737643" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 01 Nov 2021 06:18:11 -0700 X-IronPort-AV: E=Sophos;i="5.87,199,1631602800"; d="scan'208";a="500035717" Received: from mxu9-mobl1.ccr.corp.intel.com ([10.255.29.216]) by orsmga008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 01 Nov 2021 06:18:08 -0700 From: "Min Xu" To: devel@edk2.groups.io Cc: Min Xu , Ard Biesheuvel , Jordan Justen , Brijesh Singh , Erdem Aktas , James Bottomley , Jiewen Yao , Tom Lendacky , Gerd Hoffmann Subject: [PATCH V3 15/29] OvmfPkg: Update SecEntry.nasm to support Tdx Date: Mon, 1 Nov 2021 21:16:04 +0800 Message-Id: <867e8a2aaf28c308b20a659057217453c6e38e00.1635769996.git.min.m.xu@intel.com> X-Mailer: git-send-email 2.29.2.windows.2 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: 8bit RFC: https://bugzilla.tianocore.org/show_bug.cgi?id=3429 In TDX BSP and APs go to the same entry point in SecEntry.nasm. BSP initializes the temporary stack and then jumps to SecMain, just as legacy Ovmf does. APs spin in a modified mailbox loop using initial mailbox structure. Its structure definition is in OvmfPkg/Include/IndustryStandard/IntelTdx.h. APs wait for command to see if the command is for it. If so, execute the command. There are 2 commands supported: - WakeUp: BSP issues this command to move APs to final OS spinloop and Mailbox in reserved memory. 
- AcceptPages: To mitigate the performance impact of accepting pages in SEC phase on BSP, BSP will parse memory resources and assign each AP the task of accepting a subset of pages. This command may be called several times until all memory resources are processed. In accepting pages, PageLevel may fall back to a smaller one if SIZE_MISMATCH error is returned. TdxCommondefs.inc is added which includes the common definitions used by the APs in SecEntry.nasm. Cc: Ard Biesheuvel Cc: Jordan Justen Cc: Brijesh Singh Cc: Erdem Aktas Cc: James Bottomley Cc: Jiewen Yao Cc: Tom Lendacky Cc: Gerd Hoffmann Signed-off-by: Min Xu --- OvmfPkg/Include/TdxCommondefs.inc | 51 +++++ OvmfPkg/Sec/SecMain.inf | 1 + OvmfPkg/Sec/X64/SecEntry.nasm | 314 ++++++++++++++++++++++++++++++ 3 files changed, 366 insertions(+) create mode 100644 OvmfPkg/Include/TdxCommondefs.inc diff --git a/OvmfPkg/Include/TdxCommondefs.inc b/OvmfPkg/Include/TdxCommondefs.inc new file mode 100644 index 000000000000..970eac96592a --- /dev/null +++ b/OvmfPkg/Include/TdxCommondefs.inc @@ -0,0 +1,51 @@ +;------------------------------------------------------------------------------ +; @file +; TDX Common definitions used by the APs in mailbox +; +; Copyright (c) 2021, Intel Corporation. All rights reserved.
+; SPDX-License-Identifier: BSD-2-Clause-Patent +; +;------------------------------------------------------------------------------ + +CommandOffset equ 00h +ApicidOffset equ 04h +WakeupVectorOffset equ 08h +OSArgsOffset equ 10h +FirmwareArgsOffset equ 800h +WakeupArgsRelocatedMailBox equ 800h +AcceptPageArgsPhysicalStart equ 800h +AcceptPageArgsPhysicalEnd equ 808h +AcceptPageArgsChunkSize equ 810h +AcceptPageArgsPageSize equ 818h +CpuArrivalOffset equ 900h +CpusExitingOffset equ 0a00h +TalliesOffset equ 0a08h +ErrorsOffset equ 0e08h + +SIZE_4KB equ 1000h +SIZE_2MB equ 200000h +SIZE_1GB equ 40000000h + +PAGE_ACCEPT_LEVEL_4K equ 0 +PAGE_ACCEPT_LEVEL_2M equ 1 +PAGE_ACCEPT_LEVEL_1G equ 2 + +TDX_PAGE_ALREADY_ACCEPTED equ 0x00000b0a +TDX_PAGE_SIZE_MISMATCH equ 0xc0000b0b + +; Errors of APs in Mailbox +ERROR_NON equ 0 +ERROR_INVALID_ACCEPT_PAGE_SIZE equ 1 +ERROR_ACCEPT_PAGE_ERROR equ 2 +ERROR_INVALID_FALLBACK_PAGE_LEVEL equ 3 + +MpProtectedModeWakeupCommandNoop equ 0 +MpProtectedModeWakeupCommandWakeup equ 1 +MpProtectedModeWakeupCommandSleep equ 2 +MpProtectedModeWakeupCommandAcceptPages equ 3 + +MailboxApicIdInvalid equ 0xffffffff +MailboxApicidBroadcast equ 0xfffffffe + +%define TDCALL_TDINFO 0x1 +%define TDCALL_TDACCEPTPAGE 0x6 diff --git a/OvmfPkg/Sec/SecMain.inf b/OvmfPkg/Sec/SecMain.inf index ea4b9611f52d..6083fa21a433 100644 --- a/OvmfPkg/Sec/SecMain.inf +++ b/OvmfPkg/Sec/SecMain.inf @@ -72,6 +72,7 @@ gEfiMdeModulePkgTokenSpaceGuid.PcdInitValueInTempStack gUefiOvmfPkgTokenSpaceGuid.PcdOvmfConfidentialComputingWorkAreaHeader gUefiOvmfPkgTokenSpaceGuid.PcdOvmfWorkAreaBase + gUefiOvmfPkgTokenSpaceGuid.PcdOvmfSecGhcbBackupBase [FeaturePcd] gUefiOvmfPkgTokenSpaceGuid.PcdSmmSmramRequire diff --git a/OvmfPkg/Sec/X64/SecEntry.nasm b/OvmfPkg/Sec/X64/SecEntry.nasm index 1cc680a70716..d0833db68410 100644 --- a/OvmfPkg/Sec/X64/SecEntry.nasm +++ b/OvmfPkg/Sec/X64/SecEntry.nasm @@ -10,12 +10,17 @@ ;------------------------------------------------------------------------------ 
#include +%include "TdxCommondefs.inc" DEFAULT REL SECTION .text extern ASM_PFX(SecCoreStartupWithStack) +%macro tdcall 0 + db 0x66, 0x0f, 0x01, 0xcc +%endmacro + ; ; SecCore Entry Point ; @@ -35,6 +40,32 @@ extern ASM_PFX(SecCoreStartupWithStack) global ASM_PFX(_ModuleEntryPoint) ASM_PFX(_ModuleEntryPoint): + ; + ; Guest type is stored in OVMF_WORK_AREA + ; + %define OVMF_WORK_AREA FixedPcdGet32 (PcdOvmfWorkAreaBase) + %define VM_GUEST_TYPE_TDX 2 + mov eax, OVMF_WORK_AREA + cmp byte[eax], VM_GUEST_TYPE_TDX + jne InitStack + + mov rax, TDCALL_TDINFO + tdcall + + ; + ; R8 [31:0] NUM_VCPUS + ; [63:32] MAX_VCPUS + ; R9 [31:0] VCPU_INDEX + ; Td Guest sets the VCPU0 as the BSP, others are the APs + ; APs jump to spinloop and get released by DXE's MpInitLib + ; + mov rax, r9 + and rax, 0xffff + test rax, rax + jne ParkAp + +InitStack: + ; ; Fill the temporary RAM with the initial stack value. ; The loop below will seed the heap as well, but that's harmless. @@ -67,3 +98,286 @@ ASM_PFX(_ModuleEntryPoint): sub rsp, 0x20 call ASM_PFX(SecCoreStartupWithStack) + ; + ; Note: BSP never gets here. APs will be unblocked by DXE + ; + ; R8 [31:0] NUM_VCPUS + ; [63:32] MAX_VCPUS + ; R9 [31:0] VCPU_INDEX + ; +ParkAp: + + mov rbp, r9 + +.do_wait_loop: + mov rsp, FixedPcdGet32 (PcdOvmfSecGhcbBackupBase) + + ; + ; register itself in [rsp + CpuArrivalOffset] + ; + mov rax, 1 + lock xadd dword [rsp + CpuArrivalOffset], eax + inc eax + +.check_arrival_cnt: + cmp eax, r8d + je .check_command + mov eax, dword[rsp + CpuArrivalOffset] + jmp .check_arrival_cnt + +.check_command: + mov eax, dword[rsp + CommandOffset] + cmp eax, MpProtectedModeWakeupCommandNoop + je .check_command + + cmp eax, MpProtectedModeWakeupCommandWakeup + je .do_wakeup + + cmp eax, MpProtectedModeWakeupCommandAcceptPages + jne .check_command + + ; + ; AP Accept Pages + ; + ; Accept Pages in TDX is time-consuming, especially for big memory. + ; One of the mitigations is to accept pages by BSP and APs in parallel. 
+ ; + ; For example, there are 4 CPUs (1 BSP and 3 APs). Totally there are + ; 1G memory to be accepted. + ; + ; BSP is responsible for the memory regions of: + ; Start : StartAddress + ChunkSize * (4) * Index + ; Length: ChunkSize + ; APs are responsible for the memory regions of: + ; Start : StartAddress + ChunkSize * (4) * Index + ChunkSize * CpuId + ; Length: ChunkSize + ; + ; TDCALL_TDACCEPTPAGE supports the PageSize of 4K and 2M. Sometimes when + ; the PageSize is 2M, TDX_PAGE_SIZE_MISMATCH is returned as the error code. + ; In this case, TDVF needs to fall back to 4k PageSize to accept again. + ; + ; If any errors happened in accept pages, an error code is recorded in + ; Mailbox [ErrorsOffset + CpuIndex] + ; +.ap_accept_page: + + ; + ; Clear the errors and fallback flag + ; + mov al, ERROR_NON + mov byte[rsp + ErrorsOffset + rbp], al + xor r12, r12 + + ; + ; Get PhysicalAddress/ChunkSize/PageSize + ; + mov rcx, [rsp + AcceptPageArgsPhysicalStart] + mov rbx, [rsp + AcceptPageArgsChunkSize] + + ; + ; Set AcceptPageLevel based on the AcceptPagesize + ; Currently only 2M/4K page size is acceptable + ; + mov r15, [rsp + AcceptPageArgsPageSize] + cmp r15, SIZE_4KB + je .set_4kb + cmp r15, SIZE_2MB + je .set_2mb + + mov al, ERROR_INVALID_ACCEPT_PAGE_SIZE + mov byte[rsp + ErrorsOffset + rbp], al + jmp .do_finish_command + +.set_4kb: + mov r15, PAGE_ACCEPT_LEVEL_4K + jmp .physical_address + +.set_2mb: + mov r15, PAGE_ACCEPT_LEVEL_2M + +.physical_address: + ; + ; PhysicalAddress += (CpuId * ChunkSize) + ; + xor rdx, rdx + mov eax, ebp + mul ebx + add rcx, rax + shl rdx, 32 + add rcx, rdx + +.do_accept_next_range: + ; + ; Make sure we don't accept page beyond ending page + ; This could happen if ChunkSize crosses the end of region + ; + cmp rcx, [rsp + AcceptPageArgsPhysicalEnd ] + jge .do_finish_command + + ; + ; Save starting address for this region + ; + mov r11, rcx + + ; + ; Size = MIN(ChunkSize, PhysicalEnd - PhysicalAddress); + ; + mov rax, [rsp + 
AcceptPageArgsPhysicalEnd] + sub rax, rcx + cmp rax, rbx + jge .do_accept_loop + mov rbx, rax + +.do_accept_loop: + ; + ; RCX: Accept address + ; R15: Accept Page Level + ; R12: Flag of fall back accept + ; + mov rax, TDCALL_TDACCEPTPAGE + xor rdx, rdx + or rcx, r15 + + tdcall + + ; + ; Check status code in RAX + ; + test rax, rax + jz .accept_success + + shr rax, 32 + cmp eax, TDX_PAGE_ALREADY_ACCEPTED + jz .already_accepted + + cmp eax, TDX_PAGE_SIZE_MISMATCH + jz .accept_size_mismatch + + ; + ; other error + ; + mov al, ERROR_ACCEPT_PAGE_ERROR + mov byte[rsp + ErrorsOffset + rbp], al + jmp .do_finish_command + +.accept_size_mismatch: + ; + ; Check the current PageLevel. + ; ACCEPT_LEVEL_4K is the least level and cannot fall back any more. + ; If in this case, just record the error and return + ; + cmp r15, PAGE_ACCEPT_LEVEL_4K + jne .do_fallback_accept + mov al, ERROR_INVALID_FALLBACK_PAGE_LEVEL + mov byte[rsp + ErrorsOffset + rbp], al + jmp .do_finish_command + +.do_fallback_accept: + ; + ; In fall back accept, just loop 512 times (2M = 512 * 4K) + ; Save the rcx in r13. + ; Decrease the PageLevel in R15. + ; R12 indicates it is in a fall back accept loop. + ; + mov r14, 512 + and rcx, ~0x3ULL + mov r13, rcx + xor rdx, rdx + dec r15 + mov r12, 1 + + jmp .do_accept_loop + +.accept_success: + ; + ; Keep track of how many accepts per cpu + ; + inc dword[rsp + TalliesOffset + rbp * 4] + + ; + ; R12 indicate whether it is a fall back accept + ; If it is a success of fall back accept + ; Just loop 512 times to .do_accept_loop + ; + test r12, r12 + jz .normal_accept_success + + ; + ; This is fallback accept success + ; + add rcx, SIZE_4KB + dec r14 + test r14, r14 + jz .fallback_accept_done + jmp .do_accept_loop + +.fallback_accept_done: + ; + ; Fall back accept done. 
+ ; Restore the start address to RCX from R13 + ; Clear the fall back accept flag + ; + mov rcx, r13 + inc r15 + xor r12, r12 + +.already_accepted: + ; + ; Handle the situation of fall back accept + ; + test r12, r12 + jnz .accept_success + +.normal_accept_success: + ; + ; Reduce accept size by a PageSize, and increment address + ; + mov r12, [rsp + AcceptPageArgsPageSize] + sub rbx, r12 + add rcx, r12 + xor r12, r12 + + ; + ; We may be given multiple pages to accept, make sure we + ; aren't done + ; + test rbx, rbx + jne .do_accept_loop + + ; + ; Restore address before, and then increment by stride (num-cpus * ChunkSize) + ; + xor rdx, rdx + mov rcx, r11 + mov eax, r8d + mov ebx, [rsp + AcceptPageArgsChunkSize] + mul ebx + add rcx, rax + shl rdx, 32 + add rcx, rdx + jmp .do_accept_next_range + +.do_finish_command: + mov eax, 0FFFFFFFFh + lock xadd dword [rsp + CpusExitingOffset], eax + dec eax + +.check_exiting_cnt: + cmp eax, 0 + je .do_wait_loop + mov eax, dword[rsp + CpusExitingOffset] + jmp .check_exiting_cnt + +.do_wakeup: + ; + ; BSP sets these variables before unblocking APs + ; RAX: WakeupVectorOffset + ; RBX: Relocated mailbox address + ; RBP: vCpuId + ; + mov rax, 0 + mov eax, dword[rsp + WakeupVectorOffset] + mov rbx, [rsp + WakeupArgsRelocatedMailBox] + nop + jmp rax + jmp $ -- 2.29.2.windows.2