* [PATCH v3 1/4] MdeModulePkg/EbcDxe AARCH64: clean up comment style in ASM file
2016-08-17 14:59 [PATCH v3 0/4] MdeModulePkg/EbcDxe: AARCH64 improvements Ard Biesheuvel
@ 2016-08-17 14:59 ` Ard Biesheuvel
2016-08-26 11:06 ` Leif Lindholm
2016-08-17 14:59 ` [PATCH v3 2/4] MdeModulePkg/EbcDxe AARCH64: use a fixed size thunk structure Ard Biesheuvel
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Ard Biesheuvel @ 2016-08-17 14:59 UTC (permalink / raw)
To: edk2-devel, leif.lindholm, jbrasen, feng.tian, star.zeng,
daniil.egranov
Cc: Ard Biesheuvel
Change to consistent // style comments. Also, remove bogus global
definitions for external functions, and move the real exports to
the top of the file.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 111 +++++++++-----------
1 file changed, 52 insertions(+), 59 deletions(-)
diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
index e858227586a8..a9678432d549 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
@@ -1,40 +1,35 @@
-#/** @file
-#
-# This code provides low level routines that support the Virtual Machine
-# for option ROMs.
-#
-# Copyright (c) 2015, The Linux Foundation. All rights reserved.
-# Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>
-# This program and the accompanying materials
-# are licensed and made available under the terms and conditions of the BSD License
-# which accompanies this distribution. The full text of the license may be found at
-# http://opensource.org/licenses/bsd-license.php
-#
-# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
-# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
-#
-#**/
-
-#---------------------------------------------------------------------------
-# Equate files needed.
-#---------------------------------------------------------------------------
-
-ASM_GLOBAL ASM_PFX(CopyMem);
-ASM_GLOBAL ASM_PFX(EbcInterpret);
-ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
-
-#****************************************************************************
-# EbcLLCALLEX
-#
-# This function is called to execute an EBC CALLEX instruction.
-# This instruction requires that we thunk out to external native
-# code. For AArch64, we copy the VM stack into the main stack and then pop
-# the first 8 arguments off according to the AArch64 Procedure Call Standard
-# On return, we restore the stack pointer to its original location.
-#
-#****************************************************************************
-# UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
-ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative);
+///** @file
+//
+// This code provides low level routines that support the Virtual Machine
+// for option ROMs.
+//
+// Copyright (c) 2015, The Linux Foundation. All rights reserved.
+// Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>
+// This program and the accompanying materials
+// are licensed and made available under the terms and conditions of the BSD License
+// which accompanies this distribution. The full text of the license may be found at
+// http://opensource.org/licenses/bsd-license.php
+//
+// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
+// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+//
+//**/
+
+ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative)
+ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret)
+ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint)
+
+//****************************************************************************
+// EbcLLCALLEX
+//
+// This function is called to execute an EBC CALLEX instruction.
+// This instruction requires that we thunk out to external native
+// code. For AArch64, we copy the VM stack into the main stack and then pop
+// the first 8 arguments off according to the AArch64 Procedure Call Standard
+// On return, we restore the stack pointer to its original location.
+//
+//****************************************************************************
+// UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
ASM_PFX(EbcLLCALLEXNative):
stp x19, x20, [sp, #-16]!
stp x29, x30, [sp, #-16]!
@@ -61,16 +56,15 @@ ASM_PFX(EbcLLCALLEXNative):
ret
-#****************************************************************************
-# EbcLLEbcInterpret
-#
-# This function is called by the thunk code to handle an Native to EBC call
-# This can handle up to 16 arguments (1-8 on in x0-x7, 9-16 are on the stack)
-# x9 contains the Entry point that will be the first argument when
-# EBCInterpret is called.
-#
-#****************************************************************************
-ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret);
+//****************************************************************************
+// EbcLLEbcInterpret
+//
+// This function is called by the thunk code to handle an Native to EBC call
+// This can handle up to 16 arguments (1-8 on in x0-x7, 9-16 are on the stack)
+// x9 contains the Entry point that will be the first argument when
+// EBCInterpret is called.
+//
+//****************************************************************************
ASM_PFX(EbcLLEbcInterpret):
stp x29, x30, [sp, #-16]!
@@ -105,7 +99,7 @@ ASM_PFX(EbcLLEbcInterpret):
mov x1, x0
mov x0, x9
- # call C-code
+ // call C-code
bl ASM_PFX(EbcInterpret)
add sp, sp, #80
@@ -113,23 +107,22 @@ ASM_PFX(EbcLLEbcInterpret):
ret
-#****************************************************************************
-# EbcLLExecuteEbcImageEntryPoint
-#
-# This function is called by the thunk code to handle the image entry point
-# x9 contains the Entry point that will be the first argument when
-# ExecuteEbcImageEntryPoint is called.
-#
-#****************************************************************************
-ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint);
+//****************************************************************************
+// EbcLLExecuteEbcImageEntryPoint
+//
+// This function is called by the thunk code to handle the image entry point
+// x9 contains the Entry point that will be the first argument when
+// ExecuteEbcImageEntryPoint is called.
+//
+//****************************************************************************
ASM_PFX(EbcLLExecuteEbcImageEntryPoint):
stp x29, x30, [sp, #-16]!
- # build new paramater calling convention
+ // build new paramater calling convention
mov x2, x1
mov x1, x0
mov x0, x9
- # call C-code
+ // call C-code
bl ASM_PFX(ExecuteEbcImageEntryPoint)
ldp x29, x30, [sp], #16
ret
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/4] MdeModulePkg/EbcDxe AARCH64: clean up comment style in ASM file
2016-08-17 14:59 ` [PATCH v3 1/4] MdeModulePkg/EbcDxe AARCH64: clean up comment style in ASM file Ard Biesheuvel
@ 2016-08-26 11:06 ` Leif Lindholm
0 siblings, 0 replies; 12+ messages in thread
From: Leif Lindholm @ 2016-08-26 11:06 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: edk2-devel, jbrasen, feng.tian, star.zeng, daniil.egranov
On Wed, Aug 17, 2016 at 04:59:02PM +0200, Ard Biesheuvel wrote:
> Change to consistent // style comments. Also, remove bogus global
> definitions for external functions, and move the real exports to
> the top of the file.
>
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 111 +++++++++-----------
> 1 file changed, 52 insertions(+), 59 deletions(-)
>
> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> index e858227586a8..a9678432d549 100644
> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> @@ -1,40 +1,35 @@
> -#/** @file
> -#
> -# This code provides low level routines that support the Virtual Machine
> -# for option ROMs.
> -#
> -# Copyright (c) 2015, The Linux Foundation. All rights reserved.
> -# Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>
> -# This program and the accompanying materials
> -# are licensed and made available under the terms and conditions of the BSD License
> -# which accompanies this distribution. The full text of the license may be found at
> -# http://opensource.org/licenses/bsd-license.php
> -#
> -# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
> -# WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
> -#
> -#**/
> -
> -#---------------------------------------------------------------------------
> -# Equate files needed.
> -#---------------------------------------------------------------------------
> -
> -ASM_GLOBAL ASM_PFX(CopyMem);
> -ASM_GLOBAL ASM_PFX(EbcInterpret);
> -ASM_GLOBAL ASM_PFX(ExecuteEbcImageEntryPoint);
> -
> -#****************************************************************************
> -# EbcLLCALLEX
> -#
> -# This function is called to execute an EBC CALLEX instruction.
> -# This instruction requires that we thunk out to external native
> -# code. For AArch64, we copy the VM stack into the main stack and then pop
> -# the first 8 arguments off according to the AArch64 Procedure Call Standard
> -# On return, we restore the stack pointer to its original location.
> -#
> -#****************************************************************************
> -# UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
> -ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative);
> +///** @file
> +//
> +// This code provides low level routines that support the Virtual Machine
> +// for option ROMs.
> +//
> +// Copyright (c) 2015, The Linux Foundation. All rights reserved.
> +// Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>
> +// This program and the accompanying materials
> +// are licensed and made available under the terms and conditions of the BSD License
> +// which accompanies this distribution. The full text of the license may be found at
> +// http://opensource.org/licenses/bsd-license.php
> +//
> +// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
> +// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
> +//
> +//**/
> +
> +ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative)
> +ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret)
> +ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint)
> +
> +//****************************************************************************
> +// EbcLLCALLEX
> +//
> +// This function is called to execute an EBC CALLEX instruction.
> +// This instruction requires that we thunk out to external native
> +// code. For AArch64, we copy the VM stack into the main stack and then pop
> +// the first 8 arguments off according to the AArch64 Procedure Call Standard
> +// On return, we restore the stack pointer to its original location.
> +//
> +//****************************************************************************
> +// UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
> ASM_PFX(EbcLLCALLEXNative):
> stp x19, x20, [sp, #-16]!
> stp x29, x30, [sp, #-16]!
> @@ -61,16 +56,15 @@ ASM_PFX(EbcLLCALLEXNative):
>
> ret
>
> -#****************************************************************************
> -# EbcLLEbcInterpret
> -#
> -# This function is called by the thunk code to handle an Native to EBC call
> -# This can handle up to 16 arguments (1-8 on in x0-x7, 9-16 are on the stack)
> -# x9 contains the Entry point that will be the first argument when
> -# EBCInterpret is called.
> -#
> -#****************************************************************************
> -ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret);
> +//****************************************************************************
> +// EbcLLEbcInterpret
> +//
> +// This function is called by the thunk code to handle an Native to EBC call
> +// This can handle up to 16 arguments (1-8 on in x0-x7, 9-16 are on the stack)
> +// x9 contains the Entry point that will be the first argument when
> +// EBCInterpret is called.
> +//
> +//****************************************************************************
> ASM_PFX(EbcLLEbcInterpret):
> stp x29, x30, [sp, #-16]!
>
> @@ -105,7 +99,7 @@ ASM_PFX(EbcLLEbcInterpret):
> mov x1, x0
> mov x0, x9
>
> - # call C-code
> + // call C-code
> bl ASM_PFX(EbcInterpret)
> add sp, sp, #80
>
> @@ -113,23 +107,22 @@ ASM_PFX(EbcLLEbcInterpret):
>
> ret
>
> -#****************************************************************************
> -# EbcLLExecuteEbcImageEntryPoint
> -#
> -# This function is called by the thunk code to handle the image entry point
> -# x9 contains the Entry point that will be the first argument when
> -# ExecuteEbcImageEntryPoint is called.
> -#
> -#****************************************************************************
> -ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint);
> +//****************************************************************************
> +// EbcLLExecuteEbcImageEntryPoint
> +//
> +// This function is called by the thunk code to handle the image entry point
> +// x9 contains the Entry point that will be the first argument when
> +// ExecuteEbcImageEntryPoint is called.
> +//
> +//****************************************************************************
> ASM_PFX(EbcLLExecuteEbcImageEntryPoint):
> stp x29, x30, [sp, #-16]!
> - # build new paramater calling convention
> + // build new paramater calling convention
Fix typo while you're at it (paramater -> parameter)?
With that:
Reviewed-by: Leif Lindholm <leif.lindholm@linaro.org>
> mov x2, x1
> mov x1, x0
> mov x0, x9
>
> - # call C-code
> + // call C-code
> bl ASM_PFX(ExecuteEbcImageEntryPoint)
> ldp x29, x30, [sp], #16
> ret
> --
> 2.7.4
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 2/4] MdeModulePkg/EbcDxe AARCH64: use a fixed size thunk structure
2016-08-17 14:59 [PATCH v3 0/4] MdeModulePkg/EbcDxe: AARCH64 improvements Ard Biesheuvel
2016-08-17 14:59 ` [PATCH v3 1/4] MdeModulePkg/EbcDxe AARCH64: clean up comment style in ASM file Ard Biesheuvel
@ 2016-08-17 14:59 ` Ard Biesheuvel
2016-08-26 12:28 ` Leif Lindholm
2016-08-17 14:59 ` [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk Ard Biesheuvel
2016-08-17 14:59 ` [PATCH v3 4/4] MdeModulePkg/EbcDxe AARCH64: simplify interpreter entry point thunks Ard Biesheuvel
3 siblings, 1 reply; 12+ messages in thread
From: Ard Biesheuvel @ 2016-08-17 14:59 UTC (permalink / raw)
To: edk2-devel, leif.lindholm, jbrasen, feng.tian, star.zeng,
daniil.egranov
Cc: Ard Biesheuvel
The thunk generation is needlessly complex, given that it attempts to
deal with variable length instructions, which don't exist on AArch64.
So replace it with a simple template coded in assembler, with a matching
struct definition in C. That way, we can create and manipulate the thunks
easily without looping over the instructions looking for 'magic' numbers.
Also, use x16 rather than x9, since it is the architectural register to
use for thunks/veneers.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 32 ++++-
MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 133 +++++---------------
2 files changed, 58 insertions(+), 107 deletions(-)
diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
index a9678432d549..cb7a70b5a4f8 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
@@ -3,8 +3,10 @@
// This code provides low level routines that support the Virtual Machine
// for option ROMs.
//
-// Copyright (c) 2015, The Linux Foundation. All rights reserved.
+// Copyright (c) 2016, Linaro, Ltd. All rights reserved.<BR>
+// Copyright (c) 2015, The Linux Foundation. All rights reserved.<BR>
// Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>
+//
// This program and the accompanying materials
// are licensed and made available under the terms and conditions of the BSD License
// which accompanies this distribution. The full text of the license may be found at
@@ -19,6 +21,8 @@ ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative)
ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret)
ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint)
+ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
+
//****************************************************************************
// EbcLLCALLEX
//
@@ -61,7 +65,7 @@ ASM_PFX(EbcLLCALLEXNative):
//
// This function is called by the thunk code to handle an Native to EBC call
// This can handle up to 16 arguments (1-8 on in x0-x7, 9-16 are on the stack)
-// x9 contains the Entry point that will be the first argument when
+// x16 contains the Entry point that will be the first argument when
// EBCInterpret is called.
//
//****************************************************************************
@@ -97,7 +101,7 @@ ASM_PFX(EbcLLEbcInterpret):
mov x3, x2
mov x2, x1
mov x1, x0
- mov x0, x9
+ mov x0, x16
// call C-code
bl ASM_PFX(EbcInterpret)
@@ -111,7 +115,7 @@ ASM_PFX(EbcLLEbcInterpret):
// EbcLLExecuteEbcImageEntryPoint
//
// This function is called by the thunk code to handle the image entry point
-// x9 contains the Entry point that will be the first argument when
+// x16 contains the Entry point that will be the third argument when
// ExecuteEbcImageEntryPoint is called.
//
//****************************************************************************
@@ -120,9 +124,27 @@ ASM_PFX(EbcLLExecuteEbcImageEntryPoint):
// build new paramater calling convention
mov x2, x1
mov x1, x0
- mov x0, x9
+ mov x0, x16
// call C-code
bl ASM_PFX(ExecuteEbcImageEntryPoint)
ldp x29, x30, [sp], #16
ret
+
+//****************************************************************************
+// mEbcInstructionBufferTemplate
+//****************************************************************************
+ .section ".rodata", "a"
+ .align 3
+ASM_PFX(mEbcInstructionBufferTemplate):
+ adr x17, 0f
+ ldp x16, x17, [x17]
+ br x17
+
+ //
+ // Add a magic code here to help the VM recognize the thunk.
+ //
+ hlt #0xEBC
+
+0: .quad 0 // EBC_ENTRYPOINT_SIGNATURE
+ .quad 0 // EBC_LL_EBC_ENTRYPOINT_SIGNATURE
diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
index 23261a070143..a5f21f400274 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
@@ -2,8 +2,10 @@
This module contains EBC support routines that are customized based on
the target AArch64 processor.
-Copyright (c) 2015, The Linux Foundation. All rights reserved.
+Copyright (c) 2016, Linaro, Ltd. All rights reserved.<BR>
+Copyright (c) 2015, The Linux Foundation. All rights reserved.<BR>
Copyright (c) 2006 - 2014, Intel Corporation. All rights reserved.<BR>
+
This program and the accompanying materials
are licensed and made available under the terms and conditions of the BSD License
which accompanies this distribution. The full text of the license may be found at
@@ -22,47 +24,16 @@ WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
//
#define STACK_REMAIN_SIZE (1024 * 4)
-//
-// This is instruction buffer used to create EBC thunk
-//
-#define EBC_MAGIC_SIGNATURE 0xCA112EBCCA112EBCull
-#define EBC_ENTRYPOINT_SIGNATURE 0xAFAFAFAFAFAFAFAFull
-#define EBC_LL_EBC_ENTRYPOINT_SIGNATURE 0xFAFAFAFAFAFAFAFAull
-UINT8 mInstructionBufferTemplate[] = {
- 0x03, 0x00, 0x00, 0x14, //b pc+16
- //
- // Add a magic code here to help the VM recognize the thunk..
- //
- (UINT8)(EBC_MAGIC_SIGNATURE & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 8) & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 16) & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 24) & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 32) & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 40) & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 48) & 0xFF),
- (UINT8)((EBC_MAGIC_SIGNATURE >> 56) & 0xFF),
- 0x69, 0x00, 0x00, 0x58, //ldr x9, #32
- 0x8A, 0x00, 0x00, 0x58, //ldr x10, #40
- 0x05, 0x00, 0x00, 0x14, //b pc+32
- (UINT8)(EBC_ENTRYPOINT_SIGNATURE & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 8) & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 16) & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 24) & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 32) & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 40) & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 48) & 0xFF),
- (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 56) & 0xFF),
- (UINT8)(EBC_LL_EBC_ENTRYPOINT_SIGNATURE & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 8) & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 16) & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 24) & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 32) & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 40) & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 48) & 0xFF),
- (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 56) & 0xFF),
- 0x40, 0x01, 0x1F, 0xD6 //br x10
-
-};
+#pragma pack(1)
+typedef struct {
+ UINT32 Instr[3];
+ UINT32 Magic;
+ UINT64 EbcEntryPoint;
+ UINT64 EbcLlEntryPoint;
+} EBC_INSTRUCTION_BUFFER;
+#pragma pack()
+
+extern CONST EBC_INSTRUCTION_BUFFER mEbcInstructionBufferTemplate;
/**
Begin executing an EBC image.
@@ -414,10 +385,7 @@ EbcCreateThunks (
IN UINT32 Flags
)
{
- UINT8 *Ptr;
- UINT8 *ThunkBase;
- UINT32 Index;
- INT32 ThunkSize;
+ EBC_INSTRUCTION_BUFFER *InstructionBuffer;
//
// Check alignment of pointer to EBC code
@@ -426,51 +394,38 @@ EbcCreateThunks (
return EFI_INVALID_PARAMETER;
}
- ThunkSize = sizeof(mInstructionBufferTemplate);
-
- Ptr = AllocatePool (sizeof(mInstructionBufferTemplate));
-
- if (Ptr == NULL) {
+ InstructionBuffer = AllocatePool (sizeof (EBC_INSTRUCTION_BUFFER));
+ if (InstructionBuffer == NULL) {
return EFI_OUT_OF_RESOURCES;
}
- //
- // Print(L"Allocate TH: 0x%X\n", (UINT32)Ptr);
- //
- // Save the start address so we can add a pointer to it to a list later.
- //
- ThunkBase = Ptr;
//
// Give them the address of our buffer we're going to fix up
//
- *Thunk = (VOID *) Ptr;
+ *Thunk = InstructionBuffer;
//
// Copy whole thunk instruction buffer template
//
- CopyMem (Ptr, mInstructionBufferTemplate, sizeof(mInstructionBufferTemplate));
+ CopyMem (InstructionBuffer, &mEbcInstructionBufferTemplate,
+ sizeof (EBC_INSTRUCTION_BUFFER));
//
// Patch EbcEntryPoint and EbcLLEbcInterpret
//
- for (Index = 0; Index < sizeof(mInstructionBufferTemplate) - sizeof(UINTN); Index++) {
- if (*(UINTN *)&Ptr[Index] == EBC_ENTRYPOINT_SIGNATURE) {
- *(UINTN *)&Ptr[Index] = (UINTN)EbcEntryPoint;
- }
- if (*(UINTN *)&Ptr[Index] == EBC_LL_EBC_ENTRYPOINT_SIGNATURE) {
- if ((Flags & FLAG_THUNK_ENTRY_POINT) != 0) {
- *(UINTN *)&Ptr[Index] = (UINTN)EbcLLExecuteEbcImageEntryPoint;
- } else {
- *(UINTN *)&Ptr[Index] = (UINTN)EbcLLEbcInterpret;
- }
- }
+ InstructionBuffer->EbcEntryPoint = (UINT64)EbcEntryPoint;
+ if ((Flags & FLAG_THUNK_ENTRY_POINT) != 0) {
+ InstructionBuffer->EbcLlEntryPoint = (UINT64)EbcLLExecuteEbcImageEntryPoint;
+ } else {
+ InstructionBuffer->EbcLlEntryPoint = (UINT64)EbcLLEbcInterpret;
}
//
// Add the thunk to the list for this image. Do this last since the add
// function flushes the cache for us.
//
- EbcAddImageThunk (ImageHandle, (VOID *) ThunkBase, ThunkSize);
+ EbcAddImageThunk (ImageHandle, InstructionBuffer,
+ sizeof (EBC_INSTRUCTION_BUFFER));
return EFI_SUCCESS;
}
@@ -500,40 +455,15 @@ EbcLLCALLEX (
IN UINT8 Size
)
{
- UINTN IsThunk;
- UINTN TargetEbcAddr;
- UINT8 InstructionBuffer[sizeof(mInstructionBufferTemplate)];
- UINTN Index;
- UINTN IndexOfEbcEntrypoint;
-
- IsThunk = 1;
- TargetEbcAddr = 0;
- IndexOfEbcEntrypoint = 0;
+ CONST EBC_INSTRUCTION_BUFFER *InstructionBuffer;
//
// Processor specific code to check whether the callee is a thunk to EBC.
//
- CopyMem (InstructionBuffer, (VOID *)FuncAddr, sizeof(InstructionBuffer));
- //
- // Fill the signature according to mInstructionBufferTemplate
- //
- for (Index = 0; Index < sizeof(mInstructionBufferTemplate) - sizeof(UINTN); Index++) {
- if (*(UINTN *)&mInstructionBufferTemplate[Index] == EBC_ENTRYPOINT_SIGNATURE) {
- *(UINTN *)&InstructionBuffer[Index] = EBC_ENTRYPOINT_SIGNATURE;
- IndexOfEbcEntrypoint = Index;
- }
- if (*(UINTN *)&mInstructionBufferTemplate[Index] == EBC_LL_EBC_ENTRYPOINT_SIGNATURE) {
- *(UINTN *)&InstructionBuffer[Index] = EBC_LL_EBC_ENTRYPOINT_SIGNATURE;
- }
- }
- //
- // Check if we need thunk to native
- //
- if (CompareMem (InstructionBuffer, mInstructionBufferTemplate, sizeof(mInstructionBufferTemplate)) != 0) {
- IsThunk = 0;
- }
+ InstructionBuffer = (EBC_INSTRUCTION_BUFFER *)FuncAddr;
- if (IsThunk == 1){
+ if (CompareMem (InstructionBuffer, &mEbcInstructionBufferTemplate,
+ sizeof(EBC_INSTRUCTION_BUFFER) - 2 * sizeof (UINT64)) == 0) {
//
// The callee is a thunk to EBC, adjust the stack pointer down 16 bytes and
// put our return address and frame pointer on the VM stack.
@@ -545,8 +475,7 @@ EbcLLCALLEX (
VmPtr->Gpr[0] -= 8;
VmWriteMem64 (VmPtr, (UINTN) VmPtr->Gpr[0], (UINT64) (UINTN) (VmPtr->Ip + Size));
- CopyMem (&TargetEbcAddr, (UINT8 *)FuncAddr + IndexOfEbcEntrypoint, sizeof(UINTN));
- VmPtr->Ip = (VMIP) (UINTN) TargetEbcAddr;
+ VmPtr->Ip = (VMIP) InstructionBuffer->EbcEntryPoint;
} else {
//
// The callee is not a thunk to EBC, call native code,
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 2/4] MdeModulePkg/EbcDxe AARCH64: use a fixed size thunk structure
2016-08-17 14:59 ` [PATCH v3 2/4] MdeModulePkg/EbcDxe AARCH64: use a fixed size thunk structure Ard Biesheuvel
@ 2016-08-26 12:28 ` Leif Lindholm
0 siblings, 0 replies; 12+ messages in thread
From: Leif Lindholm @ 2016-08-26 12:28 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: edk2-devel, jbrasen, feng.tian, star.zeng, daniil.egranov
On Wed, Aug 17, 2016 at 04:59:03PM +0200, Ard Biesheuvel wrote:
> The thunk generation is needlessly complex, given that it attempts to
> deal with variable length instructions, which don't exist on AArch64.
>
> So replace it with a simple template coded in assembler, with a matching
> struct definition in C. That way, we can create and manipulate the thunks
> easily without looping over the instructions looking for 'magic' numbers.
>
> Also, use x16 rather than x9, since it is the architectural register to
> use for thunks/veneers.
>
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Leif Lindholm <leif.lindholm@linaro.org>
> ---
> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 32 ++++-
> MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 133 +++++---------------
> 2 files changed, 58 insertions(+), 107 deletions(-)
>
> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> index a9678432d549..cb7a70b5a4f8 100644
> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> @@ -3,8 +3,10 @@
> // This code provides low level routines that support the Virtual Machine
> // for option ROMs.
> //
> -// Copyright (c) 2015, The Linux Foundation. All rights reserved.
> +// Copyright (c) 2016, Linaro, Ltd. All rights reserved.<BR>
> +// Copyright (c) 2015, The Linux Foundation. All rights reserved.<BR>
> // Copyright (c) 2007 - 2014, Intel Corporation. All rights reserved.<BR>
> +//
> // This program and the accompanying materials
> // are licensed and made available under the terms and conditions of the BSD License
> // which accompanies this distribution. The full text of the license may be found at
> @@ -19,6 +21,8 @@ ASM_GLOBAL ASM_PFX(EbcLLCALLEXNative)
> ASM_GLOBAL ASM_PFX(EbcLLEbcInterpret)
> ASM_GLOBAL ASM_PFX(EbcLLExecuteEbcImageEntryPoint)
>
> +ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
> +
> //****************************************************************************
> // EbcLLCALLEX
> //
> @@ -61,7 +65,7 @@ ASM_PFX(EbcLLCALLEXNative):
> //
> // This function is called by the thunk code to handle an Native to EBC call
> // This can handle up to 16 arguments (1-8 on in x0-x7, 9-16 are on the stack)
> -// x9 contains the Entry point that will be the first argument when
> +// x16 contains the Entry point that will be the first argument when
> // EBCInterpret is called.
> //
> //****************************************************************************
> @@ -97,7 +101,7 @@ ASM_PFX(EbcLLEbcInterpret):
> mov x3, x2
> mov x2, x1
> mov x1, x0
> - mov x0, x9
> + mov x0, x16
>
> // call C-code
> bl ASM_PFX(EbcInterpret)
> @@ -111,7 +115,7 @@ ASM_PFX(EbcLLEbcInterpret):
> // EbcLLExecuteEbcImageEntryPoint
> //
> // This function is called by the thunk code to handle the image entry point
> -// x9 contains the Entry point that will be the first argument when
> +// x16 contains the Entry point that will be the third argument when
> // ExecuteEbcImageEntryPoint is called.
> //
> //****************************************************************************
> @@ -120,9 +124,27 @@ ASM_PFX(EbcLLExecuteEbcImageEntryPoint):
> // build new paramater calling convention
> mov x2, x1
> mov x1, x0
> - mov x0, x9
> + mov x0, x16
>
> // call C-code
> bl ASM_PFX(ExecuteEbcImageEntryPoint)
> ldp x29, x30, [sp], #16
> ret
> +
> +//****************************************************************************
> +// mEbcInstructionBufferTemplate
> +//****************************************************************************
> + .section ".rodata", "a"
> + .align 3
> +ASM_PFX(mEbcInstructionBufferTemplate):
> + adr x17, 0f
> + ldp x16, x17, [x17]
> + br x17
> +
> + //
> + // Add a magic code here to help the VM recognize the thunk.
> + //
> + hlt #0xEBC
> +
> +0: .quad 0 // EBC_ENTRYPOINT_SIGNATURE
> + .quad 0 // EBC_LL_EBC_ENTRYPOINT_SIGNATURE
> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
> index 23261a070143..a5f21f400274 100644
> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
> @@ -2,8 +2,10 @@
> This module contains EBC support routines that are customized based on
> the target AArch64 processor.
>
> -Copyright (c) 2015, The Linux Foundation. All rights reserved.
> +Copyright (c) 2016, Linaro, Ltd. All rights reserved.<BR>
> +Copyright (c) 2015, The Linux Foundation. All rights reserved.<BR>
> Copyright (c) 2006 - 2014, Intel Corporation. All rights reserved.<BR>
> +
> This program and the accompanying materials
> are licensed and made available under the terms and conditions of the BSD License
> which accompanies this distribution. The full text of the license may be found at
> @@ -22,47 +24,16 @@ WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
> //
> #define STACK_REMAIN_SIZE (1024 * 4)
>
> -//
> -// This is instruction buffer used to create EBC thunk
> -//
> -#define EBC_MAGIC_SIGNATURE 0xCA112EBCCA112EBCull
> -#define EBC_ENTRYPOINT_SIGNATURE 0xAFAFAFAFAFAFAFAFull
> -#define EBC_LL_EBC_ENTRYPOINT_SIGNATURE 0xFAFAFAFAFAFAFAFAull
> -UINT8 mInstructionBufferTemplate[] = {
> - 0x03, 0x00, 0x00, 0x14, //b pc+16
> - //
> - // Add a magic code here to help the VM recognize the thunk..
> - //
> - (UINT8)(EBC_MAGIC_SIGNATURE & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 8) & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 16) & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 24) & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 32) & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 40) & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 48) & 0xFF),
> - (UINT8)((EBC_MAGIC_SIGNATURE >> 56) & 0xFF),
> - 0x69, 0x00, 0x00, 0x58, //ldr x9, #32
> - 0x8A, 0x00, 0x00, 0x58, //ldr x10, #40
> - 0x05, 0x00, 0x00, 0x14, //b pc+32
> - (UINT8)(EBC_ENTRYPOINT_SIGNATURE & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 8) & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 16) & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 24) & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 32) & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 40) & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 48) & 0xFF),
> - (UINT8)((EBC_ENTRYPOINT_SIGNATURE >> 56) & 0xFF),
> - (UINT8)(EBC_LL_EBC_ENTRYPOINT_SIGNATURE & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 8) & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 16) & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 24) & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 32) & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 40) & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 48) & 0xFF),
> - (UINT8)((EBC_LL_EBC_ENTRYPOINT_SIGNATURE >> 56) & 0xFF),
> - 0x40, 0x01, 0x1F, 0xD6 //br x10
> -
> -};
> +#pragma pack(1)
> +typedef struct {
> + UINT32 Instr[3];
> + UINT32 Magic;
> + UINT64 EbcEntryPoint;
> + UINT64 EbcLlEntryPoint;
> +} EBC_INSTRUCTION_BUFFER;
> +#pragma pack()
> +
> +extern CONST EBC_INSTRUCTION_BUFFER mEbcInstructionBufferTemplate;
>
> /**
> Begin executing an EBC image.
> @@ -414,10 +385,7 @@ EbcCreateThunks (
> IN UINT32 Flags
> )
> {
> - UINT8 *Ptr;
> - UINT8 *ThunkBase;
> - UINT32 Index;
> - INT32 ThunkSize;
> + EBC_INSTRUCTION_BUFFER *InstructionBuffer;
>
> //
> // Check alignment of pointer to EBC code
> @@ -426,51 +394,38 @@ EbcCreateThunks (
> return EFI_INVALID_PARAMETER;
> }
>
> - ThunkSize = sizeof(mInstructionBufferTemplate);
> -
> - Ptr = AllocatePool (sizeof(mInstructionBufferTemplate));
> -
> - if (Ptr == NULL) {
> + InstructionBuffer = AllocatePool (sizeof (EBC_INSTRUCTION_BUFFER));
> + if (InstructionBuffer == NULL) {
> return EFI_OUT_OF_RESOURCES;
> }
> - //
> - // Print(L"Allocate TH: 0x%X\n", (UINT32)Ptr);
> - //
> - // Save the start address so we can add a pointer to it to a list later.
> - //
> - ThunkBase = Ptr;
>
> //
> // Give them the address of our buffer we're going to fix up
> //
> - *Thunk = (VOID *) Ptr;
> + *Thunk = InstructionBuffer;
>
> //
> // Copy whole thunk instruction buffer template
> //
> - CopyMem (Ptr, mInstructionBufferTemplate, sizeof(mInstructionBufferTemplate));
> + CopyMem (InstructionBuffer, &mEbcInstructionBufferTemplate,
> + sizeof (EBC_INSTRUCTION_BUFFER));
>
> //
> // Patch EbcEntryPoint and EbcLLEbcInterpret
> //
> - for (Index = 0; Index < sizeof(mInstructionBufferTemplate) - sizeof(UINTN); Index++) {
> - if (*(UINTN *)&Ptr[Index] == EBC_ENTRYPOINT_SIGNATURE) {
> - *(UINTN *)&Ptr[Index] = (UINTN)EbcEntryPoint;
> - }
> - if (*(UINTN *)&Ptr[Index] == EBC_LL_EBC_ENTRYPOINT_SIGNATURE) {
> - if ((Flags & FLAG_THUNK_ENTRY_POINT) != 0) {
> - *(UINTN *)&Ptr[Index] = (UINTN)EbcLLExecuteEbcImageEntryPoint;
> - } else {
> - *(UINTN *)&Ptr[Index] = (UINTN)EbcLLEbcInterpret;
> - }
> - }
> + InstructionBuffer->EbcEntryPoint = (UINT64)EbcEntryPoint;
> + if ((Flags & FLAG_THUNK_ENTRY_POINT) != 0) {
> + InstructionBuffer->EbcLlEntryPoint = (UINT64)EbcLLExecuteEbcImageEntryPoint;
> + } else {
> + InstructionBuffer->EbcLlEntryPoint = (UINT64)EbcLLEbcInterpret;
> }
>
> //
> // Add the thunk to the list for this image. Do this last since the add
> // function flushes the cache for us.
> //
> - EbcAddImageThunk (ImageHandle, (VOID *) ThunkBase, ThunkSize);
> + EbcAddImageThunk (ImageHandle, InstructionBuffer,
> + sizeof (EBC_INSTRUCTION_BUFFER));
>
> return EFI_SUCCESS;
> }
> @@ -500,40 +455,15 @@ EbcLLCALLEX (
> IN UINT8 Size
> )
> {
> - UINTN IsThunk;
> - UINTN TargetEbcAddr;
> - UINT8 InstructionBuffer[sizeof(mInstructionBufferTemplate)];
> - UINTN Index;
> - UINTN IndexOfEbcEntrypoint;
> -
> - IsThunk = 1;
> - TargetEbcAddr = 0;
> - IndexOfEbcEntrypoint = 0;
> + CONST EBC_INSTRUCTION_BUFFER *InstructionBuffer;
>
> //
> // Processor specific code to check whether the callee is a thunk to EBC.
> //
> - CopyMem (InstructionBuffer, (VOID *)FuncAddr, sizeof(InstructionBuffer));
> - //
> - // Fill the signature according to mInstructionBufferTemplate
> - //
> - for (Index = 0; Index < sizeof(mInstructionBufferTemplate) - sizeof(UINTN); Index++) {
> - if (*(UINTN *)&mInstructionBufferTemplate[Index] == EBC_ENTRYPOINT_SIGNATURE) {
> - *(UINTN *)&InstructionBuffer[Index] = EBC_ENTRYPOINT_SIGNATURE;
> - IndexOfEbcEntrypoint = Index;
> - }
> - if (*(UINTN *)&mInstructionBufferTemplate[Index] == EBC_LL_EBC_ENTRYPOINT_SIGNATURE) {
> - *(UINTN *)&InstructionBuffer[Index] = EBC_LL_EBC_ENTRYPOINT_SIGNATURE;
> - }
> - }
> - //
> - // Check if we need thunk to native
> - //
> - if (CompareMem (InstructionBuffer, mInstructionBufferTemplate, sizeof(mInstructionBufferTemplate)) != 0) {
> - IsThunk = 0;
> - }
> + InstructionBuffer = (EBC_INSTRUCTION_BUFFER *)FuncAddr;
>
> - if (IsThunk == 1){
> + if (CompareMem (InstructionBuffer, &mEbcInstructionBufferTemplate,
> + sizeof(EBC_INSTRUCTION_BUFFER) - 2 * sizeof (UINT64)) == 0) {
> //
> // The callee is a thunk to EBC, adjust the stack pointer down 16 bytes and
> // put our return address and frame pointer on the VM stack.
> @@ -545,8 +475,7 @@ EbcLLCALLEX (
> VmPtr->Gpr[0] -= 8;
> VmWriteMem64 (VmPtr, (UINTN) VmPtr->Gpr[0], (UINT64) (UINTN) (VmPtr->Ip + Size));
>
> - CopyMem (&TargetEbcAddr, (UINT8 *)FuncAddr + IndexOfEbcEntrypoint, sizeof(UINTN));
> - VmPtr->Ip = (VMIP) (UINTN) TargetEbcAddr;
> + VmPtr->Ip = (VMIP) InstructionBuffer->EbcEntryPoint;
> } else {
> //
> // The callee is not a thunk to EBC, call native code,
> --
> 2.7.4
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk
2016-08-17 14:59 [PATCH v3 0/4] MdeModulePkg/EbcDxe: AARCH64 improvements Ard Biesheuvel
2016-08-17 14:59 ` [PATCH v3 1/4] MdeModulePkg/EbcDxe AARCH64: clean up comment style in ASM file Ard Biesheuvel
2016-08-17 14:59 ` [PATCH v3 2/4] MdeModulePkg/EbcDxe AARCH64: use a fixed size thunk structure Ard Biesheuvel
@ 2016-08-17 14:59 ` Ard Biesheuvel
2016-08-26 12:54 ` Leif Lindholm
2016-08-17 14:59 ` [PATCH v3 4/4] MdeModulePkg/EbcDxe AARCH64: simplify interpreter entry point thunks Ard Biesheuvel
3 siblings, 1 reply; 12+ messages in thread
From: Ard Biesheuvel @ 2016-08-17 14:59 UTC (permalink / raw)
To: edk2-devel, leif.lindholm, jbrasen, feng.tian, star.zeng,
daniil.egranov
Cc: Ard Biesheuvel
Instead of pessimistically copying at least 64 bytes from the VM stack
to the native stack, and popping off the register arguments again
before doing the native call, try to avoid touching the stack completely
if the VM stack frame is < 64 bytes. Also, if the stack frame does exceed
64 bytes, there is no need to copy the first 64 bytes, since we are passing
those in registers anyway.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 73 +++++++++++++++-----
1 file changed, 55 insertions(+), 18 deletions(-)
diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
index cb7a70b5a4f8..d95713e82b0f 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
@@ -35,30 +35,67 @@ ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
//****************************************************************************
// UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
ASM_PFX(EbcLLCALLEXNative):
- stp x19, x20, [sp, #-16]!
- stp x29, x30, [sp, #-16]!
+ mov x8, x0 // Preserve x0
+ mov x9, x1 // Preserve x1
- mov x19, x0
- mov x20, sp
- sub x2, x2, x1 // Length = NewStackPointer-FramePtr
- sub sp, sp, x2
- sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack
- mov x0, sp
+ //
+ // If the EBC stack frame is smaller than or equal to 64 bytes, we know there
+ // are no stacked arguments #9 and beyond that we need to copy to the native
+ // stack. In this case, we can perform a tail call which is much more
+ // efficient, since there is no need to touch the native stack at all.
+ //
+ sub x3, x2, x1 // Length = NewStackPointer - FramePtr
+ cmp x3, #64
+ b.gt 1f
- bl CopyMem // Sp, NewStackPointer, Length
+ adr x0, 0f
+ sub x0, x0, x3, lsr #1
+ br x0
- ldp x0, x1, [sp], #16
- ldp x2, x3, [sp], #16
- ldp x4, x5, [sp], #16
- ldp x6, x7, [sp], #16
+ ldr x7, [x9, #56]
+ ldr x6, [x9, #48]
+ ldr x5, [x9, #40]
+ ldr x4, [x9, #32]
+ ldr x3, [x9, #24]
+ ldr x2, [x9, #16]
+ ldr x1, [x9, #8]
+ ldr x0, [x9]
- blr x19
+0: br x8
- mov sp, x20
- ldp x29, x30, [sp], #16
- ldp x19, x20, [sp], #16
+ //
+ // More than 64 bytes: we need to build the full native stack frame and copy
+ // the part of the VM stack exceeding 64 bytes (which may contain stacked
+ // arguments) to the native stack
+ //
+1: stp x29, x30, [sp, #-16]!
+ mov x29, sp
- ret
+ //
+ // Ensure that the stack pointer remains 16 byte aligned,
+ // even if the size of the VM stack frame is not a multiple of 16
+ //
+ add x1, x1, #64 // Skip over [potential] reg params
+ tbz x3, #3, 2f // Multiple of 16?
+ ldr x4, [x2, #-8]! // No? Then push one word
+ str x4, [sp, #-16]! // ... but use two slots
+ b 3f
+
+2: ldp x4, x5, [x2, #-16]!
+ stp x4, x5, [sp, #-16]!
+3: cmp x2, x1
+ b.gt 2b
+
+ ldp x0, x1, [x9]
+ ldp x2, x3, [x9, #16]
+ ldp x4, x5, [x9, #32]
+ ldp x6, x7, [x9, #48]
+
+ blr x8
+
+ mov sp, x29
+ ldp x29, x30, [sp], #16
+ ret
//****************************************************************************
// EbcLLEbcInterpret
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk
2016-08-17 14:59 ` [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk Ard Biesheuvel
@ 2016-08-26 12:54 ` Leif Lindholm
2016-08-26 17:14 ` Ard Biesheuvel
0 siblings, 1 reply; 12+ messages in thread
From: Leif Lindholm @ 2016-08-26 12:54 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: edk2-devel, jbrasen, feng.tian, star.zeng, daniil.egranov
On Wed, Aug 17, 2016 at 04:59:04PM +0200, Ard Biesheuvel wrote:
> Instead of pessimistically copying at least 64 bytes from the VM stack
> to the native stack, and popping off the register arguments again
> before doing the native call, try to avoid touching the stack completely
> if the VM stack frame is < 64 bytes. Also, if the stack frame does exceed
Should this say "does not"?
> 64 bytes, there is no need to copy the first 64 bytes, since we are passing
> those in registers anyway.
>
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 73 +++++++++++++++-----
> 1 file changed, 55 insertions(+), 18 deletions(-)
>
> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> index cb7a70b5a4f8..d95713e82b0f 100644
> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> @@ -35,30 +35,67 @@ ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
> //****************************************************************************
> // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
> ASM_PFX(EbcLLCALLEXNative):
> - stp x19, x20, [sp, #-16]!
> - stp x29, x30, [sp, #-16]!
> + mov x8, x0 // Preserve x0
> + mov x9, x1 // Preserve x1
>
> - mov x19, x0
> - mov x20, sp
> - sub x2, x2, x1 // Length = NewStackPointer-FramePtr
> - sub sp, sp, x2
> - sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack
> - mov x0, sp
> + //
> + // If the EBC stack frame is smaller than or equal to 64 bytes, we know there
> + // are no stacked arguments #9 and beyond that we need to copy to the native
> + // stack. In this case, we can perform a tail call which is much more
> + // efficient, since there is no need to touch the native stack at all.
> + //
> + sub x3, x2, x1 // Length = NewStackPointer - FramePtr
> + cmp x3, #64
> + b.gt 1f
>
> - bl CopyMem // Sp, NewStackPointer, Length
> + adr x0, 0f
> + sub x0, x0, x3, lsr #1
> + br x0
>
> - ldp x0, x1, [sp], #16
> - ldp x2, x3, [sp], #16
> - ldp x4, x5, [sp], #16
> - ldp x6, x7, [sp], #16
> + ldr x7, [x9, #56]
> + ldr x6, [x9, #48]
> + ldr x5, [x9, #40]
> + ldr x4, [x9, #32]
> + ldr x3, [x9, #24]
> + ldr x2, [x9, #16]
> + ldr x1, [x9, #8]
> + ldr x0, [x9]
Why not keep using ldp, but with x9?
>
> - blr x19
> +0: br x8
>
> - mov sp, x20
> - ldp x29, x30, [sp], #16
> - ldp x19, x20, [sp], #16
> + //
> + // More than 64 bytes: we need to build the full native stack frame and copy
> + // the part of the VM stack exceeding 64 bytes (which may contain stacked
> + // arguments) to the native stack
> + //
> +1: stp x29, x30, [sp, #-16]!
> + mov x29, sp
>
> - ret
> + //
> + // Ensure that the stack pointer remains 16 byte aligned,
> + // even if the size of the VM stack frame is not a multiple of 16
> + //
> + add x1, x1, #64 // Skip over [potential] reg params
> + tbz x3, #3, 2f // Multiple of 16?
> + ldr x4, [x2, #-8]! // No? Then push one word
> + str x4, [sp, #-16]! // ... but use two slots
> + b 3f
> +
> +2: ldp x4, x5, [x2, #-16]!
> + stp x4, x5, [sp, #-16]!
> +3: cmp x2, x1
> + b.gt 2b
> +
> + ldp x0, x1, [x9]
> + ldp x2, x3, [x9, #16]
> + ldp x4, x5, [x9, #32]
> + ldp x6, x7, [x9, #48]
> +
> + blr x8
> +
> + mov sp, x29
> + ldp x29, x30, [sp], #16
> + ret
>
> //****************************************************************************
> // EbcLLEbcInterpret
> --
> 2.7.4
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk
2016-08-26 12:54 ` Leif Lindholm
@ 2016-08-26 17:14 ` Ard Biesheuvel
2016-08-26 18:10 ` Leif Lindholm
0 siblings, 1 reply; 12+ messages in thread
From: Ard Biesheuvel @ 2016-08-26 17:14 UTC (permalink / raw)
To: Leif Lindholm
Cc: edk2-devel-01, Jeff Brasen, Tian, Feng, Zeng, Star,
Daniil Egranov
On 26 August 2016 at 13:54, Leif Lindholm <leif.lindholm@linaro.org> wrote:
> On Wed, Aug 17, 2016 at 04:59:04PM +0200, Ard Biesheuvel wrote:
>> Instead of pessimistically copying at least 64 bytes from the VM stack
>> to the native stack, and popping off the register arguments again
>> before doing the native call, try to avoid touching the stack completely
>> if the VM stack frame is < 64 bytes. Also, if the stack frame does exceed
>
> Should this say "does not"?
>
No, if it exceeds 64 bytes, we may have arguments that should be
passed on the stack (but we're not sure, unfortunately, since the VM
stack frame is not annotated, i.e., it could simply be local vars in
the caller) In this case, there is no need to push all arguments onto
the native stack, and pop off the first 8 into registers again, which
is what the original code was doing.
>> 64 bytes, there is no need to copy the first 64 bytes, since we are passing
>> those in registers anyway.
>>
>> Contributed-under: TianoCore Contribution Agreement 1.0
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 73 +++++++++++++++-----
>> 1 file changed, 55 insertions(+), 18 deletions(-)
>>
>> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> index cb7a70b5a4f8..d95713e82b0f 100644
>> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>> @@ -35,30 +35,67 @@ ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
>> //****************************************************************************
>> // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
>> ASM_PFX(EbcLLCALLEXNative):
>> - stp x19, x20, [sp, #-16]!
>> - stp x29, x30, [sp, #-16]!
>> + mov x8, x0 // Preserve x0
>> + mov x9, x1 // Preserve x1
>>
>> - mov x19, x0
>> - mov x20, sp
>> - sub x2, x2, x1 // Length = NewStackPointer-FramePtr
>> - sub sp, sp, x2
>> - sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack
>> - mov x0, sp
>> + //
>> + // If the EBC stack frame is smaller than or equal to 64 bytes, we know there
>> + // are no stacked arguments #9 and beyond that we need to copy to the native
>> + // stack. In this case, we can perform a tail call which is much more
>> + // efficient, since there is no need to touch the native stack at all.
>> + //
>> + sub x3, x2, x1 // Length = NewStackPointer - FramePtr
>> + cmp x3, #64
>> + b.gt 1f
>>
>> - bl CopyMem // Sp, NewStackPointer, Length
>> + adr x0, 0f
>> + sub x0, x0, x3, lsr #1
>> + br x0
>>
>> - ldp x0, x1, [sp], #16
>> - ldp x2, x3, [sp], #16
>> - ldp x4, x5, [sp], #16
>> - ldp x6, x7, [sp], #16
>> + ldr x7, [x9, #56]
>> + ldr x6, [x9, #48]
>> + ldr x5, [x9, #40]
>> + ldr x4, [x9, #32]
>> + ldr x3, [x9, #24]
>> + ldr x2, [x9, #16]
>> + ldr x1, [x9, #8]
>> + ldr x0, [x9]
>
> Why not keep using ldp, but with x9?
>
This is a computed jump, depending on the size of the stack frame,
hence the inverted order, and the LDRs. While probably harmless in
most cases, it is arguably incorrect to copy random data from memory
into these registers (and could even be a security issue)
>>
>> - blr x19
>> +0: br x8
>>
>> - mov sp, x20
>> - ldp x29, x30, [sp], #16
>> - ldp x19, x20, [sp], #16
>> + //
>> + // More than 64 bytes: we need to build the full native stack frame and copy
>> + // the part of the VM stack exceeding 64 bytes (which may contain stacked
>> + // arguments) to the native stack
>> + //
>> +1: stp x29, x30, [sp, #-16]!
>> + mov x29, sp
>>
>> - ret
>> + //
>> + // Ensure that the stack pointer remains 16 byte aligned,
>> + // even if the size of the VM stack frame is not a multiple of 16
>> + //
>> + add x1, x1, #64 // Skip over [potential] reg params
>> + tbz x3, #3, 2f // Multiple of 16?
>> + ldr x4, [x2, #-8]! // No? Then push one word
>> + str x4, [sp, #-16]! // ... but use two slots
>> + b 3f
>> +
>> +2: ldp x4, x5, [x2, #-16]!
>> + stp x4, x5, [sp, #-16]!
>> +3: cmp x2, x1
>> + b.gt 2b
>> +
>> + ldp x0, x1, [x9]
>> + ldp x2, x3, [x9, #16]
>> + ldp x4, x5, [x9, #32]
>> + ldp x6, x7, [x9, #48]
>> +
>> + blr x8
>> +
>> + mov sp, x29
>> + ldp x29, x30, [sp], #16
>> + ret
>>
>> //****************************************************************************
>> // EbcLLEbcInterpret
>> --
>> 2.7.4
>>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk
2016-08-26 17:14 ` Ard Biesheuvel
@ 2016-08-26 18:10 ` Leif Lindholm
2016-08-26 18:41 ` Ard Biesheuvel
0 siblings, 1 reply; 12+ messages in thread
From: Leif Lindholm @ 2016-08-26 18:10 UTC (permalink / raw)
To: Ard Biesheuvel
Cc: edk2-devel-01, Jeff Brasen, Tian, Feng, Zeng, Star,
Daniil Egranov
On Fri, Aug 26, 2016 at 06:14:10PM +0100, Ard Biesheuvel wrote:
> On 26 August 2016 at 13:54, Leif Lindholm <leif.lindholm@linaro.org> wrote:
> > On Wed, Aug 17, 2016 at 04:59:04PM +0200, Ard Biesheuvel wrote:
> >> Instead of pessimistically copying at least 64 bytes from the VM stack
> >> to the native stack, and popping off the register arguments again
> >> before doing the native call, try to avoid touching the stack completely
> >> if the VM stack frame is < 64 bytes. Also, if the stack frame does exceed
> >
> > Should this say "does not"?
>
> No, if it exceeds 64 bytes, we may have arguments that should be
> passed on the stack (but we're not sure, unfortunately, since the VM
> stack frame is not annotated, i.e., it could simply be local vars in
> the caller) In this case, there is no need to push all arguments onto
> the native stack, and pop off the first 8 into registers again, which
> is what the original code was doing.
OK, thanks.
Should that not be <= 64 bytes above then?
> >> 64 bytes, there is no need to copy the first 64 bytes, since we are passing
> >> those in registers anyway.
> >>
> >> Contributed-under: TianoCore Contribution Agreement 1.0
> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> ---
> >> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 73 +++++++++++++++-----
> >> 1 file changed, 55 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> >> index cb7a70b5a4f8..d95713e82b0f 100644
> >> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> >> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> >> @@ -35,30 +35,67 @@ ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
> >> //****************************************************************************
> >> // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
> >> ASM_PFX(EbcLLCALLEXNative):
> >> - stp x19, x20, [sp, #-16]!
> >> - stp x29, x30, [sp, #-16]!
> >> + mov x8, x0 // Preserve x0
> >> + mov x9, x1 // Preserve x1
> >>
> >> - mov x19, x0
> >> - mov x20, sp
> >> - sub x2, x2, x1 // Length = NewStackPointer-FramePtr
> >> - sub sp, sp, x2
> >> - sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack
> >> - mov x0, sp
> >> + //
> >> + // If the EBC stack frame is smaller than or equal to 64 bytes, we know there
> >> + // are no stacked arguments #9 and beyond that we need to copy to the native
> >> + // stack. In this case, we can perform a tail call which is much more
> >> + // efficient, since there is no need to touch the native stack at all.
> >> + //
> >> + sub x3, x2, x1 // Length = NewStackPointer - FramePtr
> >> + cmp x3, #64
> >> + b.gt 1f
> >>
> >> - bl CopyMem // Sp, NewStackPointer, Length
> >> + adr x0, 0f
> >> + sub x0, x0, x3, lsr #1
> >> + br x0
> >>
> >> - ldp x0, x1, [sp], #16
> >> - ldp x2, x3, [sp], #16
> >> - ldp x4, x5, [sp], #16
> >> - ldp x6, x7, [sp], #16
> >> + ldr x7, [x9, #56]
> >> + ldr x6, [x9, #48]
> >> + ldr x5, [x9, #40]
> >> + ldr x4, [x9, #32]
> >> + ldr x3, [x9, #24]
> >> + ldr x2, [x9, #16]
> >> + ldr x1, [x9, #8]
> >> + ldr x0, [x9]
> >
> > Why not keep using ldp, but with x9?
>
> This is a computed jump, depending on the size of the stack frame,
> hence the inverted order, and the LDRs. While probably harmless in
> most cases, it is arguably incorrect to copy random data from memory
> into these registers (and could even be a security issue)
OK, that's way too clever for someone like me to realise from the
existing code (which arguably, you have simply corrected). Yes, even
with the block comment above.
Could we add some kind of for-dummies comment by this code to indicate
what is going on at the actual instructions.
Say:
+ ldr x7, [x9, #56] // Branch target for 8 arguments
+ ldr x6, [x9, #48] // |
+ ldr x5, [x9, #40] // |
+ ldr x4, [x9, #32] // |
+ ldr x3, [x9, #24] // |
+ ldr x2, [x9, #16] // |
+ ldr x1, [x9, #8] // V
+ ldr x0, [x9] // Branch target for 1 argument
- blr x19
+0: br x8 // Branch target for 0 arguments
(Unless I'm still misunderstanding.)
/
Leif
>
> >>
> >> - blr x19
> >> +0: br x8
> >>
> >> - mov sp, x20
> >> - ldp x29, x30, [sp], #16
> >> - ldp x19, x20, [sp], #16
> >> + //
> >> + // More than 64 bytes: we need to build the full native stack frame and copy
> >> + // the part of the VM stack exceeding 64 bytes (which may contain stacked
> >> + // arguments) to the native stack
> >> + //
> >> +1: stp x29, x30, [sp, #-16]!
> >> + mov x29, sp
> >>
> >> - ret
> >> + //
> >> + // Ensure that the stack pointer remains 16 byte aligned,
> >> + // even if the size of the VM stack frame is not a multiple of 16
> >> + //
> >> + add x1, x1, #64 // Skip over [potential] reg params
> >> + tbz x3, #3, 2f // Multiple of 16?
> >> + ldr x4, [x2, #-8]! // No? Then push one word
> >> + str x4, [sp, #-16]! // ... but use two slots
> >> + b 3f
> >> +
> >> +2: ldp x4, x5, [x2, #-16]!
> >> + stp x4, x5, [sp, #-16]!
> >> +3: cmp x2, x1
> >> + b.gt 2b
> >> +
> >> + ldp x0, x1, [x9]
> >> + ldp x2, x3, [x9, #16]
> >> + ldp x4, x5, [x9, #32]
> >> + ldp x6, x7, [x9, #48]
> >> +
> >> + blr x8
> >> +
> >> + mov sp, x29
> >> + ldp x29, x30, [sp], #16
> >> + ret
> >>
> >> //****************************************************************************
> >> // EbcLLEbcInterpret
> >> --
> >> 2.7.4
> >>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk
2016-08-26 18:10 ` Leif Lindholm
@ 2016-08-26 18:41 ` Ard Biesheuvel
0 siblings, 0 replies; 12+ messages in thread
From: Ard Biesheuvel @ 2016-08-26 18:41 UTC (permalink / raw)
To: Leif Lindholm
Cc: edk2-devel-01, Jeff Brasen, Tian, Feng, Zeng, Star,
Daniil Egranov
> On 26 aug. 2016, at 19:10, Leif Lindholm <leif.lindholm@linaro.org> wrote:
>
>> On Fri, Aug 26, 2016 at 06:14:10PM +0100, Ard Biesheuvel wrote:
>>> On 26 August 2016 at 13:54, Leif Lindholm <leif.lindholm@linaro.org> wrote:
>>>> On Wed, Aug 17, 2016 at 04:59:04PM +0200, Ard Biesheuvel wrote:
>>>> Instead of pessimistically copying at least 64 bytes from the VM stack
>>>> to the native stack, and popping off the register arguments again
>>>> before doing the native call, try to avoid touching the stack completely
>>>> if the VM stack frame is < 64 bytes. Also, if the stack frame does exceed
>>>
>>> Should this say "does not"?
>>
>> No, if it exceeds 64 bytes, we may have arguments that should be
>> passed on the stack (but we're not sure, unfortunately, since the VM
>> stack frame is not annotated, i.e., it could simply be local vars in
>> the caller) In this case, there is no need to push all arguments onto
>> the native stack, and pop off the first 8 into registers again, which
>> is what the original code was doing.
>
> OK, thanks.
> Should that not be <= 64 bytes above then?
>
Correct
>>>> 64 bytes, there is no need to copy the first 64 bytes, since we are passing
>>>> those in registers anyway.
>>>>
>>>> Contributed-under: TianoCore Contribution Agreement 1.0
>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>>> ---
>>>> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 73 +++++++++++++++-----
>>>> 1 file changed, 55 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>>>> index cb7a70b5a4f8..d95713e82b0f 100644
>>>> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>>>> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
>>>> @@ -35,30 +35,67 @@ ASM_GLOBAL ASM_PFX(mEbcInstructionBufferTemplate)
>>>> //****************************************************************************
>>>> // UINTN EbcLLCALLEXNative(UINTN FuncAddr, UINTN NewStackPointer, VOID *FramePtr)
>>>> ASM_PFX(EbcLLCALLEXNative):
>>>> - stp x19, x20, [sp, #-16]!
>>>> - stp x29, x30, [sp, #-16]!
>>>> + mov x8, x0 // Preserve x0
>>>> + mov x9, x1 // Preserve x1
>>>>
>>>> - mov x19, x0
>>>> - mov x20, sp
>>>> - sub x2, x2, x1 // Length = NewStackPointer-FramePtr
>>>> - sub sp, sp, x2
>>>> - sub sp, sp, #64 // Make sure there is room for at least 8 args in the new stack
>>>> - mov x0, sp
>>>> + //
>>>> + // If the EBC stack frame is smaller than or equal to 64 bytes, we know there
>>>> + // are no stacked arguments #9 and beyond that we need to copy to the native
>>>> + // stack. In this case, we can perform a tail call which is much more
>>>> + // efficient, since there is no need to touch the native stack at all.
>>>> + //
>>>> + sub x3, x2, x1 // Length = NewStackPointer - FramePtr
>>>> + cmp x3, #64
>>>> + b.gt 1f
>>>>
>>>> - bl CopyMem // Sp, NewStackPointer, Length
>>>> + adr x0, 0f
>>>> + sub x0, x0, x3, lsr #1
>>>> + br x0
>>>>
>>>> - ldp x0, x1, [sp], #16
>>>> - ldp x2, x3, [sp], #16
>>>> - ldp x4, x5, [sp], #16
>>>> - ldp x6, x7, [sp], #16
>>>> + ldr x7, [x9, #56]
>>>> + ldr x6, [x9, #48]
>>>> + ldr x5, [x9, #40]
>>>> + ldr x4, [x9, #32]
>>>> + ldr x3, [x9, #24]
>>>> + ldr x2, [x9, #16]
>>>> + ldr x1, [x9, #8]
>>>> + ldr x0, [x9]
>>>
>>> Why not keep using ldp, but with x9?
>>
>> This is a computed jump, depending on the size of the stack frame,
>> hence the inverted order, and the LDRs. While probably harmless in
>> most cases, it is arguably incorrect to copy random data from memory
>> into these registers (and could even be a security issue)
>
> OK, that's way too clever for someone like me to realise from the
> existing code (which arguably, you have simply corrected). Yes, even
> with the block comment above.
> Could we add some kind of for-dummies comment by this code to indicate
> what is going on at the actual instructions.
>
> Say:
>
> + ldr x7, [x9, #56] // Branch target for 8 arguments
> + ldr x6, [x9, #48] // |
> + ldr x5, [x9, #40] // |
> + ldr x4, [x9, #32] // |
> + ldr x3, [x9, #24] // |
> + ldr x2, [x9, #16] // |
> + ldr x1, [x9, #8] // V
> + ldr x0, [x9] // Branch target for 1 argument
>
> - blr x19
> +0: br x8 // Branch target for 0 arguments
>
> (Unless I'm still misunderstanding.)
>
No, that is accurate, and a worthwhile clarification
> /
> Leif
>
>>
>>>>
>>>> - blr x19
>>>> +0: br x8
>>>>
>>>> - mov sp, x20
>>>> - ldp x29, x30, [sp], #16
>>>> - ldp x19, x20, [sp], #16
>>>> + //
>>>> + // More than 64 bytes: we need to build the full native stack frame and copy
>>>> + // the part of the VM stack exceeding 64 bytes (which may contain stacked
>>>> + // arguments) to the native stack
>>>> + //
>>>> +1: stp x29, x30, [sp, #-16]!
>>>> + mov x29, sp
>>>>
>>>> - ret
>>>> + //
>>>> + // Ensure that the stack pointer remains 16 byte aligned,
>>>> + // even if the size of the VM stack frame is not a multiple of 16
>>>> + //
>>>> + add x1, x1, #64 // Skip over [potential] reg params
>>>> + tbz x3, #3, 2f // Multiple of 16?
>>>> + ldr x4, [x2, #-8]! // No? Then push one word
>>>> + str x4, [sp, #-16]! // ... but use two slots
>>>> + b 3f
>>>> +
>>>> +2: ldp x4, x5, [x2, #-16]!
>>>> + stp x4, x5, [sp, #-16]!
>>>> +3: cmp x2, x1
>>>> + b.gt 2b
>>>> +
>>>> + ldp x0, x1, [x9]
>>>> + ldp x2, x3, [x9, #16]
>>>> + ldp x4, x5, [x9, #32]
>>>> + ldp x6, x7, [x9, #48]
>>>> +
>>>> + blr x8
>>>> +
>>>> + mov sp, x29
>>>> + ldp x29, x30, [sp], #16
>>>> + ret
>>>>
>>>> //****************************************************************************
>>>> // EbcLLEbcInterpret
>>>> --
>>>> 2.7.4
>>>>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v3 4/4] MdeModulePkg/EbcDxe AARCH64: simplify interpreter entry point thunks
2016-08-17 14:59 [PATCH v3 0/4] MdeModulePkg/EbcDxe: AARCH64 improvements Ard Biesheuvel
` (2 preceding siblings ...)
2016-08-17 14:59 ` [PATCH v3 3/4] MdeModulePkg/EbxDxe AARCH64: use tail call for EBC to native thunk Ard Biesheuvel
@ 2016-08-17 14:59 ` Ard Biesheuvel
2016-08-26 12:56 ` Leif Lindholm
3 siblings, 1 reply; 12+ messages in thread
From: Ard Biesheuvel @ 2016-08-17 14:59 UTC (permalink / raw)
To: edk2-devel, leif.lindholm, jbrasen, feng.tian, star.zeng,
daniil.egranov
Cc: Ard Biesheuvel
The prototypes of EbcInterpret() and ExecuteEbcImageEntryPoint() are
private to the AARCH64 implementation of EbcDxe, so we can shuffle
the arguments around a bit and make the assembler thunking clue a lot
simpler.
For ExecuteEbcImageEntryPoint(), this involves passing the EntryPoint
argument as the third parameter, rather than the first, which allows
us to do a tail call. For EbcInterpret(), instead of copying each
argument beyond #8 from one native stack frame to the next (before
another copy is made into the VM stack), pass a pointer to the
argument stack.
Contributed-under: TianoCore Contribution Agreement 1.0
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 57 +++++---------------
MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 44 ++++++---------
2 files changed, 27 insertions(+), 74 deletions(-)
diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
index d95713e82b0f..f90cd711ec90 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
@@ -107,45 +107,18 @@ ASM_PFX(EbcLLCALLEXNative):
//
//****************************************************************************
ASM_PFX(EbcLLEbcInterpret):
- stp x29, x30, [sp, #-16]!
-
- // copy the current arguments 9-16 from old location and add arg 7 to stack
- // keeping 16 byte stack alignment
- sub sp, sp, #80
- str x7, [sp]
- ldr x11, [sp, #96]
- str x11, [sp, #8]
- ldr x11, [sp, #104]
- str x11, [sp, #16]
- ldr x11, [sp, #112]
- str x11, [sp, #24]
- ldr x11, [sp, #120]
- str x11, [sp, #32]
- ldr x11, [sp, #128]
- str x11, [sp, #40]
- ldr x11, [sp, #136]
- str x11, [sp, #48]
- ldr x11, [sp, #144]
- str x11, [sp, #56]
- ldr x11, [sp, #152]
- str x11, [sp, #64]
-
- // Shift arguments and add entry point and as argument 1
- mov x7, x6
- mov x6, x5
- mov x5, x4
- mov x4, x3
- mov x3, x2
- mov x2, x1
- mov x1, x0
- mov x0, x16
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
- // call C-code
- bl ASM_PFX(EbcInterpret)
- add sp, sp, #80
+ // push the entry point and the address of args #9 - #16 onto the stack
+ add x17, sp, #16
+ stp x16, x17, [sp, #-16]!
- ldp x29, x30, [sp], #16
+ // call C-code
+ bl ASM_PFX(EbcInterpret)
+ add sp, sp, #16
+ ldp x29, x30, [sp], #16
ret
//****************************************************************************
@@ -157,16 +130,10 @@ ASM_PFX(EbcLLEbcInterpret):
//
//****************************************************************************
ASM_PFX(EbcLLExecuteEbcImageEntryPoint):
- stp x29, x30, [sp, #-16]!
- // build new paramater calling convention
- mov x2, x1
- mov x1, x0
- mov x0, x16
+ mov x2, x16
- // call C-code
- bl ASM_PFX(ExecuteEbcImageEntryPoint)
- ldp x29, x30, [sp], #16
- ret
+ // tail call to C code
+ b ASM_PFX(ExecuteEbcImageEntryPoint)
//****************************************************************************
// mEbcInstructionBufferTemplate
diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
index a5f21f400274..f059b0e7e102 100644
--- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
+++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
@@ -89,7 +89,6 @@ PushU64 (
This is a thunk function.
- @param EntryPoint The entrypoint of EBC code.
@param Arg1 The 1st argument.
@param Arg2 The 2nd argument.
@param Arg3 The 3rd argument.
@@ -98,14 +97,8 @@ PushU64 (
@param Arg6 The 6th argument.
@param Arg7 The 7th argument.
@param Arg8 The 8th argument.
- @param Arg9 The 9th argument.
- @param Arg10 The 10th argument.
- @param Arg11 The 11th argument.
- @param Arg12 The 12th argument.
- @param Arg13 The 13th argument.
- @param Arg14 The 14th argument.
- @param Arg15 The 15th argument.
- @param Arg16 The 16th argument.
+ @param EntryPoint The entrypoint of EBC code.
+ @param Args9_16[] Array containing arguments #9 to #16.
@return The value returned by the EBC application we're going to run.
@@ -113,7 +106,6 @@ PushU64 (
UINT64
EFIAPI
EbcInterpret (
- IN UINTN EntryPoint,
IN UINTN Arg1,
IN UINTN Arg2,
IN UINTN Arg3,
@@ -122,14 +114,8 @@ EbcInterpret (
IN UINTN Arg6,
IN UINTN Arg7,
IN UINTN Arg8,
- IN UINTN Arg9,
- IN UINTN Arg10,
- IN UINTN Arg11,
- IN UINTN Arg12,
- IN UINTN Arg13,
- IN UINTN Arg14,
- IN UINTN Arg15,
- IN UINTN Arg16
+ IN UINTN EntryPoint,
+ IN UINTN Args9_16[]
)
{
//
@@ -193,14 +179,14 @@ EbcInterpret (
// For the worst case, assume there are 4 arguments passed in registers, store
// them to VM's stack.
//
- PushU64 (&VmContext, (UINT64) Arg16);
- PushU64 (&VmContext, (UINT64) Arg15);
- PushU64 (&VmContext, (UINT64) Arg14);
- PushU64 (&VmContext, (UINT64) Arg13);
- PushU64 (&VmContext, (UINT64) Arg12);
- PushU64 (&VmContext, (UINT64) Arg11);
- PushU64 (&VmContext, (UINT64) Arg10);
- PushU64 (&VmContext, (UINT64) Arg9);
+ PushU64 (&VmContext, (UINT64) Args9_16[7]);
+ PushU64 (&VmContext, (UINT64) Args9_16[6]);
+ PushU64 (&VmContext, (UINT64) Args9_16[5]);
+ PushU64 (&VmContext, (UINT64) Args9_16[4]);
+ PushU64 (&VmContext, (UINT64) Args9_16[3]);
+ PushU64 (&VmContext, (UINT64) Args9_16[2]);
+ PushU64 (&VmContext, (UINT64) Args9_16[1]);
+ PushU64 (&VmContext, (UINT64) Args9_16[0]);
PushU64 (&VmContext, (UINT64) Arg8);
PushU64 (&VmContext, (UINT64) Arg7);
PushU64 (&VmContext, (UINT64) Arg6);
@@ -252,10 +238,10 @@ EbcInterpret (
/**
Begin executing an EBC image.
- @param EntryPoint The entrypoint of EBC code.
@param ImageHandle image handle for the EBC application we're executing
@param SystemTable standard system table passed into an driver's entry
point
+ @param EntryPoint The entrypoint of EBC code.
@return The value returned by the EBC application we're going to run.
@@ -263,9 +249,9 @@ EbcInterpret (
UINT64
EFIAPI
ExecuteEbcImageEntryPoint (
- IN UINTN EntryPoint,
IN EFI_HANDLE ImageHandle,
- IN EFI_SYSTEM_TABLE *SystemTable
+ IN EFI_SYSTEM_TABLE *SystemTable,
+ IN UINTN EntryPoint
)
{
//
--
2.7.4
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 4/4] MdeModulePkg/EbcDxe AARCH64: simplify interpreter entry point thunks
2016-08-17 14:59 ` [PATCH v3 4/4] MdeModulePkg/EbcDxe AARCH64: simplify interpreter entry point thunks Ard Biesheuvel
@ 2016-08-26 12:56 ` Leif Lindholm
0 siblings, 0 replies; 12+ messages in thread
From: Leif Lindholm @ 2016-08-26 12:56 UTC (permalink / raw)
To: Ard Biesheuvel; +Cc: edk2-devel, jbrasen, feng.tian, star.zeng, daniil.egranov
On Wed, Aug 17, 2016 at 04:59:05PM +0200, Ard Biesheuvel wrote:
> The prototypes of EbcInterpret() and ExecuteEbcImageEntryPoint() are
> private to the AARCH64 implementation of EbcDxe, so we can shuffle
> the arguments around a bit and make the assembler thunking clue a lot
> simpler.
>
> For ExecuteEbcImageEntryPoint(), this involves passing the EntryPoint
> argument as the third parameter, rather than the first, which allows
> us to do a tail call. For EbcInterpret(), instead of copying each
> argument beyond #8 from one native stack frame to the next (before
> another copy is made into the VM stack), pass a pointer to the
> argument stack.
>
> Contributed-under: TianoCore Contribution Agreement 1.0
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
> MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S | 57 +++++---------------
> MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c | 44 ++++++---------
> 2 files changed, 27 insertions(+), 74 deletions(-)
>
> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> index d95713e82b0f..f90cd711ec90 100644
> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcLowLevel.S
> @@ -107,45 +107,18 @@ ASM_PFX(EbcLLCALLEXNative):
> //
> //****************************************************************************
> ASM_PFX(EbcLLEbcInterpret):
> - stp x29, x30, [sp, #-16]!
> -
> - // copy the current arguments 9-16 from old location and add arg 7 to stack
> - // keeping 16 byte stack alignment
> - sub sp, sp, #80
> - str x7, [sp]
> - ldr x11, [sp, #96]
> - str x11, [sp, #8]
> - ldr x11, [sp, #104]
> - str x11, [sp, #16]
> - ldr x11, [sp, #112]
> - str x11, [sp, #24]
> - ldr x11, [sp, #120]
> - str x11, [sp, #32]
> - ldr x11, [sp, #128]
> - str x11, [sp, #40]
> - ldr x11, [sp, #136]
> - str x11, [sp, #48]
> - ldr x11, [sp, #144]
> - str x11, [sp, #56]
> - ldr x11, [sp, #152]
> - str x11, [sp, #64]
> -
> - // Shift arguments and add entry point and as argument 1
> - mov x7, x6
> - mov x6, x5
> - mov x5, x4
> - mov x4, x3
> - mov x3, x2
> - mov x2, x1
> - mov x1, x0
> - mov x0, x16
> + stp x29, x30, [sp, #-16]!
> + mov x29, sp
>
> - // call C-code
> - bl ASM_PFX(EbcInterpret)
> - add sp, sp, #80
> + // push the entry point and the address of args #9 - #16 onto the stack
> + add x17, sp, #16
> + stp x16, x17, [sp, #-16]!
>
> - ldp x29, x30, [sp], #16
> + // call C-code
> + bl ASM_PFX(EbcInterpret)
>
> + add sp, sp, #16
> + ldp x29, x30, [sp], #16
> ret
>
> //****************************************************************************
> @@ -157,16 +130,10 @@ ASM_PFX(EbcLLEbcInterpret):
> //
> //****************************************************************************
> ASM_PFX(EbcLLExecuteEbcImageEntryPoint):
> - stp x29, x30, [sp, #-16]!
> - // build new paramater calling convention
> - mov x2, x1
> - mov x1, x0
> - mov x0, x16
> + mov x2, x16
>
> - // call C-code
> - bl ASM_PFX(ExecuteEbcImageEntryPoint)
> - ldp x29, x30, [sp], #16
> - ret
> + // tail call to C code
> + b ASM_PFX(ExecuteEbcImageEntryPoint)
>
> //****************************************************************************
> // mEbcInstructionBufferTemplate
> diff --git a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
> index a5f21f400274..f059b0e7e102 100644
> --- a/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
> +++ b/MdeModulePkg/Universal/EbcDxe/AArch64/EbcSupport.c
> @@ -89,7 +89,6 @@ PushU64 (
>
> This is a thunk function.
>
> - @param EntryPoint The entrypoint of EBC code.
> @param Arg1 The 1st argument.
> @param Arg2 The 2nd argument.
> @param Arg3 The 3rd argument.
> @@ -98,14 +97,8 @@ PushU64 (
> @param Arg6 The 6th argument.
> @param Arg7 The 7th argument.
> @param Arg8 The 8th argument.
> - @param Arg9 The 9th argument.
> - @param Arg10 The 10th argument.
> - @param Arg11 The 11th argument.
> - @param Arg12 The 12th argument.
> - @param Arg13 The 13th argument.
> - @param Arg14 The 14th argument.
> - @param Arg15 The 15th argument.
> - @param Arg16 The 16th argument.
> + @param EntryPoint The entrypoint of EBC code.
> + @param Args9_16[] Array containing arguments #9 to #16.
>
> @return The value returned by the EBC application we're going to run.
>
> @@ -113,7 +106,6 @@ PushU64 (
> UINT64
> EFIAPI
> EbcInterpret (
> - IN UINTN EntryPoint,
> IN UINTN Arg1,
> IN UINTN Arg2,
> IN UINTN Arg3,
> @@ -122,14 +114,8 @@ EbcInterpret (
> IN UINTN Arg6,
> IN UINTN Arg7,
> IN UINTN Arg8,
> - IN UINTN Arg9,
> - IN UINTN Arg10,
> - IN UINTN Arg11,
> - IN UINTN Arg12,
> - IN UINTN Arg13,
> - IN UINTN Arg14,
> - IN UINTN Arg15,
> - IN UINTN Arg16
> + IN UINTN EntryPoint,
> + IN UINTN Args9_16[]
> )
> {
> //
> @@ -193,14 +179,14 @@ EbcInterpret (
> // For the worst case, assume there are 4 arguments passed in registers, store
> // them to VM's stack.
> //
> - PushU64 (&VmContext, (UINT64) Arg16);
> - PushU64 (&VmContext, (UINT64) Arg15);
> - PushU64 (&VmContext, (UINT64) Arg14);
> - PushU64 (&VmContext, (UINT64) Arg13);
> - PushU64 (&VmContext, (UINT64) Arg12);
> - PushU64 (&VmContext, (UINT64) Arg11);
> - PushU64 (&VmContext, (UINT64) Arg10);
> - PushU64 (&VmContext, (UINT64) Arg9);
> + PushU64 (&VmContext, (UINT64) Args9_16[7]);
> + PushU64 (&VmContext, (UINT64) Args9_16[6]);
> + PushU64 (&VmContext, (UINT64) Args9_16[5]);
> + PushU64 (&VmContext, (UINT64) Args9_16[4]);
> + PushU64 (&VmContext, (UINT64) Args9_16[3]);
> + PushU64 (&VmContext, (UINT64) Args9_16[2]);
> + PushU64 (&VmContext, (UINT64) Args9_16[1]);
> + PushU64 (&VmContext, (UINT64) Args9_16[0]);
> PushU64 (&VmContext, (UINT64) Arg8);
> PushU64 (&VmContext, (UINT64) Arg7);
> PushU64 (&VmContext, (UINT64) Arg6);
> @@ -252,10 +238,10 @@ EbcInterpret (
> /**
> Begin executing an EBC image.
>
> - @param EntryPoint The entrypoint of EBC code.
> @param ImageHandle image handle for the EBC application we're executing
> @param SystemTable standard system table passed into an driver's entry
> point
> + @param EntryPoint The entrypoint of EBC code.
>
> @return The value returned by the EBC application we're going to run.
>
> @@ -263,9 +249,9 @@ EbcInterpret (
> UINT64
> EFIAPI
> ExecuteEbcImageEntryPoint (
> - IN UINTN EntryPoint,
> IN EFI_HANDLE ImageHandle,
> - IN EFI_SYSTEM_TABLE *SystemTable
> + IN EFI_SYSTEM_TABLE *SystemTable,
> + IN UINTN EntryPoint
> )
> {
> //
> --
> 2.7.4
Neat!
Reviewed-by: Leif Lindholm <leif.lindholm@linaro.org>
^ permalink raw reply [flat|nested] 12+ messages in thread