[PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics

public inbox for devel@edk2.groups.io
 help / color / mirror / Atom feed

* [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
@ 2020-05-20 11:44 Ard Biesheuvel
  2020-05-20 12:37 ` [edk2-devel] " Philippe Mathieu-Daudé
                   ` (3 more replies)
  0 siblings, 4 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-20 11:44 UTC (permalink / raw)
  To: devel; +Cc: glin, leif, lersek, liming.gao, Ard Biesheuvel

Gary reports the GCC 10 will emit calls to atomics intrinsics routines
unless -mno-outline-atomics is specified. This means GCC-10 introduces
new intrinsics, and even though it would be possible to work around this
by specifying the command line option, this would require a new GCC10
toolchain profile to be created, which we prefer to avoid.

So instead, add the new intrinsics to our library so they are provided
when necessary.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
---
v2:
- add missing .globl to export the functions from the object file
- add function end markers so the size of each is visible in the ELF metadata
- add some comments to describe what is going on

 ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
 ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
 2 files changed, 145 insertions(+)

diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
index d5bad9467758..fcf48c678119 100644
--- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
@@ -79,6 +79,9 @@ [Sources.ARM]
   Arm/ldivmod.asm      | MSFT
   Arm/llsr.asm         | MSFT
 
+[Sources.AARCH64]
+  AArch64/Atomics.S    | GCC
+
 [Packages]
   MdePkg/MdePkg.dec
   ArmPkg/ArmPkg.dec
diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
new file mode 100644
index 000000000000..dc61d6bb8e52
--- /dev/null
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
@@ -0,0 +1,142 @@
+#------------------------------------------------------------------------------
+#
+# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
+#
+# SPDX-License-Identifier: BSD-2-Clause-Patent
+#
+#------------------------------------------------------------------------------
+
+	/*
+	 * Provide the GCC intrinsics that are required when using GCC 9 or
+	 * later with the -moutline-atomics options (which became the default
+	 * in GCC 10)
+	 */
+	.arch armv8-a
+
+	.macro		reg_alias, pfx, sz
+	r0_\sz		.req	\pfx\()0
+	r1_\sz		.req	\pfx\()1
+	tmp0_\sz	.req	\pfx\()16
+	tmp1_\sz	.req	\pfx\()17
+	.endm
+
+	/*
+	 * Define register aliases of the right type for each size
+	 * (xN for 8 bytes, wN for everything smaller)
+	 */
+	reg_alias	w, 1
+	reg_alias	w, 2
+	reg_alias	w, 4
+	reg_alias	x, 8
+
+	.macro		fn_start, name:req
+	.section	.text.\name
+	.globl		\name
+	.type		\name, %function
+\name\():
+	.endm
+
+	.macro		fn_end, name:req
+	.size		\name, . - \name
+	.endm
+
+	/*
+	 * Emit an atomic helper for \model with operands of size \sz, using
+	 * the operation specified by \insn (which is the LSE name), and which
+	 * can be implemented using the generic load-locked/store-conditional
+	 * (LL/SC) sequence below, using the arithmetic operation given by
+	 * \opc.
+	 */
+	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
+	fn_start	__aarch64_\insn\()\sz\()\model
+	mov		tmp0_\sz, r0_\sz
+0:	ld\a\()xr\s	r0_\sz, [x1]
+	.ifnc		\insn, swp
+	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
+	.else
+	\opc		tmp1_\sz, tmp0_\sz
+	.endif
+	st\l\()xr\s	w15, tmp1_\sz, [x1]
+	cbnz		w15, 0b
+	ret
+	fn_end		__aarch64_\insn\()\sz\()\model
+	.endm
+
+	/*
+	 * Emit atomic helpers for \model for operand sizes in the
+	 * set {1, 2, 4, 8}, for the instruction pattern given by
+	 * \insn. (This is the LSE name, but this implementation uses
+	 * the generic LL/SC sequence using \opc as the arithmetic
+	 * operation on the target.)
+	 */
+	.macro		emit_ld, insn:req, opc:req, model:req, a, l
+	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
+	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
+	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
+	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
+	.endm
+
+	/*
+	 * Emit the compare and swap helper for \model and size \sz
+	 * using LL/SC instructions.
+	 */
+	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
+	fn_start	__aarch64_cas\sz\()\model
+	\uxt		tmp0_\sz, r0_\sz
+0:	ld\a\()xr\s	r0_\sz, [x2]
+	cmp		r0_\sz, tmp0_\sz
+	bne		1f
+	st\l\()xr\s	w15, r1_\sz, [x2]
+	cbnz		w15, 0b
+1:	ret
+	fn_end		__aarch64_cas\sz\()\model
+	.endm
+
+	/*
+	 * Emit compare-and-swap helpers for \model for operand sizes in the
+	 * set {1, 2, 4, 8, 16}.
+	 */
+	.macro		emit_cas, model:req, a, l
+	emit_cas_sz	1, \model, uxtb, b, \a, \l
+	emit_cas_sz	2, \model, uxth, h, \a, \l
+	emit_cas_sz	4, \model, mov ,  , \a, \l
+	emit_cas_sz	8, \model, mov ,  , \a, \l
+
+	/*
+	 * We cannot use the parameterized sequence for 16 byte CAS, so we
+	 * need to define it explicitly.
+	 */
+	fn_start	__aarch64_cas16\model
+	mov		x16, x0
+	mov		x17, x1
+0:	ld\a\()xp	x0, x1, [x4]
+	cmp		x0, x16
+	ccmp		x1, x17, #0, eq
+	bne		1f
+	st\l\()xp	w15, x16, x17, [x4]
+	cbnz		w15, 0b
+1:	ret
+	fn_end		__aarch64_cas16\model
+	.endm
+
+	/*
+	 * Emit the set of GCC outline atomic helper functions for
+	 * the memory ordering model given by \model:
+	 * - relax	unordered loads and stores
+	 * - acq	load-acquire, unordered store
+	 * - rel	unordered load, store-release
+	 * - acq_rel	load-acquire, store-release
+	 */
+	.macro		emit_model, model:req, a, l
+	emit_ld		ldadd, add, \model, \a, \l
+	emit_ld		ldclr, bic, \model, \a, \l
+	emit_ld		ldeor, eor, \model, \a, \l
+	emit_ld		ldset, orr, \model, \a, \l
+	emit_ld		swp,   mov, \model, \a, \l
+	emit_cas	\model, \a, \l
+	.endm
+
+	emit_model	_relax
+	emit_model	_acq, a
+	emit_model	_rel,, l
+	emit_model	_acq_rel, a, l
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-20 11:44 [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics Ard Biesheuvel
@ 2020-05-20 12:37 ` Philippe Mathieu-Daudé
  2020-05-21 16:40   ` Philippe Mathieu-Daudé
  2020-05-20 15:59 ` Laszlo Ersek
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 25+ messages in thread
From: Philippe Mathieu-Daudé @ 2020-05-20 12:37 UTC (permalink / raw)
  To: devel, ard.biesheuvel; +Cc: glin, leif, lersek, liming.gao

Hi Ard,

On 5/20/20 1:44 PM, Ard Biesheuvel wrote:
> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
> unless -mno-outline-atomics is specified. This means GCC-10 introduces
> new intrinsics, and even though it would be possible to work around this
> by specifying the command line option, this would require a new GCC10
> toolchain profile to be created, which we prefer to avoid.
> 
> So instead, add the new intrinsics to our library so they are provided
> when necessary.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
> ---
> v2:
> - add missing .globl to export the functions from the object file
> - add function end markers so the size of each is visible in the ELF metadata
> - add some comments to describe what is going on

Thanks, head hurts a bit less...

> 
>   ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>   ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
>   2 files changed, 145 insertions(+)
> 
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> index d5bad9467758..fcf48c678119 100644
> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> @@ -79,6 +79,9 @@ [Sources.ARM]
>     Arm/ldivmod.asm      | MSFT
>     Arm/llsr.asm         | MSFT
>   
> +[Sources.AARCH64]
> +  AArch64/Atomics.S    | GCC
> +
>   [Packages]
>     MdePkg/MdePkg.dec
>     ArmPkg/ArmPkg.dec
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> new file mode 100644
> index 000000000000..dc61d6bb8e52
> --- /dev/null
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> @@ -0,0 +1,142 @@
> +#------------------------------------------------------------------------------
> +#
> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
> +#
> +# SPDX-License-Identifier: BSD-2-Clause-Patent
> +#
> +#------------------------------------------------------------------------------
> +
> +	/*
> +	 * Provide the GCC intrinsics that are required when using GCC 9 or
> +	 * later with the -moutline-atomics options (which became the default
> +	 * in GCC 10)
> +	 */
> +	.arch armv8-a
> +
> +	.macro		reg_alias, pfx, sz
> +	r0_\sz		.req	\pfx\()0
> +	r1_\sz		.req	\pfx\()1
> +	tmp0_\sz	.req	\pfx\()16
> +	tmp1_\sz	.req	\pfx\()17
> +	.endm
> +
> +	/*
> +	 * Define register aliases of the right type for each size
> +	 * (xN for 8 bytes, wN for everything smaller)
> +	 */
> +	reg_alias	w, 1
> +	reg_alias	w, 2
> +	reg_alias	w, 4
> +	reg_alias	x, 8
> +
> +	.macro		fn_start, name:req
> +	.section	.text.\name
> +	.globl		\name
> +	.type		\name, %function
> +\name\():
> +	.endm
> +
> +	.macro		fn_end, name:req
> +	.size		\name, . - \name
> +	.endm
> +
> +	/*
> +	 * Emit an atomic helper for \model with operands of size \sz, using
> +	 * the operation specified by \insn (which is the LSE name), and which
> +	 * can be implemented using the generic load-locked/store-conditional
> +	 * (LL/SC) sequence below, using the arithmetic operation given by
> +	 * \opc.
> +	 */
> +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
> +	fn_start	__aarch64_\insn\()\sz\()\model
> +	mov		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x1]
> +	.ifnc		\insn, swp
> +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
> +	.else
> +	\opc		tmp1_\sz, tmp0_\sz
> +	.endif
> +	st\l\()xr\s	w15, tmp1_\sz, [x1]
> +	cbnz		w15, 0b

I see at the end \s is in {,b,h} range.

Don't you need to use x15 on 64-bit?

> +	ret
> +	fn_end		__aarch64_\insn\()\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit atomic helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8}, for the instruction pattern given by
> +	 * \insn. (This is the LSE name, but this implementation uses
> +	 * the generic LL/SC sequence using \opc as the arithmetic
> +	 * operation on the target.)
> +	 */
> +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
> +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
> +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
> +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
> +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
> +	.endm
> +
> +	/*
> +	 * Emit the compare and swap helper for \model and size \sz
> +	 * using LL/SC instructions.
> +	 */
> +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
> +	fn_start	__aarch64_cas\sz\()\model
> +	\uxt		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x2]
> +	cmp		r0_\sz, tmp0_\sz
> +	bne		1f
> +	st\l\()xr\s	w15, r1_\sz, [x2]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit compare-and-swap helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8, 16}.
> +	 */
> +	.macro		emit_cas, model:req, a, l
> +	emit_cas_sz	1, \model, uxtb, b, \a, \l
> +	emit_cas_sz	2, \model, uxth, h, \a, \l
> +	emit_cas_sz	4, \model, mov ,  , \a, \l
> +	emit_cas_sz	8, \model, mov ,  , \a, \l
> +
> +	/*
> +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
> +	 * need to define it explicitly.
> +	 */
> +	fn_start	__aarch64_cas16\model
> +	mov		x16, x0
> +	mov		x17, x1
> +0:	ld\a\()xp	x0, x1, [x4]
> +	cmp		x0, x16
> +	ccmp		x1, x17, #0, eq
> +	bne		1f
> +	st\l\()xp	w15, x16, x17, [x4]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas16\model
> +	.endm
> +
> +	/*
> +	 * Emit the set of GCC outline atomic helper functions for
> +	 * the memory ordering model given by \model:
> +	 * - relax	unordered loads and stores
> +	 * - acq	load-acquire, unordered store
> +	 * - rel	unordered load, store-release
> +	 * - acq_rel	load-acquire, store-release
> +	 */
> +	.macro		emit_model, model:req, a, l
> +	emit_ld		ldadd, add, \model, \a, \l
> +	emit_ld		ldclr, bic, \model, \a, \l
> +	emit_ld		ldeor, eor, \model, \a, \l
> +	emit_ld		ldset, orr, \model, \a, \l
> +	emit_ld		swp,   mov, \model, \a, \l
> +	emit_cas	\model, \a, \l
> +	.endm
> +
> +	emit_model	_relax
> +	emit_model	_acq, a
> +	emit_model	_rel,, l
> +	emit_model	_acq_rel, a, l
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-20 11:44 [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics Ard Biesheuvel
  2020-05-20 12:37 ` [edk2-devel] " Philippe Mathieu-Daudé
@ 2020-05-20 15:59 ` Laszlo Ersek
  2020-05-21 11:23 ` Leif Lindholm
  2020-05-28  1:36 ` Gary Lin
  3 siblings, 0 replies; 25+ messages in thread
From: Laszlo Ersek @ 2020-05-20 15:59 UTC (permalink / raw)
  To: Ard Biesheuvel, devel; +Cc: glin, leif, liming.gao

On 05/20/20 13:44, Ard Biesheuvel wrote:
> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
> unless -mno-outline-atomics is specified. This means GCC-10 introduces
> new intrinsics, and even though it would be possible to work around this
> by specifying the command line option, this would require a new GCC10
> toolchain profile to be created, which we prefer to avoid.
> 
> So instead, add the new intrinsics to our library so they are provided
> when necessary.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
> ---
> v2:
> - add missing .globl to export the functions from the object file
> - add function end markers so the size of each is visible in the ELF metadata
> - add some comments to describe what is going on
> 
>  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
>  2 files changed, 145 insertions(+)

I can't review this patch at any technical level, unfortunately.
However, I certainly agree with (and am thankful for) this approach.

Acked-by: Laszlo Ersek <lersek@redhat.com>

If new versions are needed, please carry forward the A-b.

Thanks!
Laszlo

> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> index d5bad9467758..fcf48c678119 100644
> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> @@ -79,6 +79,9 @@ [Sources.ARM]
>    Arm/ldivmod.asm      | MSFT
>    Arm/llsr.asm         | MSFT
>  
> +[Sources.AARCH64]
> +  AArch64/Atomics.S    | GCC
> +
>  [Packages]
>    MdePkg/MdePkg.dec
>    ArmPkg/ArmPkg.dec
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> new file mode 100644
> index 000000000000..dc61d6bb8e52
> --- /dev/null
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> @@ -0,0 +1,142 @@
> +#------------------------------------------------------------------------------
> +#
> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
> +#
> +# SPDX-License-Identifier: BSD-2-Clause-Patent
> +#
> +#------------------------------------------------------------------------------
> +
> +	/*
> +	 * Provide the GCC intrinsics that are required when using GCC 9 or
> +	 * later with the -moutline-atomics options (which became the default
> +	 * in GCC 10)
> +	 */
> +	.arch armv8-a
> +
> +	.macro		reg_alias, pfx, sz
> +	r0_\sz		.req	\pfx\()0
> +	r1_\sz		.req	\pfx\()1
> +	tmp0_\sz	.req	\pfx\()16
> +	tmp1_\sz	.req	\pfx\()17
> +	.endm
> +
> +	/*
> +	 * Define register aliases of the right type for each size
> +	 * (xN for 8 bytes, wN for everything smaller)
> +	 */
> +	reg_alias	w, 1
> +	reg_alias	w, 2
> +	reg_alias	w, 4
> +	reg_alias	x, 8
> +
> +	.macro		fn_start, name:req
> +	.section	.text.\name
> +	.globl		\name
> +	.type		\name, %function
> +\name\():
> +	.endm
> +
> +	.macro		fn_end, name:req
> +	.size		\name, . - \name
> +	.endm
> +
> +	/*
> +	 * Emit an atomic helper for \model with operands of size \sz, using
> +	 * the operation specified by \insn (which is the LSE name), and which
> +	 * can be implemented using the generic load-locked/store-conditional
> +	 * (LL/SC) sequence below, using the arithmetic operation given by
> +	 * \opc.
> +	 */
> +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
> +	fn_start	__aarch64_\insn\()\sz\()\model
> +	mov		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x1]
> +	.ifnc		\insn, swp
> +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
> +	.else
> +	\opc		tmp1_\sz, tmp0_\sz
> +	.endif
> +	st\l\()xr\s	w15, tmp1_\sz, [x1]
> +	cbnz		w15, 0b
> +	ret
> +	fn_end		__aarch64_\insn\()\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit atomic helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8}, for the instruction pattern given by
> +	 * \insn. (This is the LSE name, but this implementation uses
> +	 * the generic LL/SC sequence using \opc as the arithmetic
> +	 * operation on the target.)
> +	 */
> +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
> +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
> +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
> +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
> +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
> +	.endm
> +
> +	/*
> +	 * Emit the compare and swap helper for \model and size \sz
> +	 * using LL/SC instructions.
> +	 */
> +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
> +	fn_start	__aarch64_cas\sz\()\model
> +	\uxt		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x2]
> +	cmp		r0_\sz, tmp0_\sz
> +	bne		1f
> +	st\l\()xr\s	w15, r1_\sz, [x2]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit compare-and-swap helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8, 16}.
> +	 */
> +	.macro		emit_cas, model:req, a, l
> +	emit_cas_sz	1, \model, uxtb, b, \a, \l
> +	emit_cas_sz	2, \model, uxth, h, \a, \l
> +	emit_cas_sz	4, \model, mov ,  , \a, \l
> +	emit_cas_sz	8, \model, mov ,  , \a, \l
> +
> +	/*
> +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
> +	 * need to define it explicitly.
> +	 */
> +	fn_start	__aarch64_cas16\model
> +	mov		x16, x0
> +	mov		x17, x1
> +0:	ld\a\()xp	x0, x1, [x4]
> +	cmp		x0, x16
> +	ccmp		x1, x17, #0, eq
> +	bne		1f
> +	st\l\()xp	w15, x16, x17, [x4]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas16\model
> +	.endm
> +
> +	/*
> +	 * Emit the set of GCC outline atomic helper functions for
> +	 * the memory ordering model given by \model:
> +	 * - relax	unordered loads and stores
> +	 * - acq	load-acquire, unordered store
> +	 * - rel	unordered load, store-release
> +	 * - acq_rel	load-acquire, store-release
> +	 */
> +	.macro		emit_model, model:req, a, l
> +	emit_ld		ldadd, add, \model, \a, \l
> +	emit_ld		ldclr, bic, \model, \a, \l
> +	emit_ld		ldeor, eor, \model, \a, \l
> +	emit_ld		ldset, orr, \model, \a, \l
> +	emit_ld		swp,   mov, \model, \a, \l
> +	emit_cas	\model, \a, \l
> +	.endm
> +
> +	emit_model	_relax
> +	emit_model	_acq, a
> +	emit_model	_rel,, l
> +	emit_model	_acq_rel, a, l
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-20 11:44 [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics Ard Biesheuvel
  2020-05-20 12:37 ` [edk2-devel] " Philippe Mathieu-Daudé
  2020-05-20 15:59 ` Laszlo Ersek
@ 2020-05-21 11:23 ` Leif Lindholm
  2020-05-21 12:58   ` [edk2-devel] " Ard Biesheuvel
  2020-05-28  1:36 ` Gary Lin
  3 siblings, 1 reply; 25+ messages in thread
From: Leif Lindholm @ 2020-05-21 11:23 UTC (permalink / raw)
  To: Ard Biesheuvel; +Cc: devel, glin, lersek, liming.gao

On Wed, May 20, 2020 at 13:44:48 +0200, Ard Biesheuvel wrote:
> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
> unless -mno-outline-atomics is specified. This means GCC-10 introduces
> new intrinsics, and even though it would be possible to work around this
> by specifying the command line option, this would require a new GCC10
> toolchain profile to be created, which we prefer to avoid.
> 
> So instead, add the new intrinsics to our library so they are provided
> when necessary.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>

I feel unsure about whether this change is still proposed to go in,
but if it is, please fold in a BZ reference
(https://bugzilla.tianocore.org/show_bug.cgi?id=2723) in commit
message. With that:

Reviewed-by: Leif Lindholm <leif@nuviainc.com>
Thanks!

> ---
> v2:
> - add missing .globl to export the functions from the object file
> - add function end markers so the size of each is visible in the ELF metadata
> - add some comments to describe what is going on
> 
>  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
>  2 files changed, 145 insertions(+)
> 
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> index d5bad9467758..fcf48c678119 100644
> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> @@ -79,6 +79,9 @@ [Sources.ARM]
>    Arm/ldivmod.asm      | MSFT
>    Arm/llsr.asm         | MSFT
>  
> +[Sources.AARCH64]
> +  AArch64/Atomics.S    | GCC
> +
>  [Packages]
>    MdePkg/MdePkg.dec
>    ArmPkg/ArmPkg.dec
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> new file mode 100644
> index 000000000000..dc61d6bb8e52
> --- /dev/null
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> @@ -0,0 +1,142 @@
> +#------------------------------------------------------------------------------
> +#
> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
> +#
> +# SPDX-License-Identifier: BSD-2-Clause-Patent
> +#
> +#------------------------------------------------------------------------------
> +
> +	/*
> +	 * Provide the GCC intrinsics that are required when using GCC 9 or
> +	 * later with the -moutline-atomics options (which became the default
> +	 * in GCC 10)
> +	 */
> +	.arch armv8-a
> +
> +	.macro		reg_alias, pfx, sz
> +	r0_\sz		.req	\pfx\()0
> +	r1_\sz		.req	\pfx\()1
> +	tmp0_\sz	.req	\pfx\()16
> +	tmp1_\sz	.req	\pfx\()17
> +	.endm
> +
> +	/*
> +	 * Define register aliases of the right type for each size
> +	 * (xN for 8 bytes, wN for everything smaller)
> +	 */
> +	reg_alias	w, 1
> +	reg_alias	w, 2
> +	reg_alias	w, 4
> +	reg_alias	x, 8
> +
> +	.macro		fn_start, name:req
> +	.section	.text.\name
> +	.globl		\name
> +	.type		\name, %function
> +\name\():
> +	.endm
> +
> +	.macro		fn_end, name:req
> +	.size		\name, . - \name
> +	.endm
> +
> +	/*
> +	 * Emit an atomic helper for \model with operands of size \sz, using
> +	 * the operation specified by \insn (which is the LSE name), and which
> +	 * can be implemented using the generic load-locked/store-conditional
> +	 * (LL/SC) sequence below, using the arithmetic operation given by
> +	 * \opc.
> +	 */
> +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
> +	fn_start	__aarch64_\insn\()\sz\()\model
> +	mov		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x1]
> +	.ifnc		\insn, swp
> +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
> +	.else
> +	\opc		tmp1_\sz, tmp0_\sz
> +	.endif
> +	st\l\()xr\s	w15, tmp1_\sz, [x1]
> +	cbnz		w15, 0b
> +	ret
> +	fn_end		__aarch64_\insn\()\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit atomic helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8}, for the instruction pattern given by
> +	 * \insn. (This is the LSE name, but this implementation uses
> +	 * the generic LL/SC sequence using \opc as the arithmetic
> +	 * operation on the target.)
> +	 */
> +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
> +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
> +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
> +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
> +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
> +	.endm
> +
> +	/*
> +	 * Emit the compare and swap helper for \model and size \sz
> +	 * using LL/SC instructions.
> +	 */
> +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
> +	fn_start	__aarch64_cas\sz\()\model
> +	\uxt		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x2]
> +	cmp		r0_\sz, tmp0_\sz
> +	bne		1f
> +	st\l\()xr\s	w15, r1_\sz, [x2]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit compare-and-swap helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8, 16}.
> +	 */
> +	.macro		emit_cas, model:req, a, l
> +	emit_cas_sz	1, \model, uxtb, b, \a, \l
> +	emit_cas_sz	2, \model, uxth, h, \a, \l
> +	emit_cas_sz	4, \model, mov ,  , \a, \l
> +	emit_cas_sz	8, \model, mov ,  , \a, \l
> +
> +	/*
> +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
> +	 * need to define it explicitly.
> +	 */
> +	fn_start	__aarch64_cas16\model
> +	mov		x16, x0
> +	mov		x17, x1
> +0:	ld\a\()xp	x0, x1, [x4]
> +	cmp		x0, x16
> +	ccmp		x1, x17, #0, eq
> +	bne		1f
> +	st\l\()xp	w15, x16, x17, [x4]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas16\model
> +	.endm
> +
> +	/*
> +	 * Emit the set of GCC outline atomic helper functions for
> +	 * the memory ordering model given by \model:
> +	 * - relax	unordered loads and stores
> +	 * - acq	load-acquire, unordered store
> +	 * - rel	unordered load, store-release
> +	 * - acq_rel	load-acquire, store-release
> +	 */
> +	.macro		emit_model, model:req, a, l
> +	emit_ld		ldadd, add, \model, \a, \l
> +	emit_ld		ldclr, bic, \model, \a, \l
> +	emit_ld		ldeor, eor, \model, \a, \l
> +	emit_ld		ldset, orr, \model, \a, \l
> +	emit_ld		swp,   mov, \model, \a, \l
> +	emit_cas	\model, \a, \l
> +	.endm
> +
> +	emit_model	_relax
> +	emit_model	_acq, a
> +	emit_model	_rel,, l
> +	emit_model	_acq_rel, a, l
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 11:23 ` Leif Lindholm
@ 2020-05-21 12:58   ` Ard Biesheuvel
  2020-05-21 13:16     ` Leif Lindholm
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-21 12:58 UTC (permalink / raw)
  To: devel, leif; +Cc: glin, lersek, liming.gao

On 5/21/20 1:23 PM, Leif Lindholm via groups.io wrote:
> On Wed, May 20, 2020 at 13:44:48 +0200, Ard Biesheuvel wrote:
>> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
>> unless -mno-outline-atomics is specified. This means GCC-10 introduces
>> new intrinsics, and even though it would be possible to work around this
>> by specifying the command line option, this would require a new GCC10
>> toolchain profile to be created, which we prefer to avoid.
>>
>> So instead, add the new intrinsics to our library so they are provided
>> when necessary.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
> 
> I feel unsure about whether this change is still proposed to go in,
> but if it is, please fold in a BZ reference
> (https://bugzilla.tianocore.org/show_bug.cgi?id=2723) in commit
> message. With that:
> 
> Reviewed-by: Leif Lindholm <leif@nuviainc.com>
> Thanks!
> 

Apparently, GCC 10 is getting fixed so that we can set a #pragma in 
MdePkg/Include/AArch64/ProcessorBind.h, and be done with it.

That does mean you would need GCC 10.2 at least, or you get the error, 
unless we merge this patch as well. That would mean the intrinsics are 
never used in practice, but using a compiler that does use them will not 
break the build.

I am leaning towards omitting this patch, but I could be convinced to 
adopt both mitigations. But the #pragma is strongly preferred.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 12:58   ` [edk2-devel] " Ard Biesheuvel
@ 2020-05-21 13:16     ` Leif Lindholm
  2020-05-21 13:31       ` Ard Biesheuvel
  0 siblings, 1 reply; 25+ messages in thread
From: Leif Lindholm @ 2020-05-21 13:16 UTC (permalink / raw)
  To: Ard Biesheuvel; +Cc: devel, glin, lersek, liming.gao

On Thu, May 21, 2020 at 14:58:56 +0200, Ard Biesheuvel wrote:
> > Reviewed-by: Leif Lindholm <leif@nuviainc.com>
> > Thanks!
> > 
> 
> Apparently, GCC 10 is getting fixed so that we can set a #pragma in
> MdePkg/Include/AArch64/ProcessorBind.h, and be done with it.
> 
> That does mean you would need GCC 10.2 at least, or you get the error,
> unless we merge this patch as well. That would mean the intrinsics are never
> used in practice, but using a compiler that does use them will not break the
> build.
> 
> I am leaning towards omitting this patch, but I could be convinced to adopt
> both mitigations. But the #pragma is strongly preferred.

What is the codesize impact on a full RELEASE image?
If non-negligable, how about #ifdefing the whole content out unless
some specific preprocessor flag is passed? Then at least there's a
quick workaround for anyone who needs it.

/
    Leif

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 13:16     ` Leif Lindholm
@ 2020-05-21 13:31       ` Ard Biesheuvel
  2020-05-21 14:16         ` Leif Lindholm
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-21 13:31 UTC (permalink / raw)
  To: Leif Lindholm; +Cc: devel, glin, lersek, liming.gao

On 5/21/20 3:16 PM, Leif Lindholm wrote:
> On Thu, May 21, 2020 at 14:58:56 +0200, Ard Biesheuvel wrote:
>>> Reviewed-by: Leif Lindholm <leif@nuviainc.com>
>>> Thanks!
>>>
>>
>> Apparently, GCC 10 is getting fixed so that we can set a #pragma in
>> MdePkg/Include/AArch64/ProcessorBind.h, and be done with it.
>>
>> That does mean you would need GCC 10.2 at least, or you get the error,
>> unless we merge this patch as well. That would mean the intrinsics are never
>> used in practice, but using a compiler that does use them will not break the
>> build.
>>
>> I am leaning towards omitting this patch, but I could be convinced to adopt
>> both mitigations. But the #pragma is strongly preferred.
> 
> What is the codesize impact on a full RELEASE image?

Not sure how that matters? Given the modular nature of EDK2, I would not 
expect the outline atomics to have any measurable size benefit. The 
intrinsics library is static, with each of the 100 different functions 
emitted into a separate section, so you only get what you use.

> If non-negligable, how about #ifdefing the whole content out unless
> some specific preprocessor flag is passed? Then at least there's a
> quick workaround for anyone who needs it.
> 

The linker will only incorporate the parts that are actually used.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 13:31       ` Ard Biesheuvel
@ 2020-05-21 14:16         ` Leif Lindholm
  2020-05-21 20:22           ` Laszlo Ersek
  0 siblings, 1 reply; 25+ messages in thread
From: Leif Lindholm @ 2020-05-21 14:16 UTC (permalink / raw)
  To: devel, ard.biesheuvel; +Cc: glin, lersek, liming.gao

On Thu, May 21, 2020 at 15:31:44 +0200, Ard Biesheuvel wrote:
> > > That does mean you would need GCC 10.2 at least, or you get the error,
> > > unless we merge this patch as well. That would mean the intrinsics are never
> > > used in practice, but using a compiler that does use them will not break the
> > > build.
> > > 
> > > I am leaning towards omitting this patch, but I could be convinced to adopt
> > > both mitigations. But the #pragma is strongly preferred.
> > 
> > What is the codesize impact on a full RELEASE image?
> 
> Not sure how that matters? Given the modular nature of EDK2, I would not
> expect the outline atomics to have any measurable size benefit. The
> intrinsics library is static, with each of the 100 different functions
> emitted into a separate section, so you only get what you use.

Ah, I'd missed (misfiled?) the separate section thing.

> > If non-negligable, how about #ifdefing the whole content out unless
> > some specific preprocessor flag is passed? Then at least there's a
> > quick workaround for anyone who needs it.
> > 
> 
> The linker will only incorporate the parts that are actually used.

Yeah, so not an issue.
OK, then I would vote *for* merging the patch regardless. We know how
long some toolchain versions can stick around simply because they were
mentioned in some blog post somewhere that ended up high in search
rankings.

Once gcc 10.2 is released (and we have verified the problem can be
worked around elsewhere), I guess we could add a note saying "once all
gcc 10.0 and 10.1 toolchains are considered obsolete, this file can
be deleted".

/
    Leif

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-20 12:37 ` [edk2-devel] " Philippe Mathieu-Daudé
@ 2020-05-21 16:40   ` Philippe Mathieu-Daudé
  2020-05-21 16:45     ` Ard Biesheuvel
  0 siblings, 1 reply; 25+ messages in thread
From: Philippe Mathieu-Daudé @ 2020-05-21 16:40 UTC (permalink / raw)
  To: devel, ard.biesheuvel; +Cc: glin, leif, lersek, liming.gao

On 5/20/20 2:37 PM, Philippe Mathieu-Daudé wrote:
> Hi Ard,
> 
> On 5/20/20 1:44 PM, Ard Biesheuvel wrote:
>> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
>> unless -mno-outline-atomics is specified. This means GCC-10 introduces
>> new intrinsics, and even though it would be possible to work around this
>> by specifying the command line option, this would require a new GCC10
>> toolchain profile to be created, which we prefer to avoid.
>>
>> So instead, add the new intrinsics to our library so they are provided
>> when necessary.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>> ---
>> v2:
>> - add missing .globl to export the functions from the object file
>> - add function end markers so the size of each is visible in the ELF 
>> metadata
>> - add some comments to describe what is going on
> 
> Thanks, head hurts a bit less...
> 
>>
>>   ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>>   ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 
>> ++++++++++++++++++++
>>   2 files changed, 145 insertions(+)
>>
>> diff --git 
>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf 
>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>> index d5bad9467758..fcf48c678119 100644
>> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>     Arm/ldivmod.asm      | MSFT
>>     Arm/llsr.asm         | MSFT
>> +[Sources.AARCH64]
>> +  AArch64/Atomics.S    | GCC
>> +
>>   [Packages]
>>     MdePkg/MdePkg.dec
>>     ArmPkg/ArmPkg.dec
>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S 
>> b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>> new file mode 100644
>> index 000000000000..dc61d6bb8e52
>> --- /dev/null
>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>> @@ -0,0 +1,142 @@
>> +#------------------------------------------------------------------------------ 
>>
>> +#
>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
>> +#
>> +# SPDX-License-Identifier: BSD-2-Clause-Patent
>> +#
>> +#------------------------------------------------------------------------------ 
>>
>> +
>> +    /*
>> +     * Provide the GCC intrinsics that are required when using GCC 9 or
>> +     * later with the -moutline-atomics options (which became the 
>> default
>> +     * in GCC 10)
>> +     */
>> +    .arch armv8-a
>> +
>> +    .macro        reg_alias, pfx, sz
>> +    r0_\sz        .req    \pfx\()0
>> +    r1_\sz        .req    \pfx\()1
>> +    tmp0_\sz    .req    \pfx\()16
>> +    tmp1_\sz    .req    \pfx\()17
>> +    .endm
>> +
>> +    /*
>> +     * Define register aliases of the right type for each size
>> +     * (xN for 8 bytes, wN for everything smaller)
>> +     */
>> +    reg_alias    w, 1
>> +    reg_alias    w, 2
>> +    reg_alias    w, 4
>> +    reg_alias    x, 8
>> +
>> +    .macro        fn_start, name:req
>> +    .section    .text.\name
>> +    .globl        \name
>> +    .type        \name, %function
>> +\name\():
>> +    .endm
>> +
>> +    .macro        fn_end, name:req
>> +    .size        \name, . - \name
>> +    .endm
>> +
>> +    /*
>> +     * Emit an atomic helper for \model with operands of size \sz, using
>> +     * the operation specified by \insn (which is the LSE name), and 
>> which
>> +     * can be implemented using the generic 
>> load-locked/store-conditional
>> +     * (LL/SC) sequence below, using the arithmetic operation given by
>> +     * \opc.
>> +     */
>> +    .macro         emit_ld_sz, sz:req, insn:req, opc:req, model:req, 
>> s, a, l
>> +    fn_start    __aarch64_\insn\()\sz\()\model
>> +    mov        tmp0_\sz, r0_\sz
>> +0:    ld\a\()xr\s    r0_\sz, [x1]
>> +    .ifnc        \insn, swp
>> +    \opc        tmp1_\sz, r0_\sz, tmp0_\sz
>> +    .else
>> +    \opc        tmp1_\sz, tmp0_\sz
>> +    .endif
>> +    st\l\()xr\s    w15, tmp1_\sz, [x1]
>> +    cbnz        w15, 0b
> 
> I see at the end \s is in {,b,h} range.
> 
> Don't you need to use x15 on 64-bit?

Ard, I expanded all macros and reviewed this patch, but I am still 
having hard time to figure why w15 temp is OK instead of x15. Any hint?

> 
>> +    ret
>> +    fn_end        __aarch64_\insn\()\sz\()\model
>> +    .endm
>> +
>> +    /*
>> +     * Emit atomic helpers for \model for operand sizes in the
>> +     * set {1, 2, 4, 8}, for the instruction pattern given by
>> +     * \insn. (This is the LSE name, but this implementation uses
>> +     * the generic LL/SC sequence using \opc as the arithmetic
>> +     * operation on the target.)
>> +     */
>> +    .macro        emit_ld, insn:req, opc:req, model:req, a, l
>> +    emit_ld_sz    1, \insn, \opc, \model, b, \a, \l
>> +    emit_ld_sz    2, \insn, \opc, \model, h, \a, \l
>> +    emit_ld_sz    4, \insn, \opc, \model,  , \a, \l
>> +    emit_ld_sz    8, \insn, \opc, \model,  , \a, \l
>> +    .endm
>> +
>> +    /*
>> +     * Emit the compare and swap helper for \model and size \sz
>> +     * using LL/SC instructions.
>> +     */
>> +    .macro         emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
>> +    fn_start    __aarch64_cas\sz\()\model
>> +    \uxt        tmp0_\sz, r0_\sz
>> +0:    ld\a\()xr\s    r0_\sz, [x2]
>> +    cmp        r0_\sz, tmp0_\sz
>> +    bne        1f
>> +    st\l\()xr\s    w15, r1_\sz, [x2]
>> +    cbnz        w15, 0b
>> +1:    ret
>> +    fn_end        __aarch64_cas\sz\()\model
>> +    .endm
>> +
>> +    /*
>> +     * Emit compare-and-swap helpers for \model for operand sizes in the
>> +     * set {1, 2, 4, 8, 16}.
>> +     */
>> +    .macro        emit_cas, model:req, a, l
>> +    emit_cas_sz    1, \model, uxtb, b, \a, \l
>> +    emit_cas_sz    2, \model, uxth, h, \a, \l
>> +    emit_cas_sz    4, \model, mov ,  , \a, \l
>> +    emit_cas_sz    8, \model, mov ,  , \a, \l
>> +
>> +    /*
>> +     * We cannot use the parameterized sequence for 16 byte CAS, so we
>> +     * need to define it explicitly.
>> +     */
>> +    fn_start    __aarch64_cas16\model
>> +    mov        x16, x0
>> +    mov        x17, x1
>> +0:    ld\a\()xp    x0, x1, [x4]
>> +    cmp        x0, x16
>> +    ccmp        x1, x17, #0, eq
>> +    bne        1f
>> +    st\l\()xp    w15, x16, x17, [x4]
>> +    cbnz        w15, 0b
>> +1:    ret
>> +    fn_end        __aarch64_cas16\model
>> +    .endm
>> +
>> +    /*
>> +     * Emit the set of GCC outline atomic helper functions for
>> +     * the memory ordering model given by \model:
>> +     * - relax    unordered loads and stores
>> +     * - acq    load-acquire, unordered store
>> +     * - rel    unordered load, store-release
>> +     * - acq_rel    load-acquire, store-release
>> +     */
>> +    .macro        emit_model, model:req, a, l
>> +    emit_ld        ldadd, add, \model, \a, \l
>> +    emit_ld        ldclr, bic, \model, \a, \l
>> +    emit_ld        ldeor, eor, \model, \a, \l
>> +    emit_ld        ldset, orr, \model, \a, \l
>> +    emit_ld        swp,   mov, \model, \a, \l
>> +    emit_cas    \model, \a, \l
>> +    .endm
>> +
>> +    emit_model    _relax
>> +    emit_model    _acq, a
>> +    emit_model    _rel,, l
>> +    emit_model    _acq_rel, a, l
>>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 16:40   ` Philippe Mathieu-Daudé
@ 2020-05-21 16:45     ` Ard Biesheuvel
  2020-05-21 16:59       ` Philippe Mathieu-Daudé
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-21 16:45 UTC (permalink / raw)
  To: devel, philmd; +Cc: glin, leif, lersek, liming.gao

On 5/21/20 6:40 PM, Philippe Mathieu-Daudé via groups.io wrote:
> On 5/20/20 2:37 PM, Philippe Mathieu-Daudé wrote:
>> Hi Ard,
>>
>> On 5/20/20 1:44 PM, Ard Biesheuvel wrote:
>>> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
>>> unless -mno-outline-atomics is specified. This means GCC-10 introduces
>>> new intrinsics, and even though it would be possible to work around this
>>> by specifying the command line option, this would require a new GCC10
>>> toolchain profile to be created, which we prefer to avoid.
>>>
>>> So instead, add the new intrinsics to our library so they are provided
>>> when necessary.
>>>
>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>>> ---
>>> v2:
>>> - add missing .globl to export the functions from the object file
>>> - add function end markers so the size of each is visible in the ELF 
>>> metadata
>>> - add some comments to describe what is going on
>>
>> Thanks, head hurts a bit less...
>>
>>>
>>>   ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>>>   ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 
>>> 142 ++++++++++++++++++++
>>>   2 files changed, 145 insertions(+)
>>>
>>> diff --git 
>>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf 
>>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>> index d5bad9467758..fcf48c678119 100644
>>> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>>     Arm/ldivmod.asm      | MSFT
>>>     Arm/llsr.asm         | MSFT
>>> +[Sources.AARCH64]
>>> +  AArch64/Atomics.S    | GCC
>>> +
>>>   [Packages]
>>>     MdePkg/MdePkg.dec
>>>     ArmPkg/ArmPkg.dec
>>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S 
>>> b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>> new file mode 100644
>>> index 000000000000..dc61d6bb8e52
>>> --- /dev/null
>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>> @@ -0,0 +1,142 @@
>>> +#------------------------------------------------------------------------------ 
>>>
>>> +#
>>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
>>> +#
>>> +# SPDX-License-Identifier: BSD-2-Clause-Patent
>>> +#
>>> +#------------------------------------------------------------------------------ 
>>>
>>> +
>>> +    /*
>>> +     * Provide the GCC intrinsics that are required when using GCC 9 or
>>> +     * later with the -moutline-atomics options (which became the 
>>> default
>>> +     * in GCC 10)
>>> +     */
>>> +    .arch armv8-a
>>> +
>>> +    .macro        reg_alias, pfx, sz
>>> +    r0_\sz        .req    \pfx\()0
>>> +    r1_\sz        .req    \pfx\()1
>>> +    tmp0_\sz    .req    \pfx\()16
>>> +    tmp1_\sz    .req    \pfx\()17
>>> +    .endm
>>> +
>>> +    /*
>>> +     * Define register aliases of the right type for each size
>>> +     * (xN for 8 bytes, wN for everything smaller)
>>> +     */
>>> +    reg_alias    w, 1
>>> +    reg_alias    w, 2
>>> +    reg_alias    w, 4
>>> +    reg_alias    x, 8
>>> +
>>> +    .macro        fn_start, name:req
>>> +    .section    .text.\name
>>> +    .globl        \name
>>> +    .type        \name, %function
>>> +\name\():
>>> +    .endm
>>> +
>>> +    .macro        fn_end, name:req
>>> +    .size        \name, . - \name
>>> +    .endm
>>> +
>>> +    /*
>>> +     * Emit an atomic helper for \model with operands of size \sz, 
>>> using
>>> +     * the operation specified by \insn (which is the LSE name), and 
>>> which
>>> +     * can be implemented using the generic 
>>> load-locked/store-conditional
>>> +     * (LL/SC) sequence below, using the arithmetic operation given by
>>> +     * \opc.
>>> +     */
>>> +    .macro         emit_ld_sz, sz:req, insn:req, opc:req, model:req, 
>>> s, a, l
>>> +    fn_start    __aarch64_\insn\()\sz\()\model
>>> +    mov        tmp0_\sz, r0_\sz
>>> +0:    ld\a\()xr\s    r0_\sz, [x1]
>>> +    .ifnc        \insn, swp
>>> +    \opc        tmp1_\sz, r0_\sz, tmp0_\sz
>>> +    .else
>>> +    \opc        tmp1_\sz, tmp0_\sz
>>> +    .endif
>>> +    st\l\()xr\s    w15, tmp1_\sz, [x1]
>>> +    cbnz        w15, 0b
>>
>> I see at the end \s is in {,b,h} range.
>>
>> Don't you need to use x15 on 64-bit?
> 
> Ard, I expanded all macros and reviewed this patch, but I am still 
> having hard time to figure why w15 temp is OK instead of x15. Any hint?
> 

Why do you think it should be x15?


>>
>>> +    ret
>>> +    fn_end        __aarch64_\insn\()\sz\()\model
>>> +    .endm
>>> +
>>> +    /*
>>> +     * Emit atomic helpers for \model for operand sizes in the
>>> +     * set {1, 2, 4, 8}, for the instruction pattern given by
>>> +     * \insn. (This is the LSE name, but this implementation uses
>>> +     * the generic LL/SC sequence using \opc as the arithmetic
>>> +     * operation on the target.)
>>> +     */
>>> +    .macro        emit_ld, insn:req, opc:req, model:req, a, l
>>> +    emit_ld_sz    1, \insn, \opc, \model, b, \a, \l
>>> +    emit_ld_sz    2, \insn, \opc, \model, h, \a, \l
>>> +    emit_ld_sz    4, \insn, \opc, \model,  , \a, \l
>>> +    emit_ld_sz    8, \insn, \opc, \model,  , \a, \l
>>> +    .endm
>>> +
>>> +    /*
>>> +     * Emit the compare and swap helper for \model and size \sz
>>> +     * using LL/SC instructions.
>>> +     */
>>> +    .macro         emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
>>> +    fn_start    __aarch64_cas\sz\()\model
>>> +    \uxt        tmp0_\sz, r0_\sz
>>> +0:    ld\a\()xr\s    r0_\sz, [x2]
>>> +    cmp        r0_\sz, tmp0_\sz
>>> +    bne        1f
>>> +    st\l\()xr\s    w15, r1_\sz, [x2]
>>> +    cbnz        w15, 0b
>>> +1:    ret
>>> +    fn_end        __aarch64_cas\sz\()\model
>>> +    .endm
>>> +
>>> +    /*
>>> +     * Emit compare-and-swap helpers for \model for operand sizes in 
>>> the
>>> +     * set {1, 2, 4, 8, 16}.
>>> +     */
>>> +    .macro        emit_cas, model:req, a, l
>>> +    emit_cas_sz    1, \model, uxtb, b, \a, \l
>>> +    emit_cas_sz    2, \model, uxth, h, \a, \l
>>> +    emit_cas_sz    4, \model, mov ,  , \a, \l
>>> +    emit_cas_sz    8, \model, mov ,  , \a, \l
>>> +
>>> +    /*
>>> +     * We cannot use the parameterized sequence for 16 byte CAS, so we
>>> +     * need to define it explicitly.
>>> +     */
>>> +    fn_start    __aarch64_cas16\model
>>> +    mov        x16, x0
>>> +    mov        x17, x1
>>> +0:    ld\a\()xp    x0, x1, [x4]
>>> +    cmp        x0, x16
>>> +    ccmp        x1, x17, #0, eq
>>> +    bne        1f
>>> +    st\l\()xp    w15, x16, x17, [x4]
>>> +    cbnz        w15, 0b
>>> +1:    ret
>>> +    fn_end        __aarch64_cas16\model
>>> +    .endm
>>> +
>>> +    /*
>>> +     * Emit the set of GCC outline atomic helper functions for
>>> +     * the memory ordering model given by \model:
>>> +     * - relax    unordered loads and stores
>>> +     * - acq    load-acquire, unordered store
>>> +     * - rel    unordered load, store-release
>>> +     * - acq_rel    load-acquire, store-release
>>> +     */
>>> +    .macro        emit_model, model:req, a, l
>>> +    emit_ld        ldadd, add, \model, \a, \l
>>> +    emit_ld        ldclr, bic, \model, \a, \l
>>> +    emit_ld        ldeor, eor, \model, \a, \l
>>> +    emit_ld        ldset, orr, \model, \a, \l
>>> +    emit_ld        swp,   mov, \model, \a, \l
>>> +    emit_cas    \model, \a, \l
>>> +    .endm
>>> +
>>> +    emit_model    _relax
>>> +    emit_model    _acq, a
>>> +    emit_model    _rel,, l
>>> +    emit_model    _acq_rel, a, l
>>>
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 16:45     ` Ard Biesheuvel
@ 2020-05-21 16:59       ` Philippe Mathieu-Daudé
  2020-05-21 17:02         ` Ard Biesheuvel
  0 siblings, 1 reply; 25+ messages in thread
From: Philippe Mathieu-Daudé @ 2020-05-21 16:59 UTC (permalink / raw)
  To: Ard Biesheuvel, devel; +Cc: glin, leif, lersek, liming.gao

On 5/21/20 6:45 PM, Ard Biesheuvel wrote:
> On 5/21/20 6:40 PM, Philippe Mathieu-Daudé via groups.io wrote:
>> On 5/20/20 2:37 PM, Philippe Mathieu-Daudé wrote:
>>> Hi Ard,
>>>
>>> On 5/20/20 1:44 PM, Ard Biesheuvel wrote:
>>>> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
>>>> unless -mno-outline-atomics is specified. This means GCC-10 introduces
>>>> new intrinsics, and even though it would be possible to work around 
>>>> this
>>>> by specifying the command line option, this would require a new GCC10
>>>> toolchain profile to be created, which we prefer to avoid.
>>>>
>>>> So instead, add the new intrinsics to our library so they are provided
>>>> when necessary.
>>>>
>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>>>> ---
>>>> v2:
>>>> - add missing .globl to export the functions from the object file
>>>> - add function end markers so the size of each is visible in the ELF 
>>>> metadata
>>>> - add some comments to describe what is going on
>>>
>>> Thanks, head hurts a bit less...
>>>
>>>>
>>>>   ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   
>>>> 3 +
>>>>   ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 
>>>> 142 ++++++++++++++++++++
>>>>   2 files changed, 145 insertions(+)
>>>>
>>>> diff --git 
>>>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf 
>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> index d5bad9467758..fcf48c678119 100644
>>>> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>>>     Arm/ldivmod.asm      | MSFT
>>>>     Arm/llsr.asm         | MSFT
>>>> +[Sources.AARCH64]
>>>> +  AArch64/Atomics.S    | GCC
>>>> +
>>>>   [Packages]
>>>>     MdePkg/MdePkg.dec
>>>>     ArmPkg/ArmPkg.dec
>>>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S 
>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>> new file mode 100644
>>>> index 000000000000..dc61d6bb8e52
>>>> --- /dev/null
>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>> @@ -0,0 +1,142 @@
>>>> +#------------------------------------------------------------------------------ 
>>>>
>>>> +#
>>>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
>>>> +#
>>>> +# SPDX-License-Identifier: BSD-2-Clause-Patent
>>>> +#
>>>> +#------------------------------------------------------------------------------ 
>>>>
>>>> +
>>>> +    /*
>>>> +     * Provide the GCC intrinsics that are required when using GCC 
>>>> 9 or
>>>> +     * later with the -moutline-atomics options (which became the 
>>>> default
>>>> +     * in GCC 10)
>>>> +     */
>>>> +    .arch armv8-a
>>>> +
>>>> +    .macro        reg_alias, pfx, sz
>>>> +    r0_\sz        .req    \pfx\()0
>>>> +    r1_\sz        .req    \pfx\()1
>>>> +    tmp0_\sz    .req    \pfx\()16
>>>> +    tmp1_\sz    .req    \pfx\()17
>>>> +    .endm
>>>> +
>>>> +    /*
>>>> +     * Define register aliases of the right type for each size
>>>> +     * (xN for 8 bytes, wN for everything smaller)
>>>> +     */
>>>> +    reg_alias    w, 1
>>>> +    reg_alias    w, 2
>>>> +    reg_alias    w, 4
>>>> +    reg_alias    x, 8
>>>> +
>>>> +    .macro        fn_start, name:req
>>>> +    .section    .text.\name
>>>> +    .globl        \name
>>>> +    .type        \name, %function
>>>> +\name\():
>>>> +    .endm
>>>> +
>>>> +    .macro        fn_end, name:req
>>>> +    .size        \name, . - \name
>>>> +    .endm
>>>> +
>>>> +    /*
>>>> +     * Emit an atomic helper for \model with operands of size \sz, 
>>>> using
>>>> +     * the operation specified by \insn (which is the LSE name), 
>>>> and which
>>>> +     * can be implemented using the generic 
>>>> load-locked/store-conditional
>>>> +     * (LL/SC) sequence below, using the arithmetic operation given by
>>>> +     * \opc.
>>>> +     */
>>>> +    .macro         emit_ld_sz, sz:req, insn:req, opc:req, 
>>>> model:req, s, a, l
>>>> +    fn_start    __aarch64_\insn\()\sz\()\model
>>>> +    mov        tmp0_\sz, r0_\sz
>>>> +0:    ld\a\()xr\s    r0_\sz, [x1]
>>>> +    .ifnc        \insn, swp
>>>> +    \opc        tmp1_\sz, r0_\sz, tmp0_\sz
>>>> +    .else
>>>> +    \opc        tmp1_\sz, tmp0_\sz
>>>> +    .endif
>>>> +    st\l\()xr\s    w15, tmp1_\sz, [x1]
>>>> +    cbnz        w15, 0b
>>>
>>> I see at the end \s is in {,b,h} range.
>>>
>>> Don't you need to use x15 on 64-bit?
>>
>> Ard, I expanded all macros and reviewed this patch, but I am still 
>> having hard time to figure why w15 temp is OK instead of x15. Any hint?
>>
> 
> Why do you think it should be x15?

I.e.:

https://developer.arm.com/docs/100076/0100/instruction-set-reference/a64-data-transfer-instructions/ldaxr

Syntax

LDAXR Wt, [Xn|SP{,#0}] ; 32-bit
LDAXR Xt, [Xn|SP{,#0}] ; 64-bit

> 
> 
>>>
>>>> +    ret
>>>> +    fn_end        __aarch64_\insn\()\sz\()\model
>>>> +    .endm
>>>> +
>>>> +    /*
>>>> +     * Emit atomic helpers for \model for operand sizes in the
>>>> +     * set {1, 2, 4, 8}, for the instruction pattern given by
>>>> +     * \insn. (This is the LSE name, but this implementation uses
>>>> +     * the generic LL/SC sequence using \opc as the arithmetic
>>>> +     * operation on the target.)
>>>> +     */
>>>> +    .macro        emit_ld, insn:req, opc:req, model:req, a, l
>>>> +    emit_ld_sz    1, \insn, \opc, \model, b, \a, \l
>>>> +    emit_ld_sz    2, \insn, \opc, \model, h, \a, \l
>>>> +    emit_ld_sz    4, \insn, \opc, \model,  , \a, \l
>>>> +    emit_ld_sz    8, \insn, \opc, \model,  , \a, \l
>>>> +    .endm
>>>> +
>>>> +    /*
>>>> +     * Emit the compare and swap helper for \model and size \sz
>>>> +     * using LL/SC instructions.
>>>> +     */
>>>> +    .macro         emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
>>>> +    fn_start    __aarch64_cas\sz\()\model
>>>> +    \uxt        tmp0_\sz, r0_\sz
>>>> +0:    ld\a\()xr\s    r0_\sz, [x2]
>>>> +    cmp        r0_\sz, tmp0_\sz
>>>> +    bne        1f
>>>> +    st\l\()xr\s    w15, r1_\sz, [x2]
>>>> +    cbnz        w15, 0b
>>>> +1:    ret
>>>> +    fn_end        __aarch64_cas\sz\()\model
>>>> +    .endm
>>>> +
>>>> +    /*
>>>> +     * Emit compare-and-swap helpers for \model for operand sizes 
>>>> in the
>>>> +     * set {1, 2, 4, 8, 16}.
>>>> +     */
>>>> +    .macro        emit_cas, model:req, a, l
>>>> +    emit_cas_sz    1, \model, uxtb, b, \a, \l
>>>> +    emit_cas_sz    2, \model, uxth, h, \a, \l
>>>> +    emit_cas_sz    4, \model, mov ,  , \a, \l
>>>> +    emit_cas_sz    8, \model, mov ,  , \a, \l
>>>> +
>>>> +    /*
>>>> +     * We cannot use the parameterized sequence for 16 byte CAS, so we
>>>> +     * need to define it explicitly.
>>>> +     */
>>>> +    fn_start    __aarch64_cas16\model
>>>> +    mov        x16, x0
>>>> +    mov        x17, x1
>>>> +0:    ld\a\()xp    x0, x1, [x4]
>>>> +    cmp        x0, x16
>>>> +    ccmp        x1, x17, #0, eq
>>>> +    bne        1f
>>>> +    st\l\()xp    w15, x16, x17, [x4]
>>>> +    cbnz        w15, 0b
>>>> +1:    ret
>>>> +    fn_end        __aarch64_cas16\model
>>>> +    .endm
>>>> +
>>>> +    /*
>>>> +     * Emit the set of GCC outline atomic helper functions for
>>>> +     * the memory ordering model given by \model:
>>>> +     * - relax    unordered loads and stores
>>>> +     * - acq    load-acquire, unordered store
>>>> +     * - rel    unordered load, store-release
>>>> +     * - acq_rel    load-acquire, store-release
>>>> +     */
>>>> +    .macro        emit_model, model:req, a, l
>>>> +    emit_ld        ldadd, add, \model, \a, \l
>>>> +    emit_ld        ldclr, bic, \model, \a, \l
>>>> +    emit_ld        ldeor, eor, \model, \a, \l
>>>> +    emit_ld        ldset, orr, \model, \a, \l
>>>> +    emit_ld        swp,   mov, \model, \a, \l
>>>> +    emit_cas    \model, \a, \l
>>>> +    .endm
>>>> +
>>>> +    emit_model    _relax
>>>> +    emit_model    _acq, a
>>>> +    emit_model    _rel,, l
>>>> +    emit_model    _acq_rel, a, l
>>>>
>>
>>
>> 
>>
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 16:59       ` Philippe Mathieu-Daudé
@ 2020-05-21 17:02         ` Ard Biesheuvel
  2020-05-21 17:07           ` Philippe Mathieu-Daudé
  0 siblings, 1 reply; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-21 17:02 UTC (permalink / raw)
  To: Philippe Mathieu-Daudé, devel; +Cc: glin, leif, lersek, liming.gao

On 5/21/20 6:59 PM, Philippe Mathieu-Daudé wrote:
> On 5/21/20 6:45 PM, Ard Biesheuvel wrote:
>> On 5/21/20 6:40 PM, Philippe Mathieu-Daudé via groups.io wrote:
>>> On 5/20/20 2:37 PM, Philippe Mathieu-Daudé wrote:
>>>> Hi Ard,
>>>>
>>>> On 5/20/20 1:44 PM, Ard Biesheuvel wrote:
>>>>> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
>>>>> unless -mno-outline-atomics is specified. This means GCC-10 introduces
>>>>> new intrinsics, and even though it would be possible to work around 
>>>>> this
>>>>> by specifying the command line option, this would require a new GCC10
>>>>> toolchain profile to be created, which we prefer to avoid.
>>>>>
>>>>> So instead, add the new intrinsics to our library so they are provided
>>>>> when necessary.
>>>>>
>>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>>>>> ---
>>>>> v2:
>>>>> - add missing .globl to export the functions from the object file
>>>>> - add function end markers so the size of each is visible in the 
>>>>> ELF metadata
>>>>> - add some comments to describe what is going on
>>>>
>>>> Thanks, head hurts a bit less...
>>>>
>>>>>
>>>>>   ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf | 3 +
>>>>>   ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 
>>>>> 142 ++++++++++++++++++++
>>>>>   2 files changed, 145 insertions(+)
>>>>>
>>>>> diff --git 
>>>>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf 
>>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>>> index d5bad9467758..fcf48c678119 100644
>>>>> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>>>>     Arm/ldivmod.asm      | MSFT
>>>>>     Arm/llsr.asm         | MSFT
>>>>> +[Sources.AARCH64]
>>>>> +  AArch64/Atomics.S    | GCC
>>>>> +
>>>>>   [Packages]
>>>>>     MdePkg/MdePkg.dec
>>>>>     ArmPkg/ArmPkg.dec
>>>>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S 
>>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>>> new file mode 100644
>>>>> index 000000000000..dc61d6bb8e52
>>>>> --- /dev/null
>>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>>> @@ -0,0 +1,142 @@
>>>>> +#------------------------------------------------------------------------------ 
>>>>>
>>>>> +#
>>>>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
>>>>> +#
>>>>> +# SPDX-License-Identifier: BSD-2-Clause-Patent
>>>>> +#
>>>>> +#------------------------------------------------------------------------------ 
>>>>>
>>>>> +
>>>>> +    /*
>>>>> +     * Provide the GCC intrinsics that are required when using GCC 
>>>>> 9 or
>>>>> +     * later with the -moutline-atomics options (which became the 
>>>>> default
>>>>> +     * in GCC 10)
>>>>> +     */
>>>>> +    .arch armv8-a
>>>>> +
>>>>> +    .macro        reg_alias, pfx, sz
>>>>> +    r0_\sz        .req    \pfx\()0
>>>>> +    r1_\sz        .req    \pfx\()1
>>>>> +    tmp0_\sz    .req    \pfx\()16
>>>>> +    tmp1_\sz    .req    \pfx\()17
>>>>> +    .endm
>>>>> +
>>>>> +    /*
>>>>> +     * Define register aliases of the right type for each size
>>>>> +     * (xN for 8 bytes, wN for everything smaller)
>>>>> +     */
>>>>> +    reg_alias    w, 1
>>>>> +    reg_alias    w, 2
>>>>> +    reg_alias    w, 4
>>>>> +    reg_alias    x, 8
>>>>> +
>>>>> +    .macro        fn_start, name:req
>>>>> +    .section    .text.\name
>>>>> +    .globl        \name
>>>>> +    .type        \name, %function
>>>>> +\name\():
>>>>> +    .endm
>>>>> +
>>>>> +    .macro        fn_end, name:req
>>>>> +    .size        \name, . - \name
>>>>> +    .endm
>>>>> +
>>>>> +    /*
>>>>> +     * Emit an atomic helper for \model with operands of size \sz, 
>>>>> using
>>>>> +     * the operation specified by \insn (which is the LSE name), 
>>>>> and which
>>>>> +     * can be implemented using the generic 
>>>>> load-locked/store-conditional
>>>>> +     * (LL/SC) sequence below, using the arithmetic operation 
>>>>> given by
>>>>> +     * \opc.
>>>>> +     */
>>>>> +    .macro         emit_ld_sz, sz:req, insn:req, opc:req, 
>>>>> model:req, s, a, l
>>>>> +    fn_start    __aarch64_\insn\()\sz\()\model
>>>>> +    mov        tmp0_\sz, r0_\sz
>>>>> +0:    ld\a\()xr\s    r0_\sz, [x1]
>>>>> +    .ifnc        \insn, swp
>>>>> +    \opc        tmp1_\sz, r0_\sz, tmp0_\sz
>>>>> +    .else
>>>>> +    \opc        tmp1_\sz, tmp0_\sz
>>>>> +    .endif
>>>>> +    st\l\()xr\s    w15, tmp1_\sz, [x1]
>>>>> +    cbnz        w15, 0b
>>>>
>>>> I see at the end \s is in {,b,h} range.
>>>>
>>>> Don't you need to use x15 on 64-bit?
>>>
>>> Ard, I expanded all macros and reviewed this patch, but I am still 
>>> having hard time to figure why w15 temp is OK instead of x15. Any hint?
>>>
>>
>> Why do you think it should be x15?
> 
> I.e.:
> 
> https://developer.arm.com/docs/100076/0100/instruction-set-reference/a64-data-transfer-instructions/ldaxr 
> 
> 
> Syntax
> 
> LDAXR Wt, [Xn|SP{,#0}] ; 32-bit
> LDAXR Xt, [Xn|SP{,#0}] ; 64-bit
> 

That is the load part, where Wt and Xt map onto r0 in the code above.

https://developer.arm.com/docs/100076/0100/instruction-set-reference/a64-data-transfer-instructions/stlxr

gives the description for the store part, where the w15 register is 
actually used.


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 17:02         ` Ard Biesheuvel
@ 2020-05-21 17:07           ` Philippe Mathieu-Daudé
  0 siblings, 0 replies; 25+ messages in thread
From: Philippe Mathieu-Daudé @ 2020-05-21 17:07 UTC (permalink / raw)
  To: Ard Biesheuvel, devel; +Cc: glin, leif, lersek, liming.gao

On 5/21/20 7:02 PM, Ard Biesheuvel wrote:
> On 5/21/20 6:59 PM, Philippe Mathieu-Daudé wrote:
>> On 5/21/20 6:45 PM, Ard Biesheuvel wrote:
>>> On 5/21/20 6:40 PM, Philippe Mathieu-Daudé via groups.io wrote:
>>>> On 5/20/20 2:37 PM, Philippe Mathieu-Daudé wrote:
>>>>> Hi Ard,
>>>>>
>>>>> On 5/20/20 1:44 PM, Ard Biesheuvel wrote:
>>>>>> Gary reports the GCC 10 will emit calls to atomics intrinsics 
>>>>>> routines
>>>>>> unless -mno-outline-atomics is specified. This means GCC-10 
>>>>>> introduces
>>>>>> new intrinsics, and even though it would be possible to work 
>>>>>> around this
>>>>>> by specifying the command line option, this would require a new GCC10
>>>>>> toolchain profile to be created, which we prefer to avoid.
>>>>>>
>>>>>> So instead, add the new intrinsics to our library so they are 
>>>>>> provided
>>>>>> when necessary.
>>>>>>
>>>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>>>>>> ---
>>>>>> v2:
>>>>>> - add missing .globl to export the functions from the object file
>>>>>> - add function end markers so the size of each is visible in the 
>>>>>> ELF metadata
>>>>>> - add some comments to describe what is going on
>>>>>
>>>>> Thanks, head hurts a bit less...
>>>>>
>>>>>>
>>>>>>   ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf | 
>>>>>> 3 +
>>>>>>   ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 
>>>>>> 142 ++++++++++++++++++++
>>>>>>   2 files changed, 145 insertions(+)
>>>>>>
>>>>>> diff --git 
>>>>>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf 
>>>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>>>> index d5bad9467758..fcf48c678119 100644
>>>>>> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>>>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>>>>>     Arm/ldivmod.asm      | MSFT
>>>>>>     Arm/llsr.asm         | MSFT
>>>>>> +[Sources.AARCH64]
>>>>>> +  AArch64/Atomics.S    | GCC
>>>>>> +
>>>>>>   [Packages]
>>>>>>     MdePkg/MdePkg.dec
>>>>>>     ArmPkg/ArmPkg.dec
>>>>>> diff --git 
>>>>>> a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S 
>>>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>>>> new file mode 100644
>>>>>> index 000000000000..dc61d6bb8e52
>>>>>> --- /dev/null
>>>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>>>> @@ -0,0 +1,142 @@
>>>>>> +#------------------------------------------------------------------------------ 
>>>>>>
>>>>>> +#
>>>>>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
>>>>>> +#
>>>>>> +# SPDX-License-Identifier: BSD-2-Clause-Patent
>>>>>> +#
>>>>>> +#------------------------------------------------------------------------------ 
>>>>>>
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Provide the GCC intrinsics that are required when using 
>>>>>> GCC 9 or
>>>>>> +     * later with the -moutline-atomics options (which became the 
>>>>>> default
>>>>>> +     * in GCC 10)
>>>>>> +     */
>>>>>> +    .arch armv8-a
>>>>>> +
>>>>>> +    .macro        reg_alias, pfx, sz
>>>>>> +    r0_\sz        .req    \pfx\()0
>>>>>> +    r1_\sz        .req    \pfx\()1
>>>>>> +    tmp0_\sz    .req    \pfx\()16
>>>>>> +    tmp1_\sz    .req    \pfx\()17
>>>>>> +    .endm
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Define register aliases of the right type for each size
>>>>>> +     * (xN for 8 bytes, wN for everything smaller)
>>>>>> +     */
>>>>>> +    reg_alias    w, 1
>>>>>> +    reg_alias    w, 2
>>>>>> +    reg_alias    w, 4
>>>>>> +    reg_alias    x, 8
>>>>>> +
>>>>>> +    .macro        fn_start, name:req
>>>>>> +    .section    .text.\name
>>>>>> +    .globl        \name
>>>>>> +    .type        \name, %function
>>>>>> +\name\():
>>>>>> +    .endm
>>>>>> +
>>>>>> +    .macro        fn_end, name:req
>>>>>> +    .size        \name, . - \name
>>>>>> +    .endm
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Emit an atomic helper for \model with operands of size 
>>>>>> \sz, using
>>>>>> +     * the operation specified by \insn (which is the LSE name), 
>>>>>> and which
>>>>>> +     * can be implemented using the generic 
>>>>>> load-locked/store-conditional
>>>>>> +     * (LL/SC) sequence below, using the arithmetic operation 
>>>>>> given by
>>>>>> +     * \opc.
>>>>>> +     */
>>>>>> +    .macro         emit_ld_sz, sz:req, insn:req, opc:req, 
>>>>>> model:req, s, a, l
>>>>>> +    fn_start    __aarch64_\insn\()\sz\()\model
>>>>>> +    mov        tmp0_\sz, r0_\sz
>>>>>> +0:    ld\a\()xr\s    r0_\sz, [x1]
>>>>>> +    .ifnc        \insn, swp
>>>>>> +    \opc        tmp1_\sz, r0_\sz, tmp0_\sz
>>>>>> +    .else
>>>>>> +    \opc        tmp1_\sz, tmp0_\sz
>>>>>> +    .endif
>>>>>> +    st\l\()xr\s    w15, tmp1_\sz, [x1]
>>>>>> +    cbnz        w15, 0b
>>>>>
>>>>> I see at the end \s is in {,b,h} range.
>>>>>
>>>>> Don't you need to use x15 on 64-bit?
>>>>
>>>> Ard, I expanded all macros and reviewed this patch, but I am still 
>>>> having hard time to figure why w15 temp is OK instead of x15. Any hint?
>>>>
>>>
>>> Why do you think it should be x15?
>>
>> I.e.:
>>
>> https://developer.arm.com/docs/100076/0100/instruction-set-reference/a64-data-transfer-instructions/ldaxr 
>>
>>
>> Syntax
>>
>> LDAXR Wt, [Xn|SP{,#0}] ; 32-bit
>> LDAXR Xt, [Xn|SP{,#0}] ; 64-bit
>>
> 
> That is the load part, where Wt and Xt map onto r0 in the code above.
> 
> https://developer.arm.com/docs/100076/0100/instruction-set-reference/a64-data-transfer-instructions/stlxr 

Grrrrrrrrr I had the both link open and looked at the wrong tab 
:facepalm: Sorry for the noise.

> 
> 
> gives the description for the store part, where the w15 register is 
> actually used.
> 

Reviewed-by: Philippe Mathieu-Daude <philmd@redhat.com>


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 14:16         ` Leif Lindholm
@ 2020-05-21 20:22           ` Laszlo Ersek
  2020-05-22  8:16             ` Ard Biesheuvel
  2020-05-22 10:54             ` Leif Lindholm
  0 siblings, 2 replies; 25+ messages in thread
From: Laszlo Ersek @ 2020-05-21 20:22 UTC (permalink / raw)
  To: Leif Lindholm, devel, ard.biesheuvel; +Cc: glin, liming.gao

On 05/21/20 16:16, Leif Lindholm wrote:

> OK, then I would vote *for* merging the patch regardless. We know how
> long some toolchain versions can stick around simply because they were
> mentioned in some blog post somewhere that ended up high in search
> rankings.
> 
> Once gcc 10.2 is released (and we have verified the problem can be
> worked around elsewhere), I guess we could add a note saying "once all
> gcc 10.0 and 10.1 toolchains are considered obsolete, this file can
> be deleted".

I think we can expect all distros that ship gcc-10 to eventually migrate
to gcc-10.2+. Until then, this patch should hopefully work. (I'm quite
annoyed by having to call the patch "temporary", as it feels very
technically impressive.)

So I think I agree with Leif, with a small modification to the idea:
rather than a *note* saying "back this out once 10.0 and 10.1 have been
replaced by 10.2+ in all 'large' distros", I would suggest filing a *BZ*
for the same. And I recommend making the new BZ dependent on
TianoCore#2723 (i.e. the present BZ).

Thanks!
Laszlo

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 20:22           ` Laszlo Ersek
@ 2020-05-22  8:16             ` Ard Biesheuvel
  2020-05-22 10:54             ` Leif Lindholm
  1 sibling, 0 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-22  8:16 UTC (permalink / raw)
  To: Laszlo Ersek, Leif Lindholm, devel; +Cc: glin, liming.gao

On 5/21/20 10:22 PM, Laszlo Ersek wrote:
> On 05/21/20 16:16, Leif Lindholm wrote:
> 
>> OK, then I would vote *for* merging the patch regardless. We know how
>> long some toolchain versions can stick around simply because they were
>> mentioned in some blog post somewhere that ended up high in search
>> rankings.
>>
>> Once gcc 10.2 is released (and we have verified the problem can be
>> worked around elsewhere), I guess we could add a note saying "once all
>> gcc 10.0 and 10.1 toolchains are considered obsolete, this file can
>> be deleted".
> 
> I think we can expect all distros that ship gcc-10 to eventually migrate
> to gcc-10.2+. Until then, this patch should hopefully work. (I'm quite
> annoyed by having to call the patch "temporary", as it feels very
> technically impressive.)
> 

Thanks, but it would be better not to use the code at all, unless we 
really need it.

> So I think I agree with Leif, with a small modification to the idea:
> rather than a *note* saying "back this out once 10.0 and 10.1 have been
> replaced by 10.2+ in all 'large' distros", I would suggest filing a *BZ*
> for the same. And I recommend making the new BZ dependent on
> TianoCore#2723 (i.e. the present BZ).
> 

Sure.



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-21 20:22           ` Laszlo Ersek
  2020-05-22  8:16             ` Ard Biesheuvel
@ 2020-05-22 10:54             ` Leif Lindholm
  2020-05-22 13:27               ` Ard Biesheuvel
  1 sibling, 1 reply; 25+ messages in thread
From: Leif Lindholm @ 2020-05-22 10:54 UTC (permalink / raw)
  To: Laszlo Ersek; +Cc: devel, ard.biesheuvel, glin, liming.gao

On Thu, May 21, 2020 at 22:22:58 +0200, Laszlo Ersek wrote:
> On 05/21/20 16:16, Leif Lindholm wrote:
> 
> > OK, then I would vote *for* merging the patch regardless. We know how
> > long some toolchain versions can stick around simply because they were
> > mentioned in some blog post somewhere that ended up high in search
> > rankings.
> > 
> > Once gcc 10.2 is released (and we have verified the problem can be
> > worked around elsewhere), I guess we could add a note saying "once all
> > gcc 10.0 and 10.1 toolchains are considered obsolete, this file can
> > be deleted".
> 
> I think we can expect all distros that ship gcc-10 to eventually migrate
> to gcc-10.2+. Until then, this patch should hopefully work. (I'm quite
> annoyed by having to call the patch "temporary", as it feels very
> technically impressive.)
> 
> So I think I agree with Leif, with a small modification to the idea:
> rather than a *note* saying "back this out once 10.0 and 10.1 have been
> replaced by 10.2+ in all 'large' distros"

That isn't actually exatly what I meant - I meant properly obsolete
as in "we are now reasonably certain no one is still using some silly
ancient cross compiler they checked into their build infrastructure
years ago".

> , I would suggest filing a *BZ*
> for the same. And I recommend making the new BZ dependent on
> TianoCore#2723 (i.e. the present BZ).

But I don't object to that approach.

Regards,

Leif

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-22 10:54             ` Leif Lindholm
@ 2020-05-22 13:27               ` Ard Biesheuvel
  2020-05-22 19:04                 ` Laszlo Ersek
  2020-06-02  0:50                 ` Liming Gao
  0 siblings, 2 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2020-05-22 13:27 UTC (permalink / raw)
  To: Leif Lindholm, Laszlo Ersek; +Cc: devel, glin, liming.gao

On 5/22/20 12:54 PM, Leif Lindholm wrote:
> On Thu, May 21, 2020 at 22:22:58 +0200, Laszlo Ersek wrote:
>> On 05/21/20 16:16, Leif Lindholm wrote:
>>
>>> OK, then I would vote *for* merging the patch regardless. We know how
>>> long some toolchain versions can stick around simply because they were
>>> mentioned in some blog post somewhere that ended up high in search
>>> rankings.
>>>
>>> Once gcc 10.2 is released (and we have verified the problem can be
>>> worked around elsewhere), I guess we could add a note saying "once all
>>> gcc 10.0 and 10.1 toolchains are considered obsolete, this file can
>>> be deleted".
>>
>> I think we can expect all distros that ship gcc-10 to eventually migrate
>> to gcc-10.2+. Until then, this patch should hopefully work. (I'm quite
>> annoyed by having to call the patch "temporary", as it feels very
>> technically impressive.)
>>
>> So I think I agree with Leif, with a small modification to the idea:
>> rather than a *note* saying "back this out once 10.0 and 10.1 have been
>> replaced by 10.2+ in all 'large' distros"
> 
> That isn't actually exatly what I meant - I meant properly obsolete
> as in "we are now reasonably certain no one is still using some silly
> ancient cross compiler they checked into their build infrastructure
> years ago".
> 
>> , I would suggest filing a *BZ*
>> for the same. And I recommend making the new BZ dependent on
>> TianoCore#2723 (i.e. the present BZ).
> 
> But I don't object to that approach.
> 

OK, so i will leave it up to Liming and the stewards to decide whether 
this gets incorporated ino the stable tag or not. If it is, I would like 
to fold in the fixup below

--- a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
@@ -53,10 +53,10 @@
  0:     ld\a\()xr\s     r0_\sz, [x1]
         .ifnc           \insn, swp
         \opc            tmp1_\sz, r0_\sz, tmp0_\sz
+       st\l\()xr\s     w15, tmp1_\sz, [x1]
         .else
-       \opc            tmp1_\sz, tmp0_\sz
+       st\l\()xr\s     w15, tmp0_\sz, [x1]
         .endif
-       st\l\()xr\s     w15, tmp1_\sz, [x1]
         cbnz            w15, 0b
         ret
         fn_end          __aarch64_\insn\()\sz\()\model

to get rid of the redundant 'mov' for the SWP flavor of the atomics helpers.



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-22 13:27               ` Ard Biesheuvel
@ 2020-05-22 19:04                 ` Laszlo Ersek
  2020-06-02  0:50                 ` Liming Gao
  1 sibling, 0 replies; 25+ messages in thread
From: Laszlo Ersek @ 2020-05-22 19:04 UTC (permalink / raw)
  To: Ard Biesheuvel, Leif Lindholm; +Cc: devel, glin, liming.gao

On 05/22/20 15:27, Ard Biesheuvel wrote:
> On 5/22/20 12:54 PM, Leif Lindholm wrote:
>> On Thu, May 21, 2020 at 22:22:58 +0200, Laszlo Ersek wrote:
>>> On 05/21/20 16:16, Leif Lindholm wrote:
>>>
>>>> OK, then I would vote *for* merging the patch regardless. We know how
>>>> long some toolchain versions can stick around simply because they were
>>>> mentioned in some blog post somewhere that ended up high in search
>>>> rankings.
>>>>
>>>> Once gcc 10.2 is released (and we have verified the problem can be
>>>> worked around elsewhere), I guess we could add a note saying "once all
>>>> gcc 10.0 and 10.1 toolchains are considered obsolete, this file can
>>>> be deleted".
>>>
>>> I think we can expect all distros that ship gcc-10 to eventually migrate
>>> to gcc-10.2+. Until then, this patch should hopefully work. (I'm quite
>>> annoyed by having to call the patch "temporary", as it feels very
>>> technically impressive.)
>>>
>>> So I think I agree with Leif, with a small modification to the idea:
>>> rather than a *note* saying "back this out once 10.0 and 10.1 have been
>>> replaced by 10.2+ in all 'large' distros"
>>
>> That isn't actually exatly what I meant - I meant properly obsolete
>> as in "we are now reasonably certain no one is still using some silly
>> ancient cross compiler they checked into their build infrastructure
>> years ago".
>>
>>> , I would suggest filing a *BZ*
>>> for the same. And I recommend making the new BZ dependent on
>>> TianoCore#2723 (i.e. the present BZ).
>>
>> But I don't object to that approach.
>>
> 
> OK, so i will leave it up to Liming and the stewards to decide whether
> this gets incorporated ino the stable tag or not.

I'd delay it -- first, maybe have some more discussion around it,
second, we can consider this "GCC10 feature enablement".

IMO anyway.

Thanks
Laszlo


> If it is, I would like
> to fold in the fixup below
> 
> --- a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> @@ -53,10 +53,10 @@
>  0:     ld\a\()xr\s     r0_\sz, [x1]
>         .ifnc           \insn, swp
>         \opc            tmp1_\sz, r0_\sz, tmp0_\sz
> +       st\l\()xr\s     w15, tmp1_\sz, [x1]
>         .else
> -       \opc            tmp1_\sz, tmp0_\sz
> +       st\l\()xr\s     w15, tmp0_\sz, [x1]
>         .endif
> -       st\l\()xr\s     w15, tmp1_\sz, [x1]
>         cbnz            w15, 0b
>         ret
>         fn_end          __aarch64_\insn\()\sz\()\model
> 
> to get rid of the redundant 'mov' for the SWP flavor of the atomics
> helpers.
> 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-20 11:44 [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics Ard Biesheuvel
                   ` (2 preceding siblings ...)
  2020-05-21 11:23 ` Leif Lindholm
@ 2020-05-28  1:36 ` Gary Lin
  2020-05-28  9:49   ` Leif Lindholm
  3 siblings, 1 reply; 25+ messages in thread
From: Gary Lin @ 2020-05-28  1:36 UTC (permalink / raw)
  To: devel, ard.biesheuvel; +Cc: leif, lersek, liming.gao

On Wed, May 20, 2020 at 01:44:48PM +0200, Ard Biesheuvel wrote:
> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
> unless -mno-outline-atomics is specified. This means GCC-10 introduces
> new intrinsics, and even though it would be possible to work around this
> by specifying the command line option, this would require a new GCC10
> toolchain profile to be created, which we prefer to avoid.
> 
> So instead, add the new intrinsics to our library so they are provided
> when necessary.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
After applying this patch, gcc 10 can build ArmVirtPkg without the
linking error.

Tested-by: Gary Lin <glin@suse.com>

> ---
> v2:
> - add missing .globl to export the functions from the object file
> - add function end markers so the size of each is visible in the ELF metadata
> - add some comments to describe what is going on
> 
>  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
>  2 files changed, 145 insertions(+)
> 
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> index d5bad9467758..fcf48c678119 100644
> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> @@ -79,6 +79,9 @@ [Sources.ARM]
>    Arm/ldivmod.asm      | MSFT
>    Arm/llsr.asm         | MSFT
>  
> +[Sources.AARCH64]
> +  AArch64/Atomics.S    | GCC
> +
>  [Packages]
>    MdePkg/MdePkg.dec
>    ArmPkg/ArmPkg.dec
> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> new file mode 100644
> index 000000000000..dc61d6bb8e52
> --- /dev/null
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> @@ -0,0 +1,142 @@
> +#------------------------------------------------------------------------------
> +#
> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
> +#
> +# SPDX-License-Identifier: BSD-2-Clause-Patent
> +#
> +#------------------------------------------------------------------------------
> +
> +	/*
> +	 * Provide the GCC intrinsics that are required when using GCC 9 or
> +	 * later with the -moutline-atomics options (which became the default
> +	 * in GCC 10)
> +	 */
> +	.arch armv8-a
> +
> +	.macro		reg_alias, pfx, sz
> +	r0_\sz		.req	\pfx\()0
> +	r1_\sz		.req	\pfx\()1
> +	tmp0_\sz	.req	\pfx\()16
> +	tmp1_\sz	.req	\pfx\()17
> +	.endm
> +
> +	/*
> +	 * Define register aliases of the right type for each size
> +	 * (xN for 8 bytes, wN for everything smaller)
> +	 */
> +	reg_alias	w, 1
> +	reg_alias	w, 2
> +	reg_alias	w, 4
> +	reg_alias	x, 8
> +
> +	.macro		fn_start, name:req
> +	.section	.text.\name
> +	.globl		\name
> +	.type		\name, %function
> +\name\():
> +	.endm
> +
> +	.macro		fn_end, name:req
> +	.size		\name, . - \name
> +	.endm
> +
> +	/*
> +	 * Emit an atomic helper for \model with operands of size \sz, using
> +	 * the operation specified by \insn (which is the LSE name), and which
> +	 * can be implemented using the generic load-locked/store-conditional
> +	 * (LL/SC) sequence below, using the arithmetic operation given by
> +	 * \opc.
> +	 */
> +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
> +	fn_start	__aarch64_\insn\()\sz\()\model
> +	mov		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x1]
> +	.ifnc		\insn, swp
> +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
> +	.else
> +	\opc		tmp1_\sz, tmp0_\sz
> +	.endif
> +	st\l\()xr\s	w15, tmp1_\sz, [x1]
> +	cbnz		w15, 0b
> +	ret
> +	fn_end		__aarch64_\insn\()\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit atomic helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8}, for the instruction pattern given by
> +	 * \insn. (This is the LSE name, but this implementation uses
> +	 * the generic LL/SC sequence using \opc as the arithmetic
> +	 * operation on the target.)
> +	 */
> +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
> +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
> +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
> +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
> +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
> +	.endm
> +
> +	/*
> +	 * Emit the compare and swap helper for \model and size \sz
> +	 * using LL/SC instructions.
> +	 */
> +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
> +	fn_start	__aarch64_cas\sz\()\model
> +	\uxt		tmp0_\sz, r0_\sz
> +0:	ld\a\()xr\s	r0_\sz, [x2]
> +	cmp		r0_\sz, tmp0_\sz
> +	bne		1f
> +	st\l\()xr\s	w15, r1_\sz, [x2]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas\sz\()\model
> +	.endm
> +
> +	/*
> +	 * Emit compare-and-swap helpers for \model for operand sizes in the
> +	 * set {1, 2, 4, 8, 16}.
> +	 */
> +	.macro		emit_cas, model:req, a, l
> +	emit_cas_sz	1, \model, uxtb, b, \a, \l
> +	emit_cas_sz	2, \model, uxth, h, \a, \l
> +	emit_cas_sz	4, \model, mov ,  , \a, \l
> +	emit_cas_sz	8, \model, mov ,  , \a, \l
> +
> +	/*
> +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
> +	 * need to define it explicitly.
> +	 */
> +	fn_start	__aarch64_cas16\model
> +	mov		x16, x0
> +	mov		x17, x1
> +0:	ld\a\()xp	x0, x1, [x4]
> +	cmp		x0, x16
> +	ccmp		x1, x17, #0, eq
> +	bne		1f
> +	st\l\()xp	w15, x16, x17, [x4]
> +	cbnz		w15, 0b
> +1:	ret
> +	fn_end		__aarch64_cas16\model
> +	.endm
> +
> +	/*
> +	 * Emit the set of GCC outline atomic helper functions for
> +	 * the memory ordering model given by \model:
> +	 * - relax	unordered loads and stores
> +	 * - acq	load-acquire, unordered store
> +	 * - rel	unordered load, store-release
> +	 * - acq_rel	load-acquire, store-release
> +	 */
> +	.macro		emit_model, model:req, a, l
> +	emit_ld		ldadd, add, \model, \a, \l
> +	emit_ld		ldclr, bic, \model, \a, \l
> +	emit_ld		ldeor, eor, \model, \a, \l
> +	emit_ld		ldset, orr, \model, \a, \l
> +	emit_ld		swp,   mov, \model, \a, \l
> +	emit_cas	\model, \a, \l
> +	.endm
> +
> +	emit_model	_relax
> +	emit_model	_acq, a
> +	emit_model	_rel,, l
> +	emit_model	_acq_rel, a, l
> -- 
> 2.17.1
> 
> 
> 
> 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-28  1:36 ` Gary Lin
@ 2020-05-28  9:49   ` Leif Lindholm
  2020-05-28 15:48     ` Liming Gao
  0 siblings, 1 reply; 25+ messages in thread
From: Leif Lindholm @ 2020-05-28  9:49 UTC (permalink / raw)
  To: Gary Lin; +Cc: devel, ard.biesheuvel, lersek, liming.gao

On Thu, May 28, 2020 at 09:36:33 +0800, Gary Lin wrote:
> On Wed, May 20, 2020 at 01:44:48PM +0200, Ard Biesheuvel wrote:
> > Gary reports the GCC 10 will emit calls to atomics intrinsics routines
> > unless -mno-outline-atomics is specified. This means GCC-10 introduces
> > new intrinsics, and even though it would be possible to work around this
> > by specifying the command line option, this would require a new GCC10
> > toolchain profile to be created, which we prefer to avoid.
> > 
> > So instead, add the new intrinsics to our library so they are provided
> > when necessary.
> > 
> > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
> After applying this patch, gcc 10 can build ArmVirtPkg without the
> linking error.
> 
> Tested-by: Gary Lin <glin@suse.com>

Thanks Gary.

Liming, can we consider this patch for stable tag please?

With an added link to
https://bugzilla.tianocore.org/show_bug.cgi?id=2723 in the commit message:
Reviewed-by: Leif Lindholm <leif@nuviainc.com>

/
    Leif

> > ---
> > v2:
> > - add missing .globl to export the functions from the object file
> > - add function end markers so the size of each is visible in the ELF metadata
> > - add some comments to describe what is going on
> > 
> >  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
> >  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
> >  2 files changed, 145 insertions(+)
> > 
> > diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> > index d5bad9467758..fcf48c678119 100644
> > --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> > +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> > @@ -79,6 +79,9 @@ [Sources.ARM]
> >    Arm/ldivmod.asm      | MSFT
> >    Arm/llsr.asm         | MSFT
> >  
> > +[Sources.AARCH64]
> > +  AArch64/Atomics.S    | GCC
> > +
> >  [Packages]
> >    MdePkg/MdePkg.dec
> >    ArmPkg/ArmPkg.dec
> > diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> > new file mode 100644
> > index 000000000000..dc61d6bb8e52
> > --- /dev/null
> > +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> > @@ -0,0 +1,142 @@
> > +#------------------------------------------------------------------------------
> > +#
> > +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
> > +#
> > +# SPDX-License-Identifier: BSD-2-Clause-Patent
> > +#
> > +#------------------------------------------------------------------------------
> > +
> > +	/*
> > +	 * Provide the GCC intrinsics that are required when using GCC 9 or
> > +	 * later with the -moutline-atomics options (which became the default
> > +	 * in GCC 10)
> > +	 */
> > +	.arch armv8-a
> > +
> > +	.macro		reg_alias, pfx, sz
> > +	r0_\sz		.req	\pfx\()0
> > +	r1_\sz		.req	\pfx\()1
> > +	tmp0_\sz	.req	\pfx\()16
> > +	tmp1_\sz	.req	\pfx\()17
> > +	.endm
> > +
> > +	/*
> > +	 * Define register aliases of the right type for each size
> > +	 * (xN for 8 bytes, wN for everything smaller)
> > +	 */
> > +	reg_alias	w, 1
> > +	reg_alias	w, 2
> > +	reg_alias	w, 4
> > +	reg_alias	x, 8
> > +
> > +	.macro		fn_start, name:req
> > +	.section	.text.\name
> > +	.globl		\name
> > +	.type		\name, %function
> > +\name\():
> > +	.endm
> > +
> > +	.macro		fn_end, name:req
> > +	.size		\name, . - \name
> > +	.endm
> > +
> > +	/*
> > +	 * Emit an atomic helper for \model with operands of size \sz, using
> > +	 * the operation specified by \insn (which is the LSE name), and which
> > +	 * can be implemented using the generic load-locked/store-conditional
> > +	 * (LL/SC) sequence below, using the arithmetic operation given by
> > +	 * \opc.
> > +	 */
> > +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
> > +	fn_start	__aarch64_\insn\()\sz\()\model
> > +	mov		tmp0_\sz, r0_\sz
> > +0:	ld\a\()xr\s	r0_\sz, [x1]
> > +	.ifnc		\insn, swp
> > +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
> > +	.else
> > +	\opc		tmp1_\sz, tmp0_\sz
> > +	.endif
> > +	st\l\()xr\s	w15, tmp1_\sz, [x1]
> > +	cbnz		w15, 0b
> > +	ret
> > +	fn_end		__aarch64_\insn\()\sz\()\model
> > +	.endm
> > +
> > +	/*
> > +	 * Emit atomic helpers for \model for operand sizes in the
> > +	 * set {1, 2, 4, 8}, for the instruction pattern given by
> > +	 * \insn. (This is the LSE name, but this implementation uses
> > +	 * the generic LL/SC sequence using \opc as the arithmetic
> > +	 * operation on the target.)
> > +	 */
> > +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
> > +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
> > +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
> > +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
> > +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
> > +	.endm
> > +
> > +	/*
> > +	 * Emit the compare and swap helper for \model and size \sz
> > +	 * using LL/SC instructions.
> > +	 */
> > +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
> > +	fn_start	__aarch64_cas\sz\()\model
> > +	\uxt		tmp0_\sz, r0_\sz
> > +0:	ld\a\()xr\s	r0_\sz, [x2]
> > +	cmp		r0_\sz, tmp0_\sz
> > +	bne		1f
> > +	st\l\()xr\s	w15, r1_\sz, [x2]
> > +	cbnz		w15, 0b
> > +1:	ret
> > +	fn_end		__aarch64_cas\sz\()\model
> > +	.endm
> > +
> > +	/*
> > +	 * Emit compare-and-swap helpers for \model for operand sizes in the
> > +	 * set {1, 2, 4, 8, 16}.
> > +	 */
> > +	.macro		emit_cas, model:req, a, l
> > +	emit_cas_sz	1, \model, uxtb, b, \a, \l
> > +	emit_cas_sz	2, \model, uxth, h, \a, \l
> > +	emit_cas_sz	4, \model, mov ,  , \a, \l
> > +	emit_cas_sz	8, \model, mov ,  , \a, \l
> > +
> > +	/*
> > +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
> > +	 * need to define it explicitly.
> > +	 */
> > +	fn_start	__aarch64_cas16\model
> > +	mov		x16, x0
> > +	mov		x17, x1
> > +0:	ld\a\()xp	x0, x1, [x4]
> > +	cmp		x0, x16
> > +	ccmp		x1, x17, #0, eq
> > +	bne		1f
> > +	st\l\()xp	w15, x16, x17, [x4]
> > +	cbnz		w15, 0b
> > +1:	ret
> > +	fn_end		__aarch64_cas16\model
> > +	.endm
> > +
> > +	/*
> > +	 * Emit the set of GCC outline atomic helper functions for
> > +	 * the memory ordering model given by \model:
> > +	 * - relax	unordered loads and stores
> > +	 * - acq	load-acquire, unordered store
> > +	 * - rel	unordered load, store-release
> > +	 * - acq_rel	load-acquire, store-release
> > +	 */
> > +	.macro		emit_model, model:req, a, l
> > +	emit_ld		ldadd, add, \model, \a, \l
> > +	emit_ld		ldclr, bic, \model, \a, \l
> > +	emit_ld		ldeor, eor, \model, \a, \l
> > +	emit_ld		ldset, orr, \model, \a, \l
> > +	emit_ld		swp,   mov, \model, \a, \l
> > +	emit_cas	\model, \a, \l
> > +	.endm
> > +
> > +	emit_model	_relax
> > +	emit_model	_acq, a
> > +	emit_model	_rel,, l
> > +	emit_model	_acq_rel, a, l
> > -- 
> > 2.17.1
> > 
> > 
> > 
> > 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-28  9:49   ` Leif Lindholm
@ 2020-05-28 15:48     ` Liming Gao
  2020-05-28 20:09       ` Laszlo Ersek
  0 siblings, 1 reply; 25+ messages in thread
From: Liming Gao @ 2020-05-28 15:48 UTC (permalink / raw)
  To: Leif Lindholm, Gary Lin, afish@apple.com, Kinney, Michael D
  Cc: devel@edk2.groups.io, ard.biesheuvel@arm.com, lersek@redhat.com,
	Gao, Liming

Leif:
  BZ 2723 is a tiano feature request. This patch adds support for new GCC10. I agree with Laszlo that this change is like the enhancement instead of the critical bug fix. So, I suggest to delay it after this stable tag. 

Thanks
Liming
> -----Original Message-----
> From: Leif Lindholm <leif@nuviainc.com>
> Sent: Thursday, May 28, 2020 5:49 PM
> To: Gary Lin <glin@suse.com>
> Cc: devel@edk2.groups.io; ard.biesheuvel@arm.com; lersek@redhat.com; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
> 
> On Thu, May 28, 2020 at 09:36:33 +0800, Gary Lin wrote:
> > On Wed, May 20, 2020 at 01:44:48PM +0200, Ard Biesheuvel wrote:
> > > Gary reports the GCC 10 will emit calls to atomics intrinsics routines
> > > unless -mno-outline-atomics is specified. This means GCC-10 introduces
> > > new intrinsics, and even though it would be possible to work around this
> > > by specifying the command line option, this would require a new GCC10
> > > toolchain profile to be created, which we prefer to avoid.
> > >
> > > So instead, add the new intrinsics to our library so they are provided
> > > when necessary.
> > >
> > > Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
> > After applying this patch, gcc 10 can build ArmVirtPkg without the
> > linking error.
> >
> > Tested-by: Gary Lin <glin@suse.com>
> 
> Thanks Gary.
> 
> Liming, can we consider this patch for stable tag please?
> 
> With an added link to
> https://bugzilla.tianocore.org/show_bug.cgi?id=2723 in the commit message:
> Reviewed-by: Leif Lindholm <leif@nuviainc.com>
> 
> /
>     Leif
> 
> > > ---
> > > v2:
> > > - add missing .globl to export the functions from the object file
> > > - add function end markers so the size of each is visible in the ELF metadata
> > > - add some comments to describe what is going on
> > >
> > >  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
> > >  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
> > >  2 files changed, 145 insertions(+)
> > >
> > > diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> > > index d5bad9467758..fcf48c678119 100644
> > > --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> > > +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
> > > @@ -79,6 +79,9 @@ [Sources.ARM]
> > >    Arm/ldivmod.asm      | MSFT
> > >    Arm/llsr.asm         | MSFT
> > >
> > > +[Sources.AARCH64]
> > > +  AArch64/Atomics.S    | GCC
> > > +
> > >  [Packages]
> > >    MdePkg/MdePkg.dec
> > >    ArmPkg/ArmPkg.dec
> > > diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> > > new file mode 100644
> > > index 000000000000..dc61d6bb8e52
> > > --- /dev/null
> > > +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> > > @@ -0,0 +1,142 @@
> > > +#------------------------------------------------------------------------------
> > > +#
> > > +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
> > > +#
> > > +# SPDX-License-Identifier: BSD-2-Clause-Patent
> > > +#
> > > +#------------------------------------------------------------------------------
> > > +
> > > +	/*
> > > +	 * Provide the GCC intrinsics that are required when using GCC 9 or
> > > +	 * later with the -moutline-atomics options (which became the default
> > > +	 * in GCC 10)
> > > +	 */
> > > +	.arch armv8-a
> > > +
> > > +	.macro		reg_alias, pfx, sz
> > > +	r0_\sz		.req	\pfx\()0
> > > +	r1_\sz		.req	\pfx\()1
> > > +	tmp0_\sz	.req	\pfx\()16
> > > +	tmp1_\sz	.req	\pfx\()17
> > > +	.endm
> > > +
> > > +	/*
> > > +	 * Define register aliases of the right type for each size
> > > +	 * (xN for 8 bytes, wN for everything smaller)
> > > +	 */
> > > +	reg_alias	w, 1
> > > +	reg_alias	w, 2
> > > +	reg_alias	w, 4
> > > +	reg_alias	x, 8
> > > +
> > > +	.macro		fn_start, name:req
> > > +	.section	.text.\name
> > > +	.globl		\name
> > > +	.type		\name, %function
> > > +\name\():
> > > +	.endm
> > > +
> > > +	.macro		fn_end, name:req
> > > +	.size		\name, . - \name
> > > +	.endm
> > > +
> > > +	/*
> > > +	 * Emit an atomic helper for \model with operands of size \sz, using
> > > +	 * the operation specified by \insn (which is the LSE name), and which
> > > +	 * can be implemented using the generic load-locked/store-conditional
> > > +	 * (LL/SC) sequence below, using the arithmetic operation given by
> > > +	 * \opc.
> > > +	 */
> > > +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
> > > +	fn_start	__aarch64_\insn\()\sz\()\model
> > > +	mov		tmp0_\sz, r0_\sz
> > > +0:	ld\a\()xr\s	r0_\sz, [x1]
> > > +	.ifnc		\insn, swp
> > > +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
> > > +	.else
> > > +	\opc		tmp1_\sz, tmp0_\sz
> > > +	.endif
> > > +	st\l\()xr\s	w15, tmp1_\sz, [x1]
> > > +	cbnz		w15, 0b
> > > +	ret
> > > +	fn_end		__aarch64_\insn\()\sz\()\model
> > > +	.endm
> > > +
> > > +	/*
> > > +	 * Emit atomic helpers for \model for operand sizes in the
> > > +	 * set {1, 2, 4, 8}, for the instruction pattern given by
> > > +	 * \insn. (This is the LSE name, but this implementation uses
> > > +	 * the generic LL/SC sequence using \opc as the arithmetic
> > > +	 * operation on the target.)
> > > +	 */
> > > +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
> > > +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
> > > +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
> > > +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
> > > +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
> > > +	.endm
> > > +
> > > +	/*
> > > +	 * Emit the compare and swap helper for \model and size \sz
> > > +	 * using LL/SC instructions.
> > > +	 */
> > > +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
> > > +	fn_start	__aarch64_cas\sz\()\model
> > > +	\uxt		tmp0_\sz, r0_\sz
> > > +0:	ld\a\()xr\s	r0_\sz, [x2]
> > > +	cmp		r0_\sz, tmp0_\sz
> > > +	bne		1f
> > > +	st\l\()xr\s	w15, r1_\sz, [x2]
> > > +	cbnz		w15, 0b
> > > +1:	ret
> > > +	fn_end		__aarch64_cas\sz\()\model
> > > +	.endm
> > > +
> > > +	/*
> > > +	 * Emit compare-and-swap helpers for \model for operand sizes in the
> > > +	 * set {1, 2, 4, 8, 16}.
> > > +	 */
> > > +	.macro		emit_cas, model:req, a, l
> > > +	emit_cas_sz	1, \model, uxtb, b, \a, \l
> > > +	emit_cas_sz	2, \model, uxth, h, \a, \l
> > > +	emit_cas_sz	4, \model, mov ,  , \a, \l
> > > +	emit_cas_sz	8, \model, mov ,  , \a, \l
> > > +
> > > +	/*
> > > +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
> > > +	 * need to define it explicitly.
> > > +	 */
> > > +	fn_start	__aarch64_cas16\model
> > > +	mov		x16, x0
> > > +	mov		x17, x1
> > > +0:	ld\a\()xp	x0, x1, [x4]
> > > +	cmp		x0, x16
> > > +	ccmp		x1, x17, #0, eq
> > > +	bne		1f
> > > +	st\l\()xp	w15, x16, x17, [x4]
> > > +	cbnz		w15, 0b
> > > +1:	ret
> > > +	fn_end		__aarch64_cas16\model
> > > +	.endm
> > > +
> > > +	/*
> > > +	 * Emit the set of GCC outline atomic helper functions for
> > > +	 * the memory ordering model given by \model:
> > > +	 * - relax	unordered loads and stores
> > > +	 * - acq	load-acquire, unordered store
> > > +	 * - rel	unordered load, store-release
> > > +	 * - acq_rel	load-acquire, store-release
> > > +	 */
> > > +	.macro		emit_model, model:req, a, l
> > > +	emit_ld		ldadd, add, \model, \a, \l
> > > +	emit_ld		ldclr, bic, \model, \a, \l
> > > +	emit_ld		ldeor, eor, \model, \a, \l
> > > +	emit_ld		ldset, orr, \model, \a, \l
> > > +	emit_ld		swp,   mov, \model, \a, \l
> > > +	emit_cas	\model, \a, \l
> > > +	.endm
> > > +
> > > +	emit_model	_relax
> > > +	emit_model	_acq, a
> > > +	emit_model	_rel,, l
> > > +	emit_model	_acq_rel, a, l
> > > --
> > > 2.17.1
> > >
> > >
> > > 
> > >

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-28 15:48     ` Liming Gao
@ 2020-05-28 20:09       ` Laszlo Ersek
  2020-05-29  3:04         ` Liming Gao
  0 siblings, 1 reply; 25+ messages in thread
From: Laszlo Ersek @ 2020-05-28 20:09 UTC (permalink / raw)
  To: Gao, Liming, Leif Lindholm, Gary Lin, afish@apple.com,
	Kinney, Michael D
  Cc: devel@edk2.groups.io, ard.biesheuvel@arm.com

On 05/28/20 17:48, Gao, Liming wrote:
> Leif:
>   BZ 2723 is a tiano feature request. This patch adds support for new GCC10. I agree with Laszlo that this change is like the enhancement instead of the critical bug fix. So, I suggest to delay it after this stable tag. 

In <https://bugzilla.tianocore.org/show_bug.cgi?id=2723#c22> I mentioned
"If there's strong disagreement, we can revert these BZ field changes",
and in another mailing list thread Leif pointed out that the patch only
affects builds that would otherwise fail.

So at the moment I'm neutral on this work; I'm fine either way (merged
or postponed).

Liming: would you consider merging if we delayed the stable tag by a few
days (mid next week)?

Anyway, let me fade into the background on this topic.

Thanks
Laszlo

>> -----Original Message-----
>> From: Leif Lindholm <leif@nuviainc.com>
>> Sent: Thursday, May 28, 2020 5:49 PM
>> To: Gary Lin <glin@suse.com>
>> Cc: devel@edk2.groups.io; ard.biesheuvel@arm.com; lersek@redhat.com; Gao, Liming <liming.gao@intel.com>
>> Subject: Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
>>
>> On Thu, May 28, 2020 at 09:36:33 +0800, Gary Lin wrote:
>>> On Wed, May 20, 2020 at 01:44:48PM +0200, Ard Biesheuvel wrote:
>>>> Gary reports the GCC 10 will emit calls to atomics intrinsics routines
>>>> unless -mno-outline-atomics is specified. This means GCC-10 introduces
>>>> new intrinsics, and even though it would be possible to work around this
>>>> by specifying the command line option, this would require a new GCC10
>>>> toolchain profile to be created, which we prefer to avoid.
>>>>
>>>> So instead, add the new intrinsics to our library so they are provided
>>>> when necessary.
>>>>
>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>>> After applying this patch, gcc 10 can build ArmVirtPkg without the
>>> linking error.
>>>
>>> Tested-by: Gary Lin <glin@suse.com>
>>
>> Thanks Gary.
>>
>> Liming, can we consider this patch for stable tag please?
>>
>> With an added link to
>> https://bugzilla.tianocore.org/show_bug.cgi?id=2723 in the commit message:
>> Reviewed-by: Leif Lindholm <leif@nuviainc.com>
>>
>> /
>>     Leif
>>
>>>> ---
>>>> v2:
>>>> - add missing .globl to export the functions from the object file
>>>> - add function end markers so the size of each is visible in the ELF metadata
>>>> - add some comments to describe what is going on
>>>>
>>>>  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>>>>  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
>>>>  2 files changed, 145 insertions(+)
>>>>
>>>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> index d5bad9467758..fcf48c678119 100644
>>>> --- a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>>>    Arm/ldivmod.asm      | MSFT
>>>>    Arm/llsr.asm         | MSFT
>>>>
>>>> +[Sources.AARCH64]
>>>> +  AArch64/Atomics.S    | GCC
>>>> +
>>>>  [Packages]
>>>>    MdePkg/MdePkg.dec
>>>>    ArmPkg/ArmPkg.dec
>>>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>> new file mode 100644
>>>> index 000000000000..dc61d6bb8e52
>>>> --- /dev/null
>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>> @@ -0,0 +1,142 @@
>>>> +#------------------------------------------------------------------------------
>>>> +#
>>>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR>
>>>> +#
>>>> +# SPDX-License-Identifier: BSD-2-Clause-Patent
>>>> +#
>>>> +#------------------------------------------------------------------------------
>>>> +
>>>> +	/*
>>>> +	 * Provide the GCC intrinsics that are required when using GCC 9 or
>>>> +	 * later with the -moutline-atomics options (which became the default
>>>> +	 * in GCC 10)
>>>> +	 */
>>>> +	.arch armv8-a
>>>> +
>>>> +	.macro		reg_alias, pfx, sz
>>>> +	r0_\sz		.req	\pfx\()0
>>>> +	r1_\sz		.req	\pfx\()1
>>>> +	tmp0_\sz	.req	\pfx\()16
>>>> +	tmp1_\sz	.req	\pfx\()17
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Define register aliases of the right type for each size
>>>> +	 * (xN for 8 bytes, wN for everything smaller)
>>>> +	 */
>>>> +	reg_alias	w, 1
>>>> +	reg_alias	w, 2
>>>> +	reg_alias	w, 4
>>>> +	reg_alias	x, 8
>>>> +
>>>> +	.macro		fn_start, name:req
>>>> +	.section	.text.\name
>>>> +	.globl		\name
>>>> +	.type		\name, %function
>>>> +\name\():
>>>> +	.endm
>>>> +
>>>> +	.macro		fn_end, name:req
>>>> +	.size		\name, . - \name
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit an atomic helper for \model with operands of size \sz, using
>>>> +	 * the operation specified by \insn (which is the LSE name), and which
>>>> +	 * can be implemented using the generic load-locked/store-conditional
>>>> +	 * (LL/SC) sequence below, using the arithmetic operation given by
>>>> +	 * \opc.
>>>> +	 */
>>>> +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
>>>> +	fn_start	__aarch64_\insn\()\sz\()\model
>>>> +	mov		tmp0_\sz, r0_\sz
>>>> +0:	ld\a\()xr\s	r0_\sz, [x1]
>>>> +	.ifnc		\insn, swp
>>>> +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
>>>> +	.else
>>>> +	\opc		tmp1_\sz, tmp0_\sz
>>>> +	.endif
>>>> +	st\l\()xr\s	w15, tmp1_\sz, [x1]
>>>> +	cbnz		w15, 0b
>>>> +	ret
>>>> +	fn_end		__aarch64_\insn\()\sz\()\model
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit atomic helpers for \model for operand sizes in the
>>>> +	 * set {1, 2, 4, 8}, for the instruction pattern given by
>>>> +	 * \insn. (This is the LSE name, but this implementation uses
>>>> +	 * the generic LL/SC sequence using \opc as the arithmetic
>>>> +	 * operation on the target.)
>>>> +	 */
>>>> +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
>>>> +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
>>>> +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
>>>> +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
>>>> +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit the compare and swap helper for \model and size \sz
>>>> +	 * using LL/SC instructions.
>>>> +	 */
>>>> +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
>>>> +	fn_start	__aarch64_cas\sz\()\model
>>>> +	\uxt		tmp0_\sz, r0_\sz
>>>> +0:	ld\a\()xr\s	r0_\sz, [x2]
>>>> +	cmp		r0_\sz, tmp0_\sz
>>>> +	bne		1f
>>>> +	st\l\()xr\s	w15, r1_\sz, [x2]
>>>> +	cbnz		w15, 0b
>>>> +1:	ret
>>>> +	fn_end		__aarch64_cas\sz\()\model
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit compare-and-swap helpers for \model for operand sizes in the
>>>> +	 * set {1, 2, 4, 8, 16}.
>>>> +	 */
>>>> +	.macro		emit_cas, model:req, a, l
>>>> +	emit_cas_sz	1, \model, uxtb, b, \a, \l
>>>> +	emit_cas_sz	2, \model, uxth, h, \a, \l
>>>> +	emit_cas_sz	4, \model, mov ,  , \a, \l
>>>> +	emit_cas_sz	8, \model, mov ,  , \a, \l
>>>> +
>>>> +	/*
>>>> +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
>>>> +	 * need to define it explicitly.
>>>> +	 */
>>>> +	fn_start	__aarch64_cas16\model
>>>> +	mov		x16, x0
>>>> +	mov		x17, x1
>>>> +0:	ld\a\()xp	x0, x1, [x4]
>>>> +	cmp		x0, x16
>>>> +	ccmp		x1, x17, #0, eq
>>>> +	bne		1f
>>>> +	st\l\()xp	w15, x16, x17, [x4]
>>>> +	cbnz		w15, 0b
>>>> +1:	ret
>>>> +	fn_end		__aarch64_cas16\model
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit the set of GCC outline atomic helper functions for
>>>> +	 * the memory ordering model given by \model:
>>>> +	 * - relax	unordered loads and stores
>>>> +	 * - acq	load-acquire, unordered store
>>>> +	 * - rel	unordered load, store-release
>>>> +	 * - acq_rel	load-acquire, store-release
>>>> +	 */
>>>> +	.macro		emit_model, model:req, a, l
>>>> +	emit_ld		ldadd, add, \model, \a, \l
>>>> +	emit_ld		ldclr, bic, \model, \a, \l
>>>> +	emit_ld		ldeor, eor, \model, \a, \l
>>>> +	emit_ld		ldset, orr, \model, \a, \l
>>>> +	emit_ld		swp,   mov, \model, \a, \l
>>>> +	emit_cas	\model, \a, \l
>>>> +	.endm
>>>> +
>>>> +	emit_model	_relax
>>>> +	emit_model	_acq, a
>>>> +	emit_model	_rel,, l
>>>> +	emit_model	_acq_rel, a, l
>>>> --
>>>> 2.17.1
>>>>
>>>>
>>>> 
>>>>
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-28 20:09       ` Laszlo Ersek
@ 2020-05-29  3:04         ` Liming Gao
  0 siblings, 0 replies; 25+ messages in thread
From: Liming Gao @ 2020-05-29  3:04 UTC (permalink / raw)
  To: devel@edk2.groups.io, lersek@redhat.com, Leif Lindholm, Gary Lin,
	afish@apple.com, Kinney, Michael D
  Cc: ard.biesheuvel@arm.com

Laszlo:
 I agree the proposal to delay few days to collect more feedbacks. I just send the mail to highlight this request in Hard Feature Freeze https://edk2.groups.io/g/devel/message/60421. 

Thanks
Liming
-----Original Message-----
From: devel@edk2.groups.io <devel@edk2.groups.io> On Behalf Of Laszlo Ersek
Sent: 2020年5月29日 4:09
To: Gao, Liming <liming.gao@intel.com>; Leif Lindholm <leif@nuviainc.com>; Gary Lin <glin@suse.com>; afish@apple.com; Kinney, Michael D <michael.d.kinney@intel.com>
Cc: devel@edk2.groups.io; ard.biesheuvel@arm.com
Subject: Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics

On 05/28/20 17:48, Gao, Liming wrote:
> Leif:
>   BZ 2723 is a tiano feature request. This patch adds support for new GCC10. I agree with Laszlo that this change is like the enhancement instead of the critical bug fix. So, I suggest to delay it after this stable tag. 

In <https://bugzilla.tianocore.org/show_bug.cgi?id=2723#c22> I mentioned "If there's strong disagreement, we can revert these BZ field changes", and in another mailing list thread Leif pointed out that the patch only affects builds that would otherwise fail.

So at the moment I'm neutral on this work; I'm fine either way (merged or postponed).

Liming: would you consider merging if we delayed the stable tag by a few days (mid next week)?

Anyway, let me fade into the background on this topic.

Thanks
Laszlo

>> -----Original Message-----
>> From: Leif Lindholm <leif@nuviainc.com>
>> Sent: Thursday, May 28, 2020 5:49 PM
>> To: Gary Lin <glin@suse.com>
>> Cc: devel@edk2.groups.io; ard.biesheuvel@arm.com; lersek@redhat.com; 
>> Gao, Liming <liming.gao@intel.com>
>> Subject: Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: 
>> provide atomics intrinsics
>>
>> On Thu, May 28, 2020 at 09:36:33 +0800, Gary Lin wrote:
>>> On Wed, May 20, 2020 at 01:44:48PM +0200, Ard Biesheuvel wrote:
>>>> Gary reports the GCC 10 will emit calls to atomics intrinsics 
>>>> routines unless -mno-outline-atomics is specified. This means 
>>>> GCC-10 introduces new intrinsics, and even though it would be 
>>>> possible to work around this by specifying the command line option, 
>>>> this would require a new GCC10 toolchain profile to be created, which we prefer to avoid.
>>>>
>>>> So instead, add the new intrinsics to our library so they are 
>>>> provided when necessary.
>>>>
>>>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@arm.com>
>>> After applying this patch, gcc 10 can build ArmVirtPkg without the 
>>> linking error.
>>>
>>> Tested-by: Gary Lin <glin@suse.com>
>>
>> Thanks Gary.
>>
>> Liming, can we consider this patch for stable tag please?
>>
>> With an added link to
>> https://bugzilla.tianocore.org/show_bug.cgi?id=2723 in the commit message:
>> Reviewed-by: Leif Lindholm <leif@nuviainc.com>
>>
>> /
>>     Leif
>>
>>>> ---
>>>> v2:
>>>> - add missing .globl to export the functions from the object file
>>>> - add function end markers so the size of each is visible in the 
>>>> ELF metadata
>>>> - add some comments to describe what is going on
>>>>
>>>>  ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf |   3 +
>>>>  ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S         | 142 ++++++++++++++++++++
>>>>  2 files changed, 145 insertions(+)
>>>>
>>>> diff --git 
>>>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>> b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> index d5bad9467758..fcf48c678119 100644
>>>> --- 
>>>> a/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.inf
>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/CompilerIntrinsicsLib.in
>>>> +++ f
>>>> @@ -79,6 +79,9 @@ [Sources.ARM]
>>>>    Arm/ldivmod.asm      | MSFT
>>>>    Arm/llsr.asm         | MSFT
>>>>
>>>> +[Sources.AARCH64]
>>>> +  AArch64/Atomics.S    | GCC
>>>> +
>>>>  [Packages]
>>>>    MdePkg/MdePkg.dec
>>>>    ArmPkg/ArmPkg.dec
>>>> diff --git a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S 
>>>> b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>> new file mode 100644
>>>> index 000000000000..dc61d6bb8e52
>>>> --- /dev/null
>>>> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
>>>> @@ -0,0 +1,142 @@
>>>> +#-----------------------------------------------------------------
>>>> +-------------
>>>> +#
>>>> +# Copyright (c) 2020, Arm, Limited. All rights reserved.<BR> # # 
>>>> +SPDX-License-Identifier: BSD-2-Clause-Patent #
>>>> +#-----------------------------------------------------------------
>>>> +-------------
>>>> +
>>>> +	/*
>>>> +	 * Provide the GCC intrinsics that are required when using GCC 9 or
>>>> +	 * later with the -moutline-atomics options (which became the default
>>>> +	 * in GCC 10)
>>>> +	 */
>>>> +	.arch armv8-a
>>>> +
>>>> +	.macro		reg_alias, pfx, sz
>>>> +	r0_\sz		.req	\pfx\()0
>>>> +	r1_\sz		.req	\pfx\()1
>>>> +	tmp0_\sz	.req	\pfx\()16
>>>> +	tmp1_\sz	.req	\pfx\()17
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Define register aliases of the right type for each size
>>>> +	 * (xN for 8 bytes, wN for everything smaller)
>>>> +	 */
>>>> +	reg_alias	w, 1
>>>> +	reg_alias	w, 2
>>>> +	reg_alias	w, 4
>>>> +	reg_alias	x, 8
>>>> +
>>>> +	.macro		fn_start, name:req
>>>> +	.section	.text.\name
>>>> +	.globl		\name
>>>> +	.type		\name, %function
>>>> +\name\():
>>>> +	.endm
>>>> +
>>>> +	.macro		fn_end, name:req
>>>> +	.size		\name, . - \name
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit an atomic helper for \model with operands of size \sz, using
>>>> +	 * the operation specified by \insn (which is the LSE name), and which
>>>> +	 * can be implemented using the generic load-locked/store-conditional
>>>> +	 * (LL/SC) sequence below, using the arithmetic operation given by
>>>> +	 * \opc.
>>>> +	 */
>>>> +	.macro 		emit_ld_sz, sz:req, insn:req, opc:req, model:req, s, a, l
>>>> +	fn_start	__aarch64_\insn\()\sz\()\model
>>>> +	mov		tmp0_\sz, r0_\sz
>>>> +0:	ld\a\()xr\s	r0_\sz, [x1]
>>>> +	.ifnc		\insn, swp
>>>> +	\opc		tmp1_\sz, r0_\sz, tmp0_\sz
>>>> +	.else
>>>> +	\opc		tmp1_\sz, tmp0_\sz
>>>> +	.endif
>>>> +	st\l\()xr\s	w15, tmp1_\sz, [x1]
>>>> +	cbnz		w15, 0b
>>>> +	ret
>>>> +	fn_end		__aarch64_\insn\()\sz\()\model
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit atomic helpers for \model for operand sizes in the
>>>> +	 * set {1, 2, 4, 8}, for the instruction pattern given by
>>>> +	 * \insn. (This is the LSE name, but this implementation uses
>>>> +	 * the generic LL/SC sequence using \opc as the arithmetic
>>>> +	 * operation on the target.)
>>>> +	 */
>>>> +	.macro		emit_ld, insn:req, opc:req, model:req, a, l
>>>> +	emit_ld_sz	1, \insn, \opc, \model, b, \a, \l
>>>> +	emit_ld_sz	2, \insn, \opc, \model, h, \a, \l
>>>> +	emit_ld_sz	4, \insn, \opc, \model,  , \a, \l
>>>> +	emit_ld_sz	8, \insn, \opc, \model,  , \a, \l
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit the compare and swap helper for \model and size \sz
>>>> +	 * using LL/SC instructions.
>>>> +	 */
>>>> +	.macro 		emit_cas_sz, sz:req, model:req, uxt:req, s, a, l
>>>> +	fn_start	__aarch64_cas\sz\()\model
>>>> +	\uxt		tmp0_\sz, r0_\sz
>>>> +0:	ld\a\()xr\s	r0_\sz, [x2]
>>>> +	cmp		r0_\sz, tmp0_\sz
>>>> +	bne		1f
>>>> +	st\l\()xr\s	w15, r1_\sz, [x2]
>>>> +	cbnz		w15, 0b
>>>> +1:	ret
>>>> +	fn_end		__aarch64_cas\sz\()\model
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit compare-and-swap helpers for \model for operand sizes in the
>>>> +	 * set {1, 2, 4, 8, 16}.
>>>> +	 */
>>>> +	.macro		emit_cas, model:req, a, l
>>>> +	emit_cas_sz	1, \model, uxtb, b, \a, \l
>>>> +	emit_cas_sz	2, \model, uxth, h, \a, \l
>>>> +	emit_cas_sz	4, \model, mov ,  , \a, \l
>>>> +	emit_cas_sz	8, \model, mov ,  , \a, \l
>>>> +
>>>> +	/*
>>>> +	 * We cannot use the parameterized sequence for 16 byte CAS, so we
>>>> +	 * need to define it explicitly.
>>>> +	 */
>>>> +	fn_start	__aarch64_cas16\model
>>>> +	mov		x16, x0
>>>> +	mov		x17, x1
>>>> +0:	ld\a\()xp	x0, x1, [x4]
>>>> +	cmp		x0, x16
>>>> +	ccmp		x1, x17, #0, eq
>>>> +	bne		1f
>>>> +	st\l\()xp	w15, x16, x17, [x4]
>>>> +	cbnz		w15, 0b
>>>> +1:	ret
>>>> +	fn_end		__aarch64_cas16\model
>>>> +	.endm
>>>> +
>>>> +	/*
>>>> +	 * Emit the set of GCC outline atomic helper functions for
>>>> +	 * the memory ordering model given by \model:
>>>> +	 * - relax	unordered loads and stores
>>>> +	 * - acq	load-acquire, unordered store
>>>> +	 * - rel	unordered load, store-release
>>>> +	 * - acq_rel	load-acquire, store-release
>>>> +	 */
>>>> +	.macro		emit_model, model:req, a, l
>>>> +	emit_ld		ldadd, add, \model, \a, \l
>>>> +	emit_ld		ldclr, bic, \model, \a, \l
>>>> +	emit_ld		ldeor, eor, \model, \a, \l
>>>> +	emit_ld		ldset, orr, \model, \a, \l
>>>> +	emit_ld		swp,   mov, \model, \a, \l
>>>> +	emit_cas	\model, \a, \l
>>>> +	.endm
>>>> +
>>>> +	emit_model	_relax
>>>> +	emit_model	_acq, a
>>>> +	emit_model	_rel,, l
>>>> +	emit_model	_acq_rel, a, l
>>>> --
>>>> 2.17.1
>>>>
>>>>
>>>> 
>>>>
> 





^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-05-22 13:27               ` Ard Biesheuvel
  2020-05-22 19:04                 ` Laszlo Ersek
@ 2020-06-02  0:50                 ` Liming Gao
  2020-06-02  7:09                   ` Ard Biesheuvel
  1 sibling, 1 reply; 25+ messages in thread
From: Liming Gao @ 2020-06-02  0:50 UTC (permalink / raw)
  To: Ard Biesheuvel, Leif Lindholm, Laszlo Ersek
  Cc: devel@edk2.groups.io, glin@suse.com

Ard:
  There is no objection to merge this change into the stable tag 202005. I see this patch has Reviewed-by and Tested-by. Can you update this patch and merge it today?
  
  The stable tag will be created on 2020-06-03 (tomorrow).

Thanks
Liming
-----Original Message-----
From: Ard Biesheuvel <ard.biesheuvel@arm.com> 
Sent: 2020年5月22日 21:27
To: Leif Lindholm <leif@nuviainc.com>; Laszlo Ersek <lersek@redhat.com>
Cc: devel@edk2.groups.io; glin@suse.com; Gao, Liming <liming.gao@intel.com>
Subject: Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics

On 5/22/20 12:54 PM, Leif Lindholm wrote:
> On Thu, May 21, 2020 at 22:22:58 +0200, Laszlo Ersek wrote:
>> On 05/21/20 16:16, Leif Lindholm wrote:
>>
>>> OK, then I would vote *for* merging the patch regardless. We know 
>>> how long some toolchain versions can stick around simply because 
>>> they were mentioned in some blog post somewhere that ended up high 
>>> in search rankings.
>>>
>>> Once gcc 10.2 is released (and we have verified the problem can be 
>>> worked around elsewhere), I guess we could add a note saying "once 
>>> all gcc 10.0 and 10.1 toolchains are considered obsolete, this file 
>>> can be deleted".
>>
>> I think we can expect all distros that ship gcc-10 to eventually 
>> migrate to gcc-10.2+. Until then, this patch should hopefully work. 
>> (I'm quite annoyed by having to call the patch "temporary", as it 
>> feels very technically impressive.)
>>
>> So I think I agree with Leif, with a small modification to the idea:
>> rather than a *note* saying "back this out once 10.0 and 10.1 have 
>> been replaced by 10.2+ in all 'large' distros"
> 
> That isn't actually exatly what I meant - I meant properly obsolete as 
> in "we are now reasonably certain no one is still using some silly 
> ancient cross compiler they checked into their build infrastructure 
> years ago".
> 
>> , I would suggest filing a *BZ*
>> for the same. And I recommend making the new BZ dependent on
>> TianoCore#2723 (i.e. the present BZ).
> 
> But I don't object to that approach.
> 

OK, so i will leave it up to Liming and the stewards to decide whether this gets incorporated ino the stable tag or not. If it is, I would like to fold in the fixup below

--- a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
+++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
@@ -53,10 +53,10 @@
  0:     ld\a\()xr\s     r0_\sz, [x1]
         .ifnc           \insn, swp
         \opc            tmp1_\sz, r0_\sz, tmp0_\sz
+       st\l\()xr\s     w15, tmp1_\sz, [x1]
         .else
-       \opc            tmp1_\sz, tmp0_\sz
+       st\l\()xr\s     w15, tmp0_\sz, [x1]
         .endif
-       st\l\()xr\s     w15, tmp1_\sz, [x1]
         cbnz            w15, 0b
         ret
         fn_end          __aarch64_\insn\()\sz\()\model

to get rid of the redundant 'mov' for the SWP flavor of the atomics helpers.



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
  2020-06-02  0:50                 ` Liming Gao
@ 2020-06-02  7:09                   ` Ard Biesheuvel
  0 siblings, 0 replies; 25+ messages in thread
From: Ard Biesheuvel @ 2020-06-02  7:09 UTC (permalink / raw)
  To: devel, liming.gao, Leif Lindholm, Laszlo Ersek; +Cc: glin@suse.com

On 6/2/20 2:50 AM, Liming Gao via groups.io wrote:
> Ard:
>    There is no objection to merge this change into the stable tag 202005. I see this patch has Reviewed-by and Tested-by. Can you update this patch and merge it today?
>
>    The stable tag will be created on 2020-06-03 (tomorrow).
> 

Merged as #648


> -----Original Message-----
> From: Ard Biesheuvel <ard.biesheuvel@arm.com>
> Sent: 2020年5月22日 21:27
> To: Leif Lindholm <leif@nuviainc.com>; Laszlo Ersek <lersek@redhat.com>
> Cc: devel@edk2.groups.io; glin@suse.com; Gao, Liming <liming.gao@intel.com>
> Subject: Re: [edk2-devel] [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics
> 
> On 5/22/20 12:54 PM, Leif Lindholm wrote:
>> On Thu, May 21, 2020 at 22:22:58 +0200, Laszlo Ersek wrote:
>>> On 05/21/20 16:16, Leif Lindholm wrote:
>>>
>>>> OK, then I would vote *for* merging the patch regardless. We know
>>>> how long some toolchain versions can stick around simply because
>>>> they were mentioned in some blog post somewhere that ended up high
>>>> in search rankings.
>>>>
>>>> Once gcc 10.2 is released (and we have verified the problem can be
>>>> worked around elsewhere), I guess we could add a note saying "once
>>>> all gcc 10.0 and 10.1 toolchains are considered obsolete, this file
>>>> can be deleted".
>>>
>>> I think we can expect all distros that ship gcc-10 to eventually
>>> migrate to gcc-10.2+. Until then, this patch should hopefully work.
>>> (I'm quite annoyed by having to call the patch "temporary", as it
>>> feels very technically impressive.)
>>>
>>> So I think I agree with Leif, with a small modification to the idea:
>>> rather than a *note* saying "back this out once 10.0 and 10.1 have
>>> been replaced by 10.2+ in all 'large' distros"
>>
>> That isn't actually exatly what I meant - I meant properly obsolete as
>> in "we are now reasonably certain no one is still using some silly
>> ancient cross compiler they checked into their build infrastructure
>> years ago".
>>
>>> , I would suggest filing a *BZ*
>>> for the same. And I recommend making the new BZ dependent on
>>> TianoCore#2723 (i.e. the present BZ).
>>
>> But I don't object to that approach.
>>
> 
> OK, so i will leave it up to Liming and the stewards to decide whether this gets incorporated ino the stable tag or not. If it is, I would like to fold in the fixup below
> 
> --- a/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> +++ b/ArmPkg/Library/CompilerIntrinsicsLib/AArch64/Atomics.S
> @@ -53,10 +53,10 @@
>    0:     ld\a\()xr\s     r0_\sz, [x1]
>           .ifnc           \insn, swp
>           \opc            tmp1_\sz, r0_\sz, tmp0_\sz
> +       st\l\()xr\s     w15, tmp1_\sz, [x1]
>           .else
> -       \opc            tmp1_\sz, tmp0_\sz
> +       st\l\()xr\s     w15, tmp0_\sz, [x1]
>           .endif
> -       st\l\()xr\s     w15, tmp1_\sz, [x1]
>           cbnz            w15, 0b
>           ret
>           fn_end          __aarch64_\insn\()\sz\()\model
> 
> to get rid of the redundant 'mov' for the SWP flavor of the atomics helpers.
> 
> 
> 
> 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2020-06-02  7:09 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-05-20 11:44 [PATCH v2] ArmPkg/CompilerIntrinsicsLib: provide atomics intrinsics Ard Biesheuvel
2020-05-20 12:37 ` [edk2-devel] " Philippe Mathieu-Daudé
2020-05-21 16:40   ` Philippe Mathieu-Daudé
2020-05-21 16:45     ` Ard Biesheuvel
2020-05-21 16:59       ` Philippe Mathieu-Daudé
2020-05-21 17:02         ` Ard Biesheuvel
2020-05-21 17:07           ` Philippe Mathieu-Daudé
2020-05-20 15:59 ` Laszlo Ersek
2020-05-21 11:23 ` Leif Lindholm
2020-05-21 12:58   ` [edk2-devel] " Ard Biesheuvel
2020-05-21 13:16     ` Leif Lindholm
2020-05-21 13:31       ` Ard Biesheuvel
2020-05-21 14:16         ` Leif Lindholm
2020-05-21 20:22           ` Laszlo Ersek
2020-05-22  8:16             ` Ard Biesheuvel
2020-05-22 10:54             ` Leif Lindholm
2020-05-22 13:27               ` Ard Biesheuvel
2020-05-22 19:04                 ` Laszlo Ersek
2020-06-02  0:50                 ` Liming Gao
2020-06-02  7:09                   ` Ard Biesheuvel
2020-05-28  1:36 ` Gary Lin
2020-05-28  9:49   ` Leif Lindholm
2020-05-28 15:48     ` Liming Gao
2020-05-28 20:09       ` Laszlo Ersek
2020-05-29  3:04         ` Liming Gao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox