public inbox for devel@edk2.groups.io
From: "Christopher Zurcher" <zurcher@gmail.com>
To: devel@edk2.groups.io
Cc: Yi Li <yi1.li@intel.com>, Jiewen Yao <jiewen.yao@intel.com>,
	Jian J Wang <jian.j.wang@intel.com>,
	Xiaoyu Lu <xiaoyu1.lu@intel.com>,
	Guomin Jiang <guomin.jiang@intel.com>
Subject: [PATCH v1 2/3] CryptoPkg/OpensslLib: Commit the auto-generated assembly files for IA32
Date: Wed, 21 Sep 2022 13:25:40 -0700
Message-ID: <20220921202541.3691-3-christopher.zurcher@microsoft.com>
In-Reply-To: <20220921202541.3691-1-christopher.zurcher@microsoft.com>

REF: https://bugzilla.tianocore.org/show_bug.cgi?id=3654

Adding the auto-generated assembly files for the IA32 architecture.

Cc: Yi Li <yi1.li@intel.com>
Cc: Jiewen Yao <jiewen.yao@intel.com>
Cc: Jian J Wang <jian.j.wang@intel.com>
Cc: Xiaoyu Lu <xiaoyu1.lu@intel.com>
Cc: Guomin Jiang <guomin.jiang@intel.com>
Signed-off-by: Christopher Zurcher <christopher.zurcher@microsoft.com>
---
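Note for reviewers (not part of the commit message): the symbols exported by
these generated files follow OpenSSL's internal cdecl interface for its
AES-NI perlasm modules. As a rough sketch only, the entry points in
aesni-x86.nasm correspond to prototypes along the lines of those OpenSSL
declares internally (e.g. in crypto/evp/e_aes.c); the round count the
assembly reads from DWORD [240+edx] is the AES_KEY "rounds" field at byte
offset 240 of the key schedule. Illustrative prototypes, not added by this
patch:

  #include <openssl/aes.h>

  /* Single-block encrypt/decrypt: in -> out using the expanded key. */
  void aesni_encrypt(const unsigned char *in, unsigned char *out,
                     const AES_KEY *key);
  void aesni_decrypt(const unsigned char *in, unsigned char *out,
                     const AES_KEY *key);

  /* ECB over "length" bytes (truncated to a multiple of 16 by the asm);
     enc selects the encrypt or decrypt path. */
  void aesni_ecb_encrypt(const unsigned char *in, unsigned char *out,
                         size_t length, const AES_KEY *key, int enc);

  /* CTR mode with a 32-bit big-endian counter in ivec[12..15],
     processing "blocks" 16-byte blocks. */
  void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
                                  size_t blocks, const void *key,
                                  const unsigned char ivec[16]);

The win32 symbols carry a leading underscore (e.g. _aesni_encrypt), which is
the usual IA32 name decoration for these C-level names.
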
 CryptoPkg/Library/OpensslLib/IA32/crypto/aes/aesni-x86.nasm   | 3212 +++++++++++++++++++
 CryptoPkg/Library/OpensslLib/IA32/crypto/aes/vpaes-x86.nasm   |  651 ++++
 CryptoPkg/Library/OpensslLib/IA32/crypto/modes/ghash-x86.nasm |  700 ++++
 CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha1-586.nasm    | 1394 ++++++++
 CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha256-586.nasm  | 3364 ++++++++++++++++++++
 CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha512-586.nasm  |  579 ++++
 CryptoPkg/Library/OpensslLib/IA32/crypto/x86cpuid.nasm        |  433 +++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/aesni-x86.S   | 3247 +++++++++++++++++++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/vpaes-x86.S   |  670 ++++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/modes/ghash-x86.S |  703 ++++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha1-586.S    | 1389 ++++++++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha256-586.S  | 3356 +++++++++++++++++++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha512-586.S  |  574 ++++
 CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/x86cpuid.S        |  449 +++
 14 files changed, 20721 insertions(+)

diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/aes/aesni-x86.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/aes/aesni-x86.nasm
new file mode 100644
index 0000000000..e0b5058273
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/aes/aesni-x86.nasm
@@ -0,0 +1,3212 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/aes/asm/aesni-x86.pl
+;
+; Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+;extern _OPENSSL_ia32cap_P
+global  _aesni_encrypt
+align   16
+_aesni_encrypt:
+L$_aesni_encrypt_begin:
+        mov     eax,DWORD [4+esp]
+        mov     edx,DWORD [12+esp]
+        movups  xmm2,[eax]
+        mov     ecx,DWORD [240+edx]
+        mov     eax,DWORD [8+esp]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$000enc1_loop_1:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$000enc1_loop_1
+db      102,15,56,221,209
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        movups  [eax],xmm2
+        pxor    xmm2,xmm2
+        ret
+global  _aesni_decrypt
+align   16
+_aesni_decrypt:
+L$_aesni_decrypt_begin:
+        mov     eax,DWORD [4+esp]
+        mov     edx,DWORD [12+esp]
+        movups  xmm2,[eax]
+        mov     ecx,DWORD [240+edx]
+        mov     eax,DWORD [8+esp]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$001dec1_loop_2:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$001dec1_loop_2
+db      102,15,56,223,209
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        movups  [eax],xmm2
+        pxor    xmm2,xmm2
+        ret
+align   16
+__aesni_encrypt2:
+        movups  xmm0,[edx]
+        shl     ecx,4
+        movups  xmm1,[16+edx]
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        movups  xmm0,[32+edx]
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+        add     ecx,16
+L$002enc2_loop:
+db      102,15,56,220,209
+db      102,15,56,220,217
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,220,208
+db      102,15,56,220,216
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$002enc2_loop
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,221,208
+db      102,15,56,221,216
+        ret
+align   16
+__aesni_decrypt2:
+        movups  xmm0,[edx]
+        shl     ecx,4
+        movups  xmm1,[16+edx]
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        movups  xmm0,[32+edx]
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+        add     ecx,16
+L$003dec2_loop:
+db      102,15,56,222,209
+db      102,15,56,222,217
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,222,208
+db      102,15,56,222,216
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$003dec2_loop
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,223,208
+db      102,15,56,223,216
+        ret
+align   16
+__aesni_encrypt3:
+        movups  xmm0,[edx]
+        shl     ecx,4
+        movups  xmm1,[16+edx]
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+        movups  xmm0,[32+edx]
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+        add     ecx,16
+L$004enc3_loop:
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,220,208
+db      102,15,56,220,216
+db      102,15,56,220,224
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$004enc3_loop
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+db      102,15,56,221,208
+db      102,15,56,221,216
+db      102,15,56,221,224
+        ret
+align   16
+__aesni_decrypt3:
+        movups  xmm0,[edx]
+        shl     ecx,4
+        movups  xmm1,[16+edx]
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+        movups  xmm0,[32+edx]
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+        add     ecx,16
+L$005dec3_loop:
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,222,208
+db      102,15,56,222,216
+db      102,15,56,222,224
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$005dec3_loop
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+db      102,15,56,223,208
+db      102,15,56,223,216
+db      102,15,56,223,224
+        ret
+align   16
+__aesni_encrypt4:
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        shl     ecx,4
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+        pxor    xmm5,xmm0
+        movups  xmm0,[32+edx]
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+db      15,31,64,0
+        add     ecx,16
+L$006enc4_loop:
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+db      102,15,56,220,233
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,220,208
+db      102,15,56,220,216
+db      102,15,56,220,224
+db      102,15,56,220,232
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$006enc4_loop
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+db      102,15,56,220,233
+db      102,15,56,221,208
+db      102,15,56,221,216
+db      102,15,56,221,224
+db      102,15,56,221,232
+        ret
+align   16
+__aesni_decrypt4:
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        shl     ecx,4
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+        pxor    xmm5,xmm0
+        movups  xmm0,[32+edx]
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+db      15,31,64,0
+        add     ecx,16
+L$007dec4_loop:
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+db      102,15,56,222,233
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,222,208
+db      102,15,56,222,216
+db      102,15,56,222,224
+db      102,15,56,222,232
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$007dec4_loop
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+db      102,15,56,222,233
+db      102,15,56,223,208
+db      102,15,56,223,216
+db      102,15,56,223,224
+db      102,15,56,223,232
+        ret
+align   16
+__aesni_encrypt6:
+        movups  xmm0,[edx]
+        shl     ecx,4
+        movups  xmm1,[16+edx]
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+db      102,15,56,220,209
+        pxor    xmm5,xmm0
+        pxor    xmm6,xmm0
+db      102,15,56,220,217
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+db      102,15,56,220,225
+        pxor    xmm7,xmm0
+        movups  xmm0,[ecx*1+edx]
+        add     ecx,16
+        jmp     NEAR L$008_aesni_encrypt6_inner
+align   16
+L$009enc6_loop:
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+L$008_aesni_encrypt6_inner:
+db      102,15,56,220,233
+db      102,15,56,220,241
+db      102,15,56,220,249
+L$_aesni_encrypt6_enter:
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,220,208
+db      102,15,56,220,216
+db      102,15,56,220,224
+db      102,15,56,220,232
+db      102,15,56,220,240
+db      102,15,56,220,248
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$009enc6_loop
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+db      102,15,56,220,233
+db      102,15,56,220,241
+db      102,15,56,220,249
+db      102,15,56,221,208
+db      102,15,56,221,216
+db      102,15,56,221,224
+db      102,15,56,221,232
+db      102,15,56,221,240
+db      102,15,56,221,248
+        ret
+align   16
+__aesni_decrypt6:
+        movups  xmm0,[edx]
+        shl     ecx,4
+        movups  xmm1,[16+edx]
+        xorps   xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+db      102,15,56,222,209
+        pxor    xmm5,xmm0
+        pxor    xmm6,xmm0
+db      102,15,56,222,217
+        lea     edx,[32+ecx*1+edx]
+        neg     ecx
+db      102,15,56,222,225
+        pxor    xmm7,xmm0
+        movups  xmm0,[ecx*1+edx]
+        add     ecx,16
+        jmp     NEAR L$010_aesni_decrypt6_inner
+align   16
+L$011dec6_loop:
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+L$010_aesni_decrypt6_inner:
+db      102,15,56,222,233
+db      102,15,56,222,241
+db      102,15,56,222,249
+L$_aesni_decrypt6_enter:
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,222,208
+db      102,15,56,222,216
+db      102,15,56,222,224
+db      102,15,56,222,232
+db      102,15,56,222,240
+db      102,15,56,222,248
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$011dec6_loop
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+db      102,15,56,222,233
+db      102,15,56,222,241
+db      102,15,56,222,249
+db      102,15,56,223,208
+db      102,15,56,223,216
+db      102,15,56,223,224
+db      102,15,56,223,232
+db      102,15,56,223,240
+db      102,15,56,223,248
+        ret
+global  _aesni_ecb_encrypt
+align   16
+_aesni_ecb_encrypt:
+L$_aesni_ecb_encrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        mov     ebx,DWORD [36+esp]
+        and     eax,-16
+        jz      NEAR L$012ecb_ret
+        mov     ecx,DWORD [240+edx]
+        test    ebx,ebx
+        jz      NEAR L$013ecb_decrypt
+        mov     ebp,edx
+        mov     ebx,ecx
+        cmp     eax,96
+        jb      NEAR L$014ecb_enc_tail
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        movdqu  xmm7,[80+esi]
+        lea     esi,[96+esi]
+        sub     eax,96
+        jmp     NEAR L$015ecb_enc_loop6_enter
+align   16
+L$016ecb_enc_loop6:
+        movups  [edi],xmm2
+        movdqu  xmm2,[esi]
+        movups  [16+edi],xmm3
+        movdqu  xmm3,[16+esi]
+        movups  [32+edi],xmm4
+        movdqu  xmm4,[32+esi]
+        movups  [48+edi],xmm5
+        movdqu  xmm5,[48+esi]
+        movups  [64+edi],xmm6
+        movdqu  xmm6,[64+esi]
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        movdqu  xmm7,[80+esi]
+        lea     esi,[96+esi]
+L$015ecb_enc_loop6_enter:
+        call    __aesni_encrypt6
+        mov     edx,ebp
+        mov     ecx,ebx
+        sub     eax,96
+        jnc     NEAR L$016ecb_enc_loop6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        add     eax,96
+        jz      NEAR L$012ecb_ret
+L$014ecb_enc_tail:
+        movups  xmm2,[esi]
+        cmp     eax,32
+        jb      NEAR L$017ecb_enc_one
+        movups  xmm3,[16+esi]
+        je      NEAR L$018ecb_enc_two
+        movups  xmm4,[32+esi]
+        cmp     eax,64
+        jb      NEAR L$019ecb_enc_three
+        movups  xmm5,[48+esi]
+        je      NEAR L$020ecb_enc_four
+        movups  xmm6,[64+esi]
+        xorps   xmm7,xmm7
+        call    __aesni_encrypt6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        jmp     NEAR L$012ecb_ret
+align   16
+L$017ecb_enc_one:
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$021enc1_loop_3:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$021enc1_loop_3
+db      102,15,56,221,209
+        movups  [edi],xmm2
+        jmp     NEAR L$012ecb_ret
+align   16
+L$018ecb_enc_two:
+        call    __aesni_encrypt2
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        jmp     NEAR L$012ecb_ret
+align   16
+L$019ecb_enc_three:
+        call    __aesni_encrypt3
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        jmp     NEAR L$012ecb_ret
+align   16
+L$020ecb_enc_four:
+        call    __aesni_encrypt4
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        jmp     NEAR L$012ecb_ret
+align   16
+L$013ecb_decrypt:
+        mov     ebp,edx
+        mov     ebx,ecx
+        cmp     eax,96
+        jb      NEAR L$022ecb_dec_tail
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        movdqu  xmm7,[80+esi]
+        lea     esi,[96+esi]
+        sub     eax,96
+        jmp     NEAR L$023ecb_dec_loop6_enter
+align   16
+L$024ecb_dec_loop6:
+        movups  [edi],xmm2
+        movdqu  xmm2,[esi]
+        movups  [16+edi],xmm3
+        movdqu  xmm3,[16+esi]
+        movups  [32+edi],xmm4
+        movdqu  xmm4,[32+esi]
+        movups  [48+edi],xmm5
+        movdqu  xmm5,[48+esi]
+        movups  [64+edi],xmm6
+        movdqu  xmm6,[64+esi]
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        movdqu  xmm7,[80+esi]
+        lea     esi,[96+esi]
+L$023ecb_dec_loop6_enter:
+        call    __aesni_decrypt6
+        mov     edx,ebp
+        mov     ecx,ebx
+        sub     eax,96
+        jnc     NEAR L$024ecb_dec_loop6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        add     eax,96
+        jz      NEAR L$012ecb_ret
+L$022ecb_dec_tail:
+        movups  xmm2,[esi]
+        cmp     eax,32
+        jb      NEAR L$025ecb_dec_one
+        movups  xmm3,[16+esi]
+        je      NEAR L$026ecb_dec_two
+        movups  xmm4,[32+esi]
+        cmp     eax,64
+        jb      NEAR L$027ecb_dec_three
+        movups  xmm5,[48+esi]
+        je      NEAR L$028ecb_dec_four
+        movups  xmm6,[64+esi]
+        xorps   xmm7,xmm7
+        call    __aesni_decrypt6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        jmp     NEAR L$012ecb_ret
+align   16
+L$025ecb_dec_one:
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$029dec1_loop_4:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$029dec1_loop_4
+db      102,15,56,223,209
+        movups  [edi],xmm2
+        jmp     NEAR L$012ecb_ret
+align   16
+L$026ecb_dec_two:
+        call    __aesni_decrypt2
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        jmp     NEAR L$012ecb_ret
+align   16
+L$027ecb_dec_three:
+        call    __aesni_decrypt3
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        jmp     NEAR L$012ecb_ret
+align   16
+L$028ecb_dec_four:
+        call    __aesni_decrypt4
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+L$012ecb_ret:
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        pxor    xmm5,xmm5
+        pxor    xmm6,xmm6
+        pxor    xmm7,xmm7
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_ccm64_encrypt_blocks
+align   16
+_aesni_ccm64_encrypt_blocks:
+L$_aesni_ccm64_encrypt_blocks_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        mov     ebx,DWORD [36+esp]
+        mov     ecx,DWORD [40+esp]
+        mov     ebp,esp
+        sub     esp,60
+        and     esp,-16
+        mov     DWORD [48+esp],ebp
+        movdqu  xmm7,[ebx]
+        movdqu  xmm3,[ecx]
+        mov     ecx,DWORD [240+edx]
+        mov     DWORD [esp],202182159
+        mov     DWORD [4+esp],134810123
+        mov     DWORD [8+esp],67438087
+        mov     DWORD [12+esp],66051
+        mov     ebx,1
+        xor     ebp,ebp
+        mov     DWORD [16+esp],ebx
+        mov     DWORD [20+esp],ebp
+        mov     DWORD [24+esp],ebp
+        mov     DWORD [28+esp],ebp
+        shl     ecx,4
+        mov     ebx,16
+        lea     ebp,[edx]
+        movdqa  xmm5,[esp]
+        movdqa  xmm2,xmm7
+        lea     edx,[32+ecx*1+edx]
+        sub     ebx,ecx
+db      102,15,56,0,253
+L$030ccm64_enc_outer:
+        movups  xmm0,[ebp]
+        mov     ecx,ebx
+        movups  xmm6,[esi]
+        xorps   xmm2,xmm0
+        movups  xmm1,[16+ebp]
+        xorps   xmm0,xmm6
+        xorps   xmm3,xmm0
+        movups  xmm0,[32+ebp]
+L$031ccm64_enc2_loop:
+db      102,15,56,220,209
+db      102,15,56,220,217
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,220,208
+db      102,15,56,220,216
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$031ccm64_enc2_loop
+db      102,15,56,220,209
+db      102,15,56,220,217
+        paddq   xmm7,[16+esp]
+        dec     eax
+db      102,15,56,221,208
+db      102,15,56,221,216
+        lea     esi,[16+esi]
+        xorps   xmm6,xmm2
+        movdqa  xmm2,xmm7
+        movups  [edi],xmm6
+db      102,15,56,0,213
+        lea     edi,[16+edi]
+        jnz     NEAR L$030ccm64_enc_outer
+        mov     esp,DWORD [48+esp]
+        mov     edi,DWORD [40+esp]
+        movups  [edi],xmm3
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        pxor    xmm5,xmm5
+        pxor    xmm6,xmm6
+        pxor    xmm7,xmm7
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_ccm64_decrypt_blocks
+align   16
+_aesni_ccm64_decrypt_blocks:
+L$_aesni_ccm64_decrypt_blocks_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        mov     ebx,DWORD [36+esp]
+        mov     ecx,DWORD [40+esp]
+        mov     ebp,esp
+        sub     esp,60
+        and     esp,-16
+        mov     DWORD [48+esp],ebp
+        movdqu  xmm7,[ebx]
+        movdqu  xmm3,[ecx]
+        mov     ecx,DWORD [240+edx]
+        mov     DWORD [esp],202182159
+        mov     DWORD [4+esp],134810123
+        mov     DWORD [8+esp],67438087
+        mov     DWORD [12+esp],66051
+        mov     ebx,1
+        xor     ebp,ebp
+        mov     DWORD [16+esp],ebx
+        mov     DWORD [20+esp],ebp
+        mov     DWORD [24+esp],ebp
+        mov     DWORD [28+esp],ebp
+        movdqa  xmm5,[esp]
+        movdqa  xmm2,xmm7
+        mov     ebp,edx
+        mov     ebx,ecx
+db      102,15,56,0,253
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$032enc1_loop_5:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$032enc1_loop_5
+db      102,15,56,221,209
+        shl     ebx,4
+        mov     ecx,16
+        movups  xmm6,[esi]
+        paddq   xmm7,[16+esp]
+        lea     esi,[16+esi]
+        sub     ecx,ebx
+        lea     edx,[32+ebx*1+ebp]
+        mov     ebx,ecx
+        jmp     NEAR L$033ccm64_dec_outer
+align   16
+L$033ccm64_dec_outer:
+        xorps   xmm6,xmm2
+        movdqa  xmm2,xmm7
+        movups  [edi],xmm6
+        lea     edi,[16+edi]
+db      102,15,56,0,213
+        sub     eax,1
+        jz      NEAR L$034ccm64_dec_break
+        movups  xmm0,[ebp]
+        mov     ecx,ebx
+        movups  xmm1,[16+ebp]
+        xorps   xmm6,xmm0
+        xorps   xmm2,xmm0
+        xorps   xmm3,xmm6
+        movups  xmm0,[32+ebp]
+L$035ccm64_dec2_loop:
+db      102,15,56,220,209
+db      102,15,56,220,217
+        movups  xmm1,[ecx*1+edx]
+        add     ecx,32
+db      102,15,56,220,208
+db      102,15,56,220,216
+        movups  xmm0,[ecx*1+edx-16]
+        jnz     NEAR L$035ccm64_dec2_loop
+        movups  xmm6,[esi]
+        paddq   xmm7,[16+esp]
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,221,208
+db      102,15,56,221,216
+        lea     esi,[16+esi]
+        jmp     NEAR L$033ccm64_dec_outer
+align   16
+L$034ccm64_dec_break:
+        mov     ecx,DWORD [240+ebp]
+        mov     edx,ebp
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        xorps   xmm6,xmm0
+        lea     edx,[32+edx]
+        xorps   xmm3,xmm6
+L$036enc1_loop_6:
+db      102,15,56,220,217
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$036enc1_loop_6
+db      102,15,56,221,217
+        mov     esp,DWORD [48+esp]
+        mov     edi,DWORD [40+esp]
+        movups  [edi],xmm3
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        pxor    xmm5,xmm5
+        pxor    xmm6,xmm6
+        pxor    xmm7,xmm7
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_ctr32_encrypt_blocks
+align   16
+_aesni_ctr32_encrypt_blocks:
+L$_aesni_ctr32_encrypt_blocks_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        mov     ebx,DWORD [36+esp]
+        mov     ebp,esp
+        sub     esp,88
+        and     esp,-16
+        mov     DWORD [80+esp],ebp
+        cmp     eax,1
+        je      NEAR L$037ctr32_one_shortcut
+        movdqu  xmm7,[ebx]
+        mov     DWORD [esp],202182159
+        mov     DWORD [4+esp],134810123
+        mov     DWORD [8+esp],67438087
+        mov     DWORD [12+esp],66051
+        mov     ecx,6
+        xor     ebp,ebp
+        mov     DWORD [16+esp],ecx
+        mov     DWORD [20+esp],ecx
+        mov     DWORD [24+esp],ecx
+        mov     DWORD [28+esp],ebp
+db      102,15,58,22,251,3
+db      102,15,58,34,253,3
+        mov     ecx,DWORD [240+edx]
+        bswap   ebx
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        movdqa  xmm2,[esp]
+db      102,15,58,34,195,0
+        lea     ebp,[3+ebx]
+db      102,15,58,34,205,0
+        inc     ebx
+db      102,15,58,34,195,1
+        inc     ebp
+db      102,15,58,34,205,1
+        inc     ebx
+db      102,15,58,34,195,2
+        inc     ebp
+db      102,15,58,34,205,2
+        movdqa  [48+esp],xmm0
+db      102,15,56,0,194
+        movdqu  xmm6,[edx]
+        movdqa  [64+esp],xmm1
+db      102,15,56,0,202
+        pshufd  xmm2,xmm0,192
+        pshufd  xmm3,xmm0,128
+        cmp     eax,6
+        jb      NEAR L$038ctr32_tail
+        pxor    xmm7,xmm6
+        shl     ecx,4
+        mov     ebx,16
+        movdqa  [32+esp],xmm7
+        mov     ebp,edx
+        sub     ebx,ecx
+        lea     edx,[32+ecx*1+edx]
+        sub     eax,6
+        jmp     NEAR L$039ctr32_loop6
+align   16
+L$039ctr32_loop6:
+        pshufd  xmm4,xmm0,64
+        movdqa  xmm0,[32+esp]
+        pshufd  xmm5,xmm1,192
+        pxor    xmm2,xmm0
+        pshufd  xmm6,xmm1,128
+        pxor    xmm3,xmm0
+        pshufd  xmm7,xmm1,64
+        movups  xmm1,[16+ebp]
+        pxor    xmm4,xmm0
+        pxor    xmm5,xmm0
+db      102,15,56,220,209
+        pxor    xmm6,xmm0
+        pxor    xmm7,xmm0
+db      102,15,56,220,217
+        movups  xmm0,[32+ebp]
+        mov     ecx,ebx
+db      102,15,56,220,225
+db      102,15,56,220,233
+db      102,15,56,220,241
+db      102,15,56,220,249
+        call    L$_aesni_encrypt6_enter
+        movups  xmm1,[esi]
+        movups  xmm0,[16+esi]
+        xorps   xmm2,xmm1
+        movups  xmm1,[32+esi]
+        xorps   xmm3,xmm0
+        movups  [edi],xmm2
+        movdqa  xmm0,[16+esp]
+        xorps   xmm4,xmm1
+        movdqa  xmm1,[64+esp]
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        paddd   xmm1,xmm0
+        paddd   xmm0,[48+esp]
+        movdqa  xmm2,[esp]
+        movups  xmm3,[48+esi]
+        movups  xmm4,[64+esi]
+        xorps   xmm5,xmm3
+        movups  xmm3,[80+esi]
+        lea     esi,[96+esi]
+        movdqa  [48+esp],xmm0
+db      102,15,56,0,194
+        xorps   xmm6,xmm4
+        movups  [48+edi],xmm5
+        xorps   xmm7,xmm3
+        movdqa  [64+esp],xmm1
+db      102,15,56,0,202
+        movups  [64+edi],xmm6
+        pshufd  xmm2,xmm0,192
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        pshufd  xmm3,xmm0,128
+        sub     eax,6
+        jnc     NEAR L$039ctr32_loop6
+        add     eax,6
+        jz      NEAR L$040ctr32_ret
+        movdqu  xmm7,[ebp]
+        mov     edx,ebp
+        pxor    xmm7,[32+esp]
+        mov     ecx,DWORD [240+ebp]
+L$038ctr32_tail:
+        por     xmm2,xmm7
+        cmp     eax,2
+        jb      NEAR L$041ctr32_one
+        pshufd  xmm4,xmm0,64
+        por     xmm3,xmm7
+        je      NEAR L$042ctr32_two
+        pshufd  xmm5,xmm1,192
+        por     xmm4,xmm7
+        cmp     eax,4
+        jb      NEAR L$043ctr32_three
+        pshufd  xmm6,xmm1,128
+        por     xmm5,xmm7
+        je      NEAR L$044ctr32_four
+        por     xmm6,xmm7
+        call    __aesni_encrypt6
+        movups  xmm1,[esi]
+        movups  xmm0,[16+esi]
+        xorps   xmm2,xmm1
+        movups  xmm1,[32+esi]
+        xorps   xmm3,xmm0
+        movups  xmm0,[48+esi]
+        xorps   xmm4,xmm1
+        movups  xmm1,[64+esi]
+        xorps   xmm5,xmm0
+        movups  [edi],xmm2
+        xorps   xmm6,xmm1
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        jmp     NEAR L$040ctr32_ret
+align   16
+L$037ctr32_one_shortcut:
+        movups  xmm2,[ebx]
+        mov     ecx,DWORD [240+edx]
+L$041ctr32_one:
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$045enc1_loop_7:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$045enc1_loop_7
+db      102,15,56,221,209
+        movups  xmm6,[esi]
+        xorps   xmm6,xmm2
+        movups  [edi],xmm6
+        jmp     NEAR L$040ctr32_ret
+align   16
+L$042ctr32_two:
+        call    __aesni_encrypt2
+        movups  xmm5,[esi]
+        movups  xmm6,[16+esi]
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        jmp     NEAR L$040ctr32_ret
+align   16
+L$043ctr32_three:
+        call    __aesni_encrypt3
+        movups  xmm5,[esi]
+        movups  xmm6,[16+esi]
+        xorps   xmm2,xmm5
+        movups  xmm7,[32+esi]
+        xorps   xmm3,xmm6
+        movups  [edi],xmm2
+        xorps   xmm4,xmm7
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        jmp     NEAR L$040ctr32_ret
+align   16
+L$044ctr32_four:
+        call    __aesni_encrypt4
+        movups  xmm6,[esi]
+        movups  xmm7,[16+esi]
+        movups  xmm1,[32+esi]
+        xorps   xmm2,xmm6
+        movups  xmm0,[48+esi]
+        xorps   xmm3,xmm7
+        movups  [edi],xmm2
+        xorps   xmm4,xmm1
+        movups  [16+edi],xmm3
+        xorps   xmm5,xmm0
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+L$040ctr32_ret:
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        movdqa  [32+esp],xmm0
+        pxor    xmm5,xmm5
+        movdqa  [48+esp],xmm0
+        pxor    xmm6,xmm6
+        movdqa  [64+esp],xmm0
+        pxor    xmm7,xmm7
+        mov     esp,DWORD [80+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_xts_encrypt
+align   16
+_aesni_xts_encrypt:
+L$_aesni_xts_encrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     edx,DWORD [36+esp]
+        mov     esi,DWORD [40+esp]
+        mov     ecx,DWORD [240+edx]
+        movups  xmm2,[esi]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$046enc1_loop_8:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$046enc1_loop_8
+db      102,15,56,221,209
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        mov     ebp,esp
+        sub     esp,120
+        mov     ecx,DWORD [240+edx]
+        and     esp,-16
+        mov     DWORD [96+esp],135
+        mov     DWORD [100+esp],0
+        mov     DWORD [104+esp],1
+        mov     DWORD [108+esp],0
+        mov     DWORD [112+esp],eax
+        mov     DWORD [116+esp],ebp
+        movdqa  xmm1,xmm2
+        pxor    xmm0,xmm0
+        movdqa  xmm3,[96+esp]
+        pcmpgtd xmm0,xmm1
+        and     eax,-16
+        mov     ebp,edx
+        mov     ebx,ecx
+        sub     eax,96
+        jc      NEAR L$047xts_enc_short
+        shl     ecx,4
+        mov     ebx,16
+        sub     ebx,ecx
+        lea     edx,[32+ecx*1+edx]
+        jmp     NEAR L$048xts_enc_loop6
+align   16
+L$048xts_enc_loop6:
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [16+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [32+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [48+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm7,xmm0,19
+        movdqa  [64+esp],xmm1
+        paddq   xmm1,xmm1
+        movups  xmm0,[ebp]
+        pand    xmm7,xmm3
+        movups  xmm2,[esi]
+        pxor    xmm7,xmm1
+        mov     ecx,ebx
+        movdqu  xmm3,[16+esi]
+        xorps   xmm2,xmm0
+        movdqu  xmm4,[32+esi]
+        pxor    xmm3,xmm0
+        movdqu  xmm5,[48+esi]
+        pxor    xmm4,xmm0
+        movdqu  xmm6,[64+esi]
+        pxor    xmm5,xmm0
+        movdqu  xmm1,[80+esi]
+        pxor    xmm6,xmm0
+        lea     esi,[96+esi]
+        pxor    xmm2,[esp]
+        movdqa  [80+esp],xmm7
+        pxor    xmm7,xmm1
+        movups  xmm1,[16+ebp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+db      102,15,56,220,209
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+db      102,15,56,220,217
+        pxor    xmm7,xmm0
+        movups  xmm0,[32+ebp]
+db      102,15,56,220,225
+db      102,15,56,220,233
+db      102,15,56,220,241
+db      102,15,56,220,249
+        call    L$_aesni_encrypt6_enter
+        movdqa  xmm1,[80+esp]
+        pxor    xmm0,xmm0
+        xorps   xmm2,[esp]
+        pcmpgtd xmm0,xmm1
+        xorps   xmm3,[16+esp]
+        movups  [edi],xmm2
+        xorps   xmm4,[32+esp]
+        movups  [16+edi],xmm3
+        xorps   xmm5,[48+esp]
+        movups  [32+edi],xmm4
+        xorps   xmm6,[64+esp]
+        movups  [48+edi],xmm5
+        xorps   xmm7,xmm1
+        movups  [64+edi],xmm6
+        pshufd  xmm2,xmm0,19
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        movdqa  xmm3,[96+esp]
+        pxor    xmm0,xmm0
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        sub     eax,96
+        jnc     NEAR L$048xts_enc_loop6
+        mov     ecx,DWORD [240+ebp]
+        mov     edx,ebp
+        mov     ebx,ecx
+L$047xts_enc_short:
+        add     eax,96
+        jz      NEAR L$049xts_enc_done6x
+        movdqa  xmm5,xmm1
+        cmp     eax,32
+        jb      NEAR L$050xts_enc_one
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        je      NEAR L$051xts_enc_two
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  xmm6,xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        cmp     eax,64
+        jb      NEAR L$052xts_enc_three
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  xmm7,xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        movdqa  [esp],xmm5
+        movdqa  [16+esp],xmm6
+        je      NEAR L$053xts_enc_four
+        movdqa  [32+esp],xmm7
+        pshufd  xmm7,xmm0,19
+        movdqa  [48+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm7,xmm3
+        pxor    xmm7,xmm1
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        pxor    xmm2,[esp]
+        movdqu  xmm5,[48+esi]
+        pxor    xmm3,[16+esp]
+        movdqu  xmm6,[64+esi]
+        pxor    xmm4,[32+esp]
+        lea     esi,[80+esi]
+        pxor    xmm5,[48+esp]
+        movdqa  [64+esp],xmm7
+        pxor    xmm6,xmm7
+        call    __aesni_encrypt6
+        movaps  xmm1,[64+esp]
+        xorps   xmm2,[esp]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,[32+esp]
+        movups  [edi],xmm2
+        xorps   xmm5,[48+esp]
+        movups  [16+edi],xmm3
+        xorps   xmm6,xmm1
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        lea     edi,[80+edi]
+        jmp     NEAR L$054xts_enc_done
+align   16
+L$050xts_enc_one:
+        movups  xmm2,[esi]
+        lea     esi,[16+esi]
+        xorps   xmm2,xmm5
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$055enc1_loop_9:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$055enc1_loop_9
+db      102,15,56,221,209
+        xorps   xmm2,xmm5
+        movups  [edi],xmm2
+        lea     edi,[16+edi]
+        movdqa  xmm1,xmm5
+        jmp     NEAR L$054xts_enc_done
+align   16
+L$051xts_enc_two:
+        movaps  xmm6,xmm1
+        movups  xmm2,[esi]
+        movups  xmm3,[16+esi]
+        lea     esi,[32+esi]
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        call    __aesni_encrypt2
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        lea     edi,[32+edi]
+        movdqa  xmm1,xmm6
+        jmp     NEAR L$054xts_enc_done
+align   16
+L$052xts_enc_three:
+        movaps  xmm7,xmm1
+        movups  xmm2,[esi]
+        movups  xmm3,[16+esi]
+        movups  xmm4,[32+esi]
+        lea     esi,[48+esi]
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm7
+        call    __aesni_encrypt3
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm7
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        lea     edi,[48+edi]
+        movdqa  xmm1,xmm7
+        jmp     NEAR L$054xts_enc_done
+align   16
+L$053xts_enc_four:
+        movaps  xmm6,xmm1
+        movups  xmm2,[esi]
+        movups  xmm3,[16+esi]
+        movups  xmm4,[32+esi]
+        xorps   xmm2,[esp]
+        movups  xmm5,[48+esi]
+        lea     esi,[64+esi]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,xmm7
+        xorps   xmm5,xmm6
+        call    __aesni_encrypt4
+        xorps   xmm2,[esp]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,xmm7
+        movups  [edi],xmm2
+        xorps   xmm5,xmm6
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        lea     edi,[64+edi]
+        movdqa  xmm1,xmm6
+        jmp     NEAR L$054xts_enc_done
+align   16
+L$049xts_enc_done6x:
+        mov     eax,DWORD [112+esp]
+        and     eax,15
+        jz      NEAR L$056xts_enc_ret
+        movdqa  xmm5,xmm1
+        mov     DWORD [112+esp],eax
+        jmp     NEAR L$057xts_enc_steal
+align   16
+L$054xts_enc_done:
+        mov     eax,DWORD [112+esp]
+        pxor    xmm0,xmm0
+        and     eax,15
+        jz      NEAR L$056xts_enc_ret
+        pcmpgtd xmm0,xmm1
+        mov     DWORD [112+esp],eax
+        pshufd  xmm5,xmm0,19
+        paddq   xmm1,xmm1
+        pand    xmm5,[96+esp]
+        pxor    xmm5,xmm1
+L$057xts_enc_steal:
+        movzx   ecx,BYTE [esi]
+        movzx   edx,BYTE [edi-16]
+        lea     esi,[1+esi]
+        mov     BYTE [edi-16],cl
+        mov     BYTE [edi],dl
+        lea     edi,[1+edi]
+        sub     eax,1
+        jnz     NEAR L$057xts_enc_steal
+        sub     edi,DWORD [112+esp]
+        mov     edx,ebp
+        mov     ecx,ebx
+        movups  xmm2,[edi-16]
+        xorps   xmm2,xmm5
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$058enc1_loop_10:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$058enc1_loop_10
+db      102,15,56,221,209
+        xorps   xmm2,xmm5
+        movups  [edi-16],xmm2
+L$056xts_enc_ret:
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        movdqa  [esp],xmm0
+        pxor    xmm3,xmm3
+        movdqa  [16+esp],xmm0
+        pxor    xmm4,xmm4
+        movdqa  [32+esp],xmm0
+        pxor    xmm5,xmm5
+        movdqa  [48+esp],xmm0
+        pxor    xmm6,xmm6
+        movdqa  [64+esp],xmm0
+        pxor    xmm7,xmm7
+        movdqa  [80+esp],xmm0
+        mov     esp,DWORD [116+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_xts_decrypt
+align   16
+_aesni_xts_decrypt:
+L$_aesni_xts_decrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     edx,DWORD [36+esp]
+        mov     esi,DWORD [40+esp]
+        mov     ecx,DWORD [240+edx]
+        movups  xmm2,[esi]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$059enc1_loop_11:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$059enc1_loop_11
+db      102,15,56,221,209
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        mov     ebp,esp
+        sub     esp,120
+        and     esp,-16
+        xor     ebx,ebx
+        test    eax,15
+        setnz   bl
+        shl     ebx,4
+        sub     eax,ebx
+        mov     DWORD [96+esp],135
+        mov     DWORD [100+esp],0
+        mov     DWORD [104+esp],1
+        mov     DWORD [108+esp],0
+        mov     DWORD [112+esp],eax
+        mov     DWORD [116+esp],ebp
+        mov     ecx,DWORD [240+edx]
+        mov     ebp,edx
+        mov     ebx,ecx
+        movdqa  xmm1,xmm2
+        pxor    xmm0,xmm0
+        movdqa  xmm3,[96+esp]
+        pcmpgtd xmm0,xmm1
+        and     eax,-16
+        sub     eax,96
+        jc      NEAR L$060xts_dec_short
+        shl     ecx,4
+        mov     ebx,16
+        sub     ebx,ecx
+        lea     edx,[32+ecx*1+edx]
+        jmp     NEAR L$061xts_dec_loop6
+align   16
+L$061xts_dec_loop6:
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [16+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [32+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  [48+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        pshufd  xmm7,xmm0,19
+        movdqa  [64+esp],xmm1
+        paddq   xmm1,xmm1
+        movups  xmm0,[ebp]
+        pand    xmm7,xmm3
+        movups  xmm2,[esi]
+        pxor    xmm7,xmm1
+        mov     ecx,ebx
+        movdqu  xmm3,[16+esi]
+        xorps   xmm2,xmm0
+        movdqu  xmm4,[32+esi]
+        pxor    xmm3,xmm0
+        movdqu  xmm5,[48+esi]
+        pxor    xmm4,xmm0
+        movdqu  xmm6,[64+esi]
+        pxor    xmm5,xmm0
+        movdqu  xmm1,[80+esi]
+        pxor    xmm6,xmm0
+        lea     esi,[96+esi]
+        pxor    xmm2,[esp]
+        movdqa  [80+esp],xmm7
+        pxor    xmm7,xmm1
+        movups  xmm1,[16+ebp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+db      102,15,56,222,209
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+db      102,15,56,222,217
+        pxor    xmm7,xmm0
+        movups  xmm0,[32+ebp]
+db      102,15,56,222,225
+db      102,15,56,222,233
+db      102,15,56,222,241
+db      102,15,56,222,249
+        call    L$_aesni_decrypt6_enter
+        movdqa  xmm1,[80+esp]
+        pxor    xmm0,xmm0
+        xorps   xmm2,[esp]
+        pcmpgtd xmm0,xmm1
+        xorps   xmm3,[16+esp]
+        movups  [edi],xmm2
+        xorps   xmm4,[32+esp]
+        movups  [16+edi],xmm3
+        xorps   xmm5,[48+esp]
+        movups  [32+edi],xmm4
+        xorps   xmm6,[64+esp]
+        movups  [48+edi],xmm5
+        xorps   xmm7,xmm1
+        movups  [64+edi],xmm6
+        pshufd  xmm2,xmm0,19
+        movups  [80+edi],xmm7
+        lea     edi,[96+edi]
+        movdqa  xmm3,[96+esp]
+        pxor    xmm0,xmm0
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        sub     eax,96
+        jnc     NEAR L$061xts_dec_loop6
+        mov     ecx,DWORD [240+ebp]
+        mov     edx,ebp
+        mov     ebx,ecx
+L$060xts_dec_short:
+        add     eax,96
+        jz      NEAR L$062xts_dec_done6x
+        movdqa  xmm5,xmm1
+        cmp     eax,32
+        jb      NEAR L$063xts_dec_one
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        je      NEAR L$064xts_dec_two
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  xmm6,xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        cmp     eax,64
+        jb      NEAR L$065xts_dec_three
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  xmm7,xmm1
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+        movdqa  [esp],xmm5
+        movdqa  [16+esp],xmm6
+        je      NEAR L$066xts_dec_four
+        movdqa  [32+esp],xmm7
+        pshufd  xmm7,xmm0,19
+        movdqa  [48+esp],xmm1
+        paddq   xmm1,xmm1
+        pand    xmm7,xmm3
+        pxor    xmm7,xmm1
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        pxor    xmm2,[esp]
+        movdqu  xmm5,[48+esi]
+        pxor    xmm3,[16+esp]
+        movdqu  xmm6,[64+esi]
+        pxor    xmm4,[32+esp]
+        lea     esi,[80+esi]
+        pxor    xmm5,[48+esp]
+        movdqa  [64+esp],xmm7
+        pxor    xmm6,xmm7
+        call    __aesni_decrypt6
+        movaps  xmm1,[64+esp]
+        xorps   xmm2,[esp]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,[32+esp]
+        movups  [edi],xmm2
+        xorps   xmm5,[48+esp]
+        movups  [16+edi],xmm3
+        xorps   xmm6,xmm1
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        movups  [64+edi],xmm6
+        lea     edi,[80+edi]
+        jmp     NEAR L$067xts_dec_done
+align   16
+L$063xts_dec_one:
+        movups  xmm2,[esi]
+        lea     esi,[16+esi]
+        xorps   xmm2,xmm5
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$068dec1_loop_12:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$068dec1_loop_12
+db      102,15,56,223,209
+        xorps   xmm2,xmm5
+        movups  [edi],xmm2
+        lea     edi,[16+edi]
+        movdqa  xmm1,xmm5
+        jmp     NEAR L$067xts_dec_done
+align   16
+L$064xts_dec_two:
+        movaps  xmm6,xmm1
+        movups  xmm2,[esi]
+        movups  xmm3,[16+esi]
+        lea     esi,[32+esi]
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        call    __aesni_decrypt2
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        lea     edi,[32+edi]
+        movdqa  xmm1,xmm6
+        jmp     NEAR L$067xts_dec_done
+align   16
+L$065xts_dec_three:
+        movaps  xmm7,xmm1
+        movups  xmm2,[esi]
+        movups  xmm3,[16+esi]
+        movups  xmm4,[32+esi]
+        lea     esi,[48+esi]
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm7
+        call    __aesni_decrypt3
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm7
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        lea     edi,[48+edi]
+        movdqa  xmm1,xmm7
+        jmp     NEAR L$067xts_dec_done
+align   16
+L$066xts_dec_four:
+        movaps  xmm6,xmm1
+        movups  xmm2,[esi]
+        movups  xmm3,[16+esi]
+        movups  xmm4,[32+esi]
+        xorps   xmm2,[esp]
+        movups  xmm5,[48+esi]
+        lea     esi,[64+esi]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,xmm7
+        xorps   xmm5,xmm6
+        call    __aesni_decrypt4
+        xorps   xmm2,[esp]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,xmm7
+        movups  [edi],xmm2
+        xorps   xmm5,xmm6
+        movups  [16+edi],xmm3
+        movups  [32+edi],xmm4
+        movups  [48+edi],xmm5
+        lea     edi,[64+edi]
+        movdqa  xmm1,xmm6
+        jmp     NEAR L$067xts_dec_done
+align   16
+L$062xts_dec_done6x:
+        mov     eax,DWORD [112+esp]
+        and     eax,15
+        jz      NEAR L$069xts_dec_ret
+        mov     DWORD [112+esp],eax
+        jmp     NEAR L$070xts_dec_only_one_more
+align   16
+L$067xts_dec_done:
+        mov     eax,DWORD [112+esp]
+        pxor    xmm0,xmm0
+        and     eax,15
+        jz      NEAR L$069xts_dec_ret
+        pcmpgtd xmm0,xmm1
+        mov     DWORD [112+esp],eax
+        pshufd  xmm2,xmm0,19
+        pxor    xmm0,xmm0
+        movdqa  xmm3,[96+esp]
+        paddq   xmm1,xmm1
+        pand    xmm2,xmm3
+        pcmpgtd xmm0,xmm1
+        pxor    xmm1,xmm2
+L$070xts_dec_only_one_more:
+        pshufd  xmm5,xmm0,19
+        movdqa  xmm6,xmm1
+        paddq   xmm1,xmm1
+        pand    xmm5,xmm3
+        pxor    xmm5,xmm1
+        mov     edx,ebp
+        mov     ecx,ebx
+        movups  xmm2,[esi]
+        xorps   xmm2,xmm5
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$071dec1_loop_13:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$071dec1_loop_13
+db      102,15,56,223,209
+        xorps   xmm2,xmm5
+        movups  [edi],xmm2
+L$072xts_dec_steal:
+        movzx   ecx,BYTE [16+esi]
+        movzx   edx,BYTE [edi]
+        lea     esi,[1+esi]
+        mov     BYTE [edi],cl
+        mov     BYTE [16+edi],dl
+        lea     edi,[1+edi]
+        sub     eax,1
+        jnz     NEAR L$072xts_dec_steal
+        sub     edi,DWORD [112+esp]
+        mov     edx,ebp
+        mov     ecx,ebx
+        movups  xmm2,[edi]
+        xorps   xmm2,xmm6
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$073dec1_loop_14:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$073dec1_loop_14
+db      102,15,56,223,209
+        xorps   xmm2,xmm6
+        movups  [edi],xmm2
+L$069xts_dec_ret:
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        movdqa  [esp],xmm0
+        pxor    xmm3,xmm3
+        movdqa  [16+esp],xmm0
+        pxor    xmm4,xmm4
+        movdqa  [32+esp],xmm0
+        pxor    xmm5,xmm5
+        movdqa  [48+esp],xmm0
+        pxor    xmm6,xmm6
+        movdqa  [64+esp],xmm0
+        pxor    xmm7,xmm7
+        movdqa  [80+esp],xmm0
+        mov     esp,DWORD [116+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_ocb_encrypt
+align   16
+_aesni_ocb_encrypt:
+L$_aesni_ocb_encrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     ecx,DWORD [40+esp]
+        mov     ebx,DWORD [48+esp]
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        movdqu  xmm0,[ecx]
+        mov     ebp,DWORD [36+esp]
+        movdqu  xmm1,[ebx]
+        mov     ebx,DWORD [44+esp]
+        mov     ecx,esp
+        sub     esp,132
+        and     esp,-16
+        sub     edi,esi
+        shl     eax,4
+        lea     eax,[eax*1+esi-96]
+        mov     DWORD [120+esp],edi
+        mov     DWORD [124+esp],eax
+        mov     DWORD [128+esp],ecx
+        mov     ecx,DWORD [240+edx]
+        test    ebp,1
+        jnz     NEAR L$074odd
+        bsf     eax,ebp
+        add     ebp,1
+        shl     eax,4
+        movdqu  xmm7,[eax*1+ebx]
+        mov     eax,edx
+        movdqu  xmm2,[esi]
+        lea     esi,[16+esi]
+        pxor    xmm7,xmm0
+        pxor    xmm1,xmm2
+        pxor    xmm2,xmm7
+        movdqa  xmm6,xmm1
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$075enc1_loop_15:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$075enc1_loop_15
+db      102,15,56,221,209
+        xorps   xmm2,xmm7
+        movdqa  xmm0,xmm7
+        movdqa  xmm1,xmm6
+        movups  [esi*1+edi-16],xmm2
+        mov     ecx,DWORD [240+eax]
+        mov     edx,eax
+        mov     eax,DWORD [124+esp]
+L$074odd:
+        shl     ecx,4
+        mov     edi,16
+        sub     edi,ecx
+        mov     DWORD [112+esp],edx
+        lea     edx,[32+ecx*1+edx]
+        mov     DWORD [116+esp],edi
+        cmp     esi,eax
+        ja      NEAR L$076short
+        jmp     NEAR L$077grandloop
+align   32
+L$077grandloop:
+        lea     ecx,[1+ebp]
+        lea     eax,[3+ebp]
+        lea     edi,[5+ebp]
+        add     ebp,6
+        bsf     ecx,ecx
+        bsf     eax,eax
+        bsf     edi,edi
+        shl     ecx,4
+        shl     eax,4
+        shl     edi,4
+        movdqu  xmm2,[ebx]
+        movdqu  xmm3,[ecx*1+ebx]
+        mov     ecx,DWORD [116+esp]
+        movdqa  xmm4,xmm2
+        movdqu  xmm5,[eax*1+ebx]
+        movdqa  xmm6,xmm2
+        movdqu  xmm7,[edi*1+ebx]
+        pxor    xmm2,xmm0
+        pxor    xmm3,xmm2
+        movdqa  [esp],xmm2
+        pxor    xmm4,xmm3
+        movdqa  [16+esp],xmm3
+        pxor    xmm5,xmm4
+        movdqa  [32+esp],xmm4
+        pxor    xmm6,xmm5
+        movdqa  [48+esp],xmm5
+        pxor    xmm7,xmm6
+        movdqa  [64+esp],xmm6
+        movdqa  [80+esp],xmm7
+        movups  xmm0,[ecx*1+edx-48]
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        movdqu  xmm7,[80+esi]
+        lea     esi,[96+esi]
+        pxor    xmm1,xmm2
+        pxor    xmm2,xmm0
+        pxor    xmm1,xmm3
+        pxor    xmm3,xmm0
+        pxor    xmm1,xmm4
+        pxor    xmm4,xmm0
+        pxor    xmm1,xmm5
+        pxor    xmm5,xmm0
+        pxor    xmm1,xmm6
+        pxor    xmm6,xmm0
+        pxor    xmm1,xmm7
+        pxor    xmm7,xmm0
+        movdqa  [96+esp],xmm1
+        movups  xmm1,[ecx*1+edx-32]
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+        pxor    xmm7,[80+esp]
+        movups  xmm0,[ecx*1+edx-16]
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+db      102,15,56,220,233
+db      102,15,56,220,241
+db      102,15,56,220,249
+        mov     edi,DWORD [120+esp]
+        mov     eax,DWORD [124+esp]
+        call    L$_aesni_encrypt6_enter
+        movdqa  xmm0,[80+esp]
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+        pxor    xmm7,xmm0
+        movdqa  xmm1,[96+esp]
+        movdqu  [esi*1+edi-96],xmm2
+        movdqu  [esi*1+edi-80],xmm3
+        movdqu  [esi*1+edi-64],xmm4
+        movdqu  [esi*1+edi-48],xmm5
+        movdqu  [esi*1+edi-32],xmm6
+        movdqu  [esi*1+edi-16],xmm7
+        cmp     esi,eax
+        jb      NEAR L$077grandloop
+L$076short:
+        add     eax,96
+        sub     eax,esi
+        jz      NEAR L$078done
+        cmp     eax,32
+        jb      NEAR L$079one
+        je      NEAR L$080two
+        cmp     eax,64
+        jb      NEAR L$081three
+        je      NEAR L$082four
+        lea     ecx,[1+ebp]
+        lea     eax,[3+ebp]
+        bsf     ecx,ecx
+        bsf     eax,eax
+        shl     ecx,4
+        shl     eax,4
+        movdqu  xmm2,[ebx]
+        movdqu  xmm3,[ecx*1+ebx]
+        mov     ecx,DWORD [116+esp]
+        movdqa  xmm4,xmm2
+        movdqu  xmm5,[eax*1+ebx]
+        movdqa  xmm6,xmm2
+        pxor    xmm2,xmm0
+        pxor    xmm3,xmm2
+        movdqa  [esp],xmm2
+        pxor    xmm4,xmm3
+        movdqa  [16+esp],xmm3
+        pxor    xmm5,xmm4
+        movdqa  [32+esp],xmm4
+        pxor    xmm6,xmm5
+        movdqa  [48+esp],xmm5
+        pxor    xmm7,xmm6
+        movdqa  [64+esp],xmm6
+        movups  xmm0,[ecx*1+edx-48]
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        pxor    xmm7,xmm7
+        pxor    xmm1,xmm2
+        pxor    xmm2,xmm0
+        pxor    xmm1,xmm3
+        pxor    xmm3,xmm0
+        pxor    xmm1,xmm4
+        pxor    xmm4,xmm0
+        pxor    xmm1,xmm5
+        pxor    xmm5,xmm0
+        pxor    xmm1,xmm6
+        pxor    xmm6,xmm0
+        movdqa  [96+esp],xmm1
+        movups  xmm1,[ecx*1+edx-32]
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+        movups  xmm0,[ecx*1+edx-16]
+db      102,15,56,220,209
+db      102,15,56,220,217
+db      102,15,56,220,225
+db      102,15,56,220,233
+db      102,15,56,220,241
+db      102,15,56,220,249
+        mov     edi,DWORD [120+esp]
+        call    L$_aesni_encrypt6_enter
+        movdqa  xmm0,[64+esp]
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,xmm0
+        movdqa  xmm1,[96+esp]
+        movdqu  [esi*1+edi],xmm2
+        movdqu  [16+esi*1+edi],xmm3
+        movdqu  [32+esi*1+edi],xmm4
+        movdqu  [48+esi*1+edi],xmm5
+        movdqu  [64+esi*1+edi],xmm6
+        jmp     NEAR L$078done
+align   16
+L$079one:
+        movdqu  xmm7,[ebx]
+        mov     edx,DWORD [112+esp]
+        movdqu  xmm2,[esi]
+        mov     ecx,DWORD [240+edx]
+        pxor    xmm7,xmm0
+        pxor    xmm1,xmm2
+        pxor    xmm2,xmm7
+        movdqa  xmm6,xmm1
+        mov     edi,DWORD [120+esp]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$083enc1_loop_16:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$083enc1_loop_16
+db      102,15,56,221,209
+        xorps   xmm2,xmm7
+        movdqa  xmm0,xmm7
+        movdqa  xmm1,xmm6
+        movups  [esi*1+edi],xmm2
+        jmp     NEAR L$078done
+align   16
+L$080two:
+        lea     ecx,[1+ebp]
+        mov     edx,DWORD [112+esp]
+        bsf     ecx,ecx
+        shl     ecx,4
+        movdqu  xmm6,[ebx]
+        movdqu  xmm7,[ecx*1+ebx]
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        mov     ecx,DWORD [240+edx]
+        pxor    xmm6,xmm0
+        pxor    xmm7,xmm6
+        pxor    xmm1,xmm2
+        pxor    xmm2,xmm6
+        pxor    xmm1,xmm3
+        pxor    xmm3,xmm7
+        movdqa  xmm5,xmm1
+        mov     edi,DWORD [120+esp]
+        call    __aesni_encrypt2
+        xorps   xmm2,xmm6
+        xorps   xmm3,xmm7
+        movdqa  xmm0,xmm7
+        movdqa  xmm1,xmm5
+        movups  [esi*1+edi],xmm2
+        movups  [16+esi*1+edi],xmm3
+        jmp     NEAR L$078done
+align   16
+L$081three:
+        lea     ecx,[1+ebp]
+        mov     edx,DWORD [112+esp]
+        bsf     ecx,ecx
+        shl     ecx,4
+        movdqu  xmm5,[ebx]
+        movdqu  xmm6,[ecx*1+ebx]
+        movdqa  xmm7,xmm5
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        mov     ecx,DWORD [240+edx]
+        pxor    xmm5,xmm0
+        pxor    xmm6,xmm5
+        pxor    xmm7,xmm6
+        pxor    xmm1,xmm2
+        pxor    xmm2,xmm5
+        pxor    xmm1,xmm3
+        pxor    xmm3,xmm6
+        pxor    xmm1,xmm4
+        pxor    xmm4,xmm7
+        movdqa  [96+esp],xmm1
+        mov     edi,DWORD [120+esp]
+        call    __aesni_encrypt3
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm7
+        movdqa  xmm0,xmm7
+        movdqa  xmm1,[96+esp]
+        movups  [esi*1+edi],xmm2
+        movups  [16+esi*1+edi],xmm3
+        movups  [32+esi*1+edi],xmm4
+        jmp     NEAR L$078done
+align   16
+L$082four:
+        lea     ecx,[1+ebp]
+        lea     eax,[3+ebp]
+        bsf     ecx,ecx
+        bsf     eax,eax
+        mov     edx,DWORD [112+esp]
+        shl     ecx,4
+        shl     eax,4
+        movdqu  xmm4,[ebx]
+        movdqu  xmm5,[ecx*1+ebx]
+        movdqa  xmm6,xmm4
+        movdqu  xmm7,[eax*1+ebx]
+        pxor    xmm4,xmm0
+        movdqu  xmm2,[esi]
+        pxor    xmm5,xmm4
+        movdqu  xmm3,[16+esi]
+        pxor    xmm6,xmm5
+        movdqa  [esp],xmm4
+        pxor    xmm7,xmm6
+        movdqa  [16+esp],xmm5
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        mov     ecx,DWORD [240+edx]
+        pxor    xmm1,xmm2
+        pxor    xmm2,[esp]
+        pxor    xmm1,xmm3
+        pxor    xmm3,[16+esp]
+        pxor    xmm1,xmm4
+        pxor    xmm4,xmm6
+        pxor    xmm1,xmm5
+        pxor    xmm5,xmm7
+        movdqa  [96+esp],xmm1
+        mov     edi,DWORD [120+esp]
+        call    __aesni_encrypt4
+        xorps   xmm2,[esp]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,xmm6
+        movups  [esi*1+edi],xmm2
+        xorps   xmm5,xmm7
+        movups  [16+esi*1+edi],xmm3
+        movdqa  xmm0,xmm7
+        movups  [32+esi*1+edi],xmm4
+        movdqa  xmm1,[96+esp]
+        movups  [48+esi*1+edi],xmm5
+L$078done:
+        mov     edx,DWORD [128+esp]
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        movdqa  [esp],xmm2
+        pxor    xmm4,xmm4
+        movdqa  [16+esp],xmm2
+        pxor    xmm5,xmm5
+        movdqa  [32+esp],xmm2
+        pxor    xmm6,xmm6
+        movdqa  [48+esp],xmm2
+        pxor    xmm7,xmm7
+        movdqa  [64+esp],xmm2
+        movdqa  [80+esp],xmm2
+        movdqa  [96+esp],xmm2
+        lea     esp,[edx]
+        mov     ecx,DWORD [40+esp]
+        mov     ebx,DWORD [48+esp]
+        movdqu  [ecx],xmm0
+        pxor    xmm0,xmm0
+        movdqu  [ebx],xmm1
+        pxor    xmm1,xmm1
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_ocb_decrypt
+align   16
+_aesni_ocb_decrypt:
+L$_aesni_ocb_decrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     ecx,DWORD [40+esp]
+        mov     ebx,DWORD [48+esp]
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        movdqu  xmm0,[ecx]
+        mov     ebp,DWORD [36+esp]
+        movdqu  xmm1,[ebx]
+        mov     ebx,DWORD [44+esp]
+        mov     ecx,esp
+        sub     esp,132
+        and     esp,-16
+        sub     edi,esi
+        shl     eax,4
+        lea     eax,[eax*1+esi-96]
+        mov     DWORD [120+esp],edi
+        mov     DWORD [124+esp],eax
+        mov     DWORD [128+esp],ecx
+        mov     ecx,DWORD [240+edx]
+        test    ebp,1
+        jnz     NEAR L$084odd
+        bsf     eax,ebp
+        add     ebp,1
+        shl     eax,4
+        movdqu  xmm7,[eax*1+ebx]
+        mov     eax,edx
+        movdqu  xmm2,[esi]
+        lea     esi,[16+esi]
+        pxor    xmm7,xmm0
+        pxor    xmm2,xmm7
+        movdqa  xmm6,xmm1
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$085dec1_loop_17:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$085dec1_loop_17
+db      102,15,56,223,209
+        xorps   xmm2,xmm7
+        movaps  xmm1,xmm6
+        movdqa  xmm0,xmm7
+        xorps   xmm1,xmm2
+        movups  [esi*1+edi-16],xmm2
+        mov     ecx,DWORD [240+eax]
+        mov     edx,eax
+        mov     eax,DWORD [124+esp]
+L$084odd:
+        shl     ecx,4
+        mov     edi,16
+        sub     edi,ecx
+        mov     DWORD [112+esp],edx
+        lea     edx,[32+ecx*1+edx]
+        mov     DWORD [116+esp],edi
+        cmp     esi,eax
+        ja      NEAR L$086short
+        jmp     NEAR L$087grandloop
+align   32
+L$087grandloop:
+        lea     ecx,[1+ebp]
+        lea     eax,[3+ebp]
+        lea     edi,[5+ebp]
+        add     ebp,6
+        bsf     ecx,ecx
+        bsf     eax,eax
+        bsf     edi,edi
+        shl     ecx,4
+        shl     eax,4
+        shl     edi,4
+        movdqu  xmm2,[ebx]
+        movdqu  xmm3,[ecx*1+ebx]
+        mov     ecx,DWORD [116+esp]
+        movdqa  xmm4,xmm2
+        movdqu  xmm5,[eax*1+ebx]
+        movdqa  xmm6,xmm2
+        movdqu  xmm7,[edi*1+ebx]
+        pxor    xmm2,xmm0
+        pxor    xmm3,xmm2
+        movdqa  [esp],xmm2
+        pxor    xmm4,xmm3
+        movdqa  [16+esp],xmm3
+        pxor    xmm5,xmm4
+        movdqa  [32+esp],xmm4
+        pxor    xmm6,xmm5
+        movdqa  [48+esp],xmm5
+        pxor    xmm7,xmm6
+        movdqa  [64+esp],xmm6
+        movdqa  [80+esp],xmm7
+        movups  xmm0,[ecx*1+edx-48]
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        movdqu  xmm7,[80+esi]
+        lea     esi,[96+esi]
+        movdqa  [96+esp],xmm1
+        pxor    xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+        pxor    xmm5,xmm0
+        pxor    xmm6,xmm0
+        pxor    xmm7,xmm0
+        movups  xmm1,[ecx*1+edx-32]
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+        pxor    xmm7,[80+esp]
+        movups  xmm0,[ecx*1+edx-16]
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+db      102,15,56,222,233
+db      102,15,56,222,241
+db      102,15,56,222,249
+        mov     edi,DWORD [120+esp]
+        mov     eax,DWORD [124+esp]
+        call    L$_aesni_decrypt6_enter
+        movdqa  xmm0,[80+esp]
+        pxor    xmm2,[esp]
+        movdqa  xmm1,[96+esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+        pxor    xmm7,xmm0
+        pxor    xmm1,xmm2
+        movdqu  [esi*1+edi-96],xmm2
+        pxor    xmm1,xmm3
+        movdqu  [esi*1+edi-80],xmm3
+        pxor    xmm1,xmm4
+        movdqu  [esi*1+edi-64],xmm4
+        pxor    xmm1,xmm5
+        movdqu  [esi*1+edi-48],xmm5
+        pxor    xmm1,xmm6
+        movdqu  [esi*1+edi-32],xmm6
+        pxor    xmm1,xmm7
+        movdqu  [esi*1+edi-16],xmm7
+        cmp     esi,eax
+        jb      NEAR L$087grandloop
+L$086short:
+        add     eax,96
+        sub     eax,esi
+        jz      NEAR L$088done
+        cmp     eax,32
+        jb      NEAR L$089one
+        je      NEAR L$090two
+        cmp     eax,64
+        jb      NEAR L$091three
+        je      NEAR L$092four
+        lea     ecx,[1+ebp]
+        lea     eax,[3+ebp]
+        bsf     ecx,ecx
+        bsf     eax,eax
+        shl     ecx,4
+        shl     eax,4
+        movdqu  xmm2,[ebx]
+        movdqu  xmm3,[ecx*1+ebx]
+        mov     ecx,DWORD [116+esp]
+        movdqa  xmm4,xmm2
+        movdqu  xmm5,[eax*1+ebx]
+        movdqa  xmm6,xmm2
+        pxor    xmm2,xmm0
+        pxor    xmm3,xmm2
+        movdqa  [esp],xmm2
+        pxor    xmm4,xmm3
+        movdqa  [16+esp],xmm3
+        pxor    xmm5,xmm4
+        movdqa  [32+esp],xmm4
+        pxor    xmm6,xmm5
+        movdqa  [48+esp],xmm5
+        pxor    xmm7,xmm6
+        movdqa  [64+esp],xmm6
+        movups  xmm0,[ecx*1+edx-48]
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        pxor    xmm7,xmm7
+        movdqa  [96+esp],xmm1
+        pxor    xmm2,xmm0
+        pxor    xmm3,xmm0
+        pxor    xmm4,xmm0
+        pxor    xmm5,xmm0
+        pxor    xmm6,xmm0
+        movups  xmm1,[ecx*1+edx-32]
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,[64+esp]
+        movups  xmm0,[ecx*1+edx-16]
+db      102,15,56,222,209
+db      102,15,56,222,217
+db      102,15,56,222,225
+db      102,15,56,222,233
+db      102,15,56,222,241
+db      102,15,56,222,249
+        mov     edi,DWORD [120+esp]
+        call    L$_aesni_decrypt6_enter
+        movdqa  xmm0,[64+esp]
+        pxor    xmm2,[esp]
+        movdqa  xmm1,[96+esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,[32+esp]
+        pxor    xmm5,[48+esp]
+        pxor    xmm6,xmm0
+        pxor    xmm1,xmm2
+        movdqu  [esi*1+edi],xmm2
+        pxor    xmm1,xmm3
+        movdqu  [16+esi*1+edi],xmm3
+        pxor    xmm1,xmm4
+        movdqu  [32+esi*1+edi],xmm4
+        pxor    xmm1,xmm5
+        movdqu  [48+esi*1+edi],xmm5
+        pxor    xmm1,xmm6
+        movdqu  [64+esi*1+edi],xmm6
+        jmp     NEAR L$088done
+align   16
+L$089one:
+        movdqu  xmm7,[ebx]
+        mov     edx,DWORD [112+esp]
+        movdqu  xmm2,[esi]
+        mov     ecx,DWORD [240+edx]
+        pxor    xmm7,xmm0
+        pxor    xmm2,xmm7
+        movdqa  xmm6,xmm1
+        mov     edi,DWORD [120+esp]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$093dec1_loop_18:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$093dec1_loop_18
+db      102,15,56,223,209
+        xorps   xmm2,xmm7
+        movaps  xmm1,xmm6
+        movdqa  xmm0,xmm7
+        xorps   xmm1,xmm2
+        movups  [esi*1+edi],xmm2
+        jmp     NEAR L$088done
+align   16
+L$090two:
+        lea     ecx,[1+ebp]
+        mov     edx,DWORD [112+esp]
+        bsf     ecx,ecx
+        shl     ecx,4
+        movdqu  xmm6,[ebx]
+        movdqu  xmm7,[ecx*1+ebx]
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        mov     ecx,DWORD [240+edx]
+        movdqa  xmm5,xmm1
+        pxor    xmm6,xmm0
+        pxor    xmm7,xmm6
+        pxor    xmm2,xmm6
+        pxor    xmm3,xmm7
+        mov     edi,DWORD [120+esp]
+        call    __aesni_decrypt2
+        xorps   xmm2,xmm6
+        xorps   xmm3,xmm7
+        movdqa  xmm0,xmm7
+        xorps   xmm5,xmm2
+        movups  [esi*1+edi],xmm2
+        xorps   xmm5,xmm3
+        movups  [16+esi*1+edi],xmm3
+        movaps  xmm1,xmm5
+        jmp     NEAR L$088done
+align   16
+L$091three:
+        lea     ecx,[1+ebp]
+        mov     edx,DWORD [112+esp]
+        bsf     ecx,ecx
+        shl     ecx,4
+        movdqu  xmm5,[ebx]
+        movdqu  xmm6,[ecx*1+ebx]
+        movdqa  xmm7,xmm5
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        mov     ecx,DWORD [240+edx]
+        movdqa  [96+esp],xmm1
+        pxor    xmm5,xmm0
+        pxor    xmm6,xmm5
+        pxor    xmm7,xmm6
+        pxor    xmm2,xmm5
+        pxor    xmm3,xmm6
+        pxor    xmm4,xmm7
+        mov     edi,DWORD [120+esp]
+        call    __aesni_decrypt3
+        movdqa  xmm1,[96+esp]
+        xorps   xmm2,xmm5
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm7
+        movups  [esi*1+edi],xmm2
+        pxor    xmm1,xmm2
+        movdqa  xmm0,xmm7
+        movups  [16+esi*1+edi],xmm3
+        pxor    xmm1,xmm3
+        movups  [32+esi*1+edi],xmm4
+        pxor    xmm1,xmm4
+        jmp     NEAR L$088done
+align   16
+L$092four:
+        lea     ecx,[1+ebp]
+        lea     eax,[3+ebp]
+        bsf     ecx,ecx
+        bsf     eax,eax
+        mov     edx,DWORD [112+esp]
+        shl     ecx,4
+        shl     eax,4
+        movdqu  xmm4,[ebx]
+        movdqu  xmm5,[ecx*1+ebx]
+        movdqa  xmm6,xmm4
+        movdqu  xmm7,[eax*1+ebx]
+        pxor    xmm4,xmm0
+        movdqu  xmm2,[esi]
+        pxor    xmm5,xmm4
+        movdqu  xmm3,[16+esi]
+        pxor    xmm6,xmm5
+        movdqa  [esp],xmm4
+        pxor    xmm7,xmm6
+        movdqa  [16+esp],xmm5
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        mov     ecx,DWORD [240+edx]
+        movdqa  [96+esp],xmm1
+        pxor    xmm2,[esp]
+        pxor    xmm3,[16+esp]
+        pxor    xmm4,xmm6
+        pxor    xmm5,xmm7
+        mov     edi,DWORD [120+esp]
+        call    __aesni_decrypt4
+        movdqa  xmm1,[96+esp]
+        xorps   xmm2,[esp]
+        xorps   xmm3,[16+esp]
+        xorps   xmm4,xmm6
+        movups  [esi*1+edi],xmm2
+        pxor    xmm1,xmm2
+        xorps   xmm5,xmm7
+        movups  [16+esi*1+edi],xmm3
+        pxor    xmm1,xmm3
+        movdqa  xmm0,xmm7
+        movups  [32+esi*1+edi],xmm4
+        pxor    xmm1,xmm4
+        movups  [48+esi*1+edi],xmm5
+        pxor    xmm1,xmm5
+L$088done:
+        mov     edx,DWORD [128+esp]
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        movdqa  [esp],xmm2
+        pxor    xmm4,xmm4
+        movdqa  [16+esp],xmm2
+        pxor    xmm5,xmm5
+        movdqa  [32+esp],xmm2
+        pxor    xmm6,xmm6
+        movdqa  [48+esp],xmm2
+        pxor    xmm7,xmm7
+        movdqa  [64+esp],xmm2
+        movdqa  [80+esp],xmm2
+        movdqa  [96+esp],xmm2
+        lea     esp,[edx]
+        mov     ecx,DWORD [40+esp]
+        mov     ebx,DWORD [48+esp]
+        movdqu  [ecx],xmm0
+        pxor    xmm0,xmm0
+        movdqu  [ebx],xmm1
+        pxor    xmm1,xmm1
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_cbc_encrypt
+align   16
+_aesni_cbc_encrypt:
+L$_aesni_cbc_encrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     ebx,esp
+        mov     edi,DWORD [24+esp]
+        sub     ebx,24
+        mov     eax,DWORD [28+esp]
+        and     ebx,-16
+        mov     edx,DWORD [32+esp]
+        mov     ebp,DWORD [36+esp]
+        test    eax,eax
+        jz      NEAR L$094cbc_abort
+        cmp     DWORD [40+esp],0
+        xchg    ebx,esp
+        movups  xmm7,[ebp]
+        mov     ecx,DWORD [240+edx]
+        mov     ebp,edx
+        mov     DWORD [16+esp],ebx
+        mov     ebx,ecx
+        je      NEAR L$095cbc_decrypt
+        movaps  xmm2,xmm7
+        cmp     eax,16
+        jb      NEAR L$096cbc_enc_tail
+        sub     eax,16
+        jmp     NEAR L$097cbc_enc_loop
+align   16
+L$097cbc_enc_loop:
+        movups  xmm7,[esi]
+        lea     esi,[16+esi]
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        xorps   xmm7,xmm0
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm7
+L$098enc1_loop_19:
+db      102,15,56,220,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$098enc1_loop_19
+db      102,15,56,221,209
+        mov     ecx,ebx
+        mov     edx,ebp
+        movups  [edi],xmm2
+        lea     edi,[16+edi]
+        sub     eax,16
+        jnc     NEAR L$097cbc_enc_loop
+        add     eax,16
+        jnz     NEAR L$096cbc_enc_tail
+        movaps  xmm7,xmm2
+        pxor    xmm2,xmm2
+        jmp     NEAR L$099cbc_ret
+L$096cbc_enc_tail:
+        mov     ecx,eax
+dd      2767451785
+        mov     ecx,16
+        sub     ecx,eax
+        xor     eax,eax
+dd      2868115081
+        lea     edi,[edi-16]
+        mov     ecx,ebx
+        mov     esi,edi
+        mov     edx,ebp
+        jmp     NEAR L$097cbc_enc_loop
+align   16
+L$095cbc_decrypt:
+        cmp     eax,80
+        jbe     NEAR L$100cbc_dec_tail
+        movaps  [esp],xmm7
+        sub     eax,80
+        jmp     NEAR L$101cbc_dec_loop6_enter
+align   16
+L$102cbc_dec_loop6:
+        movaps  [esp],xmm0
+        movups  [edi],xmm7
+        lea     edi,[16+edi]
+L$101cbc_dec_loop6_enter:
+        movdqu  xmm2,[esi]
+        movdqu  xmm3,[16+esi]
+        movdqu  xmm4,[32+esi]
+        movdqu  xmm5,[48+esi]
+        movdqu  xmm6,[64+esi]
+        movdqu  xmm7,[80+esi]
+        call    __aesni_decrypt6
+        movups  xmm1,[esi]
+        movups  xmm0,[16+esi]
+        xorps   xmm2,[esp]
+        xorps   xmm3,xmm1
+        movups  xmm1,[32+esi]
+        xorps   xmm4,xmm0
+        movups  xmm0,[48+esi]
+        xorps   xmm5,xmm1
+        movups  xmm1,[64+esi]
+        xorps   xmm6,xmm0
+        movups  xmm0,[80+esi]
+        xorps   xmm7,xmm1
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        lea     esi,[96+esi]
+        movups  [32+edi],xmm4
+        mov     ecx,ebx
+        movups  [48+edi],xmm5
+        mov     edx,ebp
+        movups  [64+edi],xmm6
+        lea     edi,[80+edi]
+        sub     eax,96
+        ja      NEAR L$102cbc_dec_loop6
+        movaps  xmm2,xmm7
+        movaps  xmm7,xmm0
+        add     eax,80
+        jle     NEAR L$103cbc_dec_clear_tail_collected
+        movups  [edi],xmm2
+        lea     edi,[16+edi]
+L$100cbc_dec_tail:
+        movups  xmm2,[esi]
+        movaps  xmm6,xmm2
+        cmp     eax,16
+        jbe     NEAR L$104cbc_dec_one
+        movups  xmm3,[16+esi]
+        movaps  xmm5,xmm3
+        cmp     eax,32
+        jbe     NEAR L$105cbc_dec_two
+        movups  xmm4,[32+esi]
+        cmp     eax,48
+        jbe     NEAR L$106cbc_dec_three
+        movups  xmm5,[48+esi]
+        cmp     eax,64
+        jbe     NEAR L$107cbc_dec_four
+        movups  xmm6,[64+esi]
+        movaps  [esp],xmm7
+        movups  xmm2,[esi]
+        xorps   xmm7,xmm7
+        call    __aesni_decrypt6
+        movups  xmm1,[esi]
+        movups  xmm0,[16+esi]
+        xorps   xmm2,[esp]
+        xorps   xmm3,xmm1
+        movups  xmm1,[32+esi]
+        xorps   xmm4,xmm0
+        movups  xmm0,[48+esi]
+        xorps   xmm5,xmm1
+        movups  xmm7,[64+esi]
+        xorps   xmm6,xmm0
+        movups  [edi],xmm2
+        movups  [16+edi],xmm3
+        pxor    xmm3,xmm3
+        movups  [32+edi],xmm4
+        pxor    xmm4,xmm4
+        movups  [48+edi],xmm5
+        pxor    xmm5,xmm5
+        lea     edi,[64+edi]
+        movaps  xmm2,xmm6
+        pxor    xmm6,xmm6
+        sub     eax,80
+        jmp     NEAR L$108cbc_dec_tail_collected
+align   16
+L$104cbc_dec_one:
+        movups  xmm0,[edx]
+        movups  xmm1,[16+edx]
+        lea     edx,[32+edx]
+        xorps   xmm2,xmm0
+L$109dec1_loop_20:
+db      102,15,56,222,209
+        dec     ecx
+        movups  xmm1,[edx]
+        lea     edx,[16+edx]
+        jnz     NEAR L$109dec1_loop_20
+db      102,15,56,223,209
+        xorps   xmm2,xmm7
+        movaps  xmm7,xmm6
+        sub     eax,16
+        jmp     NEAR L$108cbc_dec_tail_collected
+align   16
+L$105cbc_dec_two:
+        call    __aesni_decrypt2
+        xorps   xmm2,xmm7
+        xorps   xmm3,xmm6
+        movups  [edi],xmm2
+        movaps  xmm2,xmm3
+        pxor    xmm3,xmm3
+        lea     edi,[16+edi]
+        movaps  xmm7,xmm5
+        sub     eax,32
+        jmp     NEAR L$108cbc_dec_tail_collected
+align   16
+L$106cbc_dec_three:
+        call    __aesni_decrypt3
+        xorps   xmm2,xmm7
+        xorps   xmm3,xmm6
+        xorps   xmm4,xmm5
+        movups  [edi],xmm2
+        movaps  xmm2,xmm4
+        pxor    xmm4,xmm4
+        movups  [16+edi],xmm3
+        pxor    xmm3,xmm3
+        lea     edi,[32+edi]
+        movups  xmm7,[32+esi]
+        sub     eax,48
+        jmp     NEAR L$108cbc_dec_tail_collected
+align   16
+L$107cbc_dec_four:
+        call    __aesni_decrypt4
+        movups  xmm1,[16+esi]
+        movups  xmm0,[32+esi]
+        xorps   xmm2,xmm7
+        movups  xmm7,[48+esi]
+        xorps   xmm3,xmm6
+        movups  [edi],xmm2
+        xorps   xmm4,xmm1
+        movups  [16+edi],xmm3
+        pxor    xmm3,xmm3
+        xorps   xmm5,xmm0
+        movups  [32+edi],xmm4
+        pxor    xmm4,xmm4
+        lea     edi,[48+edi]
+        movaps  xmm2,xmm5
+        pxor    xmm5,xmm5
+        sub     eax,64
+        jmp     NEAR L$108cbc_dec_tail_collected
+align   16
+L$103cbc_dec_clear_tail_collected:
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        pxor    xmm5,xmm5
+        pxor    xmm6,xmm6
+L$108cbc_dec_tail_collected:
+        and     eax,15
+        jnz     NEAR L$110cbc_dec_tail_partial
+        movups  [edi],xmm2
+        pxor    xmm0,xmm0
+        jmp     NEAR L$099cbc_ret
+align   16
+L$110cbc_dec_tail_partial:
+        movaps  [esp],xmm2
+        pxor    xmm0,xmm0
+        mov     ecx,16
+        mov     esi,esp
+        sub     ecx,eax
+dd      2767451785
+        movdqa  [esp],xmm2
+L$099cbc_ret:
+        mov     esp,DWORD [16+esp]
+        mov     ebp,DWORD [36+esp]
+        pxor    xmm2,xmm2
+        pxor    xmm1,xmm1
+        movups  [ebp],xmm7
+        pxor    xmm7,xmm7
+L$094cbc_abort:
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+align   16
+__aesni_set_encrypt_key:
+        push    ebp
+        push    ebx
+        test    eax,eax
+        jz      NEAR L$111bad_pointer
+        test    edx,edx
+        jz      NEAR L$111bad_pointer
+        call    L$112pic
+L$112pic:
+        pop     ebx
+        lea     ebx,[(L$key_const-L$112pic)+ebx]
+        lea     ebp,[_OPENSSL_ia32cap_P]
+        movups  xmm0,[eax]
+        xorps   xmm4,xmm4
+        mov     ebp,DWORD [4+ebp]
+        lea     edx,[16+edx]
+        and     ebp,268437504
+        cmp     ecx,256
+        je      NEAR L$11314rounds
+        cmp     ecx,192
+        je      NEAR L$11412rounds
+        cmp     ecx,128
+        jne     NEAR L$115bad_keybits
+align   16
+L$11610rounds:
+        cmp     ebp,268435456
+        je      NEAR L$11710rounds_alt
+        mov     ecx,9
+        movups  [edx-16],xmm0
+db      102,15,58,223,200,1
+        call    L$118key_128_cold
+db      102,15,58,223,200,2
+        call    L$119key_128
+db      102,15,58,223,200,4
+        call    L$119key_128
+db      102,15,58,223,200,8
+        call    L$119key_128
+db      102,15,58,223,200,16
+        call    L$119key_128
+db      102,15,58,223,200,32
+        call    L$119key_128
+db      102,15,58,223,200,64
+        call    L$119key_128
+db      102,15,58,223,200,128
+        call    L$119key_128
+db      102,15,58,223,200,27
+        call    L$119key_128
+db      102,15,58,223,200,54
+        call    L$119key_128
+        movups  [edx],xmm0
+        mov     DWORD [80+edx],ecx
+        jmp     NEAR L$120good_key
+align   16
+L$119key_128:
+        movups  [edx],xmm0
+        lea     edx,[16+edx]
+L$118key_128_cold:
+        shufps  xmm4,xmm0,16
+        xorps   xmm0,xmm4
+        shufps  xmm4,xmm0,140
+        xorps   xmm0,xmm4
+        shufps  xmm1,xmm1,255
+        xorps   xmm0,xmm1
+        ret
+align   16
+L$11710rounds_alt:
+        movdqa  xmm5,[ebx]
+        mov     ecx,8
+        movdqa  xmm4,[32+ebx]
+        movdqa  xmm2,xmm0
+        movdqu  [edx-16],xmm0
+L$121loop_key128:
+db      102,15,56,0,197
+db      102,15,56,221,196
+        pslld   xmm4,1
+        lea     edx,[16+edx]
+        movdqa  xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm2,xmm3
+        pxor    xmm0,xmm2
+        movdqu  [edx-16],xmm0
+        movdqa  xmm2,xmm0
+        dec     ecx
+        jnz     NEAR L$121loop_key128
+        movdqa  xmm4,[48+ebx]
+db      102,15,56,0,197
+db      102,15,56,221,196
+        pslld   xmm4,1
+        movdqa  xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm2,xmm3
+        pxor    xmm0,xmm2
+        movdqu  [edx],xmm0
+        movdqa  xmm2,xmm0
+db      102,15,56,0,197
+db      102,15,56,221,196
+        movdqa  xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm3,xmm2
+        pslldq  xmm2,4
+        pxor    xmm2,xmm3
+        pxor    xmm0,xmm2
+        movdqu  [16+edx],xmm0
+        mov     ecx,9
+        mov     DWORD [96+edx],ecx
+        jmp     NEAR L$120good_key
+align   16
+L$11412rounds:
+        movq    xmm2,[16+eax]
+        cmp     ebp,268435456
+        je      NEAR L$12212rounds_alt
+        mov     ecx,11
+        movups  [edx-16],xmm0
+db      102,15,58,223,202,1
+        call    L$123key_192a_cold
+db      102,15,58,223,202,2
+        call    L$124key_192b
+db      102,15,58,223,202,4
+        call    L$125key_192a
+db      102,15,58,223,202,8
+        call    L$124key_192b
+db      102,15,58,223,202,16
+        call    L$125key_192a
+db      102,15,58,223,202,32
+        call    L$124key_192b
+db      102,15,58,223,202,64
+        call    L$125key_192a
+db      102,15,58,223,202,128
+        call    L$124key_192b
+        movups  [edx],xmm0
+        mov     DWORD [48+edx],ecx
+        jmp     NEAR L$120good_key
+align   16
+L$125key_192a:
+        movups  [edx],xmm0
+        lea     edx,[16+edx]
+align   16
+L$123key_192a_cold:
+        movaps  xmm5,xmm2
+L$126key_192b_warm:
+        shufps  xmm4,xmm0,16
+        movdqa  xmm3,xmm2
+        xorps   xmm0,xmm4
+        shufps  xmm4,xmm0,140
+        pslldq  xmm3,4
+        xorps   xmm0,xmm4
+        pshufd  xmm1,xmm1,85
+        pxor    xmm2,xmm3
+        pxor    xmm0,xmm1
+        pshufd  xmm3,xmm0,255
+        pxor    xmm2,xmm3
+        ret
+align   16
+L$124key_192b:
+        movaps  xmm3,xmm0
+        shufps  xmm5,xmm0,68
+        movups  [edx],xmm5
+        shufps  xmm3,xmm2,78
+        movups  [16+edx],xmm3
+        lea     edx,[32+edx]
+        jmp     NEAR L$126key_192b_warm
+align   16
+L$12212rounds_alt:
+        movdqa  xmm5,[16+ebx]
+        movdqa  xmm4,[32+ebx]
+        mov     ecx,8
+        movdqu  [edx-16],xmm0
+L$127loop_key192:
+        movq    [edx],xmm2
+        movdqa  xmm1,xmm2
+db      102,15,56,0,213
+db      102,15,56,221,212
+        pslld   xmm4,1
+        lea     edx,[24+edx]
+        movdqa  xmm3,xmm0
+        pslldq  xmm0,4
+        pxor    xmm3,xmm0
+        pslldq  xmm0,4
+        pxor    xmm3,xmm0
+        pslldq  xmm0,4
+        pxor    xmm0,xmm3
+        pshufd  xmm3,xmm0,255
+        pxor    xmm3,xmm1
+        pslldq  xmm1,4
+        pxor    xmm3,xmm1
+        pxor    xmm0,xmm2
+        pxor    xmm2,xmm3
+        movdqu  [edx-16],xmm0
+        dec     ecx
+        jnz     NEAR L$127loop_key192
+        mov     ecx,11
+        mov     DWORD [32+edx],ecx
+        jmp     NEAR L$120good_key
+align   16
+L$11314rounds:
+        movups  xmm2,[16+eax]
+        lea     edx,[16+edx]
+        cmp     ebp,268435456
+        je      NEAR L$12814rounds_alt
+        mov     ecx,13
+        movups  [edx-32],xmm0
+        movups  [edx-16],xmm2
+db      102,15,58,223,202,1
+        call    L$129key_256a_cold
+db      102,15,58,223,200,1
+        call    L$130key_256b
+db      102,15,58,223,202,2
+        call    L$131key_256a
+db      102,15,58,223,200,2
+        call    L$130key_256b
+db      102,15,58,223,202,4
+        call    L$131key_256a
+db      102,15,58,223,200,4
+        call    L$130key_256b
+db      102,15,58,223,202,8
+        call    L$131key_256a
+db      102,15,58,223,200,8
+        call    L$130key_256b
+db      102,15,58,223,202,16
+        call    L$131key_256a
+db      102,15,58,223,200,16
+        call    L$130key_256b
+db      102,15,58,223,202,32
+        call    L$131key_256a
+db      102,15,58,223,200,32
+        call    L$130key_256b
+db      102,15,58,223,202,64
+        call    L$131key_256a
+        movups  [edx],xmm0
+        mov     DWORD [16+edx],ecx
+        xor     eax,eax
+        jmp     NEAR L$120good_key
+align   16
+L$131key_256a:
+        movups  [edx],xmm2
+        lea     edx,[16+edx]
+L$129key_256a_cold:
+        shufps  xmm4,xmm0,16
+        xorps   xmm0,xmm4
+        shufps  xmm4,xmm0,140
+        xorps   xmm0,xmm4
+        shufps  xmm1,xmm1,255
+        xorps   xmm0,xmm1
+        ret
+align   16
+L$130key_256b:
+        movups  [edx],xmm0
+        lea     edx,[16+edx]
+        shufps  xmm4,xmm2,16
+        xorps   xmm2,xmm4
+        shufps  xmm4,xmm2,140
+        xorps   xmm2,xmm4
+        shufps  xmm1,xmm1,170
+        xorps   xmm2,xmm1
+        ret
+align   16
+L$12814rounds_alt:
+        movdqa  xmm5,[ebx]
+        movdqa  xmm4,[32+ebx]
+        mov     ecx,7
+        movdqu  [edx-32],xmm0
+        movdqa  xmm1,xmm2
+        movdqu  [edx-16],xmm2
+L$132loop_key256:
+db      102,15,56,0,213
+db      102,15,56,221,212
+        movdqa  xmm3,xmm0
+        pslldq  xmm0,4
+        pxor    xmm3,xmm0
+        pslldq  xmm0,4
+        pxor    xmm3,xmm0
+        pslldq  xmm0,4
+        pxor    xmm0,xmm3
+        pslld   xmm4,1
+        pxor    xmm0,xmm2
+        movdqu  [edx],xmm0
+        dec     ecx
+        jz      NEAR L$133done_key256
+        pshufd  xmm2,xmm0,255
+        pxor    xmm3,xmm3
+db      102,15,56,221,211
+        movdqa  xmm3,xmm1
+        pslldq  xmm1,4
+        pxor    xmm3,xmm1
+        pslldq  xmm1,4
+        pxor    xmm3,xmm1
+        pslldq  xmm1,4
+        pxor    xmm1,xmm3
+        pxor    xmm2,xmm1
+        movdqu  [16+edx],xmm2
+        lea     edx,[32+edx]
+        movdqa  xmm1,xmm2
+        jmp     NEAR L$132loop_key256
+L$133done_key256:
+        mov     ecx,13
+        mov     DWORD [16+edx],ecx
+L$120good_key:
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        pxor    xmm5,xmm5
+        xor     eax,eax
+        pop     ebx
+        pop     ebp
+        ret
+align   4
+L$111bad_pointer:
+        mov     eax,-1
+        pop     ebx
+        pop     ebp
+        ret
+align   4
+L$115bad_keybits:
+        pxor    xmm0,xmm0
+        mov     eax,-2
+        pop     ebx
+        pop     ebp
+        ret
+global  _aesni_set_encrypt_key
+align   16
+_aesni_set_encrypt_key:
+L$_aesni_set_encrypt_key_begin:
+        mov     eax,DWORD [4+esp]
+        mov     ecx,DWORD [8+esp]
+        mov     edx,DWORD [12+esp]
+        call    __aesni_set_encrypt_key
+        ret
+global  _aesni_set_decrypt_key
+align   16
+_aesni_set_decrypt_key:
+L$_aesni_set_decrypt_key_begin:
+        mov     eax,DWORD [4+esp]
+        mov     ecx,DWORD [8+esp]
+        mov     edx,DWORD [12+esp]
+        call    __aesni_set_encrypt_key
+        mov     edx,DWORD [12+esp]
+        shl     ecx,4
+        test    eax,eax
+        jnz     NEAR L$134dec_key_ret
+        lea     eax,[16+ecx*1+edx]
+        movups  xmm0,[edx]
+        movups  xmm1,[eax]
+        movups  [eax],xmm0
+        movups  [edx],xmm1
+        lea     edx,[16+edx]
+        lea     eax,[eax-16]
+L$135dec_key_inverse:
+        movups  xmm0,[edx]
+        movups  xmm1,[eax]
+db      102,15,56,219,192
+db      102,15,56,219,201
+        lea     edx,[16+edx]
+        lea     eax,[eax-16]
+        movups  [16+eax],xmm0
+        movups  [edx-16],xmm1
+        cmp     eax,edx
+        ja      NEAR L$135dec_key_inverse
+        movups  xmm0,[edx]
+db      102,15,56,219,192
+        movups  [edx],xmm0
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        xor     eax,eax
+L$134dec_key_ret:
+        ret
+align   64
+L$key_const:
+dd      202313229,202313229,202313229,202313229
+dd      67569157,67569157,67569157,67569157
+dd      1,1,1,1
+dd      27,27,27,27
+db      65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+db      83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+db      32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+db      115,108,46,111,114,103,62,0
+segment .bss
+common  _OPENSSL_ia32cap_P 16
diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/aes/vpaes-x86.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/aes/vpaes-x86.nasm
new file mode 100644
index 0000000000..cf2e13f620
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/aes/vpaes-x86.nasm
@@ -0,0 +1,651 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/aes/asm/vpaes-x86.pl
+;
+; Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+align   64
+L$_vpaes_consts:
+dd      218628480,235210255,168496130,67568393
+dd      252381056,17041926,33884169,51187212
+dd      252645135,252645135,252645135,252645135
+dd      1512730624,3266504856,1377990664,3401244816
+dd      830229760,1275146365,2969422977,3447763452
+dd      3411033600,2979783055,338359620,2782886510
+dd      4209124096,907596821,221174255,1006095553
+dd      191964160,3799684038,3164090317,1589111125
+dd      182528256,1777043520,2877432650,3265356744
+dd      1874708224,3503451415,3305285752,363511674
+dd      1606117888,3487855781,1093350906,2384367825
+dd      197121,67569157,134941193,202313229
+dd      67569157,134941193,202313229,197121
+dd      134941193,202313229,197121,67569157
+dd      202313229,197121,67569157,134941193
+dd      33619971,100992007,168364043,235736079
+dd      235736079,33619971,100992007,168364043
+dd      168364043,235736079,33619971,100992007
+dd      100992007,168364043,235736079,33619971
+dd      50462976,117835012,185207048,252579084
+dd      252314880,51251460,117574920,184942860
+dd      184682752,252054788,50987272,118359308
+dd      118099200,185467140,251790600,50727180
+dd      2946363062,528716217,1300004225,1881839624
+dd      1532713819,1532713819,1532713819,1532713819
+dd      3602276352,4288629033,3737020424,4153884961
+dd      1354558464,32357713,2958822624,3775749553
+dd      1201988352,132424512,1572796698,503232858
+dd      2213177600,1597421020,4103937655,675398315
+dd      2749646592,4273543773,1511898873,121693092
+dd      3040248576,1103263732,2871565598,1608280554
+dd      2236667136,2588920351,482954393,64377734
+dd      3069987328,291237287,2117370568,3650299247
+dd      533321216,3573750986,2572112006,1401264716
+dd      1339849704,2721158661,548607111,3445553514
+dd      2128193280,3054596040,2183486460,1257083700
+dd      655635200,1165381986,3923443150,2344132524
+dd      190078720,256924420,290342170,357187870
+dd      1610966272,2263057382,4103205268,309794674
+dd      2592527872,2233205587,1335446729,3402964816
+dd      3973531904,3225098121,3002836325,1918774430
+dd      3870401024,2102906079,2284471353,4117666579
+dd      617007872,1021508343,366931923,691083277
+dd      2528395776,3491914898,2968704004,1613121270
+dd      3445188352,3247741094,844474987,4093578302
+dd      651481088,1190302358,1689581232,574775300
+dd      4289380608,206939853,2555985458,2489840491
+dd      2130264064,327674451,3566485037,3349835193
+dd      2470714624,316102159,3636825756,3393945945
+db      86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+db      111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+db      83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+db      114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+db      118,101,114,115,105,116,121,41,0
+align   64
+align   16
+__vpaes_preheat:
+        add     ebp,DWORD [esp]
+        movdqa  xmm7,[ebp-48]
+        movdqa  xmm6,[ebp-16]
+        ret
+align   16
+__vpaes_encrypt_core:
+        mov     ecx,16
+        mov     eax,DWORD [240+edx]
+        movdqa  xmm1,xmm6
+        movdqa  xmm2,[ebp]
+        pandn   xmm1,xmm0
+        pand    xmm0,xmm6
+        movdqu  xmm5,[edx]
+db      102,15,56,0,208
+        movdqa  xmm0,[16+ebp]
+        pxor    xmm2,xmm5
+        psrld   xmm1,4
+        add     edx,16
+db      102,15,56,0,193
+        lea     ebx,[192+ebp]
+        pxor    xmm0,xmm2
+        jmp     NEAR L$000enc_entry
+align   16
+L$001enc_loop:
+        movdqa  xmm4,[32+ebp]
+        movdqa  xmm0,[48+ebp]
+db      102,15,56,0,226
+db      102,15,56,0,195
+        pxor    xmm4,xmm5
+        movdqa  xmm5,[64+ebp]
+        pxor    xmm0,xmm4
+        movdqa  xmm1,[ecx*1+ebx-64]
+db      102,15,56,0,234
+        movdqa  xmm2,[80+ebp]
+        movdqa  xmm4,[ecx*1+ebx]
+db      102,15,56,0,211
+        movdqa  xmm3,xmm0
+        pxor    xmm2,xmm5
+db      102,15,56,0,193
+        add     edx,16
+        pxor    xmm0,xmm2
+db      102,15,56,0,220
+        add     ecx,16
+        pxor    xmm3,xmm0
+db      102,15,56,0,193
+        and     ecx,48
+        sub     eax,1
+        pxor    xmm0,xmm3
+L$000enc_entry:
+        movdqa  xmm1,xmm6
+        movdqa  xmm5,[ebp-32]
+        pandn   xmm1,xmm0
+        psrld   xmm1,4
+        pand    xmm0,xmm6
+db      102,15,56,0,232
+        movdqa  xmm3,xmm7
+        pxor    xmm0,xmm1
+db      102,15,56,0,217
+        movdqa  xmm4,xmm7
+        pxor    xmm3,xmm5
+db      102,15,56,0,224
+        movdqa  xmm2,xmm7
+        pxor    xmm4,xmm5
+db      102,15,56,0,211
+        movdqa  xmm3,xmm7
+        pxor    xmm2,xmm0
+db      102,15,56,0,220
+        movdqu  xmm5,[edx]
+        pxor    xmm3,xmm1
+        jnz     NEAR L$001enc_loop
+        movdqa  xmm4,[96+ebp]
+        movdqa  xmm0,[112+ebp]
+db      102,15,56,0,226
+        pxor    xmm4,xmm5
+db      102,15,56,0,195
+        movdqa  xmm1,[64+ecx*1+ebx]
+        pxor    xmm0,xmm4
+db      102,15,56,0,193
+        ret
+align   16
+__vpaes_decrypt_core:
+        lea     ebx,[608+ebp]
+        mov     eax,DWORD [240+edx]
+        movdqa  xmm1,xmm6
+        movdqa  xmm2,[ebx-64]
+        pandn   xmm1,xmm0
+        mov     ecx,eax
+        psrld   xmm1,4
+        movdqu  xmm5,[edx]
+        shl     ecx,4
+        pand    xmm0,xmm6
+db      102,15,56,0,208
+        movdqa  xmm0,[ebx-48]
+        xor     ecx,48
+db      102,15,56,0,193
+        and     ecx,48
+        pxor    xmm2,xmm5
+        movdqa  xmm5,[176+ebp]
+        pxor    xmm0,xmm2
+        add     edx,16
+        lea     ecx,[ecx*1+ebx-352]
+        jmp     NEAR L$002dec_entry
+align   16
+L$003dec_loop:
+        movdqa  xmm4,[ebx-32]
+        movdqa  xmm1,[ebx-16]
+db      102,15,56,0,226
+db      102,15,56,0,203
+        pxor    xmm0,xmm4
+        movdqa  xmm4,[ebx]
+        pxor    xmm0,xmm1
+        movdqa  xmm1,[16+ebx]
+db      102,15,56,0,226
+db      102,15,56,0,197
+db      102,15,56,0,203
+        pxor    xmm0,xmm4
+        movdqa  xmm4,[32+ebx]
+        pxor    xmm0,xmm1
+        movdqa  xmm1,[48+ebx]
+db      102,15,56,0,226
+db      102,15,56,0,197
+db      102,15,56,0,203
+        pxor    xmm0,xmm4
+        movdqa  xmm4,[64+ebx]
+        pxor    xmm0,xmm1
+        movdqa  xmm1,[80+ebx]
+db      102,15,56,0,226
+db      102,15,56,0,197
+db      102,15,56,0,203
+        pxor    xmm0,xmm4
+        add     edx,16
+db      102,15,58,15,237,12
+        pxor    xmm0,xmm1
+        sub     eax,1
+L$002dec_entry:
+        movdqa  xmm1,xmm6
+        movdqa  xmm2,[ebp-32]
+        pandn   xmm1,xmm0
+        pand    xmm0,xmm6
+        psrld   xmm1,4
+db      102,15,56,0,208
+        movdqa  xmm3,xmm7
+        pxor    xmm0,xmm1
+db      102,15,56,0,217
+        movdqa  xmm4,xmm7
+        pxor    xmm3,xmm2
+db      102,15,56,0,224
+        pxor    xmm4,xmm2
+        movdqa  xmm2,xmm7
+db      102,15,56,0,211
+        movdqa  xmm3,xmm7
+        pxor    xmm2,xmm0
+db      102,15,56,0,220
+        movdqu  xmm0,[edx]
+        pxor    xmm3,xmm1
+        jnz     NEAR L$003dec_loop
+        movdqa  xmm4,[96+ebx]
+db      102,15,56,0,226
+        pxor    xmm4,xmm0
+        movdqa  xmm0,[112+ebx]
+        movdqa  xmm2,[ecx]
+db      102,15,56,0,195
+        pxor    xmm0,xmm4
+db      102,15,56,0,194
+        ret
+align   16
+__vpaes_schedule_core:
+        add     ebp,DWORD [esp]
+        movdqu  xmm0,[esi]
+        movdqa  xmm2,[320+ebp]
+        movdqa  xmm3,xmm0
+        lea     ebx,[ebp]
+        movdqa  [4+esp],xmm2
+        call    __vpaes_schedule_transform
+        movdqa  xmm7,xmm0
+        test    edi,edi
+        jnz     NEAR L$004schedule_am_decrypting
+        movdqu  [edx],xmm0
+        jmp     NEAR L$005schedule_go
+L$004schedule_am_decrypting:
+        movdqa  xmm1,[256+ecx*1+ebp]
+db      102,15,56,0,217
+        movdqu  [edx],xmm3
+        xor     ecx,48
+L$005schedule_go:
+        cmp     eax,192
+        ja      NEAR L$006schedule_256
+        je      NEAR L$007schedule_192
+L$008schedule_128:
+        mov     eax,10
+L$009loop_schedule_128:
+        call    __vpaes_schedule_round
+        dec     eax
+        jz      NEAR L$010schedule_mangle_last
+        call    __vpaes_schedule_mangle
+        jmp     NEAR L$009loop_schedule_128
+align   16
+L$007schedule_192:
+        movdqu  xmm0,[8+esi]
+        call    __vpaes_schedule_transform
+        movdqa  xmm6,xmm0
+        pxor    xmm4,xmm4
+        movhlps xmm6,xmm4
+        mov     eax,4
+L$011loop_schedule_192:
+        call    __vpaes_schedule_round
+db      102,15,58,15,198,8
+        call    __vpaes_schedule_mangle
+        call    __vpaes_schedule_192_smear
+        call    __vpaes_schedule_mangle
+        call    __vpaes_schedule_round
+        dec     eax
+        jz      NEAR L$010schedule_mangle_last
+        call    __vpaes_schedule_mangle
+        call    __vpaes_schedule_192_smear
+        jmp     NEAR L$011loop_schedule_192
+align   16
+L$006schedule_256:
+        movdqu  xmm0,[16+esi]
+        call    __vpaes_schedule_transform
+        mov     eax,7
+L$012loop_schedule_256:
+        call    __vpaes_schedule_mangle
+        movdqa  xmm6,xmm0
+        call    __vpaes_schedule_round
+        dec     eax
+        jz      NEAR L$010schedule_mangle_last
+        call    __vpaes_schedule_mangle
+        pshufd  xmm0,xmm0,255
+        movdqa  [20+esp],xmm7
+        movdqa  xmm7,xmm6
+        call    L$_vpaes_schedule_low_round
+        movdqa  xmm7,[20+esp]
+        jmp     NEAR L$012loop_schedule_256
+align   16
+L$010schedule_mangle_last:
+        lea     ebx,[384+ebp]
+        test    edi,edi
+        jnz     NEAR L$013schedule_mangle_last_dec
+        movdqa  xmm1,[256+ecx*1+ebp]
+db      102,15,56,0,193
+        lea     ebx,[352+ebp]
+        add     edx,32
+L$013schedule_mangle_last_dec:
+        add     edx,-16
+        pxor    xmm0,[336+ebp]
+        call    __vpaes_schedule_transform
+        movdqu  [edx],xmm0
+        pxor    xmm0,xmm0
+        pxor    xmm1,xmm1
+        pxor    xmm2,xmm2
+        pxor    xmm3,xmm3
+        pxor    xmm4,xmm4
+        pxor    xmm5,xmm5
+        pxor    xmm6,xmm6
+        pxor    xmm7,xmm7
+        ret
+align   16
+__vpaes_schedule_192_smear:
+        pshufd  xmm1,xmm6,128
+        pshufd  xmm0,xmm7,254
+        pxor    xmm6,xmm1
+        pxor    xmm1,xmm1
+        pxor    xmm6,xmm0
+        movdqa  xmm0,xmm6
+        movhlps xmm6,xmm1
+        ret
+align   16
+__vpaes_schedule_round:
+        movdqa  xmm2,[8+esp]
+        pxor    xmm1,xmm1
+db      102,15,58,15,202,15
+db      102,15,58,15,210,15
+        pxor    xmm7,xmm1
+        pshufd  xmm0,xmm0,255
+db      102,15,58,15,192,1
+        movdqa  [8+esp],xmm2
+L$_vpaes_schedule_low_round:
+        movdqa  xmm1,xmm7
+        pslldq  xmm7,4
+        pxor    xmm7,xmm1
+        movdqa  xmm1,xmm7
+        pslldq  xmm7,8
+        pxor    xmm7,xmm1
+        pxor    xmm7,[336+ebp]
+        movdqa  xmm4,[ebp-16]
+        movdqa  xmm5,[ebp-48]
+        movdqa  xmm1,xmm4
+        pandn   xmm1,xmm0
+        psrld   xmm1,4
+        pand    xmm0,xmm4
+        movdqa  xmm2,[ebp-32]
+db      102,15,56,0,208
+        pxor    xmm0,xmm1
+        movdqa  xmm3,xmm5
+db      102,15,56,0,217
+        pxor    xmm3,xmm2
+        movdqa  xmm4,xmm5
+db      102,15,56,0,224
+        pxor    xmm4,xmm2
+        movdqa  xmm2,xmm5
+db      102,15,56,0,211
+        pxor    xmm2,xmm0
+        movdqa  xmm3,xmm5
+db      102,15,56,0,220
+        pxor    xmm3,xmm1
+        movdqa  xmm4,[32+ebp]
+db      102,15,56,0,226
+        movdqa  xmm0,[48+ebp]
+db      102,15,56,0,195
+        pxor    xmm0,xmm4
+        pxor    xmm0,xmm7
+        movdqa  xmm7,xmm0
+        ret
+align   16
+__vpaes_schedule_transform:
+        movdqa  xmm2,[ebp-16]
+        movdqa  xmm1,xmm2
+        pandn   xmm1,xmm0
+        psrld   xmm1,4
+        pand    xmm0,xmm2
+        movdqa  xmm2,[ebx]
+db      102,15,56,0,208
+        movdqa  xmm0,[16+ebx]
+db      102,15,56,0,193
+        pxor    xmm0,xmm2
+        ret
+align   16
+__vpaes_schedule_mangle:
+        movdqa  xmm4,xmm0
+        movdqa  xmm5,[128+ebp]
+        test    edi,edi
+        jnz     NEAR L$014schedule_mangle_dec
+        add     edx,16
+        pxor    xmm4,[336+ebp]
+db      102,15,56,0,229
+        movdqa  xmm3,xmm4
+db      102,15,56,0,229
+        pxor    xmm3,xmm4
+db      102,15,56,0,229
+        pxor    xmm3,xmm4
+        jmp     NEAR L$015schedule_mangle_both
+align   16
+L$014schedule_mangle_dec:
+        movdqa  xmm2,[ebp-16]
+        lea     esi,[416+ebp]
+        movdqa  xmm1,xmm2
+        pandn   xmm1,xmm4
+        psrld   xmm1,4
+        pand    xmm4,xmm2
+        movdqa  xmm2,[esi]
+db      102,15,56,0,212
+        movdqa  xmm3,[16+esi]
+db      102,15,56,0,217
+        pxor    xmm3,xmm2
+db      102,15,56,0,221
+        movdqa  xmm2,[32+esi]
+db      102,15,56,0,212
+        pxor    xmm2,xmm3
+        movdqa  xmm3,[48+esi]
+db      102,15,56,0,217
+        pxor    xmm3,xmm2
+db      102,15,56,0,221
+        movdqa  xmm2,[64+esi]
+db      102,15,56,0,212
+        pxor    xmm2,xmm3
+        movdqa  xmm3,[80+esi]
+db      102,15,56,0,217
+        pxor    xmm3,xmm2
+db      102,15,56,0,221
+        movdqa  xmm2,[96+esi]
+db      102,15,56,0,212
+        pxor    xmm2,xmm3
+        movdqa  xmm3,[112+esi]
+db      102,15,56,0,217
+        pxor    xmm3,xmm2
+        add     edx,-16
+L$015schedule_mangle_both:
+        movdqa  xmm1,[256+ecx*1+ebp]
+db      102,15,56,0,217
+        add     ecx,-16
+        and     ecx,48
+        movdqu  [edx],xmm3
+        ret
+global  _vpaes_set_encrypt_key
+align   16
+_vpaes_set_encrypt_key:
+L$_vpaes_set_encrypt_key_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        lea     ebx,[esp-56]
+        mov     eax,DWORD [24+esp]
+        and     ebx,-16
+        mov     edx,DWORD [28+esp]
+        xchg    ebx,esp
+        mov     DWORD [48+esp],ebx
+        mov     ebx,eax
+        shr     ebx,5
+        add     ebx,5
+        mov     DWORD [240+edx],ebx
+        mov     ecx,48
+        mov     edi,0
+        lea     ebp,[(L$_vpaes_consts+0x30-L$016pic_point)]
+        call    __vpaes_schedule_core
+L$016pic_point:
+        mov     esp,DWORD [48+esp]
+        xor     eax,eax
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _vpaes_set_decrypt_key
+align   16
+_vpaes_set_decrypt_key:
+L$_vpaes_set_decrypt_key_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        lea     ebx,[esp-56]
+        mov     eax,DWORD [24+esp]
+        and     ebx,-16
+        mov     edx,DWORD [28+esp]
+        xchg    ebx,esp
+        mov     DWORD [48+esp],ebx
+        mov     ebx,eax
+        shr     ebx,5
+        add     ebx,5
+        mov     DWORD [240+edx],ebx
+        shl     ebx,4
+        lea     edx,[16+ebx*1+edx]
+        mov     edi,1
+        mov     ecx,eax
+        shr     ecx,1
+        and     ecx,32
+        xor     ecx,32
+        lea     ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]
+        call    __vpaes_schedule_core
+L$017pic_point:
+        mov     esp,DWORD [48+esp]
+        xor     eax,eax
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _vpaes_encrypt
+align   16
+_vpaes_encrypt:
+L$_vpaes_encrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        lea     ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]
+        call    __vpaes_preheat
+L$018pic_point:
+        mov     esi,DWORD [20+esp]
+        lea     ebx,[esp-56]
+        mov     edi,DWORD [24+esp]
+        and     ebx,-16
+        mov     edx,DWORD [28+esp]
+        xchg    ebx,esp
+        mov     DWORD [48+esp],ebx
+        movdqu  xmm0,[esi]
+        call    __vpaes_encrypt_core
+        movdqu  [edi],xmm0
+        mov     esp,DWORD [48+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _vpaes_decrypt
+align   16
+_vpaes_decrypt:
+L$_vpaes_decrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        lea     ebp,[(L$_vpaes_consts+0x30-L$019pic_point)]
+        call    __vpaes_preheat
+L$019pic_point:
+        mov     esi,DWORD [20+esp]
+        lea     ebx,[esp-56]
+        mov     edi,DWORD [24+esp]
+        and     ebx,-16
+        mov     edx,DWORD [28+esp]
+        xchg    ebx,esp
+        mov     DWORD [48+esp],ebx
+        movdqu  xmm0,[esi]
+        call    __vpaes_decrypt_core
+        movdqu  [edi],xmm0
+        mov     esp,DWORD [48+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _vpaes_cbc_encrypt
+align   16
+_vpaes_cbc_encrypt:
+L$_vpaes_cbc_encrypt_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     edx,DWORD [32+esp]
+        sub     eax,16
+        jc      NEAR L$020cbc_abort
+        lea     ebx,[esp-56]
+        mov     ebp,DWORD [36+esp]
+        and     ebx,-16
+        mov     ecx,DWORD [40+esp]
+        xchg    ebx,esp
+        movdqu  xmm1,[ebp]
+        sub     edi,esi
+        mov     DWORD [48+esp],ebx
+        mov     DWORD [esp],edi
+        mov     DWORD [4+esp],edx
+        mov     DWORD [8+esp],ebp
+        mov     edi,eax
+        lea     ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]
+        call    __vpaes_preheat
+L$021pic_point:
+        cmp     ecx,0
+        je      NEAR L$022cbc_dec_loop
+        jmp     NEAR L$023cbc_enc_loop
+align   16
+L$023cbc_enc_loop:
+        movdqu  xmm0,[esi]
+        pxor    xmm0,xmm1
+        call    __vpaes_encrypt_core
+        mov     ebx,DWORD [esp]
+        mov     edx,DWORD [4+esp]
+        movdqa  xmm1,xmm0
+        movdqu  [esi*1+ebx],xmm0
+        lea     esi,[16+esi]
+        sub     edi,16
+        jnc     NEAR L$023cbc_enc_loop
+        jmp     NEAR L$024cbc_done
+align   16
+L$022cbc_dec_loop:
+        movdqu  xmm0,[esi]
+        movdqa  [16+esp],xmm1
+        movdqa  [32+esp],xmm0
+        call    __vpaes_decrypt_core
+        mov     ebx,DWORD [esp]
+        mov     edx,DWORD [4+esp]
+        pxor    xmm0,[16+esp]
+        movdqa  xmm1,[32+esp]
+        movdqu  [esi*1+ebx],xmm0
+        lea     esi,[16+esi]
+        sub     edi,16
+        jnc     NEAR L$022cbc_dec_loop
+L$024cbc_done:
+        mov     ebx,DWORD [8+esp]
+        mov     esp,DWORD [48+esp]
+        movdqu  [ebx],xmm1
+L$020cbc_abort:
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/modes/ghash-x86.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/modes/ghash-x86.nasm
new file mode 100644
index 0000000000..88c52f746c
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/modes/ghash-x86.nasm
@@ -0,0 +1,700 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/modes/asm/ghash-x86.pl
+;
+; Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+global  _gcm_gmult_4bit_x86
+align   16
+_gcm_gmult_4bit_x86:
+L$_gcm_gmult_4bit_x86_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        sub     esp,84
+        mov     edi,DWORD [104+esp]
+        mov     esi,DWORD [108+esp]
+        mov     ebp,DWORD [edi]
+        mov     edx,DWORD [4+edi]
+        mov     ecx,DWORD [8+edi]
+        mov     ebx,DWORD [12+edi]
+        mov     DWORD [16+esp],0
+        mov     DWORD [20+esp],471859200
+        mov     DWORD [24+esp],943718400
+        mov     DWORD [28+esp],610271232
+        mov     DWORD [32+esp],1887436800
+        mov     DWORD [36+esp],1822425088
+        mov     DWORD [40+esp],1220542464
+        mov     DWORD [44+esp],1423966208
+        mov     DWORD [48+esp],3774873600
+        mov     DWORD [52+esp],4246732800
+        mov     DWORD [56+esp],3644850176
+        mov     DWORD [60+esp],3311403008
+        mov     DWORD [64+esp],2441084928
+        mov     DWORD [68+esp],2376073216
+        mov     DWORD [72+esp],2847932416
+        mov     DWORD [76+esp],3051356160
+        mov     DWORD [esp],ebp
+        mov     DWORD [4+esp],edx
+        mov     DWORD [8+esp],ecx
+        mov     DWORD [12+esp],ebx
+        shr     ebx,20
+        and     ebx,240
+        mov     ebp,DWORD [4+ebx*1+esi]
+        mov     edx,DWORD [ebx*1+esi]
+        mov     ecx,DWORD [12+ebx*1+esi]
+        mov     ebx,DWORD [8+ebx*1+esi]
+        xor     eax,eax
+        mov     edi,15
+        jmp     NEAR L$000x86_loop
+align   16
+L$000x86_loop:
+        mov     al,bl
+        shrd    ebx,ecx,4
+        and     al,15
+        shrd    ecx,edx,4
+        shrd    edx,ebp,4
+        shr     ebp,4
+        xor     ebp,DWORD [16+eax*4+esp]
+        mov     al,BYTE [edi*1+esp]
+        and     al,240
+        xor     ebx,DWORD [8+eax*1+esi]
+        xor     ecx,DWORD [12+eax*1+esi]
+        xor     edx,DWORD [eax*1+esi]
+        xor     ebp,DWORD [4+eax*1+esi]
+        dec     edi
+        js      NEAR L$001x86_break
+        mov     al,bl
+        shrd    ebx,ecx,4
+        and     al,15
+        shrd    ecx,edx,4
+        shrd    edx,ebp,4
+        shr     ebp,4
+        xor     ebp,DWORD [16+eax*4+esp]
+        mov     al,BYTE [edi*1+esp]
+        shl     al,4
+        xor     ebx,DWORD [8+eax*1+esi]
+        xor     ecx,DWORD [12+eax*1+esi]
+        xor     edx,DWORD [eax*1+esi]
+        xor     ebp,DWORD [4+eax*1+esi]
+        jmp     NEAR L$000x86_loop
+align   16
+L$001x86_break:
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        bswap   ebp
+        mov     edi,DWORD [104+esp]
+        mov     DWORD [12+edi],ebx
+        mov     DWORD [8+edi],ecx
+        mov     DWORD [4+edi],edx
+        mov     DWORD [edi],ebp
+        add     esp,84
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _gcm_ghash_4bit_x86
+align   16
+_gcm_ghash_4bit_x86:
+L$_gcm_ghash_4bit_x86_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        sub     esp,84
+        mov     ebx,DWORD [104+esp]
+        mov     esi,DWORD [108+esp]
+        mov     edi,DWORD [112+esp]
+        mov     ecx,DWORD [116+esp]
+        add     ecx,edi
+        mov     DWORD [116+esp],ecx
+        mov     ebp,DWORD [ebx]
+        mov     edx,DWORD [4+ebx]
+        mov     ecx,DWORD [8+ebx]
+        mov     ebx,DWORD [12+ebx]
+        mov     DWORD [16+esp],0
+        mov     DWORD [20+esp],471859200
+        mov     DWORD [24+esp],943718400
+        mov     DWORD [28+esp],610271232
+        mov     DWORD [32+esp],1887436800
+        mov     DWORD [36+esp],1822425088
+        mov     DWORD [40+esp],1220542464
+        mov     DWORD [44+esp],1423966208
+        mov     DWORD [48+esp],3774873600
+        mov     DWORD [52+esp],4246732800
+        mov     DWORD [56+esp],3644850176
+        mov     DWORD [60+esp],3311403008
+        mov     DWORD [64+esp],2441084928
+        mov     DWORD [68+esp],2376073216
+        mov     DWORD [72+esp],2847932416
+        mov     DWORD [76+esp],3051356160
+align   16
+L$002x86_outer_loop:
+        xor     ebx,DWORD [12+edi]
+        xor     ecx,DWORD [8+edi]
+        xor     edx,DWORD [4+edi]
+        xor     ebp,DWORD [edi]
+        mov     DWORD [12+esp],ebx
+        mov     DWORD [8+esp],ecx
+        mov     DWORD [4+esp],edx
+        mov     DWORD [esp],ebp
+        shr     ebx,20
+        and     ebx,240
+        mov     ebp,DWORD [4+ebx*1+esi]
+        mov     edx,DWORD [ebx*1+esi]
+        mov     ecx,DWORD [12+ebx*1+esi]
+        mov     ebx,DWORD [8+ebx*1+esi]
+        xor     eax,eax
+        mov     edi,15
+        jmp     NEAR L$003x86_loop
+align   16
+L$003x86_loop:
+        mov     al,bl
+        shrd    ebx,ecx,4
+        and     al,15
+        shrd    ecx,edx,4
+        shrd    edx,ebp,4
+        shr     ebp,4
+        xor     ebp,DWORD [16+eax*4+esp]
+        mov     al,BYTE [edi*1+esp]
+        and     al,240
+        xor     ebx,DWORD [8+eax*1+esi]
+        xor     ecx,DWORD [12+eax*1+esi]
+        xor     edx,DWORD [eax*1+esi]
+        xor     ebp,DWORD [4+eax*1+esi]
+        dec     edi
+        js      NEAR L$004x86_break
+        mov     al,bl
+        shrd    ebx,ecx,4
+        and     al,15
+        shrd    ecx,edx,4
+        shrd    edx,ebp,4
+        shr     ebp,4
+        xor     ebp,DWORD [16+eax*4+esp]
+        mov     al,BYTE [edi*1+esp]
+        shl     al,4
+        xor     ebx,DWORD [8+eax*1+esi]
+        xor     ecx,DWORD [12+eax*1+esi]
+        xor     edx,DWORD [eax*1+esi]
+        xor     ebp,DWORD [4+eax*1+esi]
+        jmp     NEAR L$003x86_loop
+align   16
+L$004x86_break:
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        bswap   ebp
+        mov     edi,DWORD [112+esp]
+        lea     edi,[16+edi]
+        cmp     edi,DWORD [116+esp]
+        mov     DWORD [112+esp],edi
+        jb      NEAR L$002x86_outer_loop
+        mov     edi,DWORD [104+esp]
+        mov     DWORD [12+edi],ebx
+        mov     DWORD [8+edi],ecx
+        mov     DWORD [4+edi],edx
+        mov     DWORD [edi],ebp
+        add     esp,84
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+align   16
+__mmx_gmult_4bit_inner:
+        xor     ecx,ecx
+        mov     edx,ebx
+        mov     cl,dl
+        shl     cl,4
+        and     edx,240
+        movq    mm0,[8+ecx*1+esi]
+        movq    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [14+edi]
+        psllq   mm2,60
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [13+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [12+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [11+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [10+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [9+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [8+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [7+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [6+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [5+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [4+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [3+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [2+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [1+edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        mov     cl,BYTE [edi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        mov     edx,ecx
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        shl     cl,4
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+ecx*1+esi]
+        psllq   mm2,60
+        and     edx,240
+        pxor    mm1,[ebp*8+eax]
+        and     ebx,15
+        pxor    mm1,[ecx*1+esi]
+        movd    ebp,mm0
+        pxor    mm0,mm2
+        psrlq   mm0,4
+        movq    mm2,mm1
+        psrlq   mm1,4
+        pxor    mm0,[8+edx*1+esi]
+        psllq   mm2,60
+        pxor    mm1,[ebx*8+eax]
+        and     ebp,15
+        pxor    mm1,[edx*1+esi]
+        movd    ebx,mm0
+        pxor    mm0,mm2
+        mov     edi,DWORD [4+ebp*8+eax]
+        psrlq   mm0,32
+        movd    edx,mm1
+        psrlq   mm1,32
+        movd    ecx,mm0
+        movd    ebp,mm1
+        shl     edi,4
+        bswap   ebx
+        bswap   edx
+        bswap   ecx
+        xor     ebp,edi
+        bswap   ebp
+        ret
+global  _gcm_gmult_4bit_mmx
+align   16
+_gcm_gmult_4bit_mmx:
+L$_gcm_gmult_4bit_mmx_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     edi,DWORD [20+esp]
+        mov     esi,DWORD [24+esp]
+        call    L$005pic_point
+L$005pic_point:
+        pop     eax
+        lea     eax,[(L$rem_4bit-L$005pic_point)+eax]
+        movzx   ebx,BYTE [15+edi]
+        call    __mmx_gmult_4bit_inner
+        mov     edi,DWORD [20+esp]
+        emms
+        mov     DWORD [12+edi],ebx
+        mov     DWORD [4+edi],edx
+        mov     DWORD [8+edi],ecx
+        mov     DWORD [edi],ebp
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _gcm_ghash_4bit_mmx
+align   16
+_gcm_ghash_4bit_mmx:
+L$_gcm_ghash_4bit_mmx_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     ebp,DWORD [20+esp]
+        mov     esi,DWORD [24+esp]
+        mov     edi,DWORD [28+esp]
+        mov     ecx,DWORD [32+esp]
+        call    L$006pic_point
+L$006pic_point:
+        pop     eax
+        lea     eax,[(L$rem_4bit-L$006pic_point)+eax]
+        add     ecx,edi
+        mov     DWORD [32+esp],ecx
+        sub     esp,20
+        mov     ebx,DWORD [12+ebp]
+        mov     edx,DWORD [4+ebp]
+        mov     ecx,DWORD [8+ebp]
+        mov     ebp,DWORD [ebp]
+        jmp     NEAR L$007mmx_outer_loop
+align   16
+L$007mmx_outer_loop:
+        xor     ebx,DWORD [12+edi]
+        xor     edx,DWORD [4+edi]
+        xor     ecx,DWORD [8+edi]
+        xor     ebp,DWORD [edi]
+        mov     DWORD [48+esp],edi
+        mov     DWORD [12+esp],ebx
+        mov     DWORD [4+esp],edx
+        mov     DWORD [8+esp],ecx
+        mov     DWORD [esp],ebp
+        mov     edi,esp
+        shr     ebx,24
+        call    __mmx_gmult_4bit_inner
+        mov     edi,DWORD [48+esp]
+        lea     edi,[16+edi]
+        cmp     edi,DWORD [52+esp]
+        jb      NEAR L$007mmx_outer_loop
+        mov     edi,DWORD [40+esp]
+        emms
+        mov     DWORD [12+edi],ebx
+        mov     DWORD [4+edi],edx
+        mov     DWORD [8+edi],ecx
+        mov     DWORD [edi],ebp
+        add     esp,20
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+align   64
+L$rem_4bit:
+dd      0,0,0,29491200,0,58982400,0,38141952
+dd      0,117964800,0,113901568,0,76283904,0,88997888
+dd      0,235929600,0,265420800,0,227803136,0,206962688
+dd      0,152567808,0,148504576,0,177995776,0,190709760
+db      71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
+db      82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+db      112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+db      0
diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha1-586.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha1-586.nasm
new file mode 100644
index 0000000000..bf2831e5ea
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha1-586.nasm
@@ -0,0 +1,1394 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/sha/asm/sha1-586.pl
+;
+; Copyright 1998-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+global  _sha1_block_data_order
+align   16
+_sha1_block_data_order:
+L$_sha1_block_data_order_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     ebp,DWORD [20+esp]
+        mov     esi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        sub     esp,76
+        shl     eax,6
+        add     eax,esi
+        mov     DWORD [104+esp],eax
+        mov     edi,DWORD [16+ebp]
+        jmp     NEAR L$000loop
+align   16
+L$000loop:
+        mov     eax,DWORD [esi]
+        mov     ebx,DWORD [4+esi]
+        mov     ecx,DWORD [8+esi]
+        mov     edx,DWORD [12+esi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        mov     DWORD [esp],eax
+        mov     DWORD [4+esp],ebx
+        mov     DWORD [8+esp],ecx
+        mov     DWORD [12+esp],edx
+        mov     eax,DWORD [16+esi]
+        mov     ebx,DWORD [20+esi]
+        mov     ecx,DWORD [24+esi]
+        mov     edx,DWORD [28+esi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        mov     DWORD [16+esp],eax
+        mov     DWORD [20+esp],ebx
+        mov     DWORD [24+esp],ecx
+        mov     DWORD [28+esp],edx
+        mov     eax,DWORD [32+esi]
+        mov     ebx,DWORD [36+esi]
+        mov     ecx,DWORD [40+esi]
+        mov     edx,DWORD [44+esi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        mov     DWORD [32+esp],eax
+        mov     DWORD [36+esp],ebx
+        mov     DWORD [40+esp],ecx
+        mov     DWORD [44+esp],edx
+        mov     eax,DWORD [48+esi]
+        mov     ebx,DWORD [52+esi]
+        mov     ecx,DWORD [56+esi]
+        mov     edx,DWORD [60+esi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        mov     DWORD [48+esp],eax
+        mov     DWORD [52+esp],ebx
+        mov     DWORD [56+esp],ecx
+        mov     DWORD [60+esp],edx
+        mov     DWORD [100+esp],esi
+        mov     eax,DWORD [ebp]
+        mov     ebx,DWORD [4+ebp]
+        mov     ecx,DWORD [8+ebp]
+        mov     edx,DWORD [12+ebp]
+        ; 00_15 0
+        mov     esi,ecx
+        mov     ebp,eax
+        rol     ebp,5
+        xor     esi,edx
+        add     ebp,edi
+        mov     edi,DWORD [esp]
+        and     esi,ebx
+        ror     ebx,2
+        xor     esi,edx
+        lea     ebp,[1518500249+edi*1+ebp]
+        add     ebp,esi
+        ; 00_15 1
+        mov     edi,ebx
+        mov     esi,ebp
+        rol     ebp,5
+        xor     edi,ecx
+        add     ebp,edx
+        mov     edx,DWORD [4+esp]
+        and     edi,eax
+        ror     eax,2
+        xor     edi,ecx
+        lea     ebp,[1518500249+edx*1+ebp]
+        add     ebp,edi
+        ; 00_15 2
+        mov     edx,eax
+        mov     edi,ebp
+        rol     ebp,5
+        xor     edx,ebx
+        add     ebp,ecx
+        mov     ecx,DWORD [8+esp]
+        and     edx,esi
+        ror     esi,2
+        xor     edx,ebx
+        lea     ebp,[1518500249+ecx*1+ebp]
+        add     ebp,edx
+        ; 00_15 3
+        mov     ecx,esi
+        mov     edx,ebp
+        rol     ebp,5
+        xor     ecx,eax
+        add     ebp,ebx
+        mov     ebx,DWORD [12+esp]
+        and     ecx,edi
+        ror     edi,2
+        xor     ecx,eax
+        lea     ebp,[1518500249+ebx*1+ebp]
+        add     ebp,ecx
+        ; 00_15 4
+        mov     ebx,edi
+        mov     ecx,ebp
+        rol     ebp,5
+        xor     ebx,esi
+        add     ebp,eax
+        mov     eax,DWORD [16+esp]
+        and     ebx,edx
+        ror     edx,2
+        xor     ebx,esi
+        lea     ebp,[1518500249+eax*1+ebp]
+        add     ebp,ebx
+        ; 00_15 5
+        mov     eax,edx
+        mov     ebx,ebp
+        rol     ebp,5
+        xor     eax,edi
+        add     ebp,esi
+        mov     esi,DWORD [20+esp]
+        and     eax,ecx
+        ror     ecx,2
+        xor     eax,edi
+        lea     ebp,[1518500249+esi*1+ebp]
+        add     ebp,eax
+        ; 00_15 6
+        mov     esi,ecx
+        mov     eax,ebp
+        rol     ebp,5
+        xor     esi,edx
+        add     ebp,edi
+        mov     edi,DWORD [24+esp]
+        and     esi,ebx
+        ror     ebx,2
+        xor     esi,edx
+        lea     ebp,[1518500249+edi*1+ebp]
+        add     ebp,esi
+        ; 00_15 7
+        mov     edi,ebx
+        mov     esi,ebp
+        rol     ebp,5
+        xor     edi,ecx
+        add     ebp,edx
+        mov     edx,DWORD [28+esp]
+        and     edi,eax
+        ror     eax,2
+        xor     edi,ecx
+        lea     ebp,[1518500249+edx*1+ebp]
+        add     ebp,edi
+        ; 00_15 8
+        mov     edx,eax
+        mov     edi,ebp
+        rol     ebp,5
+        xor     edx,ebx
+        add     ebp,ecx
+        mov     ecx,DWORD [32+esp]
+        and     edx,esi
+        ror     esi,2
+        xor     edx,ebx
+        lea     ebp,[1518500249+ecx*1+ebp]
+        add     ebp,edx
+        ; 00_15 9
+        mov     ecx,esi
+        mov     edx,ebp
+        rol     ebp,5
+        xor     ecx,eax
+        add     ebp,ebx
+        mov     ebx,DWORD [36+esp]
+        and     ecx,edi
+        ror     edi,2
+        xor     ecx,eax
+        lea     ebp,[1518500249+ebx*1+ebp]
+        add     ebp,ecx
+        ; 00_15 10
+        mov     ebx,edi
+        mov     ecx,ebp
+        rol     ebp,5
+        xor     ebx,esi
+        add     ebp,eax
+        mov     eax,DWORD [40+esp]
+        and     ebx,edx
+        ror     edx,2
+        xor     ebx,esi
+        lea     ebp,[1518500249+eax*1+ebp]
+        add     ebp,ebx
+        ; 00_15 11
+        mov     eax,edx
+        mov     ebx,ebp
+        rol     ebp,5
+        xor     eax,edi
+        add     ebp,esi
+        mov     esi,DWORD [44+esp]
+        and     eax,ecx
+        ror     ecx,2
+        xor     eax,edi
+        lea     ebp,[1518500249+esi*1+ebp]
+        add     ebp,eax
+        ; 00_15 12
+        mov     esi,ecx
+        mov     eax,ebp
+        rol     ebp,5
+        xor     esi,edx
+        add     ebp,edi
+        mov     edi,DWORD [48+esp]
+        and     esi,ebx
+        ror     ebx,2
+        xor     esi,edx
+        lea     ebp,[1518500249+edi*1+ebp]
+        add     ebp,esi
+        ; 00_15 13
+        mov     edi,ebx
+        mov     esi,ebp
+        rol     ebp,5
+        xor     edi,ecx
+        add     ebp,edx
+        mov     edx,DWORD [52+esp]
+        and     edi,eax
+        ror     eax,2
+        xor     edi,ecx
+        lea     ebp,[1518500249+edx*1+ebp]
+        add     ebp,edi
+        ; 00_15 14
+        mov     edx,eax
+        mov     edi,ebp
+        rol     ebp,5
+        xor     edx,ebx
+        add     ebp,ecx
+        mov     ecx,DWORD [56+esp]
+        and     edx,esi
+        ror     esi,2
+        xor     edx,ebx
+        lea     ebp,[1518500249+ecx*1+ebp]
+        add     ebp,edx
+        ; 00_15 15
+        mov     ecx,esi
+        mov     edx,ebp
+        rol     ebp,5
+        xor     ecx,eax
+        add     ebp,ebx
+        mov     ebx,DWORD [60+esp]
+        and     ecx,edi
+        ror     edi,2
+        xor     ecx,eax
+        lea     ebp,[1518500249+ebx*1+ebp]
+        mov     ebx,DWORD [esp]
+        add     ecx,ebp
+        ; 16_19 16
+        mov     ebp,edi
+        xor     ebx,DWORD [8+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [32+esp]
+        and     ebp,edx
+        xor     ebx,DWORD [52+esp]
+        rol     ebx,1
+        xor     ebp,esi
+        add     eax,ebp
+        mov     ebp,ecx
+        ror     edx,2
+        mov     DWORD [esp],ebx
+        rol     ebp,5
+        lea     ebx,[1518500249+eax*1+ebx]
+        mov     eax,DWORD [4+esp]
+        add     ebx,ebp
+        ; 16_19 17
+        mov     ebp,edx
+        xor     eax,DWORD [12+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [36+esp]
+        and     ebp,ecx
+        xor     eax,DWORD [56+esp]
+        rol     eax,1
+        xor     ebp,edi
+        add     esi,ebp
+        mov     ebp,ebx
+        ror     ecx,2
+        mov     DWORD [4+esp],eax
+        rol     ebp,5
+        lea     eax,[1518500249+esi*1+eax]
+        mov     esi,DWORD [8+esp]
+        add     eax,ebp
+        ; 16_19 18
+        mov     ebp,ecx
+        xor     esi,DWORD [16+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [40+esp]
+        and     ebp,ebx
+        xor     esi,DWORD [60+esp]
+        rol     esi,1
+        xor     ebp,edx
+        add     edi,ebp
+        mov     ebp,eax
+        ror     ebx,2
+        mov     DWORD [8+esp],esi
+        rol     ebp,5
+        lea     esi,[1518500249+edi*1+esi]
+        mov     edi,DWORD [12+esp]
+        add     esi,ebp
+        ; 16_19 19
+        mov     ebp,ebx
+        xor     edi,DWORD [20+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [44+esp]
+        and     ebp,eax
+        xor     edi,DWORD [esp]
+        rol     edi,1
+        xor     ebp,ecx
+        add     edx,ebp
+        mov     ebp,esi
+        ror     eax,2
+        mov     DWORD [12+esp],edi
+        rol     ebp,5
+        lea     edi,[1518500249+edx*1+edi]
+        mov     edx,DWORD [16+esp]
+        add     edi,ebp
+        ; 20_39 20
+        mov     ebp,esi
+        xor     edx,DWORD [24+esp]
+        xor     ebp,eax
+        xor     edx,DWORD [48+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [4+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [16+esp],edx
+        lea     edx,[1859775393+ecx*1+edx]
+        mov     ecx,DWORD [20+esp]
+        add     edx,ebp
+        ; 20_39 21
+        mov     ebp,edi
+        xor     ecx,DWORD [28+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [52+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [8+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [20+esp],ecx
+        lea     ecx,[1859775393+ebx*1+ecx]
+        mov     ebx,DWORD [24+esp]
+        add     ecx,ebp
+        ; 20_39 22
+        mov     ebp,edx
+        xor     ebx,DWORD [32+esp]
+        xor     ebp,edi
+        xor     ebx,DWORD [56+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [12+esp]
+        rol     ebx,1
+        add     eax,ebp
+        ror     edx,2
+        mov     ebp,ecx
+        rol     ebp,5
+        mov     DWORD [24+esp],ebx
+        lea     ebx,[1859775393+eax*1+ebx]
+        mov     eax,DWORD [28+esp]
+        add     ebx,ebp
+        ; 20_39 23
+        mov     ebp,ecx
+        xor     eax,DWORD [36+esp]
+        xor     ebp,edx
+        xor     eax,DWORD [60+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [16+esp]
+        rol     eax,1
+        add     esi,ebp
+        ror     ecx,2
+        mov     ebp,ebx
+        rol     ebp,5
+        mov     DWORD [28+esp],eax
+        lea     eax,[1859775393+esi*1+eax]
+        mov     esi,DWORD [32+esp]
+        add     eax,ebp
+        ; 20_39 24
+        mov     ebp,ebx
+        xor     esi,DWORD [40+esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [esp]
+        xor     ebp,edx
+        xor     esi,DWORD [20+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        mov     DWORD [32+esp],esi
+        lea     esi,[1859775393+edi*1+esi]
+        mov     edi,DWORD [36+esp]
+        add     esi,ebp
+        ; 20_39 25
+        mov     ebp,eax
+        xor     edi,DWORD [44+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [4+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [24+esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        mov     DWORD [36+esp],edi
+        lea     edi,[1859775393+edx*1+edi]
+        mov     edx,DWORD [40+esp]
+        add     edi,ebp
+        ; 20_39 26
+        mov     ebp,esi
+        xor     edx,DWORD [48+esp]
+        xor     ebp,eax
+        xor     edx,DWORD [8+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [28+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [40+esp],edx
+        lea     edx,[1859775393+ecx*1+edx]
+        mov     ecx,DWORD [44+esp]
+        add     edx,ebp
+        ; 20_39 27
+        mov     ebp,edi
+        xor     ecx,DWORD [52+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [12+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [32+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [44+esp],ecx
+        lea     ecx,[1859775393+ebx*1+ecx]
+        mov     ebx,DWORD [48+esp]
+        add     ecx,ebp
+        ; 20_39 28
+        mov     ebp,edx
+        xor     ebx,DWORD [56+esp]
+        xor     ebp,edi
+        xor     ebx,DWORD [16+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [36+esp]
+        rol     ebx,1
+        add     eax,ebp
+        ror     edx,2
+        mov     ebp,ecx
+        rol     ebp,5
+        mov     DWORD [48+esp],ebx
+        lea     ebx,[1859775393+eax*1+ebx]
+        mov     eax,DWORD [52+esp]
+        add     ebx,ebp
+        ; 20_39 29
+        mov     ebp,ecx
+        xor     eax,DWORD [60+esp]
+        xor     ebp,edx
+        xor     eax,DWORD [20+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [40+esp]
+        rol     eax,1
+        add     esi,ebp
+        ror     ecx,2
+        mov     ebp,ebx
+        rol     ebp,5
+        mov     DWORD [52+esp],eax
+        lea     eax,[1859775393+esi*1+eax]
+        mov     esi,DWORD [56+esp]
+        add     eax,ebp
+        ; 20_39 30
+        mov     ebp,ebx
+        xor     esi,DWORD [esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [24+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [44+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        mov     DWORD [56+esp],esi
+        lea     esi,[1859775393+edi*1+esi]
+        mov     edi,DWORD [60+esp]
+        add     esi,ebp
+        ; 20_39 31
+        mov     ebp,eax
+        xor     edi,DWORD [4+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [28+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [48+esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        mov     DWORD [60+esp],edi
+        lea     edi,[1859775393+edx*1+edi]
+        mov     edx,DWORD [esp]
+        add     edi,ebp
+        ; 20_39 32
+        mov     ebp,esi
+        xor     edx,DWORD [8+esp]
+        xor     ebp,eax
+        xor     edx,DWORD [32+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [52+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [esp],edx
+        lea     edx,[1859775393+ecx*1+edx]
+        mov     ecx,DWORD [4+esp]
+        add     edx,ebp
+        ; 20_39 33
+        mov     ebp,edi
+        xor     ecx,DWORD [12+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [36+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [56+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [4+esp],ecx
+        lea     ecx,[1859775393+ebx*1+ecx]
+        mov     ebx,DWORD [8+esp]
+        add     ecx,ebp
+        ; 20_39 34
+        mov     ebp,edx
+        xor     ebx,DWORD [16+esp]
+        xor     ebp,edi
+        xor     ebx,DWORD [40+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [60+esp]
+        rol     ebx,1
+        add     eax,ebp
+        ror     edx,2
+        mov     ebp,ecx
+        rol     ebp,5
+        mov     DWORD [8+esp],ebx
+        lea     ebx,[1859775393+eax*1+ebx]
+        mov     eax,DWORD [12+esp]
+        add     ebx,ebp
+        ; 20_39 35
+        mov     ebp,ecx
+        xor     eax,DWORD [20+esp]
+        xor     ebp,edx
+        xor     eax,DWORD [44+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [esp]
+        rol     eax,1
+        add     esi,ebp
+        ror     ecx,2
+        mov     ebp,ebx
+        rol     ebp,5
+        mov     DWORD [12+esp],eax
+        lea     eax,[1859775393+esi*1+eax]
+        mov     esi,DWORD [16+esp]
+        add     eax,ebp
+        ; 20_39 36
+        mov     ebp,ebx
+        xor     esi,DWORD [24+esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [48+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [4+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        mov     DWORD [16+esp],esi
+        lea     esi,[1859775393+edi*1+esi]
+        mov     edi,DWORD [20+esp]
+        add     esi,ebp
+        ; 20_39 37
+        mov     ebp,eax
+        xor     edi,DWORD [28+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [52+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [8+esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        mov     DWORD [20+esp],edi
+        lea     edi,[1859775393+edx*1+edi]
+        mov     edx,DWORD [24+esp]
+        add     edi,ebp
+        ; 20_39 38
+        mov     ebp,esi
+        xor     edx,DWORD [32+esp]
+        xor     ebp,eax
+        xor     edx,DWORD [56+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [12+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [24+esp],edx
+        lea     edx,[1859775393+ecx*1+edx]
+        mov     ecx,DWORD [28+esp]
+        add     edx,ebp
+        ; 20_39 39
+        mov     ebp,edi
+        xor     ecx,DWORD [36+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [60+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [16+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [28+esp],ecx
+        lea     ecx,[1859775393+ebx*1+ecx]
+        mov     ebx,DWORD [32+esp]
+        add     ecx,ebp
+        ; 40_59 40
+        mov     ebp,edi
+        xor     ebx,DWORD [40+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [esp]
+        and     ebp,edx
+        xor     ebx,DWORD [20+esp]
+        rol     ebx,1
+        add     ebp,eax
+        ror     edx,2
+        mov     eax,ecx
+        rol     eax,5
+        mov     DWORD [32+esp],ebx
+        lea     ebx,[2400959708+ebp*1+ebx]
+        mov     ebp,edi
+        add     ebx,eax
+        and     ebp,esi
+        mov     eax,DWORD [36+esp]
+        add     ebx,ebp
+        ; 40_59 41
+        mov     ebp,edx
+        xor     eax,DWORD [44+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [4+esp]
+        and     ebp,ecx
+        xor     eax,DWORD [24+esp]
+        rol     eax,1
+        add     ebp,esi
+        ror     ecx,2
+        mov     esi,ebx
+        rol     esi,5
+        mov     DWORD [36+esp],eax
+        lea     eax,[2400959708+ebp*1+eax]
+        mov     ebp,edx
+        add     eax,esi
+        and     ebp,edi
+        mov     esi,DWORD [40+esp]
+        add     eax,ebp
+        ; 40_59 42
+        mov     ebp,ecx
+        xor     esi,DWORD [48+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [8+esp]
+        and     ebp,ebx
+        xor     esi,DWORD [28+esp]
+        rol     esi,1
+        add     ebp,edi
+        ror     ebx,2
+        mov     edi,eax
+        rol     edi,5
+        mov     DWORD [40+esp],esi
+        lea     esi,[2400959708+ebp*1+esi]
+        mov     ebp,ecx
+        add     esi,edi
+        and     ebp,edx
+        mov     edi,DWORD [44+esp]
+        add     esi,ebp
+        ; 40_59 43
+        mov     ebp,ebx
+        xor     edi,DWORD [52+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [12+esp]
+        and     ebp,eax
+        xor     edi,DWORD [32+esp]
+        rol     edi,1
+        add     ebp,edx
+        ror     eax,2
+        mov     edx,esi
+        rol     edx,5
+        mov     DWORD [44+esp],edi
+        lea     edi,[2400959708+ebp*1+edi]
+        mov     ebp,ebx
+        add     edi,edx
+        and     ebp,ecx
+        mov     edx,DWORD [48+esp]
+        add     edi,ebp
+        ; 40_59 44
+        mov     ebp,eax
+        xor     edx,DWORD [56+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [16+esp]
+        and     ebp,esi
+        xor     edx,DWORD [36+esp]
+        rol     edx,1
+        add     ebp,ecx
+        ror     esi,2
+        mov     ecx,edi
+        rol     ecx,5
+        mov     DWORD [48+esp],edx
+        lea     edx,[2400959708+ebp*1+edx]
+        mov     ebp,eax
+        add     edx,ecx
+        and     ebp,ebx
+        mov     ecx,DWORD [52+esp]
+        add     edx,ebp
+        ; 40_59 45
+        mov     ebp,esi
+        xor     ecx,DWORD [60+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [20+esp]
+        and     ebp,edi
+        xor     ecx,DWORD [40+esp]
+        rol     ecx,1
+        add     ebp,ebx
+        ror     edi,2
+        mov     ebx,edx
+        rol     ebx,5
+        mov     DWORD [52+esp],ecx
+        lea     ecx,[2400959708+ebp*1+ecx]
+        mov     ebp,esi
+        add     ecx,ebx
+        and     ebp,eax
+        mov     ebx,DWORD [56+esp]
+        add     ecx,ebp
+        ; 40_59 46
+        mov     ebp,edi
+        xor     ebx,DWORD [esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [24+esp]
+        and     ebp,edx
+        xor     ebx,DWORD [44+esp]
+        rol     ebx,1
+        add     ebp,eax
+        ror     edx,2
+        mov     eax,ecx
+        rol     eax,5
+        mov     DWORD [56+esp],ebx
+        lea     ebx,[2400959708+ebp*1+ebx]
+        mov     ebp,edi
+        add     ebx,eax
+        and     ebp,esi
+        mov     eax,DWORD [60+esp]
+        add     ebx,ebp
+        ; 40_59 47
+        mov     ebp,edx
+        xor     eax,DWORD [4+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [28+esp]
+        and     ebp,ecx
+        xor     eax,DWORD [48+esp]
+        rol     eax,1
+        add     ebp,esi
+        ror     ecx,2
+        mov     esi,ebx
+        rol     esi,5
+        mov     DWORD [60+esp],eax
+        lea     eax,[2400959708+ebp*1+eax]
+        mov     ebp,edx
+        add     eax,esi
+        and     ebp,edi
+        mov     esi,DWORD [esp]
+        add     eax,ebp
+        ; 40_59 48
+        mov     ebp,ecx
+        xor     esi,DWORD [8+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [32+esp]
+        and     ebp,ebx
+        xor     esi,DWORD [52+esp]
+        rol     esi,1
+        add     ebp,edi
+        ror     ebx,2
+        mov     edi,eax
+        rol     edi,5
+        mov     DWORD [esp],esi
+        lea     esi,[2400959708+ebp*1+esi]
+        mov     ebp,ecx
+        add     esi,edi
+        and     ebp,edx
+        mov     edi,DWORD [4+esp]
+        add     esi,ebp
+        ; 40_59 49
+        mov     ebp,ebx
+        xor     edi,DWORD [12+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [36+esp]
+        and     ebp,eax
+        xor     edi,DWORD [56+esp]
+        rol     edi,1
+        add     ebp,edx
+        ror     eax,2
+        mov     edx,esi
+        rol     edx,5
+        mov     DWORD [4+esp],edi
+        lea     edi,[2400959708+ebp*1+edi]
+        mov     ebp,ebx
+        add     edi,edx
+        and     ebp,ecx
+        mov     edx,DWORD [8+esp]
+        add     edi,ebp
+        ; 40_59 50
+        mov     ebp,eax
+        xor     edx,DWORD [16+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [40+esp]
+        and     ebp,esi
+        xor     edx,DWORD [60+esp]
+        rol     edx,1
+        add     ebp,ecx
+        ror     esi,2
+        mov     ecx,edi
+        rol     ecx,5
+        mov     DWORD [8+esp],edx
+        lea     edx,[2400959708+ebp*1+edx]
+        mov     ebp,eax
+        add     edx,ecx
+        and     ebp,ebx
+        mov     ecx,DWORD [12+esp]
+        add     edx,ebp
+        ; 40_59 51
+        mov     ebp,esi
+        xor     ecx,DWORD [20+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [44+esp]
+        and     ebp,edi
+        xor     ecx,DWORD [esp]
+        rol     ecx,1
+        add     ebp,ebx
+        ror     edi,2
+        mov     ebx,edx
+        rol     ebx,5
+        mov     DWORD [12+esp],ecx
+        lea     ecx,[2400959708+ebp*1+ecx]
+        mov     ebp,esi
+        add     ecx,ebx
+        and     ebp,eax
+        mov     ebx,DWORD [16+esp]
+        add     ecx,ebp
+        ; 40_59 52
+        mov     ebp,edi
+        xor     ebx,DWORD [24+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [48+esp]
+        and     ebp,edx
+        xor     ebx,DWORD [4+esp]
+        rol     ebx,1
+        add     ebp,eax
+        ror     edx,2
+        mov     eax,ecx
+        rol     eax,5
+        mov     DWORD [16+esp],ebx
+        lea     ebx,[2400959708+ebp*1+ebx]
+        mov     ebp,edi
+        add     ebx,eax
+        and     ebp,esi
+        mov     eax,DWORD [20+esp]
+        add     ebx,ebp
+        ; 40_59 53
+        mov     ebp,edx
+        xor     eax,DWORD [28+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [52+esp]
+        and     ebp,ecx
+        xor     eax,DWORD [8+esp]
+        rol     eax,1
+        add     ebp,esi
+        ror     ecx,2
+        mov     esi,ebx
+        rol     esi,5
+        mov     DWORD [20+esp],eax
+        lea     eax,[2400959708+ebp*1+eax]
+        mov     ebp,edx
+        add     eax,esi
+        and     ebp,edi
+        mov     esi,DWORD [24+esp]
+        add     eax,ebp
+        ; 40_59 54
+        mov     ebp,ecx
+        xor     esi,DWORD [32+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [56+esp]
+        and     ebp,ebx
+        xor     esi,DWORD [12+esp]
+        rol     esi,1
+        add     ebp,edi
+        ror     ebx,2
+        mov     edi,eax
+        rol     edi,5
+        mov     DWORD [24+esp],esi
+        lea     esi,[2400959708+ebp*1+esi]
+        mov     ebp,ecx
+        add     esi,edi
+        and     ebp,edx
+        mov     edi,DWORD [28+esp]
+        add     esi,ebp
+        ; 40_59 55
+        mov     ebp,ebx
+        xor     edi,DWORD [36+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [60+esp]
+        and     ebp,eax
+        xor     edi,DWORD [16+esp]
+        rol     edi,1
+        add     ebp,edx
+        ror     eax,2
+        mov     edx,esi
+        rol     edx,5
+        mov     DWORD [28+esp],edi
+        lea     edi,[2400959708+ebp*1+edi]
+        mov     ebp,ebx
+        add     edi,edx
+        and     ebp,ecx
+        mov     edx,DWORD [32+esp]
+        add     edi,ebp
+        ; 40_59 56
+        mov     ebp,eax
+        xor     edx,DWORD [40+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [esp]
+        and     ebp,esi
+        xor     edx,DWORD [20+esp]
+        rol     edx,1
+        add     ebp,ecx
+        ror     esi,2
+        mov     ecx,edi
+        rol     ecx,5
+        mov     DWORD [32+esp],edx
+        lea     edx,[2400959708+ebp*1+edx]
+        mov     ebp,eax
+        add     edx,ecx
+        and     ebp,ebx
+        mov     ecx,DWORD [36+esp]
+        add     edx,ebp
+        ; 40_59 57
+        mov     ebp,esi
+        xor     ecx,DWORD [44+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [4+esp]
+        and     ebp,edi
+        xor     ecx,DWORD [24+esp]
+        rol     ecx,1
+        add     ebp,ebx
+        ror     edi,2
+        mov     ebx,edx
+        rol     ebx,5
+        mov     DWORD [36+esp],ecx
+        lea     ecx,[2400959708+ebp*1+ecx]
+        mov     ebp,esi
+        add     ecx,ebx
+        and     ebp,eax
+        mov     ebx,DWORD [40+esp]
+        add     ecx,ebp
+        ; 40_59 58
+        mov     ebp,edi
+        xor     ebx,DWORD [48+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [8+esp]
+        and     ebp,edx
+        xor     ebx,DWORD [28+esp]
+        rol     ebx,1
+        add     ebp,eax
+        ror     edx,2
+        mov     eax,ecx
+        rol     eax,5
+        mov     DWORD [40+esp],ebx
+        lea     ebx,[2400959708+ebp*1+ebx]
+        mov     ebp,edi
+        add     ebx,eax
+        and     ebp,esi
+        mov     eax,DWORD [44+esp]
+        add     ebx,ebp
+        ; 40_59 59
+        mov     ebp,edx
+        xor     eax,DWORD [52+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [12+esp]
+        and     ebp,ecx
+        xor     eax,DWORD [32+esp]
+        rol     eax,1
+        add     ebp,esi
+        ror     ecx,2
+        mov     esi,ebx
+        rol     esi,5
+        mov     DWORD [44+esp],eax
+        lea     eax,[2400959708+ebp*1+eax]
+        mov     ebp,edx
+        add     eax,esi
+        and     ebp,edi
+        mov     esi,DWORD [48+esp]
+        add     eax,ebp
+        ; 20_39 60
+        mov     ebp,ebx
+        xor     esi,DWORD [56+esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [16+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [36+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        mov     DWORD [48+esp],esi
+        lea     esi,[3395469782+edi*1+esi]
+        mov     edi,DWORD [52+esp]
+        add     esi,ebp
+        ; 20_39 61
+        mov     ebp,eax
+        xor     edi,DWORD [60+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [20+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [40+esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        mov     DWORD [52+esp],edi
+        lea     edi,[3395469782+edx*1+edi]
+        mov     edx,DWORD [56+esp]
+        add     edi,ebp
+        ; 20_39 62
+        mov     ebp,esi
+        xor     edx,DWORD [esp]
+        xor     ebp,eax
+        xor     edx,DWORD [24+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [44+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [56+esp],edx
+        lea     edx,[3395469782+ecx*1+edx]
+        mov     ecx,DWORD [60+esp]
+        add     edx,ebp
+        ; 20_39 63
+        mov     ebp,edi
+        xor     ecx,DWORD [4+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [28+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [48+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [60+esp],ecx
+        lea     ecx,[3395469782+ebx*1+ecx]
+        mov     ebx,DWORD [esp]
+        add     ecx,ebp
+        ; 20_39 64
+        mov     ebp,edx
+        xor     ebx,DWORD [8+esp]
+        xor     ebp,edi
+        xor     ebx,DWORD [32+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [52+esp]
+        rol     ebx,1
+        add     eax,ebp
+        ror     edx,2
+        mov     ebp,ecx
+        rol     ebp,5
+        mov     DWORD [esp],ebx
+        lea     ebx,[3395469782+eax*1+ebx]
+        mov     eax,DWORD [4+esp]
+        add     ebx,ebp
+        ; 20_39 65
+        mov     ebp,ecx
+        xor     eax,DWORD [12+esp]
+        xor     ebp,edx
+        xor     eax,DWORD [36+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [56+esp]
+        rol     eax,1
+        add     esi,ebp
+        ror     ecx,2
+        mov     ebp,ebx
+        rol     ebp,5
+        mov     DWORD [4+esp],eax
+        lea     eax,[3395469782+esi*1+eax]
+        mov     esi,DWORD [8+esp]
+        add     eax,ebp
+        ; 20_39 66
+        mov     ebp,ebx
+        xor     esi,DWORD [16+esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [40+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [60+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        mov     DWORD [8+esp],esi
+        lea     esi,[3395469782+edi*1+esi]
+        mov     edi,DWORD [12+esp]
+        add     esi,ebp
+        ; 20_39 67
+        mov     ebp,eax
+        xor     edi,DWORD [20+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [44+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        mov     DWORD [12+esp],edi
+        lea     edi,[3395469782+edx*1+edi]
+        mov     edx,DWORD [16+esp]
+        add     edi,ebp
+        ; 20_39 68
+        mov     ebp,esi
+        xor     edx,DWORD [24+esp]
+        xor     ebp,eax
+        xor     edx,DWORD [48+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [4+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [16+esp],edx
+        lea     edx,[3395469782+ecx*1+edx]
+        mov     ecx,DWORD [20+esp]
+        add     edx,ebp
+        ; 20_39 69
+        mov     ebp,edi
+        xor     ecx,DWORD [28+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [52+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [8+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [20+esp],ecx
+        lea     ecx,[3395469782+ebx*1+ecx]
+        mov     ebx,DWORD [24+esp]
+        add     ecx,ebp
+        ; 20_39 70
+        mov     ebp,edx
+        xor     ebx,DWORD [32+esp]
+        xor     ebp,edi
+        xor     ebx,DWORD [56+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [12+esp]
+        rol     ebx,1
+        add     eax,ebp
+        ror     edx,2
+        mov     ebp,ecx
+        rol     ebp,5
+        mov     DWORD [24+esp],ebx
+        lea     ebx,[3395469782+eax*1+ebx]
+        mov     eax,DWORD [28+esp]
+        add     ebx,ebp
+        ; 20_39 71
+        mov     ebp,ecx
+        xor     eax,DWORD [36+esp]
+        xor     ebp,edx
+        xor     eax,DWORD [60+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [16+esp]
+        rol     eax,1
+        add     esi,ebp
+        ror     ecx,2
+        mov     ebp,ebx
+        rol     ebp,5
+        mov     DWORD [28+esp],eax
+        lea     eax,[3395469782+esi*1+eax]
+        mov     esi,DWORD [32+esp]
+        add     eax,ebp
+        ; 20_39 72
+        mov     ebp,ebx
+        xor     esi,DWORD [40+esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [esp]
+        xor     ebp,edx
+        xor     esi,DWORD [20+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        mov     DWORD [32+esp],esi
+        lea     esi,[3395469782+edi*1+esi]
+        mov     edi,DWORD [36+esp]
+        add     esi,ebp
+        ; 20_39 73
+        mov     ebp,eax
+        xor     edi,DWORD [44+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [4+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [24+esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        mov     DWORD [36+esp],edi
+        lea     edi,[3395469782+edx*1+edi]
+        mov     edx,DWORD [40+esp]
+        add     edi,ebp
+        ; 20_39 74
+        mov     ebp,esi
+        xor     edx,DWORD [48+esp]
+        xor     ebp,eax
+        xor     edx,DWORD [8+esp]
+        xor     ebp,ebx
+        xor     edx,DWORD [28+esp]
+        rol     edx,1
+        add     ecx,ebp
+        ror     esi,2
+        mov     ebp,edi
+        rol     ebp,5
+        mov     DWORD [40+esp],edx
+        lea     edx,[3395469782+ecx*1+edx]
+        mov     ecx,DWORD [44+esp]
+        add     edx,ebp
+        ; 20_39 75
+        mov     ebp,edi
+        xor     ecx,DWORD [52+esp]
+        xor     ebp,esi
+        xor     ecx,DWORD [12+esp]
+        xor     ebp,eax
+        xor     ecx,DWORD [32+esp]
+        rol     ecx,1
+        add     ebx,ebp
+        ror     edi,2
+        mov     ebp,edx
+        rol     ebp,5
+        mov     DWORD [44+esp],ecx
+        lea     ecx,[3395469782+ebx*1+ecx]
+        mov     ebx,DWORD [48+esp]
+        add     ecx,ebp
+        ; 20_39 76
+        mov     ebp,edx
+        xor     ebx,DWORD [56+esp]
+        xor     ebp,edi
+        xor     ebx,DWORD [16+esp]
+        xor     ebp,esi
+        xor     ebx,DWORD [36+esp]
+        rol     ebx,1
+        add     eax,ebp
+        ror     edx,2
+        mov     ebp,ecx
+        rol     ebp,5
+        mov     DWORD [48+esp],ebx
+        lea     ebx,[3395469782+eax*1+ebx]
+        mov     eax,DWORD [52+esp]
+        add     ebx,ebp
+        ; 20_39 77
+        mov     ebp,ecx
+        xor     eax,DWORD [60+esp]
+        xor     ebp,edx
+        xor     eax,DWORD [20+esp]
+        xor     ebp,edi
+        xor     eax,DWORD [40+esp]
+        rol     eax,1
+        add     esi,ebp
+        ror     ecx,2
+        mov     ebp,ebx
+        rol     ebp,5
+        lea     eax,[3395469782+esi*1+eax]
+        mov     esi,DWORD [56+esp]
+        add     eax,ebp
+        ; 20_39 78
+        mov     ebp,ebx
+        xor     esi,DWORD [esp]
+        xor     ebp,ecx
+        xor     esi,DWORD [24+esp]
+        xor     ebp,edx
+        xor     esi,DWORD [44+esp]
+        rol     esi,1
+        add     edi,ebp
+        ror     ebx,2
+        mov     ebp,eax
+        rol     ebp,5
+        lea     esi,[3395469782+edi*1+esi]
+        mov     edi,DWORD [60+esp]
+        add     esi,ebp
+        ; 20_39 79
+        mov     ebp,eax
+        xor     edi,DWORD [4+esp]
+        xor     ebp,ebx
+        xor     edi,DWORD [28+esp]
+        xor     ebp,ecx
+        xor     edi,DWORD [48+esp]
+        rol     edi,1
+        add     edx,ebp
+        ror     eax,2
+        mov     ebp,esi
+        rol     ebp,5
+        lea     edi,[3395469782+edx*1+edi]
+        add     edi,ebp
+        mov     ebp,DWORD [96+esp]
+        mov     edx,DWORD [100+esp]
+        add     edi,DWORD [ebp]
+        add     esi,DWORD [4+ebp]
+        add     eax,DWORD [8+ebp]
+        add     ebx,DWORD [12+ebp]
+        add     ecx,DWORD [16+ebp]
+        mov     DWORD [ebp],edi
+        add     edx,64
+        mov     DWORD [4+ebp],esi
+        cmp     edx,DWORD [104+esp]
+        mov     DWORD [8+ebp],eax
+        mov     edi,ecx
+        mov     DWORD [12+ebp],ebx
+        mov     esi,edx
+        mov     DWORD [16+ebp],ecx
+        jb      NEAR L$000loop
+        add     esp,76
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+db      83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+db      102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+db      89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+db      114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha256-586.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha256-586.nasm
new file mode 100644
index 0000000000..4598dae75a
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha256-586.nasm
@@ -0,0 +1,3364 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/sha/asm/sha256-586.pl
+;
+; Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+;extern _OPENSSL_ia32cap_P
+global  _sha256_block_data_order
+align   16
+_sha256_block_data_order:
+L$_sha256_block_data_order_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     ebx,esp
+        call    L$000pic_point
+L$000pic_point:
+        pop     ebp
+        lea     ebp,[(L$001K256-L$000pic_point)+ebp]
+        sub     esp,16
+        and     esp,-64
+        shl     eax,6
+        add     eax,edi
+        mov     DWORD [esp],esi
+        mov     DWORD [4+esp],edi
+        mov     DWORD [8+esp],eax
+        mov     DWORD [12+esp],ebx
+        jmp     NEAR L$002loop
+align   16
+L$002loop:
+        mov     eax,DWORD [edi]
+        mov     ebx,DWORD [4+edi]
+        mov     ecx,DWORD [8+edi]
+        bswap   eax
+        mov     edx,DWORD [12+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [16+edi]
+        mov     ebx,DWORD [20+edi]
+        mov     ecx,DWORD [24+edi]
+        bswap   eax
+        mov     edx,DWORD [28+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [32+edi]
+        mov     ebx,DWORD [36+edi]
+        mov     ecx,DWORD [40+edi]
+        bswap   eax
+        mov     edx,DWORD [44+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [48+edi]
+        mov     ebx,DWORD [52+edi]
+        mov     ecx,DWORD [56+edi]
+        bswap   eax
+        mov     edx,DWORD [60+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        add     edi,64
+        lea     esp,[esp-36]
+        mov     DWORD [104+esp],edi
+        mov     eax,DWORD [esi]
+        mov     ebx,DWORD [4+esi]
+        mov     ecx,DWORD [8+esi]
+        mov     edi,DWORD [12+esi]
+        mov     DWORD [8+esp],ebx
+        xor     ebx,ecx
+        mov     DWORD [12+esp],ecx
+        mov     DWORD [16+esp],edi
+        mov     DWORD [esp],ebx
+        mov     edx,DWORD [16+esi]
+        mov     ebx,DWORD [20+esi]
+        mov     ecx,DWORD [24+esi]
+        mov     edi,DWORD [28+esi]
+        mov     DWORD [24+esp],ebx
+        mov     DWORD [28+esp],ecx
+        mov     DWORD [32+esp],edi
+align   16
+L$00300_15:
+        mov     ecx,edx
+        mov     esi,DWORD [24+esp]
+        ror     ecx,14
+        mov     edi,DWORD [28+esp]
+        xor     ecx,edx
+        xor     esi,edi
+        mov     ebx,DWORD [96+esp]
+        ror     ecx,5
+        and     esi,edx
+        mov     DWORD [20+esp],edx
+        xor     edx,ecx
+        add     ebx,DWORD [32+esp]
+        xor     esi,edi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,esi
+        ror     ecx,9
+        add     ebx,edx
+        mov     edi,DWORD [8+esp]
+        xor     ecx,eax
+        mov     DWORD [4+esp],eax
+        lea     esp,[esp-4]
+        ror     ecx,11
+        mov     esi,DWORD [ebp]
+        xor     ecx,eax
+        mov     edx,DWORD [20+esp]
+        xor     eax,edi
+        ror     ecx,2
+        add     ebx,esi
+        mov     DWORD [esp],eax
+        add     edx,ebx
+        and     eax,DWORD [4+esp]
+        add     ebx,ecx
+        xor     eax,edi
+        add     ebp,4
+        add     eax,ebx
+        cmp     esi,3248222580
+        jne     NEAR L$00300_15
+        mov     ecx,DWORD [156+esp]
+        jmp     NEAR L$00416_63
+align   16
+L$00416_63:
+        mov     ebx,ecx
+        mov     esi,DWORD [104+esp]
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [160+esp]
+        shr     edi,10
+        add     ebx,DWORD [124+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [24+esp]
+        ror     ecx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     ecx,edx
+        xor     esi,edi
+        mov     DWORD [96+esp],ebx
+        ror     ecx,5
+        and     esi,edx
+        mov     DWORD [20+esp],edx
+        xor     edx,ecx
+        add     ebx,DWORD [32+esp]
+        xor     esi,edi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,esi
+        ror     ecx,9
+        add     ebx,edx
+        mov     edi,DWORD [8+esp]
+        xor     ecx,eax
+        mov     DWORD [4+esp],eax
+        lea     esp,[esp-4]
+        ror     ecx,11
+        mov     esi,DWORD [ebp]
+        xor     ecx,eax
+        mov     edx,DWORD [20+esp]
+        xor     eax,edi
+        ror     ecx,2
+        add     ebx,esi
+        mov     DWORD [esp],eax
+        add     edx,ebx
+        and     eax,DWORD [4+esp]
+        add     ebx,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [156+esp]
+        add     ebp,4
+        add     eax,ebx
+        cmp     esi,3329325298
+        jne     NEAR L$00416_63
+        mov     esi,DWORD [356+esp]
+        mov     ebx,DWORD [8+esp]
+        mov     ecx,DWORD [16+esp]
+        add     eax,DWORD [esi]
+        add     ebx,DWORD [4+esi]
+        add     edi,DWORD [8+esi]
+        add     ecx,DWORD [12+esi]
+        mov     DWORD [esi],eax
+        mov     DWORD [4+esi],ebx
+        mov     DWORD [8+esi],edi
+        mov     DWORD [12+esi],ecx
+        mov     eax,DWORD [24+esp]
+        mov     ebx,DWORD [28+esp]
+        mov     ecx,DWORD [32+esp]
+        mov     edi,DWORD [360+esp]
+        add     edx,DWORD [16+esi]
+        add     eax,DWORD [20+esi]
+        add     ebx,DWORD [24+esi]
+        add     ecx,DWORD [28+esi]
+        mov     DWORD [16+esi],edx
+        mov     DWORD [20+esi],eax
+        mov     DWORD [24+esi],ebx
+        mov     DWORD [28+esi],ecx
+        lea     esp,[356+esp]
+        sub     ebp,256
+        cmp     edi,DWORD [8+esp]
+        jb      NEAR L$002loop
+        mov     esp,DWORD [12+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+align   32
+L$005loop_shrd:
+        mov     eax,DWORD [edi]
+        mov     ebx,DWORD [4+edi]
+        mov     ecx,DWORD [8+edi]
+        bswap   eax
+        mov     edx,DWORD [12+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [16+edi]
+        mov     ebx,DWORD [20+edi]
+        mov     ecx,DWORD [24+edi]
+        bswap   eax
+        mov     edx,DWORD [28+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [32+edi]
+        mov     ebx,DWORD [36+edi]
+        mov     ecx,DWORD [40+edi]
+        bswap   eax
+        mov     edx,DWORD [44+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [48+edi]
+        mov     ebx,DWORD [52+edi]
+        mov     ecx,DWORD [56+edi]
+        bswap   eax
+        mov     edx,DWORD [60+edi]
+        bswap   ebx
+        push    eax
+        bswap   ecx
+        push    ebx
+        bswap   edx
+        push    ecx
+        push    edx
+        add     edi,64
+        lea     esp,[esp-36]
+        mov     DWORD [104+esp],edi
+        mov     eax,DWORD [esi]
+        mov     ebx,DWORD [4+esi]
+        mov     ecx,DWORD [8+esi]
+        mov     edi,DWORD [12+esi]
+        mov     DWORD [8+esp],ebx
+        xor     ebx,ecx
+        mov     DWORD [12+esp],ecx
+        mov     DWORD [16+esp],edi
+        mov     DWORD [esp],ebx
+        mov     edx,DWORD [16+esi]
+        mov     ebx,DWORD [20+esi]
+        mov     ecx,DWORD [24+esi]
+        mov     edi,DWORD [28+esi]
+        mov     DWORD [24+esp],ebx
+        mov     DWORD [28+esp],ecx
+        mov     DWORD [32+esp],edi
+align   16
+L$00600_15_shrd:
+        mov     ecx,edx
+        mov     esi,DWORD [24+esp]
+        shrd    ecx,ecx,14
+        mov     edi,DWORD [28+esp]
+        xor     ecx,edx
+        xor     esi,edi
+        mov     ebx,DWORD [96+esp]
+        shrd    ecx,ecx,5
+        and     esi,edx
+        mov     DWORD [20+esp],edx
+        xor     edx,ecx
+        add     ebx,DWORD [32+esp]
+        xor     esi,edi
+        shrd    edx,edx,6
+        mov     ecx,eax
+        add     ebx,esi
+        shrd    ecx,ecx,9
+        add     ebx,edx
+        mov     edi,DWORD [8+esp]
+        xor     ecx,eax
+        mov     DWORD [4+esp],eax
+        lea     esp,[esp-4]
+        shrd    ecx,ecx,11
+        mov     esi,DWORD [ebp]
+        xor     ecx,eax
+        mov     edx,DWORD [20+esp]
+        xor     eax,edi
+        shrd    ecx,ecx,2
+        add     ebx,esi
+        mov     DWORD [esp],eax
+        add     edx,ebx
+        and     eax,DWORD [4+esp]
+        add     ebx,ecx
+        xor     eax,edi
+        add     ebp,4
+        add     eax,ebx
+        cmp     esi,3248222580
+        jne     NEAR L$00600_15_shrd
+        mov     ecx,DWORD [156+esp]
+        jmp     NEAR L$00716_63_shrd
+align   16
+L$00716_63_shrd:
+        mov     ebx,ecx
+        mov     esi,DWORD [104+esp]
+        shrd    ecx,ecx,11
+        mov     edi,esi
+        shrd    esi,esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        shrd    ecx,ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        shrd    esi,esi,17
+        add     ebx,DWORD [160+esp]
+        shr     edi,10
+        add     ebx,DWORD [124+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [24+esp]
+        shrd    ecx,ecx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     ecx,edx
+        xor     esi,edi
+        mov     DWORD [96+esp],ebx
+        shrd    ecx,ecx,5
+        and     esi,edx
+        mov     DWORD [20+esp],edx
+        xor     edx,ecx
+        add     ebx,DWORD [32+esp]
+        xor     esi,edi
+        shrd    edx,edx,6
+        mov     ecx,eax
+        add     ebx,esi
+        shrd    ecx,ecx,9
+        add     ebx,edx
+        mov     edi,DWORD [8+esp]
+        xor     ecx,eax
+        mov     DWORD [4+esp],eax
+        lea     esp,[esp-4]
+        shrd    ecx,ecx,11
+        mov     esi,DWORD [ebp]
+        xor     ecx,eax
+        mov     edx,DWORD [20+esp]
+        xor     eax,edi
+        shrd    ecx,ecx,2
+        add     ebx,esi
+        mov     DWORD [esp],eax
+        add     edx,ebx
+        and     eax,DWORD [4+esp]
+        add     ebx,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [156+esp]
+        add     ebp,4
+        add     eax,ebx
+        cmp     esi,3329325298
+        jne     NEAR L$00716_63_shrd
+        mov     esi,DWORD [356+esp]
+        mov     ebx,DWORD [8+esp]
+        mov     ecx,DWORD [16+esp]
+        add     eax,DWORD [esi]
+        add     ebx,DWORD [4+esi]
+        add     edi,DWORD [8+esi]
+        add     ecx,DWORD [12+esi]
+        mov     DWORD [esi],eax
+        mov     DWORD [4+esi],ebx
+        mov     DWORD [8+esi],edi
+        mov     DWORD [12+esi],ecx
+        mov     eax,DWORD [24+esp]
+        mov     ebx,DWORD [28+esp]
+        mov     ecx,DWORD [32+esp]
+        mov     edi,DWORD [360+esp]
+        add     edx,DWORD [16+esi]
+        add     eax,DWORD [20+esi]
+        add     ebx,DWORD [24+esi]
+        add     ecx,DWORD [28+esi]
+        mov     DWORD [16+esi],edx
+        mov     DWORD [20+esi],eax
+        mov     DWORD [24+esi],ebx
+        mov     DWORD [28+esi],ecx
+        lea     esp,[356+esp]
+        sub     ebp,256
+        cmp     edi,DWORD [8+esp]
+        jb      NEAR L$005loop_shrd
+        mov     esp,DWORD [12+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+align   64
+L$001K256:
+dd      1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+dd      66051,67438087,134810123,202182159
+db      83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+db      110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+db      67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+db      112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+db      62,0
+align   16
+L$008unrolled:
+        lea     esp,[esp-96]
+        mov     eax,DWORD [esi]
+        mov     ebp,DWORD [4+esi]
+        mov     ecx,DWORD [8+esi]
+        mov     ebx,DWORD [12+esi]
+        mov     DWORD [4+esp],ebp
+        xor     ebp,ecx
+        mov     DWORD [8+esp],ecx
+        mov     DWORD [12+esp],ebx
+        mov     edx,DWORD [16+esi]
+        mov     ebx,DWORD [20+esi]
+        mov     ecx,DWORD [24+esi]
+        mov     esi,DWORD [28+esi]
+        mov     DWORD [20+esp],ebx
+        mov     DWORD [24+esp],ecx
+        mov     DWORD [28+esp],esi
+        jmp     NEAR L$009grand_loop
+align   16
+L$009grand_loop:
+        mov     ebx,DWORD [edi]
+        mov     ecx,DWORD [4+edi]
+        bswap   ebx
+        mov     esi,DWORD [8+edi]
+        bswap   ecx
+        mov     DWORD [32+esp],ebx
+        bswap   esi
+        mov     DWORD [36+esp],ecx
+        mov     DWORD [40+esp],esi
+        mov     ebx,DWORD [12+edi]
+        mov     ecx,DWORD [16+edi]
+        bswap   ebx
+        mov     esi,DWORD [20+edi]
+        bswap   ecx
+        mov     DWORD [44+esp],ebx
+        bswap   esi
+        mov     DWORD [48+esp],ecx
+        mov     DWORD [52+esp],esi
+        mov     ebx,DWORD [24+edi]
+        mov     ecx,DWORD [28+edi]
+        bswap   ebx
+        mov     esi,DWORD [32+edi]
+        bswap   ecx
+        mov     DWORD [56+esp],ebx
+        bswap   esi
+        mov     DWORD [60+esp],ecx
+        mov     DWORD [64+esp],esi
+        mov     ebx,DWORD [36+edi]
+        mov     ecx,DWORD [40+edi]
+        bswap   ebx
+        mov     esi,DWORD [44+edi]
+        bswap   ecx
+        mov     DWORD [68+esp],ebx
+        bswap   esi
+        mov     DWORD [72+esp],ecx
+        mov     DWORD [76+esp],esi
+        mov     ebx,DWORD [48+edi]
+        mov     ecx,DWORD [52+edi]
+        bswap   ebx
+        mov     esi,DWORD [56+edi]
+        bswap   ecx
+        mov     DWORD [80+esp],ebx
+        bswap   esi
+        mov     DWORD [84+esp],ecx
+        mov     DWORD [88+esp],esi
+        mov     ebx,DWORD [60+edi]
+        add     edi,64
+        bswap   ebx
+        mov     DWORD [100+esp],edi
+        mov     DWORD [92+esp],ebx
+        mov     ecx,edx
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [32+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1116352408+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [36+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1899447441+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [40+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3049323471+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [44+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3921009573+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [48+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[961987163+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [52+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1508970993+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [56+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2453635748+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [60+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2870763221+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [64+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3624381080+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [68+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[310598401+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [72+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[607225278+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [76+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1426881987+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [80+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1925078388+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [84+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2162078206+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     ecx,edx
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     ebx,DWORD [88+esp]
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2614888103+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     esi,edx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     ebx,DWORD [92+esp]
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3248222580+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [36+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [88+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [32+esp]
+        shr     edi,10
+        add     ebx,DWORD [68+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     DWORD [32+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3835390401+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [40+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [92+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [36+esp]
+        shr     edi,10
+        add     ebx,DWORD [72+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     DWORD [36+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[4022224774+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [44+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     esi,DWORD [32+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [40+esp]
+        shr     edi,10
+        add     ebx,DWORD [76+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     DWORD [40+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[264347078+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [48+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [36+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [44+esp]
+        shr     edi,10
+        add     ebx,DWORD [80+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     DWORD [44+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[604807628+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [52+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     esi,DWORD [40+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [48+esp]
+        shr     edi,10
+        add     ebx,DWORD [84+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     DWORD [48+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[770255983+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [56+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [44+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [52+esp]
+        shr     edi,10
+        add     ebx,DWORD [88+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     DWORD [52+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1249150122+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [60+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     esi,DWORD [48+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [56+esp]
+        shr     edi,10
+        add     ebx,DWORD [92+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     DWORD [56+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1555081692+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [64+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [52+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [60+esp]
+        shr     edi,10
+        add     ebx,DWORD [32+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     DWORD [60+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1996064986+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [68+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [56+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [64+esp]
+        shr     edi,10
+        add     ebx,DWORD [36+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     DWORD [64+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2554220882+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [72+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [60+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [68+esp]
+        shr     edi,10
+        add     ebx,DWORD [40+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     DWORD [68+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2821834349+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [76+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     esi,DWORD [64+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [72+esp]
+        shr     edi,10
+        add     ebx,DWORD [44+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     DWORD [72+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2952996808+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [80+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [68+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [76+esp]
+        shr     edi,10
+        add     ebx,DWORD [48+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     DWORD [76+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3210313671+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [84+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     esi,DWORD [72+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [80+esp]
+        shr     edi,10
+        add     ebx,DWORD [52+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     DWORD [80+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3336571891+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [88+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [76+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [84+esp]
+        shr     edi,10
+        add     ebx,DWORD [56+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     DWORD [84+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3584528711+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [92+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     esi,DWORD [80+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [88+esp]
+        shr     edi,10
+        add     ebx,DWORD [60+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     DWORD [88+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[113926993+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [32+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [84+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [92+esp]
+        shr     edi,10
+        add     ebx,DWORD [64+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     DWORD [92+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[338241895+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [36+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [88+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [32+esp]
+        shr     edi,10
+        add     ebx,DWORD [68+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     DWORD [32+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[666307205+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [40+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [92+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [36+esp]
+        shr     edi,10
+        add     ebx,DWORD [72+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     DWORD [36+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[773529912+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [44+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     esi,DWORD [32+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [40+esp]
+        shr     edi,10
+        add     ebx,DWORD [76+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     DWORD [40+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1294757372+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [48+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [36+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [44+esp]
+        shr     edi,10
+        add     ebx,DWORD [80+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     DWORD [44+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1396182291+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [52+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     esi,DWORD [40+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [48+esp]
+        shr     edi,10
+        add     ebx,DWORD [84+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     DWORD [48+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1695183700+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [56+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [44+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [52+esp]
+        shr     edi,10
+        add     ebx,DWORD [88+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     DWORD [52+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1986661051+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [60+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     esi,DWORD [48+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [56+esp]
+        shr     edi,10
+        add     ebx,DWORD [92+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     DWORD [56+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2177026350+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [64+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [52+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [60+esp]
+        shr     edi,10
+        add     ebx,DWORD [32+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     DWORD [60+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2456956037+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [68+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [56+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [64+esp]
+        shr     edi,10
+        add     ebx,DWORD [36+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     DWORD [64+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2730485921+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [72+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [60+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [68+esp]
+        shr     edi,10
+        add     ebx,DWORD [40+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     DWORD [68+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2820302411+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [76+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     esi,DWORD [64+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [72+esp]
+        shr     edi,10
+        add     ebx,DWORD [44+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     DWORD [72+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3259730800+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [80+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [68+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [76+esp]
+        shr     edi,10
+        add     ebx,DWORD [48+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     DWORD [76+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3345764771+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [84+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     esi,DWORD [72+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [80+esp]
+        shr     edi,10
+        add     ebx,DWORD [52+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     DWORD [80+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3516065817+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [88+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [76+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [84+esp]
+        shr     edi,10
+        add     ebx,DWORD [56+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     DWORD [84+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3600352804+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [92+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     esi,DWORD [80+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [88+esp]
+        shr     edi,10
+        add     ebx,DWORD [60+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     DWORD [88+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[4094571909+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [32+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [84+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [92+esp]
+        shr     edi,10
+        add     ebx,DWORD [64+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     DWORD [92+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[275423344+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [36+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [88+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [32+esp]
+        shr     edi,10
+        add     ebx,DWORD [68+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     DWORD [32+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[430227734+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [40+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [92+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [36+esp]
+        shr     edi,10
+        add     ebx,DWORD [72+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     DWORD [36+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[506948616+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [44+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     esi,DWORD [32+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [40+esp]
+        shr     edi,10
+        add     ebx,DWORD [76+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     DWORD [40+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[659060556+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [48+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [36+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [44+esp]
+        shr     edi,10
+        add     ebx,DWORD [80+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     DWORD [44+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[883997877+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [52+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     esi,DWORD [40+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [48+esp]
+        shr     edi,10
+        add     ebx,DWORD [84+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     DWORD [48+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[958139571+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [56+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [44+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [52+esp]
+        shr     edi,10
+        add     ebx,DWORD [88+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     DWORD [52+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1322822218+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [60+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     esi,DWORD [48+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [56+esp]
+        shr     edi,10
+        add     ebx,DWORD [92+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        mov     DWORD [56+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1537002063+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [64+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [52+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [60+esp]
+        shr     edi,10
+        add     ebx,DWORD [32+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        mov     DWORD [60+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[1747873779+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [68+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [56+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [64+esp]
+        shr     edi,10
+        add     ebx,DWORD [36+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [20+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [24+esp]
+        xor     edx,ecx
+        mov     DWORD [64+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [16+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [28+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [4+esp]
+        xor     ecx,eax
+        mov     DWORD [esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[1955562222+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [72+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [12+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [60+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [68+esp]
+        shr     edi,10
+        add     ebx,DWORD [40+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [16+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [20+esp]
+        xor     edx,esi
+        mov     DWORD [68+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [12+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [24+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [esp]
+        xor     esi,ebp
+        mov     DWORD [28+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2024104815+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [76+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [8+esp]
+        add     eax,esi
+        mov     esi,DWORD [64+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [72+esp]
+        shr     edi,10
+        add     ebx,DWORD [44+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [12+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [16+esp]
+        xor     edx,ecx
+        mov     DWORD [72+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [8+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [20+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [28+esp]
+        xor     ecx,eax
+        mov     DWORD [24+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2227730452+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [80+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [4+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [68+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [76+esp]
+        shr     edi,10
+        add     ebx,DWORD [48+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [8+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [12+esp]
+        xor     edx,esi
+        mov     DWORD [76+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [4+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [16+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [24+esp]
+        xor     esi,ebp
+        mov     DWORD [20+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2361852424+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [84+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [esp]
+        add     eax,esi
+        mov     esi,DWORD [72+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [80+esp]
+        shr     edi,10
+        add     ebx,DWORD [52+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [4+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [8+esp]
+        xor     edx,ecx
+        mov     DWORD [80+esp],ebx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [12+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [20+esp]
+        xor     ecx,eax
+        mov     DWORD [16+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[2428436474+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [88+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [28+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [76+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [84+esp]
+        shr     edi,10
+        add     ebx,DWORD [56+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [4+esp]
+        xor     edx,esi
+        mov     DWORD [84+esp],ebx
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [28+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [8+esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [16+esp]
+        xor     esi,ebp
+        mov     DWORD [12+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[2756734187+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        mov     ecx,DWORD [92+esp]
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [24+esp]
+        add     eax,esi
+        mov     esi,DWORD [80+esp]
+        mov     ebx,ecx
+        ror     ecx,11
+        mov     edi,esi
+        ror     esi,2
+        xor     ecx,ebx
+        shr     ebx,3
+        ror     ecx,7
+        xor     esi,edi
+        xor     ebx,ecx
+        ror     esi,17
+        add     ebx,DWORD [88+esp]
+        shr     edi,10
+        add     ebx,DWORD [60+esp]
+        mov     ecx,edx
+        xor     edi,esi
+        mov     esi,DWORD [28+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [esp]
+        xor     edx,ecx
+        xor     esi,edi
+        ror     edx,5
+        and     esi,ecx
+        mov     DWORD [24+esp],ecx
+        xor     edx,ecx
+        add     ebx,DWORD [4+esp]
+        xor     edi,esi
+        ror     edx,6
+        mov     ecx,eax
+        add     ebx,edi
+        ror     ecx,9
+        mov     esi,eax
+        mov     edi,DWORD [12+esp]
+        xor     ecx,eax
+        mov     DWORD [8+esp],eax
+        xor     eax,edi
+        ror     ecx,11
+        and     ebp,eax
+        lea     edx,[3204031479+edx*1+ebx]
+        xor     ecx,esi
+        xor     ebp,edi
+        mov     esi,DWORD [32+esp]
+        ror     ecx,2
+        add     ebp,edx
+        add     edx,DWORD [20+esp]
+        add     ebp,ecx
+        mov     ecx,DWORD [84+esp]
+        mov     ebx,esi
+        ror     esi,11
+        mov     edi,ecx
+        ror     ecx,2
+        xor     esi,ebx
+        shr     ebx,3
+        ror     esi,7
+        xor     ecx,edi
+        xor     ebx,esi
+        ror     ecx,17
+        add     ebx,DWORD [92+esp]
+        shr     edi,10
+        add     ebx,DWORD [64+esp]
+        mov     esi,edx
+        xor     edi,ecx
+        mov     ecx,DWORD [24+esp]
+        ror     edx,14
+        add     ebx,edi
+        mov     edi,DWORD [28+esp]
+        xor     edx,esi
+        xor     ecx,edi
+        ror     edx,5
+        and     ecx,esi
+        mov     DWORD [20+esp],esi
+        xor     edx,esi
+        add     ebx,DWORD [esp]
+        xor     edi,ecx
+        ror     edx,6
+        mov     esi,ebp
+        add     ebx,edi
+        ror     esi,9
+        mov     ecx,ebp
+        mov     edi,DWORD [8+esp]
+        xor     esi,ebp
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        ror     esi,11
+        and     eax,ebp
+        lea     edx,[3329325298+edx*1+ebx]
+        xor     esi,ecx
+        xor     eax,edi
+        ror     esi,2
+        add     eax,edx
+        add     edx,DWORD [16+esp]
+        add     eax,esi
+        mov     esi,DWORD [96+esp]
+        xor     ebp,edi
+        mov     ecx,DWORD [12+esp]
+        add     eax,DWORD [esi]
+        add     ebp,DWORD [4+esi]
+        add     edi,DWORD [8+esi]
+        add     ecx,DWORD [12+esi]
+        mov     DWORD [esi],eax
+        mov     DWORD [4+esi],ebp
+        mov     DWORD [8+esi],edi
+        mov     DWORD [12+esi],ecx
+        mov     DWORD [4+esp],ebp
+        xor     ebp,edi
+        mov     DWORD [8+esp],edi
+        mov     DWORD [12+esp],ecx
+        mov     edi,DWORD [20+esp]
+        mov     ebx,DWORD [24+esp]
+        mov     ecx,DWORD [28+esp]
+        add     edx,DWORD [16+esi]
+        add     edi,DWORD [20+esi]
+        add     ebx,DWORD [24+esi]
+        add     ecx,DWORD [28+esi]
+        mov     DWORD [16+esi],edx
+        mov     DWORD [20+esi],edi
+        mov     DWORD [24+esi],ebx
+        mov     DWORD [28+esi],ecx
+        mov     DWORD [20+esp],edi
+        mov     edi,DWORD [100+esp]
+        mov     DWORD [24+esp],ebx
+        mov     DWORD [28+esp],ecx
+        cmp     edi,DWORD [104+esp]
+        jb      NEAR L$009grand_loop
+        mov     esp,DWORD [108+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+segment .bss
+common  _OPENSSL_ia32cap_P 16
diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha512-586.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha512-586.nasm
new file mode 100644
index 0000000000..a168b4a799
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/sha/sha512-586.nasm
@@ -0,0 +1,579 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/sha/asm/sha512-586.pl
+;
+; Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+global  _sha512_block_data_order
+align   16
+_sha512_block_data_order:
+L$_sha512_block_data_order_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     esi,DWORD [20+esp]
+        mov     edi,DWORD [24+esp]
+        mov     eax,DWORD [28+esp]
+        mov     ebx,esp
+        call    L$000pic_point
+L$000pic_point:
+        pop     ebp
+        lea     ebp,[(L$001K512-L$000pic_point)+ebp]
+        sub     esp,16
+        and     esp,-64
+        shl     eax,7
+        add     eax,edi
+        mov     DWORD [esp],esi
+        mov     DWORD [4+esp],edi
+        mov     DWORD [8+esp],eax
+        mov     DWORD [12+esp],ebx
+align   16
+L$002loop_x86:
+        mov     eax,DWORD [edi]
+        mov     ebx,DWORD [4+edi]
+        mov     ecx,DWORD [8+edi]
+        mov     edx,DWORD [12+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [16+edi]
+        mov     ebx,DWORD [20+edi]
+        mov     ecx,DWORD [24+edi]
+        mov     edx,DWORD [28+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [32+edi]
+        mov     ebx,DWORD [36+edi]
+        mov     ecx,DWORD [40+edi]
+        mov     edx,DWORD [44+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [48+edi]
+        mov     ebx,DWORD [52+edi]
+        mov     ecx,DWORD [56+edi]
+        mov     edx,DWORD [60+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [64+edi]
+        mov     ebx,DWORD [68+edi]
+        mov     ecx,DWORD [72+edi]
+        mov     edx,DWORD [76+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [80+edi]
+        mov     ebx,DWORD [84+edi]
+        mov     ecx,DWORD [88+edi]
+        mov     edx,DWORD [92+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [96+edi]
+        mov     ebx,DWORD [100+edi]
+        mov     ecx,DWORD [104+edi]
+        mov     edx,DWORD [108+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        mov     eax,DWORD [112+edi]
+        mov     ebx,DWORD [116+edi]
+        mov     ecx,DWORD [120+edi]
+        mov     edx,DWORD [124+edi]
+        bswap   eax
+        bswap   ebx
+        bswap   ecx
+        bswap   edx
+        push    eax
+        push    ebx
+        push    ecx
+        push    edx
+        add     edi,128
+        sub     esp,72
+        mov     DWORD [204+esp],edi
+        lea     edi,[8+esp]
+        mov     ecx,16
+dd      2784229001
+align   16
+L$00300_15_x86:
+        mov     ecx,DWORD [40+esp]
+        mov     edx,DWORD [44+esp]
+        mov     esi,ecx
+        shr     ecx,9
+        mov     edi,edx
+        shr     edx,9
+        mov     ebx,ecx
+        shl     esi,14
+        mov     eax,edx
+        shl     edi,14
+        xor     ebx,esi
+        shr     ecx,5
+        xor     eax,edi
+        shr     edx,5
+        xor     eax,ecx
+        shl     esi,4
+        xor     ebx,edx
+        shl     edi,4
+        xor     ebx,esi
+        shr     ecx,4
+        xor     eax,edi
+        shr     edx,4
+        xor     eax,ecx
+        shl     esi,5
+        xor     ebx,edx
+        shl     edi,5
+        xor     eax,esi
+        xor     ebx,edi
+        mov     ecx,DWORD [48+esp]
+        mov     edx,DWORD [52+esp]
+        mov     esi,DWORD [56+esp]
+        mov     edi,DWORD [60+esp]
+        add     eax,DWORD [64+esp]
+        adc     ebx,DWORD [68+esp]
+        xor     ecx,esi
+        xor     edx,edi
+        and     ecx,DWORD [40+esp]
+        and     edx,DWORD [44+esp]
+        add     eax,DWORD [192+esp]
+        adc     ebx,DWORD [196+esp]
+        xor     ecx,esi
+        xor     edx,edi
+        mov     esi,DWORD [ebp]
+        mov     edi,DWORD [4+ebp]
+        add     eax,ecx
+        adc     ebx,edx
+        mov     ecx,DWORD [32+esp]
+        mov     edx,DWORD [36+esp]
+        add     eax,esi
+        adc     ebx,edi
+        mov     DWORD [esp],eax
+        mov     DWORD [4+esp],ebx
+        add     eax,ecx
+        adc     ebx,edx
+        mov     ecx,DWORD [8+esp]
+        mov     edx,DWORD [12+esp]
+        mov     DWORD [32+esp],eax
+        mov     DWORD [36+esp],ebx
+        mov     esi,ecx
+        shr     ecx,2
+        mov     edi,edx
+        shr     edx,2
+        mov     ebx,ecx
+        shl     esi,4
+        mov     eax,edx
+        shl     edi,4
+        xor     ebx,esi
+        shr     ecx,5
+        xor     eax,edi
+        shr     edx,5
+        xor     ebx,ecx
+        shl     esi,21
+        xor     eax,edx
+        shl     edi,21
+        xor     eax,esi
+        shr     ecx,21
+        xor     ebx,edi
+        shr     edx,21
+        xor     eax,ecx
+        shl     esi,5
+        xor     ebx,edx
+        shl     edi,5
+        xor     eax,esi
+        xor     ebx,edi
+        mov     ecx,DWORD [8+esp]
+        mov     edx,DWORD [12+esp]
+        mov     esi,DWORD [16+esp]
+        mov     edi,DWORD [20+esp]
+        add     eax,DWORD [esp]
+        adc     ebx,DWORD [4+esp]
+        or      ecx,esi
+        or      edx,edi
+        and     ecx,DWORD [24+esp]
+        and     edx,DWORD [28+esp]
+        and     esi,DWORD [8+esp]
+        and     edi,DWORD [12+esp]
+        or      ecx,esi
+        or      edx,edi
+        add     eax,ecx
+        adc     ebx,edx
+        mov     DWORD [esp],eax
+        mov     DWORD [4+esp],ebx
+        mov     dl,BYTE [ebp]
+        sub     esp,8
+        lea     ebp,[8+ebp]
+        cmp     dl,148
+        jne     NEAR L$00300_15_x86
+align   16
+L$00416_79_x86:
+        mov     ecx,DWORD [312+esp]
+        mov     edx,DWORD [316+esp]
+        mov     esi,ecx
+        shr     ecx,1
+        mov     edi,edx
+        shr     edx,1
+        mov     eax,ecx
+        shl     esi,24
+        mov     ebx,edx
+        shl     edi,24
+        xor     ebx,esi
+        shr     ecx,6
+        xor     eax,edi
+        shr     edx,6
+        xor     eax,ecx
+        shl     esi,7
+        xor     ebx,edx
+        shl     edi,1
+        xor     ebx,esi
+        shr     ecx,1
+        xor     eax,edi
+        shr     edx,1
+        xor     eax,ecx
+        shl     edi,6
+        xor     ebx,edx
+        xor     eax,edi
+        mov     DWORD [esp],eax
+        mov     DWORD [4+esp],ebx
+        mov     ecx,DWORD [208+esp]
+        mov     edx,DWORD [212+esp]
+        mov     esi,ecx
+        shr     ecx,6
+        mov     edi,edx
+        shr     edx,6
+        mov     eax,ecx
+        shl     esi,3
+        mov     ebx,edx
+        shl     edi,3
+        xor     eax,esi
+        shr     ecx,13
+        xor     ebx,edi
+        shr     edx,13
+        xor     eax,ecx
+        shl     esi,10
+        xor     ebx,edx
+        shl     edi,10
+        xor     ebx,esi
+        shr     ecx,10
+        xor     eax,edi
+        shr     edx,10
+        xor     ebx,ecx
+        shl     edi,13
+        xor     eax,edx
+        xor     eax,edi
+        mov     ecx,DWORD [320+esp]
+        mov     edx,DWORD [324+esp]
+        add     eax,DWORD [esp]
+        adc     ebx,DWORD [4+esp]
+        mov     esi,DWORD [248+esp]
+        mov     edi,DWORD [252+esp]
+        add     eax,ecx
+        adc     ebx,edx
+        add     eax,esi
+        adc     ebx,edi
+        mov     DWORD [192+esp],eax
+        mov     DWORD [196+esp],ebx
+        mov     ecx,DWORD [40+esp]
+        mov     edx,DWORD [44+esp]
+        mov     esi,ecx
+        shr     ecx,9
+        mov     edi,edx
+        shr     edx,9
+        mov     ebx,ecx
+        shl     esi,14
+        mov     eax,edx
+        shl     edi,14
+        xor     ebx,esi
+        shr     ecx,5
+        xor     eax,edi
+        shr     edx,5
+        xor     eax,ecx
+        shl     esi,4
+        xor     ebx,edx
+        shl     edi,4
+        xor     ebx,esi
+        shr     ecx,4
+        xor     eax,edi
+        shr     edx,4
+        xor     eax,ecx
+        shl     esi,5
+        xor     ebx,edx
+        shl     edi,5
+        xor     eax,esi
+        xor     ebx,edi
+        mov     ecx,DWORD [48+esp]
+        mov     edx,DWORD [52+esp]
+        mov     esi,DWORD [56+esp]
+        mov     edi,DWORD [60+esp]
+        add     eax,DWORD [64+esp]
+        adc     ebx,DWORD [68+esp]
+        xor     ecx,esi
+        xor     edx,edi
+        and     ecx,DWORD [40+esp]
+        and     edx,DWORD [44+esp]
+        add     eax,DWORD [192+esp]
+        adc     ebx,DWORD [196+esp]
+        xor     ecx,esi
+        xor     edx,edi
+        mov     esi,DWORD [ebp]
+        mov     edi,DWORD [4+ebp]
+        add     eax,ecx
+        adc     ebx,edx
+        mov     ecx,DWORD [32+esp]
+        mov     edx,DWORD [36+esp]
+        add     eax,esi
+        adc     ebx,edi
+        mov     DWORD [esp],eax
+        mov     DWORD [4+esp],ebx
+        add     eax,ecx
+        adc     ebx,edx
+        mov     ecx,DWORD [8+esp]
+        mov     edx,DWORD [12+esp]
+        mov     DWORD [32+esp],eax
+        mov     DWORD [36+esp],ebx
+        mov     esi,ecx
+        shr     ecx,2
+        mov     edi,edx
+        shr     edx,2
+        mov     ebx,ecx
+        shl     esi,4
+        mov     eax,edx
+        shl     edi,4
+        xor     ebx,esi
+        shr     ecx,5
+        xor     eax,edi
+        shr     edx,5
+        xor     ebx,ecx
+        shl     esi,21
+        xor     eax,edx
+        shl     edi,21
+        xor     eax,esi
+        shr     ecx,21
+        xor     ebx,edi
+        shr     edx,21
+        xor     eax,ecx
+        shl     esi,5
+        xor     ebx,edx
+        shl     edi,5
+        xor     eax,esi
+        xor     ebx,edi
+        mov     ecx,DWORD [8+esp]
+        mov     edx,DWORD [12+esp]
+        mov     esi,DWORD [16+esp]
+        mov     edi,DWORD [20+esp]
+        add     eax,DWORD [esp]
+        adc     ebx,DWORD [4+esp]
+        or      ecx,esi
+        or      edx,edi
+        and     ecx,DWORD [24+esp]
+        and     edx,DWORD [28+esp]
+        and     esi,DWORD [8+esp]
+        and     edi,DWORD [12+esp]
+        or      ecx,esi
+        or      edx,edi
+        add     eax,ecx
+        adc     ebx,edx
+        mov     DWORD [esp],eax
+        mov     DWORD [4+esp],ebx
+        mov     dl,BYTE [ebp]
+        sub     esp,8
+        lea     ebp,[8+ebp]
+        cmp     dl,23
+        jne     NEAR L$00416_79_x86
+        mov     esi,DWORD [840+esp]
+        mov     edi,DWORD [844+esp]
+        mov     eax,DWORD [esi]
+        mov     ebx,DWORD [4+esi]
+        mov     ecx,DWORD [8+esi]
+        mov     edx,DWORD [12+esi]
+        add     eax,DWORD [8+esp]
+        adc     ebx,DWORD [12+esp]
+        mov     DWORD [esi],eax
+        mov     DWORD [4+esi],ebx
+        add     ecx,DWORD [16+esp]
+        adc     edx,DWORD [20+esp]
+        mov     DWORD [8+esi],ecx
+        mov     DWORD [12+esi],edx
+        mov     eax,DWORD [16+esi]
+        mov     ebx,DWORD [20+esi]
+        mov     ecx,DWORD [24+esi]
+        mov     edx,DWORD [28+esi]
+        add     eax,DWORD [24+esp]
+        adc     ebx,DWORD [28+esp]
+        mov     DWORD [16+esi],eax
+        mov     DWORD [20+esi],ebx
+        add     ecx,DWORD [32+esp]
+        adc     edx,DWORD [36+esp]
+        mov     DWORD [24+esi],ecx
+        mov     DWORD [28+esi],edx
+        mov     eax,DWORD [32+esi]
+        mov     ebx,DWORD [36+esi]
+        mov     ecx,DWORD [40+esi]
+        mov     edx,DWORD [44+esi]
+        add     eax,DWORD [40+esp]
+        adc     ebx,DWORD [44+esp]
+        mov     DWORD [32+esi],eax
+        mov     DWORD [36+esi],ebx
+        add     ecx,DWORD [48+esp]
+        adc     edx,DWORD [52+esp]
+        mov     DWORD [40+esi],ecx
+        mov     DWORD [44+esi],edx
+        mov     eax,DWORD [48+esi]
+        mov     ebx,DWORD [52+esi]
+        mov     ecx,DWORD [56+esi]
+        mov     edx,DWORD [60+esi]
+        add     eax,DWORD [56+esp]
+        adc     ebx,DWORD [60+esp]
+        mov     DWORD [48+esi],eax
+        mov     DWORD [52+esi],ebx
+        add     ecx,DWORD [64+esp]
+        adc     edx,DWORD [68+esp]
+        mov     DWORD [56+esi],ecx
+        mov     DWORD [60+esi],edx
+        add     esp,840
+        sub     ebp,640
+        cmp     edi,DWORD [8+esp]
+        jb      NEAR L$002loop_x86
+        mov     esp,DWORD [12+esp]
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+align   64
+L$001K512:
+dd      3609767458,1116352408
+dd      602891725,1899447441
+dd      3964484399,3049323471
+dd      2173295548,3921009573
+dd      4081628472,961987163
+dd      3053834265,1508970993
+dd      2937671579,2453635748
+dd      3664609560,2870763221
+dd      2734883394,3624381080
+dd      1164996542,310598401
+dd      1323610764,607225278
+dd      3590304994,1426881987
+dd      4068182383,1925078388
+dd      991336113,2162078206
+dd      633803317,2614888103
+dd      3479774868,3248222580
+dd      2666613458,3835390401
+dd      944711139,4022224774
+dd      2341262773,264347078
+dd      2007800933,604807628
+dd      1495990901,770255983
+dd      1856431235,1249150122
+dd      3175218132,1555081692
+dd      2198950837,1996064986
+dd      3999719339,2554220882
+dd      766784016,2821834349
+dd      2566594879,2952996808
+dd      3203337956,3210313671
+dd      1034457026,3336571891
+dd      2466948901,3584528711
+dd      3758326383,113926993
+dd      168717936,338241895
+dd      1188179964,666307205
+dd      1546045734,773529912
+dd      1522805485,1294757372
+dd      2643833823,1396182291
+dd      2343527390,1695183700
+dd      1014477480,1986661051
+dd      1206759142,2177026350
+dd      344077627,2456956037
+dd      1290863460,2730485921
+dd      3158454273,2820302411
+dd      3505952657,3259730800
+dd      106217008,3345764771
+dd      3606008344,3516065817
+dd      1432725776,3600352804
+dd      1467031594,4094571909
+dd      851169720,275423344
+dd      3100823752,430227734
+dd      1363258195,506948616
+dd      3750685593,659060556
+dd      3785050280,883997877
+dd      3318307427,958139571
+dd      3812723403,1322822218
+dd      2003034995,1537002063
+dd      3602036899,1747873779
+dd      1575990012,1955562222
+dd      1125592928,2024104815
+dd      2716904306,2227730452
+dd      442776044,2361852424
+dd      593698344,2428436474
+dd      3733110249,2756734187
+dd      2999351573,3204031479
+dd      3815920427,3329325298
+dd      3928383900,3391569614
+dd      566280711,3515267271
+dd      3454069534,3940187606
+dd      4000239992,4118630271
+dd      1914138554,116418474
+dd      2731055270,174292421
+dd      3203993006,289380356
+dd      320620315,460393269
+dd      587496836,685471733
+dd      1086792851,852142971
+dd      365543100,1017036298
+dd      2618297676,1126000580
+dd      3409855158,1288033470
+dd      4234509866,1501505948
+dd      987167468,1607167915
+dd      1246189591,1816402316
+dd      67438087,66051
+dd      202182159,134810123
+db      83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+db      110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+db      67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+db      112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+db      62,0
diff --git a/CryptoPkg/Library/OpensslLib/IA32/crypto/x86cpuid.nasm b/CryptoPkg/Library/OpensslLib/IA32/crypto/x86cpuid.nasm
new file mode 100644
index 0000000000..542e759ecd
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32/crypto/x86cpuid.nasm
@@ -0,0 +1,433 @@
+; WARNING: do not edit!
+; Generated from openssl/crypto/x86cpuid.pl
+;
+; Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
+;
+; Licensed under the OpenSSL license (the "License").  You may not use
+; this file except in compliance with the License.  You can obtain a copy
+; in the file LICENSE in the source distribution or at
+; https://www.openssl.org/source/license.html
+
+%ifidn __OUTPUT_FORMAT__,obj
+section code    use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section .text   code align=64
+%else
+section .text   code
+%endif
+global  _OPENSSL_ia32_cpuid
+align   16
+_OPENSSL_ia32_cpuid:
+L$_OPENSSL_ia32_cpuid_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        xor     edx,edx
+        pushfd
+        pop     eax
+        mov     ecx,eax
+        xor     eax,2097152
+        push    eax
+        popfd
+        pushfd
+        pop     eax
+        xor     ecx,eax
+        xor     eax,eax
+        mov     esi,DWORD [20+esp]
+        mov     DWORD [8+esi],eax
+        bt      ecx,21
+        jnc     NEAR L$000nocpuid
+        cpuid
+        mov     edi,eax
+        xor     eax,eax
+        cmp     ebx,1970169159
+        setne   al
+        mov     ebp,eax
+        cmp     edx,1231384169
+        setne   al
+        or      ebp,eax
+        cmp     ecx,1818588270
+        setne   al
+        or      ebp,eax
+        jz      NEAR L$001intel
+        cmp     ebx,1752462657
+        setne   al
+        mov     esi,eax
+        cmp     edx,1769238117
+        setne   al
+        or      esi,eax
+        cmp     ecx,1145913699
+        setne   al
+        or      esi,eax
+        jnz     NEAR L$001intel
+        mov     eax,2147483648
+        cpuid
+        cmp     eax,2147483649
+        jb      NEAR L$001intel
+        mov     esi,eax
+        mov     eax,2147483649
+        cpuid
+        or      ebp,ecx
+        and     ebp,2049
+        cmp     esi,2147483656
+        jb      NEAR L$001intel
+        mov     eax,2147483656
+        cpuid
+        movzx   esi,cl
+        inc     esi
+        mov     eax,1
+        xor     ecx,ecx
+        cpuid
+        bt      edx,28
+        jnc     NEAR L$002generic
+        shr     ebx,16
+        and     ebx,255
+        cmp     ebx,esi
+        ja      NEAR L$002generic
+        and     edx,4026531839
+        jmp     NEAR L$002generic
+L$001intel:
+        cmp     edi,4
+        mov     esi,-1
+        jb      NEAR L$003nocacheinfo
+        mov     eax,4
+        mov     ecx,0
+        cpuid
+        mov     esi,eax
+        shr     esi,14
+        and     esi,4095
+L$003nocacheinfo:
+        mov     eax,1
+        xor     ecx,ecx
+        cpuid
+        and     edx,3220176895
+        cmp     ebp,0
+        jne     NEAR L$004notintel
+        or      edx,1073741824
+        and     ah,15
+        cmp     ah,15
+        jne     NEAR L$004notintel
+        or      edx,1048576
+L$004notintel:
+        bt      edx,28
+        jnc     NEAR L$002generic
+        and     edx,4026531839
+        cmp     esi,0
+        je      NEAR L$002generic
+        or      edx,268435456
+        shr     ebx,16
+        cmp     bl,1
+        ja      NEAR L$002generic
+        and     edx,4026531839
+L$002generic:
+        and     ebp,2048
+        and     ecx,4294965247
+        mov     esi,edx
+        or      ebp,ecx
+        cmp     edi,7
+        mov     edi,DWORD [20+esp]
+        jb      NEAR L$005no_extended_info
+        mov     eax,7
+        xor     ecx,ecx
+        cpuid
+        mov     DWORD [8+edi],ebx
+L$005no_extended_info:
+        bt      ebp,27
+        jnc     NEAR L$006clear_avx
+        xor     ecx,ecx
+db      15,1,208
+        and     eax,6
+        cmp     eax,6
+        je      NEAR L$007done
+        cmp     eax,2
+        je      NEAR L$006clear_avx
+L$008clear_xmm:
+        and     ebp,4261412861
+        and     esi,4278190079
+L$006clear_avx:
+        and     ebp,4026525695
+        and     DWORD [8+edi],4294967263
+L$007done:
+        mov     eax,esi
+        mov     edx,ebp
+L$000nocpuid:
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+;extern _OPENSSL_ia32cap_P
+global  _OPENSSL_rdtsc
+align   16
+_OPENSSL_rdtsc:
+L$_OPENSSL_rdtsc_begin:
+        xor     eax,eax
+        xor     edx,edx
+        lea     ecx,[_OPENSSL_ia32cap_P]
+        bt      DWORD [ecx],4
+        jnc     NEAR L$009notsc
+        rdtsc
+L$009notsc:
+        ret
+global  _OPENSSL_instrument_halt
+align   16
+_OPENSSL_instrument_halt:
+L$_OPENSSL_instrument_halt_begin:
+        lea     ecx,[_OPENSSL_ia32cap_P]
+        bt      DWORD [ecx],4
+        jnc     NEAR L$010nohalt
+dd      2421723150
+        and     eax,3
+        jnz     NEAR L$010nohalt
+        pushfd
+        pop     eax
+        bt      eax,9
+        jnc     NEAR L$010nohalt
+        rdtsc
+        push    edx
+        push    eax
+        hlt
+        rdtsc
+        sub     eax,DWORD [esp]
+        sbb     edx,DWORD [4+esp]
+        add     esp,8
+        ret
+L$010nohalt:
+        xor     eax,eax
+        xor     edx,edx
+        ret
+global  _OPENSSL_far_spin
+align   16
+_OPENSSL_far_spin:
+L$_OPENSSL_far_spin_begin:
+        pushfd
+        pop     eax
+        bt      eax,9
+        jnc     NEAR L$011nospin
+        mov     eax,DWORD [4+esp]
+        mov     ecx,DWORD [8+esp]
+dd      2430111262
+        xor     eax,eax
+        mov     edx,DWORD [ecx]
+        jmp     NEAR L$012spin
+align   16
+L$012spin:
+        inc     eax
+        cmp     edx,DWORD [ecx]
+        je      NEAR L$012spin
+dd      529567888
+        ret
+L$011nospin:
+        xor     eax,eax
+        xor     edx,edx
+        ret
+global  _OPENSSL_wipe_cpu
+align   16
+_OPENSSL_wipe_cpu:
+L$_OPENSSL_wipe_cpu_begin:
+        xor     eax,eax
+        xor     edx,edx
+        lea     ecx,[_OPENSSL_ia32cap_P]
+        mov     ecx,DWORD [ecx]
+        bt      DWORD [ecx],1
+        jnc     NEAR L$013no_x87
+dd      4007259865,4007259865,4007259865,4007259865,2430851995
+L$013no_x87:
+        lea     eax,[4+esp]
+        ret
+global  _OPENSSL_atomic_add
+align   16
+_OPENSSL_atomic_add:
+L$_OPENSSL_atomic_add_begin:
+        mov     edx,DWORD [4+esp]
+        mov     ecx,DWORD [8+esp]
+        push    ebx
+        nop
+        mov     eax,DWORD [edx]
+L$014spin:
+        lea     ebx,[ecx*1+eax]
+        nop
+dd      447811568
+        jne     NEAR L$014spin
+        mov     eax,ebx
+        pop     ebx
+        ret
+global  _OPENSSL_cleanse
+align   16
+_OPENSSL_cleanse:
+L$_OPENSSL_cleanse_begin:
+        mov     edx,DWORD [4+esp]
+        mov     ecx,DWORD [8+esp]
+        xor     eax,eax
+        cmp     ecx,7
+        jae     NEAR L$015lot
+        cmp     ecx,0
+        je      NEAR L$016ret
+L$017little:
+        mov     BYTE [edx],al
+        sub     ecx,1
+        lea     edx,[1+edx]
+        jnz     NEAR L$017little
+L$016ret:
+        ret
+align   16
+L$015lot:
+        test    edx,3
+        jz      NEAR L$018aligned
+        mov     BYTE [edx],al
+        lea     ecx,[ecx-1]
+        lea     edx,[1+edx]
+        jmp     NEAR L$015lot
+L$018aligned:
+        mov     DWORD [edx],eax
+        lea     ecx,[ecx-4]
+        test    ecx,-4
+        lea     edx,[4+edx]
+        jnz     NEAR L$018aligned
+        cmp     ecx,0
+        jne     NEAR L$017little
+        ret
+global  _CRYPTO_memcmp
+align   16
+_CRYPTO_memcmp:
+L$_CRYPTO_memcmp_begin:
+        push    esi
+        push    edi
+        mov     esi,DWORD [12+esp]
+        mov     edi,DWORD [16+esp]
+        mov     ecx,DWORD [20+esp]
+        xor     eax,eax
+        xor     edx,edx
+        cmp     ecx,0
+        je      NEAR L$019no_data
+L$020loop:
+        mov     dl,BYTE [esi]
+        lea     esi,[1+esi]
+        xor     dl,BYTE [edi]
+        lea     edi,[1+edi]
+        or      al,dl
+        dec     ecx
+        jnz     NEAR L$020loop
+        neg     eax
+        shr     eax,31
+L$019no_data:
+        pop     edi
+        pop     esi
+        ret
+global  _OPENSSL_instrument_bus
+align   16
+_OPENSSL_instrument_bus:
+L$_OPENSSL_instrument_bus_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     eax,0
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _OPENSSL_instrument_bus2
+align   16
+_OPENSSL_instrument_bus2:
+L$_OPENSSL_instrument_bus2_begin:
+        push    ebp
+        push    ebx
+        push    esi
+        push    edi
+        mov     eax,0
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret
+global  _OPENSSL_ia32_rdrand_bytes
+align   16
+_OPENSSL_ia32_rdrand_bytes:
+L$_OPENSSL_ia32_rdrand_bytes_begin:
+        push    edi
+        push    ebx
+        xor     eax,eax
+        mov     edi,DWORD [12+esp]
+        mov     ebx,DWORD [16+esp]
+        cmp     ebx,0
+        je      NEAR L$021done
+        mov     ecx,8
+L$022loop:
+db      15,199,242
+        jc      NEAR L$023break
+        loop    L$022loop
+        jmp     NEAR L$021done
+align   16
+L$023break:
+        cmp     ebx,4
+        jb      NEAR L$024tail
+        mov     DWORD [edi],edx
+        lea     edi,[4+edi]
+        add     eax,4
+        sub     ebx,4
+        jz      NEAR L$021done
+        mov     ecx,8
+        jmp     NEAR L$022loop
+align   16
+L$024tail:
+        mov     BYTE [edi],dl
+        lea     edi,[1+edi]
+        inc     eax
+        shr     edx,8
+        dec     ebx
+        jnz     NEAR L$024tail
+L$021done:
+        xor     edx,edx
+        pop     ebx
+        pop     edi
+        ret
+global  _OPENSSL_ia32_rdseed_bytes
+align   16
+_OPENSSL_ia32_rdseed_bytes:
+L$_OPENSSL_ia32_rdseed_bytes_begin:
+        push    edi
+        push    ebx
+        xor     eax,eax
+        mov     edi,DWORD [12+esp]
+        mov     ebx,DWORD [16+esp]
+        cmp     ebx,0
+        je      NEAR L$025done
+        mov     ecx,8
+L$026loop:
+db      15,199,250
+        jc      NEAR L$027break
+        loop    L$026loop
+        jmp     NEAR L$025done
+align   16
+L$027break:
+        cmp     ebx,4
+        jb      NEAR L$028tail
+        mov     DWORD [edi],edx
+        lea     edi,[4+edi]
+        add     eax,4
+        sub     ebx,4
+        jz      NEAR L$025done
+        mov     ecx,8
+        jmp     NEAR L$026loop
+align   16
+L$028tail:
+        mov     BYTE [edi],dl
+        lea     edi,[1+edi]
+        inc     eax
+        shr     edx,8
+        dec     ebx
+        jnz     NEAR L$028tail
+L$025done:
+        xor     edx,edx
+        pop     ebx
+        pop     edi
+        ret
+segment .bss
+common  _OPENSSL_ia32cap_P 16
+segment .CRT$XCU data align=4
+extern  _OPENSSL_cpuid_setup
+dd      _OPENSSL_cpuid_setup
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/aesni-x86.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/aesni-x86.S
new file mode 100644
index 0000000000..836e831b18
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/aesni-x86.S
@@ -0,0 +1,3247 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/aes/asm/aesni-x86.pl
+#
+# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.globl  aesni_encrypt
+.type   aesni_encrypt,@function
+.align  16
+aesni_encrypt:
+.L_aesni_encrypt_begin:
+        movl    4(%esp),%eax
+        movl    12(%esp),%edx
+        movups  (%eax),%xmm2
+        movl    240(%edx),%ecx
+        movl    8(%esp),%eax
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L000enc1_loop_1:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L000enc1_loop_1
+.byte   102,15,56,221,209
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        movups  %xmm2,(%eax)
+        pxor    %xmm2,%xmm2
+        ret
+.size   aesni_encrypt,.-.L_aesni_encrypt_begin
+.globl  aesni_decrypt
+.type   aesni_decrypt,@function
+.align  16
+aesni_decrypt:
+.L_aesni_decrypt_begin:
+        movl    4(%esp),%eax
+        movl    12(%esp),%edx
+        movups  (%eax),%xmm2
+        movl    240(%edx),%ecx
+        movl    8(%esp),%eax
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L001dec1_loop_2:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L001dec1_loop_2
+.byte   102,15,56,223,209
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        movups  %xmm2,(%eax)
+        pxor    %xmm2,%xmm2
+        ret
+.size   aesni_decrypt,.-.L_aesni_decrypt_begin
+.type   _aesni_encrypt2,@function
+.align  16
+_aesni_encrypt2:
+        movups  (%edx),%xmm0
+        shll    $4,%ecx
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        movups  32(%edx),%xmm0
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+        addl    $16,%ecx
+.L002enc2_loop:
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,220,208
+.byte   102,15,56,220,216
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L002enc2_loop
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,221,208
+.byte   102,15,56,221,216
+        ret
+.size   _aesni_encrypt2,.-_aesni_encrypt2
+.type   _aesni_decrypt2,@function
+.align  16
+_aesni_decrypt2:
+        movups  (%edx),%xmm0
+        shll    $4,%ecx
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        movups  32(%edx),%xmm0
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+        addl    $16,%ecx
+.L003dec2_loop:
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,222,208
+.byte   102,15,56,222,216
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L003dec2_loop
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,223,208
+.byte   102,15,56,223,216
+        ret
+.size   _aesni_decrypt2,.-_aesni_decrypt2
+.type   _aesni_encrypt3,@function
+.align  16
+_aesni_encrypt3:
+        movups  (%edx),%xmm0
+        shll    $4,%ecx
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+        movups  32(%edx),%xmm0
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+        addl    $16,%ecx
+.L004enc3_loop:
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,220,208
+.byte   102,15,56,220,216
+.byte   102,15,56,220,224
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L004enc3_loop
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.byte   102,15,56,221,208
+.byte   102,15,56,221,216
+.byte   102,15,56,221,224
+        ret
+.size   _aesni_encrypt3,.-_aesni_encrypt3
+.type   _aesni_decrypt3,@function
+.align  16
+_aesni_decrypt3:
+        movups  (%edx),%xmm0
+        shll    $4,%ecx
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+        movups  32(%edx),%xmm0
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+        addl    $16,%ecx
+.L005dec3_loop:
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,222,208
+.byte   102,15,56,222,216
+.byte   102,15,56,222,224
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L005dec3_loop
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.byte   102,15,56,223,208
+.byte   102,15,56,223,216
+.byte   102,15,56,223,224
+        ret
+.size   _aesni_decrypt3,.-_aesni_decrypt3
+.type   _aesni_encrypt4,@function
+.align  16
+_aesni_encrypt4:
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        shll    $4,%ecx
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+        pxor    %xmm0,%xmm5
+        movups  32(%edx),%xmm0
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+.byte   15,31,64,0
+        addl    $16,%ecx
+.L006enc4_loop:
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,220,208
+.byte   102,15,56,220,216
+.byte   102,15,56,220,224
+.byte   102,15,56,220,232
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L006enc4_loop
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+.byte   102,15,56,221,208
+.byte   102,15,56,221,216
+.byte   102,15,56,221,224
+.byte   102,15,56,221,232
+        ret
+.size   _aesni_encrypt4,.-_aesni_encrypt4
+.type   _aesni_decrypt4,@function
+.align  16
+_aesni_decrypt4:
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        shll    $4,%ecx
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+        pxor    %xmm0,%xmm5
+        movups  32(%edx),%xmm0
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+.byte   15,31,64,0
+        addl    $16,%ecx
+.L007dec4_loop:
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.byte   102,15,56,222,233
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,222,208
+.byte   102,15,56,222,216
+.byte   102,15,56,222,224
+.byte   102,15,56,222,232
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L007dec4_loop
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.byte   102,15,56,222,233
+.byte   102,15,56,223,208
+.byte   102,15,56,223,216
+.byte   102,15,56,223,224
+.byte   102,15,56,223,232
+        ret
+.size   _aesni_decrypt4,.-_aesni_decrypt4
+.type   _aesni_encrypt6,@function
+.align  16
+_aesni_encrypt6:
+        movups  (%edx),%xmm0
+        shll    $4,%ecx
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+.byte   102,15,56,220,209
+        pxor    %xmm0,%xmm5
+        pxor    %xmm0,%xmm6
+.byte   102,15,56,220,217
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+.byte   102,15,56,220,225
+        pxor    %xmm0,%xmm7
+        movups  (%edx,%ecx,1),%xmm0
+        addl    $16,%ecx
+        jmp     .L008_aesni_encrypt6_inner
+.align  16
+.L009enc6_loop:
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.L008_aesni_encrypt6_inner:
+.byte   102,15,56,220,233
+.byte   102,15,56,220,241
+.byte   102,15,56,220,249
+.L_aesni_encrypt6_enter:
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,220,208
+.byte   102,15,56,220,216
+.byte   102,15,56,220,224
+.byte   102,15,56,220,232
+.byte   102,15,56,220,240
+.byte   102,15,56,220,248
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L009enc6_loop
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+.byte   102,15,56,220,241
+.byte   102,15,56,220,249
+.byte   102,15,56,221,208
+.byte   102,15,56,221,216
+.byte   102,15,56,221,224
+.byte   102,15,56,221,232
+.byte   102,15,56,221,240
+.byte   102,15,56,221,248
+        ret
+.size   _aesni_encrypt6,.-_aesni_encrypt6
+.type   _aesni_decrypt6,@function
+.align  16
+_aesni_decrypt6:
+        movups  (%edx),%xmm0
+        shll    $4,%ecx
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+.byte   102,15,56,222,209
+        pxor    %xmm0,%xmm5
+        pxor    %xmm0,%xmm6
+.byte   102,15,56,222,217
+        leal    32(%edx,%ecx,1),%edx
+        negl    %ecx
+.byte   102,15,56,222,225
+        pxor    %xmm0,%xmm7
+        movups  (%edx,%ecx,1),%xmm0
+        addl    $16,%ecx
+        jmp     .L010_aesni_decrypt6_inner
+.align  16
+.L011dec6_loop:
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.L010_aesni_decrypt6_inner:
+.byte   102,15,56,222,233
+.byte   102,15,56,222,241
+.byte   102,15,56,222,249
+.L_aesni_decrypt6_enter:
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,222,208
+.byte   102,15,56,222,216
+.byte   102,15,56,222,224
+.byte   102,15,56,222,232
+.byte   102,15,56,222,240
+.byte   102,15,56,222,248
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L011dec6_loop
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.byte   102,15,56,222,233
+.byte   102,15,56,222,241
+.byte   102,15,56,222,249
+.byte   102,15,56,223,208
+.byte   102,15,56,223,216
+.byte   102,15,56,223,224
+.byte   102,15,56,223,232
+.byte   102,15,56,223,240
+.byte   102,15,56,223,248
+        ret
+.size   _aesni_decrypt6,.-_aesni_decrypt6
+.globl  aesni_ecb_encrypt
+.type   aesni_ecb_encrypt,@function
+.align  16
+aesni_ecb_encrypt:
+.L_aesni_ecb_encrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movl    36(%esp),%ebx
+        andl    $-16,%eax
+        jz      .L012ecb_ret
+        movl    240(%edx),%ecx
+        testl   %ebx,%ebx
+        jz      .L013ecb_decrypt
+        movl    %edx,%ebp
+        movl    %ecx,%ebx
+        cmpl    $96,%eax
+        jb      .L014ecb_enc_tail
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        movdqu  80(%esi),%xmm7
+        leal    96(%esi),%esi
+        subl    $96,%eax
+        jmp     .L015ecb_enc_loop6_enter
+.align  16
+.L016ecb_enc_loop6:
+        movups  %xmm2,(%edi)
+        movdqu  (%esi),%xmm2
+        movups  %xmm3,16(%edi)
+        movdqu  16(%esi),%xmm3
+        movups  %xmm4,32(%edi)
+        movdqu  32(%esi),%xmm4
+        movups  %xmm5,48(%edi)
+        movdqu  48(%esi),%xmm5
+        movups  %xmm6,64(%edi)
+        movdqu  64(%esi),%xmm6
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        movdqu  80(%esi),%xmm7
+        leal    96(%esi),%esi
+.L015ecb_enc_loop6_enter:
+        call    _aesni_encrypt6
+        movl    %ebp,%edx
+        movl    %ebx,%ecx
+        subl    $96,%eax
+        jnc     .L016ecb_enc_loop6
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        addl    $96,%eax
+        jz      .L012ecb_ret
+.L014ecb_enc_tail:
+        movups  (%esi),%xmm2
+        cmpl    $32,%eax
+        jb      .L017ecb_enc_one
+        movups  16(%esi),%xmm3
+        je      .L018ecb_enc_two
+        movups  32(%esi),%xmm4
+        cmpl    $64,%eax
+        jb      .L019ecb_enc_three
+        movups  48(%esi),%xmm5
+        je      .L020ecb_enc_four
+        movups  64(%esi),%xmm6
+        xorps   %xmm7,%xmm7
+        call    _aesni_encrypt6
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L017ecb_enc_one:
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L021enc1_loop_3:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L021enc1_loop_3
+.byte   102,15,56,221,209
+        movups  %xmm2,(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L018ecb_enc_two:
+        call    _aesni_encrypt2
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L019ecb_enc_three:
+        call    _aesni_encrypt3
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L020ecb_enc_four:
+        call    _aesni_encrypt4
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L013ecb_decrypt:
+        movl    %edx,%ebp
+        movl    %ecx,%ebx
+        cmpl    $96,%eax
+        jb      .L022ecb_dec_tail
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        movdqu  80(%esi),%xmm7
+        leal    96(%esi),%esi
+        subl    $96,%eax
+        jmp     .L023ecb_dec_loop6_enter
+.align  16
+.L024ecb_dec_loop6:
+        movups  %xmm2,(%edi)
+        movdqu  (%esi),%xmm2
+        movups  %xmm3,16(%edi)
+        movdqu  16(%esi),%xmm3
+        movups  %xmm4,32(%edi)
+        movdqu  32(%esi),%xmm4
+        movups  %xmm5,48(%edi)
+        movdqu  48(%esi),%xmm5
+        movups  %xmm6,64(%edi)
+        movdqu  64(%esi),%xmm6
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        movdqu  80(%esi),%xmm7
+        leal    96(%esi),%esi
+.L023ecb_dec_loop6_enter:
+        call    _aesni_decrypt6
+        movl    %ebp,%edx
+        movl    %ebx,%ecx
+        subl    $96,%eax
+        jnc     .L024ecb_dec_loop6
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        addl    $96,%eax
+        jz      .L012ecb_ret
+.L022ecb_dec_tail:
+        movups  (%esi),%xmm2
+        cmpl    $32,%eax
+        jb      .L025ecb_dec_one
+        movups  16(%esi),%xmm3
+        je      .L026ecb_dec_two
+        movups  32(%esi),%xmm4
+        cmpl    $64,%eax
+        jb      .L027ecb_dec_three
+        movups  48(%esi),%xmm5
+        je      .L028ecb_dec_four
+        movups  64(%esi),%xmm6
+        xorps   %xmm7,%xmm7
+        call    _aesni_decrypt6
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L025ecb_dec_one:
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L029dec1_loop_4:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L029dec1_loop_4
+.byte   102,15,56,223,209
+        movups  %xmm2,(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L026ecb_dec_two:
+        call    _aesni_decrypt2
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L027ecb_dec_three:
+        call    _aesni_decrypt3
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        jmp     .L012ecb_ret
+.align  16
+.L028ecb_dec_four:
+        call    _aesni_decrypt4
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+.L012ecb_ret:
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        pxor    %xmm5,%xmm5
+        pxor    %xmm6,%xmm6
+        pxor    %xmm7,%xmm7
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
+.globl  aesni_ccm64_encrypt_blocks
+.type   aesni_ccm64_encrypt_blocks,@function
+.align  16
+aesni_ccm64_encrypt_blocks:
+.L_aesni_ccm64_encrypt_blocks_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movl    36(%esp),%ebx
+        movl    40(%esp),%ecx
+        movl    %esp,%ebp
+        subl    $60,%esp
+        andl    $-16,%esp
+        movl    %ebp,48(%esp)
+        movdqu  (%ebx),%xmm7
+        movdqu  (%ecx),%xmm3
+        movl    240(%edx),%ecx
+        movl    $202182159,(%esp)
+        movl    $134810123,4(%esp)
+        movl    $67438087,8(%esp)
+        movl    $66051,12(%esp)
+        movl    $1,%ebx
+        xorl    %ebp,%ebp
+        movl    %ebx,16(%esp)
+        movl    %ebp,20(%esp)
+        movl    %ebp,24(%esp)
+        movl    %ebp,28(%esp)
+        shll    $4,%ecx
+        movl    $16,%ebx
+        leal    (%edx),%ebp
+        movdqa  (%esp),%xmm5
+        movdqa  %xmm7,%xmm2
+        leal    32(%edx,%ecx,1),%edx
+        subl    %ecx,%ebx
+.byte   102,15,56,0,253
+.L030ccm64_enc_outer:
+        movups  (%ebp),%xmm0
+        movl    %ebx,%ecx
+        movups  (%esi),%xmm6
+        xorps   %xmm0,%xmm2
+        movups  16(%ebp),%xmm1
+        xorps   %xmm6,%xmm0
+        xorps   %xmm0,%xmm3
+        movups  32(%ebp),%xmm0
+.L031ccm64_enc2_loop:
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,220,208
+.byte   102,15,56,220,216
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L031ccm64_enc2_loop
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+        paddq   16(%esp),%xmm7
+        decl    %eax
+.byte   102,15,56,221,208
+.byte   102,15,56,221,216
+        leal    16(%esi),%esi
+        xorps   %xmm2,%xmm6
+        movdqa  %xmm7,%xmm2
+        movups  %xmm6,(%edi)
+.byte   102,15,56,0,213
+        leal    16(%edi),%edi
+        jnz     .L030ccm64_enc_outer
+        movl    48(%esp),%esp
+        movl    40(%esp),%edi
+        movups  %xmm3,(%edi)
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        pxor    %xmm5,%xmm5
+        pxor    %xmm6,%xmm6
+        pxor    %xmm7,%xmm7
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
+.globl  aesni_ccm64_decrypt_blocks
+.type   aesni_ccm64_decrypt_blocks,@function
+.align  16
+aesni_ccm64_decrypt_blocks:
+.L_aesni_ccm64_decrypt_blocks_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movl    36(%esp),%ebx
+        movl    40(%esp),%ecx
+        movl    %esp,%ebp
+        subl    $60,%esp
+        andl    $-16,%esp
+        movl    %ebp,48(%esp)
+        movdqu  (%ebx),%xmm7
+        movdqu  (%ecx),%xmm3
+        movl    240(%edx),%ecx
+        movl    $202182159,(%esp)
+        movl    $134810123,4(%esp)
+        movl    $67438087,8(%esp)
+        movl    $66051,12(%esp)
+        movl    $1,%ebx
+        xorl    %ebp,%ebp
+        movl    %ebx,16(%esp)
+        movl    %ebp,20(%esp)
+        movl    %ebp,24(%esp)
+        movl    %ebp,28(%esp)
+        movdqa  (%esp),%xmm5
+        movdqa  %xmm7,%xmm2
+        movl    %edx,%ebp
+        movl    %ecx,%ebx
+.byte   102,15,56,0,253
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L032enc1_loop_5:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L032enc1_loop_5
+.byte   102,15,56,221,209
+        shll    $4,%ebx
+        movl    $16,%ecx
+        movups  (%esi),%xmm6
+        paddq   16(%esp),%xmm7
+        leal    16(%esi),%esi
+        subl    %ebx,%ecx
+        leal    32(%ebp,%ebx,1),%edx
+        movl    %ecx,%ebx
+        jmp     .L033ccm64_dec_outer
+.align  16
+.L033ccm64_dec_outer:
+        xorps   %xmm2,%xmm6
+        movdqa  %xmm7,%xmm2
+        movups  %xmm6,(%edi)
+        leal    16(%edi),%edi
+.byte   102,15,56,0,213
+        subl    $1,%eax
+        jz      .L034ccm64_dec_break
+        movups  (%ebp),%xmm0
+        movl    %ebx,%ecx
+        movups  16(%ebp),%xmm1
+        xorps   %xmm0,%xmm6
+        xorps   %xmm0,%xmm2
+        xorps   %xmm6,%xmm3
+        movups  32(%ebp),%xmm0
+.L035ccm64_dec2_loop:
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+        movups  (%edx,%ecx,1),%xmm1
+        addl    $32,%ecx
+.byte   102,15,56,220,208
+.byte   102,15,56,220,216
+        movups  -16(%edx,%ecx,1),%xmm0
+        jnz     .L035ccm64_dec2_loop
+        movups  (%esi),%xmm6
+        paddq   16(%esp),%xmm7
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,221,208
+.byte   102,15,56,221,216
+        leal    16(%esi),%esi
+        jmp     .L033ccm64_dec_outer
+.align  16
+.L034ccm64_dec_break:
+        movl    240(%ebp),%ecx
+        movl    %ebp,%edx
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm6
+        leal    32(%edx),%edx
+        xorps   %xmm6,%xmm3
+.L036enc1_loop_6:
+.byte   102,15,56,220,217
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L036enc1_loop_6
+.byte   102,15,56,221,217
+        movl    48(%esp),%esp
+        movl    40(%esp),%edi
+        movups  %xmm3,(%edi)
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        pxor    %xmm5,%xmm5
+        pxor    %xmm6,%xmm6
+        pxor    %xmm7,%xmm7
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
+.globl  aesni_ctr32_encrypt_blocks
+.type   aesni_ctr32_encrypt_blocks,@function
+.align  16
+aesni_ctr32_encrypt_blocks:
+.L_aesni_ctr32_encrypt_blocks_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movl    36(%esp),%ebx
+        movl    %esp,%ebp
+        subl    $88,%esp
+        andl    $-16,%esp
+        movl    %ebp,80(%esp)
+        cmpl    $1,%eax
+        je      .L037ctr32_one_shortcut
+        movdqu  (%ebx),%xmm7
+        movl    $202182159,(%esp)
+        movl    $134810123,4(%esp)
+        movl    $67438087,8(%esp)
+        movl    $66051,12(%esp)
+        movl    $6,%ecx
+        xorl    %ebp,%ebp
+        movl    %ecx,16(%esp)
+        movl    %ecx,20(%esp)
+        movl    %ecx,24(%esp)
+        movl    %ebp,28(%esp)
+.byte   102,15,58,22,251,3
+.byte   102,15,58,34,253,3
+        movl    240(%edx),%ecx
+        bswap   %ebx
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        movdqa  (%esp),%xmm2
+.byte   102,15,58,34,195,0
+        leal    3(%ebx),%ebp
+.byte   102,15,58,34,205,0
+        incl    %ebx
+.byte   102,15,58,34,195,1
+        incl    %ebp
+.byte   102,15,58,34,205,1
+        incl    %ebx
+.byte   102,15,58,34,195,2
+        incl    %ebp
+.byte   102,15,58,34,205,2
+        movdqa  %xmm0,48(%esp)
+.byte   102,15,56,0,194
+        movdqu  (%edx),%xmm6
+        movdqa  %xmm1,64(%esp)
+.byte   102,15,56,0,202
+        pshufd  $192,%xmm0,%xmm2
+        pshufd  $128,%xmm0,%xmm3
+        cmpl    $6,%eax
+        jb      .L038ctr32_tail
+        pxor    %xmm6,%xmm7
+        shll    $4,%ecx
+        movl    $16,%ebx
+        movdqa  %xmm7,32(%esp)
+        movl    %edx,%ebp
+        subl    %ecx,%ebx
+        leal    32(%edx,%ecx,1),%edx
+        subl    $6,%eax
+        jmp     .L039ctr32_loop6
+.align  16
+.L039ctr32_loop6:
+        pshufd  $64,%xmm0,%xmm4
+        movdqa  32(%esp),%xmm0
+        pshufd  $192,%xmm1,%xmm5
+        pxor    %xmm0,%xmm2
+        pshufd  $128,%xmm1,%xmm6
+        pxor    %xmm0,%xmm3
+        pshufd  $64,%xmm1,%xmm7
+        movups  16(%ebp),%xmm1
+        pxor    %xmm0,%xmm4
+        pxor    %xmm0,%xmm5
+.byte   102,15,56,220,209
+        pxor    %xmm0,%xmm6
+        pxor    %xmm0,%xmm7
+.byte   102,15,56,220,217
+        movups  32(%ebp),%xmm0
+        movl    %ebx,%ecx
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+.byte   102,15,56,220,241
+.byte   102,15,56,220,249
+        call    .L_aesni_encrypt6_enter
+        movups  (%esi),%xmm1
+        movups  16(%esi),%xmm0
+        xorps   %xmm1,%xmm2
+        movups  32(%esi),%xmm1
+        xorps   %xmm0,%xmm3
+        movups  %xmm2,(%edi)
+        movdqa  16(%esp),%xmm0
+        xorps   %xmm1,%xmm4
+        movdqa  64(%esp),%xmm1
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        paddd   %xmm0,%xmm1
+        paddd   48(%esp),%xmm0
+        movdqa  (%esp),%xmm2
+        movups  48(%esi),%xmm3
+        movups  64(%esi),%xmm4
+        xorps   %xmm3,%xmm5
+        movups  80(%esi),%xmm3
+        leal    96(%esi),%esi
+        movdqa  %xmm0,48(%esp)
+.byte   102,15,56,0,194
+        xorps   %xmm4,%xmm6
+        movups  %xmm5,48(%edi)
+        xorps   %xmm3,%xmm7
+        movdqa  %xmm1,64(%esp)
+.byte   102,15,56,0,202
+        movups  %xmm6,64(%edi)
+        pshufd  $192,%xmm0,%xmm2
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        pshufd  $128,%xmm0,%xmm3
+        subl    $6,%eax
+        jnc     .L039ctr32_loop6
+        addl    $6,%eax
+        jz      .L040ctr32_ret
+        movdqu  (%ebp),%xmm7
+        movl    %ebp,%edx
+        pxor    32(%esp),%xmm7
+        movl    240(%ebp),%ecx
+.L038ctr32_tail:
+        por     %xmm7,%xmm2
+        cmpl    $2,%eax
+        jb      .L041ctr32_one
+        pshufd  $64,%xmm0,%xmm4
+        por     %xmm7,%xmm3
+        je      .L042ctr32_two
+        pshufd  $192,%xmm1,%xmm5
+        por     %xmm7,%xmm4
+        cmpl    $4,%eax
+        jb      .L043ctr32_three
+        pshufd  $128,%xmm1,%xmm6
+        por     %xmm7,%xmm5
+        je      .L044ctr32_four
+        por     %xmm7,%xmm6
+        call    _aesni_encrypt6
+        movups  (%esi),%xmm1
+        movups  16(%esi),%xmm0
+        xorps   %xmm1,%xmm2
+        movups  32(%esi),%xmm1
+        xorps   %xmm0,%xmm3
+        movups  48(%esi),%xmm0
+        xorps   %xmm1,%xmm4
+        movups  64(%esi),%xmm1
+        xorps   %xmm0,%xmm5
+        movups  %xmm2,(%edi)
+        xorps   %xmm1,%xmm6
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        jmp     .L040ctr32_ret
+.align  16
+.L037ctr32_one_shortcut:
+        movups  (%ebx),%xmm2
+        movl    240(%edx),%ecx
+.L041ctr32_one:
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L045enc1_loop_7:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L045enc1_loop_7
+.byte   102,15,56,221,209
+        movups  (%esi),%xmm6
+        xorps   %xmm2,%xmm6
+        movups  %xmm6,(%edi)
+        jmp     .L040ctr32_ret
+.align  16
+.L042ctr32_two:
+        call    _aesni_encrypt2
+        movups  (%esi),%xmm5
+        movups  16(%esi),%xmm6
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        jmp     .L040ctr32_ret
+.align  16
+.L043ctr32_three:
+        call    _aesni_encrypt3
+        movups  (%esi),%xmm5
+        movups  16(%esi),%xmm6
+        xorps   %xmm5,%xmm2
+        movups  32(%esi),%xmm7
+        xorps   %xmm6,%xmm3
+        movups  %xmm2,(%edi)
+        xorps   %xmm7,%xmm4
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        jmp     .L040ctr32_ret
+.align  16
+.L044ctr32_four:
+        call    _aesni_encrypt4
+        movups  (%esi),%xmm6
+        movups  16(%esi),%xmm7
+        movups  32(%esi),%xmm1
+        xorps   %xmm6,%xmm2
+        movups  48(%esi),%xmm0
+        xorps   %xmm7,%xmm3
+        movups  %xmm2,(%edi)
+        xorps   %xmm1,%xmm4
+        movups  %xmm3,16(%edi)
+        xorps   %xmm0,%xmm5
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+.L040ctr32_ret:
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        movdqa  %xmm0,32(%esp)
+        pxor    %xmm5,%xmm5
+        movdqa  %xmm0,48(%esp)
+        pxor    %xmm6,%xmm6
+        movdqa  %xmm0,64(%esp)
+        pxor    %xmm7,%xmm7
+        movl    80(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
+.globl  aesni_xts_encrypt
+.type   aesni_xts_encrypt,@function
+.align  16
+aesni_xts_encrypt:
+.L_aesni_xts_encrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    36(%esp),%edx
+        movl    40(%esp),%esi
+        movl    240(%edx),%ecx
+        movups  (%esi),%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L046enc1_loop_8:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L046enc1_loop_8
+.byte   102,15,56,221,209
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movl    %esp,%ebp
+        subl    $120,%esp
+        movl    240(%edx),%ecx
+        andl    $-16,%esp
+        movl    $135,96(%esp)
+        movl    $0,100(%esp)
+        movl    $1,104(%esp)
+        movl    $0,108(%esp)
+        movl    %eax,112(%esp)
+        movl    %ebp,116(%esp)
+        movdqa  %xmm2,%xmm1
+        pxor    %xmm0,%xmm0
+        movdqa  96(%esp),%xmm3
+        pcmpgtd %xmm1,%xmm0
+        andl    $-16,%eax
+        movl    %edx,%ebp
+        movl    %ecx,%ebx
+        subl    $96,%eax
+        jc      .L047xts_enc_short
+        shll    $4,%ecx
+        movl    $16,%ebx
+        subl    %ecx,%ebx
+        leal    32(%edx,%ecx,1),%edx
+        jmp     .L048xts_enc_loop6
+.align  16
+.L048xts_enc_loop6:
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,16(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,32(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,48(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm7
+        movdqa  %xmm1,64(%esp)
+        paddq   %xmm1,%xmm1
+        movups  (%ebp),%xmm0
+        pand    %xmm3,%xmm7
+        movups  (%esi),%xmm2
+        pxor    %xmm1,%xmm7
+        movl    %ebx,%ecx
+        movdqu  16(%esi),%xmm3
+        xorps   %xmm0,%xmm2
+        movdqu  32(%esi),%xmm4
+        pxor    %xmm0,%xmm3
+        movdqu  48(%esi),%xmm5
+        pxor    %xmm0,%xmm4
+        movdqu  64(%esi),%xmm6
+        pxor    %xmm0,%xmm5
+        movdqu  80(%esi),%xmm1
+        pxor    %xmm0,%xmm6
+        leal    96(%esi),%esi
+        pxor    (%esp),%xmm2
+        movdqa  %xmm7,80(%esp)
+        pxor    %xmm1,%xmm7
+        movups  16(%ebp),%xmm1
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+.byte   102,15,56,220,209
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+.byte   102,15,56,220,217
+        pxor    %xmm0,%xmm7
+        movups  32(%ebp),%xmm0
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+.byte   102,15,56,220,241
+.byte   102,15,56,220,249
+        call    .L_aesni_encrypt6_enter
+        movdqa  80(%esp),%xmm1
+        pxor    %xmm0,%xmm0
+        xorps   (%esp),%xmm2
+        pcmpgtd %xmm1,%xmm0
+        xorps   16(%esp),%xmm3
+        movups  %xmm2,(%edi)
+        xorps   32(%esp),%xmm4
+        movups  %xmm3,16(%edi)
+        xorps   48(%esp),%xmm5
+        movups  %xmm4,32(%edi)
+        xorps   64(%esp),%xmm6
+        movups  %xmm5,48(%edi)
+        xorps   %xmm1,%xmm7
+        movups  %xmm6,64(%edi)
+        pshufd  $19,%xmm0,%xmm2
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        movdqa  96(%esp),%xmm3
+        pxor    %xmm0,%xmm0
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        subl    $96,%eax
+        jnc     .L048xts_enc_loop6
+        movl    240(%ebp),%ecx
+        movl    %ebp,%edx
+        movl    %ecx,%ebx
+.L047xts_enc_short:
+        addl    $96,%eax
+        jz      .L049xts_enc_done6x
+        movdqa  %xmm1,%xmm5
+        cmpl    $32,%eax
+        jb      .L050xts_enc_one
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        je      .L051xts_enc_two
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,%xmm6
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        cmpl    $64,%eax
+        jb      .L052xts_enc_three
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,%xmm7
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        movdqa  %xmm5,(%esp)
+        movdqa  %xmm6,16(%esp)
+        je      .L053xts_enc_four
+        movdqa  %xmm7,32(%esp)
+        pshufd  $19,%xmm0,%xmm7
+        movdqa  %xmm1,48(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm7
+        pxor    %xmm1,%xmm7
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        pxor    (%esp),%xmm2
+        movdqu  48(%esi),%xmm5
+        pxor    16(%esp),%xmm3
+        movdqu  64(%esi),%xmm6
+        pxor    32(%esp),%xmm4
+        leal    80(%esi),%esi
+        pxor    48(%esp),%xmm5
+        movdqa  %xmm7,64(%esp)
+        pxor    %xmm7,%xmm6
+        call    _aesni_encrypt6
+        movaps  64(%esp),%xmm1
+        xorps   (%esp),%xmm2
+        xorps   16(%esp),%xmm3
+        xorps   32(%esp),%xmm4
+        movups  %xmm2,(%edi)
+        xorps   48(%esp),%xmm5
+        movups  %xmm3,16(%edi)
+        xorps   %xmm1,%xmm6
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        leal    80(%edi),%edi
+        jmp     .L054xts_enc_done
+.align  16
+.L050xts_enc_one:
+        movups  (%esi),%xmm2
+        leal    16(%esi),%esi
+        xorps   %xmm5,%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L055enc1_loop_9:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L055enc1_loop_9
+.byte   102,15,56,221,209
+        xorps   %xmm5,%xmm2
+        movups  %xmm2,(%edi)
+        leal    16(%edi),%edi
+        movdqa  %xmm5,%xmm1
+        jmp     .L054xts_enc_done
+.align  16
+.L051xts_enc_two:
+        movaps  %xmm1,%xmm6
+        movups  (%esi),%xmm2
+        movups  16(%esi),%xmm3
+        leal    32(%esi),%esi
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        call    _aesni_encrypt2
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        leal    32(%edi),%edi
+        movdqa  %xmm6,%xmm1
+        jmp     .L054xts_enc_done
+.align  16
+.L052xts_enc_three:
+        movaps  %xmm1,%xmm7
+        movups  (%esi),%xmm2
+        movups  16(%esi),%xmm3
+        movups  32(%esi),%xmm4
+        leal    48(%esi),%esi
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm7,%xmm4
+        call    _aesni_encrypt3
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm7,%xmm4
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        leal    48(%edi),%edi
+        movdqa  %xmm7,%xmm1
+        jmp     .L054xts_enc_done
+.align  16
+.L053xts_enc_four:
+        movaps  %xmm1,%xmm6
+        movups  (%esi),%xmm2
+        movups  16(%esi),%xmm3
+        movups  32(%esi),%xmm4
+        xorps   (%esp),%xmm2
+        movups  48(%esi),%xmm5
+        leal    64(%esi),%esi
+        xorps   16(%esp),%xmm3
+        xorps   %xmm7,%xmm4
+        xorps   %xmm6,%xmm5
+        call    _aesni_encrypt4
+        xorps   (%esp),%xmm2
+        xorps   16(%esp),%xmm3
+        xorps   %xmm7,%xmm4
+        movups  %xmm2,(%edi)
+        xorps   %xmm6,%xmm5
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        leal    64(%edi),%edi
+        movdqa  %xmm6,%xmm1
+        jmp     .L054xts_enc_done
+.align  16
+.L049xts_enc_done6x:
+        movl    112(%esp),%eax
+        andl    $15,%eax
+        jz      .L056xts_enc_ret
+        movdqa  %xmm1,%xmm5
+        movl    %eax,112(%esp)
+        jmp     .L057xts_enc_steal
+.align  16
+.L054xts_enc_done:
+        movl    112(%esp),%eax
+        pxor    %xmm0,%xmm0
+        andl    $15,%eax
+        jz      .L056xts_enc_ret
+        pcmpgtd %xmm1,%xmm0
+        movl    %eax,112(%esp)
+        pshufd  $19,%xmm0,%xmm5
+        paddq   %xmm1,%xmm1
+        pand    96(%esp),%xmm5
+        pxor    %xmm1,%xmm5
+.L057xts_enc_steal:
+        movzbl  (%esi),%ecx
+        movzbl  -16(%edi),%edx
+        leal    1(%esi),%esi
+        movb    %cl,-16(%edi)
+        movb    %dl,(%edi)
+        leal    1(%edi),%edi
+        subl    $1,%eax
+        jnz     .L057xts_enc_steal
+        subl    112(%esp),%edi
+        movl    %ebp,%edx
+        movl    %ebx,%ecx
+        movups  -16(%edi),%xmm2
+        xorps   %xmm5,%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L058enc1_loop_10:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L058enc1_loop_10
+.byte   102,15,56,221,209
+        xorps   %xmm5,%xmm2
+        movups  %xmm2,-16(%edi)
+.L056xts_enc_ret:
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        movdqa  %xmm0,(%esp)
+        pxor    %xmm3,%xmm3
+        movdqa  %xmm0,16(%esp)
+        pxor    %xmm4,%xmm4
+        movdqa  %xmm0,32(%esp)
+        pxor    %xmm5,%xmm5
+        movdqa  %xmm0,48(%esp)
+        pxor    %xmm6,%xmm6
+        movdqa  %xmm0,64(%esp)
+        pxor    %xmm7,%xmm7
+        movdqa  %xmm0,80(%esp)
+        movl    116(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
+.globl  aesni_xts_decrypt
+.type   aesni_xts_decrypt,@function
+.align  16
+aesni_xts_decrypt:
+.L_aesni_xts_decrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    36(%esp),%edx
+        movl    40(%esp),%esi
+        movl    240(%edx),%ecx
+        movups  (%esi),%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L059enc1_loop_11:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L059enc1_loop_11
+.byte   102,15,56,221,209
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movl    %esp,%ebp
+        subl    $120,%esp
+        andl    $-16,%esp
+        xorl    %ebx,%ebx
+        testl   $15,%eax
+        setnz   %bl
+        shll    $4,%ebx
+        subl    %ebx,%eax
+        movl    $135,96(%esp)
+        movl    $0,100(%esp)
+        movl    $1,104(%esp)
+        movl    $0,108(%esp)
+        movl    %eax,112(%esp)
+        movl    %ebp,116(%esp)
+        movl    240(%edx),%ecx
+        movl    %edx,%ebp
+        movl    %ecx,%ebx
+        movdqa  %xmm2,%xmm1
+        pxor    %xmm0,%xmm0
+        movdqa  96(%esp),%xmm3
+        pcmpgtd %xmm1,%xmm0
+        andl    $-16,%eax
+        subl    $96,%eax
+        jc      .L060xts_dec_short
+        shll    $4,%ecx
+        movl    $16,%ebx
+        subl    %ecx,%ebx
+        leal    32(%edx,%ecx,1),%edx
+        jmp     .L061xts_dec_loop6
+.align  16
+.L061xts_dec_loop6:
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,16(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,32(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,48(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        pshufd  $19,%xmm0,%xmm7
+        movdqa  %xmm1,64(%esp)
+        paddq   %xmm1,%xmm1
+        movups  (%ebp),%xmm0
+        pand    %xmm3,%xmm7
+        movups  (%esi),%xmm2
+        pxor    %xmm1,%xmm7
+        movl    %ebx,%ecx
+        movdqu  16(%esi),%xmm3
+        xorps   %xmm0,%xmm2
+        movdqu  32(%esi),%xmm4
+        pxor    %xmm0,%xmm3
+        movdqu  48(%esi),%xmm5
+        pxor    %xmm0,%xmm4
+        movdqu  64(%esi),%xmm6
+        pxor    %xmm0,%xmm5
+        movdqu  80(%esi),%xmm1
+        pxor    %xmm0,%xmm6
+        leal    96(%esi),%esi
+        pxor    (%esp),%xmm2
+        movdqa  %xmm7,80(%esp)
+        pxor    %xmm1,%xmm7
+        movups  16(%ebp),%xmm1
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+.byte   102,15,56,222,209
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+.byte   102,15,56,222,217
+        pxor    %xmm0,%xmm7
+        movups  32(%ebp),%xmm0
+.byte   102,15,56,222,225
+.byte   102,15,56,222,233
+.byte   102,15,56,222,241
+.byte   102,15,56,222,249
+        call    .L_aesni_decrypt6_enter
+        movdqa  80(%esp),%xmm1
+        pxor    %xmm0,%xmm0
+        xorps   (%esp),%xmm2
+        pcmpgtd %xmm1,%xmm0
+        xorps   16(%esp),%xmm3
+        movups  %xmm2,(%edi)
+        xorps   32(%esp),%xmm4
+        movups  %xmm3,16(%edi)
+        xorps   48(%esp),%xmm5
+        movups  %xmm4,32(%edi)
+        xorps   64(%esp),%xmm6
+        movups  %xmm5,48(%edi)
+        xorps   %xmm1,%xmm7
+        movups  %xmm6,64(%edi)
+        pshufd  $19,%xmm0,%xmm2
+        movups  %xmm7,80(%edi)
+        leal    96(%edi),%edi
+        movdqa  96(%esp),%xmm3
+        pxor    %xmm0,%xmm0
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        subl    $96,%eax
+        jnc     .L061xts_dec_loop6
+        movl    240(%ebp),%ecx
+        movl    %ebp,%edx
+        movl    %ecx,%ebx
+.L060xts_dec_short:
+        addl    $96,%eax
+        jz      .L062xts_dec_done6x
+        movdqa  %xmm1,%xmm5
+        cmpl    $32,%eax
+        jb      .L063xts_dec_one
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        je      .L064xts_dec_two
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,%xmm6
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        cmpl    $64,%eax
+        jb      .L065xts_dec_three
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  %xmm1,%xmm7
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+        movdqa  %xmm5,(%esp)
+        movdqa  %xmm6,16(%esp)
+        je      .L066xts_dec_four
+        movdqa  %xmm7,32(%esp)
+        pshufd  $19,%xmm0,%xmm7
+        movdqa  %xmm1,48(%esp)
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm7
+        pxor    %xmm1,%xmm7
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        pxor    (%esp),%xmm2
+        movdqu  48(%esi),%xmm5
+        pxor    16(%esp),%xmm3
+        movdqu  64(%esi),%xmm6
+        pxor    32(%esp),%xmm4
+        leal    80(%esi),%esi
+        pxor    48(%esp),%xmm5
+        movdqa  %xmm7,64(%esp)
+        pxor    %xmm7,%xmm6
+        call    _aesni_decrypt6
+        movaps  64(%esp),%xmm1
+        xorps   (%esp),%xmm2
+        xorps   16(%esp),%xmm3
+        xorps   32(%esp),%xmm4
+        movups  %xmm2,(%edi)
+        xorps   48(%esp),%xmm5
+        movups  %xmm3,16(%edi)
+        xorps   %xmm1,%xmm6
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        movups  %xmm6,64(%edi)
+        leal    80(%edi),%edi
+        jmp     .L067xts_dec_done
+.align  16
+.L063xts_dec_one:
+        movups  (%esi),%xmm2
+        leal    16(%esi),%esi
+        xorps   %xmm5,%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L068dec1_loop_12:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L068dec1_loop_12
+.byte   102,15,56,223,209
+        xorps   %xmm5,%xmm2
+        movups  %xmm2,(%edi)
+        leal    16(%edi),%edi
+        movdqa  %xmm5,%xmm1
+        jmp     .L067xts_dec_done
+.align  16
+.L064xts_dec_two:
+        movaps  %xmm1,%xmm6
+        movups  (%esi),%xmm2
+        movups  16(%esi),%xmm3
+        leal    32(%esi),%esi
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        call    _aesni_decrypt2
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        leal    32(%edi),%edi
+        movdqa  %xmm6,%xmm1
+        jmp     .L067xts_dec_done
+.align  16
+.L065xts_dec_three:
+        movaps  %xmm1,%xmm7
+        movups  (%esi),%xmm2
+        movups  16(%esi),%xmm3
+        movups  32(%esi),%xmm4
+        leal    48(%esi),%esi
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm7,%xmm4
+        call    _aesni_decrypt3
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm7,%xmm4
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        leal    48(%edi),%edi
+        movdqa  %xmm7,%xmm1
+        jmp     .L067xts_dec_done
+.align  16
+.L066xts_dec_four:
+        movaps  %xmm1,%xmm6
+        movups  (%esi),%xmm2
+        movups  16(%esi),%xmm3
+        movups  32(%esi),%xmm4
+        xorps   (%esp),%xmm2
+        movups  48(%esi),%xmm5
+        leal    64(%esi),%esi
+        xorps   16(%esp),%xmm3
+        xorps   %xmm7,%xmm4
+        xorps   %xmm6,%xmm5
+        call    _aesni_decrypt4
+        xorps   (%esp),%xmm2
+        xorps   16(%esp),%xmm3
+        xorps   %xmm7,%xmm4
+        movups  %xmm2,(%edi)
+        xorps   %xmm6,%xmm5
+        movups  %xmm3,16(%edi)
+        movups  %xmm4,32(%edi)
+        movups  %xmm5,48(%edi)
+        leal    64(%edi),%edi
+        movdqa  %xmm6,%xmm1
+        jmp     .L067xts_dec_done
+.align  16
+.L062xts_dec_done6x:
+        movl    112(%esp),%eax
+        andl    $15,%eax
+        jz      .L069xts_dec_ret
+        movl    %eax,112(%esp)
+        jmp     .L070xts_dec_only_one_more
+.align  16
+.L067xts_dec_done:
+        movl    112(%esp),%eax
+        pxor    %xmm0,%xmm0
+        andl    $15,%eax
+        jz      .L069xts_dec_ret
+        pcmpgtd %xmm1,%xmm0
+        movl    %eax,112(%esp)
+        pshufd  $19,%xmm0,%xmm2
+        pxor    %xmm0,%xmm0
+        movdqa  96(%esp),%xmm3
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm2
+        pcmpgtd %xmm1,%xmm0
+        pxor    %xmm2,%xmm1
+.L070xts_dec_only_one_more:
+        pshufd  $19,%xmm0,%xmm5
+        movdqa  %xmm1,%xmm6
+        paddq   %xmm1,%xmm1
+        pand    %xmm3,%xmm5
+        pxor    %xmm1,%xmm5
+        movl    %ebp,%edx
+        movl    %ebx,%ecx
+        movups  (%esi),%xmm2
+        xorps   %xmm5,%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L071dec1_loop_13:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L071dec1_loop_13
+.byte   102,15,56,223,209
+        xorps   %xmm5,%xmm2
+        movups  %xmm2,(%edi)
+.L072xts_dec_steal:
+        movzbl  16(%esi),%ecx
+        movzbl  (%edi),%edx
+        leal    1(%esi),%esi
+        movb    %cl,(%edi)
+        movb    %dl,16(%edi)
+        leal    1(%edi),%edi
+        subl    $1,%eax
+        jnz     .L072xts_dec_steal
+        subl    112(%esp),%edi
+        movl    %ebp,%edx
+        movl    %ebx,%ecx
+        movups  (%edi),%xmm2
+        xorps   %xmm6,%xmm2
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L073dec1_loop_14:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L073dec1_loop_14
+.byte   102,15,56,223,209
+        xorps   %xmm6,%xmm2
+        movups  %xmm2,(%edi)
+.L069xts_dec_ret:
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        movdqa  %xmm0,(%esp)
+        pxor    %xmm3,%xmm3
+        movdqa  %xmm0,16(%esp)
+        pxor    %xmm4,%xmm4
+        movdqa  %xmm0,32(%esp)
+        pxor    %xmm5,%xmm5
+        movdqa  %xmm0,48(%esp)
+        pxor    %xmm6,%xmm6
+        movdqa  %xmm0,64(%esp)
+        pxor    %xmm7,%xmm7
+        movdqa  %xmm0,80(%esp)
+        movl    116(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
+.globl  aesni_ocb_encrypt
+.type   aesni_ocb_encrypt,@function
+.align  16
+aesni_ocb_encrypt:
+.L_aesni_ocb_encrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    40(%esp),%ecx
+        movl    48(%esp),%ebx
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movdqu  (%ecx),%xmm0
+        movl    36(%esp),%ebp
+        movdqu  (%ebx),%xmm1
+        movl    44(%esp),%ebx
+        movl    %esp,%ecx
+        subl    $132,%esp
+        andl    $-16,%esp
+        subl    %esi,%edi
+        shll    $4,%eax
+        leal    -96(%esi,%eax,1),%eax
+        movl    %edi,120(%esp)
+        movl    %eax,124(%esp)
+        movl    %ecx,128(%esp)
+        movl    240(%edx),%ecx
+        testl   $1,%ebp
+        jnz     .L074odd
+        bsfl    %ebp,%eax
+        addl    $1,%ebp
+        shll    $4,%eax
+        movdqu  (%ebx,%eax,1),%xmm7
+        movl    %edx,%eax
+        movdqu  (%esi),%xmm2
+        leal    16(%esi),%esi
+        pxor    %xmm0,%xmm7
+        pxor    %xmm2,%xmm1
+        pxor    %xmm7,%xmm2
+        movdqa  %xmm1,%xmm6
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L075enc1_loop_15:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L075enc1_loop_15
+.byte   102,15,56,221,209
+        xorps   %xmm7,%xmm2
+        movdqa  %xmm7,%xmm0
+        movdqa  %xmm6,%xmm1
+        movups  %xmm2,-16(%edi,%esi,1)
+        movl    240(%eax),%ecx
+        movl    %eax,%edx
+        movl    124(%esp),%eax
+.L074odd:
+        shll    $4,%ecx
+        movl    $16,%edi
+        subl    %ecx,%edi
+        movl    %edx,112(%esp)
+        leal    32(%edx,%ecx,1),%edx
+        movl    %edi,116(%esp)
+        cmpl    %eax,%esi
+        ja      .L076short
+        jmp     .L077grandloop
+.align  32
+.L077grandloop:
+        leal    1(%ebp),%ecx
+        leal    3(%ebp),%eax
+        leal    5(%ebp),%edi
+        addl    $6,%ebp
+        bsfl    %ecx,%ecx
+        bsfl    %eax,%eax
+        bsfl    %edi,%edi
+        shll    $4,%ecx
+        shll    $4,%eax
+        shll    $4,%edi
+        movdqu  (%ebx),%xmm2
+        movdqu  (%ebx,%ecx,1),%xmm3
+        movl    116(%esp),%ecx
+        movdqa  %xmm2,%xmm4
+        movdqu  (%ebx,%eax,1),%xmm5
+        movdqa  %xmm2,%xmm6
+        movdqu  (%ebx,%edi,1),%xmm7
+        pxor    %xmm0,%xmm2
+        pxor    %xmm2,%xmm3
+        movdqa  %xmm2,(%esp)
+        pxor    %xmm3,%xmm4
+        movdqa  %xmm3,16(%esp)
+        pxor    %xmm4,%xmm5
+        movdqa  %xmm4,32(%esp)
+        pxor    %xmm5,%xmm6
+        movdqa  %xmm5,48(%esp)
+        pxor    %xmm6,%xmm7
+        movdqa  %xmm6,64(%esp)
+        movdqa  %xmm7,80(%esp)
+        movups  -48(%edx,%ecx,1),%xmm0
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        movdqu  80(%esi),%xmm7
+        leal    96(%esi),%esi
+        pxor    %xmm2,%xmm1
+        pxor    %xmm0,%xmm2
+        pxor    %xmm3,%xmm1
+        pxor    %xmm0,%xmm3
+        pxor    %xmm4,%xmm1
+        pxor    %xmm0,%xmm4
+        pxor    %xmm5,%xmm1
+        pxor    %xmm0,%xmm5
+        pxor    %xmm6,%xmm1
+        pxor    %xmm0,%xmm6
+        pxor    %xmm7,%xmm1
+        pxor    %xmm0,%xmm7
+        movdqa  %xmm1,96(%esp)
+        movups  -32(%edx,%ecx,1),%xmm1
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+        pxor    80(%esp),%xmm7
+        movups  -16(%edx,%ecx,1),%xmm0
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+.byte   102,15,56,220,241
+.byte   102,15,56,220,249
+        movl    120(%esp),%edi
+        movl    124(%esp),%eax
+        call    .L_aesni_encrypt6_enter
+        movdqa  80(%esp),%xmm0
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+        pxor    %xmm0,%xmm7
+        movdqa  96(%esp),%xmm1
+        movdqu  %xmm2,-96(%edi,%esi,1)
+        movdqu  %xmm3,-80(%edi,%esi,1)
+        movdqu  %xmm4,-64(%edi,%esi,1)
+        movdqu  %xmm5,-48(%edi,%esi,1)
+        movdqu  %xmm6,-32(%edi,%esi,1)
+        movdqu  %xmm7,-16(%edi,%esi,1)
+        cmpl    %eax,%esi
+        jb      .L077grandloop
+.L076short:
+        addl    $96,%eax
+        subl    %esi,%eax
+        jz      .L078done
+        cmpl    $32,%eax
+        jb      .L079one
+        je      .L080two
+        cmpl    $64,%eax
+        jb      .L081three
+        je      .L082four
+        leal    1(%ebp),%ecx
+        leal    3(%ebp),%eax
+        bsfl    %ecx,%ecx
+        bsfl    %eax,%eax
+        shll    $4,%ecx
+        shll    $4,%eax
+        movdqu  (%ebx),%xmm2
+        movdqu  (%ebx,%ecx,1),%xmm3
+        movl    116(%esp),%ecx
+        movdqa  %xmm2,%xmm4
+        movdqu  (%ebx,%eax,1),%xmm5
+        movdqa  %xmm2,%xmm6
+        pxor    %xmm0,%xmm2
+        pxor    %xmm2,%xmm3
+        movdqa  %xmm2,(%esp)
+        pxor    %xmm3,%xmm4
+        movdqa  %xmm3,16(%esp)
+        pxor    %xmm4,%xmm5
+        movdqa  %xmm4,32(%esp)
+        pxor    %xmm5,%xmm6
+        movdqa  %xmm5,48(%esp)
+        pxor    %xmm6,%xmm7
+        movdqa  %xmm6,64(%esp)
+        movups  -48(%edx,%ecx,1),%xmm0
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        pxor    %xmm7,%xmm7
+        pxor    %xmm2,%xmm1
+        pxor    %xmm0,%xmm2
+        pxor    %xmm3,%xmm1
+        pxor    %xmm0,%xmm3
+        pxor    %xmm4,%xmm1
+        pxor    %xmm0,%xmm4
+        pxor    %xmm5,%xmm1
+        pxor    %xmm0,%xmm5
+        pxor    %xmm6,%xmm1
+        pxor    %xmm0,%xmm6
+        movdqa  %xmm1,96(%esp)
+        movups  -32(%edx,%ecx,1),%xmm1
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+        movups  -16(%edx,%ecx,1),%xmm0
+.byte   102,15,56,220,209
+.byte   102,15,56,220,217
+.byte   102,15,56,220,225
+.byte   102,15,56,220,233
+.byte   102,15,56,220,241
+.byte   102,15,56,220,249
+        movl    120(%esp),%edi
+        call    .L_aesni_encrypt6_enter
+        movdqa  64(%esp),%xmm0
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    %xmm0,%xmm6
+        movdqa  96(%esp),%xmm1
+        movdqu  %xmm2,(%edi,%esi,1)
+        movdqu  %xmm3,16(%edi,%esi,1)
+        movdqu  %xmm4,32(%edi,%esi,1)
+        movdqu  %xmm5,48(%edi,%esi,1)
+        movdqu  %xmm6,64(%edi,%esi,1)
+        jmp     .L078done
+.align  16
+.L079one:
+        movdqu  (%ebx),%xmm7
+        movl    112(%esp),%edx
+        movdqu  (%esi),%xmm2
+        movl    240(%edx),%ecx
+        pxor    %xmm0,%xmm7
+        pxor    %xmm2,%xmm1
+        pxor    %xmm7,%xmm2
+        movdqa  %xmm1,%xmm6
+        movl    120(%esp),%edi
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L083enc1_loop_16:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L083enc1_loop_16
+.byte   102,15,56,221,209
+        xorps   %xmm7,%xmm2
+        movdqa  %xmm7,%xmm0
+        movdqa  %xmm6,%xmm1
+        movups  %xmm2,(%edi,%esi,1)
+        jmp     .L078done
+.align  16
+.L080two:
+        leal    1(%ebp),%ecx
+        movl    112(%esp),%edx
+        bsfl    %ecx,%ecx
+        shll    $4,%ecx
+        movdqu  (%ebx),%xmm6
+        movdqu  (%ebx,%ecx,1),%xmm7
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movl    240(%edx),%ecx
+        pxor    %xmm0,%xmm6
+        pxor    %xmm6,%xmm7
+        pxor    %xmm2,%xmm1
+        pxor    %xmm6,%xmm2
+        pxor    %xmm3,%xmm1
+        pxor    %xmm7,%xmm3
+        movdqa  %xmm1,%xmm5
+        movl    120(%esp),%edi
+        call    _aesni_encrypt2
+        xorps   %xmm6,%xmm2
+        xorps   %xmm7,%xmm3
+        movdqa  %xmm7,%xmm0
+        movdqa  %xmm5,%xmm1
+        movups  %xmm2,(%edi,%esi,1)
+        movups  %xmm3,16(%edi,%esi,1)
+        jmp     .L078done
+.align  16
+.L081three:
+        leal    1(%ebp),%ecx
+        movl    112(%esp),%edx
+        bsfl    %ecx,%ecx
+        shll    $4,%ecx
+        movdqu  (%ebx),%xmm5
+        movdqu  (%ebx,%ecx,1),%xmm6
+        movdqa  %xmm5,%xmm7
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movl    240(%edx),%ecx
+        pxor    %xmm0,%xmm5
+        pxor    %xmm5,%xmm6
+        pxor    %xmm6,%xmm7
+        pxor    %xmm2,%xmm1
+        pxor    %xmm5,%xmm2
+        pxor    %xmm3,%xmm1
+        pxor    %xmm6,%xmm3
+        pxor    %xmm4,%xmm1
+        pxor    %xmm7,%xmm4
+        movdqa  %xmm1,96(%esp)
+        movl    120(%esp),%edi
+        call    _aesni_encrypt3
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm7,%xmm4
+        movdqa  %xmm7,%xmm0
+        movdqa  96(%esp),%xmm1
+        movups  %xmm2,(%edi,%esi,1)
+        movups  %xmm3,16(%edi,%esi,1)
+        movups  %xmm4,32(%edi,%esi,1)
+        jmp     .L078done
+.align  16
+.L082four:
+        leal    1(%ebp),%ecx
+        leal    3(%ebp),%eax
+        bsfl    %ecx,%ecx
+        bsfl    %eax,%eax
+        movl    112(%esp),%edx
+        shll    $4,%ecx
+        shll    $4,%eax
+        movdqu  (%ebx),%xmm4
+        movdqu  (%ebx,%ecx,1),%xmm5
+        movdqa  %xmm4,%xmm6
+        movdqu  (%ebx,%eax,1),%xmm7
+        pxor    %xmm0,%xmm4
+        movdqu  (%esi),%xmm2
+        pxor    %xmm4,%xmm5
+        movdqu  16(%esi),%xmm3
+        pxor    %xmm5,%xmm6
+        movdqa  %xmm4,(%esp)
+        pxor    %xmm6,%xmm7
+        movdqa  %xmm5,16(%esp)
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movl    240(%edx),%ecx
+        pxor    %xmm2,%xmm1
+        pxor    (%esp),%xmm2
+        pxor    %xmm3,%xmm1
+        pxor    16(%esp),%xmm3
+        pxor    %xmm4,%xmm1
+        pxor    %xmm6,%xmm4
+        pxor    %xmm5,%xmm1
+        pxor    %xmm7,%xmm5
+        movdqa  %xmm1,96(%esp)
+        movl    120(%esp),%edi
+        call    _aesni_encrypt4
+        xorps   (%esp),%xmm2
+        xorps   16(%esp),%xmm3
+        xorps   %xmm6,%xmm4
+        movups  %xmm2,(%edi,%esi,1)
+        xorps   %xmm7,%xmm5
+        movups  %xmm3,16(%edi,%esi,1)
+        movdqa  %xmm7,%xmm0
+        movups  %xmm4,32(%edi,%esi,1)
+        movdqa  96(%esp),%xmm1
+        movups  %xmm5,48(%edi,%esi,1)
+.L078done:
+        movl    128(%esp),%edx
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        movdqa  %xmm2,(%esp)
+        pxor    %xmm4,%xmm4
+        movdqa  %xmm2,16(%esp)
+        pxor    %xmm5,%xmm5
+        movdqa  %xmm2,32(%esp)
+        pxor    %xmm6,%xmm6
+        movdqa  %xmm2,48(%esp)
+        pxor    %xmm7,%xmm7
+        movdqa  %xmm2,64(%esp)
+        movdqa  %xmm2,80(%esp)
+        movdqa  %xmm2,96(%esp)
+        leal    (%edx),%esp
+        movl    40(%esp),%ecx
+        movl    48(%esp),%ebx
+        movdqu  %xmm0,(%ecx)
+        pxor    %xmm0,%xmm0
+        movdqu  %xmm1,(%ebx)
+        pxor    %xmm1,%xmm1
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_ocb_encrypt,.-.L_aesni_ocb_encrypt_begin
+.globl  aesni_ocb_decrypt
+.type   aesni_ocb_decrypt,@function
+.align  16
+aesni_ocb_decrypt:
+.L_aesni_ocb_decrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    40(%esp),%ecx
+        movl    48(%esp),%ebx
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        movdqu  (%ecx),%xmm0
+        movl    36(%esp),%ebp
+        movdqu  (%ebx),%xmm1
+        movl    44(%esp),%ebx
+        movl    %esp,%ecx
+        subl    $132,%esp
+        andl    $-16,%esp
+        subl    %esi,%edi
+        shll    $4,%eax
+        leal    -96(%esi,%eax,1),%eax
+        movl    %edi,120(%esp)
+        movl    %eax,124(%esp)
+        movl    %ecx,128(%esp)
+        movl    240(%edx),%ecx
+        testl   $1,%ebp
+        jnz     .L084odd
+        bsfl    %ebp,%eax
+        addl    $1,%ebp
+        shll    $4,%eax
+        movdqu  (%ebx,%eax,1),%xmm7
+        movl    %edx,%eax
+        movdqu  (%esi),%xmm2
+        leal    16(%esi),%esi
+        pxor    %xmm0,%xmm7
+        pxor    %xmm7,%xmm2
+        movdqa  %xmm1,%xmm6
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L085dec1_loop_17:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L085dec1_loop_17
+.byte   102,15,56,223,209
+        xorps   %xmm7,%xmm2
+        movaps  %xmm6,%xmm1
+        movdqa  %xmm7,%xmm0
+        xorps   %xmm2,%xmm1
+        movups  %xmm2,-16(%edi,%esi,1)
+        movl    240(%eax),%ecx
+        movl    %eax,%edx
+        movl    124(%esp),%eax
+.L084odd:
+        shll    $4,%ecx
+        movl    $16,%edi
+        subl    %ecx,%edi
+        movl    %edx,112(%esp)
+        leal    32(%edx,%ecx,1),%edx
+        movl    %edi,116(%esp)
+        cmpl    %eax,%esi
+        ja      .L086short
+        jmp     .L087grandloop
+.align  32
+.L087grandloop:
+        leal    1(%ebp),%ecx
+        leal    3(%ebp),%eax
+        leal    5(%ebp),%edi
+        addl    $6,%ebp
+        bsfl    %ecx,%ecx
+        bsfl    %eax,%eax
+        bsfl    %edi,%edi
+        shll    $4,%ecx
+        shll    $4,%eax
+        shll    $4,%edi
+        movdqu  (%ebx),%xmm2
+        movdqu  (%ebx,%ecx,1),%xmm3
+        movl    116(%esp),%ecx
+        movdqa  %xmm2,%xmm4
+        movdqu  (%ebx,%eax,1),%xmm5
+        movdqa  %xmm2,%xmm6
+        movdqu  (%ebx,%edi,1),%xmm7
+        pxor    %xmm0,%xmm2
+        pxor    %xmm2,%xmm3
+        movdqa  %xmm2,(%esp)
+        pxor    %xmm3,%xmm4
+        movdqa  %xmm3,16(%esp)
+        pxor    %xmm4,%xmm5
+        movdqa  %xmm4,32(%esp)
+        pxor    %xmm5,%xmm6
+        movdqa  %xmm5,48(%esp)
+        pxor    %xmm6,%xmm7
+        movdqa  %xmm6,64(%esp)
+        movdqa  %xmm7,80(%esp)
+        movups  -48(%edx,%ecx,1),%xmm0
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        movdqu  80(%esi),%xmm7
+        leal    96(%esi),%esi
+        movdqa  %xmm1,96(%esp)
+        pxor    %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+        pxor    %xmm0,%xmm5
+        pxor    %xmm0,%xmm6
+        pxor    %xmm0,%xmm7
+        movups  -32(%edx,%ecx,1),%xmm1
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+        pxor    80(%esp),%xmm7
+        movups  -16(%edx,%ecx,1),%xmm0
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.byte   102,15,56,222,233
+.byte   102,15,56,222,241
+.byte   102,15,56,222,249
+        movl    120(%esp),%edi
+        movl    124(%esp),%eax
+        call    .L_aesni_decrypt6_enter
+        movdqa  80(%esp),%xmm0
+        pxor    (%esp),%xmm2
+        movdqa  96(%esp),%xmm1
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+        pxor    %xmm0,%xmm7
+        pxor    %xmm2,%xmm1
+        movdqu  %xmm2,-96(%edi,%esi,1)
+        pxor    %xmm3,%xmm1
+        movdqu  %xmm3,-80(%edi,%esi,1)
+        pxor    %xmm4,%xmm1
+        movdqu  %xmm4,-64(%edi,%esi,1)
+        pxor    %xmm5,%xmm1
+        movdqu  %xmm5,-48(%edi,%esi,1)
+        pxor    %xmm6,%xmm1
+        movdqu  %xmm6,-32(%edi,%esi,1)
+        pxor    %xmm7,%xmm1
+        movdqu  %xmm7,-16(%edi,%esi,1)
+        cmpl    %eax,%esi
+        jb      .L087grandloop
+.L086short:
+        addl    $96,%eax
+        subl    %esi,%eax
+        jz      .L088done
+        cmpl    $32,%eax
+        jb      .L089one
+        je      .L090two
+        cmpl    $64,%eax
+        jb      .L091three
+        je      .L092four
+        leal    1(%ebp),%ecx
+        leal    3(%ebp),%eax
+        bsfl    %ecx,%ecx
+        bsfl    %eax,%eax
+        shll    $4,%ecx
+        shll    $4,%eax
+        movdqu  (%ebx),%xmm2
+        movdqu  (%ebx,%ecx,1),%xmm3
+        movl    116(%esp),%ecx
+        movdqa  %xmm2,%xmm4
+        movdqu  (%ebx,%eax,1),%xmm5
+        movdqa  %xmm2,%xmm6
+        pxor    %xmm0,%xmm2
+        pxor    %xmm2,%xmm3
+        movdqa  %xmm2,(%esp)
+        pxor    %xmm3,%xmm4
+        movdqa  %xmm3,16(%esp)
+        pxor    %xmm4,%xmm5
+        movdqa  %xmm4,32(%esp)
+        pxor    %xmm5,%xmm6
+        movdqa  %xmm5,48(%esp)
+        pxor    %xmm6,%xmm7
+        movdqa  %xmm6,64(%esp)
+        movups  -48(%edx,%ecx,1),%xmm0
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        pxor    %xmm7,%xmm7
+        movdqa  %xmm1,96(%esp)
+        pxor    %xmm0,%xmm2
+        pxor    %xmm0,%xmm3
+        pxor    %xmm0,%xmm4
+        pxor    %xmm0,%xmm5
+        pxor    %xmm0,%xmm6
+        movups  -32(%edx,%ecx,1),%xmm1
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    64(%esp),%xmm6
+        movups  -16(%edx,%ecx,1),%xmm0
+.byte   102,15,56,222,209
+.byte   102,15,56,222,217
+.byte   102,15,56,222,225
+.byte   102,15,56,222,233
+.byte   102,15,56,222,241
+.byte   102,15,56,222,249
+        movl    120(%esp),%edi
+        call    .L_aesni_decrypt6_enter
+        movdqa  64(%esp),%xmm0
+        pxor    (%esp),%xmm2
+        movdqa  96(%esp),%xmm1
+        pxor    16(%esp),%xmm3
+        pxor    32(%esp),%xmm4
+        pxor    48(%esp),%xmm5
+        pxor    %xmm0,%xmm6
+        pxor    %xmm2,%xmm1
+        movdqu  %xmm2,(%edi,%esi,1)
+        pxor    %xmm3,%xmm1
+        movdqu  %xmm3,16(%edi,%esi,1)
+        pxor    %xmm4,%xmm1
+        movdqu  %xmm4,32(%edi,%esi,1)
+        pxor    %xmm5,%xmm1
+        movdqu  %xmm5,48(%edi,%esi,1)
+        pxor    %xmm6,%xmm1
+        movdqu  %xmm6,64(%edi,%esi,1)
+        jmp     .L088done
+.align  16
+.L089one:
+        movdqu  (%ebx),%xmm7
+        movl    112(%esp),%edx
+        movdqu  (%esi),%xmm2
+        movl    240(%edx),%ecx
+        pxor    %xmm0,%xmm7
+        pxor    %xmm7,%xmm2
+        movdqa  %xmm1,%xmm6
+        movl    120(%esp),%edi
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L093dec1_loop_18:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L093dec1_loop_18
+.byte   102,15,56,223,209
+        xorps   %xmm7,%xmm2
+        movaps  %xmm6,%xmm1
+        movdqa  %xmm7,%xmm0
+        xorps   %xmm2,%xmm1
+        movups  %xmm2,(%edi,%esi,1)
+        jmp     .L088done
+.align  16
+.L090two:
+        leal    1(%ebp),%ecx
+        movl    112(%esp),%edx
+        bsfl    %ecx,%ecx
+        shll    $4,%ecx
+        movdqu  (%ebx),%xmm6
+        movdqu  (%ebx,%ecx,1),%xmm7
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movl    240(%edx),%ecx
+        movdqa  %xmm1,%xmm5
+        pxor    %xmm0,%xmm6
+        pxor    %xmm6,%xmm7
+        pxor    %xmm6,%xmm2
+        pxor    %xmm7,%xmm3
+        movl    120(%esp),%edi
+        call    _aesni_decrypt2
+        xorps   %xmm6,%xmm2
+        xorps   %xmm7,%xmm3
+        movdqa  %xmm7,%xmm0
+        xorps   %xmm2,%xmm5
+        movups  %xmm2,(%edi,%esi,1)
+        xorps   %xmm3,%xmm5
+        movups  %xmm3,16(%edi,%esi,1)
+        movaps  %xmm5,%xmm1
+        jmp     .L088done
+.align  16
+.L091three:
+        leal    1(%ebp),%ecx
+        movl    112(%esp),%edx
+        bsfl    %ecx,%ecx
+        shll    $4,%ecx
+        movdqu  (%ebx),%xmm5
+        movdqu  (%ebx,%ecx,1),%xmm6
+        movdqa  %xmm5,%xmm7
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movl    240(%edx),%ecx
+        movdqa  %xmm1,96(%esp)
+        pxor    %xmm0,%xmm5
+        pxor    %xmm5,%xmm6
+        pxor    %xmm6,%xmm7
+        pxor    %xmm5,%xmm2
+        pxor    %xmm6,%xmm3
+        pxor    %xmm7,%xmm4
+        movl    120(%esp),%edi
+        call    _aesni_decrypt3
+        movdqa  96(%esp),%xmm1
+        xorps   %xmm5,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm7,%xmm4
+        movups  %xmm2,(%edi,%esi,1)
+        pxor    %xmm2,%xmm1
+        movdqa  %xmm7,%xmm0
+        movups  %xmm3,16(%edi,%esi,1)
+        pxor    %xmm3,%xmm1
+        movups  %xmm4,32(%edi,%esi,1)
+        pxor    %xmm4,%xmm1
+        jmp     .L088done
+.align  16
+.L092four:
+        leal    1(%ebp),%ecx
+        leal    3(%ebp),%eax
+        bsfl    %ecx,%ecx
+        bsfl    %eax,%eax
+        movl    112(%esp),%edx
+        shll    $4,%ecx
+        shll    $4,%eax
+        movdqu  (%ebx),%xmm4
+        movdqu  (%ebx,%ecx,1),%xmm5
+        movdqa  %xmm4,%xmm6
+        movdqu  (%ebx,%eax,1),%xmm7
+        pxor    %xmm0,%xmm4
+        movdqu  (%esi),%xmm2
+        pxor    %xmm4,%xmm5
+        movdqu  16(%esi),%xmm3
+        pxor    %xmm5,%xmm6
+        movdqa  %xmm4,(%esp)
+        pxor    %xmm6,%xmm7
+        movdqa  %xmm5,16(%esp)
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movl    240(%edx),%ecx
+        movdqa  %xmm1,96(%esp)
+        pxor    (%esp),%xmm2
+        pxor    16(%esp),%xmm3
+        pxor    %xmm6,%xmm4
+        pxor    %xmm7,%xmm5
+        movl    120(%esp),%edi
+        call    _aesni_decrypt4
+        movdqa  96(%esp),%xmm1
+        xorps   (%esp),%xmm2
+        xorps   16(%esp),%xmm3
+        xorps   %xmm6,%xmm4
+        movups  %xmm2,(%edi,%esi,1)
+        pxor    %xmm2,%xmm1
+        xorps   %xmm7,%xmm5
+        movups  %xmm3,16(%edi,%esi,1)
+        pxor    %xmm3,%xmm1
+        movdqa  %xmm7,%xmm0
+        movups  %xmm4,32(%edi,%esi,1)
+        pxor    %xmm4,%xmm1
+        movups  %xmm5,48(%edi,%esi,1)
+        pxor    %xmm5,%xmm1
+.L088done:
+        movl    128(%esp),%edx
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        movdqa  %xmm2,(%esp)
+        pxor    %xmm4,%xmm4
+        movdqa  %xmm2,16(%esp)
+        pxor    %xmm5,%xmm5
+        movdqa  %xmm2,32(%esp)
+        pxor    %xmm6,%xmm6
+        movdqa  %xmm2,48(%esp)
+        pxor    %xmm7,%xmm7
+        movdqa  %xmm2,64(%esp)
+        movdqa  %xmm2,80(%esp)
+        movdqa  %xmm2,96(%esp)
+        leal    (%edx),%esp
+        movl    40(%esp),%ecx
+        movl    48(%esp),%ebx
+        movdqu  %xmm0,(%ecx)
+        pxor    %xmm0,%xmm0
+        movdqu  %xmm1,(%ebx)
+        pxor    %xmm1,%xmm1
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_ocb_decrypt,.-.L_aesni_ocb_decrypt_begin
+.globl  aesni_cbc_encrypt
+.type   aesni_cbc_encrypt,@function
+.align  16
+aesni_cbc_encrypt:
+.L_aesni_cbc_encrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    %esp,%ebx
+        movl    24(%esp),%edi
+        subl    $24,%ebx
+        movl    28(%esp),%eax
+        andl    $-16,%ebx
+        movl    32(%esp),%edx
+        movl    36(%esp),%ebp
+        testl   %eax,%eax
+        jz      .L094cbc_abort
+        cmpl    $0,40(%esp)
+        xchgl   %esp,%ebx
+        movups  (%ebp),%xmm7
+        movl    240(%edx),%ecx
+        movl    %edx,%ebp
+        movl    %ebx,16(%esp)
+        movl    %ecx,%ebx
+        je      .L095cbc_decrypt
+        movaps  %xmm7,%xmm2
+        cmpl    $16,%eax
+        jb      .L096cbc_enc_tail
+        subl    $16,%eax
+        jmp     .L097cbc_enc_loop
+.align  16
+.L097cbc_enc_loop:
+        movups  (%esi),%xmm7
+        leal    16(%esi),%esi
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        xorps   %xmm0,%xmm7
+        leal    32(%edx),%edx
+        xorps   %xmm7,%xmm2
+.L098enc1_loop_19:
+.byte   102,15,56,220,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L098enc1_loop_19
+.byte   102,15,56,221,209
+        movl    %ebx,%ecx
+        movl    %ebp,%edx
+        movups  %xmm2,(%edi)
+        leal    16(%edi),%edi
+        subl    $16,%eax
+        jnc     .L097cbc_enc_loop
+        addl    $16,%eax
+        jnz     .L096cbc_enc_tail
+        movaps  %xmm2,%xmm7
+        pxor    %xmm2,%xmm2
+        jmp     .L099cbc_ret
+.L096cbc_enc_tail:
+        movl    %eax,%ecx
+.long   2767451785
+        movl    $16,%ecx
+        subl    %eax,%ecx
+        xorl    %eax,%eax
+.long   2868115081
+        leal    -16(%edi),%edi
+        movl    %ebx,%ecx
+        movl    %edi,%esi
+        movl    %ebp,%edx
+        jmp     .L097cbc_enc_loop
+.align  16
+.L095cbc_decrypt:
+        cmpl    $80,%eax
+        jbe     .L100cbc_dec_tail
+        movaps  %xmm7,(%esp)
+        subl    $80,%eax
+        jmp     .L101cbc_dec_loop6_enter
+.align  16
+.L102cbc_dec_loop6:
+        movaps  %xmm0,(%esp)
+        movups  %xmm7,(%edi)
+        leal    16(%edi),%edi
+.L101cbc_dec_loop6_enter:
+        movdqu  (%esi),%xmm2
+        movdqu  16(%esi),%xmm3
+        movdqu  32(%esi),%xmm4
+        movdqu  48(%esi),%xmm5
+        movdqu  64(%esi),%xmm6
+        movdqu  80(%esi),%xmm7
+        call    _aesni_decrypt6
+        movups  (%esi),%xmm1
+        movups  16(%esi),%xmm0
+        xorps   (%esp),%xmm2
+        xorps   %xmm1,%xmm3
+        movups  32(%esi),%xmm1
+        xorps   %xmm0,%xmm4
+        movups  48(%esi),%xmm0
+        xorps   %xmm1,%xmm5
+        movups  64(%esi),%xmm1
+        xorps   %xmm0,%xmm6
+        movups  80(%esi),%xmm0
+        xorps   %xmm1,%xmm7
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        leal    96(%esi),%esi
+        movups  %xmm4,32(%edi)
+        movl    %ebx,%ecx
+        movups  %xmm5,48(%edi)
+        movl    %ebp,%edx
+        movups  %xmm6,64(%edi)
+        leal    80(%edi),%edi
+        subl    $96,%eax
+        ja      .L102cbc_dec_loop6
+        movaps  %xmm7,%xmm2
+        movaps  %xmm0,%xmm7
+        addl    $80,%eax
+        jle     .L103cbc_dec_clear_tail_collected
+        movups  %xmm2,(%edi)
+        leal    16(%edi),%edi
+.L100cbc_dec_tail:
+        movups  (%esi),%xmm2
+        movaps  %xmm2,%xmm6
+        cmpl    $16,%eax
+        jbe     .L104cbc_dec_one
+        movups  16(%esi),%xmm3
+        movaps  %xmm3,%xmm5
+        cmpl    $32,%eax
+        jbe     .L105cbc_dec_two
+        movups  32(%esi),%xmm4
+        cmpl    $48,%eax
+        jbe     .L106cbc_dec_three
+        movups  48(%esi),%xmm5
+        cmpl    $64,%eax
+        jbe     .L107cbc_dec_four
+        movups  64(%esi),%xmm6
+        movaps  %xmm7,(%esp)
+        movups  (%esi),%xmm2
+        xorps   %xmm7,%xmm7
+        call    _aesni_decrypt6
+        movups  (%esi),%xmm1
+        movups  16(%esi),%xmm0
+        xorps   (%esp),%xmm2
+        xorps   %xmm1,%xmm3
+        movups  32(%esi),%xmm1
+        xorps   %xmm0,%xmm4
+        movups  48(%esi),%xmm0
+        xorps   %xmm1,%xmm5
+        movups  64(%esi),%xmm7
+        xorps   %xmm0,%xmm6
+        movups  %xmm2,(%edi)
+        movups  %xmm3,16(%edi)
+        pxor    %xmm3,%xmm3
+        movups  %xmm4,32(%edi)
+        pxor    %xmm4,%xmm4
+        movups  %xmm5,48(%edi)
+        pxor    %xmm5,%xmm5
+        leal    64(%edi),%edi
+        movaps  %xmm6,%xmm2
+        pxor    %xmm6,%xmm6
+        subl    $80,%eax
+        jmp     .L108cbc_dec_tail_collected
+.align  16
+.L104cbc_dec_one:
+        movups  (%edx),%xmm0
+        movups  16(%edx),%xmm1
+        leal    32(%edx),%edx
+        xorps   %xmm0,%xmm2
+.L109dec1_loop_20:
+.byte   102,15,56,222,209
+        decl    %ecx
+        movups  (%edx),%xmm1
+        leal    16(%edx),%edx
+        jnz     .L109dec1_loop_20
+.byte   102,15,56,223,209
+        xorps   %xmm7,%xmm2
+        movaps  %xmm6,%xmm7
+        subl    $16,%eax
+        jmp     .L108cbc_dec_tail_collected
+.align  16
+.L105cbc_dec_two:
+        call    _aesni_decrypt2
+        xorps   %xmm7,%xmm2
+        xorps   %xmm6,%xmm3
+        movups  %xmm2,(%edi)
+        movaps  %xmm3,%xmm2
+        pxor    %xmm3,%xmm3
+        leal    16(%edi),%edi
+        movaps  %xmm5,%xmm7
+        subl    $32,%eax
+        jmp     .L108cbc_dec_tail_collected
+.align  16
+.L106cbc_dec_three:
+        call    _aesni_decrypt3
+        xorps   %xmm7,%xmm2
+        xorps   %xmm6,%xmm3
+        xorps   %xmm5,%xmm4
+        movups  %xmm2,(%edi)
+        movaps  %xmm4,%xmm2
+        pxor    %xmm4,%xmm4
+        movups  %xmm3,16(%edi)
+        pxor    %xmm3,%xmm3
+        leal    32(%edi),%edi
+        movups  32(%esi),%xmm7
+        subl    $48,%eax
+        jmp     .L108cbc_dec_tail_collected
+.align  16
+.L107cbc_dec_four:
+        call    _aesni_decrypt4
+        movups  16(%esi),%xmm1
+        movups  32(%esi),%xmm0
+        xorps   %xmm7,%xmm2
+        movups  48(%esi),%xmm7
+        xorps   %xmm6,%xmm3
+        movups  %xmm2,(%edi)
+        xorps   %xmm1,%xmm4
+        movups  %xmm3,16(%edi)
+        pxor    %xmm3,%xmm3
+        xorps   %xmm0,%xmm5
+        movups  %xmm4,32(%edi)
+        pxor    %xmm4,%xmm4
+        leal    48(%edi),%edi
+        movaps  %xmm5,%xmm2
+        pxor    %xmm5,%xmm5
+        subl    $64,%eax
+        jmp     .L108cbc_dec_tail_collected
+.align  16
+.L103cbc_dec_clear_tail_collected:
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        pxor    %xmm5,%xmm5
+        pxor    %xmm6,%xmm6
+.L108cbc_dec_tail_collected:
+        andl    $15,%eax
+        jnz     .L110cbc_dec_tail_partial
+        movups  %xmm2,(%edi)
+        pxor    %xmm0,%xmm0
+        jmp     .L099cbc_ret
+.align  16
+.L110cbc_dec_tail_partial:
+        movaps  %xmm2,(%esp)
+        pxor    %xmm0,%xmm0
+        movl    $16,%ecx
+        movl    %esp,%esi
+        subl    %eax,%ecx
+.long   2767451785
+        movdqa  %xmm2,(%esp)
+.L099cbc_ret:
+        movl    16(%esp),%esp
+        movl    36(%esp),%ebp
+        pxor    %xmm2,%xmm2
+        pxor    %xmm1,%xmm1
+        movups  %xmm7,(%ebp)
+        pxor    %xmm7,%xmm7
+.L094cbc_abort:
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
+.type   _aesni_set_encrypt_key,@function
+.align  16
+_aesni_set_encrypt_key:
+        pushl   %ebp
+        pushl   %ebx
+        testl   %eax,%eax
+        jz      .L111bad_pointer
+        testl   %edx,%edx
+        jz      .L111bad_pointer
+        call    .L112pic
+.L112pic:
+        popl    %ebx
+        leal    .Lkey_const-.L112pic(%ebx),%ebx
+        leal    OPENSSL_ia32cap_P,%ebp
+        movups  (%eax),%xmm0
+        xorps   %xmm4,%xmm4
+        movl    4(%ebp),%ebp
+        leal    16(%edx),%edx
+        andl    $268437504,%ebp
+        cmpl    $256,%ecx
+        je      .L11314rounds
+        cmpl    $192,%ecx
+        je      .L11412rounds
+        cmpl    $128,%ecx
+        jne     .L115bad_keybits
+.align  16
+.L11610rounds:
+        cmpl    $268435456,%ebp
+        je      .L11710rounds_alt
+        movl    $9,%ecx
+        movups  %xmm0,-16(%edx)
+.byte   102,15,58,223,200,1
+        call    .L118key_128_cold
+.byte   102,15,58,223,200,2
+        call    .L119key_128
+.byte   102,15,58,223,200,4
+        call    .L119key_128
+.byte   102,15,58,223,200,8
+        call    .L119key_128
+.byte   102,15,58,223,200,16
+        call    .L119key_128
+.byte   102,15,58,223,200,32
+        call    .L119key_128
+.byte   102,15,58,223,200,64
+        call    .L119key_128
+.byte   102,15,58,223,200,128
+        call    .L119key_128
+.byte   102,15,58,223,200,27
+        call    .L119key_128
+.byte   102,15,58,223,200,54
+        call    .L119key_128
+        movups  %xmm0,(%edx)
+        movl    %ecx,80(%edx)
+        jmp     .L120good_key
+.align  16
+.L119key_128:
+        movups  %xmm0,(%edx)
+        leal    16(%edx),%edx
+.L118key_128_cold:
+        shufps  $16,%xmm0,%xmm4
+        xorps   %xmm4,%xmm0
+        shufps  $140,%xmm0,%xmm4
+        xorps   %xmm4,%xmm0
+        shufps  $255,%xmm1,%xmm1
+        xorps   %xmm1,%xmm0
+        ret
+.align  16
+.L11710rounds_alt:
+        movdqa  (%ebx),%xmm5
+        movl    $8,%ecx
+        movdqa  32(%ebx),%xmm4
+        movdqa  %xmm0,%xmm2
+        movdqu  %xmm0,-16(%edx)
+.L121loop_key128:
+.byte   102,15,56,0,197
+.byte   102,15,56,221,196
+        pslld   $1,%xmm4
+        leal    16(%edx),%edx
+        movdqa  %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm3,%xmm2
+        pxor    %xmm2,%xmm0
+        movdqu  %xmm0,-16(%edx)
+        movdqa  %xmm0,%xmm2
+        decl    %ecx
+        jnz     .L121loop_key128
+        movdqa  48(%ebx),%xmm4
+.byte   102,15,56,0,197
+.byte   102,15,56,221,196
+        pslld   $1,%xmm4
+        movdqa  %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm3,%xmm2
+        pxor    %xmm2,%xmm0
+        movdqu  %xmm0,(%edx)
+        movdqa  %xmm0,%xmm2
+.byte   102,15,56,0,197
+.byte   102,15,56,221,196
+        movdqa  %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm2,%xmm3
+        pslldq  $4,%xmm2
+        pxor    %xmm3,%xmm2
+        pxor    %xmm2,%xmm0
+        movdqu  %xmm0,16(%edx)
+        movl    $9,%ecx
+        movl    %ecx,96(%edx)
+        jmp     .L120good_key
+.align  16
+.L11412rounds:
+        movq    16(%eax),%xmm2
+        cmpl    $268435456,%ebp
+        je      .L12212rounds_alt
+        movl    $11,%ecx
+        movups  %xmm0,-16(%edx)
+.byte   102,15,58,223,202,1
+        call    .L123key_192a_cold
+.byte   102,15,58,223,202,2
+        call    .L124key_192b
+.byte   102,15,58,223,202,4
+        call    .L125key_192a
+.byte   102,15,58,223,202,8
+        call    .L124key_192b
+.byte   102,15,58,223,202,16
+        call    .L125key_192a
+.byte   102,15,58,223,202,32
+        call    .L124key_192b
+.byte   102,15,58,223,202,64
+        call    .L125key_192a
+.byte   102,15,58,223,202,128
+        call    .L124key_192b
+        movups  %xmm0,(%edx)
+        movl    %ecx,48(%edx)
+        jmp     .L120good_key
+.align  16
+.L125key_192a:
+        movups  %xmm0,(%edx)
+        leal    16(%edx),%edx
+.align  16
+.L123key_192a_cold:
+        movaps  %xmm2,%xmm5
+.L126key_192b_warm:
+        shufps  $16,%xmm0,%xmm4
+        movdqa  %xmm2,%xmm3
+        xorps   %xmm4,%xmm0
+        shufps  $140,%xmm0,%xmm4
+        pslldq  $4,%xmm3
+        xorps   %xmm4,%xmm0
+        pshufd  $85,%xmm1,%xmm1
+        pxor    %xmm3,%xmm2
+        pxor    %xmm1,%xmm0
+        pshufd  $255,%xmm0,%xmm3
+        pxor    %xmm3,%xmm2
+        ret
+.align  16
+.L124key_192b:
+        movaps  %xmm0,%xmm3
+        shufps  $68,%xmm0,%xmm5
+        movups  %xmm5,(%edx)
+        shufps  $78,%xmm2,%xmm3
+        movups  %xmm3,16(%edx)
+        leal    32(%edx),%edx
+        jmp     .L126key_192b_warm
+.align  16
+.L12212rounds_alt:
+        movdqa  16(%ebx),%xmm5
+        movdqa  32(%ebx),%xmm4
+        movl    $8,%ecx
+        movdqu  %xmm0,-16(%edx)
+.L127loop_key192:
+        movq    %xmm2,(%edx)
+        movdqa  %xmm2,%xmm1
+.byte   102,15,56,0,213
+.byte   102,15,56,221,212
+        pslld   $1,%xmm4
+        leal    24(%edx),%edx
+        movdqa  %xmm0,%xmm3
+        pslldq  $4,%xmm0
+        pxor    %xmm0,%xmm3
+        pslldq  $4,%xmm0
+        pxor    %xmm0,%xmm3
+        pslldq  $4,%xmm0
+        pxor    %xmm3,%xmm0
+        pshufd  $255,%xmm0,%xmm3
+        pxor    %xmm1,%xmm3
+        pslldq  $4,%xmm1
+        pxor    %xmm1,%xmm3
+        pxor    %xmm2,%xmm0
+        pxor    %xmm3,%xmm2
+        movdqu  %xmm0,-16(%edx)
+        decl    %ecx
+        jnz     .L127loop_key192
+        movl    $11,%ecx
+        movl    %ecx,32(%edx)
+        jmp     .L120good_key
+.align  16
+.L11314rounds:
+        movups  16(%eax),%xmm2
+        leal    16(%edx),%edx
+        cmpl    $268435456,%ebp
+        je      .L12814rounds_alt
+        movl    $13,%ecx
+        movups  %xmm0,-32(%edx)
+        movups  %xmm2,-16(%edx)
+.byte   102,15,58,223,202,1
+        call    .L129key_256a_cold
+.byte   102,15,58,223,200,1
+        call    .L130key_256b
+.byte   102,15,58,223,202,2
+        call    .L131key_256a
+.byte   102,15,58,223,200,2
+        call    .L130key_256b
+.byte   102,15,58,223,202,4
+        call    .L131key_256a
+.byte   102,15,58,223,200,4
+        call    .L130key_256b
+.byte   102,15,58,223,202,8
+        call    .L131key_256a
+.byte   102,15,58,223,200,8
+        call    .L130key_256b
+.byte   102,15,58,223,202,16
+        call    .L131key_256a
+.byte   102,15,58,223,200,16
+        call    .L130key_256b
+.byte   102,15,58,223,202,32
+        call    .L131key_256a
+.byte   102,15,58,223,200,32
+        call    .L130key_256b
+.byte   102,15,58,223,202,64
+        call    .L131key_256a
+        movups  %xmm0,(%edx)
+        movl    %ecx,16(%edx)
+        xorl    %eax,%eax
+        jmp     .L120good_key
+.align  16
+.L131key_256a:
+        movups  %xmm2,(%edx)
+        leal    16(%edx),%edx
+.L129key_256a_cold:
+        shufps  $16,%xmm0,%xmm4
+        xorps   %xmm4,%xmm0
+        shufps  $140,%xmm0,%xmm4
+        xorps   %xmm4,%xmm0
+        shufps  $255,%xmm1,%xmm1
+        xorps   %xmm1,%xmm0
+        ret
+.align  16
+.L130key_256b:
+        movups  %xmm0,(%edx)
+        leal    16(%edx),%edx
+        shufps  $16,%xmm2,%xmm4
+        xorps   %xmm4,%xmm2
+        shufps  $140,%xmm2,%xmm4
+        xorps   %xmm4,%xmm2
+        shufps  $170,%xmm1,%xmm1
+        xorps   %xmm1,%xmm2
+        ret
+.align  16
+.L12814rounds_alt:
+        movdqa  (%ebx),%xmm5
+        movdqa  32(%ebx),%xmm4
+        movl    $7,%ecx
+        movdqu  %xmm0,-32(%edx)
+        movdqa  %xmm2,%xmm1
+        movdqu  %xmm2,-16(%edx)
+.L132loop_key256:
+.byte   102,15,56,0,213
+.byte   102,15,56,221,212
+        movdqa  %xmm0,%xmm3
+        pslldq  $4,%xmm0
+        pxor    %xmm0,%xmm3
+        pslldq  $4,%xmm0
+        pxor    %xmm0,%xmm3
+        pslldq  $4,%xmm0
+        pxor    %xmm3,%xmm0
+        pslld   $1,%xmm4
+        pxor    %xmm2,%xmm0
+        movdqu  %xmm0,(%edx)
+        decl    %ecx
+        jz      .L133done_key256
+        pshufd  $255,%xmm0,%xmm2
+        pxor    %xmm3,%xmm3
+.byte   102,15,56,221,211
+        movdqa  %xmm1,%xmm3
+        pslldq  $4,%xmm1
+        pxor    %xmm1,%xmm3
+        pslldq  $4,%xmm1
+        pxor    %xmm1,%xmm3
+        pslldq  $4,%xmm1
+        pxor    %xmm3,%xmm1
+        pxor    %xmm1,%xmm2
+        movdqu  %xmm2,16(%edx)
+        leal    32(%edx),%edx
+        movdqa  %xmm2,%xmm1
+        jmp     .L132loop_key256
+.L133done_key256:
+        movl    $13,%ecx
+        movl    %ecx,16(%edx)
+.L120good_key:
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        pxor    %xmm5,%xmm5
+        xorl    %eax,%eax
+        popl    %ebx
+        popl    %ebp
+        ret
+.align  4
+.L111bad_pointer:
+        movl    $-1,%eax
+        popl    %ebx
+        popl    %ebp
+        ret
+.align  4
+.L115bad_keybits:
+        pxor    %xmm0,%xmm0
+        movl    $-2,%eax
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
+.globl  aesni_set_encrypt_key
+.type   aesni_set_encrypt_key,@function
+.align  16
+aesni_set_encrypt_key:
+.L_aesni_set_encrypt_key_begin:
+        movl    4(%esp),%eax
+        movl    8(%esp),%ecx
+        movl    12(%esp),%edx
+        call    _aesni_set_encrypt_key
+        ret
+.size   aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
+.globl  aesni_set_decrypt_key
+.type   aesni_set_decrypt_key,@function
+.align  16
+aesni_set_decrypt_key:
+.L_aesni_set_decrypt_key_begin:
+        movl    4(%esp),%eax
+        movl    8(%esp),%ecx
+        movl    12(%esp),%edx
+        call    _aesni_set_encrypt_key
+        movl    12(%esp),%edx
+        shll    $4,%ecx
+        testl   %eax,%eax
+        jnz     .L134dec_key_ret
+        leal    16(%edx,%ecx,1),%eax
+        movups  (%edx),%xmm0
+        movups  (%eax),%xmm1
+        movups  %xmm0,(%eax)
+        movups  %xmm1,(%edx)
+        leal    16(%edx),%edx
+        leal    -16(%eax),%eax
+.L135dec_key_inverse:
+        movups  (%edx),%xmm0
+        movups  (%eax),%xmm1
+.byte   102,15,56,219,192
+.byte   102,15,56,219,201
+        leal    16(%edx),%edx
+        leal    -16(%eax),%eax
+        movups  %xmm0,16(%eax)
+        movups  %xmm1,-16(%edx)
+        cmpl    %edx,%eax
+        ja      .L135dec_key_inverse
+        movups  (%edx),%xmm0
+.byte   102,15,56,219,192
+        movups  %xmm0,(%edx)
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        xorl    %eax,%eax
+.L134dec_key_ret:
+        ret
+.size   aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.align  64
+.Lkey_const:
+.long   202313229,202313229,202313229,202313229
+.long   67569157,67569157,67569157,67569157
+.long   1,1,1,1
+.long   27,27,27,27
+.byte   65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+.byte   83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+.byte   32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+.byte   115,108,46,111,114,103,62,0
+.comm   OPENSSL_ia32cap_P,16,4
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/vpaes-x86.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/vpaes-x86.S
new file mode 100644
index 0000000000..6f62534682
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/aes/vpaes-x86.S
@@ -0,0 +1,670 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/aes/asm/vpaes-x86.pl
+#
+# Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.align  64
+.L_vpaes_consts:
+.long   218628480,235210255,168496130,67568393
+.long   252381056,17041926,33884169,51187212
+.long   252645135,252645135,252645135,252645135
+.long   1512730624,3266504856,1377990664,3401244816
+.long   830229760,1275146365,2969422977,3447763452
+.long   3411033600,2979783055,338359620,2782886510
+.long   4209124096,907596821,221174255,1006095553
+.long   191964160,3799684038,3164090317,1589111125
+.long   182528256,1777043520,2877432650,3265356744
+.long   1874708224,3503451415,3305285752,363511674
+.long   1606117888,3487855781,1093350906,2384367825
+.long   197121,67569157,134941193,202313229
+.long   67569157,134941193,202313229,197121
+.long   134941193,202313229,197121,67569157
+.long   202313229,197121,67569157,134941193
+.long   33619971,100992007,168364043,235736079
+.long   235736079,33619971,100992007,168364043
+.long   168364043,235736079,33619971,100992007
+.long   100992007,168364043,235736079,33619971
+.long   50462976,117835012,185207048,252579084
+.long   252314880,51251460,117574920,184942860
+.long   184682752,252054788,50987272,118359308
+.long   118099200,185467140,251790600,50727180
+.long   2946363062,528716217,1300004225,1881839624
+.long   1532713819,1532713819,1532713819,1532713819
+.long   3602276352,4288629033,3737020424,4153884961
+.long   1354558464,32357713,2958822624,3775749553
+.long   1201988352,132424512,1572796698,503232858
+.long   2213177600,1597421020,4103937655,675398315
+.long   2749646592,4273543773,1511898873,121693092
+.long   3040248576,1103263732,2871565598,1608280554
+.long   2236667136,2588920351,482954393,64377734
+.long   3069987328,291237287,2117370568,3650299247
+.long   533321216,3573750986,2572112006,1401264716
+.long   1339849704,2721158661,548607111,3445553514
+.long   2128193280,3054596040,2183486460,1257083700
+.long   655635200,1165381986,3923443150,2344132524
+.long   190078720,256924420,290342170,357187870
+.long   1610966272,2263057382,4103205268,309794674
+.long   2592527872,2233205587,1335446729,3402964816
+.long   3973531904,3225098121,3002836325,1918774430
+.long   3870401024,2102906079,2284471353,4117666579
+.long   617007872,1021508343,366931923,691083277
+.long   2528395776,3491914898,2968704004,1613121270
+.long   3445188352,3247741094,844474987,4093578302
+.long   651481088,1190302358,1689581232,574775300
+.long   4289380608,206939853,2555985458,2489840491
+.long   2130264064,327674451,3566485037,3349835193
+.long   2470714624,316102159,3636825756,3393945945
+.byte   86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+.byte   111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+.byte   83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+.byte   114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+.byte   118,101,114,115,105,116,121,41,0
+.align  64
+.type   _vpaes_preheat,@function
+.align  16
+_vpaes_preheat:
+        addl    (%esp),%ebp
+        movdqa  -48(%ebp),%xmm7
+        movdqa  -16(%ebp),%xmm6
+        ret
+.size   _vpaes_preheat,.-_vpaes_preheat
+.type   _vpaes_encrypt_core,@function
+.align  16
+_vpaes_encrypt_core:
+        movl    $16,%ecx
+        movl    240(%edx),%eax
+        movdqa  %xmm6,%xmm1
+        movdqa  (%ebp),%xmm2
+        pandn   %xmm0,%xmm1
+        pand    %xmm6,%xmm0
+        movdqu  (%edx),%xmm5
+.byte   102,15,56,0,208
+        movdqa  16(%ebp),%xmm0
+        pxor    %xmm5,%xmm2
+        psrld   $4,%xmm1
+        addl    $16,%edx
+.byte   102,15,56,0,193
+        leal    192(%ebp),%ebx
+        pxor    %xmm2,%xmm0
+        jmp     .L000enc_entry
+.align  16
+.L001enc_loop:
+        movdqa  32(%ebp),%xmm4
+        movdqa  48(%ebp),%xmm0
+.byte   102,15,56,0,226
+.byte   102,15,56,0,195
+        pxor    %xmm5,%xmm4
+        movdqa  64(%ebp),%xmm5
+        pxor    %xmm4,%xmm0
+        movdqa  -64(%ebx,%ecx,1),%xmm1
+.byte   102,15,56,0,234
+        movdqa  80(%ebp),%xmm2
+        movdqa  (%ebx,%ecx,1),%xmm4
+.byte   102,15,56,0,211
+        movdqa  %xmm0,%xmm3
+        pxor    %xmm5,%xmm2
+.byte   102,15,56,0,193
+        addl    $16,%edx
+        pxor    %xmm2,%xmm0
+.byte   102,15,56,0,220
+        addl    $16,%ecx
+        pxor    %xmm0,%xmm3
+.byte   102,15,56,0,193
+        andl    $48,%ecx
+        subl    $1,%eax
+        pxor    %xmm3,%xmm0
+.L000enc_entry:
+        movdqa  %xmm6,%xmm1
+        movdqa  -32(%ebp),%xmm5
+        pandn   %xmm0,%xmm1
+        psrld   $4,%xmm1
+        pand    %xmm6,%xmm0
+.byte   102,15,56,0,232
+        movdqa  %xmm7,%xmm3
+        pxor    %xmm1,%xmm0
+.byte   102,15,56,0,217
+        movdqa  %xmm7,%xmm4
+        pxor    %xmm5,%xmm3
+.byte   102,15,56,0,224
+        movdqa  %xmm7,%xmm2
+        pxor    %xmm5,%xmm4
+.byte   102,15,56,0,211
+        movdqa  %xmm7,%xmm3
+        pxor    %xmm0,%xmm2
+.byte   102,15,56,0,220
+        movdqu  (%edx),%xmm5
+        pxor    %xmm1,%xmm3
+        jnz     .L001enc_loop
+        movdqa  96(%ebp),%xmm4
+        movdqa  112(%ebp),%xmm0
+.byte   102,15,56,0,226
+        pxor    %xmm5,%xmm4
+.byte   102,15,56,0,195
+        movdqa  64(%ebx,%ecx,1),%xmm1
+        pxor    %xmm4,%xmm0
+.byte   102,15,56,0,193
+        ret
+.size   _vpaes_encrypt_core,.-_vpaes_encrypt_core
+.type   _vpaes_decrypt_core,@function
+.align  16
+_vpaes_decrypt_core:
+        leal    608(%ebp),%ebx
+        movl    240(%edx),%eax
+        movdqa  %xmm6,%xmm1
+        movdqa  -64(%ebx),%xmm2
+        pandn   %xmm0,%xmm1
+        movl    %eax,%ecx
+        psrld   $4,%xmm1
+        movdqu  (%edx),%xmm5
+        shll    $4,%ecx
+        pand    %xmm6,%xmm0
+.byte   102,15,56,0,208
+        movdqa  -48(%ebx),%xmm0
+        xorl    $48,%ecx
+.byte   102,15,56,0,193
+        andl    $48,%ecx
+        pxor    %xmm5,%xmm2
+        movdqa  176(%ebp),%xmm5
+        pxor    %xmm2,%xmm0
+        addl    $16,%edx
+        leal    -352(%ebx,%ecx,1),%ecx
+        jmp     .L002dec_entry
+.align  16
+.L003dec_loop:
+        movdqa  -32(%ebx),%xmm4
+        movdqa  -16(%ebx),%xmm1
+.byte   102,15,56,0,226
+.byte   102,15,56,0,203
+        pxor    %xmm4,%xmm0
+        movdqa  (%ebx),%xmm4
+        pxor    %xmm1,%xmm0
+        movdqa  16(%ebx),%xmm1
+.byte   102,15,56,0,226
+.byte   102,15,56,0,197
+.byte   102,15,56,0,203
+        pxor    %xmm4,%xmm0
+        movdqa  32(%ebx),%xmm4
+        pxor    %xmm1,%xmm0
+        movdqa  48(%ebx),%xmm1
+.byte   102,15,56,0,226
+.byte   102,15,56,0,197
+.byte   102,15,56,0,203
+        pxor    %xmm4,%xmm0
+        movdqa  64(%ebx),%xmm4
+        pxor    %xmm1,%xmm0
+        movdqa  80(%ebx),%xmm1
+.byte   102,15,56,0,226
+.byte   102,15,56,0,197
+.byte   102,15,56,0,203
+        pxor    %xmm4,%xmm0
+        addl    $16,%edx
+.byte   102,15,58,15,237,12
+        pxor    %xmm1,%xmm0
+        subl    $1,%eax
+.L002dec_entry:
+        movdqa  %xmm6,%xmm1
+        movdqa  -32(%ebp),%xmm2
+        pandn   %xmm0,%xmm1
+        pand    %xmm6,%xmm0
+        psrld   $4,%xmm1
+.byte   102,15,56,0,208
+        movdqa  %xmm7,%xmm3
+        pxor    %xmm1,%xmm0
+.byte   102,15,56,0,217
+        movdqa  %xmm7,%xmm4
+        pxor    %xmm2,%xmm3
+.byte   102,15,56,0,224
+        pxor    %xmm2,%xmm4
+        movdqa  %xmm7,%xmm2
+.byte   102,15,56,0,211
+        movdqa  %xmm7,%xmm3
+        pxor    %xmm0,%xmm2
+.byte   102,15,56,0,220
+        movdqu  (%edx),%xmm0
+        pxor    %xmm1,%xmm3
+        jnz     .L003dec_loop
+        movdqa  96(%ebx),%xmm4
+.byte   102,15,56,0,226
+        pxor    %xmm0,%xmm4
+        movdqa  112(%ebx),%xmm0
+        movdqa  (%ecx),%xmm2
+.byte   102,15,56,0,195
+        pxor    %xmm4,%xmm0
+.byte   102,15,56,0,194
+        ret
+.size   _vpaes_decrypt_core,.-_vpaes_decrypt_core
+.type   _vpaes_schedule_core,@function
+.align  16
+_vpaes_schedule_core:
+        addl    (%esp),%ebp
+        movdqu  (%esi),%xmm0
+        movdqa  320(%ebp),%xmm2
+        movdqa  %xmm0,%xmm3
+        leal    (%ebp),%ebx
+        movdqa  %xmm2,4(%esp)
+        call    _vpaes_schedule_transform
+        movdqa  %xmm0,%xmm7
+        testl   %edi,%edi
+        jnz     .L004schedule_am_decrypting
+        movdqu  %xmm0,(%edx)
+        jmp     .L005schedule_go
+.L004schedule_am_decrypting:
+        movdqa  256(%ebp,%ecx,1),%xmm1
+.byte   102,15,56,0,217
+        movdqu  %xmm3,(%edx)
+        xorl    $48,%ecx
+.L005schedule_go:
+        cmpl    $192,%eax
+        ja      .L006schedule_256
+        je      .L007schedule_192
+.L008schedule_128:
+        movl    $10,%eax
+.L009loop_schedule_128:
+        call    _vpaes_schedule_round
+        decl    %eax
+        jz      .L010schedule_mangle_last
+        call    _vpaes_schedule_mangle
+        jmp     .L009loop_schedule_128
+.align  16
+.L007schedule_192:
+        movdqu  8(%esi),%xmm0
+        call    _vpaes_schedule_transform
+        movdqa  %xmm0,%xmm6
+        pxor    %xmm4,%xmm4
+        movhlps %xmm4,%xmm6
+        movl    $4,%eax
+.L011loop_schedule_192:
+        call    _vpaes_schedule_round
+.byte   102,15,58,15,198,8
+        call    _vpaes_schedule_mangle
+        call    _vpaes_schedule_192_smear
+        call    _vpaes_schedule_mangle
+        call    _vpaes_schedule_round
+        decl    %eax
+        jz      .L010schedule_mangle_last
+        call    _vpaes_schedule_mangle
+        call    _vpaes_schedule_192_smear
+        jmp     .L011loop_schedule_192
+.align  16
+.L006schedule_256:
+        movdqu  16(%esi),%xmm0
+        call    _vpaes_schedule_transform
+        movl    $7,%eax
+.L012loop_schedule_256:
+        call    _vpaes_schedule_mangle
+        movdqa  %xmm0,%xmm6
+        call    _vpaes_schedule_round
+        decl    %eax
+        jz      .L010schedule_mangle_last
+        call    _vpaes_schedule_mangle
+        pshufd  $255,%xmm0,%xmm0
+        movdqa  %xmm7,20(%esp)
+        movdqa  %xmm6,%xmm7
+        call    .L_vpaes_schedule_low_round
+        movdqa  20(%esp),%xmm7
+        jmp     .L012loop_schedule_256
+.align  16
+.L010schedule_mangle_last:
+        leal    384(%ebp),%ebx
+        testl   %edi,%edi
+        jnz     .L013schedule_mangle_last_dec
+        movdqa  256(%ebp,%ecx,1),%xmm1
+.byte   102,15,56,0,193
+        leal    352(%ebp),%ebx
+        addl    $32,%edx
+.L013schedule_mangle_last_dec:
+        addl    $-16,%edx
+        pxor    336(%ebp),%xmm0
+        call    _vpaes_schedule_transform
+        movdqu  %xmm0,(%edx)
+        pxor    %xmm0,%xmm0
+        pxor    %xmm1,%xmm1
+        pxor    %xmm2,%xmm2
+        pxor    %xmm3,%xmm3
+        pxor    %xmm4,%xmm4
+        pxor    %xmm5,%xmm5
+        pxor    %xmm6,%xmm6
+        pxor    %xmm7,%xmm7
+        ret
+.size   _vpaes_schedule_core,.-_vpaes_schedule_core
+.type   _vpaes_schedule_192_smear,@function
+.align  16
+_vpaes_schedule_192_smear:
+        pshufd  $128,%xmm6,%xmm1
+        pshufd  $254,%xmm7,%xmm0
+        pxor    %xmm1,%xmm6
+        pxor    %xmm1,%xmm1
+        pxor    %xmm0,%xmm6
+        movdqa  %xmm6,%xmm0
+        movhlps %xmm1,%xmm6
+        ret
+.size   _vpaes_schedule_192_smear,.-_vpaes_schedule_192_smear
+.type   _vpaes_schedule_round,@function
+.align  16
+_vpaes_schedule_round:
+        movdqa  8(%esp),%xmm2
+        pxor    %xmm1,%xmm1
+.byte   102,15,58,15,202,15
+.byte   102,15,58,15,210,15
+        pxor    %xmm1,%xmm7
+        pshufd  $255,%xmm0,%xmm0
+.byte   102,15,58,15,192,1
+        movdqa  %xmm2,8(%esp)
+.L_vpaes_schedule_low_round:
+        movdqa  %xmm7,%xmm1
+        pslldq  $4,%xmm7
+        pxor    %xmm1,%xmm7
+        movdqa  %xmm7,%xmm1
+        pslldq  $8,%xmm7
+        pxor    %xmm1,%xmm7
+        pxor    336(%ebp),%xmm7
+        movdqa  -16(%ebp),%xmm4
+        movdqa  -48(%ebp),%xmm5
+        movdqa  %xmm4,%xmm1
+        pandn   %xmm0,%xmm1
+        psrld   $4,%xmm1
+        pand    %xmm4,%xmm0
+        movdqa  -32(%ebp),%xmm2
+.byte   102,15,56,0,208
+        pxor    %xmm1,%xmm0
+        movdqa  %xmm5,%xmm3
+.byte   102,15,56,0,217
+        pxor    %xmm2,%xmm3
+        movdqa  %xmm5,%xmm4
+.byte   102,15,56,0,224
+        pxor    %xmm2,%xmm4
+        movdqa  %xmm5,%xmm2
+.byte   102,15,56,0,211
+        pxor    %xmm0,%xmm2
+        movdqa  %xmm5,%xmm3
+.byte   102,15,56,0,220
+        pxor    %xmm1,%xmm3
+        movdqa  32(%ebp),%xmm4
+.byte   102,15,56,0,226
+        movdqa  48(%ebp),%xmm0
+.byte   102,15,56,0,195
+        pxor    %xmm4,%xmm0
+        pxor    %xmm7,%xmm0
+        movdqa  %xmm0,%xmm7
+        ret
+.size   _vpaes_schedule_round,.-_vpaes_schedule_round
+.type   _vpaes_schedule_transform,@function
+.align  16
+_vpaes_schedule_transform:
+        movdqa  -16(%ebp),%xmm2
+        movdqa  %xmm2,%xmm1
+        pandn   %xmm0,%xmm1
+        psrld   $4,%xmm1
+        pand    %xmm2,%xmm0
+        movdqa  (%ebx),%xmm2
+.byte   102,15,56,0,208
+        movdqa  16(%ebx),%xmm0
+.byte   102,15,56,0,193
+        pxor    %xmm2,%xmm0
+        ret
+.size   _vpaes_schedule_transform,.-_vpaes_schedule_transform
+.type   _vpaes_schedule_mangle,@function
+.align  16
+_vpaes_schedule_mangle:
+        movdqa  %xmm0,%xmm4
+        movdqa  128(%ebp),%xmm5
+        testl   %edi,%edi
+        jnz     .L014schedule_mangle_dec
+        addl    $16,%edx
+        pxor    336(%ebp),%xmm4
+.byte   102,15,56,0,229
+        movdqa  %xmm4,%xmm3
+.byte   102,15,56,0,229
+        pxor    %xmm4,%xmm3
+.byte   102,15,56,0,229
+        pxor    %xmm4,%xmm3
+        jmp     .L015schedule_mangle_both
+.align  16
+.L014schedule_mangle_dec:
+        movdqa  -16(%ebp),%xmm2
+        leal    416(%ebp),%esi
+        movdqa  %xmm2,%xmm1
+        pandn   %xmm4,%xmm1
+        psrld   $4,%xmm1
+        pand    %xmm2,%xmm4
+        movdqa  (%esi),%xmm2
+.byte   102,15,56,0,212
+        movdqa  16(%esi),%xmm3
+.byte   102,15,56,0,217
+        pxor    %xmm2,%xmm3
+.byte   102,15,56,0,221
+        movdqa  32(%esi),%xmm2
+.byte   102,15,56,0,212
+        pxor    %xmm3,%xmm2
+        movdqa  48(%esi),%xmm3
+.byte   102,15,56,0,217
+        pxor    %xmm2,%xmm3
+.byte   102,15,56,0,221
+        movdqa  64(%esi),%xmm2
+.byte   102,15,56,0,212
+        pxor    %xmm3,%xmm2
+        movdqa  80(%esi),%xmm3
+.byte   102,15,56,0,217
+        pxor    %xmm2,%xmm3
+.byte   102,15,56,0,221
+        movdqa  96(%esi),%xmm2
+.byte   102,15,56,0,212
+        pxor    %xmm3,%xmm2
+        movdqa  112(%esi),%xmm3
+.byte   102,15,56,0,217
+        pxor    %xmm2,%xmm3
+        addl    $-16,%edx
+.L015schedule_mangle_both:
+        movdqa  256(%ebp,%ecx,1),%xmm1
+.byte   102,15,56,0,217
+        addl    $-16,%ecx
+        andl    $48,%ecx
+        movdqu  %xmm3,(%edx)
+        ret
+.size   _vpaes_schedule_mangle,.-_vpaes_schedule_mangle
+.globl  vpaes_set_encrypt_key
+.type   vpaes_set_encrypt_key,@function
+.align  16
+vpaes_set_encrypt_key:
+.L_vpaes_set_encrypt_key_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        leal    -56(%esp),%ebx
+        movl    24(%esp),%eax
+        andl    $-16,%ebx
+        movl    28(%esp),%edx
+        xchgl   %esp,%ebx
+        movl    %ebx,48(%esp)
+        movl    %eax,%ebx
+        shrl    $5,%ebx
+        addl    $5,%ebx
+        movl    %ebx,240(%edx)
+        movl    $48,%ecx
+        movl    $0,%edi
+        leal    .L_vpaes_consts+0x30-.L016pic_point,%ebp
+        call    _vpaes_schedule_core
+.L016pic_point:
+        movl    48(%esp),%esp
+        xorl    %eax,%eax
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   vpaes_set_encrypt_key,.-.L_vpaes_set_encrypt_key_begin
+.globl  vpaes_set_decrypt_key
+.type   vpaes_set_decrypt_key,@function
+.align  16
+vpaes_set_decrypt_key:
+.L_vpaes_set_decrypt_key_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        leal    -56(%esp),%ebx
+        movl    24(%esp),%eax
+        andl    $-16,%ebx
+        movl    28(%esp),%edx
+        xchgl   %esp,%ebx
+        movl    %ebx,48(%esp)
+        movl    %eax,%ebx
+        shrl    $5,%ebx
+        addl    $5,%ebx
+        movl    %ebx,240(%edx)
+        shll    $4,%ebx
+        leal    16(%edx,%ebx,1),%edx
+        movl    $1,%edi
+        movl    %eax,%ecx
+        shrl    $1,%ecx
+        andl    $32,%ecx
+        xorl    $32,%ecx
+        leal    .L_vpaes_consts+0x30-.L017pic_point,%ebp
+        call    _vpaes_schedule_core
+.L017pic_point:
+        movl    48(%esp),%esp
+        xorl    %eax,%eax
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   vpaes_set_decrypt_key,.-.L_vpaes_set_decrypt_key_begin
+.globl  vpaes_encrypt
+.type   vpaes_encrypt,@function
+.align  16
+vpaes_encrypt:
+.L_vpaes_encrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        leal    .L_vpaes_consts+0x30-.L018pic_point,%ebp
+        call    _vpaes_preheat
+.L018pic_point:
+        movl    20(%esp),%esi
+        leal    -56(%esp),%ebx
+        movl    24(%esp),%edi
+        andl    $-16,%ebx
+        movl    28(%esp),%edx
+        xchgl   %esp,%ebx
+        movl    %ebx,48(%esp)
+        movdqu  (%esi),%xmm0
+        call    _vpaes_encrypt_core
+        movdqu  %xmm0,(%edi)
+        movl    48(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   vpaes_encrypt,.-.L_vpaes_encrypt_begin
+.globl  vpaes_decrypt
+.type   vpaes_decrypt,@function
+.align  16
+vpaes_decrypt:
+.L_vpaes_decrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        leal    .L_vpaes_consts+0x30-.L019pic_point,%ebp
+        call    _vpaes_preheat
+.L019pic_point:
+        movl    20(%esp),%esi
+        leal    -56(%esp),%ebx
+        movl    24(%esp),%edi
+        andl    $-16,%ebx
+        movl    28(%esp),%edx
+        xchgl   %esp,%ebx
+        movl    %ebx,48(%esp)
+        movdqu  (%esi),%xmm0
+        call    _vpaes_decrypt_core
+        movdqu  %xmm0,(%edi)
+        movl    48(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   vpaes_decrypt,.-.L_vpaes_decrypt_begin
+.globl  vpaes_cbc_encrypt
+.type   vpaes_cbc_encrypt,@function
+.align  16
+vpaes_cbc_encrypt:
+.L_vpaes_cbc_encrypt_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    32(%esp),%edx
+        subl    $16,%eax
+        jc      .L020cbc_abort
+        leal    -56(%esp),%ebx
+        movl    36(%esp),%ebp
+        andl    $-16,%ebx
+        movl    40(%esp),%ecx
+        xchgl   %esp,%ebx
+        movdqu  (%ebp),%xmm1
+        subl    %esi,%edi
+        movl    %ebx,48(%esp)
+        movl    %edi,(%esp)
+        movl    %edx,4(%esp)
+        movl    %ebp,8(%esp)
+        movl    %eax,%edi
+        leal    .L_vpaes_consts+0x30-.L021pic_point,%ebp
+        call    _vpaes_preheat
+.L021pic_point:
+        cmpl    $0,%ecx
+        je      .L022cbc_dec_loop
+        jmp     .L023cbc_enc_loop
+.align  16
+.L023cbc_enc_loop:
+        movdqu  (%esi),%xmm0
+        pxor    %xmm1,%xmm0
+        call    _vpaes_encrypt_core
+        movl    (%esp),%ebx
+        movl    4(%esp),%edx
+        movdqa  %xmm0,%xmm1
+        movdqu  %xmm0,(%ebx,%esi,1)
+        leal    16(%esi),%esi
+        subl    $16,%edi
+        jnc     .L023cbc_enc_loop
+        jmp     .L024cbc_done
+.align  16
+.L022cbc_dec_loop:
+        movdqu  (%esi),%xmm0
+        movdqa  %xmm1,16(%esp)
+        movdqa  %xmm0,32(%esp)
+        call    _vpaes_decrypt_core
+        movl    (%esp),%ebx
+        movl    4(%esp),%edx
+        pxor    16(%esp),%xmm0
+        movdqa  32(%esp),%xmm1
+        movdqu  %xmm0,(%ebx,%esi,1)
+        leal    16(%esi),%esi
+        subl    $16,%edi
+        jnc     .L022cbc_dec_loop
+.L024cbc_done:
+        movl    8(%esp),%ebx
+        movl    48(%esp),%esp
+        movdqu  %xmm1,(%ebx)
+.L020cbc_abort:
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   vpaes_cbc_encrypt,.-.L_vpaes_cbc_encrypt_begin
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/modes/ghash-x86.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/modes/ghash-x86.S
new file mode 100644
index 0000000000..f52b445df2
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/modes/ghash-x86.S
@@ -0,0 +1,703 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/modes/asm/ghash-x86.pl
+#
+# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.globl  gcm_gmult_4bit_x86
+.type   gcm_gmult_4bit_x86,@function
+.align  16
+gcm_gmult_4bit_x86:
+.L_gcm_gmult_4bit_x86_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        subl    $84,%esp
+        movl    104(%esp),%edi
+        movl    108(%esp),%esi
+        movl    (%edi),%ebp
+        movl    4(%edi),%edx
+        movl    8(%edi),%ecx
+        movl    12(%edi),%ebx
+        movl    $0,16(%esp)
+        movl    $471859200,20(%esp)
+        movl    $943718400,24(%esp)
+        movl    $610271232,28(%esp)
+        movl    $1887436800,32(%esp)
+        movl    $1822425088,36(%esp)
+        movl    $1220542464,40(%esp)
+        movl    $1423966208,44(%esp)
+        movl    $3774873600,48(%esp)
+        movl    $4246732800,52(%esp)
+        movl    $3644850176,56(%esp)
+        movl    $3311403008,60(%esp)
+        movl    $2441084928,64(%esp)
+        movl    $2376073216,68(%esp)
+        movl    $2847932416,72(%esp)
+        movl    $3051356160,76(%esp)
+        movl    %ebp,(%esp)
+        movl    %edx,4(%esp)
+        movl    %ecx,8(%esp)
+        movl    %ebx,12(%esp)
+        shrl    $20,%ebx
+        andl    $240,%ebx
+        movl    4(%esi,%ebx,1),%ebp
+        movl    (%esi,%ebx,1),%edx
+        movl    12(%esi,%ebx,1),%ecx
+        movl    8(%esi,%ebx,1),%ebx
+        xorl    %eax,%eax
+        movl    $15,%edi
+        jmp     .L000x86_loop
+.align  16
+.L000x86_loop:
+        movb    %bl,%al
+        shrdl   $4,%ecx,%ebx
+        andb    $15,%al
+        shrdl   $4,%edx,%ecx
+        shrdl   $4,%ebp,%edx
+        shrl    $4,%ebp
+        xorl    16(%esp,%eax,4),%ebp
+        movb    (%esp,%edi,1),%al
+        andb    $240,%al
+        xorl    8(%esi,%eax,1),%ebx
+        xorl    12(%esi,%eax,1),%ecx
+        xorl    (%esi,%eax,1),%edx
+        xorl    4(%esi,%eax,1),%ebp
+        decl    %edi
+        js      .L001x86_break
+        movb    %bl,%al
+        shrdl   $4,%ecx,%ebx
+        andb    $15,%al
+        shrdl   $4,%edx,%ecx
+        shrdl   $4,%ebp,%edx
+        shrl    $4,%ebp
+        xorl    16(%esp,%eax,4),%ebp
+        movb    (%esp,%edi,1),%al
+        shlb    $4,%al
+        xorl    8(%esi,%eax,1),%ebx
+        xorl    12(%esi,%eax,1),%ecx
+        xorl    (%esi,%eax,1),%edx
+        xorl    4(%esi,%eax,1),%ebp
+        jmp     .L000x86_loop
+.align  16
+.L001x86_break:
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        bswap   %ebp
+        movl    104(%esp),%edi
+        movl    %ebx,12(%edi)
+        movl    %ecx,8(%edi)
+        movl    %edx,4(%edi)
+        movl    %ebp,(%edi)
+        addl    $84,%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
+.globl  gcm_ghash_4bit_x86
+.type   gcm_ghash_4bit_x86,@function
+.align  16
+gcm_ghash_4bit_x86:
+.L_gcm_ghash_4bit_x86_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        subl    $84,%esp
+        movl    104(%esp),%ebx
+        movl    108(%esp),%esi
+        movl    112(%esp),%edi
+        movl    116(%esp),%ecx
+        addl    %edi,%ecx
+        movl    %ecx,116(%esp)
+        movl    (%ebx),%ebp
+        movl    4(%ebx),%edx
+        movl    8(%ebx),%ecx
+        movl    12(%ebx),%ebx
+        movl    $0,16(%esp)
+        movl    $471859200,20(%esp)
+        movl    $943718400,24(%esp)
+        movl    $610271232,28(%esp)
+        movl    $1887436800,32(%esp)
+        movl    $1822425088,36(%esp)
+        movl    $1220542464,40(%esp)
+        movl    $1423966208,44(%esp)
+        movl    $3774873600,48(%esp)
+        movl    $4246732800,52(%esp)
+        movl    $3644850176,56(%esp)
+        movl    $3311403008,60(%esp)
+        movl    $2441084928,64(%esp)
+        movl    $2376073216,68(%esp)
+        movl    $2847932416,72(%esp)
+        movl    $3051356160,76(%esp)
+.align  16
+.L002x86_outer_loop:
+        xorl    12(%edi),%ebx
+        xorl    8(%edi),%ecx
+        xorl    4(%edi),%edx
+        xorl    (%edi),%ebp
+        movl    %ebx,12(%esp)
+        movl    %ecx,8(%esp)
+        movl    %edx,4(%esp)
+        movl    %ebp,(%esp)
+        shrl    $20,%ebx
+        andl    $240,%ebx
+        movl    4(%esi,%ebx,1),%ebp
+        movl    (%esi,%ebx,1),%edx
+        movl    12(%esi,%ebx,1),%ecx
+        movl    8(%esi,%ebx,1),%ebx
+        xorl    %eax,%eax
+        movl    $15,%edi
+        jmp     .L003x86_loop
+.align  16
+.L003x86_loop:
+        movb    %bl,%al
+        shrdl   $4,%ecx,%ebx
+        andb    $15,%al
+        shrdl   $4,%edx,%ecx
+        shrdl   $4,%ebp,%edx
+        shrl    $4,%ebp
+        xorl    16(%esp,%eax,4),%ebp
+        movb    (%esp,%edi,1),%al
+        andb    $240,%al
+        xorl    8(%esi,%eax,1),%ebx
+        xorl    12(%esi,%eax,1),%ecx
+        xorl    (%esi,%eax,1),%edx
+        xorl    4(%esi,%eax,1),%ebp
+        decl    %edi
+        js      .L004x86_break
+        movb    %bl,%al
+        shrdl   $4,%ecx,%ebx
+        andb    $15,%al
+        shrdl   $4,%edx,%ecx
+        shrdl   $4,%ebp,%edx
+        shrl    $4,%ebp
+        xorl    16(%esp,%eax,4),%ebp
+        movb    (%esp,%edi,1),%al
+        shlb    $4,%al
+        xorl    8(%esi,%eax,1),%ebx
+        xorl    12(%esi,%eax,1),%ecx
+        xorl    (%esi,%eax,1),%edx
+        xorl    4(%esi,%eax,1),%ebp
+        jmp     .L003x86_loop
+.align  16
+.L004x86_break:
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        bswap   %ebp
+        movl    112(%esp),%edi
+        leal    16(%edi),%edi
+        cmpl    116(%esp),%edi
+        movl    %edi,112(%esp)
+        jb      .L002x86_outer_loop
+        movl    104(%esp),%edi
+        movl    %ebx,12(%edi)
+        movl    %ecx,8(%edi)
+        movl    %edx,4(%edi)
+        movl    %ebp,(%edi)
+        addl    $84,%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
+.type   _mmx_gmult_4bit_inner,@function
+.align  16
+_mmx_gmult_4bit_inner:
+        xorl    %ecx,%ecx
+        movl    %ebx,%edx
+        movb    %dl,%cl
+        shlb    $4,%cl
+        andl    $240,%edx
+        movq    8(%esi,%ecx,1),%mm0
+        movq    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    14(%edi),%cl
+        psllq   $60,%mm2
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    13(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    12(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    11(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    10(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    9(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    8(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    7(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    6(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    5(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    4(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    3(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    2(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    1(%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        movb    (%edi),%cl
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movl    %ecx,%edx
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        shlb    $4,%cl
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%ecx,1),%mm0
+        psllq   $60,%mm2
+        andl    $240,%edx
+        pxor    (%eax,%ebp,8),%mm1
+        andl    $15,%ebx
+        pxor    (%esi,%ecx,1),%mm1
+        movd    %mm0,%ebp
+        pxor    %mm2,%mm0
+        psrlq   $4,%mm0
+        movq    %mm1,%mm2
+        psrlq   $4,%mm1
+        pxor    8(%esi,%edx,1),%mm0
+        psllq   $60,%mm2
+        pxor    (%eax,%ebx,8),%mm1
+        andl    $15,%ebp
+        pxor    (%esi,%edx,1),%mm1
+        movd    %mm0,%ebx
+        pxor    %mm2,%mm0
+        movl    4(%eax,%ebp,8),%edi
+        psrlq   $32,%mm0
+        movd    %mm1,%edx
+        psrlq   $32,%mm1
+        movd    %mm0,%ecx
+        movd    %mm1,%ebp
+        shll    $4,%edi
+        bswap   %ebx
+        bswap   %edx
+        bswap   %ecx
+        xorl    %edi,%ebp
+        bswap   %ebp
+        ret
+.size   _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
+.globl  gcm_gmult_4bit_mmx
+.type   gcm_gmult_4bit_mmx,@function
+.align  16
+gcm_gmult_4bit_mmx:
+.L_gcm_gmult_4bit_mmx_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%edi
+        movl    24(%esp),%esi
+        call    .L005pic_point
+.L005pic_point:
+        popl    %eax
+        leal    .Lrem_4bit-.L005pic_point(%eax),%eax
+        movzbl  15(%edi),%ebx
+        call    _mmx_gmult_4bit_inner
+        movl    20(%esp),%edi
+        emms
+        movl    %ebx,12(%edi)
+        movl    %edx,4(%edi)
+        movl    %ecx,8(%edi)
+        movl    %ebp,(%edi)
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
+.globl  gcm_ghash_4bit_mmx
+.type   gcm_ghash_4bit_mmx,@function
+.align  16
+gcm_ghash_4bit_mmx:
+.L_gcm_ghash_4bit_mmx_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%ebp
+        movl    24(%esp),%esi
+        movl    28(%esp),%edi
+        movl    32(%esp),%ecx
+        call    .L006pic_point
+.L006pic_point:
+        popl    %eax
+        leal    .Lrem_4bit-.L006pic_point(%eax),%eax
+        addl    %edi,%ecx
+        movl    %ecx,32(%esp)
+        subl    $20,%esp
+        movl    12(%ebp),%ebx
+        movl    4(%ebp),%edx
+        movl    8(%ebp),%ecx
+        movl    (%ebp),%ebp
+        jmp     .L007mmx_outer_loop
+.align  16
+.L007mmx_outer_loop:
+        xorl    12(%edi),%ebx
+        xorl    4(%edi),%edx
+        xorl    8(%edi),%ecx
+        xorl    (%edi),%ebp
+        movl    %edi,48(%esp)
+        movl    %ebx,12(%esp)
+        movl    %edx,4(%esp)
+        movl    %ecx,8(%esp)
+        movl    %ebp,(%esp)
+        movl    %esp,%edi
+        shrl    $24,%ebx
+        call    _mmx_gmult_4bit_inner
+        movl    48(%esp),%edi
+        leal    16(%edi),%edi
+        cmpl    52(%esp),%edi
+        jb      .L007mmx_outer_loop
+        movl    40(%esp),%edi
+        emms
+        movl    %ebx,12(%edi)
+        movl    %edx,4(%edi)
+        movl    %ecx,8(%edi)
+        movl    %ebp,(%edi)
+        addl    $20,%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
+.align  64
+.Lrem_4bit:
+.long   0,0,0,29491200,0,58982400,0,38141952
+.long   0,117964800,0,113901568,0,76283904,0,88997888
+.long   0,235929600,0,265420800,0,227803136,0,206962688
+.long   0,152567808,0,148504576,0,177995776,0,190709760
+.byte   71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
+.byte   82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+.byte   112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+.byte   0
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha1-586.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha1-586.S
new file mode 100644
index 0000000000..0b22a3e553
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha1-586.S
@@ -0,0 +1,1389 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/sha/asm/sha1-586.pl
+#
+# Copyright 1998-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.globl  sha1_block_data_order
+.type   sha1_block_data_order,@function
+.align  16
+sha1_block_data_order:
+.L_sha1_block_data_order_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%ebp
+        movl    24(%esp),%esi
+        movl    28(%esp),%eax
+        subl    $76,%esp
+        shll    $6,%eax
+        addl    %esi,%eax
+        movl    %eax,104(%esp)
+        movl    16(%ebp),%edi
+        jmp     .L000loop
+.align  16
+.L000loop:
+        movl    (%esi),%eax
+        movl    4(%esi),%ebx
+        movl    8(%esi),%ecx
+        movl    12(%esi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        movl    %eax,(%esp)
+        movl    %ebx,4(%esp)
+        movl    %ecx,8(%esp)
+        movl    %edx,12(%esp)
+        movl    16(%esi),%eax
+        movl    20(%esi),%ebx
+        movl    24(%esi),%ecx
+        movl    28(%esi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        movl    %eax,16(%esp)
+        movl    %ebx,20(%esp)
+        movl    %ecx,24(%esp)
+        movl    %edx,28(%esp)
+        movl    32(%esi),%eax
+        movl    36(%esi),%ebx
+        movl    40(%esi),%ecx
+        movl    44(%esi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        movl    %eax,32(%esp)
+        movl    %ebx,36(%esp)
+        movl    %ecx,40(%esp)
+        movl    %edx,44(%esp)
+        movl    48(%esi),%eax
+        movl    52(%esi),%ebx
+        movl    56(%esi),%ecx
+        movl    60(%esi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        movl    %eax,48(%esp)
+        movl    %ebx,52(%esp)
+        movl    %ecx,56(%esp)
+        movl    %edx,60(%esp)
+        movl    %esi,100(%esp)
+        movl    (%ebp),%eax
+        movl    4(%ebp),%ebx
+        movl    8(%ebp),%ecx
+        movl    12(%ebp),%edx
+
+        movl    %ecx,%esi
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        xorl    %edx,%esi
+        addl    %edi,%ebp
+        movl    (%esp),%edi
+        andl    %ebx,%esi
+        rorl    $2,%ebx
+        xorl    %edx,%esi
+        leal    1518500249(%ebp,%edi,1),%ebp
+        addl    %esi,%ebp
+
+        movl    %ebx,%edi
+        movl    %ebp,%esi
+        roll    $5,%ebp
+        xorl    %ecx,%edi
+        addl    %edx,%ebp
+        movl    4(%esp),%edx
+        andl    %eax,%edi
+        rorl    $2,%eax
+        xorl    %ecx,%edi
+        leal    1518500249(%ebp,%edx,1),%ebp
+        addl    %edi,%ebp
+
+        movl    %eax,%edx
+        movl    %ebp,%edi
+        roll    $5,%ebp
+        xorl    %ebx,%edx
+        addl    %ecx,%ebp
+        movl    8(%esp),%ecx
+        andl    %esi,%edx
+        rorl    $2,%esi
+        xorl    %ebx,%edx
+        leal    1518500249(%ebp,%ecx,1),%ebp
+        addl    %edx,%ebp
+
+        movl    %esi,%ecx
+        movl    %ebp,%edx
+        roll    $5,%ebp
+        xorl    %eax,%ecx
+        addl    %ebx,%ebp
+        movl    12(%esp),%ebx
+        andl    %edi,%ecx
+        rorl    $2,%edi
+        xorl    %eax,%ecx
+        leal    1518500249(%ebp,%ebx,1),%ebp
+        addl    %ecx,%ebp
+
+        movl    %edi,%ebx
+        movl    %ebp,%ecx
+        roll    $5,%ebp
+        xorl    %esi,%ebx
+        addl    %eax,%ebp
+        movl    16(%esp),%eax
+        andl    %edx,%ebx
+        rorl    $2,%edx
+        xorl    %esi,%ebx
+        leal    1518500249(%ebp,%eax,1),%ebp
+        addl    %ebx,%ebp
+
+        movl    %edx,%eax
+        movl    %ebp,%ebx
+        roll    $5,%ebp
+        xorl    %edi,%eax
+        addl    %esi,%ebp
+        movl    20(%esp),%esi
+        andl    %ecx,%eax
+        rorl    $2,%ecx
+        xorl    %edi,%eax
+        leal    1518500249(%ebp,%esi,1),%ebp
+        addl    %eax,%ebp
+
+        movl    %ecx,%esi
+        movl    %ebp,%eax
+        roll    $5,%ebp
+        xorl    %edx,%esi
+        addl    %edi,%ebp
+        movl    24(%esp),%edi
+        andl    %ebx,%esi
+        rorl    $2,%ebx
+        xorl    %edx,%esi
+        leal    1518500249(%ebp,%edi,1),%ebp
+        addl    %esi,%ebp
+
+        movl    %ebx,%edi
+        movl    %ebp,%esi
+        roll    $5,%ebp
+        xorl    %ecx,%edi
+        addl    %edx,%ebp
+        movl    28(%esp),%edx
+        andl    %eax,%edi
+        rorl    $2,%eax
+        xorl    %ecx,%edi
+        leal    1518500249(%ebp,%edx,1),%ebp
+        addl    %edi,%ebp
+
+        movl    %eax,%edx
+        movl    %ebp,%edi
+        roll    $5,%ebp
+        xorl    %ebx,%edx
+        addl    %ecx,%ebp
+        movl    32(%esp),%ecx
+        andl    %esi,%edx
+        rorl    $2,%esi
+        xorl    %ebx,%edx
+        leal    1518500249(%ebp,%ecx,1),%ebp
+        addl    %edx,%ebp
+
+        movl    %esi,%ecx
+        movl    %ebp,%edx
+        roll    $5,%ebp
+        xorl    %eax,%ecx
+        addl    %ebx,%ebp
+        movl    36(%esp),%ebx
+        andl    %edi,%ecx
+        rorl    $2,%edi
+        xorl    %eax,%ecx
+        leal    1518500249(%ebp,%ebx,1),%ebp
+        addl    %ecx,%ebp
+
+        movl    %edi,%ebx
+        movl    %ebp,%ecx
+        roll    $5,%ebp
+        xorl    %esi,%ebx
+        addl    %eax,%ebp
+        movl    40(%esp),%eax
+        andl    %edx,%ebx
+        rorl    $2,%edx
+        xorl    %esi,%ebx
+        leal    1518500249(%ebp,%eax,1),%ebp
+        addl    %ebx,%ebp
+
+        movl    %edx,%eax
+        movl    %ebp,%ebx
+        roll    $5,%ebp
+        xorl    %edi,%eax
+        addl    %esi,%ebp
+        movl    44(%esp),%esi
+        andl    %ecx,%eax
+        rorl    $2,%ecx
+        xorl    %edi,%eax
+        leal    1518500249(%ebp,%esi,1),%ebp
+        addl    %eax,%ebp
+
+        movl    %ecx,%esi
+        movl    %ebp,%eax
+        roll    $5,%ebp
+        xorl    %edx,%esi
+        addl    %edi,%ebp
+        movl    48(%esp),%edi
+        andl    %ebx,%esi
+        rorl    $2,%ebx
+        xorl    %edx,%esi
+        leal    1518500249(%ebp,%edi,1),%ebp
+        addl    %esi,%ebp
+
+        movl    %ebx,%edi
+        movl    %ebp,%esi
+        roll    $5,%ebp
+        xorl    %ecx,%edi
+        addl    %edx,%ebp
+        movl    52(%esp),%edx
+        andl    %eax,%edi
+        rorl    $2,%eax
+        xorl    %ecx,%edi
+        leal    1518500249(%ebp,%edx,1),%ebp
+        addl    %edi,%ebp
+
+        movl    %eax,%edx
+        movl    %ebp,%edi
+        roll    $5,%ebp
+        xorl    %ebx,%edx
+        addl    %ecx,%ebp
+        movl    56(%esp),%ecx
+        andl    %esi,%edx
+        rorl    $2,%esi
+        xorl    %ebx,%edx
+        leal    1518500249(%ebp,%ecx,1),%ebp
+        addl    %edx,%ebp
+
+        movl    %esi,%ecx
+        movl    %ebp,%edx
+        roll    $5,%ebp
+        xorl    %eax,%ecx
+        addl    %ebx,%ebp
+        movl    60(%esp),%ebx
+        andl    %edi,%ecx
+        rorl    $2,%edi
+        xorl    %eax,%ecx
+        leal    1518500249(%ebp,%ebx,1),%ebp
+        movl    (%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edi,%ebp
+        xorl    8(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    32(%esp),%ebx
+        andl    %edx,%ebp
+        xorl    52(%esp),%ebx
+        roll    $1,%ebx
+        xorl    %esi,%ebp
+        addl    %ebp,%eax
+        movl    %ecx,%ebp
+        rorl    $2,%edx
+        movl    %ebx,(%esp)
+        roll    $5,%ebp
+        leal    1518500249(%ebx,%eax,1),%ebx
+        movl    4(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %edx,%ebp
+        xorl    12(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    36(%esp),%eax
+        andl    %ecx,%ebp
+        xorl    56(%esp),%eax
+        roll    $1,%eax
+        xorl    %edi,%ebp
+        addl    %ebp,%esi
+        movl    %ebx,%ebp
+        rorl    $2,%ecx
+        movl    %eax,4(%esp)
+        roll    $5,%ebp
+        leal    1518500249(%eax,%esi,1),%eax
+        movl    8(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ecx,%ebp
+        xorl    16(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    40(%esp),%esi
+        andl    %ebx,%ebp
+        xorl    60(%esp),%esi
+        roll    $1,%esi
+        xorl    %edx,%ebp
+        addl    %ebp,%edi
+        movl    %eax,%ebp
+        rorl    $2,%ebx
+        movl    %esi,8(%esp)
+        roll    $5,%ebp
+        leal    1518500249(%esi,%edi,1),%esi
+        movl    12(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %ebx,%ebp
+        xorl    20(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    44(%esp),%edi
+        andl    %eax,%ebp
+        xorl    (%esp),%edi
+        roll    $1,%edi
+        xorl    %ecx,%ebp
+        addl    %ebp,%edx
+        movl    %esi,%ebp
+        rorl    $2,%eax
+        movl    %edi,12(%esp)
+        roll    $5,%ebp
+        leal    1518500249(%edi,%edx,1),%edi
+        movl    16(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    24(%esp),%edx
+        xorl    %eax,%ebp
+        xorl    48(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    4(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,16(%esp)
+        leal    1859775393(%edx,%ecx,1),%edx
+        movl    20(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    28(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    52(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    8(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,20(%esp)
+        leal    1859775393(%ecx,%ebx,1),%ecx
+        movl    24(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edx,%ebp
+        xorl    32(%esp),%ebx
+        xorl    %edi,%ebp
+        xorl    56(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    12(%esp),%ebx
+        roll    $1,%ebx
+        addl    %ebp,%eax
+        rorl    $2,%edx
+        movl    %ecx,%ebp
+        roll    $5,%ebp
+        movl    %ebx,24(%esp)
+        leal    1859775393(%ebx,%eax,1),%ebx
+        movl    28(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %ecx,%ebp
+        xorl    36(%esp),%eax
+        xorl    %edx,%ebp
+        xorl    60(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    16(%esp),%eax
+        roll    $1,%eax
+        addl    %ebp,%esi
+        rorl    $2,%ecx
+        movl    %ebx,%ebp
+        roll    $5,%ebp
+        movl    %eax,28(%esp)
+        leal    1859775393(%eax,%esi,1),%eax
+        movl    32(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    40(%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    (%esp),%esi
+        xorl    %edx,%ebp
+        xorl    20(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        movl    %esi,32(%esp)
+        leal    1859775393(%esi,%edi,1),%esi
+        movl    36(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    44(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    4(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    24(%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        movl    %edi,36(%esp)
+        leal    1859775393(%edi,%edx,1),%edi
+        movl    40(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    48(%esp),%edx
+        xorl    %eax,%ebp
+        xorl    8(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    28(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,40(%esp)
+        leal    1859775393(%edx,%ecx,1),%edx
+        movl    44(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    52(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    12(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    32(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,44(%esp)
+        leal    1859775393(%ecx,%ebx,1),%ecx
+        movl    48(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edx,%ebp
+        xorl    56(%esp),%ebx
+        xorl    %edi,%ebp
+        xorl    16(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    36(%esp),%ebx
+        roll    $1,%ebx
+        addl    %ebp,%eax
+        rorl    $2,%edx
+        movl    %ecx,%ebp
+        roll    $5,%ebp
+        movl    %ebx,48(%esp)
+        leal    1859775393(%ebx,%eax,1),%ebx
+        movl    52(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %ecx,%ebp
+        xorl    60(%esp),%eax
+        xorl    %edx,%ebp
+        xorl    20(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    40(%esp),%eax
+        roll    $1,%eax
+        addl    %ebp,%esi
+        rorl    $2,%ecx
+        movl    %ebx,%ebp
+        roll    $5,%ebp
+        movl    %eax,52(%esp)
+        leal    1859775393(%eax,%esi,1),%eax
+        movl    56(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    (%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    24(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    44(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        movl    %esi,56(%esp)
+        leal    1859775393(%esi,%edi,1),%esi
+        movl    60(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    4(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    28(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    48(%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        movl    %edi,60(%esp)
+        leal    1859775393(%edi,%edx,1),%edi
+        movl    (%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    8(%esp),%edx
+        xorl    %eax,%ebp
+        xorl    32(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    52(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,(%esp)
+        leal    1859775393(%edx,%ecx,1),%edx
+        movl    4(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    12(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    36(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    56(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,4(%esp)
+        leal    1859775393(%ecx,%ebx,1),%ecx
+        movl    8(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edx,%ebp
+        xorl    16(%esp),%ebx
+        xorl    %edi,%ebp
+        xorl    40(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    60(%esp),%ebx
+        roll    $1,%ebx
+        addl    %ebp,%eax
+        rorl    $2,%edx
+        movl    %ecx,%ebp
+        roll    $5,%ebp
+        movl    %ebx,8(%esp)
+        leal    1859775393(%ebx,%eax,1),%ebx
+        movl    12(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %ecx,%ebp
+        xorl    20(%esp),%eax
+        xorl    %edx,%ebp
+        xorl    44(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    (%esp),%eax
+        roll    $1,%eax
+        addl    %ebp,%esi
+        rorl    $2,%ecx
+        movl    %ebx,%ebp
+        roll    $5,%ebp
+        movl    %eax,12(%esp)
+        leal    1859775393(%eax,%esi,1),%eax
+        movl    16(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    24(%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    48(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    4(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        movl    %esi,16(%esp)
+        leal    1859775393(%esi,%edi,1),%esi
+        movl    20(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    28(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    52(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    8(%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        movl    %edi,20(%esp)
+        leal    1859775393(%edi,%edx,1),%edi
+        movl    24(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    32(%esp),%edx
+        xorl    %eax,%ebp
+        xorl    56(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    12(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,24(%esp)
+        leal    1859775393(%edx,%ecx,1),%edx
+        movl    28(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    36(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    60(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    16(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,28(%esp)
+        leal    1859775393(%ecx,%ebx,1),%ecx
+        movl    32(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edi,%ebp
+        xorl    40(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    (%esp),%ebx
+        andl    %edx,%ebp
+        xorl    20(%esp),%ebx
+        roll    $1,%ebx
+        addl    %eax,%ebp
+        rorl    $2,%edx
+        movl    %ecx,%eax
+        roll    $5,%eax
+        movl    %ebx,32(%esp)
+        leal    2400959708(%ebx,%ebp,1),%ebx
+        movl    %edi,%ebp
+        addl    %eax,%ebx
+        andl    %esi,%ebp
+        movl    36(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %edx,%ebp
+        xorl    44(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    4(%esp),%eax
+        andl    %ecx,%ebp
+        xorl    24(%esp),%eax
+        roll    $1,%eax
+        addl    %esi,%ebp
+        rorl    $2,%ecx
+        movl    %ebx,%esi
+        roll    $5,%esi
+        movl    %eax,36(%esp)
+        leal    2400959708(%eax,%ebp,1),%eax
+        movl    %edx,%ebp
+        addl    %esi,%eax
+        andl    %edi,%ebp
+        movl    40(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ecx,%ebp
+        xorl    48(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    8(%esp),%esi
+        andl    %ebx,%ebp
+        xorl    28(%esp),%esi
+        roll    $1,%esi
+        addl    %edi,%ebp
+        rorl    $2,%ebx
+        movl    %eax,%edi
+        roll    $5,%edi
+        movl    %esi,40(%esp)
+        leal    2400959708(%esi,%ebp,1),%esi
+        movl    %ecx,%ebp
+        addl    %edi,%esi
+        andl    %edx,%ebp
+        movl    44(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %ebx,%ebp
+        xorl    52(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    12(%esp),%edi
+        andl    %eax,%ebp
+        xorl    32(%esp),%edi
+        roll    $1,%edi
+        addl    %edx,%ebp
+        rorl    $2,%eax
+        movl    %esi,%edx
+        roll    $5,%edx
+        movl    %edi,44(%esp)
+        leal    2400959708(%edi,%ebp,1),%edi
+        movl    %ebx,%ebp
+        addl    %edx,%edi
+        andl    %ecx,%ebp
+        movl    48(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %eax,%ebp
+        xorl    56(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    16(%esp),%edx
+        andl    %esi,%ebp
+        xorl    36(%esp),%edx
+        roll    $1,%edx
+        addl    %ecx,%ebp
+        rorl    $2,%esi
+        movl    %edi,%ecx
+        roll    $5,%ecx
+        movl    %edx,48(%esp)
+        leal    2400959708(%edx,%ebp,1),%edx
+        movl    %eax,%ebp
+        addl    %ecx,%edx
+        andl    %ebx,%ebp
+        movl    52(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %esi,%ebp
+        xorl    60(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    20(%esp),%ecx
+        andl    %edi,%ebp
+        xorl    40(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebx,%ebp
+        rorl    $2,%edi
+        movl    %edx,%ebx
+        roll    $5,%ebx
+        movl    %ecx,52(%esp)
+        leal    2400959708(%ecx,%ebp,1),%ecx
+        movl    %esi,%ebp
+        addl    %ebx,%ecx
+        andl    %eax,%ebp
+        movl    56(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edi,%ebp
+        xorl    (%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    24(%esp),%ebx
+        andl    %edx,%ebp
+        xorl    44(%esp),%ebx
+        roll    $1,%ebx
+        addl    %eax,%ebp
+        rorl    $2,%edx
+        movl    %ecx,%eax
+        roll    $5,%eax
+        movl    %ebx,56(%esp)
+        leal    2400959708(%ebx,%ebp,1),%ebx
+        movl    %edi,%ebp
+        addl    %eax,%ebx
+        andl    %esi,%ebp
+        movl    60(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %edx,%ebp
+        xorl    4(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    28(%esp),%eax
+        andl    %ecx,%ebp
+        xorl    48(%esp),%eax
+        roll    $1,%eax
+        addl    %esi,%ebp
+        rorl    $2,%ecx
+        movl    %ebx,%esi
+        roll    $5,%esi
+        movl    %eax,60(%esp)
+        leal    2400959708(%eax,%ebp,1),%eax
+        movl    %edx,%ebp
+        addl    %esi,%eax
+        andl    %edi,%ebp
+        movl    (%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ecx,%ebp
+        xorl    8(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    32(%esp),%esi
+        andl    %ebx,%ebp
+        xorl    52(%esp),%esi
+        roll    $1,%esi
+        addl    %edi,%ebp
+        rorl    $2,%ebx
+        movl    %eax,%edi
+        roll    $5,%edi
+        movl    %esi,(%esp)
+        leal    2400959708(%esi,%ebp,1),%esi
+        movl    %ecx,%ebp
+        addl    %edi,%esi
+        andl    %edx,%ebp
+        movl    4(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %ebx,%ebp
+        xorl    12(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    36(%esp),%edi
+        andl    %eax,%ebp
+        xorl    56(%esp),%edi
+        roll    $1,%edi
+        addl    %edx,%ebp
+        rorl    $2,%eax
+        movl    %esi,%edx
+        roll    $5,%edx
+        movl    %edi,4(%esp)
+        leal    2400959708(%edi,%ebp,1),%edi
+        movl    %ebx,%ebp
+        addl    %edx,%edi
+        andl    %ecx,%ebp
+        movl    8(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %eax,%ebp
+        xorl    16(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    40(%esp),%edx
+        andl    %esi,%ebp
+        xorl    60(%esp),%edx
+        roll    $1,%edx
+        addl    %ecx,%ebp
+        rorl    $2,%esi
+        movl    %edi,%ecx
+        roll    $5,%ecx
+        movl    %edx,8(%esp)
+        leal    2400959708(%edx,%ebp,1),%edx
+        movl    %eax,%ebp
+        addl    %ecx,%edx
+        andl    %ebx,%ebp
+        movl    12(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %esi,%ebp
+        xorl    20(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    44(%esp),%ecx
+        andl    %edi,%ebp
+        xorl    (%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebx,%ebp
+        rorl    $2,%edi
+        movl    %edx,%ebx
+        roll    $5,%ebx
+        movl    %ecx,12(%esp)
+        leal    2400959708(%ecx,%ebp,1),%ecx
+        movl    %esi,%ebp
+        addl    %ebx,%ecx
+        andl    %eax,%ebp
+        movl    16(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edi,%ebp
+        xorl    24(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    48(%esp),%ebx
+        andl    %edx,%ebp
+        xorl    4(%esp),%ebx
+        roll    $1,%ebx
+        addl    %eax,%ebp
+        rorl    $2,%edx
+        movl    %ecx,%eax
+        roll    $5,%eax
+        movl    %ebx,16(%esp)
+        leal    2400959708(%ebx,%ebp,1),%ebx
+        movl    %edi,%ebp
+        addl    %eax,%ebx
+        andl    %esi,%ebp
+        movl    20(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %edx,%ebp
+        xorl    28(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    52(%esp),%eax
+        andl    %ecx,%ebp
+        xorl    8(%esp),%eax
+        roll    $1,%eax
+        addl    %esi,%ebp
+        rorl    $2,%ecx
+        movl    %ebx,%esi
+        roll    $5,%esi
+        movl    %eax,20(%esp)
+        leal    2400959708(%eax,%ebp,1),%eax
+        movl    %edx,%ebp
+        addl    %esi,%eax
+        andl    %edi,%ebp
+        movl    24(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ecx,%ebp
+        xorl    32(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    56(%esp),%esi
+        andl    %ebx,%ebp
+        xorl    12(%esp),%esi
+        roll    $1,%esi
+        addl    %edi,%ebp
+        rorl    $2,%ebx
+        movl    %eax,%edi
+        roll    $5,%edi
+        movl    %esi,24(%esp)
+        leal    2400959708(%esi,%ebp,1),%esi
+        movl    %ecx,%ebp
+        addl    %edi,%esi
+        andl    %edx,%ebp
+        movl    28(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %ebx,%ebp
+        xorl    36(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    60(%esp),%edi
+        andl    %eax,%ebp
+        xorl    16(%esp),%edi
+        roll    $1,%edi
+        addl    %edx,%ebp
+        rorl    $2,%eax
+        movl    %esi,%edx
+        roll    $5,%edx
+        movl    %edi,28(%esp)
+        leal    2400959708(%edi,%ebp,1),%edi
+        movl    %ebx,%ebp
+        addl    %edx,%edi
+        andl    %ecx,%ebp
+        movl    32(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %eax,%ebp
+        xorl    40(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    (%esp),%edx
+        andl    %esi,%ebp
+        xorl    20(%esp),%edx
+        roll    $1,%edx
+        addl    %ecx,%ebp
+        rorl    $2,%esi
+        movl    %edi,%ecx
+        roll    $5,%ecx
+        movl    %edx,32(%esp)
+        leal    2400959708(%edx,%ebp,1),%edx
+        movl    %eax,%ebp
+        addl    %ecx,%edx
+        andl    %ebx,%ebp
+        movl    36(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %esi,%ebp
+        xorl    44(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    4(%esp),%ecx
+        andl    %edi,%ebp
+        xorl    24(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebx,%ebp
+        rorl    $2,%edi
+        movl    %edx,%ebx
+        roll    $5,%ebx
+        movl    %ecx,36(%esp)
+        leal    2400959708(%ecx,%ebp,1),%ecx
+        movl    %esi,%ebp
+        addl    %ebx,%ecx
+        andl    %eax,%ebp
+        movl    40(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edi,%ebp
+        xorl    48(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    8(%esp),%ebx
+        andl    %edx,%ebp
+        xorl    28(%esp),%ebx
+        roll    $1,%ebx
+        addl    %eax,%ebp
+        rorl    $2,%edx
+        movl    %ecx,%eax
+        roll    $5,%eax
+        movl    %ebx,40(%esp)
+        leal    2400959708(%ebx,%ebp,1),%ebx
+        movl    %edi,%ebp
+        addl    %eax,%ebx
+        andl    %esi,%ebp
+        movl    44(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %edx,%ebp
+        xorl    52(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    12(%esp),%eax
+        andl    %ecx,%ebp
+        xorl    32(%esp),%eax
+        roll    $1,%eax
+        addl    %esi,%ebp
+        rorl    $2,%ecx
+        movl    %ebx,%esi
+        roll    $5,%esi
+        movl    %eax,44(%esp)
+        leal    2400959708(%eax,%ebp,1),%eax
+        movl    %edx,%ebp
+        addl    %esi,%eax
+        andl    %edi,%ebp
+        movl    48(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    56(%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    16(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    36(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        movl    %esi,48(%esp)
+        leal    3395469782(%esi,%edi,1),%esi
+        movl    52(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    60(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    20(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    40(%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        movl    %edi,52(%esp)
+        leal    3395469782(%edi,%edx,1),%edi
+        movl    56(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    (%esp),%edx
+        xorl    %eax,%ebp
+        xorl    24(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    44(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,56(%esp)
+        leal    3395469782(%edx,%ecx,1),%edx
+        movl    60(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    4(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    28(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    48(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,60(%esp)
+        leal    3395469782(%ecx,%ebx,1),%ecx
+        movl    (%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edx,%ebp
+        xorl    8(%esp),%ebx
+        xorl    %edi,%ebp
+        xorl    32(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    52(%esp),%ebx
+        roll    $1,%ebx
+        addl    %ebp,%eax
+        rorl    $2,%edx
+        movl    %ecx,%ebp
+        roll    $5,%ebp
+        movl    %ebx,(%esp)
+        leal    3395469782(%ebx,%eax,1),%ebx
+        movl    4(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %ecx,%ebp
+        xorl    12(%esp),%eax
+        xorl    %edx,%ebp
+        xorl    36(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    56(%esp),%eax
+        roll    $1,%eax
+        addl    %ebp,%esi
+        rorl    $2,%ecx
+        movl    %ebx,%ebp
+        roll    $5,%ebp
+        movl    %eax,4(%esp)
+        leal    3395469782(%eax,%esi,1),%eax
+        movl    8(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    16(%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    40(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    60(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        movl    %esi,8(%esp)
+        leal    3395469782(%esi,%edi,1),%esi
+        movl    12(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    20(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    44(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    (%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        movl    %edi,12(%esp)
+        leal    3395469782(%edi,%edx,1),%edi
+        movl    16(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    24(%esp),%edx
+        xorl    %eax,%ebp
+        xorl    48(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    4(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,16(%esp)
+        leal    3395469782(%edx,%ecx,1),%edx
+        movl    20(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    28(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    52(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    8(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,20(%esp)
+        leal    3395469782(%ecx,%ebx,1),%ecx
+        movl    24(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edx,%ebp
+        xorl    32(%esp),%ebx
+        xorl    %edi,%ebp
+        xorl    56(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    12(%esp),%ebx
+        roll    $1,%ebx
+        addl    %ebp,%eax
+        rorl    $2,%edx
+        movl    %ecx,%ebp
+        roll    $5,%ebp
+        movl    %ebx,24(%esp)
+        leal    3395469782(%ebx,%eax,1),%ebx
+        movl    28(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %ecx,%ebp
+        xorl    36(%esp),%eax
+        xorl    %edx,%ebp
+        xorl    60(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    16(%esp),%eax
+        roll    $1,%eax
+        addl    %ebp,%esi
+        rorl    $2,%ecx
+        movl    %ebx,%ebp
+        roll    $5,%ebp
+        movl    %eax,28(%esp)
+        leal    3395469782(%eax,%esi,1),%eax
+        movl    32(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    40(%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    (%esp),%esi
+        xorl    %edx,%ebp
+        xorl    20(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        movl    %esi,32(%esp)
+        leal    3395469782(%esi,%edi,1),%esi
+        movl    36(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    44(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    4(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    24(%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        movl    %edi,36(%esp)
+        leal    3395469782(%edi,%edx,1),%edi
+        movl    40(%esp),%edx
+        addl    %ebp,%edi
+
+        movl    %esi,%ebp
+        xorl    48(%esp),%edx
+        xorl    %eax,%ebp
+        xorl    8(%esp),%edx
+        xorl    %ebx,%ebp
+        xorl    28(%esp),%edx
+        roll    $1,%edx
+        addl    %ebp,%ecx
+        rorl    $2,%esi
+        movl    %edi,%ebp
+        roll    $5,%ebp
+        movl    %edx,40(%esp)
+        leal    3395469782(%edx,%ecx,1),%edx
+        movl    44(%esp),%ecx
+        addl    %ebp,%edx
+
+        movl    %edi,%ebp
+        xorl    52(%esp),%ecx
+        xorl    %esi,%ebp
+        xorl    12(%esp),%ecx
+        xorl    %eax,%ebp
+        xorl    32(%esp),%ecx
+        roll    $1,%ecx
+        addl    %ebp,%ebx
+        rorl    $2,%edi
+        movl    %edx,%ebp
+        roll    $5,%ebp
+        movl    %ecx,44(%esp)
+        leal    3395469782(%ecx,%ebx,1),%ecx
+        movl    48(%esp),%ebx
+        addl    %ebp,%ecx
+
+        movl    %edx,%ebp
+        xorl    56(%esp),%ebx
+        xorl    %edi,%ebp
+        xorl    16(%esp),%ebx
+        xorl    %esi,%ebp
+        xorl    36(%esp),%ebx
+        roll    $1,%ebx
+        addl    %ebp,%eax
+        rorl    $2,%edx
+        movl    %ecx,%ebp
+        roll    $5,%ebp
+        movl    %ebx,48(%esp)
+        leal    3395469782(%ebx,%eax,1),%ebx
+        movl    52(%esp),%eax
+        addl    %ebp,%ebx
+
+        movl    %ecx,%ebp
+        xorl    60(%esp),%eax
+        xorl    %edx,%ebp
+        xorl    20(%esp),%eax
+        xorl    %edi,%ebp
+        xorl    40(%esp),%eax
+        roll    $1,%eax
+        addl    %ebp,%esi
+        rorl    $2,%ecx
+        movl    %ebx,%ebp
+        roll    $5,%ebp
+        leal    3395469782(%eax,%esi,1),%eax
+        movl    56(%esp),%esi
+        addl    %ebp,%eax
+
+        movl    %ebx,%ebp
+        xorl    (%esp),%esi
+        xorl    %ecx,%ebp
+        xorl    24(%esp),%esi
+        xorl    %edx,%ebp
+        xorl    44(%esp),%esi
+        roll    $1,%esi
+        addl    %ebp,%edi
+        rorl    $2,%ebx
+        movl    %eax,%ebp
+        roll    $5,%ebp
+        leal    3395469782(%esi,%edi,1),%esi
+        movl    60(%esp),%edi
+        addl    %ebp,%esi
+
+        movl    %eax,%ebp
+        xorl    4(%esp),%edi
+        xorl    %ebx,%ebp
+        xorl    28(%esp),%edi
+        xorl    %ecx,%ebp
+        xorl    48(%esp),%edi
+        roll    $1,%edi
+        addl    %ebp,%edx
+        rorl    $2,%eax
+        movl    %esi,%ebp
+        roll    $5,%ebp
+        leal    3395469782(%edi,%edx,1),%edi
+        addl    %ebp,%edi
+        movl    96(%esp),%ebp
+        movl    100(%esp),%edx
+        addl    (%ebp),%edi
+        addl    4(%ebp),%esi
+        addl    8(%ebp),%eax
+        addl    12(%ebp),%ebx
+        addl    16(%ebp),%ecx
+        movl    %edi,(%ebp)
+        addl    $64,%edx
+        movl    %esi,4(%ebp)
+        cmpl    104(%esp),%edx
+        movl    %eax,8(%ebp)
+        movl    %ecx,%edi
+        movl    %ebx,12(%ebp)
+        movl    %edx,%esi
+        movl    %ecx,16(%ebp)
+        jb      .L000loop
+        addl    $76,%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   sha1_block_data_order,.-.L_sha1_block_data_order_begin
+.byte   83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115
+.byte   102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82
+.byte   89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+.byte   114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha256-586.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha256-586.S
new file mode 100644
index 0000000000..260dde7ad7
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha256-586.S
@@ -0,0 +1,3356 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/sha/asm/sha256-586.pl
+#
+# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.globl  sha256_block_data_order
+.type   sha256_block_data_order,@function
+.align  16
+sha256_block_data_order:
+.L_sha256_block_data_order_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    %esp,%ebx
+        call    .L000pic_point
+.L000pic_point:
+        popl    %ebp
+        leal    .L001K256-.L000pic_point(%ebp),%ebp
+        subl    $16,%esp
+        andl    $-64,%esp
+        shll    $6,%eax
+        addl    %edi,%eax
+        movl    %esi,(%esp)
+        movl    %edi,4(%esp)
+        movl    %eax,8(%esp)
+        movl    %ebx,12(%esp)
+        jmp     .L002loop
+.align  16
+.L002loop:
+        movl    (%edi),%eax
+        movl    4(%edi),%ebx
+        movl    8(%edi),%ecx
+        bswap   %eax
+        movl    12(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        movl    16(%edi),%eax
+        movl    20(%edi),%ebx
+        movl    24(%edi),%ecx
+        bswap   %eax
+        movl    28(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        movl    32(%edi),%eax
+        movl    36(%edi),%ebx
+        movl    40(%edi),%ecx
+        bswap   %eax
+        movl    44(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        movl    48(%edi),%eax
+        movl    52(%edi),%ebx
+        movl    56(%edi),%ecx
+        bswap   %eax
+        movl    60(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        addl    $64,%edi
+        leal    -36(%esp),%esp
+        movl    %edi,104(%esp)
+        movl    (%esi),%eax
+        movl    4(%esi),%ebx
+        movl    8(%esi),%ecx
+        movl    12(%esi),%edi
+        movl    %ebx,8(%esp)
+        xorl    %ecx,%ebx
+        movl    %ecx,12(%esp)
+        movl    %edi,16(%esp)
+        movl    %ebx,(%esp)
+        movl    16(%esi),%edx
+        movl    20(%esi),%ebx
+        movl    24(%esi),%ecx
+        movl    28(%esi),%edi
+        movl    %ebx,24(%esp)
+        movl    %ecx,28(%esp)
+        movl    %edi,32(%esp)
+.align  16
+.L00300_15:
+        movl    %edx,%ecx
+        movl    24(%esp),%esi
+        rorl    $14,%ecx
+        movl    28(%esp),%edi
+        xorl    %edx,%ecx
+        xorl    %edi,%esi
+        movl    96(%esp),%ebx
+        rorl    $5,%ecx
+        andl    %edx,%esi
+        movl    %edx,20(%esp)
+        xorl    %ecx,%edx
+        addl    32(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %esi,%ebx
+        rorl    $9,%ecx
+        addl    %edx,%ebx
+        movl    8(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,4(%esp)
+        leal    -4(%esp),%esp
+        rorl    $11,%ecx
+        movl    (%ebp),%esi
+        xorl    %eax,%ecx
+        movl    20(%esp),%edx
+        xorl    %edi,%eax
+        rorl    $2,%ecx
+        addl    %esi,%ebx
+        movl    %eax,(%esp)
+        addl    %ebx,%edx
+        andl    4(%esp),%eax
+        addl    %ecx,%ebx
+        xorl    %edi,%eax
+        addl    $4,%ebp
+        addl    %ebx,%eax
+        cmpl    $3248222580,%esi
+        jne     .L00300_15
+        movl    156(%esp),%ecx
+        jmp     .L00416_63
+.align  16
+.L00416_63:
+        movl    %ecx,%ebx
+        movl    104(%esp),%esi
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    160(%esp),%ebx
+        shrl    $10,%edi
+        addl    124(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    24(%esp),%esi
+        rorl    $14,%ecx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %edx,%ecx
+        xorl    %edi,%esi
+        movl    %ebx,96(%esp)
+        rorl    $5,%ecx
+        andl    %edx,%esi
+        movl    %edx,20(%esp)
+        xorl    %ecx,%edx
+        addl    32(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %esi,%ebx
+        rorl    $9,%ecx
+        addl    %edx,%ebx
+        movl    8(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,4(%esp)
+        leal    -4(%esp),%esp
+        rorl    $11,%ecx
+        movl    (%ebp),%esi
+        xorl    %eax,%ecx
+        movl    20(%esp),%edx
+        xorl    %edi,%eax
+        rorl    $2,%ecx
+        addl    %esi,%ebx
+        movl    %eax,(%esp)
+        addl    %ebx,%edx
+        andl    4(%esp),%eax
+        addl    %ecx,%ebx
+        xorl    %edi,%eax
+        movl    156(%esp),%ecx
+        addl    $4,%ebp
+        addl    %ebx,%eax
+        cmpl    $3329325298,%esi
+        jne     .L00416_63
+        movl    356(%esp),%esi
+        movl    8(%esp),%ebx
+        movl    16(%esp),%ecx
+        addl    (%esi),%eax
+        addl    4(%esi),%ebx
+        addl    8(%esi),%edi
+        addl    12(%esi),%ecx
+        movl    %eax,(%esi)
+        movl    %ebx,4(%esi)
+        movl    %edi,8(%esi)
+        movl    %ecx,12(%esi)
+        movl    24(%esp),%eax
+        movl    28(%esp),%ebx
+        movl    32(%esp),%ecx
+        movl    360(%esp),%edi
+        addl    16(%esi),%edx
+        addl    20(%esi),%eax
+        addl    24(%esi),%ebx
+        addl    28(%esi),%ecx
+        movl    %edx,16(%esi)
+        movl    %eax,20(%esi)
+        movl    %ebx,24(%esi)
+        movl    %ecx,28(%esi)
+        leal    356(%esp),%esp
+        subl    $256,%ebp
+        cmpl    8(%esp),%edi
+        jb      .L002loop
+        movl    12(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.align  32
+.L005loop_shrd:
+        movl    (%edi),%eax
+        movl    4(%edi),%ebx
+        movl    8(%edi),%ecx
+        bswap   %eax
+        movl    12(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        movl    16(%edi),%eax
+        movl    20(%edi),%ebx
+        movl    24(%edi),%ecx
+        bswap   %eax
+        movl    28(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        movl    32(%edi),%eax
+        movl    36(%edi),%ebx
+        movl    40(%edi),%ecx
+        bswap   %eax
+        movl    44(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        movl    48(%edi),%eax
+        movl    52(%edi),%ebx
+        movl    56(%edi),%ecx
+        bswap   %eax
+        movl    60(%edi),%edx
+        bswap   %ebx
+        pushl   %eax
+        bswap   %ecx
+        pushl   %ebx
+        bswap   %edx
+        pushl   %ecx
+        pushl   %edx
+        addl    $64,%edi
+        leal    -36(%esp),%esp
+        movl    %edi,104(%esp)
+        movl    (%esi),%eax
+        movl    4(%esi),%ebx
+        movl    8(%esi),%ecx
+        movl    12(%esi),%edi
+        movl    %ebx,8(%esp)
+        xorl    %ecx,%ebx
+        movl    %ecx,12(%esp)
+        movl    %edi,16(%esp)
+        movl    %ebx,(%esp)
+        movl    16(%esi),%edx
+        movl    20(%esi),%ebx
+        movl    24(%esi),%ecx
+        movl    28(%esi),%edi
+        movl    %ebx,24(%esp)
+        movl    %ecx,28(%esp)
+        movl    %edi,32(%esp)
+.align  16
+.L00600_15_shrd:
+        movl    %edx,%ecx
+        movl    24(%esp),%esi
+        shrdl   $14,%ecx,%ecx
+        movl    28(%esp),%edi
+        xorl    %edx,%ecx
+        xorl    %edi,%esi
+        movl    96(%esp),%ebx
+        shrdl   $5,%ecx,%ecx
+        andl    %edx,%esi
+        movl    %edx,20(%esp)
+        xorl    %ecx,%edx
+        addl    32(%esp),%ebx
+        xorl    %edi,%esi
+        shrdl   $6,%edx,%edx
+        movl    %eax,%ecx
+        addl    %esi,%ebx
+        shrdl   $9,%ecx,%ecx
+        addl    %edx,%ebx
+        movl    8(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,4(%esp)
+        leal    -4(%esp),%esp
+        shrdl   $11,%ecx,%ecx
+        movl    (%ebp),%esi
+        xorl    %eax,%ecx
+        movl    20(%esp),%edx
+        xorl    %edi,%eax
+        shrdl   $2,%ecx,%ecx
+        addl    %esi,%ebx
+        movl    %eax,(%esp)
+        addl    %ebx,%edx
+        andl    4(%esp),%eax
+        addl    %ecx,%ebx
+        xorl    %edi,%eax
+        addl    $4,%ebp
+        addl    %ebx,%eax
+        cmpl    $3248222580,%esi
+        jne     .L00600_15_shrd
+        movl    156(%esp),%ecx
+        jmp     .L00716_63_shrd
+.align  16
+.L00716_63_shrd:
+        movl    %ecx,%ebx
+        movl    104(%esp),%esi
+        shrdl   $11,%ecx,%ecx
+        movl    %esi,%edi
+        shrdl   $2,%esi,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        shrdl   $7,%ecx,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        shrdl   $17,%esi,%esi
+        addl    160(%esp),%ebx
+        shrl    $10,%edi
+        addl    124(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    24(%esp),%esi
+        shrdl   $14,%ecx,%ecx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %edx,%ecx
+        xorl    %edi,%esi
+        movl    %ebx,96(%esp)
+        shrdl   $5,%ecx,%ecx
+        andl    %edx,%esi
+        movl    %edx,20(%esp)
+        xorl    %ecx,%edx
+        addl    32(%esp),%ebx
+        xorl    %edi,%esi
+        shrdl   $6,%edx,%edx
+        movl    %eax,%ecx
+        addl    %esi,%ebx
+        shrdl   $9,%ecx,%ecx
+        addl    %edx,%ebx
+        movl    8(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,4(%esp)
+        leal    -4(%esp),%esp
+        shrdl   $11,%ecx,%ecx
+        movl    (%ebp),%esi
+        xorl    %eax,%ecx
+        movl    20(%esp),%edx
+        xorl    %edi,%eax
+        shrdl   $2,%ecx,%ecx
+        addl    %esi,%ebx
+        movl    %eax,(%esp)
+        addl    %ebx,%edx
+        andl    4(%esp),%eax
+        addl    %ecx,%ebx
+        xorl    %edi,%eax
+        movl    156(%esp),%ecx
+        addl    $4,%ebp
+        addl    %ebx,%eax
+        cmpl    $3329325298,%esi
+        jne     .L00716_63_shrd
+        movl    356(%esp),%esi
+        movl    8(%esp),%ebx
+        movl    16(%esp),%ecx
+        addl    (%esi),%eax
+        addl    4(%esi),%ebx
+        addl    8(%esi),%edi
+        addl    12(%esi),%ecx
+        movl    %eax,(%esi)
+        movl    %ebx,4(%esi)
+        movl    %edi,8(%esi)
+        movl    %ecx,12(%esi)
+        movl    24(%esp),%eax
+        movl    28(%esp),%ebx
+        movl    32(%esp),%ecx
+        movl    360(%esp),%edi
+        addl    16(%esi),%edx
+        addl    20(%esi),%eax
+        addl    24(%esi),%ebx
+        addl    28(%esi),%ecx
+        movl    %edx,16(%esi)
+        movl    %eax,20(%esi)
+        movl    %ebx,24(%esi)
+        movl    %ecx,28(%esi)
+        leal    356(%esp),%esp
+        subl    $256,%ebp
+        cmpl    8(%esp),%edi
+        jb      .L005loop_shrd
+        movl    12(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.align  64
+.L001K256:
+.long   1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
+.long   66051,67438087,134810123,202182159
+.byte   83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
+.byte   110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte   67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte   112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte   62,0
+.align  16
+.L008unrolled:
+        leal    -96(%esp),%esp
+        movl    (%esi),%eax
+        movl    4(%esi),%ebp
+        movl    8(%esi),%ecx
+        movl    12(%esi),%ebx
+        movl    %ebp,4(%esp)
+        xorl    %ecx,%ebp
+        movl    %ecx,8(%esp)
+        movl    %ebx,12(%esp)
+        movl    16(%esi),%edx
+        movl    20(%esi),%ebx
+        movl    24(%esi),%ecx
+        movl    28(%esi),%esi
+        movl    %ebx,20(%esp)
+        movl    %ecx,24(%esp)
+        movl    %esi,28(%esp)
+        jmp     .L009grand_loop
+.align  16
+.L009grand_loop:
+        movl    (%edi),%ebx
+        movl    4(%edi),%ecx
+        bswap   %ebx
+        movl    8(%edi),%esi
+        bswap   %ecx
+        movl    %ebx,32(%esp)
+        bswap   %esi
+        movl    %ecx,36(%esp)
+        movl    %esi,40(%esp)
+        movl    12(%edi),%ebx
+        movl    16(%edi),%ecx
+        bswap   %ebx
+        movl    20(%edi),%esi
+        bswap   %ecx
+        movl    %ebx,44(%esp)
+        bswap   %esi
+        movl    %ecx,48(%esp)
+        movl    %esi,52(%esp)
+        movl    24(%edi),%ebx
+        movl    28(%edi),%ecx
+        bswap   %ebx
+        movl    32(%edi),%esi
+        bswap   %ecx
+        movl    %ebx,56(%esp)
+        bswap   %esi
+        movl    %ecx,60(%esp)
+        movl    %esi,64(%esp)
+        movl    36(%edi),%ebx
+        movl    40(%edi),%ecx
+        bswap   %ebx
+        movl    44(%edi),%esi
+        bswap   %ecx
+        movl    %ebx,68(%esp)
+        bswap   %esi
+        movl    %ecx,72(%esp)
+        movl    %esi,76(%esp)
+        movl    48(%edi),%ebx
+        movl    52(%edi),%ecx
+        bswap   %ebx
+        movl    56(%edi),%esi
+        bswap   %ecx
+        movl    %ebx,80(%esp)
+        bswap   %esi
+        movl    %ecx,84(%esp)
+        movl    %esi,88(%esp)
+        movl    60(%edi),%ebx
+        addl    $64,%edi
+        bswap   %ebx
+        movl    %edi,100(%esp)
+        movl    %ebx,92(%esp)
+        movl    %edx,%ecx
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    32(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1116352408(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    36(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1899447441(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    40(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3049323471(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    44(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3921009573(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    48(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    961987163(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    52(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1508970993(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    56(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2453635748(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    60(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2870763221(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    64(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3624381080(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    68(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    310598401(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    72(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    607225278(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    76(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1426881987(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    80(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1925078388(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    84(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2162078206(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    %edx,%ecx
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    88(%esp),%ebx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2614888103(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    %edx,%esi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    92(%esp),%ebx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3248222580(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    36(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    88(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    32(%esp),%ebx
+        shrl    $10,%edi
+        addl    68(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,32(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3835390401(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    40(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    92(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    36(%esp),%ebx
+        shrl    $10,%edi
+        addl    72(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,36(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    4022224774(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    44(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    32(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    40(%esp),%ebx
+        shrl    $10,%edi
+        addl    76(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,40(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    264347078(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    48(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    36(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    44(%esp),%ebx
+        shrl    $10,%edi
+        addl    80(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,44(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    604807628(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    52(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    40(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    48(%esp),%ebx
+        shrl    $10,%edi
+        addl    84(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,48(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    770255983(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    56(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    44(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    52(%esp),%ebx
+        shrl    $10,%edi
+        addl    88(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,52(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1249150122(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    60(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    48(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    56(%esp),%ebx
+        shrl    $10,%edi
+        addl    92(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,56(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1555081692(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    64(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    52(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    60(%esp),%ebx
+        shrl    $10,%edi
+        addl    32(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,60(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1996064986(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    68(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    56(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    64(%esp),%ebx
+        shrl    $10,%edi
+        addl    36(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,64(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2554220882(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    72(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    60(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    68(%esp),%ebx
+        shrl    $10,%edi
+        addl    40(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,68(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2821834349(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    76(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    64(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    72(%esp),%ebx
+        shrl    $10,%edi
+        addl    44(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,72(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2952996808(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    80(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    68(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    76(%esp),%ebx
+        shrl    $10,%edi
+        addl    48(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,76(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3210313671(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    84(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    72(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    80(%esp),%ebx
+        shrl    $10,%edi
+        addl    52(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,80(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3336571891(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    88(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    76(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    84(%esp),%ebx
+        shrl    $10,%edi
+        addl    56(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,84(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3584528711(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    92(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    80(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    88(%esp),%ebx
+        shrl    $10,%edi
+        addl    60(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,88(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    113926993(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    32(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    84(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    92(%esp),%ebx
+        shrl    $10,%edi
+        addl    64(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,92(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    338241895(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    36(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    88(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    32(%esp),%ebx
+        shrl    $10,%edi
+        addl    68(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,32(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    666307205(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    40(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    92(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    36(%esp),%ebx
+        shrl    $10,%edi
+        addl    72(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,36(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    773529912(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    44(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    32(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    40(%esp),%ebx
+        shrl    $10,%edi
+        addl    76(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,40(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1294757372(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    48(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    36(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    44(%esp),%ebx
+        shrl    $10,%edi
+        addl    80(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,44(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1396182291(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    52(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    40(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    48(%esp),%ebx
+        shrl    $10,%edi
+        addl    84(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,48(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1695183700(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    56(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    44(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    52(%esp),%ebx
+        shrl    $10,%edi
+        addl    88(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,52(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1986661051(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    60(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    48(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    56(%esp),%ebx
+        shrl    $10,%edi
+        addl    92(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,56(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2177026350(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    64(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    52(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    60(%esp),%ebx
+        shrl    $10,%edi
+        addl    32(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,60(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2456956037(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    68(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    56(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    64(%esp),%ebx
+        shrl    $10,%edi
+        addl    36(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,64(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2730485921(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    72(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    60(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    68(%esp),%ebx
+        shrl    $10,%edi
+        addl    40(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,68(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2820302411(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    76(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    64(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    72(%esp),%ebx
+        shrl    $10,%edi
+        addl    44(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,72(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3259730800(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    80(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    68(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    76(%esp),%ebx
+        shrl    $10,%edi
+        addl    48(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,76(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3345764771(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    84(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    72(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    80(%esp),%ebx
+        shrl    $10,%edi
+        addl    52(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,80(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3516065817(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    88(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    76(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    84(%esp),%ebx
+        shrl    $10,%edi
+        addl    56(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,84(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3600352804(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    92(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    80(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    88(%esp),%ebx
+        shrl    $10,%edi
+        addl    60(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,88(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    4094571909(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    32(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    84(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    92(%esp),%ebx
+        shrl    $10,%edi
+        addl    64(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,92(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    275423344(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    36(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    88(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    32(%esp),%ebx
+        shrl    $10,%edi
+        addl    68(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,32(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    430227734(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    40(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    92(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    36(%esp),%ebx
+        shrl    $10,%edi
+        addl    72(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,36(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    506948616(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    44(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    32(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    40(%esp),%ebx
+        shrl    $10,%edi
+        addl    76(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,40(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    659060556(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    48(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    36(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    44(%esp),%ebx
+        shrl    $10,%edi
+        addl    80(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,44(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    883997877(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    52(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    40(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    48(%esp),%ebx
+        shrl    $10,%edi
+        addl    84(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,48(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    958139571(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    56(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    44(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    52(%esp),%ebx
+        shrl    $10,%edi
+        addl    88(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,52(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1322822218(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    60(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    48(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    56(%esp),%ebx
+        shrl    $10,%edi
+        addl    92(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,56(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1537002063(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    64(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    52(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    60(%esp),%ebx
+        shrl    $10,%edi
+        addl    32(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,60(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    1747873779(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    68(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    56(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    64(%esp),%ebx
+        shrl    $10,%edi
+        addl    36(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    20(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    24(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,64(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,16(%esp)
+        xorl    %ecx,%edx
+        addl    28(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    4(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    1955562222(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    72(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    12(%esp),%edx
+        addl    %ecx,%ebp
+        movl    60(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    68(%esp),%ebx
+        shrl    $10,%edi
+        addl    40(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    16(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    20(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,68(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,12(%esp)
+        xorl    %esi,%edx
+        addl    24(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    (%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,28(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2024104815(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    76(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    8(%esp),%edx
+        addl    %esi,%eax
+        movl    64(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    72(%esp),%ebx
+        shrl    $10,%edi
+        addl    44(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    12(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    16(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,72(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,8(%esp)
+        xorl    %ecx,%edx
+        addl    20(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    28(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,24(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2227730452(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    80(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    4(%esp),%edx
+        addl    %ecx,%ebp
+        movl    68(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    76(%esp),%ebx
+        shrl    $10,%edi
+        addl    48(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    8(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    12(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,76(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,4(%esp)
+        xorl    %esi,%edx
+        addl    16(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    24(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,20(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2361852424(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    84(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    (%esp),%edx
+        addl    %esi,%eax
+        movl    72(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    80(%esp),%ebx
+        shrl    $10,%edi
+        addl    52(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    4(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    8(%esp),%edi
+        xorl    %ecx,%edx
+        movl    %ebx,80(%esp)
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,(%esp)
+        xorl    %ecx,%edx
+        addl    12(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    20(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,16(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    2428436474(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    88(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    28(%esp),%edx
+        addl    %ecx,%ebp
+        movl    76(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    84(%esp),%ebx
+        shrl    $10,%edi
+        addl    56(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    (%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    4(%esp),%edi
+        xorl    %esi,%edx
+        movl    %ebx,84(%esp)
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,28(%esp)
+        xorl    %esi,%edx
+        addl    8(%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    16(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,12(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    2756734187(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        movl    92(%esp),%ecx
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    24(%esp),%edx
+        addl    %esi,%eax
+        movl    80(%esp),%esi
+        movl    %ecx,%ebx
+        rorl    $11,%ecx
+        movl    %esi,%edi
+        rorl    $2,%esi
+        xorl    %ebx,%ecx
+        shrl    $3,%ebx
+        rorl    $7,%ecx
+        xorl    %edi,%esi
+        xorl    %ecx,%ebx
+        rorl    $17,%esi
+        addl    88(%esp),%ebx
+        shrl    $10,%edi
+        addl    60(%esp),%ebx
+        movl    %edx,%ecx
+        xorl    %esi,%edi
+        movl    28(%esp),%esi
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    (%esp),%edi
+        xorl    %ecx,%edx
+        xorl    %edi,%esi
+        rorl    $5,%edx
+        andl    %ecx,%esi
+        movl    %ecx,24(%esp)
+        xorl    %ecx,%edx
+        addl    4(%esp),%ebx
+        xorl    %esi,%edi
+        rorl    $6,%edx
+        movl    %eax,%ecx
+        addl    %edi,%ebx
+        rorl    $9,%ecx
+        movl    %eax,%esi
+        movl    12(%esp),%edi
+        xorl    %eax,%ecx
+        movl    %eax,8(%esp)
+        xorl    %edi,%eax
+        rorl    $11,%ecx
+        andl    %eax,%ebp
+        leal    3204031479(%ebx,%edx,1),%edx
+        xorl    %esi,%ecx
+        xorl    %edi,%ebp
+        movl    32(%esp),%esi
+        rorl    $2,%ecx
+        addl    %edx,%ebp
+        addl    20(%esp),%edx
+        addl    %ecx,%ebp
+        movl    84(%esp),%ecx
+        movl    %esi,%ebx
+        rorl    $11,%esi
+        movl    %ecx,%edi
+        rorl    $2,%ecx
+        xorl    %ebx,%esi
+        shrl    $3,%ebx
+        rorl    $7,%esi
+        xorl    %edi,%ecx
+        xorl    %esi,%ebx
+        rorl    $17,%ecx
+        addl    92(%esp),%ebx
+        shrl    $10,%edi
+        addl    64(%esp),%ebx
+        movl    %edx,%esi
+        xorl    %ecx,%edi
+        movl    24(%esp),%ecx
+        rorl    $14,%edx
+        addl    %edi,%ebx
+        movl    28(%esp),%edi
+        xorl    %esi,%edx
+        xorl    %edi,%ecx
+        rorl    $5,%edx
+        andl    %esi,%ecx
+        movl    %esi,20(%esp)
+        xorl    %esi,%edx
+        addl    (%esp),%ebx
+        xorl    %ecx,%edi
+        rorl    $6,%edx
+        movl    %ebp,%esi
+        addl    %edi,%ebx
+        rorl    $9,%esi
+        movl    %ebp,%ecx
+        movl    8(%esp),%edi
+        xorl    %ebp,%esi
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        rorl    $11,%esi
+        andl    %ebp,%eax
+        leal    3329325298(%ebx,%edx,1),%edx
+        xorl    %ecx,%esi
+        xorl    %edi,%eax
+        rorl    $2,%esi
+        addl    %edx,%eax
+        addl    16(%esp),%edx
+        addl    %esi,%eax
+        movl    96(%esp),%esi
+        xorl    %edi,%ebp
+        movl    12(%esp),%ecx
+        addl    (%esi),%eax
+        addl    4(%esi),%ebp
+        addl    8(%esi),%edi
+        addl    12(%esi),%ecx
+        movl    %eax,(%esi)
+        movl    %ebp,4(%esi)
+        movl    %edi,8(%esi)
+        movl    %ecx,12(%esi)
+        movl    %ebp,4(%esp)
+        xorl    %edi,%ebp
+        movl    %edi,8(%esp)
+        movl    %ecx,12(%esp)
+        movl    20(%esp),%edi
+        movl    24(%esp),%ebx
+        movl    28(%esp),%ecx
+        addl    16(%esi),%edx
+        addl    20(%esi),%edi
+        addl    24(%esi),%ebx
+        addl    28(%esi),%ecx
+        movl    %edx,16(%esi)
+        movl    %edi,20(%esi)
+        movl    %ebx,24(%esi)
+        movl    %ecx,28(%esi)
+        movl    %edi,20(%esp)
+        movl    100(%esp),%edi
+        movl    %ebx,24(%esp)
+        movl    %ecx,28(%esp)
+        cmpl    104(%esp),%edi
+        jb      .L009grand_loop
+        movl    108(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   sha256_block_data_order,.-.L_sha256_block_data_order_begin
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha512-586.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha512-586.S
new file mode 100644
index 0000000000..c01cc2bed8
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/sha/sha512-586.S
@@ -0,0 +1,574 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/sha/asm/sha512-586.pl
+#
+# Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.globl  sha512_block_data_order
+.type   sha512_block_data_order,@function
+.align  16
+sha512_block_data_order:
+.L_sha512_block_data_order_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    20(%esp),%esi
+        movl    24(%esp),%edi
+        movl    28(%esp),%eax
+        movl    %esp,%ebx
+        call    .L000pic_point
+.L000pic_point:
+        popl    %ebp
+        leal    .L001K512-.L000pic_point(%ebp),%ebp
+        subl    $16,%esp
+        andl    $-64,%esp
+        shll    $7,%eax
+        addl    %edi,%eax
+        movl    %esi,(%esp)
+        movl    %edi,4(%esp)
+        movl    %eax,8(%esp)
+        movl    %ebx,12(%esp)
+.align  16
+.L002loop_x86:
+        movl    (%edi),%eax
+        movl    4(%edi),%ebx
+        movl    8(%edi),%ecx
+        movl    12(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    16(%edi),%eax
+        movl    20(%edi),%ebx
+        movl    24(%edi),%ecx
+        movl    28(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    32(%edi),%eax
+        movl    36(%edi),%ebx
+        movl    40(%edi),%ecx
+        movl    44(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    48(%edi),%eax
+        movl    52(%edi),%ebx
+        movl    56(%edi),%ecx
+        movl    60(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    64(%edi),%eax
+        movl    68(%edi),%ebx
+        movl    72(%edi),%ecx
+        movl    76(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    80(%edi),%eax
+        movl    84(%edi),%ebx
+        movl    88(%edi),%ecx
+        movl    92(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    96(%edi),%eax
+        movl    100(%edi),%ebx
+        movl    104(%edi),%ecx
+        movl    108(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        movl    112(%edi),%eax
+        movl    116(%edi),%ebx
+        movl    120(%edi),%ecx
+        movl    124(%edi),%edx
+        bswap   %eax
+        bswap   %ebx
+        bswap   %ecx
+        bswap   %edx
+        pushl   %eax
+        pushl   %ebx
+        pushl   %ecx
+        pushl   %edx
+        addl    $128,%edi
+        subl    $72,%esp
+        movl    %edi,204(%esp)
+        leal    8(%esp),%edi
+        movl    $16,%ecx
+.long   2784229001
+.align  16
+.L00300_15_x86:
+        movl    40(%esp),%ecx
+        movl    44(%esp),%edx
+        movl    %ecx,%esi
+        shrl    $9,%ecx
+        movl    %edx,%edi
+        shrl    $9,%edx
+        movl    %ecx,%ebx
+        shll    $14,%esi
+        movl    %edx,%eax
+        shll    $14,%edi
+        xorl    %esi,%ebx
+        shrl    $5,%ecx
+        xorl    %edi,%eax
+        shrl    $5,%edx
+        xorl    %ecx,%eax
+        shll    $4,%esi
+        xorl    %edx,%ebx
+        shll    $4,%edi
+        xorl    %esi,%ebx
+        shrl    $4,%ecx
+        xorl    %edi,%eax
+        shrl    $4,%edx
+        xorl    %ecx,%eax
+        shll    $5,%esi
+        xorl    %edx,%ebx
+        shll    $5,%edi
+        xorl    %esi,%eax
+        xorl    %edi,%ebx
+        movl    48(%esp),%ecx
+        movl    52(%esp),%edx
+        movl    56(%esp),%esi
+        movl    60(%esp),%edi
+        addl    64(%esp),%eax
+        adcl    68(%esp),%ebx
+        xorl    %esi,%ecx
+        xorl    %edi,%edx
+        andl    40(%esp),%ecx
+        andl    44(%esp),%edx
+        addl    192(%esp),%eax
+        adcl    196(%esp),%ebx
+        xorl    %esi,%ecx
+        xorl    %edi,%edx
+        movl    (%ebp),%esi
+        movl    4(%ebp),%edi
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        movl    32(%esp),%ecx
+        movl    36(%esp),%edx
+        addl    %esi,%eax
+        adcl    %edi,%ebx
+        movl    %eax,(%esp)
+        movl    %ebx,4(%esp)
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        movl    8(%esp),%ecx
+        movl    12(%esp),%edx
+        movl    %eax,32(%esp)
+        movl    %ebx,36(%esp)
+        movl    %ecx,%esi
+        shrl    $2,%ecx
+        movl    %edx,%edi
+        shrl    $2,%edx
+        movl    %ecx,%ebx
+        shll    $4,%esi
+        movl    %edx,%eax
+        shll    $4,%edi
+        xorl    %esi,%ebx
+        shrl    $5,%ecx
+        xorl    %edi,%eax
+        shrl    $5,%edx
+        xorl    %ecx,%ebx
+        shll    $21,%esi
+        xorl    %edx,%eax
+        shll    $21,%edi
+        xorl    %esi,%eax
+        shrl    $21,%ecx
+        xorl    %edi,%ebx
+        shrl    $21,%edx
+        xorl    %ecx,%eax
+        shll    $5,%esi
+        xorl    %edx,%ebx
+        shll    $5,%edi
+        xorl    %esi,%eax
+        xorl    %edi,%ebx
+        movl    8(%esp),%ecx
+        movl    12(%esp),%edx
+        movl    16(%esp),%esi
+        movl    20(%esp),%edi
+        addl    (%esp),%eax
+        adcl    4(%esp),%ebx
+        orl     %esi,%ecx
+        orl     %edi,%edx
+        andl    24(%esp),%ecx
+        andl    28(%esp),%edx
+        andl    8(%esp),%esi
+        andl    12(%esp),%edi
+        orl     %esi,%ecx
+        orl     %edi,%edx
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        movl    %eax,(%esp)
+        movl    %ebx,4(%esp)
+        movb    (%ebp),%dl
+        subl    $8,%esp
+        leal    8(%ebp),%ebp
+        cmpb    $148,%dl
+        jne     .L00300_15_x86
+.align  16
+.L00416_79_x86:
+        movl    312(%esp),%ecx
+        movl    316(%esp),%edx
+        movl    %ecx,%esi
+        shrl    $1,%ecx
+        movl    %edx,%edi
+        shrl    $1,%edx
+        movl    %ecx,%eax
+        shll    $24,%esi
+        movl    %edx,%ebx
+        shll    $24,%edi
+        xorl    %esi,%ebx
+        shrl    $6,%ecx
+        xorl    %edi,%eax
+        shrl    $6,%edx
+        xorl    %ecx,%eax
+        shll    $7,%esi
+        xorl    %edx,%ebx
+        shll    $1,%edi
+        xorl    %esi,%ebx
+        shrl    $1,%ecx
+        xorl    %edi,%eax
+        shrl    $1,%edx
+        xorl    %ecx,%eax
+        shll    $6,%edi
+        xorl    %edx,%ebx
+        xorl    %edi,%eax
+        movl    %eax,(%esp)
+        movl    %ebx,4(%esp)
+        movl    208(%esp),%ecx
+        movl    212(%esp),%edx
+        movl    %ecx,%esi
+        shrl    $6,%ecx
+        movl    %edx,%edi
+        shrl    $6,%edx
+        movl    %ecx,%eax
+        shll    $3,%esi
+        movl    %edx,%ebx
+        shll    $3,%edi
+        xorl    %esi,%eax
+        shrl    $13,%ecx
+        xorl    %edi,%ebx
+        shrl    $13,%edx
+        xorl    %ecx,%eax
+        shll    $10,%esi
+        xorl    %edx,%ebx
+        shll    $10,%edi
+        xorl    %esi,%ebx
+        shrl    $10,%ecx
+        xorl    %edi,%eax
+        shrl    $10,%edx
+        xorl    %ecx,%ebx
+        shll    $13,%edi
+        xorl    %edx,%eax
+        xorl    %edi,%eax
+        movl    320(%esp),%ecx
+        movl    324(%esp),%edx
+        addl    (%esp),%eax
+        adcl    4(%esp),%ebx
+        movl    248(%esp),%esi
+        movl    252(%esp),%edi
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        addl    %esi,%eax
+        adcl    %edi,%ebx
+        movl    %eax,192(%esp)
+        movl    %ebx,196(%esp)
+        movl    40(%esp),%ecx
+        movl    44(%esp),%edx
+        movl    %ecx,%esi
+        shrl    $9,%ecx
+        movl    %edx,%edi
+        shrl    $9,%edx
+        movl    %ecx,%ebx
+        shll    $14,%esi
+        movl    %edx,%eax
+        shll    $14,%edi
+        xorl    %esi,%ebx
+        shrl    $5,%ecx
+        xorl    %edi,%eax
+        shrl    $5,%edx
+        xorl    %ecx,%eax
+        shll    $4,%esi
+        xorl    %edx,%ebx
+        shll    $4,%edi
+        xorl    %esi,%ebx
+        shrl    $4,%ecx
+        xorl    %edi,%eax
+        shrl    $4,%edx
+        xorl    %ecx,%eax
+        shll    $5,%esi
+        xorl    %edx,%ebx
+        shll    $5,%edi
+        xorl    %esi,%eax
+        xorl    %edi,%ebx
+        movl    48(%esp),%ecx
+        movl    52(%esp),%edx
+        movl    56(%esp),%esi
+        movl    60(%esp),%edi
+        addl    64(%esp),%eax
+        adcl    68(%esp),%ebx
+        xorl    %esi,%ecx
+        xorl    %edi,%edx
+        andl    40(%esp),%ecx
+        andl    44(%esp),%edx
+        addl    192(%esp),%eax
+        adcl    196(%esp),%ebx
+        xorl    %esi,%ecx
+        xorl    %edi,%edx
+        movl    (%ebp),%esi
+        movl    4(%ebp),%edi
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        movl    32(%esp),%ecx
+        movl    36(%esp),%edx
+        addl    %esi,%eax
+        adcl    %edi,%ebx
+        movl    %eax,(%esp)
+        movl    %ebx,4(%esp)
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        movl    8(%esp),%ecx
+        movl    12(%esp),%edx
+        movl    %eax,32(%esp)
+        movl    %ebx,36(%esp)
+        movl    %ecx,%esi
+        shrl    $2,%ecx
+        movl    %edx,%edi
+        shrl    $2,%edx
+        movl    %ecx,%ebx
+        shll    $4,%esi
+        movl    %edx,%eax
+        shll    $4,%edi
+        xorl    %esi,%ebx
+        shrl    $5,%ecx
+        xorl    %edi,%eax
+        shrl    $5,%edx
+        xorl    %ecx,%ebx
+        shll    $21,%esi
+        xorl    %edx,%eax
+        shll    $21,%edi
+        xorl    %esi,%eax
+        shrl    $21,%ecx
+        xorl    %edi,%ebx
+        shrl    $21,%edx
+        xorl    %ecx,%eax
+        shll    $5,%esi
+        xorl    %edx,%ebx
+        shll    $5,%edi
+        xorl    %esi,%eax
+        xorl    %edi,%ebx
+        movl    8(%esp),%ecx
+        movl    12(%esp),%edx
+        movl    16(%esp),%esi
+        movl    20(%esp),%edi
+        addl    (%esp),%eax
+        adcl    4(%esp),%ebx
+        orl     %esi,%ecx
+        orl     %edi,%edx
+        andl    24(%esp),%ecx
+        andl    28(%esp),%edx
+        andl    8(%esp),%esi
+        andl    12(%esp),%edi
+        orl     %esi,%ecx
+        orl     %edi,%edx
+        addl    %ecx,%eax
+        adcl    %edx,%ebx
+        movl    %eax,(%esp)
+        movl    %ebx,4(%esp)
+        movb    (%ebp),%dl
+        subl    $8,%esp
+        leal    8(%ebp),%ebp
+        cmpb    $23,%dl
+        jne     .L00416_79_x86
+        movl    840(%esp),%esi
+        movl    844(%esp),%edi
+        movl    (%esi),%eax
+        movl    4(%esi),%ebx
+        movl    8(%esi),%ecx
+        movl    12(%esi),%edx
+        addl    8(%esp),%eax
+        adcl    12(%esp),%ebx
+        movl    %eax,(%esi)
+        movl    %ebx,4(%esi)
+        addl    16(%esp),%ecx
+        adcl    20(%esp),%edx
+        movl    %ecx,8(%esi)
+        movl    %edx,12(%esi)
+        movl    16(%esi),%eax
+        movl    20(%esi),%ebx
+        movl    24(%esi),%ecx
+        movl    28(%esi),%edx
+        addl    24(%esp),%eax
+        adcl    28(%esp),%ebx
+        movl    %eax,16(%esi)
+        movl    %ebx,20(%esi)
+        addl    32(%esp),%ecx
+        adcl    36(%esp),%edx
+        movl    %ecx,24(%esi)
+        movl    %edx,28(%esi)
+        movl    32(%esi),%eax
+        movl    36(%esi),%ebx
+        movl    40(%esi),%ecx
+        movl    44(%esi),%edx
+        addl    40(%esp),%eax
+        adcl    44(%esp),%ebx
+        movl    %eax,32(%esi)
+        movl    %ebx,36(%esi)
+        addl    48(%esp),%ecx
+        adcl    52(%esp),%edx
+        movl    %ecx,40(%esi)
+        movl    %edx,44(%esi)
+        movl    48(%esi),%eax
+        movl    52(%esi),%ebx
+        movl    56(%esi),%ecx
+        movl    60(%esi),%edx
+        addl    56(%esp),%eax
+        adcl    60(%esp),%ebx
+        movl    %eax,48(%esi)
+        movl    %ebx,52(%esi)
+        addl    64(%esp),%ecx
+        adcl    68(%esp),%edx
+        movl    %ecx,56(%esi)
+        movl    %edx,60(%esi)
+        addl    $840,%esp
+        subl    $640,%ebp
+        cmpl    8(%esp),%edi
+        jb      .L002loop_x86
+        movl    12(%esp),%esp
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.align  64
+.L001K512:
+.long   3609767458,1116352408
+.long   602891725,1899447441
+.long   3964484399,3049323471
+.long   2173295548,3921009573
+.long   4081628472,961987163
+.long   3053834265,1508970993
+.long   2937671579,2453635748
+.long   3664609560,2870763221
+.long   2734883394,3624381080
+.long   1164996542,310598401
+.long   1323610764,607225278
+.long   3590304994,1426881987
+.long   4068182383,1925078388
+.long   991336113,2162078206
+.long   633803317,2614888103
+.long   3479774868,3248222580
+.long   2666613458,3835390401
+.long   944711139,4022224774
+.long   2341262773,264347078
+.long   2007800933,604807628
+.long   1495990901,770255983
+.long   1856431235,1249150122
+.long   3175218132,1555081692
+.long   2198950837,1996064986
+.long   3999719339,2554220882
+.long   766784016,2821834349
+.long   2566594879,2952996808
+.long   3203337956,3210313671
+.long   1034457026,3336571891
+.long   2466948901,3584528711
+.long   3758326383,113926993
+.long   168717936,338241895
+.long   1188179964,666307205
+.long   1546045734,773529912
+.long   1522805485,1294757372
+.long   2643833823,1396182291
+.long   2343527390,1695183700
+.long   1014477480,1986661051
+.long   1206759142,2177026350
+.long   344077627,2456956037
+.long   1290863460,2730485921
+.long   3158454273,2820302411
+.long   3505952657,3259730800
+.long   106217008,3345764771
+.long   3606008344,3516065817
+.long   1432725776,3600352804
+.long   1467031594,4094571909
+.long   851169720,275423344
+.long   3100823752,430227734
+.long   1363258195,506948616
+.long   3750685593,659060556
+.long   3785050280,883997877
+.long   3318307427,958139571
+.long   3812723403,1322822218
+.long   2003034995,1537002063
+.long   3602036899,1747873779
+.long   1575990012,1955562222
+.long   1125592928,2024104815
+.long   2716904306,2227730452
+.long   442776044,2361852424
+.long   593698344,2428436474
+.long   3733110249,2756734187
+.long   2999351573,3204031479
+.long   3815920427,3329325298
+.long   3928383900,3391569614
+.long   566280711,3515267271
+.long   3454069534,3940187606
+.long   4000239992,4118630271
+.long   1914138554,116418474
+.long   2731055270,174292421
+.long   3203993006,289380356
+.long   320620315,460393269
+.long   587496836,685471733
+.long   1086792851,852142971
+.long   365543100,1017036298
+.long   2618297676,1126000580
+.long   3409855158,1288033470
+.long   4234509866,1501505948
+.long   987167468,1607167915
+.long   1246189591,1816402316
+.long   67438087,66051
+.long   202182159,134810123
+.size   sha512_block_data_order,.-.L_sha512_block_data_order_begin
+.byte   83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
+.byte   110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32
+.byte   67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
+.byte   112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
+.byte   62,0
diff --git a/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/x86cpuid.S b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/x86cpuid.S
new file mode 100644
index 0000000000..a1934cbc58
--- /dev/null
+++ b/CryptoPkg/Library/OpensslLib/IA32Gcc/crypto/x86cpuid.S
@@ -0,0 +1,449 @@
+# WARNING: do not edit!
+# Generated from openssl/crypto/x86cpuid.pl
+#
+# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License").  You may not use
+# this file except in compliance with the License.  You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+.text
+.globl  OPENSSL_ia32_cpuid
+.type   OPENSSL_ia32_cpuid,@function
+.align  16
+OPENSSL_ia32_cpuid:
+.L_OPENSSL_ia32_cpuid_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        xorl    %edx,%edx
+        pushfl
+        popl    %eax
+        movl    %eax,%ecx
+        xorl    $2097152,%eax
+        pushl   %eax
+        popfl
+        pushfl
+        popl    %eax
+        xorl    %eax,%ecx
+        xorl    %eax,%eax
+        movl    20(%esp),%esi
+        movl    %eax,8(%esi)
+        btl     $21,%ecx
+        jnc     .L000nocpuid
+        .byte   0x0f,0xa2
+        movl    %eax,%edi
+        xorl    %eax,%eax
+        cmpl    $1970169159,%ebx
+        setne   %al
+        movl    %eax,%ebp
+        cmpl    $1231384169,%edx
+        setne   %al
+        orl     %eax,%ebp
+        cmpl    $1818588270,%ecx
+        setne   %al
+        orl     %eax,%ebp
+        jz      .L001intel
+        cmpl    $1752462657,%ebx
+        setne   %al
+        movl    %eax,%esi
+        cmpl    $1769238117,%edx
+        setne   %al
+        orl     %eax,%esi
+        cmpl    $1145913699,%ecx
+        setne   %al
+        orl     %eax,%esi
+        jnz     .L001intel
+        movl    $2147483648,%eax
+        .byte   0x0f,0xa2
+        cmpl    $2147483649,%eax
+        jb      .L001intel
+        movl    %eax,%esi
+        movl    $2147483649,%eax
+        .byte   0x0f,0xa2
+        orl     %ecx,%ebp
+        andl    $2049,%ebp
+        cmpl    $2147483656,%esi
+        jb      .L001intel
+        movl    $2147483656,%eax
+        .byte   0x0f,0xa2
+        movzbl  %cl,%esi
+        incl    %esi
+        movl    $1,%eax
+        xorl    %ecx,%ecx
+        .byte   0x0f,0xa2
+        btl     $28,%edx
+        jnc     .L002generic
+        shrl    $16,%ebx
+        andl    $255,%ebx
+        cmpl    %esi,%ebx
+        ja      .L002generic
+        andl    $4026531839,%edx
+        jmp     .L002generic
+.L001intel:
+        cmpl    $4,%edi
+        movl    $-1,%esi
+        jb      .L003nocacheinfo
+        movl    $4,%eax
+        movl    $0,%ecx
+        .byte   0x0f,0xa2
+        movl    %eax,%esi
+        shrl    $14,%esi
+        andl    $4095,%esi
+.L003nocacheinfo:
+        movl    $1,%eax
+        xorl    %ecx,%ecx
+        .byte   0x0f,0xa2
+        andl    $3220176895,%edx
+        cmpl    $0,%ebp
+        jne     .L004notintel
+        orl     $1073741824,%edx
+        andb    $15,%ah
+        cmpb    $15,%ah
+        jne     .L004notintel
+        orl     $1048576,%edx
+.L004notintel:
+        btl     $28,%edx
+        jnc     .L002generic
+        andl    $4026531839,%edx
+        cmpl    $0,%esi
+        je      .L002generic
+        orl     $268435456,%edx
+        shrl    $16,%ebx
+        cmpb    $1,%bl
+        ja      .L002generic
+        andl    $4026531839,%edx
+.L002generic:
+        andl    $2048,%ebp
+        andl    $4294965247,%ecx
+        movl    %edx,%esi
+        orl     %ecx,%ebp
+        cmpl    $7,%edi
+        movl    20(%esp),%edi
+        jb      .L005no_extended_info
+        movl    $7,%eax
+        xorl    %ecx,%ecx
+        .byte   0x0f,0xa2
+        movl    %ebx,8(%edi)
+.L005no_extended_info:
+        btl     $27,%ebp
+        jnc     .L006clear_avx
+        xorl    %ecx,%ecx
+.byte   15,1,208
+        andl    $6,%eax
+        cmpl    $6,%eax
+        je      .L007done
+        cmpl    $2,%eax
+        je      .L006clear_avx
+.L008clear_xmm:
+        andl    $4261412861,%ebp
+        andl    $4278190079,%esi
+.L006clear_avx:
+        andl    $4026525695,%ebp
+        andl    $4294967263,8(%edi)
+.L007done:
+        movl    %esi,%eax
+        movl    %ebp,%edx
+.L000nocpuid:
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   OPENSSL_ia32_cpuid,.-.L_OPENSSL_ia32_cpuid_begin
+.globl  OPENSSL_rdtsc
+.type   OPENSSL_rdtsc,@function
+.align  16
+OPENSSL_rdtsc:
+.L_OPENSSL_rdtsc_begin:
+        xorl    %eax,%eax
+        xorl    %edx,%edx
+        leal    OPENSSL_ia32cap_P,%ecx
+        btl     $4,(%ecx)
+        jnc     .L009notsc
+        .byte   0x0f,0x31
+.L009notsc:
+        ret
+.size   OPENSSL_rdtsc,.-.L_OPENSSL_rdtsc_begin
+.globl  OPENSSL_instrument_halt
+.type   OPENSSL_instrument_halt,@function
+.align  16
+OPENSSL_instrument_halt:
+.L_OPENSSL_instrument_halt_begin:
+        leal    OPENSSL_ia32cap_P,%ecx
+        btl     $4,(%ecx)
+        jnc     .L010nohalt
+.long   2421723150
+        andl    $3,%eax
+        jnz     .L010nohalt
+        pushfl
+        popl    %eax
+        btl     $9,%eax
+        jnc     .L010nohalt
+        .byte   0x0f,0x31
+        pushl   %edx
+        pushl   %eax
+        hlt
+        .byte   0x0f,0x31
+        subl    (%esp),%eax
+        sbbl    4(%esp),%edx
+        addl    $8,%esp
+        ret
+.L010nohalt:
+        xorl    %eax,%eax
+        xorl    %edx,%edx
+        ret
+.size   OPENSSL_instrument_halt,.-.L_OPENSSL_instrument_halt_begin
+.globl  OPENSSL_far_spin
+.type   OPENSSL_far_spin,@function
+.align  16
+OPENSSL_far_spin:
+.L_OPENSSL_far_spin_begin:
+        pushfl
+        popl    %eax
+        btl     $9,%eax
+        jnc     .L011nospin
+        movl    4(%esp),%eax
+        movl    8(%esp),%ecx
+.long   2430111262
+        xorl    %eax,%eax
+        movl    (%ecx),%edx
+        jmp     .L012spin
+.align  16
+.L012spin:
+        incl    %eax
+        cmpl    (%ecx),%edx
+        je      .L012spin
+.long   529567888
+        ret
+.L011nospin:
+        xorl    %eax,%eax
+        xorl    %edx,%edx
+        ret
+.size   OPENSSL_far_spin,.-.L_OPENSSL_far_spin_begin
+.globl  OPENSSL_wipe_cpu
+.type   OPENSSL_wipe_cpu,@function
+.align  16
+OPENSSL_wipe_cpu:
+.L_OPENSSL_wipe_cpu_begin:
+        xorl    %eax,%eax
+        xorl    %edx,%edx
+        leal    OPENSSL_ia32cap_P,%ecx
+        movl    (%ecx),%ecx
+        btl     $1,(%ecx)
+        jnc     .L013no_x87
+.long   4007259865,4007259865,4007259865,4007259865,2430851995
+.L013no_x87:
+        leal    4(%esp),%eax
+        ret
+.size   OPENSSL_wipe_cpu,.-.L_OPENSSL_wipe_cpu_begin
+.globl  OPENSSL_atomic_add
+.type   OPENSSL_atomic_add,@function
+.align  16
+OPENSSL_atomic_add:
+.L_OPENSSL_atomic_add_begin:
+        movl    4(%esp),%edx
+        movl    8(%esp),%ecx
+        pushl   %ebx
+        nop
+        movl    (%edx),%eax
+.L014spin:
+        leal    (%eax,%ecx,1),%ebx
+        nop
+.long   447811568
+        jne     .L014spin
+        movl    %ebx,%eax
+        popl    %ebx
+        ret
+.size   OPENSSL_atomic_add,.-.L_OPENSSL_atomic_add_begin
+.globl  OPENSSL_cleanse
+.type   OPENSSL_cleanse,@function
+.align  16
+OPENSSL_cleanse:
+.L_OPENSSL_cleanse_begin:
+        movl    4(%esp),%edx
+        movl    8(%esp),%ecx
+        xorl    %eax,%eax
+        cmpl    $7,%ecx
+        jae     .L015lot
+        cmpl    $0,%ecx
+        je      .L016ret
+.L017little:
+        movb    %al,(%edx)
+        subl    $1,%ecx
+        leal    1(%edx),%edx
+        jnz     .L017little
+.L016ret:
+        ret
+.align  16
+.L015lot:
+        testl   $3,%edx
+        jz      .L018aligned
+        movb    %al,(%edx)
+        leal    -1(%ecx),%ecx
+        leal    1(%edx),%edx
+        jmp     .L015lot
+.L018aligned:
+        movl    %eax,(%edx)
+        leal    -4(%ecx),%ecx
+        testl   $-4,%ecx
+        leal    4(%edx),%edx
+        jnz     .L018aligned
+        cmpl    $0,%ecx
+        jne     .L017little
+        ret
+.size   OPENSSL_cleanse,.-.L_OPENSSL_cleanse_begin
+.globl  CRYPTO_memcmp
+.type   CRYPTO_memcmp,@function
+.align  16
+CRYPTO_memcmp:
+.L_CRYPTO_memcmp_begin:
+        pushl   %esi
+        pushl   %edi
+        movl    12(%esp),%esi
+        movl    16(%esp),%edi
+        movl    20(%esp),%ecx
+        xorl    %eax,%eax
+        xorl    %edx,%edx
+        cmpl    $0,%ecx
+        je      .L019no_data
+.L020loop:
+        movb    (%esi),%dl
+        leal    1(%esi),%esi
+        xorb    (%edi),%dl
+        leal    1(%edi),%edi
+        orb     %dl,%al
+        decl    %ecx
+        jnz     .L020loop
+        negl    %eax
+        shrl    $31,%eax
+.L019no_data:
+        popl    %edi
+        popl    %esi
+        ret
+.size   CRYPTO_memcmp,.-.L_CRYPTO_memcmp_begin
+.globl  OPENSSL_instrument_bus
+.type   OPENSSL_instrument_bus,@function
+.align  16
+OPENSSL_instrument_bus:
+.L_OPENSSL_instrument_bus_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    $0,%eax
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   OPENSSL_instrument_bus,.-.L_OPENSSL_instrument_bus_begin
+.globl  OPENSSL_instrument_bus2
+.type   OPENSSL_instrument_bus2,@function
+.align  16
+OPENSSL_instrument_bus2:
+.L_OPENSSL_instrument_bus2_begin:
+        pushl   %ebp
+        pushl   %ebx
+        pushl   %esi
+        pushl   %edi
+        movl    $0,%eax
+        popl    %edi
+        popl    %esi
+        popl    %ebx
+        popl    %ebp
+        ret
+.size   OPENSSL_instrument_bus2,.-.L_OPENSSL_instrument_bus2_begin
+.globl  OPENSSL_ia32_rdrand_bytes
+.type   OPENSSL_ia32_rdrand_bytes,@function
+.align  16
+OPENSSL_ia32_rdrand_bytes:
+.L_OPENSSL_ia32_rdrand_bytes_begin:
+        pushl   %edi
+        pushl   %ebx
+        xorl    %eax,%eax
+        movl    12(%esp),%edi
+        movl    16(%esp),%ebx
+        cmpl    $0,%ebx
+        je      .L021done
+        movl    $8,%ecx
+.L022loop:
+.byte   15,199,242
+        jc      .L023break
+        loop    .L022loop
+        jmp     .L021done
+.align  16
+.L023break:
+        cmpl    $4,%ebx
+        jb      .L024tail
+        movl    %edx,(%edi)
+        leal    4(%edi),%edi
+        addl    $4,%eax
+        subl    $4,%ebx
+        jz      .L021done
+        movl    $8,%ecx
+        jmp     .L022loop
+.align  16
+.L024tail:
+        movb    %dl,(%edi)
+        leal    1(%edi),%edi
+        incl    %eax
+        shrl    $8,%edx
+        decl    %ebx
+        jnz     .L024tail
+.L021done:
+        xorl    %edx,%edx
+        popl    %ebx
+        popl    %edi
+        ret
+.size   OPENSSL_ia32_rdrand_bytes,.-.L_OPENSSL_ia32_rdrand_bytes_begin
+.globl  OPENSSL_ia32_rdseed_bytes
+.type   OPENSSL_ia32_rdseed_bytes,@function
+.align  16
+OPENSSL_ia32_rdseed_bytes:
+.L_OPENSSL_ia32_rdseed_bytes_begin:
+        pushl   %edi
+        pushl   %ebx
+        xorl    %eax,%eax
+        movl    12(%esp),%edi
+        movl    16(%esp),%ebx
+        cmpl    $0,%ebx
+        je      .L025done
+        movl    $8,%ecx
+.L026loop:
+.byte   15,199,250
+        jc      .L027break
+        loop    .L026loop
+        jmp     .L025done
+.align  16
+.L027break:
+        cmpl    $4,%ebx
+        jb      .L028tail
+        movl    %edx,(%edi)
+        leal    4(%edi),%edi
+        addl    $4,%eax
+        subl    $4,%ebx
+        jz      .L025done
+        movl    $8,%ecx
+        jmp     .L026loop
+.align  16
+.L028tail:
+        movb    %dl,(%edi)
+        leal    1(%edi),%edi
+        incl    %eax
+        shrl    $8,%edx
+        decl    %ebx
+        jnz     .L028tail
+.L025done:
+        xorl    %edx,%edx
+        popl    %ebx
+        popl    %edi
+        ret
+.size   OPENSSL_ia32_rdseed_bytes,.-.L_OPENSSL_ia32_rdseed_bytes_begin
+.hidden OPENSSL_cpuid_setup
+.hidden OPENSSL_ia32cap_P
+.comm   OPENSSL_ia32cap_P,16,4
+.section        .init
+        call    OPENSSL_cpuid_setup
-- 
2.29.2.windows.2



Thread overview: 8+ messages
2022-09-21 20:25 [PATCH v1 0/3] CryptoPkg/OpensslLib: Add native instruction support for IA32 Christopher Zurcher
2022-09-21 20:25 ` [PATCH v1 1/3] " Christopher Zurcher
2022-09-21 20:25 ` Christopher Zurcher [this message]
2022-09-21 20:25 ` [PATCH v1 3/3] CryptoPkg/OpensslLib: Update generated files for native X64 Christopher Zurcher
2022-09-22  0:53 ` [PATCH v1 0/3] CryptoPkg/OpensslLib: Add native instruction support for IA32 Yao, Jiewen
2022-09-22  1:45   ` [edk2-devel] " Christopher Zurcher
2022-09-23 10:34     ` Yao, Jiewen
     [not found]     ` <171776D81421E66F.25721@groups.io>
2022-09-23 11:08       ` Yao, Jiewen
