diff --git a/.gitignore b/.gitignore index f4623d7..f82a3fa 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ openssl-1.0.0a-usa.tar.bz2 +/openssl-1.0.0d-usa.tar.bz2 diff --git a/hobble-openssl b/hobble-openssl index 24b05f9..a8be844 100755 --- a/hobble-openssl +++ b/hobble-openssl @@ -5,9 +5,9 @@ set -e # Clean out patent-or-otherwise-encumbered code. # MDC-2: 4,908,861 13/03/2007 - expired, we do not remove it but do not enable it anyway -# IDEA: 5,214,703 25/05/2010 -# RC5: 5,724,428 03/03/2015 -# EC: ????????? ??/??/2015 +# IDEA: 5,214,703 07/01/2012 +# RC5: 5,724,428 01/11/2015 +# EC: ????????? ??/??/2020 # Remove assembler portions of IDEA, MDC2, and RC5. (find crypto/{idea,rc5}/asm -type f | xargs -r rm -fv) diff --git a/mingw32-openssl-1.0.0a-sfx.patch b/mingw32-openssl-1.0.0a-sfx.patch deleted file mode 100644 index c5ddd8f..0000000 --- a/mingw32-openssl-1.0.0a-sfx.patch +++ /dev/null @@ -1,15 +0,0 @@ -diff -up openssl-1.0.0a/engines/Makefile.mingw-sfx openssl-1.0.0a/engines/Makefile ---- openssl-1.0.0a/engines/Makefile.mingw-sfx 2010-06-19 21:52:59.000000000 +0300 -+++ openssl-1.0.0a/engines/Makefile 2010-06-19 21:53:34.000000000 +0300 -@@ -111,7 +111,10 @@ install: - for l in $(LIBNAMES); do \ - ( echo installing $$l; \ - pfx=lib; \ -- if [ "$(PLATFORM)" != "Cygwin" ]; then \ -+ if [ "$(PLATFORM)" = "mingw" ]; then \ -+ sfx=.dll; \ -+ cp $$pfx$$l$$sfx $(INSTALL_PREFIX)$(INSTALLTOP)/lib/engines/$$pfx$$l$$sfx.new; \ -+ elif [ "$(PLATFORM)" != "Cygwin" ]; then \ - case "$(CFLAGS)" in \ - *DSO_BEOS*) sfx=".so";; \ - *DSO_DLFCN*) sfx=".so";; \ diff --git a/mingw32-openssl-1.0.0d-sfx.patch b/mingw32-openssl-1.0.0d-sfx.patch new file mode 100644 index 0000000..bd877b3 --- /dev/null +++ b/mingw32-openssl-1.0.0d-sfx.patch @@ -0,0 +1,15 @@ +diff -up openssl-1.0.0d/engines/Makefile.mingw-sfx openssl-1.0.0d/engines/Makefile +--- openssl-1.0.0d/engines/Makefile.mingw-sfx 2011-04-23 13:04:15.452843560 +0300 ++++ openssl-1.0.0d/engines/Makefile 2011-04-23 13:04:15.689846190 +0300 +@@ -111,7 +111,10 @@ install: + for l in $(LIBNAMES); do \ + ( echo installing $$l; \ + pfx=lib; \ +- if [ "$(PLATFORM)" != "Cygwin" ]; then \ ++ if [ "$(PLATFORM)" = "mingw" ]; then \ ++ sfx=.dll; \ ++ cp $$pfx$$l$$sfx $(INSTALL_PREFIX)$(INSTALLTOP)/lib/engines/$$pfx$$l$$sfx.new; \ ++ elif [ "$(PLATFORM)" != "Cygwin" ]; then \ + case "$(CFLAGS)" in \ + *DSO_BEOS*) sfx=".so";; \ + *DSO_DLFCN*) sfx=`expr "$(SHLIB_EXT)" : '.*\(\.[a-z][a-z]*\)' \| ".so"`;; \ diff --git a/mingw32-openssl.spec b/mingw32-openssl.spec index 91b336b..48000bd 100644 --- a/mingw32-openssl.spec +++ b/mingw32-openssl.spec @@ -28,8 +28,8 @@ %global thread_test_threads %{?threads:%{threads}}%{!?threads:1} Name: mingw32-openssl -Version: 1.0.0a -Release: 3%{?dist} +Version: 1.0.0d +Release: 1%{?dist} Summary: MinGW port of the OpenSSL toolkit License: OpenSSL @@ -59,7 +59,7 @@ Patch7: openssl-1.0.0-timezone.patch # Bug fixes Patch23: openssl-1.0.0-beta4-default-paths.patch Patch24: openssl-0.9.8j-bad-mime.patch -Patch25: openssl-1.0.0a-sslt1lib.patch +Patch25: openssl-1.0.0a-manfix.patch # Functionality changes Patch32: openssl-0.9.8g-ia64.patch Patch33: openssl-1.0.0-beta4-ca-dir.patch @@ -67,7 +67,7 @@ Patch34: openssl-0.9.6-x509.patch Patch35: openssl-0.9.8j-version-add-engines.patch Patch38: openssl-1.0.0-beta5-cipher-change.patch # Disabled this because it uses getaddrinfo which is lacking on Windows. -#Patch39: openssl-1.0.0-beta5-ipv6-apps.patch +#Patch39: openssl-1.0.0b-ipv6-apps.patch Patch40: openssl-1.0.0a-fips.patch Patch41: openssl-1.0.0-beta3-fipscheck.patch Patch43: openssl-1.0.0a-fipsmode.patch @@ -76,16 +76,23 @@ Patch45: openssl-0.9.8j-env-nozlib.patch Patch47: openssl-1.0.0-beta5-readme-warning.patch Patch49: openssl-1.0.0-beta4-algo-doc.patch Patch50: openssl-1.0.0-beta4-dtls1-abi.patch -Patch51: openssl-1.0.0a-version.patch -Patch52: openssl-1.0.0-beta4-aesni.patch +Patch51: openssl-1.0.0d-version.patch +Patch52: openssl-1.0.0b-aesni.patch Patch53: openssl-1.0.0-name-hash.patch +Patch54: openssl-1.0.0c-speed-fips.patch +#Patch55: openssl-1.0.0c-apps-ipv6listen.patch +Patch56: openssl-1.0.0c-rsa-x931.patch +Patch57: openssl-1.0.0c-fips186-3.patch +Patch58: openssl-1.0.0c-fips-md5-allow.patch +Patch59: openssl-1.0.0c-pkcs12-fips-default.patch +Patch60: openssl-1.0.0d-apps-dgst.patch # Backported fixes including security fixes # MinGW-specific patches. # Rename *eay32.dll to lib*.dll Patch101: mingw32-openssl-1.0.0-beta3-libversion.patch # Fix engines/ install target after lib rename -Patch102: mingw32-openssl-1.0.0a-sfx.patch +Patch102: mingw32-openssl-1.0.0d-sfx.patch BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) @@ -141,7 +148,7 @@ Requires: %{name} = %{version}-%{release} Static version of the MinGW port of the OpenSSL toolkit. -%{_mingw32_debug_package} +%{?_mingw32_debug_package} %prep @@ -158,14 +165,14 @@ Static version of the MinGW port of the OpenSSL toolkit. %patch23 -p1 -b .default-paths %patch24 -p1 -b .bad-mime -%patch25 -p1 -b .sslt1lib +%patch25 -p1 -b .manfix %patch32 -p1 -b .ia64 #patch33 is applied after make test %patch34 -p1 -b .x509 %patch35 -p1 -b .version-add-engines %patch38 -p1 -b .cipher-change -#%patch39 -p1 -b .ipv6-apps +#patch39 -p1 -b .ipv6-apps %patch40 -p1 -b .fips %patch41 -p1 -b .fipscheck %patch43 -p1 -b .fipsmode @@ -177,6 +184,13 @@ Static version of the MinGW port of the OpenSSL toolkit. %patch51 -p1 -b .version %patch52 -p1 -b .aesni %patch53 -p1 -b .name-hash +%patch54 -p1 -b .spfips +#patch55 -p1 -b .ipv6listen +%patch56 -p1 -b .x931 +%patch57 -p1 -b .fips186-3 +%patch58 -p1 -b .md5-allow +%patch59 -p1 -b .fips-default +%patch60 -p1 -b .dgst %patch101 -p1 -b .mingw-libversion %patch102 -p1 -b .mingw-sfx @@ -345,6 +359,10 @@ rm -rf $RPM_BUILD_ROOT %changelog +* Sat Apr 23 2011 Kalev Lember - 1.0.0d-1 +- Update to 1.0.0d +- Synced patches with Fedora native openssl-1.0.0d-2 + * Fri Mar 04 2011 Kai Tietz - Fixes for CVE-2011-0014 openssl: OCSP stapling vulnerability diff --git a/openssl-1.0.0-beta4-aesni.patch b/openssl-1.0.0-beta4-aesni.patch deleted file mode 100644 index f57918b..0000000 --- a/openssl-1.0.0-beta4-aesni.patch +++ /dev/null @@ -1,2388 +0,0 @@ -diff -up openssl-1.0.0-beta4/Configure.aesni openssl-1.0.0-beta4/Configure ---- openssl-1.0.0-beta4/Configure.aesni 2010-01-07 23:38:31.000000000 +0100 -+++ openssl-1.0.0-beta4/Configure 2010-01-12 22:18:06.000000000 +0100 -@@ -123,11 +123,11 @@ my $tlib="-lnsl -lsocket"; - my $bits1="THIRTY_TWO_BIT "; - my $bits2="SIXTY_FOUR_BIT "; - --my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o"; -+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o"; - - my $x86_elf_asm="$x86_asm:elf"; - --my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o"; -+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o"; - my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void"; - my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void"; - my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void"; -@@ -491,7 +491,7 @@ my %table=( - # - # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64 - "VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32", --"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", -+"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", - # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement - # 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE' - "VC-WIN32","cl:-W3 -WX -Gs0 -GF -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE:::WIN32::BN_LLONG RC4_INDEX EXPORT_VAR_AS_FN ${x86_gcc_opts}:${x86_asm}:win32n:win32", -@@ -1410,6 +1410,7 @@ if ($rmd160_obj =~ /\.o$/) - if ($aes_obj =~ /\.o$/) - { - $cflags.=" -DAES_ASM"; -+ $aes_obj =~ s/\s*aesni\-x86\.o// if ($no_sse2); - } - else { - $aes_obj=$aes_enc; -diff -up openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl.aesni openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl ---- openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl.aesni 2010-01-12 22:18:06.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86.pl 2010-01-12 22:18:06.000000000 +0100 -@@ -0,0 +1,765 @@ -+#!/usr/bin/env perl -+ -+# ==================================================================== -+# Written by Andy Polyakov for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# This module implements support for Intel AES-NI extension. In -+# OpenSSL context it's used with Intel engine, but can also be used as -+# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for -+# details]. -+ -+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script -+ # generates drop-in replacement for -+ # crypto/aes/asm/aes-586.pl:-) -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+push(@INC,"${dir}","${dir}../../perlasm"); -+require "x86asm.pl"; -+ -+&asm_init($ARGV[0],$0); -+ -+$movekey = eval($RREFIX eq "aseni" ? "*movaps" : "*movups"); -+ -+$len="eax"; -+$rounds="ecx"; -+$key="edx"; -+$inp="esi"; -+$out="edi"; -+$rounds_="ebx"; # backup copy for $rounds -+$key_="ebp"; # backup copy for $key -+ -+$inout0="xmm0"; -+$inout1="xmm1"; -+$inout2="xmm2"; -+$rndkey0="xmm3"; -+$rndkey1="xmm4"; -+$ivec="xmm5"; -+$in0="xmm6"; -+$in1="xmm7"; $inout3="xmm7"; -+ -+# Inline version of internal aesni_[en|de]crypt1 -+sub aesni_inline_generate1 -+{ my $p=shift; -+ -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ &lea ($key,&DWP(32,$key)); -+ &pxor ($inout0,$rndkey0); -+ &set_label("${p}1_loop"); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &dec ($rounds); -+ &$movekey ($rndkey1,&QWP(0,$key)); -+ &lea ($key,&DWP(16,$key)); -+ &jnz (&label("${p}1_loop")); -+ eval"&aes${p}last ($inout0,$rndkey1)"; -+} -+ -+sub aesni_generate1 # fully unrolled loop -+{ my $p=shift; -+ -+ &function_begin_B("_aesni_${p}rypt1"); -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &$movekey ($rndkey1,&QWP(0x10,$key)); -+ &cmp ($rounds,11); -+ &pxor ($inout0,$rndkey0); -+ &$movekey ($rndkey0,&QWP(0x20,$key)); -+ &lea ($key,&DWP(0x30,$key)); -+ &jb (&label("${p}128")); -+ &lea ($key,&DWP(0x20,$key)); -+ &je (&label("${p}192")); -+ &lea ($key,&DWP(0x20,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(-0x40,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(-0x30,$key)); -+ &set_label("${p}192"); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(-0x20,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(-0x10,$key)); -+ &set_label("${p}128"); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x10,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0x20,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x30,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0x40,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x50,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(0x60,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &$movekey ($rndkey0,&QWP(0x70,$key)); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ eval"&aes${p}last ($inout0,$rndkey0)"; -+ &ret(); -+ &function_end_B("_aesni_${p}rypt1"); -+} -+ -+# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key); -+# &aesni_generate1("dec"); -+&function_begin_B("${PREFIX}_encrypt"); -+ &mov ("eax",&wparam(0)); -+ &mov ($key,&wparam(2)); -+ &movups ($inout0,&QWP(0,"eax")); -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ("eax",&wparam(1)); -+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1"); -+ &movups (&QWP(0,"eax"),$inout0); -+ &ret (); -+&function_end_B("${PREFIX}_encrypt"); -+ -+# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key); -+# &aesni_generate1("dec"); -+&function_begin_B("${PREFIX}_decrypt"); -+ &mov ("eax",&wparam(0)); -+ &mov ($key,&wparam(2)); -+ &movups ($inout0,&QWP(0,"eax")); -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ("eax",&wparam(1)); -+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1"); -+ &movups (&QWP(0,"eax"),$inout0); -+ &ret (); -+&function_end_B("${PREFIX}_decrypt"); -+ -+# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave -+# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] -+# latency is 6, it turned out that it can be scheduled only every -+# *second* cycle. Thus 3x interleave is the one providing optimal -+# utilization, i.e. when subroutine's throughput is virtually same as -+# of non-interleaved subroutine [for number of input blocks up to 3]. -+# This is why it makes no sense to implement 2x subroutine. As soon -+# as/if Intel improves throughput by making it possible to schedule -+# the instructions in question *every* cycles I would have to -+# implement 6x interleave and use it in loop... -+sub aesni_generate3 -+{ my $p=shift; -+ -+ &function_begin_B("_aesni_${p}rypt3"); -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &shr ($rounds,1); -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ &lea ($key,&DWP(32,$key)); -+ &pxor ($inout0,$rndkey0); -+ &pxor ($inout1,$rndkey0); -+ &pxor ($inout2,$rndkey0); -+ &jmp (&label("${p}3_loop")); -+ &set_label("${p}3_loop",16); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ &dec ($rounds); -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &lea ($key,&DWP(32,$key)); -+ eval"&aes${p} ($inout1,$rndkey0)"; -+ eval"&aes${p} ($inout2,$rndkey0)"; -+ &jnz (&label("${p}3_loop")); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ eval"&aes${p}last ($inout0,$rndkey0)"; -+ eval"&aes${p}last ($inout1,$rndkey0)"; -+ eval"&aes${p}last ($inout2,$rndkey0)"; -+ &ret(); -+ &function_end_B("_aesni_${p}rypt3"); -+} -+ -+# 4x interleave is implemented to improve small block performance, -+# most notably [and naturally] 4 block by ~30%. One can argue that one -+# should have implemented 5x as well, but improvement would be <20%, -+# so it's not worth it... -+sub aesni_generate4 -+{ my $p=shift; -+ -+ &function_begin_B("_aesni_${p}rypt4"); -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ &shr ($rounds,1); -+ &lea ($key,&DWP(32,$key)); -+ &pxor ($inout0,$rndkey0); -+ &pxor ($inout1,$rndkey0); -+ &pxor ($inout2,$rndkey0); -+ &pxor ($inout3,$rndkey0); -+ &jmp (&label("${p}3_loop")); -+ &set_label("${p}3_loop",16); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ &dec ($rounds); -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ eval"&aes${p} ($inout3,$rndkey1)"; -+ &$movekey ($rndkey1,&QWP(16,$key)); -+ eval"&aes${p} ($inout0,$rndkey0)"; -+ &lea ($key,&DWP(32,$key)); -+ eval"&aes${p} ($inout1,$rndkey0)"; -+ eval"&aes${p} ($inout2,$rndkey0)"; -+ eval"&aes${p} ($inout3,$rndkey0)"; -+ &jnz (&label("${p}3_loop")); -+ eval"&aes${p} ($inout0,$rndkey1)"; -+ &$movekey ($rndkey0,&QWP(0,$key)); -+ eval"&aes${p} ($inout1,$rndkey1)"; -+ eval"&aes${p} ($inout2,$rndkey1)"; -+ eval"&aes${p} ($inout3,$rndkey1)"; -+ eval"&aes${p}last ($inout0,$rndkey0)"; -+ eval"&aes${p}last ($inout1,$rndkey0)"; -+ eval"&aes${p}last ($inout2,$rndkey0)"; -+ eval"&aes${p}last ($inout3,$rndkey0)"; -+ &ret(); -+ &function_end_B("_aesni_${p}rypt4"); -+} -+&aesni_generate3("enc") if ($PREFIX eq "aesni"); -+&aesni_generate3("dec"); -+&aesni_generate4("enc") if ($PREFIX eq "aesni"); -+&aesni_generate4("dec"); -+ -+if ($PREFIX eq "aesni") { -+# void aesni_ecb_encrypt (const void *in, void *out, -+# size_t length, const AES_KEY *key, -+# int enc); -+&function_begin("aesni_ecb_encrypt"); -+ &mov ($inp,&wparam(0)); -+ &mov ($out,&wparam(1)); -+ &mov ($len,&wparam(2)); -+ &mov ($key,&wparam(3)); -+ &mov ($rounds,&wparam(4)); -+ &cmp ($len,16); -+ &jb (&label("ecb_ret")); -+ &and ($len,-16); -+ &test ($rounds,$rounds) -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ($key_,$key); # backup $key -+ &mov ($rounds_,$rounds); # backup $rounds -+ &jz (&label("ecb_decrypt")); -+ -+ &sub ($len,0x40); -+ &jbe (&label("ecb_enc_tail")); -+ &jmp (&label("ecb_enc_loop3")); -+ -+&set_label("ecb_enc_loop3",16); -+ &movups ($inout0,&QWP(0,$inp)); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &call ("_aesni_encrypt3"); -+ &sub ($len,0x30); -+ &lea ($inp,&DWP(0x30,$inp)); -+ &lea ($out,&DWP(0x30,$out)); -+ &movups (&QWP(-0x30,$out),$inout0); -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-0x20,$out),$inout1); -+ &mov ($rounds,$rounds_); # restore $rounds -+ &movups (&QWP(-0x10,$out),$inout2); -+ &ja (&label("ecb_enc_loop3")); -+ -+&set_label("ecb_enc_tail"); -+ &add ($len,0x40); -+ &jz (&label("ecb_ret")); -+ -+ &cmp ($len,0x10); -+ &movups ($inout0,&QWP(0,$inp)); -+ &je (&label("ecb_enc_one")); -+ &cmp ($len,0x20); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &je (&label("ecb_enc_two")); -+ &cmp ($len,0x30); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &je (&label("ecb_enc_three")); -+ &movups ($inout3,&QWP(0x30,$inp)); -+ &call ("_aesni_encrypt4"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &movups (&QWP(0x30,$out),$inout3); -+ jmp (&label("ecb_ret")); -+ -+&set_label("ecb_enc_one",16); -+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1"); -+ &movups (&QWP(0,$out),$inout0); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_enc_two",16); -+ &call ("_aesni_encrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_enc_three",16); -+ &call ("_aesni_encrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_decrypt",16); -+ &sub ($len,0x40); -+ &jbe (&label("ecb_dec_tail")); -+ &jmp (&label("ecb_dec_loop3")); -+ -+&set_label("ecb_dec_loop3",16); -+ &movups ($inout0,&QWP(0,$inp)); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &call ("_aesni_decrypt3"); -+ &sub ($len,0x30); -+ &lea ($inp,&DWP(0x30,$inp)); -+ &lea ($out,&DWP(0x30,$out)); -+ &movups (&QWP(-0x30,$out),$inout0); -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-0x20,$out),$inout1); -+ &mov ($rounds,$rounds_); # restore $rounds -+ &movups (&QWP(-0x10,$out),$inout2); -+ &ja (&label("ecb_dec_loop3")); -+ -+&set_label("ecb_dec_tail"); -+ &add ($len,0x40); -+ &jz (&label("ecb_ret")); -+ -+ &cmp ($len,0x10); -+ &movups ($inout0,&QWP(0,$inp)); -+ &je (&label("ecb_dec_one")); -+ &cmp ($len,0x20); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &je (&label("ecb_dec_two")); -+ &cmp ($len,0x30); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &je (&label("ecb_dec_three")); -+ &movups ($inout3,&QWP(0x30,$inp)); -+ &call ("_aesni_decrypt4"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &movups (&QWP(0x30,$out),$inout3); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_dec_one",16); -+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_dec_two",16); -+ &call ("_aesni_decrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &jmp (&label("ecb_ret")); -+ -+&set_label("ecb_dec_three",16); -+ &call ("_aesni_decrypt3"); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ -+&set_label("ecb_ret"); -+&function_end("aesni_ecb_encrypt"); -+} -+ -+# void $PREFIX_cbc_encrypt (const void *inp, void *out, -+# size_t length, const AES_KEY *key, -+# unsigned char *ivp,const int enc); -+&function_begin("${PREFIX}_cbc_encrypt"); -+ &mov ($inp,&wparam(0)); -+ &mov ($out,&wparam(1)); -+ &mov ($len,&wparam(2)); -+ &mov ($key,&wparam(3)); -+ &test ($len,$len); -+ &mov ($key_,&wparam(4)); -+ &jz (&label("cbc_ret")); -+ -+ &cmp (&wparam(5),0); -+ &movups ($ivec,&QWP(0,$key_)); # load IV -+ &mov ($rounds,&DWP(240,$key)); -+ &mov ($key_,$key); # backup $key -+ &mov ($rounds_,$rounds); # backup $rounds -+ &je (&label("cbc_decrypt")); -+ -+ &movaps ($inout0,$ivec); -+ &cmp ($len,16); -+ &jb (&label("cbc_enc_tail")); -+ &sub ($len,16); -+ &jmp (&label("cbc_enc_loop")); -+ -+&set_label("cbc_enc_loop",16); -+ &movups ($ivec,&QWP(0,$inp)); -+ &lea ($inp,&DWP(16,$inp)); -+ &pxor ($inout0,$ivec); -+ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt3"); -+ &sub ($len,16); -+ &lea ($out,&DWP(16,$out)); -+ &mov ($rounds,$rounds_); # restore $rounds -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-16,$out),$inout0); -+ &jnc (&label("cbc_enc_loop")); -+ &add ($len,16); -+ &jnz (&label("cbc_enc_tail")); -+ &movaps ($ivec,$inout0); -+ &jmp (&label("cbc_ret")); -+ -+&set_label("cbc_enc_tail"); -+ &mov ("ecx",$len); # zaps $rounds -+ &data_word(0xA4F3F689); # rep movsb -+ &mov ("ecx",16); # zero tail -+ &sub ("ecx",$len); -+ &xor ("eax","eax"); # zaps $len -+ &data_word(0xAAF3F689); # rep stosb -+ &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block -+ &mov ($rounds,$rounds_); # restore $rounds -+ &mov ($inp,$out); # $inp and $out are the same -+ &mov ($key,$key_); # restore $key -+ &jmp (&label("cbc_enc_loop")); -+ -+&set_label("cbc_decrypt",16); -+ &sub ($len,0x40); -+ &jbe (&label("cbc_dec_tail")); -+ &jmp (&label("cbc_dec_loop3")); -+ -+&set_label("cbc_dec_loop3",16); -+ &movups ($inout0,&QWP(0,$inp)); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &movaps ($in0,$inout0); -+ &movaps ($in1,$inout1); -+ &call ("_aesni_decrypt3"); -+ &sub ($len,0x30); -+ &lea ($inp,&DWP(0x30,$inp)); -+ &lea ($out,&DWP(0x30,$out)); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &movups ($ivec,&QWP(-0x10,$inp)); -+ &pxor ($inout2,$in1); -+ &movups (&QWP(-0x30,$out),$inout0); -+ &mov ($rounds,$rounds_) # restore $rounds -+ &movups (&QWP(-0x20,$out),$inout1); -+ &mov ($key,$key_); # restore $key -+ &movups (&QWP(-0x10,$out),$inout2); -+ &ja (&label("cbc_dec_loop3")); -+ -+&set_label("cbc_dec_tail"); -+ &add ($len,0x40); -+ &jz (&label("cbc_ret")); -+ -+ &movups ($inout0,&QWP(0,$inp)); -+ &cmp ($len,0x10); -+ &movaps ($in0,$inout0); -+ &jbe (&label("cbc_dec_one")); -+ &movups ($inout1,&QWP(0x10,$inp)); -+ &cmp ($len,0x20); -+ &movaps ($in1,$inout1); -+ &jbe (&label("cbc_dec_two")); -+ &movups ($inout2,&QWP(0x20,$inp)); -+ &cmp ($len,0x30); -+ &jbe (&label("cbc_dec_three")); -+ &movups ($inout3,&QWP(0x30,$inp)); -+ &call ("_aesni_decrypt4"); -+ &movups ($rndkey0,&QWP(0x10,$inp)); -+ &movups ($rndkey1,&QWP(0x20,$inp)); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &movups ($ivec,&QWP(0x30,$inp)); -+ &movups (&QWP(0,$out),$inout0); -+ &pxor ($inout2,$rndkey0); -+ &pxor ($inout3,$rndkey1); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movups (&QWP(0x20,$out),$inout2); -+ &movaps ($inout0,$inout3); -+ &lea ($out,&DWP(0x30,$out)); -+ &jmp (&label("cbc_dec_tail_collected")); -+ -+&set_label("cbc_dec_one"); -+ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3"); -+ &pxor ($inout0,$ivec); -+ &movaps ($ivec,$in0); -+ &jmp (&label("cbc_dec_tail_collected")); -+ -+&set_label("cbc_dec_two"); -+ &call ("_aesni_decrypt3"); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &movups (&QWP(0,$out),$inout0); -+ &movaps ($inout0,$inout1); -+ &movaps ($ivec,$in1); -+ &lea ($out,&DWP(0x10,$out)); -+ &jmp (&label("cbc_dec_tail_collected")); -+ -+&set_label("cbc_dec_three"); -+ &call ("_aesni_decrypt3"); -+ &pxor ($inout0,$ivec); -+ &pxor ($inout1,$in0); -+ &pxor ($inout2,$in1); -+ &movups (&QWP(0,$out),$inout0); -+ &movups (&QWP(0x10,$out),$inout1); -+ &movaps ($inout0,$inout2); -+ &movups ($ivec,&QWP(0x20,$inp)); -+ &lea ($out,&DWP(0x20,$out)); -+ -+&set_label("cbc_dec_tail_collected"); -+ &and ($len,15); -+ &jnz (&label("cbc_dec_tail_partial")); -+ &movups (&QWP(0,$out),$inout0); -+ &jmp (&label("cbc_ret")); -+ -+&set_label("cbc_dec_tail_partial"); -+ &mov ($key_,"esp"); -+ &sub ("esp",16); -+ &and ("esp",-16); -+ &movaps (&QWP(0,"esp"),$inout0); -+ &mov ($inp,"esp"); -+ &mov ("ecx",$len); -+ &data_word(0xA4F3F689); # rep movsb -+ &mov ("esp",$key_); -+ -+&set_label("cbc_ret"); -+ &mov ($key_,&wparam(4)); -+ &movups (&QWP(0,$key_),$ivec); # output IV -+&function_end("${PREFIX}_cbc_encrypt"); -+ -+# Mechanical port from aesni-x86_64.pl. -+# -+# _aesni_set_encrypt_key is private interface, -+# input: -+# "eax" const unsigned char *userKey -+# $rounds int bits -+# $key AES_KEY *key -+# output: -+# "eax" return code -+# $round rounds -+ -+&function_begin_B("_aesni_set_encrypt_key"); -+ &test ("eax","eax"); -+ &jz (&label("bad_pointer")); -+ &test ($key,$key); -+ &jz (&label("bad_pointer")); -+ -+ &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey -+ &pxor ("xmm4","xmm4"); # low dword of xmm4 is assumed 0 -+ &lea ($key,&DWP(16,$key)); -+ &cmp ($rounds,256); -+ &je (&label("14rounds")); -+ &cmp ($rounds,192); -+ &je (&label("12rounds")); -+ &cmp ($rounds,128); -+ &jne (&label("bad_keybits")); -+ -+&set_label("10rounds",16); -+ &mov ($rounds,9); -+ &$movekey (&QWP(-16,$key),"xmm0"); # round 0 -+ &aeskeygenassist("xmm1","xmm0",0x01); # round 1 -+ &call (&label("key_128_cold")); -+ &aeskeygenassist("xmm1","xmm0",0x2); # round 2 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x04); # round 3 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x08); # round 4 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x10); # round 5 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x20); # round 6 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x40); # round 7 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x80); # round 8 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x1b); # round 9 -+ &call (&label("key_128")); -+ &aeskeygenassist("xmm1","xmm0",0x36); # round 10 -+ &call (&label("key_128")); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &mov (&DWP(80,$key),$rounds); -+ &xor ("eax","eax"); -+ &ret(); -+ -+&set_label("key_128",16); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &lea ($key,&DWP(16,$key)); -+&set_label("key_128_cold"); -+ &shufps ("xmm4","xmm0",0b00010000); -+ &pxor ("xmm0","xmm4"); -+ &shufps ("xmm4","xmm0",0b10001100,); -+ &pxor ("xmm0","xmm4"); -+ &pshufd ("xmm1","xmm1",0b11111111); # critical path -+ &pxor ("xmm0","xmm1"); -+ &ret(); -+ -+&set_label("12rounds",16); -+ &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey -+ &mov ($rounds,11); -+ &$movekey (&QWP(-16,$key),"xmm0") # round 0 -+ &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 -+ &call (&label("key_192a_cold")); -+ &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 -+ &call (&label("key_192b")); -+ &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5 -+ &call (&label("key_192a")); -+ &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6 -+ &call (&label("key_192b")); -+ &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8 -+ &call (&label("key_192a")); -+ &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9 -+ &call (&label("key_192b")); -+ &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11 -+ &call (&label("key_192a")); -+ &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12 -+ &call (&label("key_192b")); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &mov (&DWP(48,$key),$rounds); -+ &xor ("eax","eax"); -+ &ret(); -+ -+&set_label("key_192a",16); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &lea ($key,&DWP(16,$key)); -+&set_label("key_192a_cold",16); -+ &movaps ("xmm5","xmm2"); -+&set_label("key_192b_warm"); -+ &shufps ("xmm4","xmm0",0b00010000); -+ &movaps ("xmm3","xmm2"); -+ &pxor ("xmm0","xmm4"); -+ &shufps ("xmm4","xmm0",0b10001100); -+ &pslldq ("xmm3",4); -+ &pxor ("xmm0","xmm4"); -+ &pshufd ("xmm1","xmm1",0b01010101); # critical path -+ &pxor ("xmm2","xmm3"); -+ &pxor ("xmm0","xmm1"); -+ &pshufd ("xmm3","xmm0",0b11111111); -+ &pxor ("xmm2","xmm3"); -+ &ret(); -+ -+&set_label("key_192b",16); -+ &movaps ("xmm3","xmm0"); -+ &shufps ("xmm5","xmm0",0b01000100); -+ &$movekey (&QWP(0,$key),"xmm5"); -+ &shufps ("xmm3","xmm2",0b01001110); -+ &$movekey (&QWP(16,$key),"xmm3"); -+ &lea ($key,&DWP(32,$key)); -+ &jmp (&label("key_192b_warm")); -+ -+&set_label("14rounds",16); -+ &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey -+ &mov ($rounds,13); -+ &lea ($key,&DWP(16,$key)); -+ &$movekey (&QWP(-32,$key),"xmm0"); # round 0 -+ &$movekey (&QWP(-16,$key),"xmm2"); # round 1 -+ &aeskeygenassist("xmm1","xmm2",0x01); # round 2 -+ &call (&label("key_256a_cold")); -+ &aeskeygenassist("xmm1","xmm0",0x01); # round 3 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x02); # round 4 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x02); # round 5 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x04); # round 6 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x04); # round 7 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x08); # round 8 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x08); # round 9 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x10); # round 10 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x10); # round 11 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x20); # round 12 -+ &call (&label("key_256a")); -+ &aeskeygenassist("xmm1","xmm0",0x20); # round 13 -+ &call (&label("key_256b")); -+ &aeskeygenassist("xmm1","xmm2",0x40); # round 14 -+ &call (&label("key_256a")); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &mov (&DWP(16,$key),$rounds); -+ &xor ("eax","eax"); -+ &ret(); -+ -+&set_label("key_256a",16); -+ &$movekey (&QWP(0,$key),"xmm2"); -+ &lea ($key,&DWP(16,$key)); -+&set_label("key_256a_cold"); -+ &shufps ("xmm4","xmm0",0b00010000); -+ &pxor ("xmm0","xmm4"); -+ &shufps ("xmm4","xmm0",0b10001100); -+ &pxor ("xmm0","xmm4"); -+ &pshufd ("xmm1","xmm1",0b11111111); # critical path -+ &pxor ("xmm0","xmm1"); -+ &ret(); -+ -+&set_label("key_256b",16); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ &lea ($key,&DWP(16,$key)); -+ -+ &shufps ("xmm4","xmm2",0b00010000); -+ &pxor ("xmm2","xmm4"); -+ &shufps ("xmm4","xmm2",0b10001100); -+ &pxor ("xmm2","xmm4"); -+ &pshufd ("xmm1","xmm1",0b10101010); # critical path -+ &pxor ("xmm2","xmm1"); -+ &ret(); -+ -+&set_label("bad_pointer",4); -+ &mov ("eax",-1); -+ &ret (); -+&set_label("bad_keybits",4); -+ &mov ("eax",-2); -+ &ret (); -+&function_end_B("_aesni_set_encrypt_key"); -+ -+# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits, -+# AES_KEY *key) -+&function_begin_B("${PREFIX}_set_encrypt_key"); -+ &mov ("eax",&wparam(0)); -+ &mov ($rounds,&wparam(1)); -+ &mov ($key,&wparam(2)); -+ &call ("_aesni_set_encrypt_key"); -+ &ret (); -+&function_end_B("${PREFIX}_set_encrypt_key"); -+ -+# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits, -+# AES_KEY *key) -+&function_begin_B("${PREFIX}_set_decrypt_key"); -+ &mov ("eax",&wparam(0)); -+ &mov ($rounds,&wparam(1)); -+ &mov ($key,&wparam(2)); -+ &call ("_aesni_set_encrypt_key"); -+ &mov ($key,&wparam(2)); -+ &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key -+ &test ("eax","eax"); -+ &jnz (&label("dec_key_ret")); -+ &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule -+ -+ &$movekey ("xmm0",&QWP(0,$key)); # just swap -+ &$movekey ("xmm1",&QWP(0,"eax")); -+ &$movekey (&QWP(0,"eax"),"xmm0"); -+ &$movekey (&QWP(0,$key),"xmm1"); -+ &lea ($key,&DWP(16,$key)); -+ &lea ("eax",&DWP(-16,"eax")); -+ -+&set_label("dec_key_inverse"); -+ &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse -+ &$movekey ("xmm1",&QWP(0,"eax")); -+ &aesimc ("xmm0","xmm0"); -+ &aesimc ("xmm1","xmm1"); -+ &lea ($key,&DWP(16,$key)); -+ &lea ("eax",&DWP(-16,"eax")); -+ &cmp ("eax",$key); -+ &$movekey (&QWP(16,"eax"),"xmm0"); -+ &$movekey (&QWP(-16,$key),"xmm1"); -+ &ja (&label("dec_key_inverse")); -+ -+ &$movekey ("xmm0",&QWP(0,$key)); # inverse middle -+ &aesimc ("xmm0","xmm0"); -+ &$movekey (&QWP(0,$key),"xmm0"); -+ -+ &xor ("eax","eax"); # return success -+&set_label("dec_key_ret"); -+ &ret (); -+&function_end_B("${PREFIX}_set_decrypt_key"); -+&asciz("AES for Intel AES-NI, CRYPTOGAMS by "); -+ -+&asm_finish(); -diff -up openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl.aesni openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl ---- openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl.aesni 2010-01-12 22:18:06.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/aes/asm/aesni-x86_64.pl 2010-01-12 22:18:06.000000000 +0100 -@@ -0,0 +1,991 @@ -+#!/usr/bin/env perl -+# -+# ==================================================================== -+# Written by Andy Polyakov for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+# -+# This module implements support for Intel AES-NI extension. In -+# OpenSSL context it's used with Intel engine, but can also be used as -+# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for -+# details]. -+ -+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script -+ # generates drop-in replacement for -+ # crypto/aes/asm/aes-x86_64.pl:-) -+ -+$flavour = shift; -+$output = shift; -+if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } -+ -+$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); -+ -+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -+( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -+( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -+die "can't locate x86_64-xlate.pl"; -+ -+open STDOUT,"| $^X $xlate $flavour $output"; -+ -+$movkey = $PREFIX eq "aesni" ? "movaps" : "movups"; -+@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order -+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order -+ -+$code=".text\n"; -+ -+$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! -+# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... -+$inp="%rdi"; -+$out="%rsi"; -+$len="%rdx"; -+$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!! -+$ivp="%r8"; # cbc -+ -+$rnds_="%r10d"; # backup copy for $rounds -+$key_="%r11"; # backup copy for $key -+ -+# %xmm register layout -+$inout0="%xmm0"; $inout1="%xmm1"; -+$inout2="%xmm2"; $inout3="%xmm3"; -+$rndkey0="%xmm4"; $rndkey1="%xmm5"; -+ -+$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt -+$in1="%xmm8"; $in2="%xmm9"; -+ -+# Inline version of internal aesni_[en|de]crypt1. -+# -+# Why folded loop? Because aes[enc|dec] is slow enough to accommodate -+# cycles which take care of loop variables... -+{ my $sn; -+sub aesni_generate1 { -+my ($p,$key,$rounds)=@_; -+++$sn; -+$code.=<<___; -+ $movkey ($key),$rndkey0 -+ $movkey 16($key),$rndkey1 -+ lea 32($key),$key -+ pxor $rndkey0,$inout0 -+.Loop_${p}1_$sn: -+ aes${p} $rndkey1,$inout0 -+ dec $rounds -+ $movkey ($key),$rndkey1 -+ lea 16($key),$key -+ jnz .Loop_${p}1_$sn # loop body is 16 bytes -+ aes${p}last $rndkey1,$inout0 -+___ -+}} -+# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key); -+# -+{ my ($inp,$out,$key) = @_4args; -+ -+$code.=<<___; -+.globl ${PREFIX}_encrypt -+.type ${PREFIX}_encrypt,\@abi-omnipotent -+.align 16 -+${PREFIX}_encrypt: -+ movups ($inp),$inout0 # load input -+ mov 240($key),$rounds # pull $rounds -+___ -+ &aesni_generate1("enc",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) # output -+ ret -+.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt -+ -+.globl ${PREFIX}_decrypt -+.type ${PREFIX}_decrypt,\@abi-omnipotent -+.align 16 -+${PREFIX}_decrypt: -+ movups ($inp),$inout0 # load input -+ mov 240($key),$rounds # pull $rounds -+___ -+ &aesni_generate1("dec",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) # output -+ ret -+.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt -+___ -+} -+ -+# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave -+# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] -+# latency is 6, it turned out that it can be scheduled only every -+# *second* cycle. Thus 3x interleave is the one providing optimal -+# utilization, i.e. when subroutine's throughput is virtually same as -+# of non-interleaved subroutine [for number of input blocks up to 3]. -+# This is why it makes no sense to implement 2x subroutine. As soon -+# as/if Intel improves throughput by making it possible to schedule -+# the instructions in question *every* cycles I would have to -+# implement 6x interleave and use it in loop... -+sub aesni_generate3 { -+my $dir=shift; -+# As already mentioned it takes in $key and $rounds, which are *not* -+# preserved. $inout[0-2] is cipher/clear text... -+$code.=<<___; -+.type _aesni_${dir}rypt3,\@abi-omnipotent -+.align 16 -+_aesni_${dir}rypt3: -+ $movkey ($key),$rndkey0 -+ shr \$1,$rounds -+ $movkey 16($key),$rndkey1 -+ lea 32($key),$key -+ pxor $rndkey0,$inout0 -+ pxor $rndkey0,$inout1 -+ pxor $rndkey0,$inout2 -+ -+.L${dir}_loop3: -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ dec $rounds -+ aes${dir} $rndkey1,$inout2 -+ aes${dir} $rndkey0,$inout0 -+ $movkey 16($key),$rndkey1 -+ aes${dir} $rndkey0,$inout1 -+ lea 32($key),$key -+ aes${dir} $rndkey0,$inout2 -+ jnz .L${dir}_loop3 -+ -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ aes${dir} $rndkey1,$inout2 -+ aes${dir}last $rndkey0,$inout0 -+ aes${dir}last $rndkey0,$inout1 -+ aes${dir}last $rndkey0,$inout2 -+ ret -+.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 -+___ -+} -+# 4x interleave is implemented to improve small block performance, -+# most notably [and naturally] 4 block by ~30%. One can argue that one -+# should have implemented 5x as well, but improvement would be <20%, -+# so it's not worth it... -+sub aesni_generate4 { -+my $dir=shift; -+# As already mentioned it takes in $key and $rounds, which are *not* -+# preserved. $inout[0-3] is cipher/clear text... -+$code.=<<___; -+.type _aesni_${dir}rypt4,\@abi-omnipotent -+.align 16 -+_aesni_${dir}rypt4: -+ $movkey ($key),$rndkey0 -+ shr \$1,$rounds -+ $movkey 16($key),$rndkey1 -+ lea 32($key),$key -+ pxor $rndkey0,$inout0 -+ pxor $rndkey0,$inout1 -+ pxor $rndkey0,$inout2 -+ pxor $rndkey0,$inout3 -+ -+.L${dir}_loop4: -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ dec $rounds -+ aes${dir} $rndkey1,$inout2 -+ aes${dir} $rndkey1,$inout3 -+ aes${dir} $rndkey0,$inout0 -+ $movkey 16($key),$rndkey1 -+ aes${dir} $rndkey0,$inout1 -+ lea 32($key),$key -+ aes${dir} $rndkey0,$inout2 -+ aes${dir} $rndkey0,$inout3 -+ jnz .L${dir}_loop4 -+ -+ aes${dir} $rndkey1,$inout0 -+ $movkey ($key),$rndkey0 -+ aes${dir} $rndkey1,$inout1 -+ aes${dir} $rndkey1,$inout2 -+ aes${dir} $rndkey1,$inout3 -+ aes${dir}last $rndkey0,$inout0 -+ aes${dir}last $rndkey0,$inout1 -+ aes${dir}last $rndkey0,$inout2 -+ aes${dir}last $rndkey0,$inout3 -+ ret -+.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 -+___ -+} -+&aesni_generate3("enc") if ($PREFIX eq "aesni"); -+&aesni_generate3("dec"); -+&aesni_generate4("enc") if ($PREFIX eq "aesni"); -+&aesni_generate4("dec"); -+ -+if ($PREFIX eq "aesni") { -+# void aesni_ecb_encrypt (const void *in, void *out, -+# size_t length, const AES_KEY *key, -+# int enc); -+$code.=<<___; -+.globl aesni_ecb_encrypt -+.type aesni_ecb_encrypt,\@function,5 -+.align 16 -+aesni_ecb_encrypt: -+ cmp \$16,$len # check length -+ jb .Lecb_ret -+ -+ mov 240($key),$rounds # pull $rounds -+ and \$-16,$len -+ mov $key,$key_ # backup $key -+ test %r8d,%r8d # 5th argument -+ mov $rounds,$rnds_ # backup $rounds -+ jz .Lecb_decrypt -+#--------------------------- ECB ENCRYPT ------------------------------# -+ sub \$0x40,$len -+ jbe .Lecb_enc_tail -+ jmp .Lecb_enc_loop3 -+.align 16 -+.Lecb_enc_loop3: -+ movups ($inp),$inout0 -+ movups 0x10($inp),$inout1 -+ movups 0x20($inp),$inout2 -+ call _aesni_encrypt3 -+ sub \$0x30,$len -+ lea 0x30($inp),$inp -+ lea 0x30($out),$out -+ movups $inout0,-0x30($out) -+ mov $rnds_,$rounds # restore $rounds -+ movups $inout1,-0x20($out) -+ mov $key_,$key # restore $key -+ movups $inout2,-0x10($out) -+ ja .Lecb_enc_loop3 -+ -+.Lecb_enc_tail: -+ add \$0x40,$len -+ jz .Lecb_ret -+ -+ cmp \$0x10,$len -+ movups ($inp),$inout0 -+ je .Lecb_enc_one -+ cmp \$0x20,$len -+ movups 0x10($inp),$inout1 -+ je .Lecb_enc_two -+ cmp \$0x30,$len -+ movups 0x20($inp),$inout2 -+ je .Lecb_enc_three -+ movups 0x30($inp),$inout3 -+ call _aesni_encrypt4 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ movups $inout3,0x30($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_enc_one: -+___ -+ &aesni_generate1("enc",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_enc_two: -+ call _aesni_encrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_enc_three: -+ call _aesni_encrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ jmp .Lecb_ret -+ #--------------------------- ECB DECRYPT ------------------------------# -+.align 16 -+.Lecb_decrypt: -+ sub \$0x40,$len -+ jbe .Lecb_dec_tail -+ jmp .Lecb_dec_loop3 -+.align 16 -+.Lecb_dec_loop3: -+ movups ($inp),$inout0 -+ movups 0x10($inp),$inout1 -+ movups 0x20($inp),$inout2 -+ call _aesni_decrypt3 -+ sub \$0x30,$len -+ lea 0x30($inp),$inp -+ lea 0x30($out),$out -+ movups $inout0,-0x30($out) -+ mov $rnds_,$rounds # restore $rounds -+ movups $inout1,-0x20($out) -+ mov $key_,$key # restore $key -+ movups $inout2,-0x10($out) -+ ja .Lecb_dec_loop3 -+ -+.Lecb_dec_tail: -+ add \$0x40,$len -+ jz .Lecb_ret -+ -+ cmp \$0x10,$len -+ movups ($inp),$inout0 -+ je .Lecb_dec_one -+ cmp \$0x20,$len -+ movups 0x10($inp),$inout1 -+ je .Lecb_dec_two -+ cmp \$0x30,$len -+ movups 0x20($inp),$inout2 -+ je .Lecb_dec_three -+ movups 0x30($inp),$inout3 -+ call _aesni_decrypt4 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ movups $inout3,0x30($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_dec_one: -+___ -+ &aesni_generate1("dec",$key,$rounds); -+$code.=<<___; -+ movups $inout0,($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_dec_two: -+ call _aesni_decrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ jmp .Lecb_ret -+.align 16 -+.Lecb_dec_three: -+ call _aesni_decrypt3 -+ movups $inout0,($out) -+ movups $inout1,0x10($out) -+ movups $inout2,0x20($out) -+ -+.Lecb_ret: -+ ret -+.size aesni_ecb_encrypt,.-aesni_ecb_encrypt -+___ -+} -+ -+# void $PREFIX_cbc_encrypt (const void *inp, void *out, -+# size_t length, const AES_KEY *key, -+# unsigned char *ivp,const int enc); -+$reserved = $win64?0x40:-0x18; # used in decrypt -+$code.=<<___; -+.globl ${PREFIX}_cbc_encrypt -+.type ${PREFIX}_cbc_encrypt,\@function,6 -+.align 16 -+${PREFIX}_cbc_encrypt: -+ test $len,$len # check length -+ jz .Lcbc_ret -+ -+ mov 240($key),$rnds_ # pull $rounds -+ mov $key,$key_ # backup $key -+ test %r9d,%r9d # 6th argument -+ jz .Lcbc_decrypt -+#--------------------------- CBC ENCRYPT ------------------------------# -+ movups ($ivp),$inout0 # load iv as initial state -+ cmp \$16,$len -+ mov $rnds_,$rounds -+ jb .Lcbc_enc_tail -+ sub \$16,$len -+ jmp .Lcbc_enc_loop -+.align 16 -+.Lcbc_enc_loop: -+ movups ($inp),$inout1 # load input -+ lea 16($inp),$inp -+ pxor $inout1,$inout0 -+___ -+ &aesni_generate1("enc",$key,$rounds); -+$code.=<<___; -+ sub \$16,$len -+ lea 16($out),$out -+ mov $rnds_,$rounds # restore $rounds -+ mov $key_,$key # restore $key -+ movups $inout0,-16($out) # store output -+ jnc .Lcbc_enc_loop -+ add \$16,$len -+ jnz .Lcbc_enc_tail -+ movups $inout0,($ivp) -+ jmp .Lcbc_ret -+ -+.Lcbc_enc_tail: -+ mov $len,%rcx # zaps $key -+ xchg $inp,$out # $inp is %rsi and $out is %rdi now -+ .long 0x9066A4F3 # rep movsb -+ mov \$16,%ecx # zero tail -+ sub $len,%rcx -+ xor %eax,%eax -+ .long 0x9066AAF3 # rep stosb -+ lea -16(%rdi),%rdi # rewind $out by 1 block -+ mov $rnds_,$rounds # restore $rounds -+ mov %rdi,%rsi # $inp and $out are the same -+ mov $key_,$key # restore $key -+ xor $len,$len # len=16 -+ jmp .Lcbc_enc_loop # one more spin -+ #--------------------------- CBC DECRYPT ------------------------------# -+.align 16 -+.Lcbc_decrypt: -+___ -+$code.=<<___ if ($win64); -+ lea -0x58(%rsp),%rsp -+ movaps %xmm6,(%rsp) -+ movaps %xmm7,0x10(%rsp) -+ movaps %xmm8,0x20(%rsp) -+ movaps %xmm9,0x30(%rsp) -+.Lcbc_decrypt_body: -+___ -+$code.=<<___; -+ movups ($ivp),$iv -+ sub \$0x40,$len -+ mov $rnds_,$rounds -+ jbe .Lcbc_dec_tail -+ jmp .Lcbc_dec_loop3 -+.align 16 -+.Lcbc_dec_loop3: -+ movups ($inp),$inout0 -+ movups 0x10($inp),$inout1 -+ movups 0x20($inp),$inout2 -+ movaps $inout0,$in0 -+ movaps $inout1,$in1 -+ movaps $inout2,$in2 -+ call _aesni_decrypt3 -+ sub \$0x30,$len -+ lea 0x30($inp),$inp -+ lea 0x30($out),$out -+ pxor $iv,$inout0 -+ pxor $in0,$inout1 -+ movaps $in2,$iv -+ pxor $in1,$inout2 -+ movups $inout0,-0x30($out) -+ mov $rnds_,$rounds # restore $rounds -+ movups $inout1,-0x20($out) -+ mov $key_,$key # restore $key -+ movups $inout2,-0x10($out) -+ ja .Lcbc_dec_loop3 -+ -+.Lcbc_dec_tail: -+ add \$0x40,$len -+ movups $iv,($ivp) -+ jz .Lcbc_dec_ret -+ -+ movups ($inp),$inout0 -+ cmp \$0x10,$len -+ movaps $inout0,$in0 -+ jbe .Lcbc_dec_one -+ movups 0x10($inp),$inout1 -+ cmp \$0x20,$len -+ movaps $inout1,$in1 -+ jbe .Lcbc_dec_two -+ movups 0x20($inp),$inout2 -+ cmp \$0x30,$len -+ movaps $inout2,$in2 -+ jbe .Lcbc_dec_three -+ movups 0x30($inp),$inout3 -+ call _aesni_decrypt4 -+ pxor $iv,$inout0 -+ movups 0x30($inp),$iv -+ pxor $in0,$inout1 -+ movups $inout0,($out) -+ pxor $in1,$inout2 -+ movups $inout1,0x10($out) -+ pxor $in2,$inout3 -+ movups $inout2,0x20($out) -+ movaps $inout3,$inout0 -+ lea 0x30($out),$out -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_one: -+___ -+ &aesni_generate1("dec",$key,$rounds); -+$code.=<<___; -+ pxor $iv,$inout0 -+ movaps $in0,$iv -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_two: -+ call _aesni_decrypt3 -+ pxor $iv,$inout0 -+ pxor $in0,$inout1 -+ movups $inout0,($out) -+ movaps $in1,$iv -+ movaps $inout1,$inout0 -+ lea 0x10($out),$out -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_three: -+ call _aesni_decrypt3 -+ pxor $iv,$inout0 -+ pxor $in0,$inout1 -+ movups $inout0,($out) -+ pxor $in1,$inout2 -+ movups $inout1,0x10($out) -+ movaps $in2,$iv -+ movaps $inout2,$inout0 -+ lea 0x20($out),$out -+ jmp .Lcbc_dec_tail_collected -+.align 16 -+.Lcbc_dec_tail_collected: -+ and \$15,$len -+ movups $iv,($ivp) -+ jnz .Lcbc_dec_tail_partial -+ movups $inout0,($out) -+ jmp .Lcbc_dec_ret -+.Lcbc_dec_tail_partial: -+ movaps $inout0,$reserved(%rsp) -+ mov $out,%rdi -+ mov $len,%rcx -+ lea $reserved(%rsp),%rsi -+ .long 0x9066A4F3 # rep movsb -+ -+.Lcbc_dec_ret: -+___ -+$code.=<<___ if ($win64); -+ movaps (%rsp),%xmm6 -+ movaps 0x10(%rsp),%xmm7 -+ movaps 0x20(%rsp),%xmm8 -+ movaps 0x30(%rsp),%xmm9 -+ lea 0x58(%rsp),%rsp -+___ -+$code.=<<___; -+.Lcbc_ret: -+ ret -+.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt -+___ -+ -+# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, -+# int bits, AES_KEY *key) -+{ my ($inp,$bits,$key) = @_4args; -+ $bits =~ s/%r/%e/; -+ -+$code.=<<___; -+.globl ${PREFIX}_set_decrypt_key -+.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent -+.align 16 -+${PREFIX}_set_decrypt_key: -+ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 -+ call _aesni_set_encrypt_key -+ shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key -+ test %eax,%eax -+ jnz .Ldec_key_ret -+ lea 16($key,$bits),$inp # points at the end of key schedule -+ -+ $movkey ($key),%xmm0 # just swap -+ $movkey ($inp),%xmm1 -+ $movkey %xmm0,($inp) -+ $movkey %xmm1,($key) -+ lea 16($key),$key -+ lea -16($inp),$inp -+ -+.Ldec_key_inverse: -+ $movkey ($key),%xmm0 # swap and inverse -+ $movkey ($inp),%xmm1 -+ aesimc %xmm0,%xmm0 -+ aesimc %xmm1,%xmm1 -+ lea 16($key),$key -+ lea -16($inp),$inp -+ cmp $key,$inp -+ $movkey %xmm0,16($inp) -+ $movkey %xmm1,-16($key) -+ ja .Ldec_key_inverse -+ -+ $movkey ($key),%xmm0 # inverse middle -+ aesimc %xmm0,%xmm0 -+ $movkey %xmm0,($inp) -+.Ldec_key_ret: -+ add \$8,%rsp -+ ret -+.LSEH_end_set_decrypt_key: -+.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key -+___ -+ -+# This is based on submission by -+# -+# Huang Ying -+# Vinodh Gopal -+# Kahraman Akdemir -+# -+# Agressively optimized in respect to aeskeygenassist's critical path -+# and is contained in %xmm0-5 to meet Win64 ABI requirement. -+# -+$code.=<<___; -+.globl ${PREFIX}_set_encrypt_key -+.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent -+.align 16 -+${PREFIX}_set_encrypt_key: -+_aesni_set_encrypt_key: -+ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 -+ test $inp,$inp -+ mov \$-1,%rax -+ jz .Lenc_key_ret -+ test $key,$key -+ jz .Lenc_key_ret -+ -+ movups ($inp),%xmm0 # pull first 128 bits of *userKey -+ pxor %xmm4,%xmm4 # low dword of xmm4 is assumed 0 -+ lea 16($key),%rax -+ cmp \$256,$bits -+ je .L14rounds -+ cmp \$192,$bits -+ je .L12rounds -+ cmp \$128,$bits -+ jne .Lbad_keybits -+ -+.L10rounds: -+ mov \$9,$bits # 10 rounds for 128-bit key -+ $movkey %xmm0,($key) # round 0 -+ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 -+ call .Lkey_expansion_128_cold -+ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9 -+ call .Lkey_expansion_128 -+ aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10 -+ call .Lkey_expansion_128 -+ $movkey %xmm0,(%rax) -+ mov $bits,80(%rax) # 240(%rdx) -+ xor %eax,%eax -+ jmp .Lenc_key_ret -+ -+.align 16 -+.L12rounds: -+ movq 16($inp),%xmm2 # remaining 1/3 of *userKey -+ mov \$11,$bits # 12 rounds for 192 -+ $movkey %xmm0,($key) # round 0 -+ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 -+ call .Lkey_expansion_192a_cold -+ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3 -+ call .Lkey_expansion_192b -+ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5 -+ call .Lkey_expansion_192a -+ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6 -+ call .Lkey_expansion_192b -+ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8 -+ call .Lkey_expansion_192a -+ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9 -+ call .Lkey_expansion_192b -+ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11 -+ call .Lkey_expansion_192a -+ aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12 -+ call .Lkey_expansion_192b -+ $movkey %xmm0,(%rax) -+ mov $bits,48(%rax) # 240(%rdx) -+ xor %rax, %rax -+ jmp .Lenc_key_ret -+ -+.align 16 -+.L14rounds: -+ movups 16($inp),%xmm2 # remaning half of *userKey -+ mov \$13,$bits # 14 rounds for 256 -+ lea 16(%rax),%rax -+ $movkey %xmm0,($key) # round 0 -+ $movkey %xmm2,16($key) # round 1 -+ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 -+ call .Lkey_expansion_256a_cold -+ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12 -+ call .Lkey_expansion_256a -+ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13 -+ call .Lkey_expansion_256b -+ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14 -+ call .Lkey_expansion_256a -+ $movkey %xmm0,(%rax) -+ mov $bits,16(%rax) # 240(%rdx) -+ xor %rax,%rax -+ jmp .Lenc_key_ret -+ -+.align 16 -+.Lbad_keybits: -+ mov \$-2,%rax -+.Lenc_key_ret: -+ add \$8,%rsp -+ ret -+.LSEH_end_set_encrypt_key: -+ -+.align 16 -+.Lkey_expansion_128: -+ $movkey %xmm0,(%rax) -+ lea 16(%rax),%rax -+.Lkey_expansion_128_cold: -+ shufps \$0b00010000,%xmm0,%xmm4 -+ pxor %xmm4, %xmm0 -+ shufps \$0b10001100,%xmm0,%xmm4 -+ pxor %xmm4, %xmm0 -+ pshufd \$0b11111111,%xmm1,%xmm1 # critical path -+ pxor %xmm1,%xmm0 -+ ret -+ -+.align 16 -+.Lkey_expansion_192a: -+ $movkey %xmm0,(%rax) -+ lea 16(%rax),%rax -+.Lkey_expansion_192a_cold: -+ movaps %xmm2, %xmm5 -+.Lkey_expansion_192b_warm: -+ shufps \$0b00010000,%xmm0,%xmm4 -+ movaps %xmm2,%xmm3 -+ pxor %xmm4,%xmm0 -+ shufps \$0b10001100,%xmm0,%xmm4 -+ pslldq \$4,%xmm3 -+ pxor %xmm4,%xmm0 -+ pshufd \$0b01010101,%xmm1,%xmm1 # critical path -+ pxor %xmm3,%xmm2 -+ pxor %xmm1,%xmm0 -+ pshufd \$0b11111111,%xmm0,%xmm3 -+ pxor %xmm3,%xmm2 -+ ret -+ -+.align 16 -+.Lkey_expansion_192b: -+ movaps %xmm0,%xmm3 -+ shufps \$0b01000100,%xmm0,%xmm5 -+ $movkey %xmm5,(%rax) -+ shufps \$0b01001110,%xmm2,%xmm3 -+ $movkey %xmm3,16(%rax) -+ lea 32(%rax),%rax -+ jmp .Lkey_expansion_192b_warm -+ -+.align 16 -+.Lkey_expansion_256a: -+ $movkey %xmm2,(%rax) -+ lea 16(%rax),%rax -+.Lkey_expansion_256a_cold: -+ shufps \$0b00010000,%xmm0,%xmm4 -+ pxor %xmm4,%xmm0 -+ shufps \$0b10001100,%xmm0,%xmm4 -+ pxor %xmm4,%xmm0 -+ pshufd \$0b11111111,%xmm1,%xmm1 # critical path -+ pxor %xmm1,%xmm0 -+ ret -+ -+.align 16 -+.Lkey_expansion_256b: -+ $movkey %xmm0,(%rax) -+ lea 16(%rax),%rax -+ -+ shufps \$0b00010000,%xmm2,%xmm4 -+ pxor %xmm4,%xmm2 -+ shufps \$0b10001100,%xmm2,%xmm4 -+ pxor %xmm4,%xmm2 -+ pshufd \$0b10101010,%xmm1,%xmm1 # critical path -+ pxor %xmm1,%xmm2 -+ ret -+.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key -+___ -+} -+ -+$code.=<<___; -+.asciz "AES for Intel AES-NI, CRYPTOGAMS by " -+.align 64 -+___ -+ -+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, -+# CONTEXT *context,DISPATCHER_CONTEXT *disp) -+if ($win64) { -+$rec="%rcx"; -+$frame="%rdx"; -+$context="%r8"; -+$disp="%r9"; -+ -+$code.=<<___; -+.extern __imp_RtlVirtualUnwind -+.type cbc_se_handler,\@abi-omnipotent -+.align 16 -+cbc_se_handler: -+ push %rsi -+ push %rdi -+ push %rbx -+ push %rbp -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ pushfq -+ sub \$64,%rsp -+ -+ mov 152($context),%rax # pull context->Rsp -+ mov 248($context),%rbx # pull context->Rip -+ -+ lea .Lcbc_decrypt(%rip),%r10 -+ cmp %r10,%rbx # context->Rip<"prologue" label -+ jb .Lin_prologue -+ -+ lea .Lcbc_decrypt_body(%rip),%r10 -+ cmp %r10,%rbx # context->RipRip>="epilogue" label -+ jae .Lin_prologue -+ -+ lea 0(%rax),%rsi # top of stack -+ lea 512($context),%rdi # &context.Xmm6 -+ mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) -+ .long 0xa548f3fc # cld; rep movsq -+ lea 0x58(%rax),%rax # adjust stack pointer -+ jmp .Lin_prologue -+ -+.Lrestore_rax: -+ mov 120($context),%rax -+.Lin_prologue: -+ mov 8(%rax),%rdi -+ mov 16(%rax),%rsi -+ mov %rax,152($context) # restore context->Rsp -+ mov %rsi,168($context) # restore context->Rsi -+ mov %rdi,176($context) # restore context->Rdi -+ -+ jmp .Lcommon_seh_exit -+.size cbc_se_handler,.-cbc_se_handler -+ -+.type ecb_se_handler,\@abi-omnipotent -+.align 16 -+ecb_se_handler: -+ push %rsi -+ push %rdi -+ push %rbx -+ push %rbp -+ push %r12 -+ push %r13 -+ push %r14 -+ push %r15 -+ pushfq -+ sub \$64,%rsp -+ -+ mov 152($context),%rax # pull context->Rsp -+ mov 8(%rax),%rdi -+ mov 16(%rax),%rsi -+ mov %rsi,168($context) # restore context->Rsi -+ mov %rdi,176($context) # restore context->Rdi -+ -+.Lcommon_seh_exit: -+ -+ mov 40($disp),%rdi # disp->ContextRecord -+ mov $context,%rsi # context -+ mov \$154,%ecx # sizeof(CONTEXT) -+ .long 0xa548f3fc # cld; rep movsq -+ -+ mov $disp,%rsi -+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER -+ mov 8(%rsi),%rdx # arg2, disp->ImageBase -+ mov 0(%rsi),%r8 # arg3, disp->ControlPc -+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry -+ mov 40(%rsi),%r10 # disp->ContextRecord -+ lea 56(%rsi),%r11 # &disp->HandlerData -+ lea 24(%rsi),%r12 # &disp->EstablisherFrame -+ mov %r10,32(%rsp) # arg5 -+ mov %r11,40(%rsp) # arg6 -+ mov %r12,48(%rsp) # arg7 -+ mov %rcx,56(%rsp) # arg8, (NULL) -+ call *__imp_RtlVirtualUnwind(%rip) -+ -+ mov \$1,%eax # ExceptionContinueSearch -+ add \$64,%rsp -+ popfq -+ pop %r15 -+ pop %r14 -+ pop %r13 -+ pop %r12 -+ pop %rbp -+ pop %rbx -+ pop %rdi -+ pop %rsi -+ ret -+.size cbc_se_handler,.-cbc_se_handler -+ -+.section .pdata -+.align 4 -+ .rva .LSEH_begin_${PREFIX}_ecb_encrypt -+ .rva .LSEH_end_${PREFIX}_ecb_encrypt -+ .rva .LSEH_info_ecb -+ -+ .rva .LSEH_begin_${PREFIX}_cbc_encrypt -+ .rva .LSEH_end_${PREFIX}_cbc_encrypt -+ .rva .LSEH_info_cbc -+ -+ .rva ${PREFIX}_set_decrypt_key -+ .rva .LSEH_end_set_decrypt_key -+ .rva .LSEH_info_key -+ -+ .rva ${PREFIX}_set_encrypt_key -+ .rva .LSEH_end_set_encrypt_key -+ .rva .LSEH_info_key -+.section .xdata -+.align 8 -+.LSEH_info_ecb: -+ .byte 9,0,0,0 -+ .rva ecb_se_handler -+.LSEH_info_cbc: -+ .byte 9,0,0,0 -+ .rva cbc_se_handler -+.LSEH_info_key: -+ .byte 0x01,0x04,0x01,0x00 -+ .byte 0x04,0x02,0x00,0x00 -+___ -+} -+ -+sub rex { -+ local *opcode=shift; -+ my ($dst,$src)=@_; -+ -+ if ($dst>=8 || $src>=8) { -+ $rex=0x40; -+ $rex|=0x04 if($dst>=8); -+ $rex|=0x01 if($src>=8); -+ push @opcode,$rex; -+ } -+} -+ -+sub aesni { -+ my $line=shift; -+ my @opcode=(0x66); -+ -+ if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { -+ rex(\@opcode,$4,$3); -+ push @opcode,0x0f,0x3a,0xdf; -+ push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M -+ my $c=$2; -+ push @opcode,$c=~/^0/?oct($c):$c; -+ return ".byte\t".join(',',@opcode); -+ } -+ elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { -+ my %opcodelet = ( -+ "aesimc" => 0xdb, -+ "aesenc" => 0xdc, "aesenclast" => 0xdd, -+ "aesdec" => 0xde, "aesdeclast" => 0xdf -+ ); -+ return undef if (!defined($opcodelet{$1})); -+ rex(\@opcode,$3,$2); -+ push @opcode,0x0f,0x38,$opcodelet{$1}; -+ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M -+ return ".byte\t".join(',',@opcode); -+ } -+ return $line; -+} -+ -+$code =~ s/\`([^\`]*)\`/eval($1)/gem; -+$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; -+ -+print $code; -+ -+close STDOUT; -diff -up openssl-1.0.0-beta4/crypto/aes/Makefile.aesni openssl-1.0.0-beta4/crypto/aes/Makefile ---- openssl-1.0.0-beta4/crypto/aes/Makefile.aesni 2008-12-23 12:33:00.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/aes/Makefile 2010-01-12 22:18:06.000000000 +0100 -@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S - - aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl - $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ -+aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl -+ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ - - aes-x86_64.s: asm/aes-x86_64.pl - $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@ -+aesni-x86_64.s: asm/aesni-x86_64.pl -+ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@ - - aes-sparcv9.s: asm/aes-sparcv9.pl - $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ -diff -up openssl-1.0.0-beta4/crypto/engine/eng_aesni.c.aesni openssl-1.0.0-beta4/crypto/engine/eng_aesni.c ---- openssl-1.0.0-beta4/crypto/engine/eng_aesni.c.aesni 2010-01-12 22:18:06.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/engine/eng_aesni.c 2010-01-12 22:18:06.000000000 +0100 -@@ -0,0 +1,413 @@ -+/* -+ * Support for Intel AES-NI intruction set -+ * Author: Huang Ying -+ * -+ * Intel AES-NI is a new set of Single Instruction Multiple Data -+ * (SIMD) instructions that are going to be introduced in the next -+ * generation of Intel processor, as of 2009. These instructions -+ * enable fast and secure data encryption and decryption, using the -+ * Advanced Encryption Standard (AES), defined by FIPS Publication -+ * number 197. The architecture introduces six instructions that -+ * offer full hardware support for AES. Four of them support high -+ * performance data encryption and decryption, and the other two -+ * instructions support the AES key expansion procedure. -+ * -+ * The white paper can be downloaded from: -+ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf -+ * -+ * This file is based on engines/e_padlock.c -+ */ -+ -+/* ==================================================================== -+ * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions -+ * are met: -+ * -+ * 1. Redistributions of source code must retain the above copyright -+ * notice, this list of conditions and the following disclaimer. -+ * -+ * 2. Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in -+ * the documentation and/or other materials provided with the -+ * distribution. -+ * -+ * 3. All advertising materials mentioning features or use of this -+ * software must display the following acknowledgment: -+ * "This product includes software developed by the OpenSSL Project -+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" -+ * -+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to -+ * endorse or promote products derived from this software without -+ * prior written permission. For written permission, please contact -+ * licensing@OpenSSL.org. -+ * -+ * 5. Products derived from this software may not be called "OpenSSL" -+ * nor may "OpenSSL" appear in their names without prior written -+ * permission of the OpenSSL Project. -+ * -+ * 6. Redistributions of any form whatsoever must retain the following -+ * acknowledgment: -+ * "This product includes software developed by the OpenSSL Project -+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY -+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR -+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -+ * OF THE POSSIBILITY OF SUCH DAMAGE. -+ * ==================================================================== -+ * -+ * This product includes cryptographic software written by Eric Young -+ * (eay@cryptsoft.com). This product includes software written by Tim -+ * Hudson (tjh@cryptsoft.com). -+ * -+ */ -+ -+ -+#include -+ -+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AES_NI) && !defined(OPENSSL_NO_AES) -+ -+#include -+#include "cryptlib.h" -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* AES-NI is available *ONLY* on some x86 CPUs. Not only that it -+ doesn't exist elsewhere, but it even can't be compiled on other -+ platforms! */ -+#undef COMPILE_HW_AESNI -+#if (defined(__x86_64) || defined(__x86_64__) || \ -+ defined(_M_AMD64) || defined(_M_X64) || \ -+ defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) -+#define COMPILE_HW_AESNI -+static ENGINE *ENGINE_aesni (void); -+#endif -+ -+void ENGINE_load_aesni (void) -+{ -+/* On non-x86 CPUs it just returns. */ -+#ifdef COMPILE_HW_AESNI -+ ENGINE *toadd = ENGINE_aesni(); -+ if (!toadd) -+ return; -+ ENGINE_add (toadd); -+ ENGINE_register_complete (toadd); -+ ENGINE_free (toadd); -+ ERR_clear_error (); -+#endif -+} -+ -+#ifdef COMPILE_HW_AESNI -+int aesni_set_encrypt_key(const unsigned char *userKey, int bits, -+ AES_KEY *key); -+int aesni_set_decrypt_key(const unsigned char *userKey, int bits, -+ AES_KEY *key); -+ -+void aesni_encrypt(const unsigned char *in, unsigned char *out, -+ const AES_KEY *key); -+void aesni_decrypt(const unsigned char *in, unsigned char *out, -+ const AES_KEY *key); -+ -+void aesni_ecb_encrypt(const unsigned char *in, -+ unsigned char *out, -+ size_t length, -+ const AES_KEY *key, -+ int enc); -+void aesni_cbc_encrypt(const unsigned char *in, -+ unsigned char *out, -+ size_t length, -+ const AES_KEY *key, -+ unsigned char *ivec, int enc); -+ -+/* Function for ENGINE detection and control */ -+static int aesni_init(ENGINE *e); -+ -+/* Cipher Stuff */ -+static int aesni_ciphers(ENGINE *e, const EVP_CIPHER **cipher, -+ const int **nids, int nid); -+ -+#define AESNI_MIN_ALIGN 16 -+#define AESNI_ALIGN(x) \ -+ ((void *)(((unsigned long)(x)+AESNI_MIN_ALIGN-1)&~(AESNI_MIN_ALIGN-1))) -+ -+/* Engine names */ -+static const char aesni_id[] = "aesni", -+ aesni_name[] = "Intel AES-NI engine", -+ no_aesni_name[] = "Intel AES-NI engine (no-aesni)"; -+ -+/* ===== Engine "management" functions ===== */ -+ -+#if defined(_WIN32) -+typedef unsigned __int64 IA32CAP; -+#else -+typedef unsigned long long IA32CAP; -+#endif -+ -+/* Prepare the ENGINE structure for registration */ -+static int -+aesni_bind_helper(ENGINE *e) -+{ -+ int engage; -+ if (sizeof(OPENSSL_ia32cap_P) > 4) { -+ engage = (OPENSSL_ia32cap_P >> 57) & 1; -+ } else { -+ IA32CAP OPENSSL_ia32_cpuid(void); -+ engage = (OPENSSL_ia32_cpuid() >> 57) & 1; -+ } -+ -+ /* Register everything or return with an error */ -+ if (!ENGINE_set_id(e, aesni_id) || -+ !ENGINE_set_name(e, engage ? aesni_name : no_aesni_name) || -+ -+ !ENGINE_set_init_function(e, aesni_init) || -+ (engage && !ENGINE_set_ciphers (e, aesni_ciphers)) -+ ) -+ return 0; -+ -+ /* Everything looks good */ -+ return 1; -+} -+ -+/* Constructor */ -+static ENGINE * -+ENGINE_aesni(void) -+{ -+ ENGINE *eng = ENGINE_new(); -+ -+ if (!eng) { -+ return NULL; -+ } -+ -+ if (!aesni_bind_helper(eng)) { -+ ENGINE_free(eng); -+ return NULL; -+ } -+ -+ return eng; -+} -+ -+/* Check availability of the engine */ -+static int -+aesni_init(ENGINE *e) -+{ -+ return 1; -+} -+ -+#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) -+#define NID_aes_128_cfb NID_aes_128_cfb128 -+#endif -+ -+#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) -+#define NID_aes_128_ofb NID_aes_128_ofb128 -+#endif -+ -+#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) -+#define NID_aes_192_cfb NID_aes_192_cfb128 -+#endif -+ -+#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) -+#define NID_aes_192_ofb NID_aes_192_ofb128 -+#endif -+ -+#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) -+#define NID_aes_256_cfb NID_aes_256_cfb128 -+#endif -+ -+#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) -+#define NID_aes_256_ofb NID_aes_256_ofb128 -+#endif -+ -+/* List of supported ciphers. */ -+static int aesni_cipher_nids[] = { -+ NID_aes_128_ecb, -+ NID_aes_128_cbc, -+ NID_aes_128_cfb, -+ NID_aes_128_ofb, -+ -+ NID_aes_192_ecb, -+ NID_aes_192_cbc, -+ NID_aes_192_cfb, -+ NID_aes_192_ofb, -+ -+ NID_aes_256_ecb, -+ NID_aes_256_cbc, -+ NID_aes_256_cfb, -+ NID_aes_256_ofb, -+}; -+static int aesni_cipher_nids_num = -+ (sizeof(aesni_cipher_nids)/sizeof(aesni_cipher_nids[0])); -+ -+typedef struct -+{ -+ AES_KEY ks; -+ unsigned int _pad1[3]; -+} AESNI_KEY; -+ -+static int -+aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *user_key, -+ const unsigned char *iv, int enc) -+{ -+ int ret; -+ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ -+ if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE -+ || (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE -+ || enc) -+ ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key); -+ else -+ ret=aesni_set_decrypt_key(user_key, ctx->key_len * 8, key); -+ -+ if(ret < 0) { -+ EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); -+ return 0; -+ } -+ -+ return 1; -+} -+ -+static int aesni_cipher_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ aesni_ecb_encrypt(in, out, inl, key, ctx->encrypt); -+ return 1; -+} -+static int aesni_cipher_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ aesni_cbc_encrypt(in, out, inl, key, -+ ctx->iv, ctx->encrypt); -+ return 1; -+} -+static int aesni_cipher_cfb(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ CRYPTO_cfb128_encrypt(in, out, inl, key, ctx->iv, -+ &ctx->num, ctx->encrypt, -+ (block128_f)aesni_encrypt); -+ return 1; -+} -+static int aesni_cipher_ofb(EVP_CIPHER_CTX *ctx, unsigned char *out, -+ const unsigned char *in, size_t inl) -+{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); -+ CRYPTO_ofb128_encrypt(in, out, inl, key, ctx->iv, -+ &ctx->num, (block128_f)aesni_encrypt); -+ return 1; -+} -+ -+#define AES_BLOCK_SIZE 16 -+ -+#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE -+#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE -+#define EVP_CIPHER_block_size_OFB 1 -+#define EVP_CIPHER_block_size_CFB 1 -+ -+/* Declaring so many ciphers by hand would be a pain. -+ Instead introduce a bit of preprocessor magic :-) */ -+#define DECLARE_AES_EVP(ksize,lmode,umode) \ -+static const EVP_CIPHER aesni_##ksize##_##lmode = { \ -+ NID_aes_##ksize##_##lmode, \ -+ EVP_CIPHER_block_size_##umode, \ -+ ksize / 8, \ -+ AES_BLOCK_SIZE, \ -+ 0 | EVP_CIPH_##umode##_MODE, \ -+ aesni_init_key, \ -+ aesni_cipher_##lmode, \ -+ NULL, \ -+ sizeof(AESNI_KEY), \ -+ EVP_CIPHER_set_asn1_iv, \ -+ EVP_CIPHER_get_asn1_iv, \ -+ NULL, \ -+ NULL \ -+} -+ -+DECLARE_AES_EVP(128,ecb,ECB); -+DECLARE_AES_EVP(128,cbc,CBC); -+DECLARE_AES_EVP(128,cfb,CFB); -+DECLARE_AES_EVP(128,ofb,OFB); -+ -+DECLARE_AES_EVP(192,ecb,ECB); -+DECLARE_AES_EVP(192,cbc,CBC); -+DECLARE_AES_EVP(192,cfb,CFB); -+DECLARE_AES_EVP(192,ofb,OFB); -+ -+DECLARE_AES_EVP(256,ecb,ECB); -+DECLARE_AES_EVP(256,cbc,CBC); -+DECLARE_AES_EVP(256,cfb,CFB); -+DECLARE_AES_EVP(256,ofb,OFB); -+ -+static int -+aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher, -+ const int **nids, int nid) -+{ -+ /* No specific cipher => return a list of supported nids ... */ -+ if (!cipher) { -+ *nids = aesni_cipher_nids; -+ return aesni_cipher_nids_num; -+ } -+ -+ /* ... or the requested "cipher" otherwise */ -+ switch (nid) { -+ case NID_aes_128_ecb: -+ *cipher = &aesni_128_ecb; -+ break; -+ case NID_aes_128_cbc: -+ *cipher = &aesni_128_cbc; -+ break; -+ case NID_aes_128_cfb: -+ *cipher = &aesni_128_cfb; -+ break; -+ case NID_aes_128_ofb: -+ *cipher = &aesni_128_ofb; -+ break; -+ -+ case NID_aes_192_ecb: -+ *cipher = &aesni_192_ecb; -+ break; -+ case NID_aes_192_cbc: -+ *cipher = &aesni_192_cbc; -+ break; -+ case NID_aes_192_cfb: -+ *cipher = &aesni_192_cfb; -+ break; -+ case NID_aes_192_ofb: -+ *cipher = &aesni_192_ofb; -+ break; -+ -+ case NID_aes_256_ecb: -+ *cipher = &aesni_256_ecb; -+ break; -+ case NID_aes_256_cbc: -+ *cipher = &aesni_256_cbc; -+ break; -+ case NID_aes_256_cfb: -+ *cipher = &aesni_256_cfb; -+ break; -+ case NID_aes_256_ofb: -+ *cipher = &aesni_256_ofb; -+ break; -+ -+ default: -+ /* Sorry, we don't support this NID */ -+ *cipher = NULL; -+ return 0; -+ } -+ -+ return 1; -+} -+ -+#endif /* COMPILE_HW_AESNI */ -+#endif /* !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) && !defined(OPENSSL_NO_AES) */ -diff -up openssl-1.0.0-beta4/crypto/engine/eng_all.c.aesni openssl-1.0.0-beta4/crypto/engine/eng_all.c ---- openssl-1.0.0-beta4/crypto/engine/eng_all.c.aesni 2010-01-07 23:38:31.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/engine/eng_all.c 2010-01-12 22:18:06.000000000 +0100 -@@ -85,6 +85,9 @@ void ENGINE_load_builtin_engines(void) - #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV)) - ENGINE_load_cryptodev(); - #endif -+#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) -+ ENGINE_load_aesni(); -+#endif - ENGINE_load_dynamic(); - #ifndef OPENSSL_NO_STATIC_ENGINE - #ifndef OPENSSL_NO_HW -diff -up openssl-1.0.0-beta4/crypto/engine/engine.h.aesni openssl-1.0.0-beta4/crypto/engine/engine.h ---- openssl-1.0.0-beta4/crypto/engine/engine.h.aesni 2010-01-07 23:38:30.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/engine/engine.h 2010-01-12 22:18:06.000000000 +0100 -@@ -342,6 +342,7 @@ void ENGINE_load_gost(void); - #endif - #endif - void ENGINE_load_cryptodev(void); -+void ENGINE_load_aesni(void); - void ENGINE_load_builtin_engines(void); - - /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation -diff -up openssl-1.0.0-beta4/crypto/engine/Makefile.aesni openssl-1.0.0-beta4/crypto/engine/Makefile ---- openssl-1.0.0-beta4/crypto/engine/Makefile.aesni 2008-06-04 13:01:29.000000000 +0200 -+++ openssl-1.0.0-beta4/crypto/engine/Makefile 2010-01-12 22:18:06.000000000 +0100 -@@ -21,12 +21,14 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c e - eng_table.c eng_pkey.c eng_fat.c eng_all.c \ - tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \ - tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \ -- eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c -+ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \ -+ eng_aesni.c - LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \ - eng_table.o eng_pkey.o eng_fat.o eng_all.o \ - tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \ - tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \ -- eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o -+ eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \ -+ eng_aesni.o - - SRC= $(LIBSRC) - -diff -up openssl-1.0.0-beta4/crypto/evp/evp_err.c.aesni openssl-1.0.0-beta4/crypto/evp/evp_err.c ---- openssl-1.0.0-beta4/crypto/evp/evp_err.c.aesni 2010-01-07 23:38:31.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/evp/evp_err.c 2010-01-12 22:18:06.000000000 +0100 -@@ -1,6 +1,6 @@ - /* crypto/evp/evp_err.c */ - /* ==================================================================== -- * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved. -+ * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions -@@ -70,6 +70,7 @@ - - static ERR_STRING_DATA EVP_str_functs[]= - { -+{ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, - {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, - {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"}, - {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"}, -@@ -85,7 +86,7 @@ static ERR_STRING_DATA EVP_str_functs[]= - {ERR_FUNC(EVP_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"}, - {ERR_FUNC(EVP_F_EVP_ENCRYPTFINAL_EX), "EVP_EncryptFinal_ex"}, - {ERR_FUNC(EVP_F_EVP_MD_CTX_COPY_EX), "EVP_MD_CTX_copy_ex"}, --{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_SIZE"}, -+{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_size"}, - {ERR_FUNC(EVP_F_EVP_OPENINIT), "EVP_OpenInit"}, - {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD), "EVP_PBE_alg_add"}, - {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD_TYPE), "EVP_PBE_alg_add_type"}, -diff -up openssl-1.0.0-beta4/crypto/evp/evp.h.aesni openssl-1.0.0-beta4/crypto/evp/evp.h ---- openssl-1.0.0-beta4/crypto/evp/evp.h.aesni 2010-01-07 23:38:31.000000000 +0100 -+++ openssl-1.0.0-beta4/crypto/evp/evp.h 2010-01-12 22:18:06.000000000 +0100 -@@ -1162,6 +1162,7 @@ void ERR_load_EVP_strings(void); - /* Error codes for the EVP functions. */ - - /* Function codes. */ -+#define EVP_F_AESNI_INIT_KEY 163 - #define EVP_F_AES_INIT_KEY 133 - #define EVP_F_CAMELLIA_INIT_KEY 159 - #define EVP_F_D2I_PKEY 100 -diff -up openssl-1.0.0-beta4/test/test_aesni.aesni openssl-1.0.0-beta4/test/test_aesni ---- openssl-1.0.0-beta4/test/test_aesni.aesni 2010-01-12 22:18:06.000000000 +0100 -+++ openssl-1.0.0-beta4/test/test_aesni 2010-01-12 22:18:06.000000000 +0100 -@@ -0,0 +1,69 @@ -+#!/bin/sh -+ -+PROG=$1 -+ -+if [ -x $PROG ]; then -+ if expr "x`$PROG version`" : "xOpenSSL" > /dev/null; then -+ : -+ else -+ echo "$PROG is not OpenSSL executable" -+ exit 1 -+ fi -+else -+ echo "$PROG is not executable" -+ exit 1; -+fi -+ -+if $PROG engine aesni | grep -v no-aesni; then -+ -+ HASH=`cat $PROG | $PROG dgst -hex` -+ -+ AES_ALGS=" aes-128-ecb aes-192-ecb aes-256-ecb \ -+ aes-128-cbc aes-192-cbc aes-256-cbc \ -+ aes-128-cfb aes-192-cfb aes-256-cfb \ -+ aes-128-ofb aes-192-ofb aes-256-ofb" -+ BUFSIZE="16 32 48 64 80 96 128 144 999" -+ -+ nerr=0 -+ -+ for alg in $AES_ALGS; do -+ echo $alg -+ for bufsize in $BUFSIZE; do -+ TEST=`( cat $PROG | \ -+ $PROG enc -e -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \ -+ $PROG enc -d -k "$HASH" -$alg | \ -+ $PROG dgst -hex ) 2>/dev/null` -+ if [ "$TEST" != "$HASH" ]; then -+ echo "-$alg/$bufsize encrypt test failed" -+ nerr=`expr $nerr + 1` -+ fi -+ done -+ for bufsize in $BUFSIZE; do -+ TEST=`( cat $PROG | \ -+ $PROG enc -e -k "$HASH" -$alg | \ -+ $PROG enc -d -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \ -+ $PROG dgst -hex ) 2>/dev/null` -+ if [ "$TEST" != "$HASH" ]; then -+ echo "-$alg/$bufsize decrypt test failed" -+ nerr=`expr $nerr + 1` -+ fi -+ done -+ TEST=`( cat $PROG | \ -+ $PROG enc -e -k "$HASH" -$alg -engine aesni | \ -+ $PROG enc -d -k "$HASH" -$alg -engine aesni | \ -+ $PROG dgst -hex ) 2>/dev/null` -+ if [ "$TEST" != "$HASH" ]; then -+ echo "-$alg en/decrypt test failed" -+ nerr=`expr $nerr + 1` -+ fi -+ done -+ -+ if [ $nerr -gt 0 ]; then -+ echo "AESNI engine test failed." -+ exit 1; -+ fi -+else -+ echo "AESNI engine is not available" -+fi -+ -+exit 0 diff --git a/openssl-1.0.0-beta5-cipher-change.patch b/openssl-1.0.0-beta5-cipher-change.patch index 2e8343b..f3f00cd 100644 --- a/openssl-1.0.0-beta5-cipher-change.patch +++ b/openssl-1.0.0-beta5-cipher-change.patch @@ -6,7 +6,7 @@ diff -up openssl-1.0.0-beta5/ssl/ssl.h.cipher-change openssl-1.0.0-beta5/ssl/ssl /* Allow initial connection to servers that don't support RI */ #define SSL_OP_LEGACY_SERVER_CONNECT 0x00000004L -#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L -+#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L /* can break some security expectations */ ++#define SSL_OP_NETSCAPE_REUSE_CIPHER_CHANGE_BUG 0x00000008L /* no effect since 1.0.0c due to CVE-2010-4180 */ #define SSL_OP_SSLREF2_REUSE_CERT_TYPE_BUG 0x00000010L #define SSL_OP_MICROSOFT_BIG_SSLV3_BUFFER 0x00000020L #define SSL_OP_MSIE_SSLV2_RSA_PADDING 0x00000040L /* no effect since 0.9.7h and 0.9.8b */ diff --git a/openssl-1.0.0-beta5-ipv6-apps.patch b/openssl-1.0.0-beta5-ipv6-apps.patch deleted file mode 100644 index 4304c01..0000000 --- a/openssl-1.0.0-beta5-ipv6-apps.patch +++ /dev/null @@ -1,499 +0,0 @@ -diff -up openssl-1.0.0-beta5/apps/s_apps.h.ipv6-apps openssl-1.0.0-beta5/apps/s_apps.h ---- openssl-1.0.0-beta5/apps/s_apps.h.ipv6-apps 2010-02-03 09:43:49.000000000 +0100 -+++ openssl-1.0.0-beta5/apps/s_apps.h 2010-02-03 09:43:49.000000000 +0100 -@@ -148,7 +148,7 @@ typedef fd_mask fd_set; - #define PORT_STR "4433" - #define PROTOCOL "tcp" - --int do_server(int port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context); -+int do_server(char *port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context); - #ifdef HEADER_X509_H - int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx); - #endif -@@ -156,10 +156,9 @@ int MS_CALLBACK verify_callback(int ok, - int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file); - int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key); - #endif --int init_client(int *sock, char *server, int port, int type); -+int init_client(int *sock, char *server, char *port, int type); - int should_retry(int i); --int extract_port(char *str, short *port_ptr); --int extract_host_port(char *str,char **host_ptr,unsigned char *ip,short *p); -+int extract_host_port(char *str,char **host_ptr,char **port_ptr); - - long MS_CALLBACK bio_dump_callback(BIO *bio, int cmd, const char *argp, - int argi, long argl, long ret); -diff -up openssl-1.0.0-beta5/apps/s_client.c.ipv6-apps openssl-1.0.0-beta5/apps/s_client.c ---- openssl-1.0.0-beta5/apps/s_client.c.ipv6-apps 2010-02-03 09:43:49.000000000 +0100 -+++ openssl-1.0.0-beta5/apps/s_client.c 2010-02-03 09:43:49.000000000 +0100 -@@ -389,7 +389,7 @@ int MAIN(int argc, char **argv) - int cbuf_len,cbuf_off; - int sbuf_len,sbuf_off; - fd_set readfds,writefds; -- short port=PORT; -+ char *port_str = PORT_STR; - int full_log=1; - char *host=SSL_HOST_NAME; - char *cert_file=NULL,*key_file=NULL; -@@ -488,13 +488,12 @@ int MAIN(int argc, char **argv) - else if (strcmp(*argv,"-port") == 0) - { - if (--argc < 1) goto bad; -- port=atoi(*(++argv)); -- if (port == 0) goto bad; -+ port_str= *(++argv); - } - else if (strcmp(*argv,"-connect") == 0) - { - if (--argc < 1) goto bad; -- if (!extract_host_port(*(++argv),&host,NULL,&port)) -+ if (!extract_host_port(*(++argv),&host,&port_str)) - goto bad; - } - else if (strcmp(*argv,"-verify") == 0) -@@ -967,7 +966,7 @@ bad: - - re_start: - -- if (init_client(&s,host,port,socket_type) == 0) -+ if (init_client(&s,host,port_str,socket_type) == 0) - { - BIO_printf(bio_err,"connect:errno=%d\n",get_last_socket_error()); - SHUTDOWN(s); -diff -up openssl-1.0.0-beta5/apps/s_server.c.ipv6-apps openssl-1.0.0-beta5/apps/s_server.c ---- openssl-1.0.0-beta5/apps/s_server.c.ipv6-apps 2010-02-03 09:43:49.000000000 +0100 -+++ openssl-1.0.0-beta5/apps/s_server.c 2010-02-03 09:43:49.000000000 +0100 -@@ -838,7 +838,7 @@ int MAIN(int argc, char *argv[]) - { - X509_VERIFY_PARAM *vpm = NULL; - int badarg = 0; -- short port=PORT; -+ char *port_str = PORT_STR; - char *CApath=NULL,*CAfile=NULL; - unsigned char *context = NULL; - char *dhfile = NULL; -@@ -909,8 +909,7 @@ int MAIN(int argc, char *argv[]) - (strcmp(*argv,"-accept") == 0)) - { - if (--argc < 1) goto bad; -- if (!extract_port(*(++argv),&port)) -- goto bad; -+ port_str= *(++argv); - } - else if (strcmp(*argv,"-verify") == 0) - { -@@ -1700,9 +1699,9 @@ bad: - BIO_printf(bio_s_out,"ACCEPT\n"); - (void)BIO_flush(bio_s_out); - if (www) -- do_server(port,socket_type,&accept_socket,www_body, context); -+ do_server(port_str,socket_type,&accept_socket,www_body, context); - else -- do_server(port,socket_type,&accept_socket,sv_body, context); -+ do_server(port_str,socket_type,&accept_socket,sv_body, context); - print_stats(bio_s_out,ctx); - ret=0; - end: -diff -up openssl-1.0.0-beta5/apps/s_socket.c.ipv6-apps openssl-1.0.0-beta5/apps/s_socket.c ---- openssl-1.0.0-beta5/apps/s_socket.c.ipv6-apps 2009-08-26 13:21:50.000000000 +0200 -+++ openssl-1.0.0-beta5/apps/s_socket.c 2010-02-03 10:00:30.000000000 +0100 -@@ -102,9 +102,7 @@ static struct hostent *GetHostByName(cha - static void ssl_sock_cleanup(void); - #endif - static int ssl_sock_init(void); --static int init_client_ip(int *sock,unsigned char ip[4], int port, int type); --static int init_server(int *sock, int port, int type); --static int init_server_long(int *sock, int port,char *ip, int type); -+static int init_server(int *sock, char *port, int type); - static int do_accept(int acc_sock, int *sock, char **host); - static int host_ip(char *str, unsigned char ip[4]); - -@@ -234,58 +232,70 @@ static int ssl_sock_init(void) - return(1); - } - --int init_client(int *sock, char *host, int port, int type) -+int init_client(int *sock, char *host, char *port, int type) - { -- unsigned char ip[4]; -- -- if (!host_ip(host,&(ip[0]))) -- { -- return(0); -- } -- return(init_client_ip(sock,ip,port,type)); -- } -- --static int init_client_ip(int *sock, unsigned char ip[4], int port, int type) -- { -- unsigned long addr; -- struct sockaddr_in them; -- int s,i; -+ struct addrinfo *res, *res0, hints; -+ char * failed_call = NULL; -+ int s; -+ int e; - - if (!ssl_sock_init()) return(0); - -- memset((char *)&them,0,sizeof(them)); -- them.sin_family=AF_INET; -- them.sin_port=htons((unsigned short)port); -- addr=(unsigned long) -- ((unsigned long)ip[0]<<24L)| -- ((unsigned long)ip[1]<<16L)| -- ((unsigned long)ip[2]<< 8L)| -- ((unsigned long)ip[3]); -- them.sin_addr.s_addr=htonl(addr); -- -- if (type == SOCK_STREAM) -- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL); -- else /* ( type == SOCK_DGRAM) */ -- s=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP); -- -- if (s == INVALID_SOCKET) { perror("socket"); return(0); } -+ memset(&hints, '\0', sizeof(hints)); -+ hints.ai_socktype = type; -+ hints.ai_flags = AI_ADDRCONFIG; -+ -+ e = getaddrinfo(host, port, &hints, &res); -+ if (e) -+ { -+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); -+ if (e == EAI_SYSTEM) -+ perror("getaddrinfo"); -+ return (0); -+ } - -+ res0 = res; -+ while (res) -+ { -+ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol); -+ if (s == INVALID_SOCKET) -+ { -+ failed_call = "socket"; -+ goto nextres; -+ } - #if defined(SO_KEEPALIVE) && !defined(OPENSSL_SYS_MPE) - if (type == SOCK_STREAM) - { -- i=0; -- i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); -- if (i < 0) { perror("keepalive"); return(0); } -+ int i=0; -+ i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE, -+ (char *)&i,sizeof(i)); -+ if (i < 0) { -+ failed_call = "keepalive"; -+ goto nextres; -+ } - } - #endif -- -- if (connect(s,(struct sockaddr *)&them,sizeof(them)) == -1) -- { closesocket(s); perror("connect"); return(0); } -+ if (connect(s,(struct sockaddr *)res->ai_addr, -+ res->ai_addrlen) == 0) -+ { -+ freeaddrinfo(res0); - *sock=s; - return(1); - } - --int do_server(int port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context) -+ failed_call = "socket"; -+nextres: -+ if (s != INVALID_SOCKET) -+ close(s); -+ res = res->ai_next; -+ } -+ freeaddrinfo(res0); -+ -+ perror(failed_call); -+ return(0); -+ } -+ -+int do_server(char *port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context) - { - int sock; - char *name = NULL; -@@ -323,33 +333,38 @@ int do_server(int port, int type, int *r - } - } - --static int init_server_long(int *sock, int port, char *ip, int type) -+static int init_server(int *sock, char *port, int type) - { -- int ret=0; -- struct sockaddr_in server; -- int s= -1,i; -+ struct addrinfo *res, *res0, hints; -+ char * failed_call = NULL; -+ char port_name[8]; -+ int s; -+ int e; - - if (!ssl_sock_init()) return(0); - -- memset((char *)&server,0,sizeof(server)); -- server.sin_family=AF_INET; -- server.sin_port=htons((unsigned short)port); -- if (ip == NULL) -- server.sin_addr.s_addr=INADDR_ANY; -- else --/* Added for T3E, address-of fails on bit field (beckman@acl.lanl.gov) */ --#ifndef BIT_FIELD_LIMITS -- memcpy(&server.sin_addr.s_addr,ip,4); --#else -- memcpy(&server.sin_addr,ip,4); --#endif -+ memset(&hints, '\0', sizeof(hints)); -+ hints.ai_socktype = type; -+ hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; - -- if (type == SOCK_STREAM) -- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL); -- else /* type == SOCK_DGRAM */ -- s=socket(AF_INET, SOCK_DGRAM,IPPROTO_UDP); -+ e = getaddrinfo(NULL, port, &hints, &res); -+ if (e) -+ { -+ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); -+ if (e == EAI_SYSTEM) -+ perror("getaddrinfo"); -+ return (0); -+ } - -- if (s == INVALID_SOCKET) goto err; -+ res0 = res; -+ while (res) -+ { -+ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol); -+ if (s == INVALID_SOCKET) -+ { -+ failed_call = "socket"; -+ goto nextres; -+ } - #if defined SOL_SOCKET && defined SO_REUSEADDR - { - int j = 1; -@@ -357,36 +372,39 @@ static int init_server_long(int *sock, i - (void *) &j, sizeof j); - } - #endif -- if (bind(s,(struct sockaddr *)&server,sizeof(server)) == -1) -+ -+ if (bind(s,(struct sockaddr *)res->ai_addr, res->ai_addrlen) == -1) - { --#ifndef OPENSSL_SYS_WINDOWS -- perror("bind"); --#endif -- goto err; -+ failed_call = "bind"; -+ goto nextres; - } -- /* Make it 128 for linux */ -- if (type==SOCK_STREAM && listen(s,128) == -1) goto err; -- i=0; -- *sock=s; -- ret=1; --err: -- if ((ret == 0) && (s != -1)) -+ if (type==SOCK_STREAM && listen(s,128) == -1) - { -- SHUTDOWN(s); -+ failed_call = "listen"; -+ goto nextres; - } -- return(ret); -+ -+ *sock=s; -+ return(1); -+ -+nextres: -+ if (s != INVALID_SOCKET) -+ close(s); -+ res = res->ai_next; - } -+ freeaddrinfo(res0); - --static int init_server(int *sock, int port, int type) -- { -- return(init_server_long(sock, port, NULL, type)); -+ if (s == INVALID_SOCKET) { perror("socket"); return(0); } -+ -+ perror(failed_call); -+ return(0); - } - - static int do_accept(int acc_sock, int *sock, char **host) - { -- int ret,i; -- struct hostent *h1,*h2; -- static struct sockaddr_in from; -+ static struct sockaddr_storage from; -+ char buffer[NI_MAXHOST]; -+ int ret; - int len; - /* struct linger ling; */ - -@@ -432,136 +450,58 @@ redoit: - */ - - if (host == NULL) goto end; --#ifndef BIT_FIELD_LIMITS -- /* I should use WSAAsyncGetHostByName() under windows */ -- h1=gethostbyaddr((char *)&from.sin_addr.s_addr, -- sizeof(from.sin_addr.s_addr),AF_INET); --#else -- h1=gethostbyaddr((char *)&from.sin_addr, -- sizeof(struct in_addr),AF_INET); --#endif -- if (h1 == NULL) -+ -+ if (getnameinfo((struct sockaddr *)&from, sizeof(from), -+ buffer, sizeof(buffer), -+ NULL, 0, 0)) - { -- BIO_printf(bio_err,"bad gethostbyaddr\n"); -+ BIO_printf(bio_err,"getnameinfo failed\n"); - *host=NULL; - /* return(0); */ - } - else - { -- if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL) -+ if ((*host=(char *)OPENSSL_malloc(strlen(buffer)+1)) == NULL) - { - perror("OPENSSL_malloc"); - return(0); - } -- BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1); -- -- h2=GetHostByName(*host); -- if (h2 == NULL) -- { -- BIO_printf(bio_err,"gethostbyname failure\n"); -- return(0); -- } -- i=0; -- if (h2->h_addrtype != AF_INET) -- { -- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); -- return(0); -- } -+ strcpy(*host, buffer); - } - end: - *sock=ret; - return(1); - } - --int extract_host_port(char *str, char **host_ptr, unsigned char *ip, -- short *port_ptr) -+int extract_host_port(char *str, char **host_ptr, -+ char **port_ptr) - { -- char *h,*p; -+ char *h,*p,*x; - -- h=str; -- p=strchr(str,':'); -+ x=h=str; -+ if (*h == '[') -+ { -+ h++; -+ p=strchr(h,']'); - if (p == NULL) - { -- BIO_printf(bio_err,"no port defined\n"); -+ BIO_printf(bio_err,"no ending bracket for IPv6 address\n"); - return(0); - } - *(p++)='\0'; -- -- if ((ip != NULL) && !host_ip(str,ip)) -- goto err; -- if (host_ptr != NULL) *host_ptr=h; -- -- if (!extract_port(p,port_ptr)) -- goto err; -- return(1); --err: -- return(0); -+ x = p; - } -- --static int host_ip(char *str, unsigned char ip[4]) -- { -- unsigned int in[4]; -- int i; -- -- if (sscanf(str,"%u.%u.%u.%u",&(in[0]),&(in[1]),&(in[2]),&(in[3])) == 4) -- { -- for (i=0; i<4; i++) -- if (in[i] > 255) -- { -- BIO_printf(bio_err,"invalid IP address\n"); -- goto err; -- } -- ip[0]=in[0]; -- ip[1]=in[1]; -- ip[2]=in[2]; -- ip[3]=in[3]; -- } -- else -- { /* do a gethostbyname */ -- struct hostent *he; -- -- if (!ssl_sock_init()) return(0); -- -- he=GetHostByName(str); -- if (he == NULL) -- { -- BIO_printf(bio_err,"gethostbyname failure\n"); -- goto err; -- } -- /* cast to short because of win16 winsock definition */ -- if ((short)he->h_addrtype != AF_INET) -+ p=strchr(x,':'); -+ if (p == NULL) - { -- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); -- return(0); -- } -- ip[0]=he->h_addr_list[0][0]; -- ip[1]=he->h_addr_list[0][1]; -- ip[2]=he->h_addr_list[0][2]; -- ip[3]=he->h_addr_list[0][3]; -- } -- return(1); --err: -+ BIO_printf(bio_err,"no port defined\n"); - return(0); - } -+ *(p++)='\0'; - --int extract_port(char *str, short *port_ptr) -- { -- int i; -- struct servent *s; -+ if (host_ptr != NULL) *host_ptr=h; -+ if (port_ptr != NULL) *port_ptr=p; - -- i=atoi(str); -- if (i != 0) -- *port_ptr=(unsigned short)i; -- else -- { -- s=getservbyname(str,"tcp"); -- if (s == NULL) -- { -- BIO_printf(bio_err,"getservbyname failure for %s\n",str); -- return(0); -- } -- *port_ptr=ntohs((unsigned short)s->s_port); -- } - return(1); - } - diff --git a/openssl-1.0.0a-manfix.patch b/openssl-1.0.0a-manfix.patch new file mode 100644 index 0000000..0d3dc04 --- /dev/null +++ b/openssl-1.0.0a-manfix.patch @@ -0,0 +1,21 @@ +diff -up openssl-1.0.0a/doc/apps/openssl.pod.manfix openssl-1.0.0a/doc/apps/openssl.pod +--- openssl-1.0.0a/doc/apps/openssl.pod.manfix 2010-01-21 19:46:28.000000000 +0100 ++++ openssl-1.0.0a/doc/apps/openssl.pod 2010-06-30 14:24:50.000000000 +0200 +@@ -287,8 +287,6 @@ SHA Digest + + SHA-1 Digest + +-=back +- + =item B + + SHA-224 Digest +@@ -305,6 +303,8 @@ SHA-384 Digest + + SHA-512 Digest + ++=back ++ + =head2 ENCODING AND CIPHER COMMANDS + + =over 10 diff --git a/openssl-1.0.0a-sslt1lib.patch b/openssl-1.0.0a-sslt1lib.patch deleted file mode 100644 index 2e7d2ff..0000000 --- a/openssl-1.0.0a-sslt1lib.patch +++ /dev/null @@ -1,28 +0,0 @@ ---- openssl-1.0.0a/ssl/t1_lib.c 25 Nov 2010 12:28:28 -0000 1.64.2.17 -+++ openssl-1.0.0a/ssl/t1_lib.c 8 Feb 2011 00:00:00 -0000 -@@ -917,6 +917,7 @@ - } - n2s(data, idsize); - dsize -= 2 + idsize; -+ size -= 2 + idsize; - if (dsize < 0) - { - *al = SSL_AD_DECODE_ERROR; -@@ -955,9 +956,14 @@ - } - - /* Read in request_extensions */ -+ if (size < 2) -+ { -+ *al = SSL_AD_DECODE_ERROR; -+ return 0; -+ } - n2s(data,dsize); - size -= 2; -- if (dsize > size) -+ if (dsize != size) - { - *al = SSL_AD_DECODE_ERROR; - return 0; - - diff --git a/openssl-1.0.0a-version.patch b/openssl-1.0.0a-version.patch deleted file mode 100644 index 75a0233..0000000 --- a/openssl-1.0.0a-version.patch +++ /dev/null @@ -1,13 +0,0 @@ -diff -up openssl-1.0.0a/crypto/opensslv.h.version openssl-1.0.0a/crypto/opensslv.h ---- openssl-1.0.0a/crypto/opensslv.h.version 2010-06-04 13:28:52.000000000 +0200 -+++ openssl-1.0.0a/crypto/opensslv.h 2010-06-04 13:29:42.000000000 +0200 -@@ -25,7 +25,8 @@ - * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for - * major minor fix final patch/beta) - */ --#define OPENSSL_VERSION_NUMBER 0x1000001fL -+/* we have to keep the version number to not break the abi */ -+#define OPENSSL_VERSION_NUMBER 0x10000003L - #ifdef OPENSSL_FIPS - #define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0a-fips 1 Jun 2010" - #else diff --git a/openssl-1.0.0b-aesni.patch b/openssl-1.0.0b-aesni.patch new file mode 100644 index 0000000..1dda6bf --- /dev/null +++ b/openssl-1.0.0b-aesni.patch @@ -0,0 +1,2388 @@ +diff -up openssl-1.0.0b/Configure.aesni openssl-1.0.0b/Configure +--- openssl-1.0.0b/Configure.aesni 2010-11-16 17:33:22.000000000 +0100 ++++ openssl-1.0.0b/Configure 2010-11-16 17:35:15.000000000 +0100 +@@ -123,11 +123,11 @@ my $tlib="-lnsl -lsocket"; + my $bits1="THIRTY_TWO_BIT "; + my $bits2="SIXTY_FOUR_BIT "; + +-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o"; ++my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o"; + + my $x86_elf_asm="$x86_asm:elf"; + +-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o"; ++my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o"; + my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void"; + my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void"; + my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void"; +@@ -491,7 +491,7 @@ my %table=( + # + # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64 + "VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32", +-"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", ++"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", + "debug-VC-WIN64I","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32", + "debug-VC-WIN64A","cl:-W3 -Gs0 -Gy -Zi -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:auto:win32", + # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement +@@ -1419,6 +1419,7 @@ if ($rmd160_obj =~ /\.o$/) + if ($aes_obj =~ /\.o$/) + { + $cflags.=" -DAES_ASM"; ++ $aes_obj =~ s/\s*aesni\-x86\.o// if ($no_sse2); + } + else { + $aes_obj=$aes_enc; +diff -up openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl.aesni openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl +--- openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl.aesni 2010-11-16 17:33:23.000000000 +0100 ++++ openssl-1.0.0b/crypto/aes/asm/aesni-x86.pl 2010-11-16 17:33:23.000000000 +0100 +@@ -0,0 +1,765 @@ ++#!/usr/bin/env perl ++ ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# This module implements support for Intel AES-NI extension. In ++# OpenSSL context it's used with Intel engine, but can also be used as ++# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for ++# details]. ++ ++$PREFIX="aesni"; # if $PREFIX is set to "AES", the script ++ # generates drop-in replacement for ++ # crypto/aes/asm/aes-586.pl:-) ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++push(@INC,"${dir}","${dir}../../perlasm"); ++require "x86asm.pl"; ++ ++&asm_init($ARGV[0],$0); ++ ++$movekey = eval($RREFIX eq "aseni" ? "*movaps" : "*movups"); ++ ++$len="eax"; ++$rounds="ecx"; ++$key="edx"; ++$inp="esi"; ++$out="edi"; ++$rounds_="ebx"; # backup copy for $rounds ++$key_="ebp"; # backup copy for $key ++ ++$inout0="xmm0"; ++$inout1="xmm1"; ++$inout2="xmm2"; ++$rndkey0="xmm3"; ++$rndkey1="xmm4"; ++$ivec="xmm5"; ++$in0="xmm6"; ++$in1="xmm7"; $inout3="xmm7"; ++ ++# Inline version of internal aesni_[en|de]crypt1 ++sub aesni_inline_generate1 ++{ my $p=shift; ++ ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ &$movekey ($rndkey1,&QWP(16,$key)); ++ &lea ($key,&DWP(32,$key)); ++ &pxor ($inout0,$rndkey0); ++ &set_label("${p}1_loop"); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &dec ($rounds); ++ &$movekey ($rndkey1,&QWP(0,$key)); ++ &lea ($key,&DWP(16,$key)); ++ &jnz (&label("${p}1_loop")); ++ eval"&aes${p}last ($inout0,$rndkey1)"; ++} ++ ++sub aesni_generate1 # fully unrolled loop ++{ my $p=shift; ++ ++ &function_begin_B("_aesni_${p}rypt1"); ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ &$movekey ($rndkey1,&QWP(0x10,$key)); ++ &cmp ($rounds,11); ++ &pxor ($inout0,$rndkey0); ++ &$movekey ($rndkey0,&QWP(0x20,$key)); ++ &lea ($key,&DWP(0x30,$key)); ++ &jb (&label("${p}128")); ++ &lea ($key,&DWP(0x20,$key)); ++ &je (&label("${p}192")); ++ &lea ($key,&DWP(0x20,$key)); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(-0x40,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &$movekey ($rndkey0,&QWP(-0x30,$key)); ++ &set_label("${p}192"); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(-0x20,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &$movekey ($rndkey0,&QWP(-0x10,$key)); ++ &set_label("${p}128"); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(0,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &$movekey ($rndkey0,&QWP(0x10,$key)); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(0x20,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &$movekey ($rndkey0,&QWP(0x30,$key)); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(0x40,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &$movekey ($rndkey0,&QWP(0x50,$key)); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(0x60,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &$movekey ($rndkey0,&QWP(0x70,$key)); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ eval"&aes${p}last ($inout0,$rndkey0)"; ++ &ret(); ++ &function_end_B("_aesni_${p}rypt1"); ++} ++ ++# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key); ++# &aesni_generate1("dec"); ++&function_begin_B("${PREFIX}_encrypt"); ++ &mov ("eax",&wparam(0)); ++ &mov ($key,&wparam(2)); ++ &movups ($inout0,&QWP(0,"eax")); ++ &mov ($rounds,&DWP(240,$key)); ++ &mov ("eax",&wparam(1)); ++ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1"); ++ &movups (&QWP(0,"eax"),$inout0); ++ &ret (); ++&function_end_B("${PREFIX}_encrypt"); ++ ++# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key); ++# &aesni_generate1("dec"); ++&function_begin_B("${PREFIX}_decrypt"); ++ &mov ("eax",&wparam(0)); ++ &mov ($key,&wparam(2)); ++ &movups ($inout0,&QWP(0,"eax")); ++ &mov ($rounds,&DWP(240,$key)); ++ &mov ("eax",&wparam(1)); ++ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt1"); ++ &movups (&QWP(0,"eax"),$inout0); ++ &ret (); ++&function_end_B("${PREFIX}_decrypt"); ++ ++# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave ++# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] ++# latency is 6, it turned out that it can be scheduled only every ++# *second* cycle. Thus 3x interleave is the one providing optimal ++# utilization, i.e. when subroutine's throughput is virtually same as ++# of non-interleaved subroutine [for number of input blocks up to 3]. ++# This is why it makes no sense to implement 2x subroutine. As soon ++# as/if Intel improves throughput by making it possible to schedule ++# the instructions in question *every* cycles I would have to ++# implement 6x interleave and use it in loop... ++sub aesni_generate3 ++{ my $p=shift; ++ ++ &function_begin_B("_aesni_${p}rypt3"); ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ &shr ($rounds,1); ++ &$movekey ($rndkey1,&QWP(16,$key)); ++ &lea ($key,&DWP(32,$key)); ++ &pxor ($inout0,$rndkey0); ++ &pxor ($inout1,$rndkey0); ++ &pxor ($inout2,$rndkey0); ++ &jmp (&label("${p}3_loop")); ++ &set_label("${p}3_loop",16); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ eval"&aes${p} ($inout1,$rndkey1)"; ++ &dec ($rounds); ++ eval"&aes${p} ($inout2,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(16,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &lea ($key,&DWP(32,$key)); ++ eval"&aes${p} ($inout1,$rndkey0)"; ++ eval"&aes${p} ($inout2,$rndkey0)"; ++ &jnz (&label("${p}3_loop")); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ eval"&aes${p} ($inout1,$rndkey1)"; ++ eval"&aes${p} ($inout2,$rndkey1)"; ++ eval"&aes${p}last ($inout0,$rndkey0)"; ++ eval"&aes${p}last ($inout1,$rndkey0)"; ++ eval"&aes${p}last ($inout2,$rndkey0)"; ++ &ret(); ++ &function_end_B("_aesni_${p}rypt3"); ++} ++ ++# 4x interleave is implemented to improve small block performance, ++# most notably [and naturally] 4 block by ~30%. One can argue that one ++# should have implemented 5x as well, but improvement would be <20%, ++# so it's not worth it... ++sub aesni_generate4 ++{ my $p=shift; ++ ++ &function_begin_B("_aesni_${p}rypt4"); ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ &$movekey ($rndkey1,&QWP(16,$key)); ++ &shr ($rounds,1); ++ &lea ($key,&DWP(32,$key)); ++ &pxor ($inout0,$rndkey0); ++ &pxor ($inout1,$rndkey0); ++ &pxor ($inout2,$rndkey0); ++ &pxor ($inout3,$rndkey0); ++ &jmp (&label("${p}3_loop")); ++ &set_label("${p}3_loop",16); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ eval"&aes${p} ($inout1,$rndkey1)"; ++ &dec ($rounds); ++ eval"&aes${p} ($inout2,$rndkey1)"; ++ eval"&aes${p} ($inout3,$rndkey1)"; ++ &$movekey ($rndkey1,&QWP(16,$key)); ++ eval"&aes${p} ($inout0,$rndkey0)"; ++ &lea ($key,&DWP(32,$key)); ++ eval"&aes${p} ($inout1,$rndkey0)"; ++ eval"&aes${p} ($inout2,$rndkey0)"; ++ eval"&aes${p} ($inout3,$rndkey0)"; ++ &jnz (&label("${p}3_loop")); ++ eval"&aes${p} ($inout0,$rndkey1)"; ++ &$movekey ($rndkey0,&QWP(0,$key)); ++ eval"&aes${p} ($inout1,$rndkey1)"; ++ eval"&aes${p} ($inout2,$rndkey1)"; ++ eval"&aes${p} ($inout3,$rndkey1)"; ++ eval"&aes${p}last ($inout0,$rndkey0)"; ++ eval"&aes${p}last ($inout1,$rndkey0)"; ++ eval"&aes${p}last ($inout2,$rndkey0)"; ++ eval"&aes${p}last ($inout3,$rndkey0)"; ++ &ret(); ++ &function_end_B("_aesni_${p}rypt4"); ++} ++&aesni_generate3("enc") if ($PREFIX eq "aesni"); ++&aesni_generate3("dec"); ++&aesni_generate4("enc") if ($PREFIX eq "aesni"); ++&aesni_generate4("dec"); ++ ++if ($PREFIX eq "aesni") { ++# void aesni_ecb_encrypt (const void *in, void *out, ++# size_t length, const AES_KEY *key, ++# int enc); ++&function_begin("aesni_ecb_encrypt"); ++ &mov ($inp,&wparam(0)); ++ &mov ($out,&wparam(1)); ++ &mov ($len,&wparam(2)); ++ &mov ($key,&wparam(3)); ++ &mov ($rounds,&wparam(4)); ++ &cmp ($len,16); ++ &jb (&label("ecb_ret")); ++ &and ($len,-16); ++ &test ($rounds,$rounds) ++ &mov ($rounds,&DWP(240,$key)); ++ &mov ($key_,$key); # backup $key ++ &mov ($rounds_,$rounds); # backup $rounds ++ &jz (&label("ecb_decrypt")); ++ ++ &sub ($len,0x40); ++ &jbe (&label("ecb_enc_tail")); ++ &jmp (&label("ecb_enc_loop3")); ++ ++&set_label("ecb_enc_loop3",16); ++ &movups ($inout0,&QWP(0,$inp)); ++ &movups ($inout1,&QWP(0x10,$inp)); ++ &movups ($inout2,&QWP(0x20,$inp)); ++ &call ("_aesni_encrypt3"); ++ &sub ($len,0x30); ++ &lea ($inp,&DWP(0x30,$inp)); ++ &lea ($out,&DWP(0x30,$out)); ++ &movups (&QWP(-0x30,$out),$inout0); ++ &mov ($key,$key_); # restore $key ++ &movups (&QWP(-0x20,$out),$inout1); ++ &mov ($rounds,$rounds_); # restore $rounds ++ &movups (&QWP(-0x10,$out),$inout2); ++ &ja (&label("ecb_enc_loop3")); ++ ++&set_label("ecb_enc_tail"); ++ &add ($len,0x40); ++ &jz (&label("ecb_ret")); ++ ++ &cmp ($len,0x10); ++ &movups ($inout0,&QWP(0,$inp)); ++ &je (&label("ecb_enc_one")); ++ &cmp ($len,0x20); ++ &movups ($inout1,&QWP(0x10,$inp)); ++ &je (&label("ecb_enc_two")); ++ &cmp ($len,0x30); ++ &movups ($inout2,&QWP(0x20,$inp)); ++ &je (&label("ecb_enc_three")); ++ &movups ($inout3,&QWP(0x30,$inp)); ++ &call ("_aesni_encrypt4"); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &movups (&QWP(0x20,$out),$inout2); ++ &movups (&QWP(0x30,$out),$inout3); ++ jmp (&label("ecb_ret")); ++ ++&set_label("ecb_enc_one",16); ++ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt1"); ++ &movups (&QWP(0,$out),$inout0); ++ &jmp (&label("ecb_ret")); ++ ++&set_label("ecb_enc_two",16); ++ &call ("_aesni_encrypt3"); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &jmp (&label("ecb_ret")); ++ ++&set_label("ecb_enc_three",16); ++ &call ("_aesni_encrypt3"); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &movups (&QWP(0x20,$out),$inout2); ++ &jmp (&label("ecb_ret")); ++ ++&set_label("ecb_decrypt",16); ++ &sub ($len,0x40); ++ &jbe (&label("ecb_dec_tail")); ++ &jmp (&label("ecb_dec_loop3")); ++ ++&set_label("ecb_dec_loop3",16); ++ &movups ($inout0,&QWP(0,$inp)); ++ &movups ($inout1,&QWP(0x10,$inp)); ++ &movups ($inout2,&QWP(0x20,$inp)); ++ &call ("_aesni_decrypt3"); ++ &sub ($len,0x30); ++ &lea ($inp,&DWP(0x30,$inp)); ++ &lea ($out,&DWP(0x30,$out)); ++ &movups (&QWP(-0x30,$out),$inout0); ++ &mov ($key,$key_); # restore $key ++ &movups (&QWP(-0x20,$out),$inout1); ++ &mov ($rounds,$rounds_); # restore $rounds ++ &movups (&QWP(-0x10,$out),$inout2); ++ &ja (&label("ecb_dec_loop3")); ++ ++&set_label("ecb_dec_tail"); ++ &add ($len,0x40); ++ &jz (&label("ecb_ret")); ++ ++ &cmp ($len,0x10); ++ &movups ($inout0,&QWP(0,$inp)); ++ &je (&label("ecb_dec_one")); ++ &cmp ($len,0x20); ++ &movups ($inout1,&QWP(0x10,$inp)); ++ &je (&label("ecb_dec_two")); ++ &cmp ($len,0x30); ++ &movups ($inout2,&QWP(0x20,$inp)); ++ &je (&label("ecb_dec_three")); ++ &movups ($inout3,&QWP(0x30,$inp)); ++ &call ("_aesni_decrypt4"); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &movups (&QWP(0x20,$out),$inout2); ++ &movups (&QWP(0x30,$out),$inout3); ++ &jmp (&label("ecb_ret")); ++ ++&set_label("ecb_dec_one",16); ++ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3"); ++ &movups (&QWP(0,$out),$inout0); ++ &jmp (&label("ecb_ret")); ++ ++&set_label("ecb_dec_two",16); ++ &call ("_aesni_decrypt3"); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &jmp (&label("ecb_ret")); ++ ++&set_label("ecb_dec_three",16); ++ &call ("_aesni_decrypt3"); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &movups (&QWP(0x20,$out),$inout2); ++ ++&set_label("ecb_ret"); ++&function_end("aesni_ecb_encrypt"); ++} ++ ++# void $PREFIX_cbc_encrypt (const void *inp, void *out, ++# size_t length, const AES_KEY *key, ++# unsigned char *ivp,const int enc); ++&function_begin("${PREFIX}_cbc_encrypt"); ++ &mov ($inp,&wparam(0)); ++ &mov ($out,&wparam(1)); ++ &mov ($len,&wparam(2)); ++ &mov ($key,&wparam(3)); ++ &test ($len,$len); ++ &mov ($key_,&wparam(4)); ++ &jz (&label("cbc_ret")); ++ ++ &cmp (&wparam(5),0); ++ &movups ($ivec,&QWP(0,$key_)); # load IV ++ &mov ($rounds,&DWP(240,$key)); ++ &mov ($key_,$key); # backup $key ++ &mov ($rounds_,$rounds); # backup $rounds ++ &je (&label("cbc_decrypt")); ++ ++ &movaps ($inout0,$ivec); ++ &cmp ($len,16); ++ &jb (&label("cbc_enc_tail")); ++ &sub ($len,16); ++ &jmp (&label("cbc_enc_loop")); ++ ++&set_label("cbc_enc_loop",16); ++ &movups ($ivec,&QWP(0,$inp)); ++ &lea ($inp,&DWP(16,$inp)); ++ &pxor ($inout0,$ivec); ++ &aesni_inline_generate1("enc"); # &call ("_aesni_encrypt3"); ++ &sub ($len,16); ++ &lea ($out,&DWP(16,$out)); ++ &mov ($rounds,$rounds_); # restore $rounds ++ &mov ($key,$key_); # restore $key ++ &movups (&QWP(-16,$out),$inout0); ++ &jnc (&label("cbc_enc_loop")); ++ &add ($len,16); ++ &jnz (&label("cbc_enc_tail")); ++ &movaps ($ivec,$inout0); ++ &jmp (&label("cbc_ret")); ++ ++&set_label("cbc_enc_tail"); ++ &mov ("ecx",$len); # zaps $rounds ++ &data_word(0xA4F3F689); # rep movsb ++ &mov ("ecx",16); # zero tail ++ &sub ("ecx",$len); ++ &xor ("eax","eax"); # zaps $len ++ &data_word(0xAAF3F689); # rep stosb ++ &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block ++ &mov ($rounds,$rounds_); # restore $rounds ++ &mov ($inp,$out); # $inp and $out are the same ++ &mov ($key,$key_); # restore $key ++ &jmp (&label("cbc_enc_loop")); ++ ++&set_label("cbc_decrypt",16); ++ &sub ($len,0x40); ++ &jbe (&label("cbc_dec_tail")); ++ &jmp (&label("cbc_dec_loop3")); ++ ++&set_label("cbc_dec_loop3",16); ++ &movups ($inout0,&QWP(0,$inp)); ++ &movups ($inout1,&QWP(0x10,$inp)); ++ &movups ($inout2,&QWP(0x20,$inp)); ++ &movaps ($in0,$inout0); ++ &movaps ($in1,$inout1); ++ &call ("_aesni_decrypt3"); ++ &sub ($len,0x30); ++ &lea ($inp,&DWP(0x30,$inp)); ++ &lea ($out,&DWP(0x30,$out)); ++ &pxor ($inout0,$ivec); ++ &pxor ($inout1,$in0); ++ &movups ($ivec,&QWP(-0x10,$inp)); ++ &pxor ($inout2,$in1); ++ &movups (&QWP(-0x30,$out),$inout0); ++ &mov ($rounds,$rounds_) # restore $rounds ++ &movups (&QWP(-0x20,$out),$inout1); ++ &mov ($key,$key_); # restore $key ++ &movups (&QWP(-0x10,$out),$inout2); ++ &ja (&label("cbc_dec_loop3")); ++ ++&set_label("cbc_dec_tail"); ++ &add ($len,0x40); ++ &jz (&label("cbc_ret")); ++ ++ &movups ($inout0,&QWP(0,$inp)); ++ &cmp ($len,0x10); ++ &movaps ($in0,$inout0); ++ &jbe (&label("cbc_dec_one")); ++ &movups ($inout1,&QWP(0x10,$inp)); ++ &cmp ($len,0x20); ++ &movaps ($in1,$inout1); ++ &jbe (&label("cbc_dec_two")); ++ &movups ($inout2,&QWP(0x20,$inp)); ++ &cmp ($len,0x30); ++ &jbe (&label("cbc_dec_three")); ++ &movups ($inout3,&QWP(0x30,$inp)); ++ &call ("_aesni_decrypt4"); ++ &movups ($rndkey0,&QWP(0x10,$inp)); ++ &movups ($rndkey1,&QWP(0x20,$inp)); ++ &pxor ($inout0,$ivec); ++ &pxor ($inout1,$in0); ++ &movups ($ivec,&QWP(0x30,$inp)); ++ &movups (&QWP(0,$out),$inout0); ++ &pxor ($inout2,$rndkey0); ++ &pxor ($inout3,$rndkey1); ++ &movups (&QWP(0x10,$out),$inout1); ++ &movups (&QWP(0x20,$out),$inout2); ++ &movaps ($inout0,$inout3); ++ &lea ($out,&DWP(0x30,$out)); ++ &jmp (&label("cbc_dec_tail_collected")); ++ ++&set_label("cbc_dec_one"); ++ &aesni_inline_generate1("dec"); # &call ("_aesni_decrypt3"); ++ &pxor ($inout0,$ivec); ++ &movaps ($ivec,$in0); ++ &jmp (&label("cbc_dec_tail_collected")); ++ ++&set_label("cbc_dec_two"); ++ &call ("_aesni_decrypt3"); ++ &pxor ($inout0,$ivec); ++ &pxor ($inout1,$in0); ++ &movups (&QWP(0,$out),$inout0); ++ &movaps ($inout0,$inout1); ++ &movaps ($ivec,$in1); ++ &lea ($out,&DWP(0x10,$out)); ++ &jmp (&label("cbc_dec_tail_collected")); ++ ++&set_label("cbc_dec_three"); ++ &call ("_aesni_decrypt3"); ++ &pxor ($inout0,$ivec); ++ &pxor ($inout1,$in0); ++ &pxor ($inout2,$in1); ++ &movups (&QWP(0,$out),$inout0); ++ &movups (&QWP(0x10,$out),$inout1); ++ &movaps ($inout0,$inout2); ++ &movups ($ivec,&QWP(0x20,$inp)); ++ &lea ($out,&DWP(0x20,$out)); ++ ++&set_label("cbc_dec_tail_collected"); ++ &and ($len,15); ++ &jnz (&label("cbc_dec_tail_partial")); ++ &movups (&QWP(0,$out),$inout0); ++ &jmp (&label("cbc_ret")); ++ ++&set_label("cbc_dec_tail_partial"); ++ &mov ($key_,"esp"); ++ &sub ("esp",16); ++ &and ("esp",-16); ++ &movaps (&QWP(0,"esp"),$inout0); ++ &mov ($inp,"esp"); ++ &mov ("ecx",$len); ++ &data_word(0xA4F3F689); # rep movsb ++ &mov ("esp",$key_); ++ ++&set_label("cbc_ret"); ++ &mov ($key_,&wparam(4)); ++ &movups (&QWP(0,$key_),$ivec); # output IV ++&function_end("${PREFIX}_cbc_encrypt"); ++ ++# Mechanical port from aesni-x86_64.pl. ++# ++# _aesni_set_encrypt_key is private interface, ++# input: ++# "eax" const unsigned char *userKey ++# $rounds int bits ++# $key AES_KEY *key ++# output: ++# "eax" return code ++# $round rounds ++ ++&function_begin_B("_aesni_set_encrypt_key"); ++ &test ("eax","eax"); ++ &jz (&label("bad_pointer")); ++ &test ($key,$key); ++ &jz (&label("bad_pointer")); ++ ++ &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey ++ &pxor ("xmm4","xmm4"); # low dword of xmm4 is assumed 0 ++ &lea ($key,&DWP(16,$key)); ++ &cmp ($rounds,256); ++ &je (&label("14rounds")); ++ &cmp ($rounds,192); ++ &je (&label("12rounds")); ++ &cmp ($rounds,128); ++ &jne (&label("bad_keybits")); ++ ++&set_label("10rounds",16); ++ &mov ($rounds,9); ++ &$movekey (&QWP(-16,$key),"xmm0"); # round 0 ++ &aeskeygenassist("xmm1","xmm0",0x01); # round 1 ++ &call (&label("key_128_cold")); ++ &aeskeygenassist("xmm1","xmm0",0x2); # round 2 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x04); # round 3 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x08); # round 4 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x10); # round 5 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x20); # round 6 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x40); # round 7 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x80); # round 8 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x1b); # round 9 ++ &call (&label("key_128")); ++ &aeskeygenassist("xmm1","xmm0",0x36); # round 10 ++ &call (&label("key_128")); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ &mov (&DWP(80,$key),$rounds); ++ &xor ("eax","eax"); ++ &ret(); ++ ++&set_label("key_128",16); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ &lea ($key,&DWP(16,$key)); ++&set_label("key_128_cold"); ++ &shufps ("xmm4","xmm0",0b00010000); ++ &pxor ("xmm0","xmm4"); ++ &shufps ("xmm4","xmm0",0b10001100,); ++ &pxor ("xmm0","xmm4"); ++ &pshufd ("xmm1","xmm1",0b11111111); # critical path ++ &pxor ("xmm0","xmm1"); ++ &ret(); ++ ++&set_label("12rounds",16); ++ &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey ++ &mov ($rounds,11); ++ &$movekey (&QWP(-16,$key),"xmm0") # round 0 ++ &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 ++ &call (&label("key_192a_cold")); ++ &aeskeygenassist("xmm1","xmm2",0x02); # round 2,3 ++ &call (&label("key_192b")); ++ &aeskeygenassist("xmm1","xmm2",0x04); # round 4,5 ++ &call (&label("key_192a")); ++ &aeskeygenassist("xmm1","xmm2",0x08); # round 5,6 ++ &call (&label("key_192b")); ++ &aeskeygenassist("xmm1","xmm2",0x10); # round 7,8 ++ &call (&label("key_192a")); ++ &aeskeygenassist("xmm1","xmm2",0x20); # round 8,9 ++ &call (&label("key_192b")); ++ &aeskeygenassist("xmm1","xmm2",0x40); # round 10,11 ++ &call (&label("key_192a")); ++ &aeskeygenassist("xmm1","xmm2",0x80); # round 11,12 ++ &call (&label("key_192b")); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ &mov (&DWP(48,$key),$rounds); ++ &xor ("eax","eax"); ++ &ret(); ++ ++&set_label("key_192a",16); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ &lea ($key,&DWP(16,$key)); ++&set_label("key_192a_cold",16); ++ &movaps ("xmm5","xmm2"); ++&set_label("key_192b_warm"); ++ &shufps ("xmm4","xmm0",0b00010000); ++ &movaps ("xmm3","xmm2"); ++ &pxor ("xmm0","xmm4"); ++ &shufps ("xmm4","xmm0",0b10001100); ++ &pslldq ("xmm3",4); ++ &pxor ("xmm0","xmm4"); ++ &pshufd ("xmm1","xmm1",0b01010101); # critical path ++ &pxor ("xmm2","xmm3"); ++ &pxor ("xmm0","xmm1"); ++ &pshufd ("xmm3","xmm0",0b11111111); ++ &pxor ("xmm2","xmm3"); ++ &ret(); ++ ++&set_label("key_192b",16); ++ &movaps ("xmm3","xmm0"); ++ &shufps ("xmm5","xmm0",0b01000100); ++ &$movekey (&QWP(0,$key),"xmm5"); ++ &shufps ("xmm3","xmm2",0b01001110); ++ &$movekey (&QWP(16,$key),"xmm3"); ++ &lea ($key,&DWP(32,$key)); ++ &jmp (&label("key_192b_warm")); ++ ++&set_label("14rounds",16); ++ &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey ++ &mov ($rounds,13); ++ &lea ($key,&DWP(16,$key)); ++ &$movekey (&QWP(-32,$key),"xmm0"); # round 0 ++ &$movekey (&QWP(-16,$key),"xmm2"); # round 1 ++ &aeskeygenassist("xmm1","xmm2",0x01); # round 2 ++ &call (&label("key_256a_cold")); ++ &aeskeygenassist("xmm1","xmm0",0x01); # round 3 ++ &call (&label("key_256b")); ++ &aeskeygenassist("xmm1","xmm2",0x02); # round 4 ++ &call (&label("key_256a")); ++ &aeskeygenassist("xmm1","xmm0",0x02); # round 5 ++ &call (&label("key_256b")); ++ &aeskeygenassist("xmm1","xmm2",0x04); # round 6 ++ &call (&label("key_256a")); ++ &aeskeygenassist("xmm1","xmm0",0x04); # round 7 ++ &call (&label("key_256b")); ++ &aeskeygenassist("xmm1","xmm2",0x08); # round 8 ++ &call (&label("key_256a")); ++ &aeskeygenassist("xmm1","xmm0",0x08); # round 9 ++ &call (&label("key_256b")); ++ &aeskeygenassist("xmm1","xmm2",0x10); # round 10 ++ &call (&label("key_256a")); ++ &aeskeygenassist("xmm1","xmm0",0x10); # round 11 ++ &call (&label("key_256b")); ++ &aeskeygenassist("xmm1","xmm2",0x20); # round 12 ++ &call (&label("key_256a")); ++ &aeskeygenassist("xmm1","xmm0",0x20); # round 13 ++ &call (&label("key_256b")); ++ &aeskeygenassist("xmm1","xmm2",0x40); # round 14 ++ &call (&label("key_256a")); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ &mov (&DWP(16,$key),$rounds); ++ &xor ("eax","eax"); ++ &ret(); ++ ++&set_label("key_256a",16); ++ &$movekey (&QWP(0,$key),"xmm2"); ++ &lea ($key,&DWP(16,$key)); ++&set_label("key_256a_cold"); ++ &shufps ("xmm4","xmm0",0b00010000); ++ &pxor ("xmm0","xmm4"); ++ &shufps ("xmm4","xmm0",0b10001100); ++ &pxor ("xmm0","xmm4"); ++ &pshufd ("xmm1","xmm1",0b11111111); # critical path ++ &pxor ("xmm0","xmm1"); ++ &ret(); ++ ++&set_label("key_256b",16); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ &lea ($key,&DWP(16,$key)); ++ ++ &shufps ("xmm4","xmm2",0b00010000); ++ &pxor ("xmm2","xmm4"); ++ &shufps ("xmm4","xmm2",0b10001100); ++ &pxor ("xmm2","xmm4"); ++ &pshufd ("xmm1","xmm1",0b10101010); # critical path ++ &pxor ("xmm2","xmm1"); ++ &ret(); ++ ++&set_label("bad_pointer",4); ++ &mov ("eax",-1); ++ &ret (); ++&set_label("bad_keybits",4); ++ &mov ("eax",-2); ++ &ret (); ++&function_end_B("_aesni_set_encrypt_key"); ++ ++# int $PREFIX_set_encrypt_key (const unsigned char *userKey, int bits, ++# AES_KEY *key) ++&function_begin_B("${PREFIX}_set_encrypt_key"); ++ &mov ("eax",&wparam(0)); ++ &mov ($rounds,&wparam(1)); ++ &mov ($key,&wparam(2)); ++ &call ("_aesni_set_encrypt_key"); ++ &ret (); ++&function_end_B("${PREFIX}_set_encrypt_key"); ++ ++# int $PREFIX_set_decrypt_key (const unsigned char *userKey, int bits, ++# AES_KEY *key) ++&function_begin_B("${PREFIX}_set_decrypt_key"); ++ &mov ("eax",&wparam(0)); ++ &mov ($rounds,&wparam(1)); ++ &mov ($key,&wparam(2)); ++ &call ("_aesni_set_encrypt_key"); ++ &mov ($key,&wparam(2)); ++ &shl ($rounds,4) # rounds-1 after _aesni_set_encrypt_key ++ &test ("eax","eax"); ++ &jnz (&label("dec_key_ret")); ++ &lea ("eax",&DWP(16,$key,$rounds)); # end of key schedule ++ ++ &$movekey ("xmm0",&QWP(0,$key)); # just swap ++ &$movekey ("xmm1",&QWP(0,"eax")); ++ &$movekey (&QWP(0,"eax"),"xmm0"); ++ &$movekey (&QWP(0,$key),"xmm1"); ++ &lea ($key,&DWP(16,$key)); ++ &lea ("eax",&DWP(-16,"eax")); ++ ++&set_label("dec_key_inverse"); ++ &$movekey ("xmm0",&QWP(0,$key)); # swap and inverse ++ &$movekey ("xmm1",&QWP(0,"eax")); ++ &aesimc ("xmm0","xmm0"); ++ &aesimc ("xmm1","xmm1"); ++ &lea ($key,&DWP(16,$key)); ++ &lea ("eax",&DWP(-16,"eax")); ++ &cmp ("eax",$key); ++ &$movekey (&QWP(16,"eax"),"xmm0"); ++ &$movekey (&QWP(-16,$key),"xmm1"); ++ &ja (&label("dec_key_inverse")); ++ ++ &$movekey ("xmm0",&QWP(0,$key)); # inverse middle ++ &aesimc ("xmm0","xmm0"); ++ &$movekey (&QWP(0,$key),"xmm0"); ++ ++ &xor ("eax","eax"); # return success ++&set_label("dec_key_ret"); ++ &ret (); ++&function_end_B("${PREFIX}_set_decrypt_key"); ++&asciz("AES for Intel AES-NI, CRYPTOGAMS by "); ++ ++&asm_finish(); +diff -up openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl.aesni openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl +--- openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl.aesni 2010-11-16 17:33:23.000000000 +0100 ++++ openssl-1.0.0b/crypto/aes/asm/aesni-x86_64.pl 2010-11-16 17:33:23.000000000 +0100 +@@ -0,0 +1,991 @@ ++#!/usr/bin/env perl ++# ++# ==================================================================== ++# Written by Andy Polyakov for the OpenSSL ++# project. The module is, however, dual licensed under OpenSSL and ++# CRYPTOGAMS licenses depending on where you obtain it. For further ++# details see http://www.openssl.org/~appro/cryptogams/. ++# ==================================================================== ++# ++# This module implements support for Intel AES-NI extension. In ++# OpenSSL context it's used with Intel engine, but can also be used as ++# drop-in replacement for crypto/aes/asm/aes-x86_64.pl [see below for ++# details]. ++ ++$PREFIX="aesni"; # if $PREFIX is set to "AES", the script ++ # generates drop-in replacement for ++ # crypto/aes/asm/aes-x86_64.pl:-) ++ ++$flavour = shift; ++$output = shift; ++if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } ++ ++$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); ++ ++$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ++( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or ++( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or ++die "can't locate x86_64-xlate.pl"; ++ ++open STDOUT,"| $^X $xlate $flavour $output"; ++ ++$movkey = $PREFIX eq "aesni" ? "movaps" : "movups"; ++@_4args=$win64? ("%rcx","%rdx","%r8", "%r9") : # Win64 order ++ ("%rdi","%rsi","%rdx","%rcx"); # Unix order ++ ++$code=".text\n"; ++ ++$rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! ++# this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... ++$inp="%rdi"; ++$out="%rsi"; ++$len="%rdx"; ++$key="%rcx"; # input to and changed by aesni_[en|de]cryptN !!! ++$ivp="%r8"; # cbc ++ ++$rnds_="%r10d"; # backup copy for $rounds ++$key_="%r11"; # backup copy for $key ++ ++# %xmm register layout ++$inout0="%xmm0"; $inout1="%xmm1"; ++$inout2="%xmm2"; $inout3="%xmm3"; ++$rndkey0="%xmm4"; $rndkey1="%xmm5"; ++ ++$iv="%xmm6"; $in0="%xmm7"; # used in CBC decrypt ++$in1="%xmm8"; $in2="%xmm9"; ++ ++# Inline version of internal aesni_[en|de]crypt1. ++# ++# Why folded loop? Because aes[enc|dec] is slow enough to accommodate ++# cycles which take care of loop variables... ++{ my $sn; ++sub aesni_generate1 { ++my ($p,$key,$rounds)=@_; ++++$sn; ++$code.=<<___; ++ $movkey ($key),$rndkey0 ++ $movkey 16($key),$rndkey1 ++ lea 32($key),$key ++ pxor $rndkey0,$inout0 ++.Loop_${p}1_$sn: ++ aes${p} $rndkey1,$inout0 ++ dec $rounds ++ $movkey ($key),$rndkey1 ++ lea 16($key),$key ++ jnz .Loop_${p}1_$sn # loop body is 16 bytes ++ aes${p}last $rndkey1,$inout0 ++___ ++}} ++# void $PREFIX_[en|de]crypt (const void *inp,void *out,const AES_KEY *key); ++# ++{ my ($inp,$out,$key) = @_4args; ++ ++$code.=<<___; ++.globl ${PREFIX}_encrypt ++.type ${PREFIX}_encrypt,\@abi-omnipotent ++.align 16 ++${PREFIX}_encrypt: ++ movups ($inp),$inout0 # load input ++ mov 240($key),$rounds # pull $rounds ++___ ++ &aesni_generate1("enc",$key,$rounds); ++$code.=<<___; ++ movups $inout0,($out) # output ++ ret ++.size ${PREFIX}_encrypt,.-${PREFIX}_encrypt ++ ++.globl ${PREFIX}_decrypt ++.type ${PREFIX}_decrypt,\@abi-omnipotent ++.align 16 ++${PREFIX}_decrypt: ++ movups ($inp),$inout0 # load input ++ mov 240($key),$rounds # pull $rounds ++___ ++ &aesni_generate1("dec",$key,$rounds); ++$code.=<<___; ++ movups $inout0,($out) # output ++ ret ++.size ${PREFIX}_decrypt, .-${PREFIX}_decrypt ++___ ++} ++ ++# _aesni_[en|de]crypt[34] are private interfaces, N denotes interleave ++# factor. Why 3x subroutine is used in loops? Even though aes[enc|dec] ++# latency is 6, it turned out that it can be scheduled only every ++# *second* cycle. Thus 3x interleave is the one providing optimal ++# utilization, i.e. when subroutine's throughput is virtually same as ++# of non-interleaved subroutine [for number of input blocks up to 3]. ++# This is why it makes no sense to implement 2x subroutine. As soon ++# as/if Intel improves throughput by making it possible to schedule ++# the instructions in question *every* cycles I would have to ++# implement 6x interleave and use it in loop... ++sub aesni_generate3 { ++my $dir=shift; ++# As already mentioned it takes in $key and $rounds, which are *not* ++# preserved. $inout[0-2] is cipher/clear text... ++$code.=<<___; ++.type _aesni_${dir}rypt3,\@abi-omnipotent ++.align 16 ++_aesni_${dir}rypt3: ++ $movkey ($key),$rndkey0 ++ shr \$1,$rounds ++ $movkey 16($key),$rndkey1 ++ lea 32($key),$key ++ pxor $rndkey0,$inout0 ++ pxor $rndkey0,$inout1 ++ pxor $rndkey0,$inout2 ++ ++.L${dir}_loop3: ++ aes${dir} $rndkey1,$inout0 ++ $movkey ($key),$rndkey0 ++ aes${dir} $rndkey1,$inout1 ++ dec $rounds ++ aes${dir} $rndkey1,$inout2 ++ aes${dir} $rndkey0,$inout0 ++ $movkey 16($key),$rndkey1 ++ aes${dir} $rndkey0,$inout1 ++ lea 32($key),$key ++ aes${dir} $rndkey0,$inout2 ++ jnz .L${dir}_loop3 ++ ++ aes${dir} $rndkey1,$inout0 ++ $movkey ($key),$rndkey0 ++ aes${dir} $rndkey1,$inout1 ++ aes${dir} $rndkey1,$inout2 ++ aes${dir}last $rndkey0,$inout0 ++ aes${dir}last $rndkey0,$inout1 ++ aes${dir}last $rndkey0,$inout2 ++ ret ++.size _aesni_${dir}rypt3,.-_aesni_${dir}rypt3 ++___ ++} ++# 4x interleave is implemented to improve small block performance, ++# most notably [and naturally] 4 block by ~30%. One can argue that one ++# should have implemented 5x as well, but improvement would be <20%, ++# so it's not worth it... ++sub aesni_generate4 { ++my $dir=shift; ++# As already mentioned it takes in $key and $rounds, which are *not* ++# preserved. $inout[0-3] is cipher/clear text... ++$code.=<<___; ++.type _aesni_${dir}rypt4,\@abi-omnipotent ++.align 16 ++_aesni_${dir}rypt4: ++ $movkey ($key),$rndkey0 ++ shr \$1,$rounds ++ $movkey 16($key),$rndkey1 ++ lea 32($key),$key ++ pxor $rndkey0,$inout0 ++ pxor $rndkey0,$inout1 ++ pxor $rndkey0,$inout2 ++ pxor $rndkey0,$inout3 ++ ++.L${dir}_loop4: ++ aes${dir} $rndkey1,$inout0 ++ $movkey ($key),$rndkey0 ++ aes${dir} $rndkey1,$inout1 ++ dec $rounds ++ aes${dir} $rndkey1,$inout2 ++ aes${dir} $rndkey1,$inout3 ++ aes${dir} $rndkey0,$inout0 ++ $movkey 16($key),$rndkey1 ++ aes${dir} $rndkey0,$inout1 ++ lea 32($key),$key ++ aes${dir} $rndkey0,$inout2 ++ aes${dir} $rndkey0,$inout3 ++ jnz .L${dir}_loop4 ++ ++ aes${dir} $rndkey1,$inout0 ++ $movkey ($key),$rndkey0 ++ aes${dir} $rndkey1,$inout1 ++ aes${dir} $rndkey1,$inout2 ++ aes${dir} $rndkey1,$inout3 ++ aes${dir}last $rndkey0,$inout0 ++ aes${dir}last $rndkey0,$inout1 ++ aes${dir}last $rndkey0,$inout2 ++ aes${dir}last $rndkey0,$inout3 ++ ret ++.size _aesni_${dir}rypt4,.-_aesni_${dir}rypt4 ++___ ++} ++&aesni_generate3("enc") if ($PREFIX eq "aesni"); ++&aesni_generate3("dec"); ++&aesni_generate4("enc") if ($PREFIX eq "aesni"); ++&aesni_generate4("dec"); ++ ++if ($PREFIX eq "aesni") { ++# void aesni_ecb_encrypt (const void *in, void *out, ++# size_t length, const AES_KEY *key, ++# int enc); ++$code.=<<___; ++.globl aesni_ecb_encrypt ++.type aesni_ecb_encrypt,\@function,5 ++.align 16 ++aesni_ecb_encrypt: ++ cmp \$16,$len # check length ++ jb .Lecb_ret ++ ++ mov 240($key),$rounds # pull $rounds ++ and \$-16,$len ++ mov $key,$key_ # backup $key ++ test %r8d,%r8d # 5th argument ++ mov $rounds,$rnds_ # backup $rounds ++ jz .Lecb_decrypt ++#--------------------------- ECB ENCRYPT ------------------------------# ++ sub \$0x40,$len ++ jbe .Lecb_enc_tail ++ jmp .Lecb_enc_loop3 ++.align 16 ++.Lecb_enc_loop3: ++ movups ($inp),$inout0 ++ movups 0x10($inp),$inout1 ++ movups 0x20($inp),$inout2 ++ call _aesni_encrypt3 ++ sub \$0x30,$len ++ lea 0x30($inp),$inp ++ lea 0x30($out),$out ++ movups $inout0,-0x30($out) ++ mov $rnds_,$rounds # restore $rounds ++ movups $inout1,-0x20($out) ++ mov $key_,$key # restore $key ++ movups $inout2,-0x10($out) ++ ja .Lecb_enc_loop3 ++ ++.Lecb_enc_tail: ++ add \$0x40,$len ++ jz .Lecb_ret ++ ++ cmp \$0x10,$len ++ movups ($inp),$inout0 ++ je .Lecb_enc_one ++ cmp \$0x20,$len ++ movups 0x10($inp),$inout1 ++ je .Lecb_enc_two ++ cmp \$0x30,$len ++ movups 0x20($inp),$inout2 ++ je .Lecb_enc_three ++ movups 0x30($inp),$inout3 ++ call _aesni_encrypt4 ++ movups $inout0,($out) ++ movups $inout1,0x10($out) ++ movups $inout2,0x20($out) ++ movups $inout3,0x30($out) ++ jmp .Lecb_ret ++.align 16 ++.Lecb_enc_one: ++___ ++ &aesni_generate1("enc",$key,$rounds); ++$code.=<<___; ++ movups $inout0,($out) ++ jmp .Lecb_ret ++.align 16 ++.Lecb_enc_two: ++ call _aesni_encrypt3 ++ movups $inout0,($out) ++ movups $inout1,0x10($out) ++ jmp .Lecb_ret ++.align 16 ++.Lecb_enc_three: ++ call _aesni_encrypt3 ++ movups $inout0,($out) ++ movups $inout1,0x10($out) ++ movups $inout2,0x20($out) ++ jmp .Lecb_ret ++ #--------------------------- ECB DECRYPT ------------------------------# ++.align 16 ++.Lecb_decrypt: ++ sub \$0x40,$len ++ jbe .Lecb_dec_tail ++ jmp .Lecb_dec_loop3 ++.align 16 ++.Lecb_dec_loop3: ++ movups ($inp),$inout0 ++ movups 0x10($inp),$inout1 ++ movups 0x20($inp),$inout2 ++ call _aesni_decrypt3 ++ sub \$0x30,$len ++ lea 0x30($inp),$inp ++ lea 0x30($out),$out ++ movups $inout0,-0x30($out) ++ mov $rnds_,$rounds # restore $rounds ++ movups $inout1,-0x20($out) ++ mov $key_,$key # restore $key ++ movups $inout2,-0x10($out) ++ ja .Lecb_dec_loop3 ++ ++.Lecb_dec_tail: ++ add \$0x40,$len ++ jz .Lecb_ret ++ ++ cmp \$0x10,$len ++ movups ($inp),$inout0 ++ je .Lecb_dec_one ++ cmp \$0x20,$len ++ movups 0x10($inp),$inout1 ++ je .Lecb_dec_two ++ cmp \$0x30,$len ++ movups 0x20($inp),$inout2 ++ je .Lecb_dec_three ++ movups 0x30($inp),$inout3 ++ call _aesni_decrypt4 ++ movups $inout0,($out) ++ movups $inout1,0x10($out) ++ movups $inout2,0x20($out) ++ movups $inout3,0x30($out) ++ jmp .Lecb_ret ++.align 16 ++.Lecb_dec_one: ++___ ++ &aesni_generate1("dec",$key,$rounds); ++$code.=<<___; ++ movups $inout0,($out) ++ jmp .Lecb_ret ++.align 16 ++.Lecb_dec_two: ++ call _aesni_decrypt3 ++ movups $inout0,($out) ++ movups $inout1,0x10($out) ++ jmp .Lecb_ret ++.align 16 ++.Lecb_dec_three: ++ call _aesni_decrypt3 ++ movups $inout0,($out) ++ movups $inout1,0x10($out) ++ movups $inout2,0x20($out) ++ ++.Lecb_ret: ++ ret ++.size aesni_ecb_encrypt,.-aesni_ecb_encrypt ++___ ++} ++ ++# void $PREFIX_cbc_encrypt (const void *inp, void *out, ++# size_t length, const AES_KEY *key, ++# unsigned char *ivp,const int enc); ++$reserved = $win64?0x40:-0x18; # used in decrypt ++$code.=<<___; ++.globl ${PREFIX}_cbc_encrypt ++.type ${PREFIX}_cbc_encrypt,\@function,6 ++.align 16 ++${PREFIX}_cbc_encrypt: ++ test $len,$len # check length ++ jz .Lcbc_ret ++ ++ mov 240($key),$rnds_ # pull $rounds ++ mov $key,$key_ # backup $key ++ test %r9d,%r9d # 6th argument ++ jz .Lcbc_decrypt ++#--------------------------- CBC ENCRYPT ------------------------------# ++ movups ($ivp),$inout0 # load iv as initial state ++ cmp \$16,$len ++ mov $rnds_,$rounds ++ jb .Lcbc_enc_tail ++ sub \$16,$len ++ jmp .Lcbc_enc_loop ++.align 16 ++.Lcbc_enc_loop: ++ movups ($inp),$inout1 # load input ++ lea 16($inp),$inp ++ pxor $inout1,$inout0 ++___ ++ &aesni_generate1("enc",$key,$rounds); ++$code.=<<___; ++ sub \$16,$len ++ lea 16($out),$out ++ mov $rnds_,$rounds # restore $rounds ++ mov $key_,$key # restore $key ++ movups $inout0,-16($out) # store output ++ jnc .Lcbc_enc_loop ++ add \$16,$len ++ jnz .Lcbc_enc_tail ++ movups $inout0,($ivp) ++ jmp .Lcbc_ret ++ ++.Lcbc_enc_tail: ++ mov $len,%rcx # zaps $key ++ xchg $inp,$out # $inp is %rsi and $out is %rdi now ++ .long 0x9066A4F3 # rep movsb ++ mov \$16,%ecx # zero tail ++ sub $len,%rcx ++ xor %eax,%eax ++ .long 0x9066AAF3 # rep stosb ++ lea -16(%rdi),%rdi # rewind $out by 1 block ++ mov $rnds_,$rounds # restore $rounds ++ mov %rdi,%rsi # $inp and $out are the same ++ mov $key_,$key # restore $key ++ xor $len,$len # len=16 ++ jmp .Lcbc_enc_loop # one more spin ++ #--------------------------- CBC DECRYPT ------------------------------# ++.align 16 ++.Lcbc_decrypt: ++___ ++$code.=<<___ if ($win64); ++ lea -0x58(%rsp),%rsp ++ movaps %xmm6,(%rsp) ++ movaps %xmm7,0x10(%rsp) ++ movaps %xmm8,0x20(%rsp) ++ movaps %xmm9,0x30(%rsp) ++.Lcbc_decrypt_body: ++___ ++$code.=<<___; ++ movups ($ivp),$iv ++ sub \$0x40,$len ++ mov $rnds_,$rounds ++ jbe .Lcbc_dec_tail ++ jmp .Lcbc_dec_loop3 ++.align 16 ++.Lcbc_dec_loop3: ++ movups ($inp),$inout0 ++ movups 0x10($inp),$inout1 ++ movups 0x20($inp),$inout2 ++ movaps $inout0,$in0 ++ movaps $inout1,$in1 ++ movaps $inout2,$in2 ++ call _aesni_decrypt3 ++ sub \$0x30,$len ++ lea 0x30($inp),$inp ++ lea 0x30($out),$out ++ pxor $iv,$inout0 ++ pxor $in0,$inout1 ++ movaps $in2,$iv ++ pxor $in1,$inout2 ++ movups $inout0,-0x30($out) ++ mov $rnds_,$rounds # restore $rounds ++ movups $inout1,-0x20($out) ++ mov $key_,$key # restore $key ++ movups $inout2,-0x10($out) ++ ja .Lcbc_dec_loop3 ++ ++.Lcbc_dec_tail: ++ add \$0x40,$len ++ movups $iv,($ivp) ++ jz .Lcbc_dec_ret ++ ++ movups ($inp),$inout0 ++ cmp \$0x10,$len ++ movaps $inout0,$in0 ++ jbe .Lcbc_dec_one ++ movups 0x10($inp),$inout1 ++ cmp \$0x20,$len ++ movaps $inout1,$in1 ++ jbe .Lcbc_dec_two ++ movups 0x20($inp),$inout2 ++ cmp \$0x30,$len ++ movaps $inout2,$in2 ++ jbe .Lcbc_dec_three ++ movups 0x30($inp),$inout3 ++ call _aesni_decrypt4 ++ pxor $iv,$inout0 ++ movups 0x30($inp),$iv ++ pxor $in0,$inout1 ++ movups $inout0,($out) ++ pxor $in1,$inout2 ++ movups $inout1,0x10($out) ++ pxor $in2,$inout3 ++ movups $inout2,0x20($out) ++ movaps $inout3,$inout0 ++ lea 0x30($out),$out ++ jmp .Lcbc_dec_tail_collected ++.align 16 ++.Lcbc_dec_one: ++___ ++ &aesni_generate1("dec",$key,$rounds); ++$code.=<<___; ++ pxor $iv,$inout0 ++ movaps $in0,$iv ++ jmp .Lcbc_dec_tail_collected ++.align 16 ++.Lcbc_dec_two: ++ call _aesni_decrypt3 ++ pxor $iv,$inout0 ++ pxor $in0,$inout1 ++ movups $inout0,($out) ++ movaps $in1,$iv ++ movaps $inout1,$inout0 ++ lea 0x10($out),$out ++ jmp .Lcbc_dec_tail_collected ++.align 16 ++.Lcbc_dec_three: ++ call _aesni_decrypt3 ++ pxor $iv,$inout0 ++ pxor $in0,$inout1 ++ movups $inout0,($out) ++ pxor $in1,$inout2 ++ movups $inout1,0x10($out) ++ movaps $in2,$iv ++ movaps $inout2,$inout0 ++ lea 0x20($out),$out ++ jmp .Lcbc_dec_tail_collected ++.align 16 ++.Lcbc_dec_tail_collected: ++ and \$15,$len ++ movups $iv,($ivp) ++ jnz .Lcbc_dec_tail_partial ++ movups $inout0,($out) ++ jmp .Lcbc_dec_ret ++.Lcbc_dec_tail_partial: ++ movaps $inout0,$reserved(%rsp) ++ mov $out,%rdi ++ mov $len,%rcx ++ lea $reserved(%rsp),%rsi ++ .long 0x9066A4F3 # rep movsb ++ ++.Lcbc_dec_ret: ++___ ++$code.=<<___ if ($win64); ++ movaps (%rsp),%xmm6 ++ movaps 0x10(%rsp),%xmm7 ++ movaps 0x20(%rsp),%xmm8 ++ movaps 0x30(%rsp),%xmm9 ++ lea 0x58(%rsp),%rsp ++___ ++$code.=<<___; ++.Lcbc_ret: ++ ret ++.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ++___ ++ ++# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, ++# int bits, AES_KEY *key) ++{ my ($inp,$bits,$key) = @_4args; ++ $bits =~ s/%r/%e/; ++ ++$code.=<<___; ++.globl ${PREFIX}_set_decrypt_key ++.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent ++.align 16 ++${PREFIX}_set_decrypt_key: ++ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 ++ call _aesni_set_encrypt_key ++ shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key ++ test %eax,%eax ++ jnz .Ldec_key_ret ++ lea 16($key,$bits),$inp # points at the end of key schedule ++ ++ $movkey ($key),%xmm0 # just swap ++ $movkey ($inp),%xmm1 ++ $movkey %xmm0,($inp) ++ $movkey %xmm1,($key) ++ lea 16($key),$key ++ lea -16($inp),$inp ++ ++.Ldec_key_inverse: ++ $movkey ($key),%xmm0 # swap and inverse ++ $movkey ($inp),%xmm1 ++ aesimc %xmm0,%xmm0 ++ aesimc %xmm1,%xmm1 ++ lea 16($key),$key ++ lea -16($inp),$inp ++ cmp $key,$inp ++ $movkey %xmm0,16($inp) ++ $movkey %xmm1,-16($key) ++ ja .Ldec_key_inverse ++ ++ $movkey ($key),%xmm0 # inverse middle ++ aesimc %xmm0,%xmm0 ++ $movkey %xmm0,($inp) ++.Ldec_key_ret: ++ add \$8,%rsp ++ ret ++.LSEH_end_set_decrypt_key: ++.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key ++___ ++ ++# This is based on submission by ++# ++# Huang Ying ++# Vinodh Gopal ++# Kahraman Akdemir ++# ++# Agressively optimized in respect to aeskeygenassist's critical path ++# and is contained in %xmm0-5 to meet Win64 ABI requirement. ++# ++$code.=<<___; ++.globl ${PREFIX}_set_encrypt_key ++.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent ++.align 16 ++${PREFIX}_set_encrypt_key: ++_aesni_set_encrypt_key: ++ .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 ++ test $inp,$inp ++ mov \$-1,%rax ++ jz .Lenc_key_ret ++ test $key,$key ++ jz .Lenc_key_ret ++ ++ movups ($inp),%xmm0 # pull first 128 bits of *userKey ++ pxor %xmm4,%xmm4 # low dword of xmm4 is assumed 0 ++ lea 16($key),%rax ++ cmp \$256,$bits ++ je .L14rounds ++ cmp \$192,$bits ++ je .L12rounds ++ cmp \$128,$bits ++ jne .Lbad_keybits ++ ++.L10rounds: ++ mov \$9,$bits # 10 rounds for 128-bit key ++ $movkey %xmm0,($key) # round 0 ++ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 ++ call .Lkey_expansion_128_cold ++ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 2 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 3 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 4 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 5 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 6 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x40,%xmm0,%xmm1 # round 7 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x80,%xmm0,%xmm1 # round 8 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x1b,%xmm0,%xmm1 # round 9 ++ call .Lkey_expansion_128 ++ aeskeygenassist \$0x36,%xmm0,%xmm1 # round 10 ++ call .Lkey_expansion_128 ++ $movkey %xmm0,(%rax) ++ mov $bits,80(%rax) # 240(%rdx) ++ xor %eax,%eax ++ jmp .Lenc_key_ret ++ ++.align 16 ++.L12rounds: ++ movq 16($inp),%xmm2 # remaining 1/3 of *userKey ++ mov \$11,$bits # 12 rounds for 192 ++ $movkey %xmm0,($key) # round 0 ++ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 ++ call .Lkey_expansion_192a_cold ++ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 2,3 ++ call .Lkey_expansion_192b ++ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 4,5 ++ call .Lkey_expansion_192a ++ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 5,6 ++ call .Lkey_expansion_192b ++ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 7,8 ++ call .Lkey_expansion_192a ++ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 8,9 ++ call .Lkey_expansion_192b ++ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 10,11 ++ call .Lkey_expansion_192a ++ aeskeygenassist \$0x80,%xmm2,%xmm1 # round 11,12 ++ call .Lkey_expansion_192b ++ $movkey %xmm0,(%rax) ++ mov $bits,48(%rax) # 240(%rdx) ++ xor %rax, %rax ++ jmp .Lenc_key_ret ++ ++.align 16 ++.L14rounds: ++ movups 16($inp),%xmm2 # remaning half of *userKey ++ mov \$13,$bits # 14 rounds for 256 ++ lea 16(%rax),%rax ++ $movkey %xmm0,($key) # round 0 ++ $movkey %xmm2,16($key) # round 1 ++ aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 ++ call .Lkey_expansion_256a_cold ++ aeskeygenassist \$0x1,%xmm0,%xmm1 # round 3 ++ call .Lkey_expansion_256b ++ aeskeygenassist \$0x2,%xmm2,%xmm1 # round 4 ++ call .Lkey_expansion_256a ++ aeskeygenassist \$0x2,%xmm0,%xmm1 # round 5 ++ call .Lkey_expansion_256b ++ aeskeygenassist \$0x4,%xmm2,%xmm1 # round 6 ++ call .Lkey_expansion_256a ++ aeskeygenassist \$0x4,%xmm0,%xmm1 # round 7 ++ call .Lkey_expansion_256b ++ aeskeygenassist \$0x8,%xmm2,%xmm1 # round 8 ++ call .Lkey_expansion_256a ++ aeskeygenassist \$0x8,%xmm0,%xmm1 # round 9 ++ call .Lkey_expansion_256b ++ aeskeygenassist \$0x10,%xmm2,%xmm1 # round 10 ++ call .Lkey_expansion_256a ++ aeskeygenassist \$0x10,%xmm0,%xmm1 # round 11 ++ call .Lkey_expansion_256b ++ aeskeygenassist \$0x20,%xmm2,%xmm1 # round 12 ++ call .Lkey_expansion_256a ++ aeskeygenassist \$0x20,%xmm0,%xmm1 # round 13 ++ call .Lkey_expansion_256b ++ aeskeygenassist \$0x40,%xmm2,%xmm1 # round 14 ++ call .Lkey_expansion_256a ++ $movkey %xmm0,(%rax) ++ mov $bits,16(%rax) # 240(%rdx) ++ xor %rax,%rax ++ jmp .Lenc_key_ret ++ ++.align 16 ++.Lbad_keybits: ++ mov \$-2,%rax ++.Lenc_key_ret: ++ add \$8,%rsp ++ ret ++.LSEH_end_set_encrypt_key: ++ ++.align 16 ++.Lkey_expansion_128: ++ $movkey %xmm0,(%rax) ++ lea 16(%rax),%rax ++.Lkey_expansion_128_cold: ++ shufps \$0b00010000,%xmm0,%xmm4 ++ pxor %xmm4, %xmm0 ++ shufps \$0b10001100,%xmm0,%xmm4 ++ pxor %xmm4, %xmm0 ++ pshufd \$0b11111111,%xmm1,%xmm1 # critical path ++ pxor %xmm1,%xmm0 ++ ret ++ ++.align 16 ++.Lkey_expansion_192a: ++ $movkey %xmm0,(%rax) ++ lea 16(%rax),%rax ++.Lkey_expansion_192a_cold: ++ movaps %xmm2, %xmm5 ++.Lkey_expansion_192b_warm: ++ shufps \$0b00010000,%xmm0,%xmm4 ++ movaps %xmm2,%xmm3 ++ pxor %xmm4,%xmm0 ++ shufps \$0b10001100,%xmm0,%xmm4 ++ pslldq \$4,%xmm3 ++ pxor %xmm4,%xmm0 ++ pshufd \$0b01010101,%xmm1,%xmm1 # critical path ++ pxor %xmm3,%xmm2 ++ pxor %xmm1,%xmm0 ++ pshufd \$0b11111111,%xmm0,%xmm3 ++ pxor %xmm3,%xmm2 ++ ret ++ ++.align 16 ++.Lkey_expansion_192b: ++ movaps %xmm0,%xmm3 ++ shufps \$0b01000100,%xmm0,%xmm5 ++ $movkey %xmm5,(%rax) ++ shufps \$0b01001110,%xmm2,%xmm3 ++ $movkey %xmm3,16(%rax) ++ lea 32(%rax),%rax ++ jmp .Lkey_expansion_192b_warm ++ ++.align 16 ++.Lkey_expansion_256a: ++ $movkey %xmm2,(%rax) ++ lea 16(%rax),%rax ++.Lkey_expansion_256a_cold: ++ shufps \$0b00010000,%xmm0,%xmm4 ++ pxor %xmm4,%xmm0 ++ shufps \$0b10001100,%xmm0,%xmm4 ++ pxor %xmm4,%xmm0 ++ pshufd \$0b11111111,%xmm1,%xmm1 # critical path ++ pxor %xmm1,%xmm0 ++ ret ++ ++.align 16 ++.Lkey_expansion_256b: ++ $movkey %xmm0,(%rax) ++ lea 16(%rax),%rax ++ ++ shufps \$0b00010000,%xmm2,%xmm4 ++ pxor %xmm4,%xmm2 ++ shufps \$0b10001100,%xmm2,%xmm4 ++ pxor %xmm4,%xmm2 ++ pshufd \$0b10101010,%xmm1,%xmm1 # critical path ++ pxor %xmm1,%xmm2 ++ ret ++.size ${PREFIX}_set_encrypt_key,.-${PREFIX}_set_encrypt_key ++___ ++} ++ ++$code.=<<___; ++.asciz "AES for Intel AES-NI, CRYPTOGAMS by " ++.align 64 ++___ ++ ++# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, ++# CONTEXT *context,DISPATCHER_CONTEXT *disp) ++if ($win64) { ++$rec="%rcx"; ++$frame="%rdx"; ++$context="%r8"; ++$disp="%r9"; ++ ++$code.=<<___; ++.extern __imp_RtlVirtualUnwind ++.type cbc_se_handler,\@abi-omnipotent ++.align 16 ++cbc_se_handler: ++ push %rsi ++ push %rdi ++ push %rbx ++ push %rbp ++ push %r12 ++ push %r13 ++ push %r14 ++ push %r15 ++ pushfq ++ sub \$64,%rsp ++ ++ mov 152($context),%rax # pull context->Rsp ++ mov 248($context),%rbx # pull context->Rip ++ ++ lea .Lcbc_decrypt(%rip),%r10 ++ cmp %r10,%rbx # context->Rip<"prologue" label ++ jb .Lin_prologue ++ ++ lea .Lcbc_decrypt_body(%rip),%r10 ++ cmp %r10,%rbx # context->RipRip>="epilogue" label ++ jae .Lin_prologue ++ ++ lea 0(%rax),%rsi # top of stack ++ lea 512($context),%rdi # &context.Xmm6 ++ mov \$8,%ecx # 4*sizeof(%xmm0)/sizeof(%rax) ++ .long 0xa548f3fc # cld; rep movsq ++ lea 0x58(%rax),%rax # adjust stack pointer ++ jmp .Lin_prologue ++ ++.Lrestore_rax: ++ mov 120($context),%rax ++.Lin_prologue: ++ mov 8(%rax),%rdi ++ mov 16(%rax),%rsi ++ mov %rax,152($context) # restore context->Rsp ++ mov %rsi,168($context) # restore context->Rsi ++ mov %rdi,176($context) # restore context->Rdi ++ ++ jmp .Lcommon_seh_exit ++.size cbc_se_handler,.-cbc_se_handler ++ ++.type ecb_se_handler,\@abi-omnipotent ++.align 16 ++ecb_se_handler: ++ push %rsi ++ push %rdi ++ push %rbx ++ push %rbp ++ push %r12 ++ push %r13 ++ push %r14 ++ push %r15 ++ pushfq ++ sub \$64,%rsp ++ ++ mov 152($context),%rax # pull context->Rsp ++ mov 8(%rax),%rdi ++ mov 16(%rax),%rsi ++ mov %rsi,168($context) # restore context->Rsi ++ mov %rdi,176($context) # restore context->Rdi ++ ++.Lcommon_seh_exit: ++ ++ mov 40($disp),%rdi # disp->ContextRecord ++ mov $context,%rsi # context ++ mov \$154,%ecx # sizeof(CONTEXT) ++ .long 0xa548f3fc # cld; rep movsq ++ ++ mov $disp,%rsi ++ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER ++ mov 8(%rsi),%rdx # arg2, disp->ImageBase ++ mov 0(%rsi),%r8 # arg3, disp->ControlPc ++ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry ++ mov 40(%rsi),%r10 # disp->ContextRecord ++ lea 56(%rsi),%r11 # &disp->HandlerData ++ lea 24(%rsi),%r12 # &disp->EstablisherFrame ++ mov %r10,32(%rsp) # arg5 ++ mov %r11,40(%rsp) # arg6 ++ mov %r12,48(%rsp) # arg7 ++ mov %rcx,56(%rsp) # arg8, (NULL) ++ call *__imp_RtlVirtualUnwind(%rip) ++ ++ mov \$1,%eax # ExceptionContinueSearch ++ add \$64,%rsp ++ popfq ++ pop %r15 ++ pop %r14 ++ pop %r13 ++ pop %r12 ++ pop %rbp ++ pop %rbx ++ pop %rdi ++ pop %rsi ++ ret ++.size cbc_se_handler,.-cbc_se_handler ++ ++.section .pdata ++.align 4 ++ .rva .LSEH_begin_${PREFIX}_ecb_encrypt ++ .rva .LSEH_end_${PREFIX}_ecb_encrypt ++ .rva .LSEH_info_ecb ++ ++ .rva .LSEH_begin_${PREFIX}_cbc_encrypt ++ .rva .LSEH_end_${PREFIX}_cbc_encrypt ++ .rva .LSEH_info_cbc ++ ++ .rva ${PREFIX}_set_decrypt_key ++ .rva .LSEH_end_set_decrypt_key ++ .rva .LSEH_info_key ++ ++ .rva ${PREFIX}_set_encrypt_key ++ .rva .LSEH_end_set_encrypt_key ++ .rva .LSEH_info_key ++.section .xdata ++.align 8 ++.LSEH_info_ecb: ++ .byte 9,0,0,0 ++ .rva ecb_se_handler ++.LSEH_info_cbc: ++ .byte 9,0,0,0 ++ .rva cbc_se_handler ++.LSEH_info_key: ++ .byte 0x01,0x04,0x01,0x00 ++ .byte 0x04,0x02,0x00,0x00 ++___ ++} ++ ++sub rex { ++ local *opcode=shift; ++ my ($dst,$src)=@_; ++ ++ if ($dst>=8 || $src>=8) { ++ $rex=0x40; ++ $rex|=0x04 if($dst>=8); ++ $rex|=0x01 if($src>=8); ++ push @opcode,$rex; ++ } ++} ++ ++sub aesni { ++ my $line=shift; ++ my @opcode=(0x66); ++ ++ if ($line=~/(aeskeygenassist)\s+\$([x0-9a-f]+),\s*%xmm([0-9]+),\s*%xmm([0-9]+)/) { ++ rex(\@opcode,$4,$3); ++ push @opcode,0x0f,0x3a,0xdf; ++ push @opcode,0xc0|($3&7)|(($4&7)<<3); # ModR/M ++ my $c=$2; ++ push @opcode,$c=~/^0/?oct($c):$c; ++ return ".byte\t".join(',',@opcode); ++ } ++ elsif ($line=~/(aes[a-z]+)\s+%xmm([0-9]+),\s*%xmm([0-9]+)/) { ++ my %opcodelet = ( ++ "aesimc" => 0xdb, ++ "aesenc" => 0xdc, "aesenclast" => 0xdd, ++ "aesdec" => 0xde, "aesdeclast" => 0xdf ++ ); ++ return undef if (!defined($opcodelet{$1})); ++ rex(\@opcode,$3,$2); ++ push @opcode,0x0f,0x38,$opcodelet{$1}; ++ push @opcode,0xc0|($2&7)|(($3&7)<<3); # ModR/M ++ return ".byte\t".join(',',@opcode); ++ } ++ return $line; ++} ++ ++$code =~ s/\`([^\`]*)\`/eval($1)/gem; ++$code =~ s/\b(aes.*%xmm[0-9]+).*$/aesni($1)/gem; ++ ++print $code; ++ ++close STDOUT; +diff -up openssl-1.0.0b/crypto/aes/Makefile.aesni openssl-1.0.0b/crypto/aes/Makefile +--- openssl-1.0.0b/crypto/aes/Makefile.aesni 2008-12-23 12:33:00.000000000 +0100 ++++ openssl-1.0.0b/crypto/aes/Makefile 2010-11-16 17:33:23.000000000 +0100 +@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S + + aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl + $(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ ++aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl ++ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@ + + aes-x86_64.s: asm/aes-x86_64.pl + $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@ ++aesni-x86_64.s: asm/aesni-x86_64.pl ++ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@ + + aes-sparcv9.s: asm/aes-sparcv9.pl + $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@ +diff -up openssl-1.0.0b/crypto/engine/eng_aesni.c.aesni openssl-1.0.0b/crypto/engine/eng_aesni.c +--- openssl-1.0.0b/crypto/engine/eng_aesni.c.aesni 2010-11-16 17:33:23.000000000 +0100 ++++ openssl-1.0.0b/crypto/engine/eng_aesni.c 2010-11-16 17:33:23.000000000 +0100 +@@ -0,0 +1,413 @@ ++/* ++ * Support for Intel AES-NI intruction set ++ * Author: Huang Ying ++ * ++ * Intel AES-NI is a new set of Single Instruction Multiple Data ++ * (SIMD) instructions that are going to be introduced in the next ++ * generation of Intel processor, as of 2009. These instructions ++ * enable fast and secure data encryption and decryption, using the ++ * Advanced Encryption Standard (AES), defined by FIPS Publication ++ * number 197. The architecture introduces six instructions that ++ * offer full hardware support for AES. Four of them support high ++ * performance data encryption and decryption, and the other two ++ * instructions support the AES key expansion procedure. ++ * ++ * The white paper can be downloaded from: ++ * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf ++ * ++ * This file is based on engines/e_padlock.c ++ */ ++ ++/* ==================================================================== ++ * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in ++ * the documentation and/or other materials provided with the ++ * distribution. ++ * ++ * 3. All advertising materials mentioning features or use of this ++ * software must display the following acknowledgment: ++ * "This product includes software developed by the OpenSSL Project ++ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" ++ * ++ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to ++ * endorse or promote products derived from this software without ++ * prior written permission. For written permission, please contact ++ * licensing@OpenSSL.org. ++ * ++ * 5. Products derived from this software may not be called "OpenSSL" ++ * nor may "OpenSSL" appear in their names without prior written ++ * permission of the OpenSSL Project. ++ * ++ * 6. Redistributions of any form whatsoever must retain the following ++ * acknowledgment: ++ * "This product includes software developed by the OpenSSL Project ++ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY ++ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR ++ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR ++ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, ++ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT ++ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, ++ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED ++ * OF THE POSSIBILITY OF SUCH DAMAGE. ++ * ==================================================================== ++ * ++ * This product includes cryptographic software written by Eric Young ++ * (eay@cryptsoft.com). This product includes software written by Tim ++ * Hudson (tjh@cryptsoft.com). ++ * ++ */ ++ ++ ++#include ++ ++#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AES_NI) && !defined(OPENSSL_NO_AES) ++ ++#include ++#include "cryptlib.h" ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* AES-NI is available *ONLY* on some x86 CPUs. Not only that it ++ doesn't exist elsewhere, but it even can't be compiled on other ++ platforms! */ ++#undef COMPILE_HW_AESNI ++#if (defined(__x86_64) || defined(__x86_64__) || \ ++ defined(_M_AMD64) || defined(_M_X64) || \ ++ defined(OPENSSL_IA32_SSE2)) && !defined(OPENSSL_NO_ASM) ++#define COMPILE_HW_AESNI ++static ENGINE *ENGINE_aesni (void); ++#endif ++ ++void ENGINE_load_aesni (void) ++{ ++/* On non-x86 CPUs it just returns. */ ++#ifdef COMPILE_HW_AESNI ++ ENGINE *toadd = ENGINE_aesni(); ++ if (!toadd) ++ return; ++ ENGINE_add (toadd); ++ ENGINE_register_complete (toadd); ++ ENGINE_free (toadd); ++ ERR_clear_error (); ++#endif ++} ++ ++#ifdef COMPILE_HW_AESNI ++int aesni_set_encrypt_key(const unsigned char *userKey, int bits, ++ AES_KEY *key); ++int aesni_set_decrypt_key(const unsigned char *userKey, int bits, ++ AES_KEY *key); ++ ++void aesni_encrypt(const unsigned char *in, unsigned char *out, ++ const AES_KEY *key); ++void aesni_decrypt(const unsigned char *in, unsigned char *out, ++ const AES_KEY *key); ++ ++void aesni_ecb_encrypt(const unsigned char *in, ++ unsigned char *out, ++ size_t length, ++ const AES_KEY *key, ++ int enc); ++void aesni_cbc_encrypt(const unsigned char *in, ++ unsigned char *out, ++ size_t length, ++ const AES_KEY *key, ++ unsigned char *ivec, int enc); ++ ++/* Function for ENGINE detection and control */ ++static int aesni_init(ENGINE *e); ++ ++/* Cipher Stuff */ ++static int aesni_ciphers(ENGINE *e, const EVP_CIPHER **cipher, ++ const int **nids, int nid); ++ ++#define AESNI_MIN_ALIGN 16 ++#define AESNI_ALIGN(x) \ ++ ((void *)(((unsigned long)(x)+AESNI_MIN_ALIGN-1)&~(AESNI_MIN_ALIGN-1))) ++ ++/* Engine names */ ++static const char aesni_id[] = "aesni", ++ aesni_name[] = "Intel AES-NI engine", ++ no_aesni_name[] = "Intel AES-NI engine (no-aesni)"; ++ ++/* ===== Engine "management" functions ===== */ ++ ++#if defined(_WIN32) ++typedef unsigned __int64 IA32CAP; ++#else ++typedef unsigned long long IA32CAP; ++#endif ++ ++/* Prepare the ENGINE structure for registration */ ++static int ++aesni_bind_helper(ENGINE *e) ++{ ++ int engage; ++ if (sizeof(OPENSSL_ia32cap_P) > 4) { ++ engage = (OPENSSL_ia32cap_P >> 57) & 1; ++ } else { ++ IA32CAP OPENSSL_ia32_cpuid(void); ++ engage = (OPENSSL_ia32_cpuid() >> 57) & 1; ++ } ++ ++ /* Register everything or return with an error */ ++ if (!ENGINE_set_id(e, aesni_id) || ++ !ENGINE_set_name(e, engage ? aesni_name : no_aesni_name) || ++ ++ !ENGINE_set_init_function(e, aesni_init) || ++ (engage && !ENGINE_set_ciphers (e, aesni_ciphers)) ++ ) ++ return 0; ++ ++ /* Everything looks good */ ++ return 1; ++} ++ ++/* Constructor */ ++static ENGINE * ++ENGINE_aesni(void) ++{ ++ ENGINE *eng = ENGINE_new(); ++ ++ if (!eng) { ++ return NULL; ++ } ++ ++ if (!aesni_bind_helper(eng)) { ++ ENGINE_free(eng); ++ return NULL; ++ } ++ ++ return eng; ++} ++ ++/* Check availability of the engine */ ++static int ++aesni_init(ENGINE *e) ++{ ++ return 1; ++} ++ ++#if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) ++#define NID_aes_128_cfb NID_aes_128_cfb128 ++#endif ++ ++#if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) ++#define NID_aes_128_ofb NID_aes_128_ofb128 ++#endif ++ ++#if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) ++#define NID_aes_192_cfb NID_aes_192_cfb128 ++#endif ++ ++#if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) ++#define NID_aes_192_ofb NID_aes_192_ofb128 ++#endif ++ ++#if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) ++#define NID_aes_256_cfb NID_aes_256_cfb128 ++#endif ++ ++#if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) ++#define NID_aes_256_ofb NID_aes_256_ofb128 ++#endif ++ ++/* List of supported ciphers. */ ++static int aesni_cipher_nids[] = { ++ NID_aes_128_ecb, ++ NID_aes_128_cbc, ++ NID_aes_128_cfb, ++ NID_aes_128_ofb, ++ ++ NID_aes_192_ecb, ++ NID_aes_192_cbc, ++ NID_aes_192_cfb, ++ NID_aes_192_ofb, ++ ++ NID_aes_256_ecb, ++ NID_aes_256_cbc, ++ NID_aes_256_cfb, ++ NID_aes_256_ofb, ++}; ++static int aesni_cipher_nids_num = ++ (sizeof(aesni_cipher_nids)/sizeof(aesni_cipher_nids[0])); ++ ++typedef struct ++{ ++ AES_KEY ks; ++ unsigned int _pad1[3]; ++} AESNI_KEY; ++ ++static int ++aesni_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *user_key, ++ const unsigned char *iv, int enc) ++{ ++ int ret; ++ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); ++ ++ if ((ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_CFB_MODE ++ || (ctx->cipher->flags & EVP_CIPH_MODE) == EVP_CIPH_OFB_MODE ++ || enc) ++ ret=aesni_set_encrypt_key(user_key, ctx->key_len * 8, key); ++ else ++ ret=aesni_set_decrypt_key(user_key, ctx->key_len * 8, key); ++ ++ if(ret < 0) { ++ EVPerr(EVP_F_AESNI_INIT_KEY,EVP_R_AES_KEY_SETUP_FAILED); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int aesni_cipher_ecb(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); ++ aesni_ecb_encrypt(in, out, inl, key, ctx->encrypt); ++ return 1; ++} ++static int aesni_cipher_cbc(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); ++ aesni_cbc_encrypt(in, out, inl, key, ++ ctx->iv, ctx->encrypt); ++ return 1; ++} ++static int aesni_cipher_cfb(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); ++ CRYPTO_cfb128_encrypt(in, out, inl, key, ctx->iv, ++ &ctx->num, ctx->encrypt, ++ (block128_f)aesni_encrypt); ++ return 1; ++} ++static int aesni_cipher_ofb(EVP_CIPHER_CTX *ctx, unsigned char *out, ++ const unsigned char *in, size_t inl) ++{ AES_KEY *key = AESNI_ALIGN(ctx->cipher_data); ++ CRYPTO_ofb128_encrypt(in, out, inl, key, ctx->iv, ++ &ctx->num, (block128_f)aesni_encrypt); ++ return 1; ++} ++ ++#define AES_BLOCK_SIZE 16 ++ ++#define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE ++#define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE ++#define EVP_CIPHER_block_size_OFB 1 ++#define EVP_CIPHER_block_size_CFB 1 ++ ++/* Declaring so many ciphers by hand would be a pain. ++ Instead introduce a bit of preprocessor magic :-) */ ++#define DECLARE_AES_EVP(ksize,lmode,umode) \ ++static const EVP_CIPHER aesni_##ksize##_##lmode = { \ ++ NID_aes_##ksize##_##lmode, \ ++ EVP_CIPHER_block_size_##umode, \ ++ ksize / 8, \ ++ AES_BLOCK_SIZE, \ ++ 0 | EVP_CIPH_##umode##_MODE, \ ++ aesni_init_key, \ ++ aesni_cipher_##lmode, \ ++ NULL, \ ++ sizeof(AESNI_KEY), \ ++ EVP_CIPHER_set_asn1_iv, \ ++ EVP_CIPHER_get_asn1_iv, \ ++ NULL, \ ++ NULL \ ++} ++ ++DECLARE_AES_EVP(128,ecb,ECB); ++DECLARE_AES_EVP(128,cbc,CBC); ++DECLARE_AES_EVP(128,cfb,CFB); ++DECLARE_AES_EVP(128,ofb,OFB); ++ ++DECLARE_AES_EVP(192,ecb,ECB); ++DECLARE_AES_EVP(192,cbc,CBC); ++DECLARE_AES_EVP(192,cfb,CFB); ++DECLARE_AES_EVP(192,ofb,OFB); ++ ++DECLARE_AES_EVP(256,ecb,ECB); ++DECLARE_AES_EVP(256,cbc,CBC); ++DECLARE_AES_EVP(256,cfb,CFB); ++DECLARE_AES_EVP(256,ofb,OFB); ++ ++static int ++aesni_ciphers (ENGINE *e, const EVP_CIPHER **cipher, ++ const int **nids, int nid) ++{ ++ /* No specific cipher => return a list of supported nids ... */ ++ if (!cipher) { ++ *nids = aesni_cipher_nids; ++ return aesni_cipher_nids_num; ++ } ++ ++ /* ... or the requested "cipher" otherwise */ ++ switch (nid) { ++ case NID_aes_128_ecb: ++ *cipher = &aesni_128_ecb; ++ break; ++ case NID_aes_128_cbc: ++ *cipher = &aesni_128_cbc; ++ break; ++ case NID_aes_128_cfb: ++ *cipher = &aesni_128_cfb; ++ break; ++ case NID_aes_128_ofb: ++ *cipher = &aesni_128_ofb; ++ break; ++ ++ case NID_aes_192_ecb: ++ *cipher = &aesni_192_ecb; ++ break; ++ case NID_aes_192_cbc: ++ *cipher = &aesni_192_cbc; ++ break; ++ case NID_aes_192_cfb: ++ *cipher = &aesni_192_cfb; ++ break; ++ case NID_aes_192_ofb: ++ *cipher = &aesni_192_ofb; ++ break; ++ ++ case NID_aes_256_ecb: ++ *cipher = &aesni_256_ecb; ++ break; ++ case NID_aes_256_cbc: ++ *cipher = &aesni_256_cbc; ++ break; ++ case NID_aes_256_cfb: ++ *cipher = &aesni_256_cfb; ++ break; ++ case NID_aes_256_ofb: ++ *cipher = &aesni_256_ofb; ++ break; ++ ++ default: ++ /* Sorry, we don't support this NID */ ++ *cipher = NULL; ++ return 0; ++ } ++ ++ return 1; ++} ++ ++#endif /* COMPILE_HW_AESNI */ ++#endif /* !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) && !defined(OPENSSL_NO_AES) */ +diff -up openssl-1.0.0b/crypto/engine/eng_all.c.aesni openssl-1.0.0b/crypto/engine/eng_all.c +--- openssl-1.0.0b/crypto/engine/eng_all.c.aesni 2010-11-16 17:33:22.000000000 +0100 ++++ openssl-1.0.0b/crypto/engine/eng_all.c 2010-11-16 17:33:23.000000000 +0100 +@@ -85,6 +85,9 @@ void ENGINE_load_builtin_engines(void) + #if !defined(OPENSSL_NO_HW) && (defined(__OpenBSD__) || defined(__FreeBSD__) || defined(HAVE_CRYPTODEV)) + ENGINE_load_cryptodev(); + #endif ++#if !defined(OPENSSL_NO_HW) && !defined(OPENSSL_NO_HW_AESNI) ++ ENGINE_load_aesni(); ++#endif + ENGINE_load_dynamic(); + #ifndef OPENSSL_NO_STATIC_ENGINE + #ifndef OPENSSL_NO_HW +diff -up openssl-1.0.0b/crypto/engine/engine.h.aesni openssl-1.0.0b/crypto/engine/engine.h +--- openssl-1.0.0b/crypto/engine/engine.h.aesni 2010-11-16 17:33:22.000000000 +0100 ++++ openssl-1.0.0b/crypto/engine/engine.h 2010-11-16 17:33:23.000000000 +0100 +@@ -338,6 +338,7 @@ void ENGINE_load_gost(void); + #endif + #endif + void ENGINE_load_cryptodev(void); ++void ENGINE_load_aesni(void); + void ENGINE_load_builtin_engines(void); + + /* Get and set global flags (ENGINE_TABLE_FLAG_***) for the implementation +diff -up openssl-1.0.0b/crypto/engine/Makefile.aesni openssl-1.0.0b/crypto/engine/Makefile +--- openssl-1.0.0b/crypto/engine/Makefile.aesni 2010-11-15 15:44:49.000000000 +0100 ++++ openssl-1.0.0b/crypto/engine/Makefile 2010-11-16 17:33:23.000000000 +0100 +@@ -21,12 +21,14 @@ LIBSRC= eng_err.c eng_lib.c eng_list.c e + eng_table.c eng_pkey.c eng_fat.c eng_all.c \ + tb_rsa.c tb_dsa.c tb_ecdsa.c tb_dh.c tb_ecdh.c tb_rand.c tb_store.c \ + tb_cipher.c tb_digest.c tb_pkmeth.c tb_asnmth.c \ +- eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c ++ eng_openssl.c eng_cnf.c eng_dyn.c eng_cryptodev.c \ ++ eng_aesni.c + LIBOBJ= eng_err.o eng_lib.o eng_list.o eng_init.o eng_ctrl.o \ + eng_table.o eng_pkey.o eng_fat.o eng_all.o \ + tb_rsa.o tb_dsa.o tb_ecdsa.o tb_dh.o tb_ecdh.o tb_rand.o tb_store.o \ + tb_cipher.o tb_digest.o tb_pkmeth.o tb_asnmth.o \ +- eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o ++ eng_openssl.o eng_cnf.o eng_dyn.o eng_cryptodev.o \ ++ eng_aesni.o + + SRC= $(LIBSRC) + +diff -up openssl-1.0.0b/crypto/evp/evp_err.c.aesni openssl-1.0.0b/crypto/evp/evp_err.c +--- openssl-1.0.0b/crypto/evp/evp_err.c.aesni 2010-11-16 17:33:22.000000000 +0100 ++++ openssl-1.0.0b/crypto/evp/evp_err.c 2010-11-16 17:33:23.000000000 +0100 +@@ -1,6 +1,6 @@ + /* crypto/evp/evp_err.c */ + /* ==================================================================== +- * Copyright (c) 1999-2008 The OpenSSL Project. All rights reserved. ++ * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions +@@ -70,6 +70,7 @@ + + static ERR_STRING_DATA EVP_str_functs[]= + { ++{ERR_FUNC(EVP_F_AESNI_INIT_KEY), "AESNI_INIT_KEY"}, + {ERR_FUNC(EVP_F_AES_INIT_KEY), "AES_INIT_KEY"}, + {ERR_FUNC(EVP_F_CAMELLIA_INIT_KEY), "CAMELLIA_INIT_KEY"}, + {ERR_FUNC(EVP_F_D2I_PKEY), "D2I_PKEY"}, +@@ -86,7 +87,7 @@ static ERR_STRING_DATA EVP_str_functs[]= + {ERR_FUNC(EVP_F_EVP_DIGESTINIT_EX), "EVP_DigestInit_ex"}, + {ERR_FUNC(EVP_F_EVP_ENCRYPTFINAL_EX), "EVP_EncryptFinal_ex"}, + {ERR_FUNC(EVP_F_EVP_MD_CTX_COPY_EX), "EVP_MD_CTX_copy_ex"}, +-{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_SIZE"}, ++{ERR_FUNC(EVP_F_EVP_MD_SIZE), "EVP_MD_size"}, + {ERR_FUNC(EVP_F_EVP_OPENINIT), "EVP_OpenInit"}, + {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD), "EVP_PBE_alg_add"}, + {ERR_FUNC(EVP_F_EVP_PBE_ALG_ADD_TYPE), "EVP_PBE_alg_add_type"}, +diff -up openssl-1.0.0b/crypto/evp/evp.h.aesni openssl-1.0.0b/crypto/evp/evp.h +--- openssl-1.0.0b/crypto/evp/evp.h.aesni 2010-11-16 17:33:22.000000000 +0100 ++++ openssl-1.0.0b/crypto/evp/evp.h 2010-11-16 17:33:23.000000000 +0100 +@@ -1167,6 +1167,7 @@ void ERR_load_EVP_strings(void); + /* Error codes for the EVP functions. */ + + /* Function codes. */ ++#define EVP_F_AESNI_INIT_KEY 163 + #define EVP_F_AES_INIT_KEY 133 + #define EVP_F_CAMELLIA_INIT_KEY 159 + #define EVP_F_D2I_PKEY 100 +diff -up openssl-1.0.0b/test/test_aesni.aesni openssl-1.0.0b/test/test_aesni +--- openssl-1.0.0b/test/test_aesni.aesni 2010-11-16 17:33:23.000000000 +0100 ++++ openssl-1.0.0b/test/test_aesni 2010-11-16 17:33:23.000000000 +0100 +@@ -0,0 +1,69 @@ ++#!/bin/sh ++ ++PROG=$1 ++ ++if [ -x $PROG ]; then ++ if expr "x`$PROG version`" : "xOpenSSL" > /dev/null; then ++ : ++ else ++ echo "$PROG is not OpenSSL executable" ++ exit 1 ++ fi ++else ++ echo "$PROG is not executable" ++ exit 1; ++fi ++ ++if $PROG engine aesni | grep -v no-aesni; then ++ ++ HASH=`cat $PROG | $PROG dgst -hex` ++ ++ AES_ALGS=" aes-128-ecb aes-192-ecb aes-256-ecb \ ++ aes-128-cbc aes-192-cbc aes-256-cbc \ ++ aes-128-cfb aes-192-cfb aes-256-cfb \ ++ aes-128-ofb aes-192-ofb aes-256-ofb" ++ BUFSIZE="16 32 48 64 80 96 128 144 999" ++ ++ nerr=0 ++ ++ for alg in $AES_ALGS; do ++ echo $alg ++ for bufsize in $BUFSIZE; do ++ TEST=`( cat $PROG | \ ++ $PROG enc -e -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \ ++ $PROG enc -d -k "$HASH" -$alg | \ ++ $PROG dgst -hex ) 2>/dev/null` ++ if [ "$TEST" != "$HASH" ]; then ++ echo "-$alg/$bufsize encrypt test failed" ++ nerr=`expr $nerr + 1` ++ fi ++ done ++ for bufsize in $BUFSIZE; do ++ TEST=`( cat $PROG | \ ++ $PROG enc -e -k "$HASH" -$alg | \ ++ $PROG enc -d -k "$HASH" -$alg -bufsize $bufsize -engine aesni | \ ++ $PROG dgst -hex ) 2>/dev/null` ++ if [ "$TEST" != "$HASH" ]; then ++ echo "-$alg/$bufsize decrypt test failed" ++ nerr=`expr $nerr + 1` ++ fi ++ done ++ TEST=`( cat $PROG | \ ++ $PROG enc -e -k "$HASH" -$alg -engine aesni | \ ++ $PROG enc -d -k "$HASH" -$alg -engine aesni | \ ++ $PROG dgst -hex ) 2>/dev/null` ++ if [ "$TEST" != "$HASH" ]; then ++ echo "-$alg en/decrypt test failed" ++ nerr=`expr $nerr + 1` ++ fi ++ done ++ ++ if [ $nerr -gt 0 ]; then ++ echo "AESNI engine test failed." ++ exit 1; ++ fi ++else ++ echo "AESNI engine is not available" ++fi ++ ++exit 0 diff --git a/openssl-1.0.0b-ipv6-apps.patch b/openssl-1.0.0b-ipv6-apps.patch new file mode 100644 index 0000000..b85a5d8 --- /dev/null +++ b/openssl-1.0.0b-ipv6-apps.patch @@ -0,0 +1,496 @@ +diff -up openssl-1.0.0b/apps/s_apps.h.ipv6-apps openssl-1.0.0b/apps/s_apps.h +--- openssl-1.0.0b/apps/s_apps.h.ipv6-apps 2010-11-16 17:19:29.000000000 +0100 ++++ openssl-1.0.0b/apps/s_apps.h 2010-11-16 17:19:29.000000000 +0100 +@@ -148,7 +148,7 @@ typedef fd_mask fd_set; + #define PORT_STR "4433" + #define PROTOCOL "tcp" + +-int do_server(int port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context); ++int do_server(char *port, int type, int *ret, int (*cb) (char *hostname, int s, unsigned char *context), unsigned char *context); + #ifdef HEADER_X509_H + int MS_CALLBACK verify_callback(int ok, X509_STORE_CTX *ctx); + #endif +@@ -156,10 +156,9 @@ int MS_CALLBACK verify_callback(int ok, + int set_cert_stuff(SSL_CTX *ctx, char *cert_file, char *key_file); + int set_cert_key_stuff(SSL_CTX *ctx, X509 *cert, EVP_PKEY *key); + #endif +-int init_client(int *sock, char *server, int port, int type); ++int init_client(int *sock, char *server, char *port, int type); + int should_retry(int i); +-int extract_port(char *str, short *port_ptr); +-int extract_host_port(char *str,char **host_ptr,unsigned char *ip,short *p); ++int extract_host_port(char *str,char **host_ptr,char **port_ptr); + + long MS_CALLBACK bio_dump_callback(BIO *bio, int cmd, const char *argp, + int argi, long argl, long ret); +diff -up openssl-1.0.0b/apps/s_client.c.ipv6-apps openssl-1.0.0b/apps/s_client.c +--- openssl-1.0.0b/apps/s_client.c.ipv6-apps 2010-11-16 17:19:29.000000000 +0100 ++++ openssl-1.0.0b/apps/s_client.c 2010-11-16 17:19:29.000000000 +0100 +@@ -389,7 +389,7 @@ int MAIN(int argc, char **argv) + int cbuf_len,cbuf_off; + int sbuf_len,sbuf_off; + fd_set readfds,writefds; +- short port=PORT; ++ char *port_str = PORT_STR; + int full_log=1; + char *host=SSL_HOST_NAME; + char *cert_file=NULL,*key_file=NULL; +@@ -488,13 +488,12 @@ int MAIN(int argc, char **argv) + else if (strcmp(*argv,"-port") == 0) + { + if (--argc < 1) goto bad; +- port=atoi(*(++argv)); +- if (port == 0) goto bad; ++ port_str= *(++argv); + } + else if (strcmp(*argv,"-connect") == 0) + { + if (--argc < 1) goto bad; +- if (!extract_host_port(*(++argv),&host,NULL,&port)) ++ if (!extract_host_port(*(++argv),&host,&port_str)) + goto bad; + } + else if (strcmp(*argv,"-verify") == 0) +@@ -967,7 +966,7 @@ bad: + + re_start: + +- if (init_client(&s,host,port,socket_type) == 0) ++ if (init_client(&s,host,port_str,socket_type) == 0) + { + BIO_printf(bio_err,"connect:errno=%d\n",get_last_socket_error()); + SHUTDOWN(s); +diff -up openssl-1.0.0b/apps/s_server.c.ipv6-apps openssl-1.0.0b/apps/s_server.c +--- openssl-1.0.0b/apps/s_server.c.ipv6-apps 2010-11-16 17:19:29.000000000 +0100 ++++ openssl-1.0.0b/apps/s_server.c 2010-11-16 17:19:29.000000000 +0100 +@@ -838,7 +838,7 @@ int MAIN(int argc, char *argv[]) + { + X509_VERIFY_PARAM *vpm = NULL; + int badarg = 0; +- short port=PORT; ++ char *port_str = PORT_STR; + char *CApath=NULL,*CAfile=NULL; + unsigned char *context = NULL; + char *dhfile = NULL; +@@ -909,8 +909,7 @@ int MAIN(int argc, char *argv[]) + (strcmp(*argv,"-accept") == 0)) + { + if (--argc < 1) goto bad; +- if (!extract_port(*(++argv),&port)) +- goto bad; ++ port_str= *(++argv); + } + else if (strcmp(*argv,"-verify") == 0) + { +@@ -1700,9 +1699,9 @@ bad: + BIO_printf(bio_s_out,"ACCEPT\n"); + (void)BIO_flush(bio_s_out); + if (www) +- do_server(port,socket_type,&accept_socket,www_body, context); ++ do_server(port_str,socket_type,&accept_socket,www_body, context); + else +- do_server(port,socket_type,&accept_socket,sv_body, context); ++ do_server(port_str,socket_type,&accept_socket,sv_body, context); + print_stats(bio_s_out,ctx); + ret=0; + end: +diff -up openssl-1.0.0b/apps/s_socket.c.ipv6-apps openssl-1.0.0b/apps/s_socket.c +--- openssl-1.0.0b/apps/s_socket.c.ipv6-apps 2010-07-05 13:03:22.000000000 +0200 ++++ openssl-1.0.0b/apps/s_socket.c 2010-11-16 17:27:18.000000000 +0100 +@@ -102,9 +102,7 @@ static struct hostent *GetHostByName(cha + static void ssl_sock_cleanup(void); + #endif + static int ssl_sock_init(void); +-static int init_client_ip(int *sock,unsigned char ip[4], int port, int type); +-static int init_server(int *sock, int port, int type); +-static int init_server_long(int *sock, int port,char *ip, int type); ++static int init_server(int *sock, char *port, int type); + static int do_accept(int acc_sock, int *sock, char **host); + static int host_ip(char *str, unsigned char ip[4]); + +@@ -234,58 +232,70 @@ static int ssl_sock_init(void) + return(1); + } + +-int init_client(int *sock, char *host, int port, int type) ++int init_client(int *sock, char *host, char *port, int type) + { +- unsigned char ip[4]; +- +- if (!host_ip(host,&(ip[0]))) +- { +- return(0); +- } +- return(init_client_ip(sock,ip,port,type)); +- } +- +-static int init_client_ip(int *sock, unsigned char ip[4], int port, int type) +- { +- unsigned long addr; +- struct sockaddr_in them; +- int s,i; ++ struct addrinfo *res, *res0, hints; ++ char * failed_call = NULL; ++ int s; ++ int e; + + if (!ssl_sock_init()) return(0); + +- memset((char *)&them,0,sizeof(them)); +- them.sin_family=AF_INET; +- them.sin_port=htons((unsigned short)port); +- addr=(unsigned long) +- ((unsigned long)ip[0]<<24L)| +- ((unsigned long)ip[1]<<16L)| +- ((unsigned long)ip[2]<< 8L)| +- ((unsigned long)ip[3]); +- them.sin_addr.s_addr=htonl(addr); +- +- if (type == SOCK_STREAM) +- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL); +- else /* ( type == SOCK_DGRAM) */ +- s=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP); +- +- if (s == INVALID_SOCKET) { perror("socket"); return(0); } ++ memset(&hints, '\0', sizeof(hints)); ++ hints.ai_socktype = type; ++ hints.ai_flags = AI_ADDRCONFIG; ++ ++ e = getaddrinfo(host, port, &hints, &res); ++ if (e) ++ { ++ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); ++ if (e == EAI_SYSTEM) ++ perror("getaddrinfo"); ++ return (0); ++ } + ++ res0 = res; ++ while (res) ++ { ++ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol); ++ if (s == INVALID_SOCKET) ++ { ++ failed_call = "socket"; ++ goto nextres; ++ } + #if defined(SO_KEEPALIVE) && !defined(OPENSSL_SYS_MPE) + if (type == SOCK_STREAM) + { +- i=0; +- i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE,(char *)&i,sizeof(i)); +- if (i < 0) { perror("keepalive"); return(0); } ++ int i=0; ++ i=setsockopt(s,SOL_SOCKET,SO_KEEPALIVE, ++ (char *)&i,sizeof(i)); ++ if (i < 0) { ++ failed_call = "keepalive"; ++ goto nextres; ++ } + } + #endif +- +- if (connect(s,(struct sockaddr *)&them,sizeof(them)) == -1) +- { closesocket(s); perror("connect"); return(0); } ++ if (connect(s,(struct sockaddr *)res->ai_addr, ++ res->ai_addrlen) == 0) ++ { ++ freeaddrinfo(res0); + *sock=s; + return(1); + } + +-int do_server(int port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context) ++ failed_call = "socket"; ++nextres: ++ if (s != INVALID_SOCKET) ++ close(s); ++ res = res->ai_next; ++ } ++ freeaddrinfo(res0); ++ ++ perror(failed_call); ++ return(0); ++ } ++ ++int do_server(char *port, int type, int *ret, int (*cb)(char *hostname, int s, unsigned char *context), unsigned char *context) + { + int sock; + char *name = NULL; +@@ -323,33 +333,38 @@ int do_server(int port, int type, int *r + } + } + +-static int init_server_long(int *sock, int port, char *ip, int type) ++static int init_server(int *sock, char *port, int type) + { +- int ret=0; +- struct sockaddr_in server; +- int s= -1; ++ struct addrinfo *res, *res0, hints; ++ char * failed_call = NULL; ++ char port_name[8]; ++ int s; ++ int e; + + if (!ssl_sock_init()) return(0); + +- memset((char *)&server,0,sizeof(server)); +- server.sin_family=AF_INET; +- server.sin_port=htons((unsigned short)port); +- if (ip == NULL) +- server.sin_addr.s_addr=INADDR_ANY; +- else +-/* Added for T3E, address-of fails on bit field (beckman@acl.lanl.gov) */ +-#ifndef BIT_FIELD_LIMITS +- memcpy(&server.sin_addr.s_addr,ip,4); +-#else +- memcpy(&server.sin_addr,ip,4); +-#endif ++ memset(&hints, '\0', sizeof(hints)); ++ hints.ai_socktype = type; ++ hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; + +- if (type == SOCK_STREAM) +- s=socket(AF_INET,SOCK_STREAM,SOCKET_PROTOCOL); +- else /* type == SOCK_DGRAM */ +- s=socket(AF_INET, SOCK_DGRAM,IPPROTO_UDP); ++ e = getaddrinfo(NULL, port, &hints, &res); ++ if (e) ++ { ++ fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(e)); ++ if (e == EAI_SYSTEM) ++ perror("getaddrinfo"); ++ return (0); ++ } + +- if (s == INVALID_SOCKET) goto err; ++ res0 = res; ++ while (res) ++ { ++ s = socket(res->ai_family, res->ai_socktype, res->ai_protocol); ++ if (s == INVALID_SOCKET) ++ { ++ failed_call = "socket"; ++ goto nextres; ++ } + #if defined SOL_SOCKET && defined SO_REUSEADDR + { + int j = 1; +@@ -357,35 +372,39 @@ static int init_server_long(int *sock, i + (void *) &j, sizeof j); + } + #endif +- if (bind(s,(struct sockaddr *)&server,sizeof(server)) == -1) ++ ++ if (bind(s,(struct sockaddr *)res->ai_addr, res->ai_addrlen) == -1) + { +-#ifndef OPENSSL_SYS_WINDOWS +- perror("bind"); +-#endif +- goto err; ++ failed_call = "bind"; ++ goto nextres; + } +- /* Make it 128 for linux */ +- if (type==SOCK_STREAM && listen(s,128) == -1) goto err; +- *sock=s; +- ret=1; +-err: +- if ((ret == 0) && (s != -1)) ++ if (type==SOCK_STREAM && listen(s,128) == -1) + { +- SHUTDOWN(s); ++ failed_call = "listen"; ++ goto nextres; + } +- return(ret); ++ ++ *sock=s; ++ return(1); ++ ++nextres: ++ if (s != INVALID_SOCKET) ++ close(s); ++ res = res->ai_next; + } ++ freeaddrinfo(res0); + +-static int init_server(int *sock, int port, int type) +- { +- return(init_server_long(sock, port, NULL, type)); ++ if (s == INVALID_SOCKET) { perror("socket"); return(0); } ++ ++ perror(failed_call); ++ return(0); + } + + static int do_accept(int acc_sock, int *sock, char **host) + { ++ static struct sockaddr_storage from; ++ char buffer[NI_MAXHOST]; + int ret; +- struct hostent *h1,*h2; +- static struct sockaddr_in from; + int len; + /* struct linger ling; */ + +@@ -432,135 +451,58 @@ redoit: + */ + + if (host == NULL) goto end; +-#ifndef BIT_FIELD_LIMITS +- /* I should use WSAAsyncGetHostByName() under windows */ +- h1=gethostbyaddr((char *)&from.sin_addr.s_addr, +- sizeof(from.sin_addr.s_addr),AF_INET); +-#else +- h1=gethostbyaddr((char *)&from.sin_addr, +- sizeof(struct in_addr),AF_INET); +-#endif +- if (h1 == NULL) ++ ++ if (getnameinfo((struct sockaddr *)&from, sizeof(from), ++ buffer, sizeof(buffer), ++ NULL, 0, 0)) + { +- BIO_printf(bio_err,"bad gethostbyaddr\n"); ++ BIO_printf(bio_err,"getnameinfo failed\n"); + *host=NULL; + /* return(0); */ + } + else + { +- if ((*host=(char *)OPENSSL_malloc(strlen(h1->h_name)+1)) == NULL) ++ if ((*host=(char *)OPENSSL_malloc(strlen(buffer)+1)) == NULL) + { + perror("OPENSSL_malloc"); + return(0); + } +- BUF_strlcpy(*host,h1->h_name,strlen(h1->h_name)+1); +- +- h2=GetHostByName(*host); +- if (h2 == NULL) +- { +- BIO_printf(bio_err,"gethostbyname failure\n"); +- return(0); +- } +- if (h2->h_addrtype != AF_INET) +- { +- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); +- return(0); +- } ++ strcpy(*host, buffer); + } + end: + *sock=ret; + return(1); + } + +-int extract_host_port(char *str, char **host_ptr, unsigned char *ip, +- short *port_ptr) ++int extract_host_port(char *str, char **host_ptr, ++ char **port_ptr) + { +- char *h,*p; ++ char *h,*p,*x; + +- h=str; +- p=strchr(str,':'); ++ x=h=str; ++ if (*h == '[') ++ { ++ h++; ++ p=strchr(h,']'); + if (p == NULL) + { +- BIO_printf(bio_err,"no port defined\n"); ++ BIO_printf(bio_err,"no ending bracket for IPv6 address\n"); + return(0); + } + *(p++)='\0'; +- +- if ((ip != NULL) && !host_ip(str,ip)) +- goto err; +- if (host_ptr != NULL) *host_ptr=h; +- +- if (!extract_port(p,port_ptr)) +- goto err; +- return(1); +-err: +- return(0); ++ x = p; + } +- +-static int host_ip(char *str, unsigned char ip[4]) +- { +- unsigned int in[4]; +- int i; +- +- if (sscanf(str,"%u.%u.%u.%u",&(in[0]),&(in[1]),&(in[2]),&(in[3])) == 4) +- { +- for (i=0; i<4; i++) +- if (in[i] > 255) +- { +- BIO_printf(bio_err,"invalid IP address\n"); +- goto err; +- } +- ip[0]=in[0]; +- ip[1]=in[1]; +- ip[2]=in[2]; +- ip[3]=in[3]; +- } +- else +- { /* do a gethostbyname */ +- struct hostent *he; +- +- if (!ssl_sock_init()) return(0); +- +- he=GetHostByName(str); +- if (he == NULL) +- { +- BIO_printf(bio_err,"gethostbyname failure\n"); +- goto err; +- } +- /* cast to short because of win16 winsock definition */ +- if ((short)he->h_addrtype != AF_INET) ++ p=strchr(x,':'); ++ if (p == NULL) + { +- BIO_printf(bio_err,"gethostbyname addr is not AF_INET\n"); +- return(0); +- } +- ip[0]=he->h_addr_list[0][0]; +- ip[1]=he->h_addr_list[0][1]; +- ip[2]=he->h_addr_list[0][2]; +- ip[3]=he->h_addr_list[0][3]; +- } +- return(1); +-err: ++ BIO_printf(bio_err,"no port defined\n"); + return(0); + } ++ *(p++)='\0'; + +-int extract_port(char *str, short *port_ptr) +- { +- int i; +- struct servent *s; ++ if (host_ptr != NULL) *host_ptr=h; ++ if (port_ptr != NULL) *port_ptr=p; + +- i=atoi(str); +- if (i != 0) +- *port_ptr=(unsigned short)i; +- else +- { +- s=getservbyname(str,"tcp"); +- if (s == NULL) +- { +- BIO_printf(bio_err,"getservbyname failure for %s\n",str); +- return(0); +- } +- *port_ptr=ntohs((unsigned short)s->s_port); +- } + return(1); + } + diff --git a/openssl-1.0.0c-apps-ipv6listen.patch b/openssl-1.0.0c-apps-ipv6listen.patch new file mode 100644 index 0000000..7c3d4a0 --- /dev/null +++ b/openssl-1.0.0c-apps-ipv6listen.patch @@ -0,0 +1,57 @@ +diff -up openssl-1.0.0c/apps/s_socket.c.ipv6listen openssl-1.0.0c/apps/s_socket.c +--- openssl-1.0.0c/apps/s_socket.c.ipv6listen 2011-01-24 16:44:18.000000000 +0100 ++++ openssl-1.0.0c/apps/s_socket.c 2011-01-24 16:56:25.000000000 +0100 +@@ -335,15 +335,16 @@ int do_server(char *port, int type, int + + static int init_server(int *sock, char *port, int type) + { +- struct addrinfo *res, *res0, hints; ++ struct addrinfo *res, *res0 = NULL, hints; + char * failed_call = NULL; +- char port_name[8]; + int s; + int e; + + if (!ssl_sock_init()) return(0); + + memset(&hints, '\0', sizeof(hints)); ++ hints.ai_family = AF_INET6; ++tryipv4: + hints.ai_socktype = type; + hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG; + +@@ -365,6 +366,12 @@ static int init_server(int *sock, char * + failed_call = "socket"; + goto nextres; + } ++ if (hints.ai_family == AF_INET6) ++ { ++ int j = 0; ++ setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, ++ (void *) &j, sizeof j); ++ } + #if defined SOL_SOCKET && defined SO_REUSEADDR + { + int j = 1; +@@ -392,9 +399,19 @@ nextres: + close(s); + res = res->ai_next; + } +- freeaddrinfo(res0); ++ if (res0) ++ freeaddrinfo(res0); + +- if (s == INVALID_SOCKET) { perror("socket"); return(0); } ++ if (s == INVALID_SOCKET) ++ { ++ if (hints.ai_family == AF_INET6) ++ { ++ hints.ai_family = AF_INET; ++ goto tryipv4; ++ } ++ perror("socket"); ++ return(0); ++ } + + perror(failed_call); + return(0); diff --git a/openssl-1.0.0c-fips-md5-allow.patch b/openssl-1.0.0c-fips-md5-allow.patch new file mode 100644 index 0000000..f9f5e5d --- /dev/null +++ b/openssl-1.0.0c-fips-md5-allow.patch @@ -0,0 +1,20 @@ +diff -up openssl-1.0.0c/crypto/md5/md5_dgst.c.md5-allow openssl-1.0.0c/crypto/md5/md5_dgst.c +--- openssl-1.0.0c/crypto/md5/md5_dgst.c.md5-allow 2011-02-03 19:53:28.000000000 +0100 ++++ openssl-1.0.0c/crypto/md5/md5_dgst.c 2011-02-03 20:33:14.000000000 +0100 +@@ -75,7 +75,15 @@ const char MD5_version[]="MD5" OPENSSL_V + #define INIT_DATA_C (unsigned long)0x98badcfeL + #define INIT_DATA_D (unsigned long)0x10325476L + +-FIPS_NON_FIPS_MD_Init(MD5) ++int MD5_Init(MD5_CTX *c) ++#ifdef OPENSSL_FIPS ++ { ++ if (FIPS_mode() && getenv("OPENSSL_FIPS_NON_APPROVED_MD5_ALLOW") == NULL) ++ FIPS_BAD_ALGORITHM(alg) ++ return private_MD5_Init(c); ++ } ++int private_MD5_Init(MD5_CTX *c) ++#endif + { + memset (c,0,sizeof(*c)); + c->A=INIT_DATA_A; diff --git a/openssl-1.0.0c-fips186-3.patch b/openssl-1.0.0c-fips186-3.patch new file mode 100644 index 0000000..de3e5ab --- /dev/null +++ b/openssl-1.0.0c-fips186-3.patch @@ -0,0 +1,384 @@ +diff -up openssl-1.0.0c/crypto/dsa/dsa_gen.c.fips186-3 openssl-1.0.0c/crypto/dsa/dsa_gen.c +--- openssl-1.0.0c/crypto/dsa/dsa_gen.c.fips186-3 2011-02-03 21:04:14.000000000 +0100 ++++ openssl-1.0.0c/crypto/dsa/dsa_gen.c 2011-02-04 08:54:42.000000000 +0100 +@@ -120,11 +120,11 @@ int dsa_builtin_paramgen(DSA *ret, size_ + int ok=0; + unsigned char seed[SHA256_DIGEST_LENGTH]; + unsigned char md[SHA256_DIGEST_LENGTH]; +- unsigned char buf[SHA256_DIGEST_LENGTH],buf2[SHA256_DIGEST_LENGTH]; ++ unsigned char buf[SHA256_DIGEST_LENGTH]; + BIGNUM *r0,*W,*X,*c,*test; + BIGNUM *g=NULL,*q=NULL,*p=NULL; + BN_MONT_CTX *mont=NULL; +- int i, k, n=0, m=0, qsize = qbits >> 3; ++ int i, k, b, n=0, m=0, qsize = qbits >> 3; + int counter=0; + int r=0; + BN_CTX *ctx=NULL; +@@ -138,9 +138,13 @@ int dsa_builtin_paramgen(DSA *ret, size_ + goto err; + } + +- if (FIPS_mode() && (bits < OPENSSL_DSA_FIPS_MIN_MODULUS_BITS)) ++ if (FIPS_mode() && ++ (bits != 1024 || qbits != 160) && ++ (bits != 2048 || qbits != 224) && ++ (bits != 2048 || qbits != 256) && ++ (bits != 3072 || qbits != 256)) + { +- DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN, DSA_R_KEY_SIZE_TOO_SMALL); ++ DSAerr(DSA_F_DSA_BUILTIN_PARAMGEN, DSA_R_KEY_SIZE_INVALID); + goto err; + } + #endif +@@ -151,22 +155,25 @@ int dsa_builtin_paramgen(DSA *ret, size_ + return 0; + + if (evpmd == NULL) +- /* use SHA1 as default */ +- evpmd = EVP_sha1(); ++ { ++ if (qbits <= 160) ++ evpmd = EVP_sha1(); ++ else if (qbits <= 224) ++ evpmd = EVP_sha224(); ++ else ++ evpmd = EVP_sha256(); ++ } + + if (bits < 512) + bits = 512; + + bits = (bits+63)/64*64; + +- /* NB: seed_len == 0 is special case: copy generated seed to +- * seed_in if it is not NULL. +- */ + if (seed_len && (seed_len < (size_t)qsize)) + seed_in = NULL; /* seed buffer too small -- ignore */ + if (seed_len > (size_t)qsize) + seed_len = qsize; /* App. 2.2 of FIPS PUB 186 allows larger SEED, +- * but our internal buffers are restricted to 160 bits*/ ++ * but our internal buffers are restricted to 256 bits*/ + if (seed_in != NULL) + memcpy(seed, seed_in, seed_len); + +@@ -189,13 +196,18 @@ int dsa_builtin_paramgen(DSA *ret, size_ + if (!BN_lshift(test,BN_value_one(),bits-1)) + goto err; + ++ /* step 3 n = \lceil bits / qbits \rceil - 1 */ ++ n = (bits+qbits-1)/qbits - 1; ++ /* step 4 b = bits - 1 - n * qbits */ ++ b = bits - 1 - n*qbits; ++ + for (;;) + { + for (;;) /* find q */ + { + int seed_is_random; + +- /* step 1 */ ++ /* step 5 generate seed */ + if(!BN_GENCB_call(cb, 0, m++)) + goto err; + +@@ -210,28 +222,17 @@ int dsa_builtin_paramgen(DSA *ret, size_ + seed_len=0; /* use random seed if 'seed_in' turns out to be bad*/ + } + memcpy(buf , seed, qsize); +- memcpy(buf2, seed, qsize); +- /* precompute "SEED + 1" for step 7: */ +- for (i = qsize-1; i >= 0; i--) +- { +- buf[i]++; +- if (buf[i] != 0) +- break; +- } + +- /* step 2 */ ++ /* step 6 U = hash(seed) */ + EVP_Digest(seed, qsize, md, NULL, evpmd, NULL); +- EVP_Digest(buf, qsize, buf2, NULL, evpmd, NULL); +- for (i = 0; i < qsize; i++) +- md[i]^=buf2[i]; + +- /* step 3 */ ++ /* step 7 q = 2^(qbits-1) + U + 1 - (U mod 2) */ + md[0] |= 0x80; + md[qsize-1] |= 0x01; + if (!BN_bin2bn(md, qsize, q)) + goto err; + +- /* step 4 */ ++ /* step 8 test for prime (64 round of Rabin-Miller) */ + r = BN_is_prime_fasttest_ex(q, DSS_prime_checks, ctx, + seed_is_random, cb); + if (r > 0) +@@ -239,27 +240,22 @@ int dsa_builtin_paramgen(DSA *ret, size_ + if (r != 0) + goto err; + +- /* do a callback call */ +- /* step 5 */ + } + + if(!BN_GENCB_call(cb, 2, 0)) goto err; + if(!BN_GENCB_call(cb, 3, 0)) goto err; + +- /* step 6 */ ++ /* step 11 */ + counter=0; +- /* "offset = 2" */ +- +- n=(bits-1)/160; ++ /* "offset = 1" */ + + for (;;) + { + if ((counter != 0) && !BN_GENCB_call(cb, 0, counter)) + goto err; + +- /* step 7 */ ++ /* step 11.1, 11.2 obtain W */ + BN_zero(W); +- /* now 'buf' contains "SEED + offset - 1" */ + for (k=0; k<=n; k++) + { + /* obtain "SEED + offset + k" by incrementing: */ +@@ -272,28 +268,30 @@ int dsa_builtin_paramgen(DSA *ret, size_ + + EVP_Digest(buf, qsize, md ,NULL, evpmd, NULL); + +- /* step 8 */ + if (!BN_bin2bn(md, qsize, r0)) + goto err; +- if (!BN_lshift(r0,r0,(qsize << 3)*k)) goto err; ++ if (k == n) ++ BN_mask_bits(r0,b); ++ if (!BN_lshift(r0,r0,qbits*k)) goto err; + if (!BN_add(W,W,r0)) goto err; + } + +- /* more of step 8 */ +- if (!BN_mask_bits(W,bits-1)) goto err; ++ /* step 11.3 X = W + 2^(L-1) */ + if (!BN_copy(X,W)) goto err; + if (!BN_add(X,X,test)) goto err; + +- /* step 9 */ ++ /* step 11.4 c = X mod 2*q */ + if (!BN_lshift1(r0,q)) goto err; + if (!BN_mod(c,X,r0,ctx)) goto err; ++ ++ /* step 11.5 p = X - (c - 1) */ + if (!BN_sub(r0,c,BN_value_one())) goto err; + if (!BN_sub(p,X,r0)) goto err; + +- /* step 10 */ ++ /* step 11.6 */ + if (BN_cmp(p,test) >= 0) + { +- /* step 11 */ ++ /* step 11.7 */ + r = BN_is_prime_fasttest_ex(p, DSS_prime_checks, + ctx, 1, cb); + if (r > 0) +@@ -302,12 +300,12 @@ int dsa_builtin_paramgen(DSA *ret, size_ + goto err; + } + +- /* step 13 */ ++ /* step 11.9 */ + counter++; + /* "offset = offset + n + 1" */ + +- /* step 14 */ +- if (counter >= 4096) break; ++ /* step 12 */ ++ if (counter >= 4*bits) break; + } + } + end: +diff -up openssl-1.0.0c/crypto/dsa/dsa.h.fips186-3 openssl-1.0.0c/crypto/dsa/dsa.h +--- openssl-1.0.0c/crypto/dsa/dsa.h.fips186-3 2011-02-03 21:04:14.000000000 +0100 ++++ openssl-1.0.0c/crypto/dsa/dsa.h 2011-02-03 21:04:14.000000000 +0100 +@@ -316,6 +316,7 @@ void ERR_load_DSA_strings(void); + #define DSA_R_DATA_TOO_LARGE_FOR_KEY_SIZE 100 + #define DSA_R_DECODE_ERROR 104 + #define DSA_R_INVALID_DIGEST_TYPE 106 ++#define DSA_R_KEY_SIZE_INVALID 113 + #define DSA_R_KEY_SIZE_TOO_SMALL 110 + #define DSA_R_MISSING_PARAMETERS 101 + #define DSA_R_MODULUS_TOO_LARGE 103 +diff -up openssl-1.0.0c/crypto/dsa/dsatest.c.fips186-3 openssl-1.0.0c/crypto/dsa/dsatest.c +--- openssl-1.0.0c/crypto/dsa/dsatest.c.fips186-3 2011-02-03 21:14:07.000000000 +0100 ++++ openssl-1.0.0c/crypto/dsa/dsatest.c 2011-02-04 08:40:24.000000000 +0100 +@@ -96,36 +96,41 @@ static int MS_CALLBACK dsa_cb(int p, int + /* seed, out_p, out_q, out_g are taken from the updated Appendix 5 to + * FIPS PUB 186 and also appear in Appendix 5 to FIPS PIB 186-1 */ + static unsigned char seed[20]={ +- 0xd5,0x01,0x4e,0x4b,0x60,0xef,0x2b,0xa8,0xb6,0x21,0x1b,0x40, +- 0x62,0xba,0x32,0x24,0xe0,0x42,0x7d,0xd3, ++ 0x02,0x47,0x11,0x92,0x11,0x88,0xC8,0xFB,0xAF,0x48,0x4C,0x62, ++ 0xDF,0xA5,0xBE,0xA0,0xA4,0x3C,0x56,0xE3, + }; + + static unsigned char out_p[]={ +- 0x8d,0xf2,0xa4,0x94,0x49,0x22,0x76,0xaa, +- 0x3d,0x25,0x75,0x9b,0xb0,0x68,0x69,0xcb, +- 0xea,0xc0,0xd8,0x3a,0xfb,0x8d,0x0c,0xf7, +- 0xcb,0xb8,0x32,0x4f,0x0d,0x78,0x82,0xe5, +- 0xd0,0x76,0x2f,0xc5,0xb7,0x21,0x0e,0xaf, +- 0xc2,0xe9,0xad,0xac,0x32,0xab,0x7a,0xac, +- 0x49,0x69,0x3d,0xfb,0xf8,0x37,0x24,0xc2, +- 0xec,0x07,0x36,0xee,0x31,0xc8,0x02,0x91, ++ 0xAC,0xCB,0x1E,0x63,0x60,0x69,0x0C,0xFB,0x06,0x19,0x68,0x3E, ++ 0xA5,0x01,0x5A,0xA2,0x15,0x5C,0xE2,0x99,0x2D,0xD5,0x30,0x99, ++ 0x7E,0x5F,0x8D,0xE2,0xF7,0xC6,0x2E,0x8D,0xA3,0x9F,0x58,0xAD, ++ 0xD6,0xA9,0x7D,0x0E,0x0D,0x95,0x53,0xA6,0x71,0x3A,0xDE,0xAB, ++ 0xAC,0xE9,0xF4,0x36,0x55,0x9E,0xB9,0xD6,0x93,0xBF,0xF3,0x18, ++ 0x1C,0x14,0x7B,0xA5,0x42,0x2E,0xCD,0x00,0xEB,0x35,0x3B,0x1B, ++ 0xA8,0x51,0xBB,0xE1,0x58,0x42,0x85,0x84,0x22,0xA7,0x97,0x5E, ++ 0x99,0x6F,0x38,0x20,0xBD,0x9D,0xB6,0xD9,0x33,0x37,0x2A,0xFD, ++ 0xBB,0xD4,0xBC,0x0C,0x2A,0x67,0xCB,0x9F,0xBB,0xDF,0xF9,0x93, ++ 0xAA,0xD6,0xF0,0xD6,0x95,0x0B,0x5D,0x65,0x14,0xD0,0x18,0x9D, ++ 0xC6,0xAF,0xF0,0xC6,0x37,0x7C,0xF3,0x5F, + }; + + static unsigned char out_q[]={ +- 0xc7,0x73,0x21,0x8c,0x73,0x7e,0xc8,0xee, +- 0x99,0x3b,0x4f,0x2d,0xed,0x30,0xf4,0x8e, +- 0xda,0xce,0x91,0x5f, ++ 0xE3,0x8E,0x5E,0x6D,0xBF,0x2B,0x79,0xF8,0xC5,0x4B,0x89,0x8B, ++ 0xBA,0x2D,0x91,0xC3,0x6C,0x80,0xAC,0x87, + }; + + static unsigned char out_g[]={ +- 0x62,0x6d,0x02,0x78,0x39,0xea,0x0a,0x13, +- 0x41,0x31,0x63,0xa5,0x5b,0x4c,0xb5,0x00, +- 0x29,0x9d,0x55,0x22,0x95,0x6c,0xef,0xcb, +- 0x3b,0xff,0x10,0xf3,0x99,0xce,0x2c,0x2e, +- 0x71,0xcb,0x9d,0xe5,0xfa,0x24,0xba,0xbf, +- 0x58,0xe5,0xb7,0x95,0x21,0x92,0x5c,0x9c, +- 0xc4,0x2e,0x9f,0x6f,0x46,0x4b,0x08,0x8c, +- 0xc5,0x72,0xaf,0x53,0xe6,0xd7,0x88,0x02, ++ 0x42,0x4A,0x04,0x4E,0x79,0xB4,0x99,0x7F,0xFD,0x58,0x36,0x2C, ++ 0x1B,0x5F,0x18,0x7E,0x0D,0xCC,0xAB,0x81,0xC9,0x5D,0x10,0xCE, ++ 0x4E,0x80,0x7E,0x58,0xB4,0x34,0x3F,0xA7,0x45,0xC7,0xAA,0x36, ++ 0x24,0x42,0xA9,0x3B,0xE8,0x0E,0x04,0x02,0x2D,0xFB,0xA6,0x13, ++ 0xB9,0xB5,0x15,0xA5,0x56,0x07,0x35,0xE4,0x03,0xB6,0x79,0x7C, ++ 0x62,0xDD,0xDF,0x3F,0x71,0x3A,0x9D,0x8B,0xC4,0xF6,0xE7,0x1D, ++ 0x52,0xA8,0xA9,0x43,0x1D,0x33,0x51,0x88,0x39,0xBD,0x73,0xE9, ++ 0x5F,0xBE,0x82,0x49,0x27,0xE6,0xB5,0x53,0xC1,0x38,0xAC,0x2F, ++ 0x6D,0x97,0x6C,0xEB,0x67,0xC1,0x5F,0x67,0xF8,0x35,0x05,0x5E, ++ 0xD5,0x68,0x80,0xAA,0x96,0xCA,0x0B,0x8A,0xE6,0xF1,0xB1,0x41, ++ 0xC6,0x75,0x94,0x0A,0x0A,0x2A,0xFA,0x29, + }; + + static const unsigned char str1[]="12345678901234567890"; +@@ -157,7 +162,7 @@ int main(int argc, char **argv) + BIO_printf(bio_err,"test generation of DSA parameters\n"); + + BN_GENCB_set(&cb, dsa_cb, bio_err); +- if(((dsa = DSA_new()) == NULL) || !DSA_generate_parameters_ex(dsa, 512, ++ if(((dsa = DSA_new()) == NULL) || !DSA_generate_parameters_ex(dsa, 1024, + seed, 20, &counter, &h, &cb)) + goto end; + +@@ -170,9 +175,9 @@ int main(int argc, char **argv) + BIO_printf(bio_err,"\ncounter=%d h=%ld\n",counter,h); + + DSA_print(bio_err,dsa,0); +- if (counter != 105) ++ if (counter != 239) + { +- BIO_printf(bio_err,"counter should be 105\n"); ++ BIO_printf(bio_err,"counter should be 239\n"); + goto end; + } + if (h != 2) +diff -up openssl-1.0.0c/crypto/fips/fips_dsa_selftest.c.fips186-3 openssl-1.0.0c/crypto/fips/fips_dsa_selftest.c +--- openssl-1.0.0c/crypto/fips/fips_dsa_selftest.c.fips186-3 2011-02-03 21:04:14.000000000 +0100 ++++ openssl-1.0.0c/crypto/fips/fips_dsa_selftest.c 2011-02-04 09:03:03.000000000 +0100 +@@ -68,44 +68,42 @@ + + #ifdef OPENSSL_FIPS + +-/* seed, out_p, out_q, out_g are taken the NIST test vectors */ +- + static unsigned char seed[20] = { +- 0x77, 0x8f, 0x40, 0x74, 0x6f, 0x66, 0xbe, 0x33, 0xce, 0xbe, 0x99, 0x34, +- 0x4c, 0xfc, 0xf3, 0x28, 0xaa, 0x70, 0x2d, 0x3a +- }; ++ 0x02,0x47,0x11,0x92,0x11,0x88,0xC8,0xFB,0xAF,0x48,0x4C,0x62, ++ 0xDF,0xA5,0xBE,0xA0,0xA4,0x3C,0x56,0xE3, ++ }; + + static unsigned char out_p[] = { +- 0xf7, 0x7c, 0x1b, 0x83, 0xd8, 0xe8, 0x5c, 0x7f, 0x85, 0x30, 0x17, 0x57, +- 0x21, 0x95, 0xfe, 0x26, 0x04, 0xeb, 0x47, 0x4c, 0x3a, 0x4a, 0x81, 0x4b, +- 0x71, 0x2e, 0xed, 0x6e, 0x4f, 0x3d, 0x11, 0x0f, 0x7c, 0xfe, 0x36, 0x43, +- 0x51, 0xd9, 0x81, 0x39, 0x17, 0xdf, 0x62, 0xf6, 0x9c, 0x01, 0xa8, 0x69, +- 0x71, 0xdd, 0x29, 0x7f, 0x47, 0xe6, 0x65, 0xa6, 0x22, 0xe8, 0x6a, 0x12, +- 0x2b, 0xc2, 0x81, 0xff, 0x32, 0x70, 0x2f, 0x9e, 0xca, 0x53, 0x26, 0x47, +- 0x0f, 0x59, 0xd7, 0x9e, 0x2c, 0xa5, 0x07, 0xc4, 0x49, 0x52, 0xa3, 0xe4, +- 0x6b, 0x04, 0x00, 0x25, 0x49, 0xe2, 0xe6, 0x7f, 0x28, 0x78, 0x97, 0xb8, +- 0x3a, 0x32, 0x14, 0x38, 0xa2, 0x51, 0x33, 0x22, 0x44, 0x7e, 0xd7, 0xef, +- 0x45, 0xdb, 0x06, 0x4a, 0xd2, 0x82, 0x4a, 0x82, 0x2c, 0xb1, 0xd7, 0xd8, +- 0xb6, 0x73, 0x00, 0x4d, 0x94, 0x77, 0x94, 0xef ++ 0xAC,0xCB,0x1E,0x63,0x60,0x69,0x0C,0xFB,0x06,0x19,0x68,0x3E, ++ 0xA5,0x01,0x5A,0xA2,0x15,0x5C,0xE2,0x99,0x2D,0xD5,0x30,0x99, ++ 0x7E,0x5F,0x8D,0xE2,0xF7,0xC6,0x2E,0x8D,0xA3,0x9F,0x58,0xAD, ++ 0xD6,0xA9,0x7D,0x0E,0x0D,0x95,0x53,0xA6,0x71,0x3A,0xDE,0xAB, ++ 0xAC,0xE9,0xF4,0x36,0x55,0x9E,0xB9,0xD6,0x93,0xBF,0xF3,0x18, ++ 0x1C,0x14,0x7B,0xA5,0x42,0x2E,0xCD,0x00,0xEB,0x35,0x3B,0x1B, ++ 0xA8,0x51,0xBB,0xE1,0x58,0x42,0x85,0x84,0x22,0xA7,0x97,0x5E, ++ 0x99,0x6F,0x38,0x20,0xBD,0x9D,0xB6,0xD9,0x33,0x37,0x2A,0xFD, ++ 0xBB,0xD4,0xBC,0x0C,0x2A,0x67,0xCB,0x9F,0xBB,0xDF,0xF9,0x93, ++ 0xAA,0xD6,0xF0,0xD6,0x95,0x0B,0x5D,0x65,0x14,0xD0,0x18,0x9D, ++ 0xC6,0xAF,0xF0,0xC6,0x37,0x7C,0xF3,0x5F, + }; + + static unsigned char out_q[] = { +- 0xd4, 0x0a, 0xac, 0x9f, 0xbd, 0x8c, 0x80, 0xc2, 0x38, 0x7e, 0x2e, 0x0c, +- 0x52, 0x5c, 0xea, 0x34, 0xa1, 0x83, 0x32, 0xf3 ++ 0xE3,0x8E,0x5E,0x6D,0xBF,0x2B,0x79,0xF8,0xC5,0x4B,0x89,0x8B, ++ 0xBA,0x2D,0x91,0xC3,0x6C,0x80,0xAC,0x87, + }; + + static unsigned char out_g[] = { +- 0x34, 0x73, 0x8b, 0x57, 0x84, 0x8e, 0x55, 0xbf, 0x57, 0xcc, 0x41, 0xbb, +- 0x5e, 0x2b, 0xd5, 0x42, 0xdd, 0x24, 0x22, 0x2a, 0x09, 0xea, 0x26, 0x1e, +- 0x17, 0x65, 0xcb, 0x1a, 0xb3, 0x12, 0x44, 0xa3, 0x9e, 0x99, 0xe9, 0x63, +- 0xeb, 0x30, 0xb1, 0x78, 0x7b, 0x09, 0x40, 0x30, 0xfa, 0x83, 0xc2, 0x35, +- 0xe1, 0xc4, 0x2d, 0x74, 0x1a, 0xb1, 0x83, 0x54, 0xd8, 0x29, 0xf4, 0xcf, +- 0x7f, 0x6f, 0x67, 0x1c, 0x36, 0x49, 0xee, 0x6c, 0xa2, 0x3c, 0x2d, 0x6a, +- 0xe9, 0xd3, 0x9a, 0xf6, 0x57, 0x78, 0x6f, 0xfd, 0x33, 0xcd, 0x3c, 0xed, +- 0xfd, 0xd4, 0x41, 0xe6, 0x5c, 0x8b, 0xe0, 0x68, 0x31, 0x47, 0x47, 0xaf, +- 0x12, 0xa7, 0xf9, 0x32, 0x0d, 0x94, 0x15, 0x48, 0xd0, 0x54, 0x85, 0xb2, +- 0x04, 0xb5, 0x4d, 0xd4, 0x9d, 0x05, 0x22, 0x25, 0xd9, 0xfd, 0x6c, 0x36, +- 0xef, 0xbe, 0x69, 0x6c, 0x55, 0xf4, 0xee, 0xec ++ 0x42,0x4A,0x04,0x4E,0x79,0xB4,0x99,0x7F,0xFD,0x58,0x36,0x2C, ++ 0x1B,0x5F,0x18,0x7E,0x0D,0xCC,0xAB,0x81,0xC9,0x5D,0x10,0xCE, ++ 0x4E,0x80,0x7E,0x58,0xB4,0x34,0x3F,0xA7,0x45,0xC7,0xAA,0x36, ++ 0x24,0x42,0xA9,0x3B,0xE8,0x0E,0x04,0x02,0x2D,0xFB,0xA6,0x13, ++ 0xB9,0xB5,0x15,0xA5,0x56,0x07,0x35,0xE4,0x03,0xB6,0x79,0x7C, ++ 0x62,0xDD,0xDF,0x3F,0x71,0x3A,0x9D,0x8B,0xC4,0xF6,0xE7,0x1D, ++ 0x52,0xA8,0xA9,0x43,0x1D,0x33,0x51,0x88,0x39,0xBD,0x73,0xE9, ++ 0x5F,0xBE,0x82,0x49,0x27,0xE6,0xB5,0x53,0xC1,0x38,0xAC,0x2F, ++ 0x6D,0x97,0x6C,0xEB,0x67,0xC1,0x5F,0x67,0xF8,0x35,0x05,0x5E, ++ 0xD5,0x68,0x80,0xAA,0x96,0xCA,0x0B,0x8A,0xE6,0xF1,0xB1,0x41, ++ 0xC6,0x75,0x94,0x0A,0x0A,0x2A,0xFA,0x29, + }; + + static const unsigned char str1[]="12345678901234567890"; +@@ -133,7 +131,7 @@ int FIPS_selftest_dsa() + goto err; + if(!DSA_generate_parameters_ex(dsa, 1024,seed,20,&counter,&h,NULL)) + goto err; +- if (counter != 378) ++ if (counter != 239) + goto err; + if (h != 2) + goto err; diff --git a/openssl-1.0.0c-pkcs12-fips-default.patch b/openssl-1.0.0c-pkcs12-fips-default.patch new file mode 100644 index 0000000..a671722 --- /dev/null +++ b/openssl-1.0.0c-pkcs12-fips-default.patch @@ -0,0 +1,25 @@ +diff -up openssl-1.0.0c/apps/pkcs12.c.fips-default openssl-1.0.0c/apps/pkcs12.c +--- openssl-1.0.0c/apps/pkcs12.c.fips-default 2009-07-27 23:08:45.000000000 +0200 ++++ openssl-1.0.0c/apps/pkcs12.c 2011-02-04 15:25:38.000000000 +0100 +@@ -67,6 +67,9 @@ + #include + #include + #include ++#ifdef OPENSSL_FIPS ++#include ++#endif + + #define PROG pkcs12_main + +@@ -130,6 +133,11 @@ int MAIN(int argc, char **argv) + + apps_startup(); + ++#ifdef OPENSSL_FIPS ++ if (FIPS_mode()) ++ cert_pbe = key_pbe; /* cannot use RC2 in the FIPS mode */ ++#endif ++ + enc = EVP_des_ede3_cbc(); + if (bio_err == NULL ) bio_err = BIO_new_fp (stderr, BIO_NOCLOSE); + diff --git a/openssl-1.0.0c-rsa-x931.patch b/openssl-1.0.0c-rsa-x931.patch new file mode 100644 index 0000000..a60bbcb --- /dev/null +++ b/openssl-1.0.0c-rsa-x931.patch @@ -0,0 +1,36 @@ +diff -up openssl-1.0.0c/apps/genrsa.c.x931 openssl-1.0.0c/apps/genrsa.c +--- openssl-1.0.0c/apps/genrsa.c.x931 2010-03-01 15:22:02.000000000 +0100 ++++ openssl-1.0.0c/apps/genrsa.c 2011-02-01 18:32:05.000000000 +0100 +@@ -95,6 +95,7 @@ int MAIN(int argc, char **argv) + int ret=1; + int i,num=DEFBITS; + long l; ++ int use_x931 = 0; + const EVP_CIPHER *enc=NULL; + unsigned long f4=RSA_F4; + char *outfile=NULL; +@@ -138,6 +139,8 @@ int MAIN(int argc, char **argv) + f4=3; + else if (strcmp(*argv,"-F4") == 0 || strcmp(*argv,"-f4") == 0) + f4=RSA_F4; ++ else if (strcmp(*argv,"-x931") == 0) ++ use_x931 = 1; + #ifndef OPENSSL_NO_ENGINE + else if (strcmp(*argv,"-engine") == 0) + { +@@ -273,7 +276,14 @@ bad: + if (!rsa) + goto err; + +- if(!BN_set_word(bn, f4) || !RSA_generate_key_ex(rsa, num, bn, &cb)) ++ if (use_x931) ++ { ++ if (!BN_set_word(bn, f4)) ++ goto err; ++ if (!RSA_X931_generate_key_ex(rsa, num, bn, &cb)) ++ goto err; ++ } ++ else if(!BN_set_word(bn, f4) || !RSA_generate_key_ex(rsa, num, bn, &cb)) + goto err; + + app_RAND_write_file(NULL, bio_err); diff --git a/openssl-1.0.0c-speed-fips.patch b/openssl-1.0.0c-speed-fips.patch new file mode 100644 index 0000000..048d673 --- /dev/null +++ b/openssl-1.0.0c-speed-fips.patch @@ -0,0 +1,94 @@ +diff -up openssl-1.0.0c/apps/speed.c.spfips openssl-1.0.0c/apps/speed.c +--- openssl-1.0.0c/apps/speed.c.spfips 2010-11-18 14:22:26.000000000 +0100 ++++ openssl-1.0.0c/apps/speed.c 2011-01-24 17:25:32.000000000 +0100 +@@ -100,6 +100,9 @@ + #include + #include + #include ++#ifdef OPENSSL_FIPS ++#include ++#endif + #if !defined(OPENSSL_SYS_MSDOS) + #include OPENSSL_UNISTD + #endif +@@ -908,7 +911,12 @@ int MAIN(int argc, char **argv) + #ifndef OPENSSL_NO_RSA + if (strcmp(*argv,"rsa") == 0) + { ++#ifdef OPENSSL_FIPS ++ if (!FIPS_mode()) ++#endif ++ { + rsa_doit[R_RSA_512]=1; ++ } + rsa_doit[R_RSA_1024]=1; + rsa_doit[R_RSA_2048]=1; + rsa_doit[R_RSA_4096]=1; +@@ -918,7 +926,12 @@ int MAIN(int argc, char **argv) + #ifndef OPENSSL_NO_DSA + if (strcmp(*argv,"dsa") == 0) + { ++#ifdef OPENSSL_FIPS ++ if (!FIPS_mode()) ++#endif ++ { + dsa_doit[R_DSA_512]=1; ++ } + dsa_doit[R_DSA_1024]=1; + dsa_doit[R_DSA_2048]=1; + } +@@ -1193,30 +1206,54 @@ int MAIN(int argc, char **argv) + AES_set_encrypt_key(key32,256,&aes_ks3); + #endif + #ifndef OPENSSL_NO_CAMELLIA ++ if (doit[D_CBC_128_CML] || doit[D_CBC_192_CML] || doit[D_CBC_256_CML]) ++ { + Camellia_set_key(key16,128,&camellia_ks1); + Camellia_set_key(ckey24,192,&camellia_ks2); + Camellia_set_key(ckey32,256,&camellia_ks3); ++ } + #endif + #ifndef OPENSSL_NO_IDEA ++ if (doit[D_CBC_IDEA]) ++ { + idea_set_encrypt_key(key16,&idea_ks); ++ } + #endif + #ifndef OPENSSL_NO_SEED ++ if (doit[D_CBC_SEED]) ++ { + SEED_set_key(key16,&seed_ks); ++ } + #endif + #ifndef OPENSSL_NO_RC4 ++ if (doit[D_RC4]) ++ { + RC4_set_key(&rc4_ks,16,key16); ++ } + #endif + #ifndef OPENSSL_NO_RC2 ++ if (doit[D_CBC_RC2]) ++ { + RC2_set_key(&rc2_ks,16,key16,128); ++ } + #endif + #ifndef OPENSSL_NO_RC5 ++ if (doit[D_CBC_RC5]) ++ { + RC5_32_set_key(&rc5_ks,16,key16,12); ++ } + #endif + #ifndef OPENSSL_NO_BF ++ if (doit[D_CBC_BF]) ++ { + BF_set_key(&bf_ks,16,key16); ++ } + #endif + #ifndef OPENSSL_NO_CAST ++ if (doit[D_CBC_CAST]) ++ { + CAST_set_key(&cast_ks,16,key16); ++ } + #endif + #ifndef OPENSSL_NO_RSA + memset(rsa_c,0,sizeof(rsa_c)); diff --git a/openssl-1.0.0d-apps-dgst.patch b/openssl-1.0.0d-apps-dgst.patch new file mode 100644 index 0000000..da20481 --- /dev/null +++ b/openssl-1.0.0d-apps-dgst.patch @@ -0,0 +1,110 @@ +diff -up openssl-1.0.0d/apps/ca.c.dgst openssl-1.0.0d/apps/ca.c +--- openssl-1.0.0d/apps/ca.c.dgst 2009-12-02 15:41:24.000000000 +0100 ++++ openssl-1.0.0d/apps/ca.c 2011-04-05 21:09:42.000000000 +0200 +@@ -157,7 +157,7 @@ static const char *ca_usage[]={ + " -startdate YYMMDDHHMMSSZ - certificate validity notBefore\n", + " -enddate YYMMDDHHMMSSZ - certificate validity notAfter (overrides -days)\n", + " -days arg - number of days to certify the certificate for\n", +-" -md arg - md to use, one of md2, md5, sha or sha1\n", ++" -md arg - md to use, see openssl dgst -h for list\n", + " -policy arg - The CA 'policy' to support\n", + " -keyfile arg - private key file\n", + " -keyform arg - private key file format (PEM or ENGINE)\n", +diff -up openssl-1.0.0d/apps/enc.c.dgst openssl-1.0.0d/apps/enc.c +--- openssl-1.0.0d/apps/enc.c.dgst 2010-06-15 19:25:02.000000000 +0200 ++++ openssl-1.0.0d/apps/enc.c 2011-04-05 21:11:54.000000000 +0200 +@@ -302,7 +302,7 @@ bad: + BIO_printf(bio_err,"%-14s passphrase is the next argument\n","-k"); + BIO_printf(bio_err,"%-14s passphrase is the first line of the file argument\n","-kfile"); + BIO_printf(bio_err,"%-14s the next argument is the md to use to create a key\n","-md"); +- BIO_printf(bio_err,"%-14s from a passphrase. One of md2, md5, sha or sha1\n",""); ++ BIO_printf(bio_err,"%-14s from a passphrase. See openssl dgst -h for list.\n",""); + BIO_printf(bio_err,"%-14s salt in hex is the next argument\n","-S"); + BIO_printf(bio_err,"%-14s key/iv in hex is the next argument\n","-K/-iv"); + BIO_printf(bio_err,"%-14s print the iv/key (then exit if -P)\n","-[pP]"); +diff -up openssl-1.0.0d/apps/req.c.dgst openssl-1.0.0d/apps/req.c +--- openssl-1.0.0d/apps/req.c.dgst 2010-03-10 14:48:21.000000000 +0100 ++++ openssl-1.0.0d/apps/req.c 2011-04-05 21:12:33.000000000 +0200 +@@ -421,7 +421,7 @@ bad: + #ifndef OPENSSL_NO_ECDSA + BIO_printf(bio_err," -newkey ec:file generate a new EC key, parameters taken from CA in 'file'\n"); + #endif +- BIO_printf(bio_err," -[digest] Digest to sign with (md5, sha1, md2, mdc2, md4)\n"); ++ BIO_printf(bio_err," -[digest] Digest to sign with (see openssl dgst -h for list)\n"); + BIO_printf(bio_err," -config file request template file.\n"); + BIO_printf(bio_err," -subj arg set or modify request subject\n"); + BIO_printf(bio_err," -multivalue-rdn enable support for multivalued RDNs\n"); +diff -up openssl-1.0.0d/apps/ts.c.dgst openssl-1.0.0d/apps/ts.c +--- openssl-1.0.0d/apps/ts.c.dgst 2009-10-18 16:42:26.000000000 +0200 ++++ openssl-1.0.0d/apps/ts.c 2011-04-05 21:16:07.000000000 +0200 +@@ -368,7 +368,7 @@ int MAIN(int argc, char **argv) + BIO_printf(bio_err, "usage:\n" + "ts -query [-rand file%cfile%c...] [-config configfile] " + "[-data file_to_hash] [-digest digest_bytes]" +- "[-md2|-md4|-md5|-sha|-sha1|-mdc2|-ripemd160] " ++ "[-] " + "[-policy object_id] [-no_nonce] [-cert] " + "[-in request.tsq] [-out request.tsq] [-text]\n", + LIST_SEPARATOR_CHAR, LIST_SEPARATOR_CHAR); +diff -up openssl-1.0.0d/apps/x509.c.dgst openssl-1.0.0d/apps/x509.c +--- openssl-1.0.0d/apps/x509.c.dgst 2011-04-05 21:13:42.000000000 +0200 ++++ openssl-1.0.0d/apps/x509.c 2011-04-05 21:13:17.000000000 +0200 +@@ -141,7 +141,7 @@ static const char *x509_usage[]={ + " -set_serial - serial number to use\n", + " -text - print the certificate in text form\n", + " -C - print out C code forms\n", +-" -md2/-md5/-sha1/-mdc2 - digest to use\n", ++" - - digest to use, see openssl dgst -h output for list\n", + " -extfile - configuration file with X509V3 extensions to add\n", + " -extensions - section from config file with X509V3 extensions to add\n", + " -clrext - delete extensions before signing and input certificate\n", +diff -up openssl-1.0.0d/doc/apps/ca.pod.dgst openssl-1.0.0d/doc/apps/ca.pod +--- openssl-1.0.0d/doc/apps/ca.pod.dgst 2009-04-10 13:25:53.000000000 +0200 ++++ openssl-1.0.0d/doc/apps/ca.pod 2011-04-05 21:16:39.000000000 +0200 +@@ -160,7 +160,8 @@ the number of days to certify the certif + =item B<-md alg> + + the message digest to use. Possible values include md5, sha1 and mdc2. +-This option also applies to CRLs. ++For full list of digests see openssl dgst -h output. This option also ++applies to CRLs. + + =item B<-policy arg> + +diff -up openssl-1.0.0d/doc/apps/ocsp.pod.dgst openssl-1.0.0d/doc/apps/ocsp.pod +--- openssl-1.0.0d/doc/apps/ocsp.pod.dgst 2008-02-25 19:11:47.000000000 +0100 ++++ openssl-1.0.0d/doc/apps/ocsp.pod 2011-04-05 21:18:17.000000000 +0200 +@@ -210,7 +210,8 @@ check is not performed. + =item B<-md5|-sha1|-sha256|-ripemod160|...> + + this option sets digest algorithm to use for certificate identification +-in the OCSP request. By default SHA-1 is used. ++in the OCSP request. By default SHA-1 is used. See openssl dgst -h output for ++the list of available algorithms. + + =back + +diff -up openssl-1.0.0d/doc/apps/req.pod.dgst openssl-1.0.0d/doc/apps/req.pod +--- openssl-1.0.0d/doc/apps/req.pod.dgst 2009-04-10 18:42:28.000000000 +0200 ++++ openssl-1.0.0d/doc/apps/req.pod 2011-04-05 21:20:47.000000000 +0200 +@@ -201,7 +201,8 @@ will not be encrypted. + + this specifies the message digest to sign the request with (such as + B<-md5>, B<-sha1>). This overrides the digest algorithm specified in +-the configuration file. ++the configuration file. For full list of possible digests see openssl ++dgst -h output. + + Some public key algorithms may override this choice. For instance, DSA + signatures always use SHA1, GOST R 34.10 signatures always use +diff -up openssl-1.0.0d/doc/apps/x509.pod.dgst openssl-1.0.0d/doc/apps/x509.pod +--- openssl-1.0.0d/doc/apps/x509.pod.dgst 2010-01-12 18:27:11.000000000 +0100 ++++ openssl-1.0.0d/doc/apps/x509.pod 2011-04-05 21:19:56.000000000 +0200 +@@ -101,6 +101,7 @@ the digest to use. This affects any sign + digest, such as the B<-fingerprint>, B<-signkey> and B<-CA> options. If not + specified then SHA1 is used. If the key being used to sign with is a DSA key + then this option has no effect: SHA1 is always used with DSA keys. ++For full list of digests see openssl dgst -h output. + + =item B<-engine id> + diff --git a/openssl-1.0.0d-version.patch b/openssl-1.0.0d-version.patch new file mode 100644 index 0000000..da50fb4 --- /dev/null +++ b/openssl-1.0.0d-version.patch @@ -0,0 +1,22 @@ +diff -up openssl-1.0.0d/crypto/opensslv.h.version openssl-1.0.0d/crypto/opensslv.h +--- openssl-1.0.0d/crypto/opensslv.h.version 2011-02-10 14:24:52.000000000 +0100 ++++ openssl-1.0.0d/crypto/opensslv.h 2011-02-10 14:48:00.000000000 +0100 +@@ -25,7 +25,8 @@ + * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for + * major minor fix final patch/beta) + */ +-#define OPENSSL_VERSION_NUMBER 0x1000004fL ++/* we have to keep the version number to not break the abi */ ++#define OPENSSL_VERSION_NUMBER 0x10000003 + #ifdef OPENSSL_FIPS + #define OPENSSL_VERSION_TEXT "OpenSSL 1.0.0d-fips 8 Feb 2011" + #else +@@ -83,7 +84,7 @@ + * should only keep the versions that are binary compatible with the current. + */ + #define SHLIB_VERSION_HISTORY "" +-#define SHLIB_VERSION_NUMBER "1.0.0" ++#define SHLIB_VERSION_NUMBER "1.0.0d" + + + #endif /* HEADER_OPENSSLV_H */ diff --git a/sources b/sources index f42b68d..302a734 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -36a9936e1791566b205daa7cb4bea074 openssl-1.0.0a-usa.tar.bz2 +531c1627ff9701cb8540ee3bd03de5d7 openssl-1.0.0d-usa.tar.bz2