diff options
author | Jung-uk Kim <jkim@FreeBSD.org> | 2020-09-22 14:27:08 +0000 |
---|---|---|
committer | Jung-uk Kim <jkim@FreeBSD.org> | 2020-09-22 14:27:08 +0000 |
commit | 92f02b3b0f21350e7c92a16ca9b594ad7682c717 (patch) | |
tree | 00444fe1520f87a0f22770b5c0be936737fb2179 /crypto | |
parent | 65aa3028e51cba07879f3dc4608949c5c6b9fcc0 (diff) |
Import OpenSSL 1.1.1h. (tag: vendor/openssl/1.1.1h)
Notes:
svn path=/vendor-crypto/openssl/dist/; revision=365997
svn path=/vendor-crypto/openssl/1.1.1h/; revision=365998; tag=vendor/openssl/1.1.1h
Diffstat (limited to 'crypto')
86 files changed, 1210 insertions, 3246 deletions
diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c index 687dd5829baa..ad00c729e700 100644 --- a/crypto/aes/aes_core.c +++ b/crypto/aes/aes_core.c @@ -673,357 +673,6 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, InvCipher(in, out, rk, key->rounds); } - -# ifndef OPENSSL_SMALL_FOOTPRINT -void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, - size_t blocks, const AES_KEY *key, - const unsigned char *ivec); - -static void RawToBits(const u8 raw[64], u64 bits[8]) -{ - int i, j; - u64 in, out; - - memset(bits, 0, 64); - for (i = 0; i < 8; i++) { - in = 0; - for (j = 0; j < 8; j++) - in |= ((u64)raw[i * 8 + j]) << (8 * j); - out = in & 0xF0F0F0F00F0F0F0FuLL; - out |= (in & 0x0F0F0F0F00000000uLL) >> 28; - out |= (in & 0x00000000F0F0F0F0uLL) << 28; - in = out & 0xCCCC3333CCCC3333uLL; - in |= (out & 0x3333000033330000uLL) >> 14; - in |= (out & 0x0000CCCC0000CCCCuLL) << 14; - out = in & 0xAA55AA55AA55AA55uLL; - out |= (in & 0x5500550055005500uLL) >> 7; - out |= (in & 0x00AA00AA00AA00AAuLL) << 7; - for (j = 0; j < 8; j++) { - bits[j] |= (out & 0xFFuLL) << (8 * i); - out = out >> 8; - } - } -} - -static void BitsToRaw(const u64 bits[8], u8 raw[64]) -{ - int i, j; - u64 in, out; - - for (i = 0; i < 8; i++) { - in = 0; - for (j = 0; j < 8; j++) - in |= ((bits[j] >> (8 * i)) & 0xFFuLL) << (8 * j); - out = in & 0xF0F0F0F00F0F0F0FuLL; - out |= (in & 0x0F0F0F0F00000000uLL) >> 28; - out |= (in & 0x00000000F0F0F0F0uLL) << 28; - in = out & 0xCCCC3333CCCC3333uLL; - in |= (out & 0x3333000033330000uLL) >> 14; - in |= (out & 0x0000CCCC0000CCCCuLL) << 14; - out = in & 0xAA55AA55AA55AA55uLL; - out |= (in & 0x5500550055005500uLL) >> 7; - out |= (in & 0x00AA00AA00AA00AAuLL) << 7; - for (j = 0; j < 8; j++) { - raw[i * 8 + j] = (u8)out; - out = out >> 8; - } - } -} - -static void BitsXtime(u64 state[8]) -{ - u64 b; - - b = state[7]; - state[7] = state[6]; - state[6] = state[5]; - state[5] = state[4]; - state[4] = state[3] ^ b; - state[3] = state[2] ^ b; 
- state[2] = state[1]; - state[1] = state[0] ^ b; - state[0] = b; -} - -/* - * This S-box implementation follows a circuit described in - * Boyar and Peralta: "A new combinational logic minimization - * technique with applications to cryptology." - * https://eprint.iacr.org/2009/191.pdf - * - * The math is similar to above, in that it uses - * a tower field of GF(2^2^2^2) but with a different - * basis representation, that is better suited to - * logic designs. - */ -static void BitsSub(u64 state[8]) -{ - u64 x0, x1, x2, x3, x4, x5, x6, x7; - u64 y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11; - u64 y12, y13, y14, y15, y16, y17, y18, y19, y20, y21; - u64 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11; - u64 t12, t13, t14, t15, t16, t17, t18, t19, t20, t21; - u64 t22, t23, t24, t25, t26, t27, t28, t29, t30, t31; - u64 t32, t33, t34, t35, t36, t37, t38, t39, t40, t41; - u64 t42, t43, t44, t45, t46, t47, t48, t49, t50, t51; - u64 t52, t53, t54, t55, t56, t57, t58, t59, t60, t61; - u64 t62, t63, t64, t65, t66, t67; - u64 z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11; - u64 z12, z13, z14, z15, z16, z17; - u64 s0, s1, s2, s3, s4, s5, s6, s7; - - x7 = state[0]; - x6 = state[1]; - x5 = state[2]; - x4 = state[3]; - x3 = state[4]; - x2 = state[5]; - x1 = state[6]; - x0 = state[7]; - y14 = x3 ^ x5; - y13 = x0 ^ x6; - y9 = x0 ^ x3; - y8 = x0 ^ x5; - t0 = x1 ^ x2; - y1 = t0 ^ x7; - y4 = y1 ^ x3; - y12 = y13 ^ y14; - y2 = y1 ^ x0; - y5 = y1 ^ x6; - y3 = y5 ^ y8; - t1 = x4 ^ y12; - y15 = t1 ^ x5; - y20 = t1 ^ x1; - y6 = y15 ^ x7; - y10 = y15 ^ t0; - y11 = y20 ^ y9; - y7 = x7 ^ y11; - y17 = y10 ^ y11; - y19 = y10 ^ y8; - y16 = t0 ^ y11; - y21 = y13 ^ y16; - y18 = x0 ^ y16; - t2 = y12 & y15; - t3 = y3 & y6; - t4 = t3 ^ t2; - t5 = y4 & x7; - t6 = t5 ^ t2; - t7 = y13 & y16; - t8 = y5 & y1; - t9 = t8 ^ t7; - t10 = y2 & y7; - t11 = t10 ^ t7; - t12 = y9 & y11; - t13 = y14 & y17; - t14 = t13 ^ t12; - t15 = y8 & y10; - t16 = t15 ^ t12; - t17 = t4 ^ t14; - t18 = t6 ^ t16; - t19 = t9 ^ 
t14; - t20 = t11 ^ t16; - t21 = t17 ^ y20; - t22 = t18 ^ y19; - t23 = t19 ^ y21; - t24 = t20 ^ y18; - t25 = t21 ^ t22; - t26 = t21 & t23; - t27 = t24 ^ t26; - t28 = t25 & t27; - t29 = t28 ^ t22; - t30 = t23 ^ t24; - t31 = t22 ^ t26; - t32 = t31 & t30; - t33 = t32 ^ t24; - t34 = t23 ^ t33; - t35 = t27 ^ t33; - t36 = t24 & t35; - t37 = t36 ^ t34; - t38 = t27 ^ t36; - t39 = t29 & t38; - t40 = t25 ^ t39; - t41 = t40 ^ t37; - t42 = t29 ^ t33; - t43 = t29 ^ t40; - t44 = t33 ^ t37; - t45 = t42 ^ t41; - z0 = t44 & y15; - z1 = t37 & y6; - z2 = t33 & x7; - z3 = t43 & y16; - z4 = t40 & y1; - z5 = t29 & y7; - z6 = t42 & y11; - z7 = t45 & y17; - z8 = t41 & y10; - z9 = t44 & y12; - z10 = t37 & y3; - z11 = t33 & y4; - z12 = t43 & y13; - z13 = t40 & y5; - z14 = t29 & y2; - z15 = t42 & y9; - z16 = t45 & y14; - z17 = t41 & y8; - t46 = z15 ^ z16; - t47 = z10 ^ z11; - t48 = z5 ^ z13; - t49 = z9 ^ z10; - t50 = z2 ^ z12; - t51 = z2 ^ z5; - t52 = z7 ^ z8; - t53 = z0 ^ z3; - t54 = z6 ^ z7; - t55 = z16 ^ z17; - t56 = z12 ^ t48; - t57 = t50 ^ t53; - t58 = z4 ^ t46; - t59 = z3 ^ t54; - t60 = t46 ^ t57; - t61 = z14 ^ t57; - t62 = t52 ^ t58; - t63 = t49 ^ t58; - t64 = z4 ^ t59; - t65 = t61 ^ t62; - t66 = z1 ^ t63; - s0 = t59 ^ t63; - s6 = ~(t56 ^ t62); - s7 = ~(t48 ^ t60); - t67 = t64 ^ t65; - s3 = t53 ^ t66; - s4 = t51 ^ t66; - s5 = t47 ^ t65; - s1 = ~(t64 ^ s3); - s2 = ~(t55 ^ t67); - state[0] = s7; - state[1] = s6; - state[2] = s5; - state[3] = s4; - state[4] = s3; - state[5] = s2; - state[6] = s1; - state[7] = s0; -} - -static void BitsShiftRows(u64 state[8]) -{ - u64 s, s0; - int i; - - for (i = 0; i < 8; i++) { - s = state[i]; - s0 = s & 0x1111111111111111uLL; - s0 |= ((s & 0x2220222022202220uLL) >> 4) | ((s & 0x0002000200020002uLL) << 12); - s0 |= ((s & 0x4400440044004400uLL) >> 8) | ((s & 0x0044004400440044uLL) << 8); - s0 |= ((s & 0x8000800080008000uLL) >> 12) | ((s & 0x0888088808880888uLL) << 4); - state[i] = s0; - } -} - -static void BitsMixColumns(u64 state[8]) -{ - u64 s1, s; - 
u64 s0[8]; - int i; - - for (i = 0; i < 8; i++) { - s1 = state[i]; - s = s1; - s ^= ((s & 0xCCCCCCCCCCCCCCCCuLL) >> 2) | ((s & 0x3333333333333333uLL) << 2); - s ^= ((s & 0xAAAAAAAAAAAAAAAAuLL) >> 1) | ((s & 0x5555555555555555uLL) << 1); - s ^= s1; - s0[i] = s; - } - BitsXtime(state); - for (i = 0; i < 8; i++) { - s1 = state[i]; - s = s0[i]; - s ^= s1; - s ^= ((s1 & 0xEEEEEEEEEEEEEEEEuLL) >> 1) | ((s1 & 0x1111111111111111uLL) << 3); - state[i] = s; - } -} - -static void BitsAddRoundKey(u64 state[8], const u64 key[8]) -{ - int i; - - for (i = 0; i < 8; i++) - state[i] ^= key[i]; -} - -void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, - size_t blocks, const AES_KEY *key, - const unsigned char *ivec) -{ - struct { - u8 cipher[64]; - u64 state[8]; - u64 rd_key[AES_MAXNR + 1][8]; - } *bs; - u32 ctr32; - int i; - - ctr32 = GETU32(ivec + 12); - if (blocks >= 4 - && (bs = OPENSSL_malloc(sizeof(*bs)))) { - for (i = 0; i < key->rounds + 1; i++) { - memcpy(bs->cipher + 0, &key->rd_key[4 * i], 16); - memcpy(bs->cipher + 16, bs->cipher, 16); - memcpy(bs->cipher + 32, bs->cipher, 32); - RawToBits(bs->cipher, bs->rd_key[i]); - } - while (blocks) { - memcpy(bs->cipher, ivec, 12); - PUTU32(bs->cipher + 12, ctr32); - ctr32++; - memcpy(bs->cipher + 16, ivec, 12); - PUTU32(bs->cipher + 28, ctr32); - ctr32++; - memcpy(bs->cipher + 32, ivec, 12); - PUTU32(bs->cipher + 44, ctr32); - ctr32++; - memcpy(bs->cipher + 48, ivec, 12); - PUTU32(bs->cipher + 60, ctr32); - ctr32++; - RawToBits(bs->cipher, bs->state); - BitsAddRoundKey(bs->state, bs->rd_key[0]); - for (i = 1; i < key->rounds; i++) { - BitsSub(bs->state); - BitsShiftRows(bs->state); - BitsMixColumns(bs->state); - BitsAddRoundKey(bs->state, bs->rd_key[i]); - } - BitsSub(bs->state); - BitsShiftRows(bs->state); - BitsAddRoundKey(bs->state, bs->rd_key[key->rounds]); - BitsToRaw(bs->state, bs->cipher); - for (i = 0; i < 64 && blocks; i++) { - out[i] = in[i] ^ bs->cipher[i]; - if ((i & 15) == 15) - blocks--; - } - in += 
i; - out += i; - } - OPENSSL_clear_free(bs, sizeof(*bs)); - } else { - unsigned char cipher[16]; - - while (blocks) { - memcpy(cipher, ivec, 12); - PUTU32(cipher + 12, ctr32); - AES_encrypt(cipher, cipher, key); - for (i = 0; i < 16; i++) - out[i] = in[i] ^ cipher[i]; - in += 16; - out += 16; - ctr32++; - blocks--; - } - } -} -# endif #elif !defined(AES_ASM) /*- Te0[x] = S [x].[02, 01, 01, 03]; diff --git a/crypto/aes/aes_ige.c b/crypto/aes/aes_ige.c index dce4ef11be4f..804b3a723d1f 100644 --- a/crypto/aes/aes_ige.c +++ b/crypto/aes/aes_ige.c @@ -1,5 +1,5 @@ /* - * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -12,11 +12,6 @@ #include <openssl/aes.h> #include "aes_local.h" -#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long)) -typedef struct { - unsigned long data[N_WORDS]; -} aes_block_t; - /* XXX: probably some better way to do this */ #if defined(__i386__) || defined(__x86_64__) # define UNALIGNED_MEMOPS_ARE_FAST 1 @@ -24,6 +19,15 @@ typedef struct { # define UNALIGNED_MEMOPS_ARE_FAST 0 #endif +#define N_WORDS (AES_BLOCK_SIZE / sizeof(unsigned long)) +typedef struct { + unsigned long data[N_WORDS]; +#if defined(__GNUC__) && UNALIGNED_MEMOPS_ARE_FAST +} aes_block_t __attribute((__aligned__(1))); +#else +} aes_block_t; +#endif + #if UNALIGNED_MEMOPS_ARE_FAST # define load_block(d, s) (d) = *(const aes_block_t *)(s) # define store_block(d, s) *(aes_block_t *)(d) = (s) diff --git a/crypto/aes/asm/aesni-mb-x86_64.pl b/crypto/aes/asm/aesni-mb-x86_64.pl index be2434f12041..a80cfdc13948 100755 --- a/crypto/aes/asm/aesni-mb-x86_64.pl +++ b/crypto/aes/asm/aesni-mb-x86_64.pl @@ -70,7 +70,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ 
/((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl index 42fe5d469ea6..04fd13be5e09 100755 --- a/crypto/aes/asm/aesni-sha1-x86_64.pl +++ b/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -108,7 +108,7 @@ $avx=1 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && $avx=1 if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && `ml64 2>&1` =~ /Version ([0-9]+)\./ && $1>=10); -$avx=1 if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/ && $2>=3.0); +$avx=1 if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/ && $2>=3.0); $shaext=1; ### set to zero if compiling for 1.0.1 diff --git a/crypto/aes/asm/aesni-sha256-x86_64.pl b/crypto/aes/asm/aesni-sha256-x86_64.pl index f1b144eccbdd..ff9b18507da0 100755 --- a/crypto/aes/asm/aesni-sha256-x86_64.pl +++ b/crypto/aes/asm/aesni-sha256-x86_64.pl @@ -70,7 +70,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=12); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/asn1/d2i_pr.c b/crypto/asn1/d2i_pr.c index 6ec010738049..7b127d2092fa 100644 --- a/crypto/asn1/d2i_pr.c +++ b/crypto/asn1/d2i_pr.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -56,6 +56,8 @@ EVP_PKEY *d2i_PrivateKey(int type, EVP_PKEY **a, const unsigned char **pp, goto err; EVP_PKEY_free(ret); ret = tmp; + if (EVP_PKEY_type(type) != EVP_PKEY_base_id(ret)) + goto err; } else { ASN1err(ASN1_F_D2I_PRIVATEKEY, ERR_R_ASN1_LIB); goto err; diff --git a/crypto/asn1/x_algor.c b/crypto/asn1/x_algor.c index 4c4a718850ee..c9a8f1e9d1d4 100644 --- a/crypto/asn1/x_algor.c +++ b/crypto/asn1/x_algor.c @@ -1,5 +1,5 @@ /* - * Copyright 1998-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1998-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -92,3 +92,35 @@ int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b) return 0; return ASN1_TYPE_cmp(a->parameter, b->parameter); } + +int X509_ALGOR_copy(X509_ALGOR *dest, const X509_ALGOR *src) +{ + if (src == NULL || dest == NULL) + return 0; + + if (dest->algorithm) + ASN1_OBJECT_free(dest->algorithm); + dest->algorithm = NULL; + + if (dest->parameter) + ASN1_TYPE_free(dest->parameter); + dest->parameter = NULL; + + if (src->algorithm) + if ((dest->algorithm = OBJ_dup(src->algorithm)) == NULL) + return 0; + + if (src->parameter) { + dest->parameter = ASN1_TYPE_new(); + if (dest->parameter == NULL) + return 0; + + /* Assuming this is also correct for a BOOL. + * set does copy as a side effect. + */ + if (ASN1_TYPE_set1(dest->parameter, + src->parameter->type, src->parameter->value.ptr) == 0) + return 0; + } + return 1; +} diff --git a/crypto/bio/b_print.c b/crypto/bio/b_print.c index 8ef90ac1d4f8..41b7f5e2f61d 100644 --- a/crypto/bio/b_print.c +++ b/crypto/bio/b_print.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). 
You may not use * this file except in compliance with the License. You can obtain a copy @@ -635,7 +635,11 @@ fmtfp(char **sbuffer, fvalue = tmpvalue; } ufvalue = abs_val(fvalue); - if (ufvalue > ULONG_MAX) { + /* + * By subtracting 65535 (2^16-1) we cancel the low order 15 bits + * of ULONG_MAX to avoid using imprecise floating point values. + */ + if (ufvalue >= (double)(ULONG_MAX - 65535) + 65536.0) { /* Number too big */ return 0; } diff --git a/crypto/bio/bss_acpt.c b/crypto/bio/bss_acpt.c index 5a2cb50dfc39..4461eae2333d 100644 --- a/crypto/bio/bss_acpt.c +++ b/crypto/bio/bss_acpt.c @@ -434,8 +434,10 @@ static long acpt_ctrl(BIO *b, int cmd, long num, void *ptr) b->init = 1; } else if (num == 1) { OPENSSL_free(data->param_serv); - data->param_serv = BUF_strdup(ptr); - b->init = 1; + if ((data->param_serv = OPENSSL_strdup(ptr)) == NULL) + ret = 0; + else + b->init = 1; } else if (num == 2) { data->bind_mode |= BIO_SOCK_NONBLOCK; } else if (num == 3) { diff --git a/crypto/bio/bss_conn.c b/crypto/bio/bss_conn.c index dd43a406018c..807a82b23ba2 100644 --- a/crypto/bio/bss_conn.c +++ b/crypto/bio/bss_conn.c @@ -186,8 +186,17 @@ static int conn_state(BIO *b, BIO_CONNECT *c) case BIO_CONN_S_BLOCKED_CONNECT: i = BIO_sock_error(b->num); - if (i) { + if (i != 0) { BIO_clear_retry_flags(b); + if ((c->addr_iter = BIO_ADDRINFO_next(c->addr_iter)) != NULL) { + /* + * if there are more addresses to try, do that first + */ + BIO_closesocket(b->num); + c->state = BIO_CONN_S_CREATE_SOCKET; + ERR_clear_error(); + break; + } SYSerr(SYS_F_CONNECT, i); ERR_add_error_data(4, "hostname=", c->param_hostname, @@ -407,12 +416,13 @@ static long conn_ctrl(BIO *b, int cmd, long num, void *ptr) case BIO_C_SET_CONNECT: if (ptr != NULL) { b->init = 1; - if (num == 0) { + if (num == 0) { /* BIO_set_conn_hostname */ char *hold_service = data->param_service; /* We affect the hostname regardless. 
However, the input * string might contain a host:service spec, so we must * parse it, which might or might not affect the service */ + OPENSSL_free(data->param_hostname); data->param_hostname = NULL; ret = BIO_parse_hostserv(ptr, @@ -421,19 +431,29 @@ static long conn_ctrl(BIO *b, int cmd, long num, void *ptr) BIO_PARSE_PRIO_HOST); if (hold_service != data->param_service) OPENSSL_free(hold_service); - } else if (num == 1) { + } else if (num == 1) { /* BIO_set_conn_port */ OPENSSL_free(data->param_service); - data->param_service = BUF_strdup(ptr); - } else if (num == 2) { + if ((data->param_service = OPENSSL_strdup(ptr)) == NULL) + ret = 0; + } else if (num == 2) { /* BIO_set_conn_address */ const BIO_ADDR *addr = (const BIO_ADDR *)ptr; + char *host = BIO_ADDR_hostname_string(addr, 1); + char *service = BIO_ADDR_service_string(addr, 1); + + ret = host != NULL && service != NULL; if (ret) { - data->param_hostname = BIO_ADDR_hostname_string(addr, 1); - data->param_service = BIO_ADDR_service_string(addr, 1); + OPENSSL_free(data->param_hostname); + data->param_hostname = host; + OPENSSL_free(data->param_service); + data->param_service = service; BIO_ADDRINFO_free(data->addr_first); data->addr_first = NULL; data->addr_iter = NULL; + } else { + OPENSSL_free(host); + OPENSSL_free(service); } - } else if (num == 3) { + } else if (num == 3) { /* BIO_set_conn_ip_family */ data->connect_family = *(int *)ptr; } else { ret = 0; diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 9b54f9d9ed3c..0be771febc16 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -66,7 +66,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+)\.([0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $avx = ($ver>=3.0) + 
($ver>=3.01); $addx = ($ver>=3.03); diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 8172dace74ee..cf08ce9b8356 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -81,7 +81,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $addx = ($ver>=3.03); } diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index e00cac448b89..f14d4e63b975 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -75,7 +75,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $addx = ($ver>=3.03); } diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 887770b30db1..8c37d132e476 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -60,7 +60,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $addx = ($ver>=3.03); } diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c index ef81acb77ba6..0941f7b97f3f 100644 --- a/crypto/bn/bn_gcd.c +++ b/crypto/bn/bn_gcd.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. 
+ * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -10,22 +10,189 @@ #include "internal/cryptlib.h" #include "bn_local.h" -/* solves ax == 1 (mod n) */ -static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, - const BIGNUM *a, const BIGNUM *n, - BN_CTX *ctx); - -BIGNUM *BN_mod_inverse(BIGNUM *in, - const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) +/* + * bn_mod_inverse_no_branch is a special version of BN_mod_inverse. It does + * not contain branches that may leak sensitive information. + * + * This is a static function, we ensure all callers in this file pass valid + * arguments: all passed pointers here are non-NULL. + */ +static ossl_inline +BIGNUM *bn_mod_inverse_no_branch(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, + BN_CTX *ctx, int *pnoinv) { - BIGNUM *rv; - int noinv; - rv = int_bn_mod_inverse(in, a, n, ctx, &noinv); - if (noinv) - BNerr(BN_F_BN_MOD_INVERSE, BN_R_NO_INVERSE); - return rv; + BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL; + BIGNUM *ret = NULL; + int sign; + + bn_check_top(a); + bn_check_top(n); + + BN_CTX_start(ctx); + A = BN_CTX_get(ctx); + B = BN_CTX_get(ctx); + X = BN_CTX_get(ctx); + D = BN_CTX_get(ctx); + M = BN_CTX_get(ctx); + Y = BN_CTX_get(ctx); + T = BN_CTX_get(ctx); + if (T == NULL) + goto err; + + if (in == NULL) + R = BN_new(); + else + R = in; + if (R == NULL) + goto err; + + BN_one(X); + BN_zero(Y); + if (BN_copy(B, a) == NULL) + goto err; + if (BN_copy(A, n) == NULL) + goto err; + A->neg = 0; + + if (B->neg || (BN_ucmp(B, A) >= 0)) { + /* + * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked, + * BN_div_no_branch will be called eventually. 
+ */ + { + BIGNUM local_B; + bn_init(&local_B); + BN_with_flags(&local_B, B, BN_FLG_CONSTTIME); + if (!BN_nnmod(B, &local_B, A, ctx)) + goto err; + /* Ensure local_B goes out of scope before any further use of B */ + } + } + sign = -1; + /*- + * From B = a mod |n|, A = |n| it follows that + * + * 0 <= B < A, + * -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|). + */ + + while (!BN_is_zero(B)) { + BIGNUM *tmp; + + /*- + * 0 < B < A, + * (*) -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|) + */ + + /* + * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked, + * BN_div_no_branch will be called eventually. + */ + { + BIGNUM local_A; + bn_init(&local_A); + BN_with_flags(&local_A, A, BN_FLG_CONSTTIME); + + /* (D, M) := (A/B, A%B) ... */ + if (!BN_div(D, M, &local_A, B, ctx)) + goto err; + /* Ensure local_A goes out of scope before any further use of A */ + } + + /*- + * Now + * A = D*B + M; + * thus we have + * (**) sign*Y*a == D*B + M (mod |n|). + */ + + tmp = A; /* keep the BIGNUM object, the value does not + * matter */ + + /* (A, B) := (B, A mod B) ... */ + A = B; + B = M; + /* ... so we have 0 <= B < A again */ + + /*- + * Since the former M is now B and the former B is now A, + * (**) translates into + * sign*Y*a == D*A + B (mod |n|), + * i.e. + * sign*Y*a - D*A == B (mod |n|). + * Similarly, (*) translates into + * -sign*X*a == A (mod |n|). + * + * Thus, + * sign*Y*a + D*sign*X*a == B (mod |n|), + * i.e. + * sign*(Y + D*X)*a == B (mod |n|). + * + * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at + * -sign*X*a == B (mod |n|), + * sign*Y*a == A (mod |n|). + * Note that X and Y stay non-negative all the time. 
+ */ + + if (!BN_mul(tmp, D, X, ctx)) + goto err; + if (!BN_add(tmp, tmp, Y)) + goto err; + + M = Y; /* keep the BIGNUM object, the value does not + * matter */ + Y = X; + X = tmp; + sign = -sign; + } + + /*- + * The while loop (Euclid's algorithm) ends when + * A == gcd(a,n); + * we have + * sign*Y*a == A (mod |n|), + * where Y is non-negative. + */ + + if (sign < 0) { + if (!BN_sub(Y, n, Y)) + goto err; + } + /* Now Y*a == A (mod |n|). */ + + if (BN_is_one(A)) { + /* Y*a == 1 (mod |n|) */ + if (!Y->neg && BN_ucmp(Y, n) < 0) { + if (!BN_copy(R, Y)) + goto err; + } else { + if (!BN_nnmod(R, Y, n, ctx)) + goto err; + } + } else { + *pnoinv = 1; + /* caller sets the BN_R_NO_INVERSE error */ + goto err; + } + + ret = R; + *pnoinv = 0; + + err: + if ((ret == NULL) && (in == NULL)) + BN_free(R); + BN_CTX_end(ctx); + bn_check_top(ret); + return ret; } +/* + * This is an internal function, we assume all callers pass valid arguments: + * all pointers passed here are assumed non-NULL. + */ BIGNUM *int_bn_mod_inverse(BIGNUM *in, const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx, int *pnoinv) @@ -36,17 +203,15 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in, /* This is invalid input so we don't worry about constant time here */ if (BN_abs_is_word(n, 1) || BN_is_zero(n)) { - if (pnoinv != NULL) - *pnoinv = 1; + *pnoinv = 1; return NULL; } - if (pnoinv != NULL) - *pnoinv = 0; + *pnoinv = 0; if ((BN_get_flags(a, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(n, BN_FLG_CONSTTIME) != 0)) { - return BN_mod_inverse_no_branch(in, a, n, ctx); + return bn_mod_inverse_no_branch(in, a, n, ctx, pnoinv); } bn_check_top(a); @@ -332,8 +497,7 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in, goto err; } } else { - if (pnoinv) - *pnoinv = 1; + *pnoinv = 1; goto err; } ret = R; @@ -345,175 +509,27 @@ BIGNUM *int_bn_mod_inverse(BIGNUM *in, return ret; } -/* - * BN_mod_inverse_no_branch is a special version of BN_mod_inverse. It does - * not contain branches that may leak sensitive information. 
- */ -static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, - const BIGNUM *a, const BIGNUM *n, - BN_CTX *ctx) +/* solves ax == 1 (mod n) */ +BIGNUM *BN_mod_inverse(BIGNUM *in, + const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) { - BIGNUM *A, *B, *X, *Y, *M, *D, *T, *R = NULL; - BIGNUM *ret = NULL; - int sign; - - bn_check_top(a); - bn_check_top(n); - - BN_CTX_start(ctx); - A = BN_CTX_get(ctx); - B = BN_CTX_get(ctx); - X = BN_CTX_get(ctx); - D = BN_CTX_get(ctx); - M = BN_CTX_get(ctx); - Y = BN_CTX_get(ctx); - T = BN_CTX_get(ctx); - if (T == NULL) - goto err; - - if (in == NULL) - R = BN_new(); - else - R = in; - if (R == NULL) - goto err; - - BN_one(X); - BN_zero(Y); - if (BN_copy(B, a) == NULL) - goto err; - if (BN_copy(A, n) == NULL) - goto err; - A->neg = 0; - - if (B->neg || (BN_ucmp(B, A) >= 0)) { - /* - * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked, - * BN_div_no_branch will be called eventually. - */ - { - BIGNUM local_B; - bn_init(&local_B); - BN_with_flags(&local_B, B, BN_FLG_CONSTTIME); - if (!BN_nnmod(B, &local_B, A, ctx)) - goto err; - /* Ensure local_B goes out of scope before any further use of B */ - } - } - sign = -1; - /*- - * From B = a mod |n|, A = |n| it follows that - * - * 0 <= B < A, - * -sign*X*a == B (mod |n|), - * sign*Y*a == A (mod |n|). - */ - - while (!BN_is_zero(B)) { - BIGNUM *tmp; - - /*- - * 0 < B < A, - * (*) -sign*X*a == B (mod |n|), - * sign*Y*a == A (mod |n|) - */ - - /* - * Turn BN_FLG_CONSTTIME flag on, so that when BN_div is invoked, - * BN_div_no_branch will be called eventually. - */ - { - BIGNUM local_A; - bn_init(&local_A); - BN_with_flags(&local_A, A, BN_FLG_CONSTTIME); + BN_CTX *new_ctx = NULL; + BIGNUM *rv; + int noinv = 0; - /* (D, M) := (A/B, A%B) ... 
*/ - if (!BN_div(D, M, &local_A, B, ctx)) - goto err; - /* Ensure local_A goes out of scope before any further use of A */ + if (ctx == NULL) { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) { + BNerr(BN_F_BN_MOD_INVERSE, ERR_R_MALLOC_FAILURE); + return NULL; } - - /*- - * Now - * A = D*B + M; - * thus we have - * (**) sign*Y*a == D*B + M (mod |n|). - */ - - tmp = A; /* keep the BIGNUM object, the value does not - * matter */ - - /* (A, B) := (B, A mod B) ... */ - A = B; - B = M; - /* ... so we have 0 <= B < A again */ - - /*- - * Since the former M is now B and the former B is now A, - * (**) translates into - * sign*Y*a == D*A + B (mod |n|), - * i.e. - * sign*Y*a - D*A == B (mod |n|). - * Similarly, (*) translates into - * -sign*X*a == A (mod |n|). - * - * Thus, - * sign*Y*a + D*sign*X*a == B (mod |n|), - * i.e. - * sign*(Y + D*X)*a == B (mod |n|). - * - * So if we set (X, Y, sign) := (Y + D*X, X, -sign), we arrive back at - * -sign*X*a == B (mod |n|), - * sign*Y*a == A (mod |n|). - * Note that X and Y stay non-negative all the time. - */ - - if (!BN_mul(tmp, D, X, ctx)) - goto err; - if (!BN_add(tmp, tmp, Y)) - goto err; - - M = Y; /* keep the BIGNUM object, the value does not - * matter */ - Y = X; - X = tmp; - sign = -sign; - } - - /*- - * The while loop (Euclid's algorithm) ends when - * A == gcd(a,n); - * we have - * sign*Y*a == A (mod |n|), - * where Y is non-negative. - */ - - if (sign < 0) { - if (!BN_sub(Y, n, Y)) - goto err; } - /* Now Y*a == A (mod |n|). 
*/ - if (BN_is_one(A)) { - /* Y*a == 1 (mod |n|) */ - if (!Y->neg && BN_ucmp(Y, n) < 0) { - if (!BN_copy(R, Y)) - goto err; - } else { - if (!BN_nnmod(R, Y, n, ctx)) - goto err; - } - } else { - BNerr(BN_F_BN_MOD_INVERSE_NO_BRANCH, BN_R_NO_INVERSE); - goto err; - } - ret = R; - err: - if ((ret == NULL) && (in == NULL)) - BN_free(R); - BN_CTX_end(ctx); - bn_check_top(ret); - return ret; + rv = int_bn_mod_inverse(in, a, n, ctx, &noinv); + if (noinv) + BNerr(BN_F_BN_MOD_INVERSE, BN_R_NO_INVERSE); + BN_CTX_free(new_ctx); + return rv; } /*- diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c index 86d4956c8a8c..eb4a31849bef 100644 --- a/crypto/bn/bn_lib.c +++ b/crypto/bn/bn_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -87,6 +87,15 @@ const BIGNUM *BN_value_one(void) return &const_one; } +/* + * Old Visual Studio ARM compiler miscompiles BN_num_bits_word() + * https://mta.openssl.org/pipermail/openssl-users/2018-August/008465.html + */ +#if defined(_MSC_VER) && defined(_ARM_) && defined(_WIN32_WCE) \ + && _MSC_VER>=1400 && _MSC_VER<1501 +# define MS_BROKEN_BN_num_bits_word +# pragma optimize("", off) +#endif int BN_num_bits_word(BN_ULONG l) { BN_ULONG x, mask; @@ -131,6 +140,9 @@ int BN_num_bits_word(BN_ULONG l) return bits; } +#ifdef MS_BROKEN_BN_num_bits_word +# pragma optimize("", on) +#endif /* * This function still leaks `a->dmax`: it's caller's responsibility to @@ -322,15 +334,19 @@ BIGNUM *BN_dup(const BIGNUM *a) BIGNUM *BN_copy(BIGNUM *a, const BIGNUM *b) { + int bn_words; + bn_check_top(b); + bn_words = BN_get_flags(b, BN_FLG_CONSTTIME) ? 
b->dmax : b->top; + if (a == b) return a; - if (bn_wexpand(a, b->top) == NULL) + if (bn_wexpand(a, bn_words) == NULL) return NULL; if (b->top > 0) - memcpy(a->d, b->d, sizeof(b->d[0]) * b->top); + memcpy(a->d, b->d, sizeof(b->d[0]) * bn_words); a->neg = b->neg; a->top = b->top; diff --git a/crypto/bn/bn_mpi.c b/crypto/bn/bn_mpi.c index bdbe822415c7..0902da5d076e 100644 --- a/crypto/bn/bn_mpi.c +++ b/crypto/bn/bn_mpi.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -45,7 +45,7 @@ BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *ain) int neg = 0; BIGNUM *a = NULL; - if (n < 4) { + if (n < 4 || (d[0] & 0x80) != 0) { BNerr(BN_F_BN_MPI2BN, BN_R_INVALID_LENGTH); return NULL; } diff --git a/crypto/chacha/asm/chacha-x86.pl b/crypto/chacha/asm/chacha-x86.pl index a1f5694b691e..492fda5f114c 100755 --- a/crypto/chacha/asm/chacha-x86.pl +++ b/crypto/chacha/asm/chacha-x86.pl @@ -62,7 +62,7 @@ $ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32" && $1>=10); # first version supporting AVX $ymm=1 if ($xmm && !$ymm && - `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/ && + `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/ && $2>=3.0); # first version supporting AVX $a="eax"; diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl index 647d2537ceae..227ee59ff2ba 100755 --- a/crypto/chacha/asm/chacha-x86_64.pl +++ b/crypto/chacha/asm/chacha-x86_64.pl @@ -85,7 +85,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) 
version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/cmac/cmac.c b/crypto/cmac/cmac.c index 6989c32d0660..1fac53101687 100644 --- a/crypto/cmac/cmac.c +++ b/crypto/cmac/cmac.c @@ -1,5 +1,5 @@ /* - * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -116,11 +116,18 @@ int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, return 1; } /* Initialise context */ - if (cipher && !EVP_EncryptInit_ex(ctx->cctx, cipher, impl, NULL, NULL)) - return 0; + if (cipher != NULL) { + /* Ensure we can't use this ctx until we also have a key */ + ctx->nlast_block = -1; + if (!EVP_EncryptInit_ex(ctx->cctx, cipher, impl, NULL, NULL)) + return 0; + } /* Non-NULL key means initialisation complete */ - if (key) { + if (key != NULL) { int bl; + + /* If anything fails then ensure we can't use this ctx */ + ctx->nlast_block = -1; if (!EVP_CIPHER_CTX_cipher(ctx->cctx)) return 0; if (!EVP_CIPHER_CTX_set_key_length(ctx->cctx, keylen)) @@ -128,7 +135,7 @@ int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen, if (!EVP_EncryptInit_ex(ctx->cctx, NULL, NULL, key, zero_iv)) return 0; bl = EVP_CIPHER_CTX_block_size(ctx->cctx); - if (!EVP_Cipher(ctx->cctx, ctx->tbl, zero_iv, bl)) + if (EVP_Cipher(ctx->cctx, ctx->tbl, zero_iv, bl) <= 0) return 0; make_kn(ctx->k1, ctx->tbl, bl); make_kn(ctx->k2, ctx->k1, bl); @@ -166,12 +173,12 @@ int CMAC_Update(CMAC_CTX *ctx, const void *in, size_t dlen) return 1; data += nleft; /* Else not final block so encrypt it */ - if (!EVP_Cipher(ctx->cctx, ctx->tbl, ctx->last_block, bl)) + if (EVP_Cipher(ctx->cctx, ctx->tbl, ctx->last_block, bl) <= 0) return 0; } /* Encrypt all but one of the complete blocks left */ while (dlen > bl) { - if (!EVP_Cipher(ctx->cctx, ctx->tbl, data, bl)) 
+ if (EVP_Cipher(ctx->cctx, ctx->tbl, data, bl) <= 0) return 0; dlen -= bl; data += bl; diff --git a/crypto/cms/cms_lib.c b/crypto/cms/cms_lib.c index 57afba436115..be4c2c703f1a 100644 --- a/crypto/cms/cms_lib.c +++ b/crypto/cms/cms_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -92,12 +92,13 @@ BIO *CMS_dataInit(CMS_ContentInfo *cms, BIO *icont) default: CMSerr(CMS_F_CMS_DATAINIT, CMS_R_UNSUPPORTED_TYPE); - return NULL; + goto err; } if (cmsbio) return BIO_push(cmsbio, cont); +err: if (!icont) BIO_free(cont); return NULL; diff --git a/crypto/cms/cms_sd.c b/crypto/cms/cms_sd.c index 29ba4c1b1334..3f2a782565a8 100644 --- a/crypto/cms/cms_sd.c +++ b/crypto/cms/cms_sd.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -897,8 +897,10 @@ int CMS_add_simple_smimecap(STACK_OF(X509_ALGOR) **algs, ASN1_INTEGER *key = NULL; if (keysize > 0) { key = ASN1_INTEGER_new(); - if (key == NULL || !ASN1_INTEGER_set(key, keysize)) + if (key == NULL || !ASN1_INTEGER_set(key, keysize)) { + ASN1_INTEGER_free(key); return 0; + } } alg = X509_ALGOR_new(); if (alg == NULL) { diff --git a/crypto/conf/conf_def.c b/crypto/conf/conf_def.c index ca76fa3679b8..3d710f12ae07 100644 --- a/crypto/conf/conf_def.c +++ b/crypto/conf/conf_def.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). 
You may not use * this file except in compliance with the License. You can obtain a copy @@ -376,11 +376,13 @@ static int def_load_bio(CONF *conf, BIO *in, long *line) if (biosk == NULL) { if ((biosk = sk_BIO_new_null()) == NULL) { CONFerr(CONF_F_DEF_LOAD_BIO, ERR_R_MALLOC_FAILURE); + BIO_free(next); goto err; } } if (!sk_BIO_push(biosk, in)) { CONFerr(CONF_F_DEF_LOAD_BIO, ERR_R_MALLOC_FAILURE); + BIO_free(next); goto err; } /* continue with reading from the included BIO */ diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl index ea538c0698d5..fa833ce6aaf3 100755 --- a/crypto/ec/asm/ecp_nistz256-armv4.pl +++ b/crypto/ec/asm/ecp_nistz256-armv4.pl @@ -1517,9 +1517,9 @@ ecp_nistz256_point_add: ldr $t2,[sp,#32*18+12] @ ~is_equal(S1,S2) mvn $t0,$t0 @ -1/0 -> 0/-1 mvn $t1,$t1 @ -1/0 -> 0/-1 - orr $a0,$t0 - orr $a0,$t1 - orrs $a0,$t2 @ set flags + orr $a0,$a0,$t0 + orr $a0,$a0,$t1 + orrs $a0,$a0,$t2 @ set flags @ if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2)) bne .Ladd_proceed diff --git a/crypto/ec/asm/ecp_nistz256-avx2.pl b/crypto/ec/asm/ecp_nistz256-avx2.pl deleted file mode 100755 index 5071d09ac2ec..000000000000 --- a/crypto/ec/asm/ecp_nistz256-avx2.pl +++ /dev/null @@ -1,2080 +0,0 @@ -#! /usr/bin/env perl -# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. -# Copyright (c) 2014, Intel Corporation. All Rights Reserved. -# -# Licensed under the OpenSSL license (the "License"). You may not use -# this file except in compliance with the License. 
You can obtain a copy -# in the file LICENSE in the source distribution or at -# https://www.openssl.org/source/license.html -# -# Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) -# (1) Intel Corporation, Israel Development Center, Haifa, Israel -# (2) University of Haifa, Israel -# -# Reference: -# S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with -# 256 Bit Primes" - -$flavour = shift; -$output = shift; -if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } - -$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or -die "can't locate x86_64-xlate.pl"; - -open OUT,"| \"$^X\" $xlate $flavour $output"; -*STDOUT=*OUT; - -if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` - =~ /GNU assembler version ([2-9]\.[0-9]+)/) { - $avx = ($1>=2.19) + ($1>=2.22); - $addx = ($1>=2.23); -} - -if (!$addx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && - `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { - $avx = ($1>=2.09) + ($1>=2.10); - $addx = ($1>=2.10); -} - -if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && - `ml64 2>&1` =~ /Version ([0-9]+)\./) { - $avx = ($1>=10) + ($1>=11); - $addx = ($1>=12); -} - -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+)\.([0-9]+)/) { - my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 - $avx = ($ver>=3.0) + ($ver>=3.01); - $addx = ($ver>=3.03); -} - -if ($avx>=2) {{ -$digit_size = "\$29"; -$n_digits = "\$9"; - -$code.=<<___; -.text - -.align 64 -.LAVX2_AND_MASK: -.LAVX2_POLY: -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff -.quad 0x00000000, 0x00000000, 0x00000000, 
0x00000000 -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 0x00040000, 0x00040000, 0x00040000, 0x00040000 -.quad 0x1fe00000, 0x1fe00000, 0x1fe00000, 0x1fe00000 -.quad 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff - -.LAVX2_POLY_x2: -.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC -.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC -.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC -.quad 0x400007FC, 0x400007FC, 0x400007FC, 0x400007FC -.quad 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE -.quad 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE, 0x3FFFFFFE -.quad 0x400FFFFE, 0x400FFFFE, 0x400FFFFE, 0x400FFFFE -.quad 0x7F7FFFFE, 0x7F7FFFFE, 0x7F7FFFFE, 0x7F7FFFFE -.quad 0x03FFFFFC, 0x03FFFFFC, 0x03FFFFFC, 0x03FFFFFC - -.LAVX2_POLY_x8: -.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8 -.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8 -.quad 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8, 0xFFFFFFF8 -.quad 0x80000FF8, 0x80000FF8, 0x80000FF8, 0x80000FF8 -.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC -.quad 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC, 0x7FFFFFFC -.quad 0x801FFFFC, 0x801FFFFC, 0x801FFFFC, 0x801FFFFC -.quad 0xFEFFFFFC, 0xFEFFFFFC, 0xFEFFFFFC, 0xFEFFFFFC -.quad 0x07FFFFF8, 0x07FFFFF8, 0x07FFFFF8, 0x07FFFFF8 - -.LONE: -.quad 0x00000020, 0x00000020, 0x00000020, 0x00000020 -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 0x1fffc000, 0x1fffc000, 0x1fffc000, 0x1fffc000 -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1f7fffff, 0x1f7fffff, 0x1f7fffff, 0x1f7fffff -.quad 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 - -# RR = 2^266 mod p in AVX2 format, to transform from the native OpenSSL -# Montgomery form (*2^256) to our format (*2^261) - -.LTO_MONT_AVX2: -.quad 0x00000400, 0x00000400, 0x00000400, 0x00000400 -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 
0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 0x1ff80000, 0x1ff80000, 0x1ff80000, 0x1ff80000 -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x00000003, 0x00000003, 0x00000003, 0x00000003 - -.LFROM_MONT_AVX2: -.quad 0x00000001, 0x00000001, 0x00000001, 0x00000001 -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 -.quad 0x1ffffe00, 0x1ffffe00, 0x1ffffe00, 0x1ffffe00 -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff -.quad 0x1ffbffff, 0x1ffbffff, 0x1ffbffff, 0x1ffbffff -.quad 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff -.quad 0x00000000, 0x00000000, 0x00000000, 0x00000000 - -.LIntOne: -.long 1,1,1,1,1,1,1,1 -___ - -{ -# This function receives a pointer to an array of four affine points -# (X, Y, <1>) and rearranges the data for AVX2 execution, while -# converting it to 2^29 radix redundant form - -my ($X0,$X1,$X2,$X3, $Y0,$Y1,$Y2,$Y3, - $T0,$T1,$T2,$T3, $T4,$T5,$T6,$T7)=map("%ymm$_",(0..15)); - -$code.=<<___; -.globl ecp_nistz256_avx2_transpose_convert -.type ecp_nistz256_avx2_transpose_convert,\@function,2 -.align 64 -ecp_nistz256_avx2_transpose_convert: - vzeroupper -___ -$code.=<<___ if ($win64); - lea -8-16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - # Load the data - vmovdqa 32*0(%rsi), $X0 - lea 112(%rsi), %rax # size optimization - vmovdqa 32*1(%rsi), $Y0 - lea .LAVX2_AND_MASK(%rip), %rdx - vmovdqa 32*2(%rsi), $X1 - vmovdqa 32*3(%rsi), $Y1 - vmovdqa 
32*4-112(%rax), $X2 - vmovdqa 32*5-112(%rax), $Y2 - vmovdqa 32*6-112(%rax), $X3 - vmovdqa 32*7-112(%rax), $Y3 - - # Transpose X and Y independently - vpunpcklqdq $X1, $X0, $T0 # T0 = [B2 A2 B0 A0] - vpunpcklqdq $X3, $X2, $T1 # T1 = [D2 C2 D0 C0] - vpunpckhqdq $X1, $X0, $T2 # T2 = [B3 A3 B1 A1] - vpunpckhqdq $X3, $X2, $T3 # T3 = [D3 C3 D1 C1] - - vpunpcklqdq $Y1, $Y0, $T4 - vpunpcklqdq $Y3, $Y2, $T5 - vpunpckhqdq $Y1, $Y0, $T6 - vpunpckhqdq $Y3, $Y2, $T7 - - vperm2i128 \$0x20, $T1, $T0, $X0 # X0 = [D0 C0 B0 A0] - vperm2i128 \$0x20, $T3, $T2, $X1 # X1 = [D1 C1 B1 A1] - vperm2i128 \$0x31, $T1, $T0, $X2 # X2 = [D2 C2 B2 A2] - vperm2i128 \$0x31, $T3, $T2, $X3 # X3 = [D3 C3 B3 A3] - - vperm2i128 \$0x20, $T5, $T4, $Y0 - vperm2i128 \$0x20, $T7, $T6, $Y1 - vperm2i128 \$0x31, $T5, $T4, $Y2 - vperm2i128 \$0x31, $T7, $T6, $Y3 - vmovdqa (%rdx), $T7 - - vpand (%rdx), $X0, $T0 # out[0] = in[0] & mask; - vpsrlq \$29, $X0, $X0 - vpand $T7, $X0, $T1 # out[1] = (in[0] >> shift) & mask; - vpsrlq \$29, $X0, $X0 - vpsllq \$6, $X1, $T2 - vpxor $X0, $T2, $T2 - vpand $T7, $T2, $T2 # out[2] = ((in[0] >> (shift*2)) ^ (in[1] << (64-shift*2))) & mask; - vpsrlq \$23, $X1, $X1 - vpand $T7, $X1, $T3 # out[3] = (in[1] >> ((shift*3)%64)) & mask; - vpsrlq \$29, $X1, $X1 - vpsllq \$12, $X2, $T4 - vpxor $X1, $T4, $T4 - vpand $T7, $T4, $T4 # out[4] = ((in[1] >> ((shift*4)%64)) ^ (in[2] << (64*2-shift*4))) & mask; - vpsrlq \$17, $X2, $X2 - vpand $T7, $X2, $T5 # out[5] = (in[2] >> ((shift*5)%64)) & mask; - vpsrlq \$29, $X2, $X2 - vpsllq \$18, $X3, $T6 - vpxor $X2, $T6, $T6 - vpand $T7, $T6, $T6 # out[6] = ((in[2] >> ((shift*6)%64)) ^ (in[3] << (64*3-shift*6))) & mask; - vpsrlq \$11, $X3, $X3 - vmovdqa $T0, 32*0(%rdi) - lea 112(%rdi), %rax # size optimization - vpand $T7, $X3, $T0 # out[7] = (in[3] >> ((shift*7)%64)) & mask; - vpsrlq \$29, $X3, $X3 # out[8] = (in[3] >> ((shift*8)%64)) & mask; - - vmovdqa $T1, 32*1(%rdi) - vmovdqa $T2, 32*2(%rdi) - vmovdqa $T3, 32*3(%rdi) - vmovdqa $T4, 32*4-112(%rax) - 
vmovdqa $T5, 32*5-112(%rax) - vmovdqa $T6, 32*6-112(%rax) - vmovdqa $T0, 32*7-112(%rax) - vmovdqa $X3, 32*8-112(%rax) - lea 448(%rdi), %rax # size optimization - - vpand $T7, $Y0, $T0 # out[0] = in[0] & mask; - vpsrlq \$29, $Y0, $Y0 - vpand $T7, $Y0, $T1 # out[1] = (in[0] >> shift) & mask; - vpsrlq \$29, $Y0, $Y0 - vpsllq \$6, $Y1, $T2 - vpxor $Y0, $T2, $T2 - vpand $T7, $T2, $T2 # out[2] = ((in[0] >> (shift*2)) ^ (in[1] << (64-shift*2))) & mask; - vpsrlq \$23, $Y1, $Y1 - vpand $T7, $Y1, $T3 # out[3] = (in[1] >> ((shift*3)%64)) & mask; - vpsrlq \$29, $Y1, $Y1 - vpsllq \$12, $Y2, $T4 - vpxor $Y1, $T4, $T4 - vpand $T7, $T4, $T4 # out[4] = ((in[1] >> ((shift*4)%64)) ^ (in[2] << (64*2-shift*4))) & mask; - vpsrlq \$17, $Y2, $Y2 - vpand $T7, $Y2, $T5 # out[5] = (in[2] >> ((shift*5)%64)) & mask; - vpsrlq \$29, $Y2, $Y2 - vpsllq \$18, $Y3, $T6 - vpxor $Y2, $T6, $T6 - vpand $T7, $T6, $T6 # out[6] = ((in[2] >> ((shift*6)%64)) ^ (in[3] << (64*3-shift*6))) & mask; - vpsrlq \$11, $Y3, $Y3 - vmovdqa $T0, 32*9-448(%rax) - vpand $T7, $Y3, $T0 # out[7] = (in[3] >> ((shift*7)%64)) & mask; - vpsrlq \$29, $Y3, $Y3 # out[8] = (in[3] >> ((shift*8)%64)) & mask; - - vmovdqa $T1, 32*10-448(%rax) - vmovdqa $T2, 32*11-448(%rax) - vmovdqa $T3, 32*12-448(%rax) - vmovdqa $T4, 32*13-448(%rax) - vmovdqa $T5, 32*14-448(%rax) - vmovdqa $T6, 32*15-448(%rax) - vmovdqa $T0, 32*16-448(%rax) - vmovdqa $Y3, 32*17-448(%rax) - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps 16*0(%rsp), %xmm6 - movaps 16*1(%rsp), %xmm7 - movaps 16*2(%rsp), %xmm8 - movaps 16*3(%rsp), %xmm9 - movaps 16*4(%rsp), %xmm10 - movaps 16*5(%rsp), %xmm11 - movaps 16*6(%rsp), %xmm12 - movaps 16*7(%rsp), %xmm13 - movaps 16*8(%rsp), %xmm14 - movaps 16*9(%rsp), %xmm15 - lea 8+16*10(%rsp), %rsp -___ -$code.=<<___; - ret -.size ecp_nistz256_avx2_transpose_convert,.-ecp_nistz256_avx2_transpose_convert -___ -} -{ -################################################################################ -# This function receives a pointer to an 
array of four AVX2 formatted points -# (X, Y, Z) convert the data to normal representation, and rearranges the data - -my ($D0,$D1,$D2,$D3, $D4,$D5,$D6,$D7, $D8)=map("%ymm$_",(0..8)); -my ($T0,$T1,$T2,$T3, $T4,$T5,$T6)=map("%ymm$_",(9..15)); - -$code.=<<___; - -.globl ecp_nistz256_avx2_convert_transpose_back -.type ecp_nistz256_avx2_convert_transpose_back,\@function,2 -.align 32 -ecp_nistz256_avx2_convert_transpose_back: - vzeroupper -___ -$code.=<<___ if ($win64); - lea -8-16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - mov \$3, %ecx - -.Lconv_loop: - vmovdqa 32*0(%rsi), $D0 - lea 160(%rsi), %rax # size optimization - vmovdqa 32*1(%rsi), $D1 - vmovdqa 32*2(%rsi), $D2 - vmovdqa 32*3(%rsi), $D3 - vmovdqa 32*4-160(%rax), $D4 - vmovdqa 32*5-160(%rax), $D5 - vmovdqa 32*6-160(%rax), $D6 - vmovdqa 32*7-160(%rax), $D7 - vmovdqa 32*8-160(%rax), $D8 - - vpsllq \$29, $D1, $D1 - vpsllq \$58, $D2, $T0 - vpaddq $D1, $D0, $D0 - vpaddq $T0, $D0, $D0 # out[0] = (in[0]) ^ (in[1] << shift*1) ^ (in[2] << shift*2); - - vpsrlq \$6, $D2, $D2 - vpsllq \$23, $D3, $D3 - vpsllq \$52, $D4, $T1 - vpaddq $D2, $D3, $D3 - vpaddq $D3, $T1, $D1 # out[1] = (in[2] >> (64*1-shift*2)) ^ (in[3] << shift*3%64) ^ (in[4] << shift*4%64); - - vpsrlq \$12, $D4, $D4 - vpsllq \$17, $D5, $D5 - vpsllq \$46, $D6, $T2 - vpaddq $D4, $D5, $D5 - vpaddq $D5, $T2, $D2 # out[2] = (in[4] >> (64*2-shift*4)) ^ (in[5] << shift*5%64) ^ (in[6] << shift*6%64); - - vpsrlq \$18, $D6, $D6 - vpsllq \$11, $D7, $D7 - vpsllq \$40, $D8, $T3 - vpaddq $D6, $D7, $D7 - vpaddq $D7, $T3, $D3 # out[3] = (in[6] >> (64*3-shift*6)) ^ (in[7] << shift*7%64) ^ (in[8] << shift*8%64); - - vpunpcklqdq $D1, $D0, $T0 # T0 = [B2 A2 B0 A0] - 
vpunpcklqdq $D3, $D2, $T1 # T1 = [D2 C2 D0 C0] - vpunpckhqdq $D1, $D0, $T2 # T2 = [B3 A3 B1 A1] - vpunpckhqdq $D3, $D2, $T3 # T3 = [D3 C3 D1 C1] - - vperm2i128 \$0x20, $T1, $T0, $D0 # X0 = [D0 C0 B0 A0] - vperm2i128 \$0x20, $T3, $T2, $D1 # X1 = [D1 C1 B1 A1] - vperm2i128 \$0x31, $T1, $T0, $D2 # X2 = [D2 C2 B2 A2] - vperm2i128 \$0x31, $T3, $T2, $D3 # X3 = [D3 C3 B3 A3] - - vmovdqa $D0, 32*0(%rdi) - vmovdqa $D1, 32*3(%rdi) - vmovdqa $D2, 32*6(%rdi) - vmovdqa $D3, 32*9(%rdi) - - lea 32*9(%rsi), %rsi - lea 32*1(%rdi), %rdi - - dec %ecx - jnz .Lconv_loop - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps 16*0(%rsp), %xmm6 - movaps 16*1(%rsp), %xmm7 - movaps 16*2(%rsp), %xmm8 - movaps 16*3(%rsp), %xmm9 - movaps 16*4(%rsp), %xmm10 - movaps 16*5(%rsp), %xmm11 - movaps 16*6(%rsp), %xmm12 - movaps 16*7(%rsp), %xmm13 - movaps 16*8(%rsp), %xmm14 - movaps 16*9(%rsp), %xmm15 - lea 8+16*10(%rsp), %rsp -___ -$code.=<<___; - ret -.size ecp_nistz256_avx2_convert_transpose_back,.-ecp_nistz256_avx2_convert_transpose_back -___ -} -{ -my ($r_ptr,$a_ptr,$b_ptr,$itr)=("%rdi","%rsi","%rdx","%ecx"); -my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4,$ACC5,$ACC6,$ACC7,$ACC8)=map("%ymm$_",(0..8)); -my ($B,$Y,$T0,$AND_MASK,$OVERFLOW)=map("%ymm$_",(9..13)); - -sub NORMALIZE { -my $ret=<<___; - vpsrlq $digit_size, $ACC0, $T0 - vpand $AND_MASK, $ACC0, $ACC0 - vpaddq $T0, $ACC1, $ACC1 - - vpsrlq $digit_size, $ACC1, $T0 - vpand $AND_MASK, $ACC1, $ACC1 - vpaddq $T0, $ACC2, $ACC2 - - vpsrlq $digit_size, $ACC2, $T0 - vpand $AND_MASK, $ACC2, $ACC2 - vpaddq $T0, $ACC3, $ACC3 - - vpsrlq $digit_size, $ACC3, $T0 - vpand $AND_MASK, $ACC3, $ACC3 - vpaddq $T0, $ACC4, $ACC4 - - vpsrlq $digit_size, $ACC4, $T0 - vpand $AND_MASK, $ACC4, $ACC4 - vpaddq $T0, $ACC5, $ACC5 - - vpsrlq $digit_size, $ACC5, $T0 - vpand $AND_MASK, $ACC5, $ACC5 - vpaddq $T0, $ACC6, $ACC6 - - vpsrlq $digit_size, $ACC6, $T0 - vpand $AND_MASK, $ACC6, $ACC6 - vpaddq $T0, $ACC7, $ACC7 - - vpsrlq $digit_size, $ACC7, $T0 - vpand $AND_MASK, $ACC7, $ACC7 - 
vpaddq $T0, $ACC8, $ACC8 - #vpand $AND_MASK, $ACC8, $ACC8 -___ - $ret; -} - -sub STORE { -my $ret=<<___; - vmovdqa $ACC0, 32*0(%rdi) - lea 160(%rdi), %rax # size optimization - vmovdqa $ACC1, 32*1(%rdi) - vmovdqa $ACC2, 32*2(%rdi) - vmovdqa $ACC3, 32*3(%rdi) - vmovdqa $ACC4, 32*4-160(%rax) - vmovdqa $ACC5, 32*5-160(%rax) - vmovdqa $ACC6, 32*6-160(%rax) - vmovdqa $ACC7, 32*7-160(%rax) - vmovdqa $ACC8, 32*8-160(%rax) -___ - $ret; -} - -$code.=<<___; -.type avx2_normalize,\@abi-omnipotent -.align 32 -avx2_normalize: - vpsrlq $digit_size, $ACC0, $T0 - vpand $AND_MASK, $ACC0, $ACC0 - vpaddq $T0, $ACC1, $ACC1 - - vpsrlq $digit_size, $ACC1, $T0 - vpand $AND_MASK, $ACC1, $ACC1 - vpaddq $T0, $ACC2, $ACC2 - - vpsrlq $digit_size, $ACC2, $T0 - vpand $AND_MASK, $ACC2, $ACC2 - vpaddq $T0, $ACC3, $ACC3 - - vpsrlq $digit_size, $ACC3, $T0 - vpand $AND_MASK, $ACC3, $ACC3 - vpaddq $T0, $ACC4, $ACC4 - - vpsrlq $digit_size, $ACC4, $T0 - vpand $AND_MASK, $ACC4, $ACC4 - vpaddq $T0, $ACC5, $ACC5 - - vpsrlq $digit_size, $ACC5, $T0 - vpand $AND_MASK, $ACC5, $ACC5 - vpaddq $T0, $ACC6, $ACC6 - - vpsrlq $digit_size, $ACC6, $T0 - vpand $AND_MASK, $ACC6, $ACC6 - vpaddq $T0, $ACC7, $ACC7 - - vpsrlq $digit_size, $ACC7, $T0 - vpand $AND_MASK, $ACC7, $ACC7 - vpaddq $T0, $ACC8, $ACC8 - #vpand $AND_MASK, $ACC8, $ACC8 - - ret -.size avx2_normalize,.-avx2_normalize - -.type avx2_normalize_n_store,\@abi-omnipotent -.align 32 -avx2_normalize_n_store: - vpsrlq $digit_size, $ACC0, $T0 - vpand $AND_MASK, $ACC0, $ACC0 - vpaddq $T0, $ACC1, $ACC1 - - vpsrlq $digit_size, $ACC1, $T0 - vpand $AND_MASK, $ACC1, $ACC1 - vmovdqa $ACC0, 32*0(%rdi) - lea 160(%rdi), %rax # size optimization - vpaddq $T0, $ACC2, $ACC2 - - vpsrlq $digit_size, $ACC2, $T0 - vpand $AND_MASK, $ACC2, $ACC2 - vmovdqa $ACC1, 32*1(%rdi) - vpaddq $T0, $ACC3, $ACC3 - - vpsrlq $digit_size, $ACC3, $T0 - vpand $AND_MASK, $ACC3, $ACC3 - vmovdqa $ACC2, 32*2(%rdi) - vpaddq $T0, $ACC4, $ACC4 - - vpsrlq $digit_size, $ACC4, $T0 - vpand $AND_MASK, $ACC4, 
$ACC4 - vmovdqa $ACC3, 32*3(%rdi) - vpaddq $T0, $ACC5, $ACC5 - - vpsrlq $digit_size, $ACC5, $T0 - vpand $AND_MASK, $ACC5, $ACC5 - vmovdqa $ACC4, 32*4-160(%rax) - vpaddq $T0, $ACC6, $ACC6 - - vpsrlq $digit_size, $ACC6, $T0 - vpand $AND_MASK, $ACC6, $ACC6 - vmovdqa $ACC5, 32*5-160(%rax) - vpaddq $T0, $ACC7, $ACC7 - - vpsrlq $digit_size, $ACC7, $T0 - vpand $AND_MASK, $ACC7, $ACC7 - vmovdqa $ACC6, 32*6-160(%rax) - vpaddq $T0, $ACC8, $ACC8 - #vpand $AND_MASK, $ACC8, $ACC8 - vmovdqa $ACC7, 32*7-160(%rax) - vmovdqa $ACC8, 32*8-160(%rax) - - ret -.size avx2_normalize_n_store,.-avx2_normalize_n_store - -################################################################################ -# void avx2_mul_x4(void* RESULTx4, void *Ax4, void *Bx4); -.type avx2_mul_x4,\@abi-omnipotent -.align 32 -avx2_mul_x4: - lea .LAVX2_POLY(%rip), %rax - - vpxor $ACC0, $ACC0, $ACC0 - vpxor $ACC1, $ACC1, $ACC1 - vpxor $ACC2, $ACC2, $ACC2 - vpxor $ACC3, $ACC3, $ACC3 - vpxor $ACC4, $ACC4, $ACC4 - vpxor $ACC5, $ACC5, $ACC5 - vpxor $ACC6, $ACC6, $ACC6 - vpxor $ACC7, $ACC7, $ACC7 - - vmovdqa 32*7(%rax), %ymm14 - vmovdqa 32*8(%rax), %ymm15 - - mov $n_digits, $itr - lea -512($a_ptr), $a_ptr # strategic bias to control u-op density - jmp .Lavx2_mul_x4_loop - -.align 32 -.Lavx2_mul_x4_loop: - vmovdqa 32*0($b_ptr), $B - lea 32*1($b_ptr), $b_ptr - - vpmuludq 32*0+512($a_ptr), $B, $T0 - vpmuludq 32*1+512($a_ptr), $B, $OVERFLOW # borrow $OVERFLOW - vpaddq $T0, $ACC0, $ACC0 - vpmuludq 32*2+512($a_ptr), $B, $T0 - vpaddq $OVERFLOW, $ACC1, $ACC1 - vpand $AND_MASK, $ACC0, $Y - vpmuludq 32*3+512($a_ptr), $B, $OVERFLOW - vpaddq $T0, $ACC2, $ACC2 - vpmuludq 32*4+512($a_ptr), $B, $T0 - vpaddq $OVERFLOW, $ACC3, $ACC3 - vpmuludq 32*5+512($a_ptr), $B, $OVERFLOW - vpaddq $T0, $ACC4, $ACC4 - vpmuludq 32*6+512($a_ptr), $B, $T0 - vpaddq $OVERFLOW, $ACC5, $ACC5 - vpmuludq 32*7+512($a_ptr), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - # Skip some multiplications, optimizing for the constant poly - vpmuludq $AND_MASK, $Y, $T0 - 
vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*8+512($a_ptr), $B, $ACC8 - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - .byte 0x67 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $OVERFLOW - .byte 0x67 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $T0 - vpaddq $OVERFLOW, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $OVERFLOW - vpaddq $T0, $ACC7, $ACC6 - vpaddq $OVERFLOW, $ACC8, $ACC7 - - dec $itr - jnz .Lavx2_mul_x4_loop - - vpxor $ACC8, $ACC8, $ACC8 - - ret -.size avx2_mul_x4,.-avx2_mul_x4 - -# Function optimized for the constant 1 -################################################################################ -# void avx2_mul_by1_x4(void* RESULTx4, void *Ax4); -.type avx2_mul_by1_x4,\@abi-omnipotent -.align 32 -avx2_mul_by1_x4: - lea .LAVX2_POLY(%rip), %rax - - vpxor $ACC0, $ACC0, $ACC0 - vpxor $ACC1, $ACC1, $ACC1 - vpxor $ACC2, $ACC2, $ACC2 - vpxor $ACC3, $ACC3, $ACC3 - vpxor $ACC4, $ACC4, $ACC4 - vpxor $ACC5, $ACC5, $ACC5 - vpxor $ACC6, $ACC6, $ACC6 - vpxor $ACC7, $ACC7, $ACC7 - vpxor $ACC8, $ACC8, $ACC8 - - vmovdqa 32*3+.LONE(%rip), %ymm14 - vmovdqa 32*7+.LONE(%rip), %ymm15 - - mov $n_digits, $itr - jmp .Lavx2_mul_by1_x4_loop - -.align 32 -.Lavx2_mul_by1_x4_loop: - vmovdqa 32*0($a_ptr), $B - .byte 0x48,0x8d,0xb6,0x20,0,0,0 # lea 32*1($a_ptr), $a_ptr - - vpsllq \$5, $B, $OVERFLOW - vpmuludq %ymm14, $B, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC3 - .byte 0x67 - vpmuludq $AND_MASK, $B, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $T0, $ACC4, $ACC4 - vpaddq $T0, $ACC5, $ACC5 - vpaddq $T0, $ACC6, $ACC6 - vpsllq \$23, $B, $T0 - - .byte 0x67,0x67 - vpmuludq %ymm15, $B, $OVERFLOW - vpsubq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - .byte 0x67,0x67 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq 
$T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $OVERFLOW - vmovdqa $ACC5, $ACC4 - vpmuludq 32*7(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC6, $ACC5 - vpaddq $T0, $ACC7, $ACC6 - vpmuludq 32*8(%rax), $Y, $ACC7 - - dec $itr - jnz .Lavx2_mul_by1_x4_loop - - ret -.size avx2_mul_by1_x4,.-avx2_mul_by1_x4 - -################################################################################ -# void avx2_sqr_x4(void* RESULTx4, void *Ax4, void *Bx4); -.type avx2_sqr_x4,\@abi-omnipotent -.align 32 -avx2_sqr_x4: - lea .LAVX2_POLY(%rip), %rax - - vmovdqa 32*7(%rax), %ymm14 - vmovdqa 32*8(%rax), %ymm15 - - vmovdqa 32*0($a_ptr), $B - vmovdqa 32*1($a_ptr), $ACC1 - vmovdqa 32*2($a_ptr), $ACC2 - vmovdqa 32*3($a_ptr), $ACC3 - vmovdqa 32*4($a_ptr), $ACC4 - vmovdqa 32*5($a_ptr), $ACC5 - vmovdqa 32*6($a_ptr), $ACC6 - vmovdqa 32*7($a_ptr), $ACC7 - vpaddq $ACC1, $ACC1, $ACC1 # 2*$ACC0..7 - vmovdqa 32*8($a_ptr), $ACC8 - vpaddq $ACC2, $ACC2, $ACC2 - vmovdqa $ACC1, 32*0(%rcx) - vpaddq $ACC3, $ACC3, $ACC3 - vmovdqa $ACC2, 32*1(%rcx) - vpaddq $ACC4, $ACC4, $ACC4 - vmovdqa $ACC3, 32*2(%rcx) - vpaddq $ACC5, $ACC5, $ACC5 - vmovdqa $ACC4, 32*3(%rcx) - vpaddq $ACC6, $ACC6, $ACC6 - vmovdqa $ACC5, 32*4(%rcx) - vpaddq $ACC7, $ACC7, $ACC7 - vmovdqa $ACC6, 32*5(%rcx) - vpaddq $ACC8, $ACC8, $ACC8 - vmovdqa $ACC7, 32*6(%rcx) - vmovdqa $ACC8, 32*7(%rcx) - - #itr 1 - vpmuludq $B, $B, $ACC0 - vpmuludq $B, $ACC1, $ACC1 - vpand $AND_MASK, $ACC0, $Y - vpmuludq $B, $ACC2, $ACC2 - vpmuludq $B, $ACC3, $ACC3 - vpmuludq $B, $ACC4, $ACC4 - vpmuludq $B, $ACC5, $ACC5 - vpmuludq $B, $ACC6, $ACC6 - vpmuludq $AND_MASK, $Y, $T0 - vpmuludq $B, $ACC7, $ACC7 - vpmuludq $B, $ACC8, $ACC8 - vmovdqa 32*1($a_ptr), $B - - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - 
vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 2 - vpmuludq $B, $B, $OVERFLOW - vpand $AND_MASK, $ACC0, $Y - vpmuludq 32*1(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC1, $ACC1 - vpmuludq 32*2(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC2, $ACC2 - vpmuludq 32*3(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC3, $ACC3 - vpmuludq 32*4(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC4, $ACC4 - vpmuludq 32*5(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC5, $ACC5 - vpmuludq 32*6(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*2($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 3 - vpmuludq $B, $B, $T0 - vpand $AND_MASK, $ACC0, $Y - vpmuludq 32*2(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC2, $ACC2 - vpmuludq 32*3(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC3, $ACC3 - vpmuludq 32*4(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC4, $ACC4 - vpmuludq 32*5(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC5, $ACC5 - vpmuludq 32*6(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*3($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 
- vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 4 - vpmuludq $B, $B, $OVERFLOW - vpmuludq 32*3(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC3, $ACC3 - vpmuludq 32*4(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC4, $ACC4 - vpmuludq 32*5(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC5, $ACC5 - vpmuludq 32*6(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*4($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 5 - vpmuludq $B, $B, $T0 - vpmuludq 32*4(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC4, $ACC4 - vpmuludq 32*5(%rcx), $B, $T0 - vpaddq $OVERFLOW, $ACC5, $ACC5 - vpmuludq 32*6(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*5($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3+.LAVX2_POLY(%rip), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 6 - vpmuludq $B, $B, $OVERFLOW - vpmuludq 32*5(%rcx), $B, $T0 - vpaddq 
$OVERFLOW, $ACC5, $ACC5 - vpmuludq 32*6(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*6($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 7 - vpmuludq $B, $B, $T0 - vpmuludq 32*6(%rcx), $B, $OVERFLOW - vpaddq $T0, $ACC6, $ACC6 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*7($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - #itr 8 - vpmuludq $B, $B, $OVERFLOW - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC7 - vpmuludq 32*7(%rcx), $B, $ACC8 - vmovdqa 32*8($a_ptr), $B - vpaddq $T0, $ACC0, $OVERFLOW - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpand $AND_MASK, $ACC0, $Y - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - 
#itr 9 - vpmuludq $B, $B, $ACC8 - - vpmuludq $AND_MASK, $Y, $T0 - vpaddq $T0, $ACC0, $OVERFLOW - vpsrlq $digit_size, $OVERFLOW, $OVERFLOW - vpaddq $T0, $ACC1, $ACC0 - vpaddq $T0, $ACC2, $ACC1 - vpmuludq 32*3(%rax), $Y, $T0 - vpaddq $OVERFLOW, $ACC0, $ACC0 - vpaddq $T0, $ACC3, $ACC2 - vmovdqa $ACC4, $ACC3 - vpsllq \$18, $Y, $T0 - vmovdqa $ACC5, $ACC4 - vpmuludq %ymm14, $Y, $OVERFLOW - vpaddq $T0, $ACC6, $ACC5 - vpmuludq %ymm15, $Y, $T0 - vpaddq $OVERFLOW, $ACC7, $ACC6 - vpaddq $T0, $ACC8, $ACC7 - - vpxor $ACC8, $ACC8, $ACC8 - - ret -.size avx2_sqr_x4,.-avx2_sqr_x4 - -################################################################################ -# void avx2_sub_x4(void* RESULTx4, void *Ax4, void *Bx4); -.type avx2_sub_x4,\@abi-omnipotent -.align 32 -avx2_sub_x4: - vmovdqa 32*0($a_ptr), $ACC0 - lea 160($a_ptr), $a_ptr - lea .LAVX2_POLY_x8+128(%rip), %rax - lea 128($b_ptr), $b_ptr - vmovdqa 32*1-160($a_ptr), $ACC1 - vmovdqa 32*2-160($a_ptr), $ACC2 - vmovdqa 32*3-160($a_ptr), $ACC3 - vmovdqa 32*4-160($a_ptr), $ACC4 - vmovdqa 32*5-160($a_ptr), $ACC5 - vmovdqa 32*6-160($a_ptr), $ACC6 - vmovdqa 32*7-160($a_ptr), $ACC7 - vmovdqa 32*8-160($a_ptr), $ACC8 - - vpaddq 32*0-128(%rax), $ACC0, $ACC0 - vpaddq 32*1-128(%rax), $ACC1, $ACC1 - vpaddq 32*2-128(%rax), $ACC2, $ACC2 - vpaddq 32*3-128(%rax), $ACC3, $ACC3 - vpaddq 32*4-128(%rax), $ACC4, $ACC4 - vpaddq 32*5-128(%rax), $ACC5, $ACC5 - vpaddq 32*6-128(%rax), $ACC6, $ACC6 - vpaddq 32*7-128(%rax), $ACC7, $ACC7 - vpaddq 32*8-128(%rax), $ACC8, $ACC8 - - vpsubq 32*0-128($b_ptr), $ACC0, $ACC0 - vpsubq 32*1-128($b_ptr), $ACC1, $ACC1 - vpsubq 32*2-128($b_ptr), $ACC2, $ACC2 - vpsubq 32*3-128($b_ptr), $ACC3, $ACC3 - vpsubq 32*4-128($b_ptr), $ACC4, $ACC4 - vpsubq 32*5-128($b_ptr), $ACC5, $ACC5 - vpsubq 32*6-128($b_ptr), $ACC6, $ACC6 - vpsubq 32*7-128($b_ptr), $ACC7, $ACC7 - vpsubq 32*8-128($b_ptr), $ACC8, $ACC8 - - ret -.size avx2_sub_x4,.-avx2_sub_x4 - -.type avx2_select_n_store,\@abi-omnipotent -.align 32 -avx2_select_n_store: - 
vmovdqa `8+32*9*8`(%rsp), $Y - vpor `8+32*9*8+32`(%rsp), $Y, $Y - - vpandn $ACC0, $Y, $ACC0 - vpandn $ACC1, $Y, $ACC1 - vpandn $ACC2, $Y, $ACC2 - vpandn $ACC3, $Y, $ACC3 - vpandn $ACC4, $Y, $ACC4 - vpandn $ACC5, $Y, $ACC5 - vpandn $ACC6, $Y, $ACC6 - vmovdqa `8+32*9*8+32`(%rsp), $B - vpandn $ACC7, $Y, $ACC7 - vpandn `8+32*9*8`(%rsp), $B, $B - vpandn $ACC8, $Y, $ACC8 - - vpand 32*0(%rsi), $B, $T0 - lea 160(%rsi), %rax - vpand 32*1(%rsi), $B, $Y - vpxor $T0, $ACC0, $ACC0 - vpand 32*2(%rsi), $B, $T0 - vpxor $Y, $ACC1, $ACC1 - vpand 32*3(%rsi), $B, $Y - vpxor $T0, $ACC2, $ACC2 - vpand 32*4-160(%rax), $B, $T0 - vpxor $Y, $ACC3, $ACC3 - vpand 32*5-160(%rax), $B, $Y - vpxor $T0, $ACC4, $ACC4 - vpand 32*6-160(%rax), $B, $T0 - vpxor $Y, $ACC5, $ACC5 - vpand 32*7-160(%rax), $B, $Y - vpxor $T0, $ACC6, $ACC6 - vpand 32*8-160(%rax), $B, $T0 - vmovdqa `8+32*9*8+32`(%rsp), $B - vpxor $Y, $ACC7, $ACC7 - - vpand 32*0(%rdx), $B, $Y - lea 160(%rdx), %rax - vpxor $T0, $ACC8, $ACC8 - vpand 32*1(%rdx), $B, $T0 - vpxor $Y, $ACC0, $ACC0 - vpand 32*2(%rdx), $B, $Y - vpxor $T0, $ACC1, $ACC1 - vpand 32*3(%rdx), $B, $T0 - vpxor $Y, $ACC2, $ACC2 - vpand 32*4-160(%rax), $B, $Y - vpxor $T0, $ACC3, $ACC3 - vpand 32*5-160(%rax), $B, $T0 - vpxor $Y, $ACC4, $ACC4 - vpand 32*6-160(%rax), $B, $Y - vpxor $T0, $ACC5, $ACC5 - vpand 32*7-160(%rax), $B, $T0 - vpxor $Y, $ACC6, $ACC6 - vpand 32*8-160(%rax), $B, $Y - vpxor $T0, $ACC7, $ACC7 - vpxor $Y, $ACC8, $ACC8 - `&STORE` - - ret -.size avx2_select_n_store,.-avx2_select_n_store -___ -$code.=<<___ if (0); # inlined -################################################################################ -# void avx2_mul_by2_x4(void* RESULTx4, void *Ax4); -.type avx2_mul_by2_x4,\@abi-omnipotent -.align 32 -avx2_mul_by2_x4: - vmovdqa 32*0($a_ptr), $ACC0 - lea 160($a_ptr), %rax - vmovdqa 32*1($a_ptr), $ACC1 - vmovdqa 32*2($a_ptr), $ACC2 - vmovdqa 32*3($a_ptr), $ACC3 - vmovdqa 32*4-160(%rax), $ACC4 - vmovdqa 32*5-160(%rax), $ACC5 - vmovdqa 32*6-160(%rax), $ACC6 - 
vmovdqa 32*7-160(%rax), $ACC7 - vmovdqa 32*8-160(%rax), $ACC8 - - vpaddq $ACC0, $ACC0, $ACC0 - vpaddq $ACC1, $ACC1, $ACC1 - vpaddq $ACC2, $ACC2, $ACC2 - vpaddq $ACC3, $ACC3, $ACC3 - vpaddq $ACC4, $ACC4, $ACC4 - vpaddq $ACC5, $ACC5, $ACC5 - vpaddq $ACC6, $ACC6, $ACC6 - vpaddq $ACC7, $ACC7, $ACC7 - vpaddq $ACC8, $ACC8, $ACC8 - - ret -.size avx2_mul_by2_x4,.-avx2_mul_by2_x4 -___ -my ($r_ptr_in,$a_ptr_in,$b_ptr_in)=("%rdi","%rsi","%rdx"); -my ($r_ptr,$a_ptr,$b_ptr)=("%r8","%r9","%r10"); - -$code.=<<___; -################################################################################ -# void ecp_nistz256_avx2_point_add_affine_x4(void* RESULTx4, void *Ax4, void *Bx4); -.globl ecp_nistz256_avx2_point_add_affine_x4 -.type ecp_nistz256_avx2_point_add_affine_x4,\@function,3 -.align 32 -ecp_nistz256_avx2_point_add_affine_x4: - mov %rsp, %rax - push %rbp - vzeroupper -___ -$code.=<<___ if ($win64); - lea -16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - lea -8(%rax), %rbp - -# Result + 32*0 = Result.X -# Result + 32*9 = Result.Y -# Result + 32*18 = Result.Z - -# A + 32*0 = A.X -# A + 32*9 = A.Y -# A + 32*18 = A.Z - -# B + 32*0 = B.X -# B + 32*9 = B.Y - - sub \$`32*9*8+32*2+32*8`, %rsp - and \$-64, %rsp - - mov $r_ptr_in, $r_ptr - mov $a_ptr_in, $a_ptr - mov $b_ptr_in, $b_ptr - - vmovdqa 32*0($a_ptr_in), %ymm0 - vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK - vpxor %ymm1, %ymm1, %ymm1 - lea 256($a_ptr_in), %rax # size optimization - vpor 32*1($a_ptr_in), %ymm0, %ymm0 - vpor 32*2($a_ptr_in), %ymm0, %ymm0 - vpor 32*3($a_ptr_in), %ymm0, %ymm0 - vpor 32*4-256(%rax), %ymm0, %ymm0 - lea 256(%rax), %rcx # size optimization - vpor 32*5-256(%rax), %ymm0, %ymm0 - vpor 32*6-256(%rax), 
%ymm0, %ymm0 - vpor 32*7-256(%rax), %ymm0, %ymm0 - vpor 32*8-256(%rax), %ymm0, %ymm0 - vpor 32*9-256(%rax), %ymm0, %ymm0 - vpor 32*10-256(%rax), %ymm0, %ymm0 - vpor 32*11-256(%rax), %ymm0, %ymm0 - vpor 32*12-512(%rcx), %ymm0, %ymm0 - vpor 32*13-512(%rcx), %ymm0, %ymm0 - vpor 32*14-512(%rcx), %ymm0, %ymm0 - vpor 32*15-512(%rcx), %ymm0, %ymm0 - vpor 32*16-512(%rcx), %ymm0, %ymm0 - vpor 32*17-512(%rcx), %ymm0, %ymm0 - vpcmpeqq %ymm1, %ymm0, %ymm0 - vmovdqa %ymm0, `32*9*8`(%rsp) - - vpxor %ymm1, %ymm1, %ymm1 - vmovdqa 32*0($b_ptr), %ymm0 - lea 256($b_ptr), %rax # size optimization - vpor 32*1($b_ptr), %ymm0, %ymm0 - vpor 32*2($b_ptr), %ymm0, %ymm0 - vpor 32*3($b_ptr), %ymm0, %ymm0 - vpor 32*4-256(%rax), %ymm0, %ymm0 - lea 256(%rax), %rcx # size optimization - vpor 32*5-256(%rax), %ymm0, %ymm0 - vpor 32*6-256(%rax), %ymm0, %ymm0 - vpor 32*7-256(%rax), %ymm0, %ymm0 - vpor 32*8-256(%rax), %ymm0, %ymm0 - vpor 32*9-256(%rax), %ymm0, %ymm0 - vpor 32*10-256(%rax), %ymm0, %ymm0 - vpor 32*11-256(%rax), %ymm0, %ymm0 - vpor 32*12-512(%rcx), %ymm0, %ymm0 - vpor 32*13-512(%rcx), %ymm0, %ymm0 - vpor 32*14-512(%rcx), %ymm0, %ymm0 - vpor 32*15-512(%rcx), %ymm0, %ymm0 - vpor 32*16-512(%rcx), %ymm0, %ymm0 - vpor 32*17-512(%rcx), %ymm0, %ymm0 - vpcmpeqq %ymm1, %ymm0, %ymm0 - vmovdqa %ymm0, `32*9*8+32`(%rsp) - - # Z1^2 = Z1*Z1 - lea `32*9*2`($a_ptr), %rsi - lea `32*9*2`(%rsp), %rdi - lea `32*9*8+32*2`(%rsp), %rcx # temporary vector - call avx2_sqr_x4 - call avx2_normalize_n_store - - # U2 = X2*Z1^2 - lea `32*9*0`($b_ptr), %rsi - lea `32*9*2`(%rsp), %rdx - lea `32*9*0`(%rsp), %rdi - call avx2_mul_x4 - #call avx2_normalize - `&STORE` - - # S2 = Z1*Z1^2 = Z1^3 - lea `32*9*2`($a_ptr), %rsi - lea `32*9*2`(%rsp), %rdx - lea `32*9*1`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # S2 = S2*Y2 = Y2*Z1^3 - lea `32*9*1`($b_ptr), %rsi - lea `32*9*1`(%rsp), %rdx - lea `32*9*1`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # H = U2 - U1 = U2 - X1 - lea 
`32*9*0`(%rsp), %rsi - lea `32*9*0`($a_ptr), %rdx - lea `32*9*3`(%rsp), %rdi - call avx2_sub_x4 - call avx2_normalize_n_store - - # R = S2 - S1 = S2 - Y1 - lea `32*9*1`(%rsp), %rsi - lea `32*9*1`($a_ptr), %rdx - lea `32*9*4`(%rsp), %rdi - call avx2_sub_x4 - call avx2_normalize_n_store - - # Z3 = H*Z1*Z2 - lea `32*9*3`(%rsp), %rsi - lea `32*9*2`($a_ptr), %rdx - lea `32*9*2`($r_ptr), %rdi - call avx2_mul_x4 - call avx2_normalize - - lea .LONE(%rip), %rsi - lea `32*9*2`($a_ptr), %rdx - call avx2_select_n_store - - # R^2 = R^2 - lea `32*9*4`(%rsp), %rsi - lea `32*9*6`(%rsp), %rdi - lea `32*9*8+32*2`(%rsp), %rcx # temporary vector - call avx2_sqr_x4 - call avx2_normalize_n_store - - # H^2 = H^2 - lea `32*9*3`(%rsp), %rsi - lea `32*9*5`(%rsp), %rdi - call avx2_sqr_x4 - call avx2_normalize_n_store - - # H^3 = H^2*H - lea `32*9*3`(%rsp), %rsi - lea `32*9*5`(%rsp), %rdx - lea `32*9*7`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # U2 = U1*H^2 - lea `32*9*0`($a_ptr), %rsi - lea `32*9*5`(%rsp), %rdx - lea `32*9*0`(%rsp), %rdi - call avx2_mul_x4 - #call avx2_normalize - `&STORE` - - # Hsqr = U2*2 - #lea 32*9*0(%rsp), %rsi - #lea 32*9*5(%rsp), %rdi - #call avx2_mul_by2_x4 - - vpaddq $ACC0, $ACC0, $ACC0 # inlined avx2_mul_by2_x4 - lea `32*9*5`(%rsp), %rdi - vpaddq $ACC1, $ACC1, $ACC1 - vpaddq $ACC2, $ACC2, $ACC2 - vpaddq $ACC3, $ACC3, $ACC3 - vpaddq $ACC4, $ACC4, $ACC4 - vpaddq $ACC5, $ACC5, $ACC5 - vpaddq $ACC6, $ACC6, $ACC6 - vpaddq $ACC7, $ACC7, $ACC7 - vpaddq $ACC8, $ACC8, $ACC8 - call avx2_normalize_n_store - - # X3 = R^2 - H^3 - #lea 32*9*6(%rsp), %rsi - #lea 32*9*7(%rsp), %rdx - #lea 32*9*5(%rsp), %rcx - #lea 32*9*0($r_ptr), %rdi - #call avx2_sub_x4 - #NORMALIZE - #STORE - - # X3 = X3 - U2*2 - #lea 32*9*0($r_ptr), %rsi - #lea 32*9*0($r_ptr), %rdi - #call avx2_sub_x4 - #NORMALIZE - #STORE - - lea `32*9*6+128`(%rsp), %rsi - lea .LAVX2_POLY_x2+128(%rip), %rax - lea `32*9*7+128`(%rsp), %rdx - lea `32*9*5+128`(%rsp), %rcx - lea `32*9*0`($r_ptr), %rdi - - 
vmovdqa 32*0-128(%rsi), $ACC0 - vmovdqa 32*1-128(%rsi), $ACC1 - vmovdqa 32*2-128(%rsi), $ACC2 - vmovdqa 32*3-128(%rsi), $ACC3 - vmovdqa 32*4-128(%rsi), $ACC4 - vmovdqa 32*5-128(%rsi), $ACC5 - vmovdqa 32*6-128(%rsi), $ACC6 - vmovdqa 32*7-128(%rsi), $ACC7 - vmovdqa 32*8-128(%rsi), $ACC8 - - vpaddq 32*0-128(%rax), $ACC0, $ACC0 - vpaddq 32*1-128(%rax), $ACC1, $ACC1 - vpaddq 32*2-128(%rax), $ACC2, $ACC2 - vpaddq 32*3-128(%rax), $ACC3, $ACC3 - vpaddq 32*4-128(%rax), $ACC4, $ACC4 - vpaddq 32*5-128(%rax), $ACC5, $ACC5 - vpaddq 32*6-128(%rax), $ACC6, $ACC6 - vpaddq 32*7-128(%rax), $ACC7, $ACC7 - vpaddq 32*8-128(%rax), $ACC8, $ACC8 - - vpsubq 32*0-128(%rdx), $ACC0, $ACC0 - vpsubq 32*1-128(%rdx), $ACC1, $ACC1 - vpsubq 32*2-128(%rdx), $ACC2, $ACC2 - vpsubq 32*3-128(%rdx), $ACC3, $ACC3 - vpsubq 32*4-128(%rdx), $ACC4, $ACC4 - vpsubq 32*5-128(%rdx), $ACC5, $ACC5 - vpsubq 32*6-128(%rdx), $ACC6, $ACC6 - vpsubq 32*7-128(%rdx), $ACC7, $ACC7 - vpsubq 32*8-128(%rdx), $ACC8, $ACC8 - - vpsubq 32*0-128(%rcx), $ACC0, $ACC0 - vpsubq 32*1-128(%rcx), $ACC1, $ACC1 - vpsubq 32*2-128(%rcx), $ACC2, $ACC2 - vpsubq 32*3-128(%rcx), $ACC3, $ACC3 - vpsubq 32*4-128(%rcx), $ACC4, $ACC4 - vpsubq 32*5-128(%rcx), $ACC5, $ACC5 - vpsubq 32*6-128(%rcx), $ACC6, $ACC6 - vpsubq 32*7-128(%rcx), $ACC7, $ACC7 - vpsubq 32*8-128(%rcx), $ACC8, $ACC8 - call avx2_normalize - - lea 32*0($b_ptr), %rsi - lea 32*0($a_ptr), %rdx - call avx2_select_n_store - - # H = U2 - X3 - lea `32*9*0`(%rsp), %rsi - lea `32*9*0`($r_ptr), %rdx - lea `32*9*3`(%rsp), %rdi - call avx2_sub_x4 - call avx2_normalize_n_store - - # - lea `32*9*3`(%rsp), %rsi - lea `32*9*4`(%rsp), %rdx - lea `32*9*3`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # - lea `32*9*7`(%rsp), %rsi - lea `32*9*1`($a_ptr), %rdx - lea `32*9*1`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # - lea `32*9*3`(%rsp), %rsi - lea `32*9*1`(%rsp), %rdx - lea `32*9*1`($r_ptr), %rdi - call avx2_sub_x4 - call avx2_normalize - - lea 32*9($b_ptr), %rsi 
- lea 32*9($a_ptr), %rdx - call avx2_select_n_store - - #lea 32*9*0($r_ptr), %rsi - #lea 32*9*0($r_ptr), %rdi - #call avx2_mul_by1_x4 - #NORMALIZE - #STORE - - lea `32*9*1`($r_ptr), %rsi - lea `32*9*1`($r_ptr), %rdi - call avx2_mul_by1_x4 - call avx2_normalize_n_store - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps %xmm6, -16*10(%rbp) - movaps %xmm7, -16*9(%rbp) - movaps %xmm8, -16*8(%rbp) - movaps %xmm9, -16*7(%rbp) - movaps %xmm10, -16*6(%rbp) - movaps %xmm11, -16*5(%rbp) - movaps %xmm12, -16*4(%rbp) - movaps %xmm13, -16*3(%rbp) - movaps %xmm14, -16*2(%rbp) - movaps %xmm15, -16*1(%rbp) -___ -$code.=<<___; - mov %rbp, %rsp - pop %rbp - ret -.size ecp_nistz256_avx2_point_add_affine_x4,.-ecp_nistz256_avx2_point_add_affine_x4 - -################################################################################ -# void ecp_nistz256_avx2_point_add_affines_x4(void* RESULTx4, void *Ax4, void *Bx4); -.globl ecp_nistz256_avx2_point_add_affines_x4 -.type ecp_nistz256_avx2_point_add_affines_x4,\@function,3 -.align 32 -ecp_nistz256_avx2_point_add_affines_x4: - mov %rsp, %rax - push %rbp - vzeroupper -___ -$code.=<<___ if ($win64); - lea -16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - lea -8(%rax), %rbp - -# Result + 32*0 = Result.X -# Result + 32*9 = Result.Y -# Result + 32*18 = Result.Z - -# A + 32*0 = A.X -# A + 32*9 = A.Y - -# B + 32*0 = B.X -# B + 32*9 = B.Y - - sub \$`32*9*8+32*2+32*8`, %rsp - and \$-64, %rsp - - mov $r_ptr_in, $r_ptr - mov $a_ptr_in, $a_ptr - mov $b_ptr_in, $b_ptr - - vmovdqa 32*0($a_ptr_in), %ymm0 - vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK - vpxor %ymm1, %ymm1, %ymm1 - lea 256($a_ptr_in), %rax # size optimization - vpor 32*1($a_ptr_in), 
%ymm0, %ymm0 - vpor 32*2($a_ptr_in), %ymm0, %ymm0 - vpor 32*3($a_ptr_in), %ymm0, %ymm0 - vpor 32*4-256(%rax), %ymm0, %ymm0 - lea 256(%rax), %rcx # size optimization - vpor 32*5-256(%rax), %ymm0, %ymm0 - vpor 32*6-256(%rax), %ymm0, %ymm0 - vpor 32*7-256(%rax), %ymm0, %ymm0 - vpor 32*8-256(%rax), %ymm0, %ymm0 - vpor 32*9-256(%rax), %ymm0, %ymm0 - vpor 32*10-256(%rax), %ymm0, %ymm0 - vpor 32*11-256(%rax), %ymm0, %ymm0 - vpor 32*12-512(%rcx), %ymm0, %ymm0 - vpor 32*13-512(%rcx), %ymm0, %ymm0 - vpor 32*14-512(%rcx), %ymm0, %ymm0 - vpor 32*15-512(%rcx), %ymm0, %ymm0 - vpor 32*16-512(%rcx), %ymm0, %ymm0 - vpor 32*17-512(%rcx), %ymm0, %ymm0 - vpcmpeqq %ymm1, %ymm0, %ymm0 - vmovdqa %ymm0, `32*9*8`(%rsp) - - vpxor %ymm1, %ymm1, %ymm1 - vmovdqa 32*0($b_ptr), %ymm0 - lea 256($b_ptr), %rax # size optimization - vpor 32*1($b_ptr), %ymm0, %ymm0 - vpor 32*2($b_ptr), %ymm0, %ymm0 - vpor 32*3($b_ptr), %ymm0, %ymm0 - vpor 32*4-256(%rax), %ymm0, %ymm0 - lea 256(%rax), %rcx # size optimization - vpor 32*5-256(%rax), %ymm0, %ymm0 - vpor 32*6-256(%rax), %ymm0, %ymm0 - vpor 32*7-256(%rax), %ymm0, %ymm0 - vpor 32*8-256(%rax), %ymm0, %ymm0 - vpor 32*9-256(%rax), %ymm0, %ymm0 - vpor 32*10-256(%rax), %ymm0, %ymm0 - vpor 32*11-256(%rax), %ymm0, %ymm0 - vpor 32*12-512(%rcx), %ymm0, %ymm0 - vpor 32*13-512(%rcx), %ymm0, %ymm0 - vpor 32*14-512(%rcx), %ymm0, %ymm0 - vpor 32*15-512(%rcx), %ymm0, %ymm0 - vpor 32*16-512(%rcx), %ymm0, %ymm0 - vpor 32*17-512(%rcx), %ymm0, %ymm0 - vpcmpeqq %ymm1, %ymm0, %ymm0 - vmovdqa %ymm0, `32*9*8+32`(%rsp) - - # H = U2 - U1 = X2 - X1 - lea `32*9*0`($b_ptr), %rsi - lea `32*9*0`($a_ptr), %rdx - lea `32*9*3`(%rsp), %rdi - call avx2_sub_x4 - call avx2_normalize_n_store - - # R = S2 - S1 = Y2 - Y1 - lea `32*9*1`($b_ptr), %rsi - lea `32*9*1`($a_ptr), %rdx - lea `32*9*4`(%rsp), %rdi - call avx2_sub_x4 - call avx2_normalize_n_store - - # Z3 = H*Z1*Z2 = H - lea `32*9*3`(%rsp), %rsi - lea `32*9*2`($r_ptr), %rdi - call avx2_mul_by1_x4 - call avx2_normalize - - vmovdqa 
`32*9*8`(%rsp), $B - vpor `32*9*8+32`(%rsp), $B, $B - - vpandn $ACC0, $B, $ACC0 - lea .LONE+128(%rip), %rax - vpandn $ACC1, $B, $ACC1 - vpandn $ACC2, $B, $ACC2 - vpandn $ACC3, $B, $ACC3 - vpandn $ACC4, $B, $ACC4 - vpandn $ACC5, $B, $ACC5 - vpandn $ACC6, $B, $ACC6 - vpandn $ACC7, $B, $ACC7 - - vpand 32*0-128(%rax), $B, $T0 - vpandn $ACC8, $B, $ACC8 - vpand 32*1-128(%rax), $B, $Y - vpxor $T0, $ACC0, $ACC0 - vpand 32*2-128(%rax), $B, $T0 - vpxor $Y, $ACC1, $ACC1 - vpand 32*3-128(%rax), $B, $Y - vpxor $T0, $ACC2, $ACC2 - vpand 32*4-128(%rax), $B, $T0 - vpxor $Y, $ACC3, $ACC3 - vpand 32*5-128(%rax), $B, $Y - vpxor $T0, $ACC4, $ACC4 - vpand 32*6-128(%rax), $B, $T0 - vpxor $Y, $ACC5, $ACC5 - vpand 32*7-128(%rax), $B, $Y - vpxor $T0, $ACC6, $ACC6 - vpand 32*8-128(%rax), $B, $T0 - vpxor $Y, $ACC7, $ACC7 - vpxor $T0, $ACC8, $ACC8 - `&STORE` - - # R^2 = R^2 - lea `32*9*4`(%rsp), %rsi - lea `32*9*6`(%rsp), %rdi - lea `32*9*8+32*2`(%rsp), %rcx # temporary vector - call avx2_sqr_x4 - call avx2_normalize_n_store - - # H^2 = H^2 - lea `32*9*3`(%rsp), %rsi - lea `32*9*5`(%rsp), %rdi - call avx2_sqr_x4 - call avx2_normalize_n_store - - # H^3 = H^2*H - lea `32*9*3`(%rsp), %rsi - lea `32*9*5`(%rsp), %rdx - lea `32*9*7`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # U2 = U1*H^2 - lea `32*9*0`($a_ptr), %rsi - lea `32*9*5`(%rsp), %rdx - lea `32*9*0`(%rsp), %rdi - call avx2_mul_x4 - #call avx2_normalize - `&STORE` - - # Hsqr = U2*2 - #lea 32*9*0(%rsp), %rsi - #lea 32*9*5(%rsp), %rdi - #call avx2_mul_by2_x4 - - vpaddq $ACC0, $ACC0, $ACC0 # inlined avx2_mul_by2_x4 - lea `32*9*5`(%rsp), %rdi - vpaddq $ACC1, $ACC1, $ACC1 - vpaddq $ACC2, $ACC2, $ACC2 - vpaddq $ACC3, $ACC3, $ACC3 - vpaddq $ACC4, $ACC4, $ACC4 - vpaddq $ACC5, $ACC5, $ACC5 - vpaddq $ACC6, $ACC6, $ACC6 - vpaddq $ACC7, $ACC7, $ACC7 - vpaddq $ACC8, $ACC8, $ACC8 - call avx2_normalize_n_store - - # X3 = R^2 - H^3 - #lea 32*9*6(%rsp), %rsi - #lea 32*9*7(%rsp), %rdx - #lea 32*9*5(%rsp), %rcx - #lea 32*9*0($r_ptr), 
%rdi - #call avx2_sub_x4 - #NORMALIZE - #STORE - - # X3 = X3 - U2*2 - #lea 32*9*0($r_ptr), %rsi - #lea 32*9*0($r_ptr), %rdi - #call avx2_sub_x4 - #NORMALIZE - #STORE - - lea `32*9*6+128`(%rsp), %rsi - lea .LAVX2_POLY_x2+128(%rip), %rax - lea `32*9*7+128`(%rsp), %rdx - lea `32*9*5+128`(%rsp), %rcx - lea `32*9*0`($r_ptr), %rdi - - vmovdqa 32*0-128(%rsi), $ACC0 - vmovdqa 32*1-128(%rsi), $ACC1 - vmovdqa 32*2-128(%rsi), $ACC2 - vmovdqa 32*3-128(%rsi), $ACC3 - vmovdqa 32*4-128(%rsi), $ACC4 - vmovdqa 32*5-128(%rsi), $ACC5 - vmovdqa 32*6-128(%rsi), $ACC6 - vmovdqa 32*7-128(%rsi), $ACC7 - vmovdqa 32*8-128(%rsi), $ACC8 - - vpaddq 32*0-128(%rax), $ACC0, $ACC0 - vpaddq 32*1-128(%rax), $ACC1, $ACC1 - vpaddq 32*2-128(%rax), $ACC2, $ACC2 - vpaddq 32*3-128(%rax), $ACC3, $ACC3 - vpaddq 32*4-128(%rax), $ACC4, $ACC4 - vpaddq 32*5-128(%rax), $ACC5, $ACC5 - vpaddq 32*6-128(%rax), $ACC6, $ACC6 - vpaddq 32*7-128(%rax), $ACC7, $ACC7 - vpaddq 32*8-128(%rax), $ACC8, $ACC8 - - vpsubq 32*0-128(%rdx), $ACC0, $ACC0 - vpsubq 32*1-128(%rdx), $ACC1, $ACC1 - vpsubq 32*2-128(%rdx), $ACC2, $ACC2 - vpsubq 32*3-128(%rdx), $ACC3, $ACC3 - vpsubq 32*4-128(%rdx), $ACC4, $ACC4 - vpsubq 32*5-128(%rdx), $ACC5, $ACC5 - vpsubq 32*6-128(%rdx), $ACC6, $ACC6 - vpsubq 32*7-128(%rdx), $ACC7, $ACC7 - vpsubq 32*8-128(%rdx), $ACC8, $ACC8 - - vpsubq 32*0-128(%rcx), $ACC0, $ACC0 - vpsubq 32*1-128(%rcx), $ACC1, $ACC1 - vpsubq 32*2-128(%rcx), $ACC2, $ACC2 - vpsubq 32*3-128(%rcx), $ACC3, $ACC3 - vpsubq 32*4-128(%rcx), $ACC4, $ACC4 - vpsubq 32*5-128(%rcx), $ACC5, $ACC5 - vpsubq 32*6-128(%rcx), $ACC6, $ACC6 - vpsubq 32*7-128(%rcx), $ACC7, $ACC7 - vpsubq 32*8-128(%rcx), $ACC8, $ACC8 - call avx2_normalize - - lea 32*0($b_ptr), %rsi - lea 32*0($a_ptr), %rdx - call avx2_select_n_store - - # H = U2 - X3 - lea `32*9*0`(%rsp), %rsi - lea `32*9*0`($r_ptr), %rdx - lea `32*9*3`(%rsp), %rdi - call avx2_sub_x4 - call avx2_normalize_n_store - - # H = H*R - lea `32*9*3`(%rsp), %rsi - lea `32*9*4`(%rsp), %rdx - lea `32*9*3`(%rsp), %rdi - 
call avx2_mul_x4 - call avx2_normalize_n_store - - # S2 = S1 * H^3 - lea `32*9*7`(%rsp), %rsi - lea `32*9*1`($a_ptr), %rdx - lea `32*9*1`(%rsp), %rdi - call avx2_mul_x4 - call avx2_normalize_n_store - - # - lea `32*9*3`(%rsp), %rsi - lea `32*9*1`(%rsp), %rdx - lea `32*9*1`($r_ptr), %rdi - call avx2_sub_x4 - call avx2_normalize - - lea 32*9($b_ptr), %rsi - lea 32*9($a_ptr), %rdx - call avx2_select_n_store - - #lea 32*9*0($r_ptr), %rsi - #lea 32*9*0($r_ptr), %rdi - #call avx2_mul_by1_x4 - #NORMALIZE - #STORE - - lea `32*9*1`($r_ptr), %rsi - lea `32*9*1`($r_ptr), %rdi - call avx2_mul_by1_x4 - call avx2_normalize_n_store - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps %xmm6, -16*10(%rbp) - movaps %xmm7, -16*9(%rbp) - movaps %xmm8, -16*8(%rbp) - movaps %xmm9, -16*7(%rbp) - movaps %xmm10, -16*6(%rbp) - movaps %xmm11, -16*5(%rbp) - movaps %xmm12, -16*4(%rbp) - movaps %xmm13, -16*3(%rbp) - movaps %xmm14, -16*2(%rbp) - movaps %xmm15, -16*1(%rbp) -___ -$code.=<<___; - mov %rbp, %rsp - pop %rbp - ret -.size ecp_nistz256_avx2_point_add_affines_x4,.-ecp_nistz256_avx2_point_add_affines_x4 - -################################################################################ -# void ecp_nistz256_avx2_to_mont(void* RESULTx4, void *Ax4); -.globl ecp_nistz256_avx2_to_mont -.type ecp_nistz256_avx2_to_mont,\@function,2 -.align 32 -ecp_nistz256_avx2_to_mont: - vzeroupper -___ -$code.=<<___ if ($win64); - lea -8-16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK - lea .LTO_MONT_AVX2(%rip), %rdx - call avx2_mul_x4 - call avx2_normalize_n_store - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps 16*0(%rsp), %xmm6 - movaps 16*1(%rsp), %xmm7 
- movaps 16*2(%rsp), %xmm8 - movaps 16*3(%rsp), %xmm9 - movaps 16*4(%rsp), %xmm10 - movaps 16*5(%rsp), %xmm11 - movaps 16*6(%rsp), %xmm12 - movaps 16*7(%rsp), %xmm13 - movaps 16*8(%rsp), %xmm14 - movaps 16*9(%rsp), %xmm15 - lea 8+16*10(%rsp), %rsp -___ -$code.=<<___; - ret -.size ecp_nistz256_avx2_to_mont,.-ecp_nistz256_avx2_to_mont - -################################################################################ -# void ecp_nistz256_avx2_from_mont(void* RESULTx4, void *Ax4); -.globl ecp_nistz256_avx2_from_mont -.type ecp_nistz256_avx2_from_mont,\@function,2 -.align 32 -ecp_nistz256_avx2_from_mont: - vzeroupper -___ -$code.=<<___ if ($win64); - lea -8-16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - vmovdqa .LAVX2_AND_MASK(%rip), $AND_MASK - lea .LFROM_MONT_AVX2(%rip), %rdx - call avx2_mul_x4 - call avx2_normalize_n_store - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps 16*0(%rsp), %xmm6 - movaps 16*1(%rsp), %xmm7 - movaps 16*2(%rsp), %xmm8 - movaps 16*3(%rsp), %xmm9 - movaps 16*4(%rsp), %xmm10 - movaps 16*5(%rsp), %xmm11 - movaps 16*6(%rsp), %xmm12 - movaps 16*7(%rsp), %xmm13 - movaps 16*8(%rsp), %xmm14 - movaps 16*9(%rsp), %xmm15 - lea 8+16*10(%rsp), %rsp -___ -$code.=<<___; - ret -.size ecp_nistz256_avx2_from_mont,.-ecp_nistz256_avx2_from_mont - -################################################################################ -# void ecp_nistz256_avx2_set1(void* RESULTx4); -.globl ecp_nistz256_avx2_set1 -.type ecp_nistz256_avx2_set1,\@function,1 -.align 32 -ecp_nistz256_avx2_set1: - lea .LONE+128(%rip), %rax - lea 128(%rdi), %rdi - vzeroupper - vmovdqa 32*0-128(%rax), %ymm0 - vmovdqa 32*1-128(%rax), %ymm1 - vmovdqa 32*2-128(%rax), %ymm2 - vmovdqa 
32*3-128(%rax), %ymm3 - vmovdqa 32*4-128(%rax), %ymm4 - vmovdqa 32*5-128(%rax), %ymm5 - vmovdqa %ymm0, 32*0-128(%rdi) - vmovdqa 32*6-128(%rax), %ymm0 - vmovdqa %ymm1, 32*1-128(%rdi) - vmovdqa 32*7-128(%rax), %ymm1 - vmovdqa %ymm2, 32*2-128(%rdi) - vmovdqa 32*8-128(%rax), %ymm2 - vmovdqa %ymm3, 32*3-128(%rdi) - vmovdqa %ymm4, 32*4-128(%rdi) - vmovdqa %ymm5, 32*5-128(%rdi) - vmovdqa %ymm0, 32*6-128(%rdi) - vmovdqa %ymm1, 32*7-128(%rdi) - vmovdqa %ymm2, 32*8-128(%rdi) - - vzeroupper - ret -.size ecp_nistz256_avx2_set1,.-ecp_nistz256_avx2_set1 -___ -} -{ -################################################################################ -# void ecp_nistz256_avx2_multi_gather_w7(void* RESULT, void *in, -# int index0, int index1, int index2, int index3); -################################################################################ - -my ($val,$in_t,$index0,$index1,$index2,$index3)=("%rdi","%rsi","%edx","%ecx","%r8d","%r9d"); -my ($INDEX0,$INDEX1,$INDEX2,$INDEX3)=map("%ymm$_",(0..3)); -my ($R0a,$R0b,$R1a,$R1b,$R2a,$R2b,$R3a,$R3b)=map("%ymm$_",(4..11)); -my ($M0,$T0,$T1,$TMP0)=map("%ymm$_",(12..15)); - -$code.=<<___; -.globl ecp_nistz256_avx2_multi_gather_w7 -.type ecp_nistz256_avx2_multi_gather_w7,\@function,6 -.align 32 -ecp_nistz256_avx2_multi_gather_w7: - vzeroupper -___ -$code.=<<___ if ($win64); - lea -8-16*10(%rsp), %rsp - vmovaps %xmm6, -8-16*10(%rax) - vmovaps %xmm7, -8-16*9(%rax) - vmovaps %xmm8, -8-16*8(%rax) - vmovaps %xmm9, -8-16*7(%rax) - vmovaps %xmm10, -8-16*6(%rax) - vmovaps %xmm11, -8-16*5(%rax) - vmovaps %xmm12, -8-16*4(%rax) - vmovaps %xmm13, -8-16*3(%rax) - vmovaps %xmm14, -8-16*2(%rax) - vmovaps %xmm15, -8-16*1(%rax) -___ -$code.=<<___; - lea .LIntOne(%rip), %rax - - vmovd $index0, %xmm0 - vmovd $index1, %xmm1 - vmovd $index2, %xmm2 - vmovd $index3, %xmm3 - - vpxor $R0a, $R0a, $R0a - vpxor $R0b, $R0b, $R0b - vpxor $R1a, $R1a, $R1a - vpxor $R1b, $R1b, $R1b - vpxor $R2a, $R2a, $R2a - vpxor $R2b, $R2b, $R2b - vpxor $R3a, $R3a, $R3a - vpxor $R3b, $R3b, 
$R3b - vmovdqa (%rax), $M0 - - vpermd $INDEX0, $R0a, $INDEX0 - vpermd $INDEX1, $R0a, $INDEX1 - vpermd $INDEX2, $R0a, $INDEX2 - vpermd $INDEX3, $R0a, $INDEX3 - - mov \$64, %ecx - lea 112($val), $val # size optimization - jmp .Lmulti_select_loop_avx2 - -# INDEX=0, corresponds to the point at infty (0,0) -.align 32 -.Lmulti_select_loop_avx2: - vpcmpeqd $INDEX0, $M0, $TMP0 - - vmovdqa `32*0+32*64*2*0`($in_t), $T0 - vmovdqa `32*1+32*64*2*0`($in_t), $T1 - vpand $TMP0, $T0, $T0 - vpand $TMP0, $T1, $T1 - vpxor $T0, $R0a, $R0a - vpxor $T1, $R0b, $R0b - - vpcmpeqd $INDEX1, $M0, $TMP0 - - vmovdqa `32*0+32*64*2*1`($in_t), $T0 - vmovdqa `32*1+32*64*2*1`($in_t), $T1 - vpand $TMP0, $T0, $T0 - vpand $TMP0, $T1, $T1 - vpxor $T0, $R1a, $R1a - vpxor $T1, $R1b, $R1b - - vpcmpeqd $INDEX2, $M0, $TMP0 - - vmovdqa `32*0+32*64*2*2`($in_t), $T0 - vmovdqa `32*1+32*64*2*2`($in_t), $T1 - vpand $TMP0, $T0, $T0 - vpand $TMP0, $T1, $T1 - vpxor $T0, $R2a, $R2a - vpxor $T1, $R2b, $R2b - - vpcmpeqd $INDEX3, $M0, $TMP0 - - vmovdqa `32*0+32*64*2*3`($in_t), $T0 - vmovdqa `32*1+32*64*2*3`($in_t), $T1 - vpand $TMP0, $T0, $T0 - vpand $TMP0, $T1, $T1 - vpxor $T0, $R3a, $R3a - vpxor $T1, $R3b, $R3b - - vpaddd (%rax), $M0, $M0 # increment - lea 32*2($in_t), $in_t - - dec %ecx - jnz .Lmulti_select_loop_avx2 - - vmovdqu $R0a, 32*0-112($val) - vmovdqu $R0b, 32*1-112($val) - vmovdqu $R1a, 32*2-112($val) - vmovdqu $R1b, 32*3-112($val) - vmovdqu $R2a, 32*4-112($val) - vmovdqu $R2b, 32*5-112($val) - vmovdqu $R3a, 32*6-112($val) - vmovdqu $R3b, 32*7-112($val) - - vzeroupper -___ -$code.=<<___ if ($win64); - movaps 16*0(%rsp), %xmm6 - movaps 16*1(%rsp), %xmm7 - movaps 16*2(%rsp), %xmm8 - movaps 16*3(%rsp), %xmm9 - movaps 16*4(%rsp), %xmm10 - movaps 16*5(%rsp), %xmm11 - movaps 16*6(%rsp), %xmm12 - movaps 16*7(%rsp), %xmm13 - movaps 16*8(%rsp), %xmm14 - movaps 16*9(%rsp), %xmm15 - lea 8+16*10(%rsp), %rsp -___ -$code.=<<___; - ret -.size ecp_nistz256_avx2_multi_gather_w7,.-ecp_nistz256_avx2_multi_gather_w7 - -.extern 
OPENSSL_ia32cap_P -.globl ecp_nistz_avx2_eligible -.type ecp_nistz_avx2_eligible,\@abi-omnipotent -.align 32 -ecp_nistz_avx2_eligible: - mov OPENSSL_ia32cap_P+8(%rip),%eax - shr \$5,%eax - and \$1,%eax - ret -.size ecp_nistz_avx2_eligible,.-ecp_nistz_avx2_eligible -___ -} -}} else {{ # assembler is too old -$code.=<<___; -.text - -.globl ecp_nistz256_avx2_transpose_convert -.globl ecp_nistz256_avx2_convert_transpose_back -.globl ecp_nistz256_avx2_point_add_affine_x4 -.globl ecp_nistz256_avx2_point_add_affines_x4 -.globl ecp_nistz256_avx2_to_mont -.globl ecp_nistz256_avx2_from_mont -.globl ecp_nistz256_avx2_set1 -.globl ecp_nistz256_avx2_multi_gather_w7 -.type ecp_nistz256_avx2_multi_gather_w7,\@abi-omnipotent -ecp_nistz256_avx2_transpose_convert: -ecp_nistz256_avx2_convert_transpose_back: -ecp_nistz256_avx2_point_add_affine_x4: -ecp_nistz256_avx2_point_add_affines_x4: -ecp_nistz256_avx2_to_mont: -ecp_nistz256_avx2_from_mont: -ecp_nistz256_avx2_set1: -ecp_nistz256_avx2_multi_gather_w7: - .byte 0x0f,0x0b # ud2 - ret -.size ecp_nistz256_avx2_multi_gather_w7,.-ecp_nistz256_avx2_multi_gather_w7 - -.globl ecp_nistz_avx2_eligible -.type ecp_nistz_avx2_eligible,\@abi-omnipotent -ecp_nistz_avx2_eligible: - xor %eax,%eax - ret -.size ecp_nistz_avx2_eligible,.-ecp_nistz_avx2_eligible -___ -}} - -foreach (split("\n",$code)) { - s/\`([^\`]*)\`/eval($1)/geo; - - print $_,"\n"; -} - -close STDOUT or die "error closing STDOUT: $!"; diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl index de9b194510bf..b50ee70191b2 100755 --- a/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -72,7 +72,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 
3.1->3.01, 3.10->3.10 $avx = ($ver>=3.0) + ($ver>=3.01); $addx = ($ver>=3.03); diff --git a/crypto/ec/asm/x25519-x86_64.pl b/crypto/ec/asm/x25519-x86_64.pl index 3d9d1dc1ad0c..62599dacaccd 100755 --- a/crypto/ec/asm/x25519-x86_64.pl +++ b/crypto/ec/asm/x25519-x86_64.pl @@ -90,7 +90,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+)\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $addx = ($ver>=3.03); } diff --git a/crypto/ec/ec_ameth.c b/crypto/ec/ec_ameth.c index 221038373921..5098bd7a6602 100644 --- a/crypto/ec/ec_ameth.c +++ b/crypto/ec/ec_ameth.c @@ -1,5 +1,5 @@ /* - * Copyright 2006-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -23,7 +23,7 @@ static int ecdh_cms_decrypt(CMS_RecipientInfo *ri); static int ecdh_cms_encrypt(CMS_RecipientInfo *ri); #endif -static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) +static int eckey_param2type(int *pptype, void **ppval, const EC_KEY *ec_key) { const EC_GROUP *group; int nid; @@ -35,7 +35,14 @@ static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) && (nid = EC_GROUP_get_curve_name(group))) /* we have a 'named curve' => just set the OID */ { - *ppval = OBJ_nid2obj(nid); + ASN1_OBJECT *asn1obj = OBJ_nid2obj(nid); + + if (asn1obj == NULL || OBJ_length(asn1obj) == 0) { + ASN1_OBJECT_free(asn1obj); + ECerr(EC_F_ECKEY_PARAM2TYPE, EC_R_MISSING_OID); + return 0; + } + *ppval = asn1obj; *pptype = V_ASN1_OBJECT; } else { /* explicit parameters */ @@ -43,7 +50,17 @@ static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) pstr = ASN1_STRING_new(); if (pstr == NULL) return 0; - pstr->length = i2d_ECParameters(ec_key, &pstr->data); + + /* + * The cast in the following line is intentional as the + * `i2d_ECParameters` signature can't be constified (see discussion at + * https://github.com/openssl/openssl/pull/9347 where related and + * required constification backports were rejected). + * + * This cast should be safe anyway, because we can expect + * `i2d_ECParameters()` to treat the first argument as if it was const. 
+ */ + pstr->length = i2d_ECParameters((EC_KEY *)ec_key, &pstr->data); if (pstr->length <= 0) { ASN1_STRING_free(pstr); ECerr(EC_F_ECKEY_PARAM2TYPE, ERR_R_EC_LIB); @@ -57,7 +74,7 @@ static int eckey_param2type(int *pptype, void **ppval, EC_KEY *ec_key) static int eckey_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) { - EC_KEY *ec_key = pkey->pkey.ec; + const EC_KEY *ec_key = pkey->pkey.ec; void *pval = NULL; int ptype; unsigned char *penc = NULL, *p; diff --git a/crypto/ec/ec_asn1.c b/crypto/ec/ec_asn1.c index 006f9a5dea17..7b7c75ce8443 100644 --- a/crypto/ec/ec_asn1.c +++ b/crypto/ec/ec_asn1.c @@ -137,6 +137,12 @@ struct ec_parameters_st { ASN1_INTEGER *cofactor; } /* ECPARAMETERS */ ; +typedef enum { + ECPKPARAMETERS_TYPE_NAMED = 0, + ECPKPARAMETERS_TYPE_EXPLICIT, + ECPKPARAMETERS_TYPE_IMPLICIT +} ecpk_parameters_type_t; + struct ecpk_parameters_st { int type; union { @@ -535,9 +541,10 @@ ECPKPARAMETERS *EC_GROUP_get_ecpkparameters(const EC_GROUP *group, return NULL; } } else { - if (ret->type == 0) + if (ret->type == ECPKPARAMETERS_TYPE_NAMED) ASN1_OBJECT_free(ret->value.named_curve); - else if (ret->type == 1 && ret->value.parameters) + else if (ret->type == ECPKPARAMETERS_TYPE_EXPLICIT + && ret->value.parameters != NULL) ECPARAMETERS_free(ret->value.parameters); } @@ -547,15 +554,22 @@ ECPKPARAMETERS *EC_GROUP_get_ecpkparameters(const EC_GROUP *group, */ tmp = EC_GROUP_get_curve_name(group); if (tmp) { - ret->type = 0; - if ((ret->value.named_curve = OBJ_nid2obj(tmp)) == NULL) + ASN1_OBJECT *asn1obj = OBJ_nid2obj(tmp); + + if (asn1obj == NULL || OBJ_length(asn1obj) == 0) { + ASN1_OBJECT_free(asn1obj); + ECerr(EC_F_EC_GROUP_GET_ECPKPARAMETERS, EC_R_MISSING_OID); ok = 0; + } else { + ret->type = ECPKPARAMETERS_TYPE_NAMED; + ret->value.named_curve = asn1obj; + } } else /* we don't know the nid => ERROR */ ok = 0; } else { /* use the ECPARAMETERS structure */ - ret->type = 1; + ret->type = ECPKPARAMETERS_TYPE_EXPLICIT; if ((ret->value.parameters = 
EC_GROUP_get_ecparameters(group, NULL)) == NULL) ok = 0; @@ -894,7 +908,8 @@ EC_GROUP *EC_GROUP_new_from_ecpkparameters(const ECPKPARAMETERS *params) return NULL; } - if (params->type == 0) { /* the curve is given by an OID */ + if (params->type == ECPKPARAMETERS_TYPE_NAMED) { + /* the curve is given by an OID */ tmp = OBJ_obj2nid(params->value.named_curve); if ((ret = EC_GROUP_new_by_curve_name(tmp)) == NULL) { ECerr(EC_F_EC_GROUP_NEW_FROM_ECPKPARAMETERS, @@ -902,15 +917,16 @@ EC_GROUP *EC_GROUP_new_from_ecpkparameters(const ECPKPARAMETERS *params) return NULL; } EC_GROUP_set_asn1_flag(ret, OPENSSL_EC_NAMED_CURVE); - } else if (params->type == 1) { /* the parameters are given by a - * ECPARAMETERS structure */ + } else if (params->type == ECPKPARAMETERS_TYPE_EXPLICIT) { + /* the parameters are given by an ECPARAMETERS structure */ ret = EC_GROUP_new_from_ecparameters(params->value.parameters); if (!ret) { ECerr(EC_F_EC_GROUP_NEW_FROM_ECPKPARAMETERS, ERR_R_EC_LIB); return NULL; } EC_GROUP_set_asn1_flag(ret, OPENSSL_EC_EXPLICIT_CURVE); - } else if (params->type == 2) { /* implicitlyCA */ + } else if (params->type == ECPKPARAMETERS_TYPE_IMPLICIT) { + /* implicit parameters inherited from CA - unsupported */ return NULL; } else { ECerr(EC_F_EC_GROUP_NEW_FROM_ECPKPARAMETERS, EC_R_ASN1_ERROR); @@ -940,6 +956,9 @@ EC_GROUP *d2i_ECPKParameters(EC_GROUP **a, const unsigned char **in, long len) return NULL; } + if (params->type == ECPKPARAMETERS_TYPE_EXPLICIT) + group->decoded_from_explicit_params = 1; + if (a) { EC_GROUP_free(*a); *a = group; @@ -991,6 +1010,9 @@ EC_KEY *d2i_ECPrivateKey(EC_KEY **a, const unsigned char **in, long len) if (priv_key->parameters) { EC_GROUP_free(ret->group); ret->group = EC_GROUP_new_from_ecpkparameters(priv_key->parameters); + if (ret->group != NULL + && priv_key->parameters->type == ECPKPARAMETERS_TYPE_EXPLICIT) + ret->group->decoded_from_explicit_params = 1; } if (ret->group == NULL) { diff --git a/crypto/ec/ec_err.c b/crypto/ec/ec_err.c 
index ce3493823218..bfe74226503e 100644 --- a/crypto/ec/ec_err.c +++ b/crypto/ec/ec_err.c @@ -1,6 +1,6 @@ /* * Generated by util/mkerr.pl DO NOT EDIT - * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -341,6 +341,7 @@ static const ERR_STRING_DATA EC_str_reasons[] = { {ERR_PACK(ERR_LIB_EC, 0, EC_R_LADDER_POST_FAILURE), "ladder post failure"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_LADDER_PRE_FAILURE), "ladder pre failure"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_LADDER_STEP_FAILURE), "ladder step failure"}, + {ERR_PACK(ERR_LIB_EC, 0, EC_R_MISSING_OID), "missing OID"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_MISSING_PARAMETERS), "missing parameters"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_MISSING_PRIVATE_KEY), "missing private key"}, {ERR_PACK(ERR_LIB_EC, 0, EC_R_NEED_NEW_SETUP_VALUES), diff --git a/crypto/ec/ec_key.c b/crypto/ec/ec_key.c index 08aaac5d8a6f..23efbd015ca4 100644 --- a/crypto/ec/ec_key.c +++ b/crypto/ec/ec_key.c @@ -1,5 +1,5 @@ /* - * Copyright 2002-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2002-2020 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). 
You may not use @@ -14,6 +14,7 @@ #include "internal/refcount.h" #include <openssl/err.h> #include <openssl/engine.h> +#include "crypto/bn.h" EC_KEY *EC_KEY_new(void) { @@ -416,17 +417,86 @@ const BIGNUM *EC_KEY_get0_private_key(const EC_KEY *key) int EC_KEY_set_private_key(EC_KEY *key, const BIGNUM *priv_key) { + int fixed_top; + const BIGNUM *order = NULL; + BIGNUM *tmp_key = NULL; + if (key->group == NULL || key->group->meth == NULL) return 0; + + /* + * Not only should key->group be set, but it should also be in a valid + * fully initialized state. + * + * Specifically, to operate in constant time, we need that the group order + * is set, as we use its length as the fixed public size of any scalar used + * as an EC private key. + */ + order = EC_GROUP_get0_order(key->group); + if (order == NULL || BN_is_zero(order)) + return 0; /* This should never happen */ + if (key->group->meth->set_private != NULL && key->group->meth->set_private(key, priv_key) == 0) return 0; if (key->meth->set_private != NULL && key->meth->set_private(key, priv_key) == 0) return 0; + + /* + * We should never leak the bit length of the secret scalar in the key, + * so we always set the `BN_FLG_CONSTTIME` flag on the internal `BIGNUM` + * holding the secret scalar. + * + * This is important also because `BN_dup()` (and `BN_copy()`) do not + * propagate the `BN_FLG_CONSTTIME` flag from the source `BIGNUM`, and + * this brings an extra risk of inadvertently losing the flag, even when + * the caller specifically set it. + * + * The propagation has been turned on and off a few times in the past + * years because in some conditions has shown unintended consequences in + * some code paths, so at the moment we can't fix this in the BN layer. 
+ * + * In `EC_KEY_set_private_key()` we can work around the propagation by + * manually setting the flag after `BN_dup()` as we know for sure that + * inside the EC module the `BN_FLG_CONSTTIME` is always treated + * correctly and should not generate unintended consequences. + * + * Setting the BN_FLG_CONSTTIME flag alone is never enough, we also have + * to preallocate the BIGNUM internal buffer to a fixed public size big + * enough that operations performed during the processing never trigger + * a realloc which would leak the size of the scalar through memory + * accesses. + * + * Fixed Length + * ------------ + * + * The order of the large prime subgroup of the curve is our choice for + * a fixed public size, as that is generally the upper bound for + * generating a private key in EC cryptosystems and should fit all valid + * secret scalars. + * + * For preallocating the BIGNUM storage we look at the number of "words" + * required for the internal representation of the order, and we + * preallocate 2 extra "words" in case any of the subsequent processing + * might temporarily overflow the order length. + */ + tmp_key = BN_dup(priv_key); + if (tmp_key == NULL) + return 0; + + BN_set_flags(tmp_key, BN_FLG_CONSTTIME); + + fixed_top = bn_get_top(order) + 2; + if (bn_wexpand(tmp_key, fixed_top) == NULL) { + BN_clear_free(tmp_key); + return 0; + } + BN_clear_free(key->priv_key); - key->priv_key = BN_dup(priv_key); - return (key->priv_key == NULL) ? 
0 : 1; + key->priv_key = tmp_key; + + return 1; } const EC_POINT *EC_KEY_get0_public_key(const EC_KEY *key) @@ -494,6 +564,13 @@ void EC_KEY_clear_flags(EC_KEY *key, int flags) key->flags &= ~flags; } +int EC_KEY_decoded_from_explicit_params(const EC_KEY *key) +{ + if (key == NULL || key->group == NULL) + return -1; + return key->group->decoded_from_explicit_params; +} + size_t EC_KEY_key2buf(const EC_KEY *key, point_conversion_form_t form, unsigned char **pbuf, BN_CTX *ctx) { diff --git a/crypto/ec/ec_lib.c b/crypto/ec/ec_lib.c index 6832383cad51..08db89fceeb5 100644 --- a/crypto/ec/ec_lib.c +++ b/crypto/ec/ec_lib.c @@ -211,6 +211,7 @@ int EC_GROUP_copy(EC_GROUP *dest, const EC_GROUP *src) dest->asn1_flag = src->asn1_flag; dest->asn1_form = src->asn1_form; + dest->decoded_from_explicit_params = src->decoded_from_explicit_params; if (src->seed) { OPENSSL_free(dest->seed); diff --git a/crypto/ec/ec_local.h b/crypto/ec/ec_local.h index e656fbd5e775..64725a9c92f4 100644 --- a/crypto/ec/ec_local.h +++ b/crypto/ec/ec_local.h @@ -1,5 +1,5 @@ /* - * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved * * Licensed under the OpenSSL license (the "License"). 
You may not use @@ -209,6 +209,8 @@ struct ec_group_st { BIGNUM *order, *cofactor; int curve_name; /* optional NID for named curve */ int asn1_flag; /* flag to control the asn1 encoding */ + int decoded_from_explicit_params; /* set if decoded from explicit + * curve parameters encoding */ point_conversion_form_t asn1_form; unsigned char *seed; /* optional seed for parameters (appears in * ASN1) */ diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c index 9a9ced8f1343..6f7d66c8bea4 100644 --- a/crypto/ec/ecp_nistp224.c +++ b/crypto/ec/ecp_nistp224.c @@ -72,6 +72,7 @@ typedef uint64_t u64; */ typedef uint64_t limb; +typedef uint64_t limb_aX __attribute((__aligned__(1))); typedef uint128_t widelimb; typedef limb felem[4]; @@ -307,10 +308,10 @@ const EC_METHOD *EC_GFp_nistp224_method(void) */ static void bin28_to_felem(felem out, const u8 in[28]) { - out[0] = *((const uint64_t *)(in)) & 0x00ffffffffffffff; - out[1] = (*((const uint64_t *)(in + 7))) & 0x00ffffffffffffff; - out[2] = (*((const uint64_t *)(in + 14))) & 0x00ffffffffffffff; - out[3] = (*((const uint64_t *)(in+20))) >> 8; + out[0] = *((const limb *)(in)) & 0x00ffffffffffffff; + out[1] = (*((const limb_aX *)(in + 7))) & 0x00ffffffffffffff; + out[2] = (*((const limb_aX *)(in + 14))) & 0x00ffffffffffffff; + out[3] = (*((const limb_aX *)(in + 20))) >> 8; } static void felem_to_bin28(u8 out[28], const felem in) diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c index 75eeba853679..08b32787293b 100644 --- a/crypto/ec/ecp_nistp521.c +++ b/crypto/ec/ecp_nistp521.c @@ -128,6 +128,7 @@ static const felem_bytearray nistp521_curve_params[5] = { # define NLIMBS 9 typedef uint64_t limb; +typedef limb limb_aX __attribute((__aligned__(1))); typedef limb felem[NLIMBS]; typedef uint128_t largefelem[NLIMBS]; @@ -141,14 +142,14 @@ static const limb bottom58bits = 0x3ffffffffffffff; static void bin66_to_felem(felem out, const u8 in[66]) { out[0] = (*((limb *) & in[0])) & bottom58bits; - out[1] = 
(*((limb *) & in[7]) >> 2) & bottom58bits; - out[2] = (*((limb *) & in[14]) >> 4) & bottom58bits; - out[3] = (*((limb *) & in[21]) >> 6) & bottom58bits; - out[4] = (*((limb *) & in[29])) & bottom58bits; - out[5] = (*((limb *) & in[36]) >> 2) & bottom58bits; - out[6] = (*((limb *) & in[43]) >> 4) & bottom58bits; - out[7] = (*((limb *) & in[50]) >> 6) & bottom58bits; - out[8] = (*((limb *) & in[58])) & bottom57bits; + out[1] = (*((limb_aX *) & in[7]) >> 2) & bottom58bits; + out[2] = (*((limb_aX *) & in[14]) >> 4) & bottom58bits; + out[3] = (*((limb_aX *) & in[21]) >> 6) & bottom58bits; + out[4] = (*((limb_aX *) & in[29])) & bottom58bits; + out[5] = (*((limb_aX *) & in[36]) >> 2) & bottom58bits; + out[6] = (*((limb_aX *) & in[43]) >> 4) & bottom58bits; + out[7] = (*((limb_aX *) & in[50]) >> 6) & bottom58bits; + out[8] = (*((limb_aX *) & in[58])) & bottom57bits; } /* @@ -159,14 +160,14 @@ static void felem_to_bin66(u8 out[66], const felem in) { memset(out, 0, 66); (*((limb *) & out[0])) = in[0]; - (*((limb *) & out[7])) |= in[1] << 2; - (*((limb *) & out[14])) |= in[2] << 4; - (*((limb *) & out[21])) |= in[3] << 6; - (*((limb *) & out[29])) = in[4]; - (*((limb *) & out[36])) |= in[5] << 2; - (*((limb *) & out[43])) |= in[6] << 4; - (*((limb *) & out[50])) |= in[7] << 6; - (*((limb *) & out[58])) = in[8]; + (*((limb_aX *) & out[7])) |= in[1] << 2; + (*((limb_aX *) & out[14])) |= in[2] << 4; + (*((limb_aX *) & out[21])) |= in[3] << 6; + (*((limb_aX *) & out[29])) = in[4]; + (*((limb_aX *) & out[36])) |= in[5] << 2; + (*((limb_aX *) & out[43])) |= in[6] << 4; + (*((limb_aX *) & out[50])) |= in[7] << 6; + (*((limb_aX *) & out[58])) = in[8]; } /* BN_to_felem converts an OpenSSL BIGNUM into an felem */ diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c index ba9268138862..5005249b05ea 100644 --- a/crypto/ec/ecp_nistz256.c +++ b/crypto/ec/ecp_nistz256.c @@ -929,207 +929,6 @@ __owur static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) return ret; 
} -/* - * Note that by default ECP_NISTZ256_AVX2 is undefined. While it's great - * code processing 4 points in parallel, corresponding serial operation - * is several times slower, because it uses 29x29=58-bit multiplication - * as opposite to 64x64=128-bit in integer-only scalar case. As result - * it doesn't provide *significant* performance improvement. Note that - * just defining ECP_NISTZ256_AVX2 is not sufficient to make it work, - * you'd need to compile even asm/ecp_nistz256-avx.pl module. - */ -#if defined(ECP_NISTZ256_AVX2) -# if !(defined(__x86_64) || defined(__x86_64__) || \ - defined(_M_AMD64) || defined(_M_X64)) || \ - !(defined(__GNUC__) || defined(_MSC_VER)) /* this is for ALIGN32 */ -# undef ECP_NISTZ256_AVX2 -# else -/* Constant time access, loading four values, from four consecutive tables */ -void ecp_nistz256_avx2_multi_gather_w7(void *result, const void *in, - int index0, int index1, int index2, - int index3); -void ecp_nistz256_avx2_transpose_convert(void *RESULTx4, const void *in); -void ecp_nistz256_avx2_convert_transpose_back(void *result, const void *Ax4); -void ecp_nistz256_avx2_point_add_affine_x4(void *RESULTx4, const void *Ax4, - const void *Bx4); -void ecp_nistz256_avx2_point_add_affines_x4(void *RESULTx4, const void *Ax4, - const void *Bx4); -void ecp_nistz256_avx2_to_mont(void *RESULTx4, const void *Ax4); -void ecp_nistz256_avx2_from_mont(void *RESULTx4, const void *Ax4); -void ecp_nistz256_avx2_set1(void *RESULTx4); -int ecp_nistz_avx2_eligible(void); - -static void booth_recode_w7(unsigned char *sign, - unsigned char *digit, unsigned char in) -{ - unsigned char s, d; - - s = ~((in >> 7) - 1); - d = (1 << 8) - in - 1; - d = (d & s) | (in & ~s); - d = (d >> 1) + (d & 1); - - *sign = s & 1; - *digit = d; -} - -/* - * ecp_nistz256_avx2_mul_g performs multiplication by G, using only the - * precomputed table. It does 4 affine point additions in parallel, - * significantly speeding up point multiplication for a fixed value. 
- */ -static void ecp_nistz256_avx2_mul_g(P256_POINT *r, - unsigned char p_str[33], - const P256_POINT_AFFINE(*preComputedTable)[64]) -{ - const unsigned int window_size = 7; - const unsigned int mask = (1 << (window_size + 1)) - 1; - unsigned int wvalue; - /* Using 4 windows at a time */ - unsigned char sign0, digit0; - unsigned char sign1, digit1; - unsigned char sign2, digit2; - unsigned char sign3, digit3; - unsigned int idx = 0; - BN_ULONG tmp[P256_LIMBS]; - int i; - - ALIGN32 BN_ULONG aX4[4 * 9 * 3] = { 0 }; - ALIGN32 BN_ULONG bX4[4 * 9 * 2] = { 0 }; - ALIGN32 P256_POINT_AFFINE point_arr[4]; - ALIGN32 P256_POINT res_point_arr[4]; - - /* Initial four windows */ - wvalue = *((u16 *) & p_str[0]); - wvalue = (wvalue << 1) & mask; - idx += window_size; - booth_recode_w7(&sign0, &digit0, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign1, &digit1, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign2, &digit2, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign3, &digit3, wvalue); - - ecp_nistz256_avx2_multi_gather_w7(point_arr, preComputedTable[0], - digit0, digit1, digit2, digit3); - - ecp_nistz256_neg(tmp, point_arr[0].Y); - copy_conditional(point_arr[0].Y, tmp, sign0); - ecp_nistz256_neg(tmp, point_arr[1].Y); - copy_conditional(point_arr[1].Y, tmp, sign1); - ecp_nistz256_neg(tmp, point_arr[2].Y); - copy_conditional(point_arr[2].Y, tmp, sign2); - ecp_nistz256_neg(tmp, point_arr[3].Y); - copy_conditional(point_arr[3].Y, tmp, sign3); - - ecp_nistz256_avx2_transpose_convert(aX4, point_arr); - ecp_nistz256_avx2_to_mont(aX4, aX4); - ecp_nistz256_avx2_to_mont(&aX4[4 * 9], &aX4[4 * 9]); - ecp_nistz256_avx2_set1(&aX4[4 * 9 * 2]); - - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue 
= (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign0, &digit0, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign1, &digit1, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign2, &digit2, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign3, &digit3, wvalue); - - ecp_nistz256_avx2_multi_gather_w7(point_arr, preComputedTable[4 * 1], - digit0, digit1, digit2, digit3); - - ecp_nistz256_neg(tmp, point_arr[0].Y); - copy_conditional(point_arr[0].Y, tmp, sign0); - ecp_nistz256_neg(tmp, point_arr[1].Y); - copy_conditional(point_arr[1].Y, tmp, sign1); - ecp_nistz256_neg(tmp, point_arr[2].Y); - copy_conditional(point_arr[2].Y, tmp, sign2); - ecp_nistz256_neg(tmp, point_arr[3].Y); - copy_conditional(point_arr[3].Y, tmp, sign3); - - ecp_nistz256_avx2_transpose_convert(bX4, point_arr); - ecp_nistz256_avx2_to_mont(bX4, bX4); - ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]); - /* Optimized when both inputs are affine */ - ecp_nistz256_avx2_point_add_affines_x4(aX4, aX4, bX4); - - for (i = 2; i < 9; i++) { - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign0, &digit0, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign1, &digit1, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign2, &digit2, wvalue); - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; - booth_recode_w7(&sign3, &digit3, wvalue); - - 
ecp_nistz256_avx2_multi_gather_w7(point_arr, - preComputedTable[4 * i], - digit0, digit1, digit2, digit3); - - ecp_nistz256_neg(tmp, point_arr[0].Y); - copy_conditional(point_arr[0].Y, tmp, sign0); - ecp_nistz256_neg(tmp, point_arr[1].Y); - copy_conditional(point_arr[1].Y, tmp, sign1); - ecp_nistz256_neg(tmp, point_arr[2].Y); - copy_conditional(point_arr[2].Y, tmp, sign2); - ecp_nistz256_neg(tmp, point_arr[3].Y); - copy_conditional(point_arr[3].Y, tmp, sign3); - - ecp_nistz256_avx2_transpose_convert(bX4, point_arr); - ecp_nistz256_avx2_to_mont(bX4, bX4); - ecp_nistz256_avx2_to_mont(&bX4[4 * 9], &bX4[4 * 9]); - - ecp_nistz256_avx2_point_add_affine_x4(aX4, aX4, bX4); - } - - ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 0], &aX4[4 * 9 * 0]); - ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 1], &aX4[4 * 9 * 1]); - ecp_nistz256_avx2_from_mont(&aX4[4 * 9 * 2], &aX4[4 * 9 * 2]); - - ecp_nistz256_avx2_convert_transpose_back(res_point_arr, aX4); - /* Last window is performed serially */ - wvalue = *((u16 *) & p_str[(idx - 1) / 8]); - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - booth_recode_w7(&sign0, &digit0, wvalue); - ecp_nistz256_gather_w7((P256_POINT_AFFINE *)r, - preComputedTable[36], digit0); - ecp_nistz256_neg(tmp, r->Y); - copy_conditional(r->Y, tmp, sign0); - memcpy(r->Z, ONE, sizeof(ONE)); - /* Sum the four windows */ - ecp_nistz256_point_add(r, r, &res_point_arr[0]); - ecp_nistz256_point_add(r, r, &res_point_arr[1]); - ecp_nistz256_point_add(r, r, &res_point_arr[2]); - ecp_nistz256_point_add(r, r, &res_point_arr[3]); -} -# endif -#endif - __owur static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group, const P256_POINT_AFFINE *in, BN_CTX *ctx) @@ -1219,6 +1018,8 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group, } if (preComputedTable) { + BN_ULONG infty; + if ((BN_num_bits(scalar) > 256) || BN_is_negative(scalar)) { if ((tmp_scalar = BN_CTX_get(ctx)) == NULL) @@ -1250,67 +1051,58 @@ __owur static int ecp_nistz256_points_mul(const 
EC_GROUP *group, for (; i < 33; i++) p_str[i] = 0; -#if defined(ECP_NISTZ256_AVX2) - if (ecp_nistz_avx2_eligible()) { - ecp_nistz256_avx2_mul_g(&p.p, p_str, preComputedTable); - } else -#endif - { - BN_ULONG infty; + /* First window */ + wvalue = (p_str[0] << 1) & mask; + idx += window_size; - /* First window */ - wvalue = (p_str[0] << 1) & mask; - idx += window_size; + wvalue = _booth_recode_w7(wvalue); - wvalue = _booth_recode_w7(wvalue); + ecp_nistz256_gather_w7(&p.a, preComputedTable[0], + wvalue >> 1); - ecp_nistz256_gather_w7(&p.a, preComputedTable[0], - wvalue >> 1); - - ecp_nistz256_neg(p.p.Z, p.p.Y); - copy_conditional(p.p.Y, p.p.Z, wvalue & 1); - - /* - * Since affine infinity is encoded as (0,0) and - * Jacobian ias (,,0), we need to harmonize them - * by assigning "one" or zero to Z. - */ - infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] | - p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]); - if (P256_LIMBS == 8) - infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] | - p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]); - - infty = 0 - is_zero(infty); - infty = ~infty; - - p.p.Z[0] = ONE[0] & infty; - p.p.Z[1] = ONE[1] & infty; - p.p.Z[2] = ONE[2] & infty; - p.p.Z[3] = ONE[3] & infty; - if (P256_LIMBS == 8) { - p.p.Z[4] = ONE[4] & infty; - p.p.Z[5] = ONE[5] & infty; - p.p.Z[6] = ONE[6] & infty; - p.p.Z[7] = ONE[7] & infty; - } + ecp_nistz256_neg(p.p.Z, p.p.Y); + copy_conditional(p.p.Y, p.p.Z, wvalue & 1); - for (i = 1; i < 37; i++) { - unsigned int off = (idx - 1) / 8; - wvalue = p_str[off] | p_str[off + 1] << 8; - wvalue = (wvalue >> ((idx - 1) % 8)) & mask; - idx += window_size; + /* + * Since affine infinity is encoded as (0,0) and + * Jacobian is (,,0), we need to harmonize them + * by assigning "one" or zero to Z. 
+ */ + infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] | + p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]); + if (P256_LIMBS == 8) + infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] | + p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]); + + infty = 0 - is_zero(infty); + infty = ~infty; + + p.p.Z[0] = ONE[0] & infty; + p.p.Z[1] = ONE[1] & infty; + p.p.Z[2] = ONE[2] & infty; + p.p.Z[3] = ONE[3] & infty; + if (P256_LIMBS == 8) { + p.p.Z[4] = ONE[4] & infty; + p.p.Z[5] = ONE[5] & infty; + p.p.Z[6] = ONE[6] & infty; + p.p.Z[7] = ONE[7] & infty; + } - wvalue = _booth_recode_w7(wvalue); + for (i = 1; i < 37; i++) { + unsigned int off = (idx - 1) / 8; + wvalue = p_str[off] | p_str[off + 1] << 8; + wvalue = (wvalue >> ((idx - 1) % 8)) & mask; + idx += window_size; - ecp_nistz256_gather_w7(&t.a, - preComputedTable[i], wvalue >> 1); + wvalue = _booth_recode_w7(wvalue); - ecp_nistz256_neg(t.p.Z, t.a.Y); - copy_conditional(t.a.Y, t.p.Z, wvalue & 1); + ecp_nistz256_gather_w7(&t.a, + preComputedTable[i], wvalue >> 1); - ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); - } + ecp_nistz256_neg(t.p.Z, t.a.Y); + copy_conditional(t.a.Y, t.p.Z, wvalue & 1); + + ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); } } else { p_is_infinity = 1; diff --git a/crypto/engine/eng_lib.c b/crypto/engine/eng_lib.c index b851ff695756..5bd584c5999a 100644 --- a/crypto/engine/eng_lib.c +++ b/crypto/engine/eng_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -171,6 +171,7 @@ void engine_cleanup_int(void) cleanup_stack = NULL; } CRYPTO_THREAD_lock_free(global_engine_lock); + global_engine_lock = NULL; } /* Now the "ex_data" support */ diff --git a/crypto/err/openssl.txt b/crypto/err/openssl.txt index 35512f9caf96..0b5873ebbcb7 100644 --- a/crypto/err/openssl.txt +++ b/crypto/err/openssl.txt @@ -934,6 +934,8 @@ PEM_F_PEM_READ_PRIVATEKEY:124:PEM_read_PrivateKey PEM_F_PEM_SIGNFINAL:112:PEM_SignFinal PEM_F_PEM_WRITE:113:PEM_write PEM_F_PEM_WRITE_BIO:114:PEM_write_bio +PEM_F_PEM_WRITE_BIO_PRIVATEKEY_TRADITIONAL:147:\ + PEM_write_bio_PrivateKey_traditional PEM_F_PEM_WRITE_PRIVATEKEY:139:PEM_write_PrivateKey PEM_F_PEM_X509_INFO_READ:115:PEM_X509_INFO_read PEM_F_PEM_X509_INFO_READ_BIO:116:PEM_X509_INFO_read_bio @@ -1742,6 +1744,7 @@ X509_F_X509_NAME_PRINT:117:X509_NAME_print X509_F_X509_OBJECT_NEW:150:X509_OBJECT_new X509_F_X509_PRINT_EX_FP:118:X509_print_ex_fp X509_F_X509_PUBKEY_DECODE:148:x509_pubkey_decode +X509_F_X509_PUBKEY_GET:161:X509_PUBKEY_get X509_F_X509_PUBKEY_GET0:119:X509_PUBKEY_get0 X509_F_X509_PUBKEY_SET:120:X509_PUBKEY_set X509_F_X509_REQ_CHECK_PRIVATE_KEY:144:X509_REQ_check_private_key @@ -2164,6 +2167,7 @@ EC_R_KEYS_NOT_SET:140:keys not set EC_R_LADDER_POST_FAILURE:136:ladder post failure EC_R_LADDER_PRE_FAILURE:153:ladder pre failure EC_R_LADDER_STEP_FAILURE:162:ladder step failure +EC_R_MISSING_OID:167:missing OID EC_R_MISSING_PARAMETERS:124:missing parameters EC_R_MISSING_PRIVATE_KEY:125:missing private key EC_R_NEED_NEW_SETUP_VALUES:157:need new setup values @@ -2398,6 +2402,7 @@ PEM_R_UNEXPECTED_DEK_IV:130:unexpected dek iv PEM_R_UNSUPPORTED_CIPHER:113:unsupported cipher PEM_R_UNSUPPORTED_ENCRYPTION:114:unsupported encryption PEM_R_UNSUPPORTED_KEY_COMPONENTS:126:unsupported key components +PEM_R_UNSUPPORTED_PUBLIC_KEY_TYPE:110:unsupported public key type PKCS12_R_CANT_PACK_STRUCTURE:100:cant pack structure PKCS12_R_CONTENT_TYPE_NOT_DATA:121:content type not data 
PKCS12_R_DECODE_ERROR:101:decode error diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c index a1b7d50bbff8..405ddbf9bf09 100644 --- a/crypto/evp/e_aes.c +++ b/crypto/evp/e_aes.c @@ -130,11 +130,6 @@ void bsaes_xts_decrypt(const unsigned char *inp, unsigned char *out, size_t len, const AES_KEY *key1, const AES_KEY *key2, const unsigned char iv[16]); #endif -#if !defined(AES_ASM) && !defined(AES_CTR_ASM) \ - && defined(OPENSSL_AES_CONST_TIME) \ - && !defined(OPENSSL_SMALL_FOOTPRINT) -# define AES_CTR_ASM -#endif #ifdef AES_CTR_ASM void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out, size_t blocks, const AES_KEY *key, diff --git a/crypto/evp/encode.c b/crypto/evp/encode.c index 9307ff046424..85926434c300 100644 --- a/crypto/evp/encode.c +++ b/crypto/evp/encode.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -423,7 +423,7 @@ static int evp_decodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t, table = data_ascii2bin; /* trim white space from the start of the line. */ - while ((conv_ascii2bin(*f, table) == B64_WS) && (n > 0)) { + while ((n > 0) && (conv_ascii2bin(*f, table) == B64_WS)) { f++; n--; } diff --git a/crypto/mem_sec.c b/crypto/mem_sec.c index 9e0f6702f406..b5f959ba15d5 100644 --- a/crypto/mem_sec.c +++ b/crypto/mem_sec.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved. * Copyright 2004-2014, Akamai Technologies. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). 
You may not use @@ -502,7 +502,7 @@ static void sh_done(void) OPENSSL_free(sh.freelist); OPENSSL_free(sh.bittable); OPENSSL_free(sh.bitmalloc); - if (sh.map_result != NULL && sh.map_size) + if (sh.map_result != MAP_FAILED && sh.map_size) munmap(sh.map_result, sh.map_size); memset(&sh, 0, sizeof(sh)); } diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl index 959efedb0de7..60f03e4fe25b 100755 --- a/crypto/modes/asm/aesni-gcm-x86_64.pl +++ b/crypto/modes/asm/aesni-gcm-x86_64.pl @@ -66,7 +66,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl index 0a0bfd575cee..9bdba41d1de5 100755 --- a/crypto/modes/asm/ghash-x86_64.pl +++ b/crypto/modes/asm/ghash-x86_64.pl @@ -116,7 +116,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/modes/cbc128.c b/crypto/modes/cbc128.c index fc7e0b60510b..c85e37c6a546 100644 --- a/crypto/modes/cbc128.c +++ b/crypto/modes/cbc128.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -15,6 +15,12 @@ # define STRICT_ALIGNMENT 0 #endif +#if defined(__GNUC__) && !STRICT_ALIGNMENT +typedef size_t size_t_aX __attribute((__aligned__(1))); +#else +typedef size_t size_t_aX; +#endif + void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, size_t len, const void *key, unsigned char ivec[16], block128_f block) @@ -40,8 +46,8 @@ void CRYPTO_cbc128_encrypt(const unsigned char *in, unsigned char *out, } else { while (len >= 16) { for (n = 0; n < 16; n += sizeof(size_t)) - *(size_t *)(out + n) = - *(size_t *)(in + n) ^ *(size_t *)(iv + n); + *(size_t_aX *)(out + n) = + *(size_t_aX *)(in + n) ^ *(size_t_aX *)(iv + n); (*block) (out, out, key); iv = out; len -= 16; @@ -96,7 +102,8 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, } } else if (16 % sizeof(size_t) == 0) { /* always true */ while (len >= 16) { - size_t *out_t = (size_t *)out, *iv_t = (size_t *)iv; + size_t_aX *out_t = (size_t_aX *)out; + size_t_aX *iv_t = (size_t_aX *)iv; (*block) (in, out, key); for (n = 0; n < 16 / sizeof(size_t); n++) @@ -125,8 +132,10 @@ void CRYPTO_cbc128_decrypt(const unsigned char *in, unsigned char *out, } } else if (16 % sizeof(size_t) == 0) { /* always true */ while (len >= 16) { - size_t c, *out_t = (size_t *)out, *ivec_t = (size_t *)ivec; - const size_t *in_t = (const size_t *)in; + size_t c; + size_t_aX *out_t = (size_t_aX *)out; + size_t_aX *ivec_t = (size_t_aX *)ivec; + const size_t_aX *in_t = (const size_t_aX *)in; (*block) (in, tmp.c, key); for (n = 0; n < 16 / sizeof(size_t); n++) { diff --git a/crypto/modes/ccm128.c b/crypto/modes/ccm128.c index 424722811c16..655b10350201 100644 --- a/crypto/modes/ccm128.c +++ b/crypto/modes/ccm128.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). 
You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,6 +11,14 @@ #include "modes_local.h" #include <string.h> +#ifndef STRICT_ALIGNMENT +# ifdef __GNUC__ +typedef u64 u64_a1 __attribute((__aligned__(1))); +# else +typedef u64 u64_a1; +# endif +#endif + /* * First you setup M and L parameters and pass the key schedule. This is * called once per session setup... @@ -170,8 +178,8 @@ int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, ctx->cmac.u[0] ^= temp.u[0]; ctx->cmac.u[1] ^= temp.u[1]; #else - ctx->cmac.u[0] ^= ((u64 *)inp)[0]; - ctx->cmac.u[1] ^= ((u64 *)inp)[1]; + ctx->cmac.u[0] ^= ((u64_a1 *)inp)[0]; + ctx->cmac.u[1] ^= ((u64_a1 *)inp)[1]; #endif (*block) (ctx->cmac.c, ctx->cmac.c, key); (*block) (ctx->nonce.c, scratch.c, key); @@ -181,8 +189,8 @@ int CRYPTO_ccm128_encrypt(CCM128_CONTEXT *ctx, temp.u[1] ^= scratch.u[1]; memcpy(out, temp.c, 16); #else - ((u64 *)out)[0] = scratch.u[0] ^ ((u64 *)inp)[0]; - ((u64 *)out)[1] = scratch.u[1] ^ ((u64 *)inp)[1]; + ((u64_a1 *)out)[0] = scratch.u[0] ^ ((u64_a1 *)inp)[0]; + ((u64_a1 *)out)[1] = scratch.u[1] ^ ((u64_a1 *)inp)[1]; #endif inp += 16; out += 16; @@ -254,8 +262,10 @@ int CRYPTO_ccm128_decrypt(CCM128_CONTEXT *ctx, ctx->cmac.u[1] ^= (scratch.u[1] ^= temp.u[1]); memcpy(out, scratch.c, 16); #else - ctx->cmac.u[0] ^= (((u64 *)out)[0] = scratch.u[0] ^ ((u64 *)inp)[0]); - ctx->cmac.u[1] ^= (((u64 *)out)[1] = scratch.u[1] ^ ((u64 *)inp)[1]); + ctx->cmac.u[0] ^= (((u64_a1 *)out)[0] + = scratch.u[0] ^ ((u64_a1 *)inp)[0]); + ctx->cmac.u[1] ^= (((u64_a1 *)out)[1] + = scratch.u[1] ^ ((u64_a1 *)inp)[1]); #endif (*block) (ctx->cmac.c, ctx->cmac.c, key); diff --git a/crypto/modes/cfb128.c b/crypto/modes/cfb128.c index b6bec414a966..b2530007b6e4 100644 --- a/crypto/modes/cfb128.c +++ b/crypto/modes/cfb128.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved. 
* * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,6 +11,12 @@ #include "modes_local.h" #include <string.h> +#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) +typedef size_t size_t_aX __attribute((__aligned__(1))); +#else +typedef size_t size_t_aX; +#endif + /* * The input and output encrypted as though 128bit cfb mode is being used. * The extra state information to record how much of the 128bit block we have @@ -43,8 +49,9 @@ void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out, while (len >= 16) { (*block) (ivec, ivec, key); for (; n < 16; n += sizeof(size_t)) { - *(size_t *)(out + n) = - *(size_t *)(ivec + n) ^= *(size_t *)(in + n); + *(size_t_aX *)(out + n) = + *(size_t_aX *)(ivec + n) + ^= *(size_t_aX *)(in + n); } len -= 16; out += 16; @@ -92,9 +99,10 @@ void CRYPTO_cfb128_encrypt(const unsigned char *in, unsigned char *out, while (len >= 16) { (*block) (ivec, ivec, key); for (; n < 16; n += sizeof(size_t)) { - size_t t = *(size_t *)(in + n); - *(size_t *)(out + n) = *(size_t *)(ivec + n) ^ t; - *(size_t *)(ivec + n) = t; + size_t t = *(size_t_aX *)(in + n); + *(size_t_aX *)(out + n) + = *(size_t_aX *)(ivec + n) ^ t; + *(size_t_aX *)(ivec + n) = t; } len -= 16; out += 16; diff --git a/crypto/modes/ctr128.c b/crypto/modes/ctr128.c index ae35116e9524..1ed7decedfd3 100644 --- a/crypto/modes/ctr128.c +++ b/crypto/modes/ctr128.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -11,6 +11,12 @@ #include "modes_local.h" #include <string.h> +#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) +typedef size_t size_t_aX __attribute((__aligned__(1))); +#else +typedef size_t size_t_aX; +#endif + /* * NOTE: the IV/counter CTR mode is big-endian. The code itself is * endian-neutral. @@ -97,8 +103,9 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out, (*block) (ivec, ecount_buf, key); ctr128_inc_aligned(ivec); for (n = 0; n < 16; n += sizeof(size_t)) - *(size_t *)(out + n) = - *(size_t *)(in + n) ^ *(size_t *)(ecount_buf + n); + *(size_t_aX *)(out + n) = + *(size_t_aX *)(in + n) + ^ *(size_t_aX *)(ecount_buf + n); len -= 16; out += 16; in += 16; diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c index 48775e6d05ff..0c0bf3cda5b5 100644 --- a/crypto/modes/gcm128.c +++ b/crypto/modes/gcm128.c @@ -1,5 +1,5 @@ /* - * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -11,6 +11,12 @@ #include "modes_local.h" #include <string.h> +#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) +typedef size_t size_t_aX __attribute((__aligned__(1))); +#else +typedef size_t size_t_aX; +#endif + #if defined(BSWAP4) && defined(STRICT_ALIGNMENT) /* redefine, because alignment is ensured */ # undef GETU32 @@ -1080,8 +1086,8 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, size_t j = GHASH_CHUNK; while (j) { - size_t *out_t = (size_t *)out; - const size_t *in_t = (const size_t *)in; + size_t_aX *out_t = (size_t_aX *)out; + const size_t_aX *in_t = (const size_t_aX *)in; (*block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; @@ -1107,8 +1113,8 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, size_t j = i; while (len >= 16) { - size_t *out_t = (size_t *)out; - const size_t *in_t = (const size_t *)in; + size_t_aX *out_t = (size_t_aX *)out; + const size_t_aX *in_t = (const size_t_aX *)in; (*block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; @@ -1318,8 +1324,8 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, GHASH(ctx, in, GHASH_CHUNK); while (j) { - size_t *out_t = (size_t *)out; - const size_t *in_t = (const size_t *)in; + size_t_aX *out_t = (size_t_aX *)out; + const size_t_aX *in_t = (const size_t_aX *)in; (*block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; @@ -1343,8 +1349,8 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, if ((i = (len & (size_t)-16))) { GHASH(ctx, in, i); while (len >= 16) { - size_t *out_t = (size_t *)out; - const size_t *in_t = (const size_t *)in; + size_t_aX *out_t = (size_t_aX *)out; + const size_t_aX *in_t = (const size_t_aX *)in; (*block) (ctx->Yi.c, ctx->EKi.c, key); ++ctr; diff --git a/crypto/modes/modes_local.h b/crypto/modes/modes_local.h index f2ae01d11afd..28c32c0643f4 100644 --- a/crypto/modes/modes_local.h +++ b/crypto/modes/modes_local.h @@ -1,5 +1,5 @@ /* - * Copyright 2010-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved. 
* * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -37,6 +37,14 @@ typedef unsigned char u8; # endif #endif +#ifndef STRICT_ALIGNMENT +# ifdef __GNUC__ +typedef u32 u32_a1 __attribute((__aligned__(1))); +# else +typedef u32 u32_a1; +# endif +#endif + #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) # if defined(__GNUC__) && __GNUC__>=2 # if defined(__x86_64) || defined(__x86_64__) @@ -86,8 +94,8 @@ _asm mov eax, val _asm bswap eax} # endif #endif #if defined(BSWAP4) && !defined(STRICT_ALIGNMENT) -# define GETU32(p) BSWAP4(*(const u32 *)(p)) -# define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v) +# define GETU32(p) BSWAP4(*(const u32_a1 *)(p)) +# define PUTU32(p,v) *(u32_a1 *)(p) = BSWAP4(v) #else # define GETU32(p) ((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3]) # define PUTU32(p,v) ((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v)) diff --git a/crypto/modes/ofb128.c b/crypto/modes/ofb128.c index 44bdf888db1a..a3469712b2de 100644 --- a/crypto/modes/ofb128.c +++ b/crypto/modes/ofb128.c @@ -1,5 +1,5 @@ /* - * Copyright 2008-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2008-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,6 +11,12 @@ #include "modes_local.h" #include <string.h> +#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) +typedef size_t size_t_aX __attribute((__aligned__(1))); +#else +typedef size_t size_t_aX; +#endif + /* * The input and output encrypted as though 128bit ofb mode is being used. 
* The extra state information to record how much of the 128bit block we have @@ -41,8 +47,9 @@ void CRYPTO_ofb128_encrypt(const unsigned char *in, unsigned char *out, while (len >= 16) { (*block) (ivec, ivec, key); for (; n < 16; n += sizeof(size_t)) - *(size_t *)(out + n) = - *(size_t *)(in + n) ^ *(size_t *)(ivec + n); + *(size_t_aX *)(out + n) = + *(size_t_aX *)(in + n) + ^ *(size_t_aX *)(ivec + n); len -= 16; out += 16; in += 16; diff --git a/crypto/modes/xts128.c b/crypto/modes/xts128.c index b5bda5e6402d..fe1626c62e10 100644 --- a/crypto/modes/xts128.c +++ b/crypto/modes/xts128.c @@ -1,5 +1,5 @@ /* - * Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2011-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -11,6 +11,14 @@ #include "modes_local.h" #include <string.h> +#ifndef STRICT_ALIGNMENT +# ifdef __GNUC__ +typedef u64 u64_a1 __attribute((__aligned__(1))); +# else +typedef u64 u64_a1; +# endif +#endif + int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, const unsigned char iv[16], const unsigned char *inp, unsigned char *out, @@ -45,8 +53,8 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, scratch.u[0] ^= tweak.u[0]; scratch.u[1] ^= tweak.u[1]; #else - scratch.u[0] = ((u64 *)inp)[0] ^ tweak.u[0]; - scratch.u[1] = ((u64 *)inp)[1] ^ tweak.u[1]; + scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak.u[0]; + scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak.u[1]; #endif (*ctx->block1) (scratch.c, scratch.c, ctx->key1); #if defined(STRICT_ALIGNMENT) @@ -54,8 +62,8 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, scratch.u[1] ^= tweak.u[1]; memcpy(out, scratch.c, 16); #else - ((u64 *)out)[0] = scratch.u[0] ^= tweak.u[0]; - ((u64 *)out)[1] = scratch.u[1] ^= tweak.u[1]; + ((u64_a1 *)out)[0] = scratch.u[0] ^= tweak.u[0]; + ((u64_a1 *)out)[1] = scratch.u[1] ^= tweak.u[1]; #endif inp += 
16; out += 16; @@ -128,8 +136,8 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, scratch.u[0] ^= tweak1.u[0]; scratch.u[1] ^= tweak1.u[1]; #else - scratch.u[0] = ((u64 *)inp)[0] ^ tweak1.u[0]; - scratch.u[1] = ((u64 *)inp)[1] ^ tweak1.u[1]; + scratch.u[0] = ((u64_a1 *)inp)[0] ^ tweak1.u[0]; + scratch.u[1] = ((u64_a1 *)inp)[1] ^ tweak1.u[1]; #endif (*ctx->block1) (scratch.c, scratch.c, ctx->key1); scratch.u[0] ^= tweak1.u[0]; @@ -148,8 +156,8 @@ int CRYPTO_xts128_encrypt(const XTS128_CONTEXT *ctx, scratch.u[1] ^= tweak.u[1]; memcpy(out, scratch.c, 16); #else - ((u64 *)out)[0] = scratch.u[0] ^ tweak.u[0]; - ((u64 *)out)[1] = scratch.u[1] ^ tweak.u[1]; + ((u64_a1 *)out)[0] = scratch.u[0] ^ tweak.u[0]; + ((u64_a1 *)out)[1] = scratch.u[1] ^ tweak.u[1]; #endif } diff --git a/crypto/o_str.c b/crypto/o_str.c index 9ad7a89dcadf..eb9f21cc0c45 100644 --- a/crypto/o_str.c +++ b/crypto/o_str.c @@ -1,5 +1,5 @@ /* - * Copyright 2003-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2003-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -220,7 +220,7 @@ char *OPENSSL_buf2hexstr(const unsigned char *buffer, long len) int openssl_strerror_r(int errnum, char *buf, size_t buflen) { -#if defined(_MSC_VER) && _MSC_VER>=1400 +#if defined(_MSC_VER) && _MSC_VER>=1400 && !defined(_WIN32_WCE) return !strerror_s(buf, buflen, errnum); #elif defined(_GNU_SOURCE) char *err; diff --git a/crypto/o_time.c b/crypto/o_time.c index 6d764f55e2e8..3502edda6238 100644 --- a/crypto/o_time.c +++ b/crypto/o_time.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -41,7 +41,7 @@ struct tm *OPENSSL_gmtime(const time_t *timer, struct tm *result) if (gmtime_r(timer, result) == NULL) return NULL; ts = result; -#elif defined (OPENSSL_SYS_WINDOWS) && defined(_MSC_VER) && _MSC_VER >= 1400 +#elif defined (OPENSSL_SYS_WINDOWS) && defined(_MSC_VER) && _MSC_VER >= 1400 && !defined(_WIN32_WCE) if (gmtime_s(result, timer)) return NULL; ts = result; diff --git a/crypto/pem/pem_err.c b/crypto/pem/pem_err.c index f642030aa539..0f3cb02407e6 100644 --- a/crypto/pem/pem_err.c +++ b/crypto/pem/pem_err.c @@ -1,6 +1,6 @@ /* * Generated by util/mkerr.pl DO NOT EDIT - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -60,6 +60,8 @@ static const ERR_STRING_DATA PEM_str_functs[] = { {ERR_PACK(ERR_LIB_PEM, PEM_F_PEM_SIGNFINAL, 0), "PEM_SignFinal"}, {ERR_PACK(ERR_LIB_PEM, PEM_F_PEM_WRITE, 0), "PEM_write"}, {ERR_PACK(ERR_LIB_PEM, PEM_F_PEM_WRITE_BIO, 0), "PEM_write_bio"}, + {ERR_PACK(ERR_LIB_PEM, PEM_F_PEM_WRITE_BIO_PRIVATEKEY_TRADITIONAL, 0), + "PEM_write_bio_PrivateKey_traditional"}, {ERR_PACK(ERR_LIB_PEM, PEM_F_PEM_WRITE_PRIVATEKEY, 0), "PEM_write_PrivateKey"}, {ERR_PACK(ERR_LIB_PEM, PEM_F_PEM_X509_INFO_READ, 0), "PEM_X509_INFO_read"}, @@ -109,6 +111,8 @@ static const ERR_STRING_DATA PEM_str_reasons[] = { "unsupported encryption"}, {ERR_PACK(ERR_LIB_PEM, 0, PEM_R_UNSUPPORTED_KEY_COMPONENTS), "unsupported key components"}, + {ERR_PACK(ERR_LIB_PEM, 0, PEM_R_UNSUPPORTED_PUBLIC_KEY_TYPE), + "unsupported public key type"}, {0, NULL} }; diff --git a/crypto/pem/pem_lib.c b/crypto/pem/pem_lib.c index 64baf7108ea4..a26322119aa7 100644 --- a/crypto/pem/pem_lib.c +++ b/crypto/pem/pem_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. 
+ * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -332,7 +332,7 @@ int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name, BIO *bp, } } - if ((dsize = i2d(x, NULL)) < 0) { + if ((dsize = i2d(x, NULL)) <= 0) { PEMerr(PEM_F_PEM_ASN1_WRITE_BIO, ERR_R_ASN1_LIB); dsize = 0; goto err; @@ -791,7 +791,7 @@ static int get_header_and_data(BIO *bp, BIO **header, BIO **data, char *name, { BIO *tmp = *header; char *linebuf, *p; - int len, line, ret = 0, end = 0; + int len, line, ret = 0, end = 0, prev_partial_line_read = 0, partial_line_read = 0; /* 0 if not seen (yet), 1 if reading header, 2 if finished header */ enum header_status got_header = MAYBE_HEADER; unsigned int flags_mask; @@ -809,10 +809,18 @@ static int get_header_and_data(BIO *bp, BIO **header, BIO **data, char *name, flags_mask = ~0u; len = BIO_gets(bp, linebuf, LINESIZE); if (len <= 0) { - PEMerr(PEM_F_GET_HEADER_AND_DATA, PEM_R_SHORT_HEADER); + PEMerr(PEM_F_GET_HEADER_AND_DATA, PEM_R_BAD_END_LINE); goto err; } + /* + * Check if line has been read completely or if only part of the line + * has been read. Keep the previous value to ignore newlines that + * appear due to reading a line up until the char before the newline. + */ + prev_partial_line_read = partial_line_read; + partial_line_read = len == LINESIZE-1 && linebuf[LINESIZE-2] != '\n'; + if (got_header == MAYBE_HEADER) { if (memchr(linebuf, ':', len) != NULL) got_header = IN_HEADER; @@ -823,13 +831,19 @@ static int get_header_and_data(BIO *bp, BIO **header, BIO **data, char *name, /* Check for end of header. */ if (linebuf[0] == '\n') { - if (got_header == POST_HEADER) { - /* Another blank line is an error. 
*/ - PEMerr(PEM_F_GET_HEADER_AND_DATA, PEM_R_BAD_END_LINE); - goto err; + /* + * If previous line has been read only partially this newline is a + * regular newline at the end of a line and not an empty line. + */ + if (!prev_partial_line_read) { + if (got_header == POST_HEADER) { + /* Another blank line is an error. */ + PEMerr(PEM_F_GET_HEADER_AND_DATA, PEM_R_BAD_END_LINE); + goto err; + } + got_header = POST_HEADER; + tmp = *data; } - got_header = POST_HEADER; - tmp = *data; continue; } diff --git a/crypto/pem/pem_pkey.c b/crypto/pem/pem_pkey.c index e58cdf4a3e0b..4a9492724487 100644 --- a/crypto/pem/pem_pkey.c +++ b/crypto/pem/pem_pkey.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -108,6 +108,12 @@ int PEM_write_bio_PrivateKey_traditional(BIO *bp, EVP_PKEY *x, pem_password_cb *cb, void *u) { char pem_str[80]; + + if (x->ameth == NULL || x->ameth->old_priv_encode == NULL) { + PEMerr(PEM_F_PEM_WRITE_BIO_PRIVATEKEY_TRADITIONAL, + PEM_R_UNSUPPORTED_PUBLIC_KEY_TYPE); + return 0; + } BIO_snprintf(pem_str, 80, "%s PRIVATE KEY", x->ameth->pem_str); return PEM_ASN1_write_bio((i2d_of_void *)i2d_PrivateKey, pem_str, bp, x, enc, kstr, klen, cb, u); diff --git a/crypto/pem/pvkfmt.c b/crypto/pem/pvkfmt.c index 1fc19c17f913..a933b7c1813c 100644 --- a/crypto/pem/pvkfmt.c +++ b/crypto/pem/pvkfmt.c @@ -1,5 +1,5 @@ /* - * Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -29,10 +29,10 @@ static unsigned int read_ledword(const unsigned char **in) { const unsigned char *p = *in; unsigned int ret; - ret = *p++; - ret |= (*p++ << 8); - ret |= (*p++ << 16); - ret |= (*p++ << 24); + ret = (unsigned int)*p++; + ret |= (unsigned int)*p++ << 8; + ret |= (unsigned int)*p++ << 16; + ret |= (unsigned int)*p++ << 24; *in = p; return ret; } @@ -875,9 +875,9 @@ int i2b_PVK_bio(BIO *out, EVP_PKEY *pk, int enclevel, wrlen = BIO_write(out, tmp, outlen); OPENSSL_free(tmp); if (wrlen == outlen) { - PEMerr(PEM_F_I2B_PVK_BIO, PEM_R_BIO_WRITE_FAILURE); return outlen; } + PEMerr(PEM_F_I2B_PVK_BIO, PEM_R_BIO_WRITE_FAILURE); return -1; } diff --git a/crypto/poly1305/asm/poly1305-x86.pl b/crypto/poly1305/asm/poly1305-x86.pl index 4aaf63a0a1dd..2ae16a230b66 100755 --- a/crypto/poly1305/asm/poly1305-x86.pl +++ b/crypto/poly1305/asm/poly1305-x86.pl @@ -71,7 +71,7 @@ if ($sse2) { $avx = ($1>=2.09) + ($1>=2.10); } - if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/) { + if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } } diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl index c014be1ca9fa..5f834d8faf2a 100755 --- a/crypto/poly1305/asm/poly1305-x86_64.pl +++ b/crypto/poly1305/asm/poly1305-x86_64.pl @@ -90,7 +90,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=12); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/rand/drbg_ctr.c b/crypto/rand/drbg_ctr.c index 0f0ad1b37be4..a757d0a258ab 100644 --- a/crypto/rand/drbg_ctr.c +++ b/crypto/rand/drbg_ctr.c @@ -63,15 +63,15 @@ static void ctr_XOR(RAND_DRBG_CTR *ctr, const unsigned char *in, 
size_t inlen) * Process a complete block using BCC algorithm of SP 800-90A 10.3.3 */ __owur static int ctr_BCC_block(RAND_DRBG_CTR *ctr, unsigned char *out, - const unsigned char *in) + const unsigned char *in, int len) { int i, outlen = AES_BLOCK_SIZE; - for (i = 0; i < 16; i++) + for (i = 0; i < len; i++) out[i] ^= in[i]; - if (!EVP_CipherUpdate(ctr->ctx_df, out, &outlen, out, AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) + if (!EVP_CipherUpdate(ctr->ctx_df, out, &outlen, out, len) + || outlen != len) return 0; return 1; } @@ -82,12 +82,16 @@ __owur static int ctr_BCC_block(RAND_DRBG_CTR *ctr, unsigned char *out, */ __owur static int ctr_BCC_blocks(RAND_DRBG_CTR *ctr, const unsigned char *in) { - if (!ctr_BCC_block(ctr, ctr->KX, in) - || !ctr_BCC_block(ctr, ctr->KX + 16, in)) - return 0; - if (ctr->keylen != 16 && !ctr_BCC_block(ctr, ctr->KX + 32, in)) - return 0; - return 1; + unsigned char in_tmp[48]; + unsigned char num_of_blk = 2; + + memcpy(in_tmp, in, 16); + memcpy(in_tmp + 16, in, 16); + if (ctr->keylen != 16) { + memcpy(in_tmp + 32, in, 16); + num_of_blk = 3; + } + return ctr_BCC_block(ctr, ctr->KX, in_tmp, AES_BLOCK_SIZE * num_of_blk); } /* @@ -96,19 +100,14 @@ __owur static int ctr_BCC_blocks(RAND_DRBG_CTR *ctr, const unsigned char *in) */ __owur static int ctr_BCC_init(RAND_DRBG_CTR *ctr) { + unsigned char bltmp[48] = {0}; + unsigned char num_of_blk; + memset(ctr->KX, 0, 48); - memset(ctr->bltmp, 0, 16); - if (!ctr_BCC_block(ctr, ctr->KX, ctr->bltmp)) - return 0; - ctr->bltmp[3] = 1; - if (!ctr_BCC_block(ctr, ctr->KX + 16, ctr->bltmp)) - return 0; - if (ctr->keylen != 16) { - ctr->bltmp[3] = 2; - if (!ctr_BCC_block(ctr, ctr->KX + 32, ctr->bltmp)) - return 0; - } - return 1; + num_of_blk = ctr->keylen == 16 ? 
2 : 3; + bltmp[(AES_BLOCK_SIZE * 1) + 3] = 1; + bltmp[(AES_BLOCK_SIZE * 2) + 3] = 2; + return ctr_BCC_block(ctr, ctr->KX, bltmp, num_of_blk * AES_BLOCK_SIZE); } /* @@ -197,20 +196,20 @@ __owur static int ctr_df(RAND_DRBG_CTR *ctr, || !ctr_BCC_final(ctr)) return 0; /* Set up key K */ - if (!EVP_CipherInit_ex(ctr->ctx, ctr->cipher, NULL, ctr->KX, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_ecb, NULL, NULL, ctr->KX, NULL, -1)) return 0; /* X follows key K */ - if (!EVP_CipherUpdate(ctr->ctx, ctr->KX, &outlen, ctr->KX + ctr->keylen, + if (!EVP_CipherUpdate(ctr->ctx_ecb, ctr->KX, &outlen, ctr->KX + ctr->keylen, AES_BLOCK_SIZE) || outlen != AES_BLOCK_SIZE) return 0; - if (!EVP_CipherUpdate(ctr->ctx, ctr->KX + 16, &outlen, ctr->KX, + if (!EVP_CipherUpdate(ctr->ctx_ecb, ctr->KX + 16, &outlen, ctr->KX, AES_BLOCK_SIZE) || outlen != AES_BLOCK_SIZE) return 0; if (ctr->keylen != 16) - if (!EVP_CipherUpdate(ctr->ctx, ctr->KX + 32, &outlen, ctr->KX + 16, - AES_BLOCK_SIZE) + if (!EVP_CipherUpdate(ctr->ctx_ecb, ctr->KX + 32, &outlen, + ctr->KX + 16, AES_BLOCK_SIZE) || outlen != AES_BLOCK_SIZE) return 0; return 1; @@ -229,31 +228,25 @@ __owur static int ctr_update(RAND_DRBG *drbg, { RAND_DRBG_CTR *ctr = &drbg->data.ctr; int outlen = AES_BLOCK_SIZE; + unsigned char V_tmp[48], out[48]; + unsigned char len; /* correct key is already set up. 
*/ + memcpy(V_tmp, ctr->V, 16); inc_128(ctr); - if (!EVP_CipherUpdate(ctr->ctx, ctr->K, &outlen, ctr->V, AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) - return 0; - - /* If keylen longer than 128 bits need extra encrypt */ - if (ctr->keylen != 16) { + memcpy(V_tmp + 16, ctr->V, 16); + if (ctr->keylen == 16) { + len = 32; + } else { inc_128(ctr); - if (!EVP_CipherUpdate(ctr->ctx, ctr->K+16, &outlen, ctr->V, - AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) - return 0; + memcpy(V_tmp + 32, ctr->V, 16); + len = 48; } - inc_128(ctr); - if (!EVP_CipherUpdate(ctr->ctx, ctr->V, &outlen, ctr->V, AES_BLOCK_SIZE) - || outlen != AES_BLOCK_SIZE) + if (!EVP_CipherUpdate(ctr->ctx_ecb, out, &outlen, V_tmp, len) + || outlen != len) return 0; - - /* If 192 bit key part of V is on end of K */ - if (ctr->keylen == 24) { - memcpy(ctr->V + 8, ctr->V, 8); - memcpy(ctr->V, ctr->K + 24, 8); - } + memcpy(ctr->K, out, ctr->keylen); + memcpy(ctr->V, out + ctr->keylen, 16); if ((drbg->flags & RAND_DRBG_FLAG_CTR_NO_DF) == 0) { /* If no input reuse existing derived value */ @@ -268,7 +261,8 @@ __owur static int ctr_update(RAND_DRBG *drbg, ctr_XOR(ctr, in2, in2len); } - if (!EVP_CipherInit_ex(ctr->ctx, ctr->cipher, NULL, ctr->K, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_ecb, NULL, NULL, ctr->K, NULL, -1) + || !EVP_CipherInit_ex(ctr->ctx_ctr, NULL, NULL, ctr->K, NULL, -1)) return 0; return 1; } @@ -285,8 +279,10 @@ __owur static int drbg_ctr_instantiate(RAND_DRBG *drbg, memset(ctr->K, 0, sizeof(ctr->K)); memset(ctr->V, 0, sizeof(ctr->V)); - if (!EVP_CipherInit_ex(ctr->ctx, ctr->cipher, NULL, ctr->K, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_ecb, NULL, NULL, ctr->K, NULL, -1)) return 0; + + inc_128(ctr); if (!ctr_update(drbg, entropy, entropylen, pers, perslen, nonce, noncelen)) return 0; return 1; @@ -296,20 +292,40 @@ __owur static int drbg_ctr_reseed(RAND_DRBG *drbg, const unsigned char *entropy, size_t entropylen, const unsigned char *adin, size_t adinlen) { + RAND_DRBG_CTR *ctr = 
&drbg->data.ctr; + if (entropy == NULL) return 0; + + inc_128(ctr); if (!ctr_update(drbg, entropy, entropylen, adin, adinlen, NULL, 0)) return 0; return 1; } +static void ctr96_inc(unsigned char *counter) +{ + u32 n = 12, c = 1; + + do { + --n; + c += counter[n]; + counter[n] = (u8)c; + c >>= 8; + } while (n); +} + __owur static int drbg_ctr_generate(RAND_DRBG *drbg, unsigned char *out, size_t outlen, const unsigned char *adin, size_t adinlen) { RAND_DRBG_CTR *ctr = &drbg->data.ctr; + unsigned int ctr32, blocks; + int outl, buflen; if (adin != NULL && adinlen != 0) { + inc_128(ctr); + if (!ctr_update(drbg, adin, adinlen, NULL, 0, NULL, 0)) return 0; /* This means we reuse derived value */ @@ -321,28 +337,53 @@ __owur static int drbg_ctr_generate(RAND_DRBG *drbg, adinlen = 0; } - for ( ; ; ) { - int outl = AES_BLOCK_SIZE; + inc_128(ctr); + if (outlen == 0) { inc_128(ctr); - if (outlen < 16) { - /* Use K as temp space as it will be updated */ - if (!EVP_CipherUpdate(ctr->ctx, ctr->K, &outl, ctr->V, - AES_BLOCK_SIZE) - || outl != AES_BLOCK_SIZE) - return 0; - memcpy(out, ctr->K, outlen); - break; - } - if (!EVP_CipherUpdate(ctr->ctx, out, &outl, ctr->V, AES_BLOCK_SIZE) - || outl != AES_BLOCK_SIZE) + + if (!ctr_update(drbg, adin, adinlen, NULL, 0, NULL, 0)) return 0; - out += 16; - outlen -= 16; - if (outlen == 0) - break; + return 1; } + memset(out, 0, outlen); + + do { + if (!EVP_CipherInit_ex(ctr->ctx_ctr, + NULL, NULL, NULL, ctr->V, -1)) + return 0; + + /*- + * outlen has type size_t while EVP_CipherUpdate takes an + * int argument and thus cannot be guaranteed to process more + * than 2^31-1 bytes at a time. We process such huge generate + * requests in 2^30 byte chunks, which is the greatest multiple + * of AES block size lower than or equal to 2^31-1. + */ + buflen = outlen > (1U << 30) ? (1U << 30) : outlen; + blocks = (buflen + 15) / 16; + + ctr32 = GETU32(ctr->V + 12) + blocks; + if (ctr32 < blocks) { + /* 32-bit counter overflow into V. 
*/ + if (ctr32 != 0) { + blocks -= ctr32; + buflen = blocks * 16; + ctr32 = 0; + } + ctr96_inc(ctr->V); + } + PUTU32(ctr->V + 12, ctr32); + + if (!EVP_CipherUpdate(ctr->ctx_ctr, out, &outl, out, buflen) + || outl != buflen) + return 0; + + out += buflen; + outlen -= buflen; + } while (outlen); + if (!ctr_update(drbg, adin, adinlen, NULL, 0, NULL, 0)) return 0; return 1; @@ -350,7 +391,8 @@ __owur static int drbg_ctr_generate(RAND_DRBG *drbg, static int drbg_ctr_uninstantiate(RAND_DRBG *drbg) { - EVP_CIPHER_CTX_free(drbg->data.ctr.ctx); + EVP_CIPHER_CTX_free(drbg->data.ctr.ctx_ecb); + EVP_CIPHER_CTX_free(drbg->data.ctr.ctx_ctr); EVP_CIPHER_CTX_free(drbg->data.ctr.ctx_df); OPENSSL_cleanse(&drbg->data.ctr, sizeof(drbg->data.ctr)); return 1; @@ -374,25 +416,36 @@ int drbg_ctr_init(RAND_DRBG *drbg) return 0; case NID_aes_128_ctr: keylen = 16; - ctr->cipher = EVP_aes_128_ecb(); + ctr->cipher_ecb = EVP_aes_128_ecb(); + ctr->cipher_ctr = EVP_aes_128_ctr(); break; case NID_aes_192_ctr: keylen = 24; - ctr->cipher = EVP_aes_192_ecb(); + ctr->cipher_ecb = EVP_aes_192_ecb(); + ctr->cipher_ctr = EVP_aes_192_ctr(); break; case NID_aes_256_ctr: keylen = 32; - ctr->cipher = EVP_aes_256_ecb(); + ctr->cipher_ecb = EVP_aes_256_ecb(); + ctr->cipher_ctr = EVP_aes_256_ctr(); break; } drbg->meth = &drbg_ctr_meth; ctr->keylen = keylen; - if (ctr->ctx == NULL) - ctr->ctx = EVP_CIPHER_CTX_new(); - if (ctr->ctx == NULL) + if (ctr->ctx_ecb == NULL) + ctr->ctx_ecb = EVP_CIPHER_CTX_new(); + if (ctr->ctx_ctr == NULL) + ctr->ctx_ctr = EVP_CIPHER_CTX_new(); + if (ctr->ctx_ecb == NULL || ctr->ctx_ctr == NULL + || !EVP_CipherInit_ex(ctr->ctx_ecb, + ctr->cipher_ecb, NULL, NULL, NULL, 1) + || !EVP_CipherInit_ex(ctr->ctx_ctr, + ctr->cipher_ctr, NULL, NULL, NULL, 1)) return 0; + + drbg->meth = &drbg_ctr_meth; drbg->strength = keylen * 8; drbg->seedlen = keylen + 16; @@ -410,7 +463,8 @@ int drbg_ctr_init(RAND_DRBG *drbg) if (ctr->ctx_df == NULL) return 0; /* Set key schedule for df_key */ - if 
(!EVP_CipherInit_ex(ctr->ctx_df, ctr->cipher, NULL, df_key, NULL, 1)) + if (!EVP_CipherInit_ex(ctr->ctx_df, + ctr->cipher_ecb, NULL, df_key, NULL, 1)) return 0; drbg->min_entropylen = ctr->keylen; diff --git a/crypto/rand/drbg_lib.c b/crypto/rand/drbg_lib.c index faf0590c6c28..8c7c28c9703a 100644 --- a/crypto/rand/drbg_lib.c +++ b/crypto/rand/drbg_lib.c @@ -327,13 +327,6 @@ int RAND_DRBG_instantiate(RAND_DRBG *drbg, max_entropylen += drbg->max_noncelen; } - drbg->reseed_next_counter = tsan_load(&drbg->reseed_prop_counter); - if (drbg->reseed_next_counter) { - drbg->reseed_next_counter++; - if(!drbg->reseed_next_counter) - drbg->reseed_next_counter = 1; - } - if (drbg->get_entropy != NULL) entropylen = drbg->get_entropy(drbg, &entropy, min_entropy, min_entropylen, max_entropylen, 0); @@ -359,9 +352,15 @@ int RAND_DRBG_instantiate(RAND_DRBG *drbg, } drbg->state = DRBG_READY; - drbg->reseed_gen_counter = 1; + drbg->generate_counter = 1; drbg->reseed_time = time(NULL); - tsan_store(&drbg->reseed_prop_counter, drbg->reseed_next_counter); + if (drbg->enable_reseed_propagation) { + if (drbg->parent == NULL) + tsan_counter(&drbg->reseed_counter); + else + tsan_store(&drbg->reseed_counter, + tsan_load(&drbg->parent->reseed_counter)); + } end: if (entropy != NULL && drbg->cleanup_entropy != NULL) @@ -428,14 +427,6 @@ int RAND_DRBG_reseed(RAND_DRBG *drbg, } drbg->state = DRBG_ERROR; - - drbg->reseed_next_counter = tsan_load(&drbg->reseed_prop_counter); - if (drbg->reseed_next_counter) { - drbg->reseed_next_counter++; - if(!drbg->reseed_next_counter) - drbg->reseed_next_counter = 1; - } - if (drbg->get_entropy != NULL) entropylen = drbg->get_entropy(drbg, &entropy, drbg->strength, drbg->min_entropylen, @@ -451,9 +442,15 @@ int RAND_DRBG_reseed(RAND_DRBG *drbg, goto end; drbg->state = DRBG_READY; - drbg->reseed_gen_counter = 1; + drbg->generate_counter = 1; drbg->reseed_time = time(NULL); - tsan_store(&drbg->reseed_prop_counter, drbg->reseed_next_counter); + if 
(drbg->enable_reseed_propagation) { + if (drbg->parent == NULL) + tsan_counter(&drbg->reseed_counter); + else + tsan_store(&drbg->reseed_counter, + tsan_load(&drbg->parent->reseed_counter)); + } end: if (entropy != NULL && drbg->cleanup_entropy != NULL) @@ -554,7 +551,9 @@ int rand_drbg_restart(RAND_DRBG *drbg, drbg->meth->reseed(drbg, adin, adinlen, NULL, 0); } else if (reseeded == 0) { /* do a full reseeding if it has not been done yet above */ - RAND_DRBG_reseed(drbg, NULL, 0, 0); + if (!RAND_DRBG_reseed(drbg, NULL, 0, 0)) { + RANDerr(RAND_F_RAND_DRBG_RESTART, RAND_R_RESEED_ERROR); + } } } @@ -612,7 +611,7 @@ int RAND_DRBG_generate(RAND_DRBG *drbg, unsigned char *out, size_t outlen, } if (drbg->reseed_interval > 0) { - if (drbg->reseed_gen_counter >= drbg->reseed_interval) + if (drbg->generate_counter >= drbg->reseed_interval) reseed_required = 1; } if (drbg->reseed_time_interval > 0) { @@ -621,11 +620,8 @@ int RAND_DRBG_generate(RAND_DRBG *drbg, unsigned char *out, size_t outlen, || now - drbg->reseed_time >= drbg->reseed_time_interval) reseed_required = 1; } - if (drbg->parent != NULL) { - unsigned int reseed_counter = tsan_load(&drbg->reseed_prop_counter); - if (reseed_counter > 0 - && tsan_load(&drbg->parent->reseed_prop_counter) - != reseed_counter) + if (drbg->enable_reseed_propagation && drbg->parent != NULL) { + if (drbg->reseed_counter != tsan_load(&drbg->parent->reseed_counter)) reseed_required = 1; } @@ -644,7 +640,7 @@ int RAND_DRBG_generate(RAND_DRBG *drbg, unsigned char *out, size_t outlen, return 0; } - drbg->reseed_gen_counter++; + drbg->generate_counter++; return 1; } @@ -706,8 +702,7 @@ int RAND_DRBG_set_callbacks(RAND_DRBG *drbg, RAND_DRBG_get_nonce_fn get_nonce, RAND_DRBG_cleanup_nonce_fn cleanup_nonce) { - if (drbg->state != DRBG_UNINITIALISED - || drbg->parent != NULL) + if (drbg->state != DRBG_UNINITIALISED) return 0; drbg->get_entropy = get_entropy; drbg->cleanup_entropy = cleanup_entropy; @@ -883,8 +878,9 @@ static RAND_DRBG 
*drbg_setup(RAND_DRBG *parent) if (parent == NULL && rand_drbg_enable_locking(drbg) == 0) goto err; - /* enable seed propagation */ - tsan_store(&drbg->reseed_prop_counter, 1); + /* enable reseed propagation */ + drbg->enable_reseed_propagation = 1; + drbg->reseed_counter = 1; /* * Ignore instantiation error to support just-in-time instantiation. diff --git a/crypto/rand/rand_lib.c b/crypto/rand/rand_lib.c index ab4e9b5486cb..ba3a29e58468 100644 --- a/crypto/rand/rand_lib.c +++ b/crypto/rand/rand_lib.c @@ -174,8 +174,6 @@ size_t rand_drbg_get_entropy(RAND_DRBG *drbg, prediction_resistance, (unsigned char *)&drbg, sizeof(drbg)) != 0) bytes = bytes_needed; - drbg->reseed_next_counter - = tsan_load(&drbg->parent->reseed_prop_counter); rand_drbg_unlock(drbg->parent); rand_pool_add_end(pool, bytes, 8 * bytes); diff --git a/crypto/rand/rand_local.h b/crypto/rand/rand_local.h index 1bc9bf7d266d..a5de5252dcdc 100644 --- a/crypto/rand/rand_local.h +++ b/crypto/rand/rand_local.h @@ -138,9 +138,11 @@ typedef struct rand_drbg_method_st { * The state of a DRBG AES-CTR. */ typedef struct rand_drbg_ctr_st { - EVP_CIPHER_CTX *ctx; + EVP_CIPHER_CTX *ctx_ecb; + EVP_CIPHER_CTX *ctx_ctr; EVP_CIPHER_CTX *ctx_df; - const EVP_CIPHER *cipher; + const EVP_CIPHER *cipher_ecb; + const EVP_CIPHER *cipher_ctr; size_t keylen; unsigned char K[32]; unsigned char V[16]; @@ -233,7 +235,7 @@ struct rand_drbg_st { size_t max_perslen, max_adinlen; /* Counts the number of generate requests since the last reseed. */ - unsigned int reseed_gen_counter; + unsigned int generate_counter; /* * Maximum number of generate requests until a reseed is required. * This value is ignored if it is zero. @@ -246,9 +248,15 @@ struct rand_drbg_st { * This value is ignored if it is zero. */ time_t reseed_time_interval; + + /* + * Enables reseed propagation (see following comment) + */ + unsigned int enable_reseed_propagation; + /* * Counts the number of reseeds since instantiation. - * This value is ignored if it is zero. 
+ * This value is ignored if enable_reseed_propagation is zero. * * This counter is used only for seed propagation from the <master> DRBG * to its two children, the <public> and <private> DRBG. This feature is @@ -256,8 +264,7 @@ struct rand_drbg_st { * is added by RAND_add() or RAND_seed() will have an immediate effect on * the output of RAND_bytes() resp. RAND_priv_bytes(). */ - TSAN_QUALIFIER unsigned int reseed_prop_counter; - unsigned int reseed_next_counter; + TSAN_QUALIFIER unsigned int reseed_counter; size_t seedlen; DRBG_STATUS state; diff --git a/crypto/rand/rand_unix.c b/crypto/rand/rand_unix.c index fe457cab4a3b..da66773e4ab9 100644 --- a/crypto/rand/rand_unix.c +++ b/crypto/rand/rand_unix.c @@ -26,12 +26,12 @@ # include <sys/utsname.h> # endif #endif -#if defined(__FreeBSD__) && !defined(OPENSSL_SYS_UEFI) +#if (defined(__FreeBSD__) || defined(__NetBSD__)) && !defined(OPENSSL_SYS_UEFI) # include <sys/types.h> # include <sys/sysctl.h> # include <sys/param.h> #endif -#if defined(__OpenBSD__) || defined(__NetBSD__) +#if defined(__OpenBSD__) # include <sys/param.h> #endif @@ -247,10 +247,12 @@ static ssize_t sysctl_random(char *buf, size_t buflen) * when the sysctl returns long and we want to request something not a * multiple of longs, which should never be the case. */ +#if defined(__FreeBSD__) if (!ossl_assert(buflen % sizeof(long) == 0)) { errno = EINVAL; return -1; } +#endif /* * On NetBSD before 4.0 KERN_ARND was an alias for KERN_URND, and only @@ -268,7 +270,7 @@ static ssize_t sysctl_random(char *buf, size_t buflen) mib[1] = KERN_ARND; do { - len = buflen; + len = buflen > 256 ? 256 : buflen; if (sysctl(mib, 2, buf, &len, NULL, 0) == -1) return done > 0 ? 
done : -1; done += len; @@ -409,7 +411,8 @@ static struct random_device { } random_devices[OSSL_NELEM(random_device_paths)]; static int keep_random_devices_open = 1; -# if defined(__linux) && defined(DEVRANDOM_WAIT) +# if defined(__linux) && defined(DEVRANDOM_WAIT) \ + && defined(OPENSSL_RAND_SEED_GETRANDOM) static void *shm_addr; static void cleanup_shm(void) @@ -487,7 +490,7 @@ static int wait_random_seeded(void) } return seeded; } -# else /* defined __linux */ +# else /* defined __linux && DEVRANDOM_WAIT && OPENSSL_RAND_SEED_GETRANDOM */ static int wait_random_seeded(void) { return 1; diff --git a/crypto/rand/randfile.c b/crypto/rand/randfile.c index ba121eefbf09..229ce864a312 100644 --- a/crypto/rand/randfile.c +++ b/crypto/rand/randfile.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -26,7 +26,7 @@ #ifndef OPENSSL_NO_POSIX_IO # include <sys/stat.h> # include <fcntl.h> -# ifdef _WIN32 +# if defined(_WIN32) && !defined(_WIN32_WCE) # include <windows.h> # include <io.h> # define stat _stat diff --git a/crypto/rsa/rsa_ameth.c b/crypto/rsa/rsa_ameth.c index 6692a51ed8fe..fb045544a832 100644 --- a/crypto/rsa/rsa_ameth.c +++ b/crypto/rsa/rsa_ameth.c @@ -1,5 +1,5 @@ /* - * Copyright 2006-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2006-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -118,6 +118,15 @@ static int rsa_pub_decode(EVP_PKEY *pkey, X509_PUBKEY *pubkey) static int rsa_pub_cmp(const EVP_PKEY *a, const EVP_PKEY *b) { + /* + * Don't check the public/private key, this is mostly for smart + * cards. 
+ */ + if (((RSA_flags(a->pkey.rsa) & RSA_METHOD_FLAG_NO_CHECK)) + || (RSA_flags(b->pkey.rsa) & RSA_METHOD_FLAG_NO_CHECK)) { + return 1; + } + if (BN_cmp(b->pkey.rsa->n, a->pkey.rsa->n) != 0 || BN_cmp(b->pkey.rsa->e, a->pkey.rsa->e) != 0) return 0; diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl index f7b85e6c6d7e..b72869b86d4a 100644 --- a/crypto/sha/asm/sha1-586.pl +++ b/crypto/sha/asm/sha1-586.pl @@ -144,7 +144,7 @@ $ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32" && `ml 2>&1` =~ /Version ([0-9]+)\./ && $1>=10); # first version supporting AVX -$ymm=1 if ($xmm && !$ymm && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/ && +$ymm=1 if ($xmm && !$ymm && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/ && $2>=3.0); # first version supporting AVX $shaext=$xmm; ### set to zero if compiling for 1.0.1 diff --git a/crypto/sha/asm/sha1-mb-x86_64.pl b/crypto/sha/asm/sha1-mb-x86_64.pl index a78266d62fc3..47c588715163 100755 --- a/crypto/sha/asm/sha1-mb-x86_64.pl +++ b/crypto/sha/asm/sha1-mb-x86_64.pl @@ -66,7 +66,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl index 0680d6d0a2b2..89146d137b9b 100755 --- a/crypto/sha/asm/sha1-x86_64.pl +++ b/crypto/sha/asm/sha1-x86_64.pl @@ -119,7 +119,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git 
a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl index 6989d59bd735..7523dbc165fa 100755 --- a/crypto/sha/asm/sha256-586.pl +++ b/crypto/sha/asm/sha256-586.pl @@ -96,7 +96,7 @@ if ($xmm && !$avx && $ARGV[0] eq "win32" && $avx = ($1>=10) + ($1>=11); } -if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/) { +if ($xmm && !$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/sha/asm/sha256-mb-x86_64.pl b/crypto/sha/asm/sha256-mb-x86_64.pl index 81ac814effc4..c8cc6efac95c 100755 --- a/crypto/sha/asm/sha256-mb-x86_64.pl +++ b/crypto/sha/asm/sha256-mb-x86_64.pl @@ -67,7 +67,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl index 64ae641c1407..ad37850e6130 100755 --- a/crypto/sha/asm/sha512-x86_64.pl +++ b/crypto/sha/asm/sha512-x86_64.pl @@ -135,7 +135,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $avx = ($1>=10) + ($1>=11); } -if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { +if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:clang|LLVM) version|.*based on LLVM) ([0-9]+\.[0-9]+)/) { $avx = ($2>=3.0) + ($2>3.0); } diff --git a/crypto/store/loader_file.c b/crypto/store/loader_file.c index 8f1d20e74aa4..9c9e3bd08506 100644 --- a/crypto/store/loader_file.c +++ b/crypto/store/loader_file.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. 
* * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -429,6 +429,42 @@ static OSSL_STORE_INFO *try_decode_PrivateKey(const char *pem_name, } } else { int i; +#ifndef OPENSSL_NO_ENGINE + ENGINE *curengine = ENGINE_get_first(); + + while (curengine != NULL) { + ENGINE_PKEY_ASN1_METHS_PTR asn1meths = + ENGINE_get_pkey_asn1_meths(curengine); + + if (asn1meths != NULL) { + const int *nids = NULL; + int nids_n = asn1meths(curengine, NULL, &nids, 0); + + for (i = 0; i < nids_n; i++) { + EVP_PKEY_ASN1_METHOD *ameth2 = NULL; + EVP_PKEY *tmp_pkey = NULL; + const unsigned char *tmp_blob = blob; + + if (!asn1meths(curengine, &ameth2, NULL, nids[i])) + continue; + if (ameth2 == NULL + || ameth2->pkey_flags & ASN1_PKEY_ALIAS) + continue; + + tmp_pkey = d2i_PrivateKey(ameth2->pkey_id, NULL, + &tmp_blob, len); + if (tmp_pkey != NULL) { + if (pkey != NULL) + EVP_PKEY_free(tmp_pkey); + else + pkey = tmp_pkey; + (*matchcount)++; + } + } + } + curengine = ENGINE_get_next(curengine); + } +#endif for (i = 0; i < EVP_PKEY_asn1_get_count(); i++) { EVP_PKEY *tmp_pkey = NULL; diff --git a/crypto/store/store_lib.c b/crypto/store/store_lib.c index fb8184d2d9b5..fb71f84725b1 100644 --- a/crypto/store/store_lib.c +++ b/crypto/store/store_lib.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -218,7 +218,11 @@ int OSSL_STORE_eof(OSSL_STORE_CTX *ctx) int OSSL_STORE_close(OSSL_STORE_CTX *ctx) { - int loader_ret = ctx->loader->close(ctx->loader_ctx); + int loader_ret; + + if (ctx == NULL) + return 1; + loader_ret = ctx->loader->close(ctx->loader_ctx); OPENSSL_free(ctx); return loader_ret; diff --git a/crypto/ts/ts_rsp_sign.c b/crypto/ts/ts_rsp_sign.c index 041a187da68c..342582f024b2 100644 --- a/crypto/ts/ts_rsp_sign.c +++ b/crypto/ts/ts_rsp_sign.c @@ -57,12 +57,14 @@ static ASN1_INTEGER *def_serial_cb(struct TS_resp_ctx *ctx, void *data) goto err; if (!ASN1_INTEGER_set(serial, 1)) goto err; + return serial; err: TSerr(TS_F_DEF_SERIAL_CB, ERR_R_MALLOC_FAILURE); TS_RESP_CTX_set_status_info(ctx, TS_STATUS_REJECTION, "Error during serial number generation."); + ASN1_INTEGER_free(serial); return NULL; } diff --git a/crypto/ui/ui_openssl.c b/crypto/ui/ui_openssl.c index 168de4630dcc..9526c16536cb 100644 --- a/crypto/ui/ui_openssl.c +++ b/crypto/ui/ui_openssl.c @@ -1,5 +1,5 @@ /* - * Copyright 2001-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -439,6 +439,16 @@ static int open_console(UI *ui) is_a_tty = 0; else # endif +# ifdef EPERM + /* + * Linux can return EPERM (Operation not permitted), + * e.g. if a daemon executes openssl via fork()+execve() + * This should be ok + */ + if (errno == EPERM) + is_a_tty = 0; + else +# endif # ifdef ENODEV /* * MacOS X returns ENODEV (Operation not supported by device), diff --git a/crypto/whrlpool/wp_block.c b/crypto/whrlpool/wp_block.c index c21c04dbc1bb..39ad009c01bf 100644 --- a/crypto/whrlpool/wp_block.c +++ b/crypto/whrlpool/wp_block.c @@ -1,5 +1,5 @@ /* - * Copyright 2005-2019 The OpenSSL Project Authors. All Rights Reserved. 
+ * Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -63,6 +63,20 @@ typedef unsigned long long u64; # undef STRICT_ALIGNMENT #endif +#ifndef STRICT_ALIGNMENT +# ifdef __GNUC__ +typedef u64 u64_a1 __attribute((__aligned__(1))); +# else +typedef u64 u64_a1; +# endif +#endif + +#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT) +typedef u64 u64_aX __attribute((__aligned__(1))); +#else +typedef u64 u64_aX; +#endif + #undef SMALL_REGISTER_BANK #if defined(__i386) || defined(__i386__) || defined(_M_IX86) # define SMALL_REGISTER_BANK @@ -191,13 +205,13 @@ typedef unsigned long long u64; # define LL(c0,c1,c2,c3,c4,c5,c6,c7) c0,c1,c2,c3,c4,c5,c6,c7, \ c0,c1,c2,c3,c4,c5,c6,c7 # define C0(K,i) (((u64*)(Cx.c+0))[2*K.c[(i)*8+0]]) -# define C1(K,i) (((u64*)(Cx.c+7))[2*K.c[(i)*8+1]]) -# define C2(K,i) (((u64*)(Cx.c+6))[2*K.c[(i)*8+2]]) -# define C3(K,i) (((u64*)(Cx.c+5))[2*K.c[(i)*8+3]]) -# define C4(K,i) (((u64*)(Cx.c+4))[2*K.c[(i)*8+4]]) -# define C5(K,i) (((u64*)(Cx.c+3))[2*K.c[(i)*8+5]]) -# define C6(K,i) (((u64*)(Cx.c+2))[2*K.c[(i)*8+6]]) -# define C7(K,i) (((u64*)(Cx.c+1))[2*K.c[(i)*8+7]]) +# define C1(K,i) (((u64_a1*)(Cx.c+7))[2*K.c[(i)*8+1]]) +# define C2(K,i) (((u64_a1*)(Cx.c+6))[2*K.c[(i)*8+2]]) +# define C3(K,i) (((u64_a1*)(Cx.c+5))[2*K.c[(i)*8+3]]) +# define C4(K,i) (((u64_a1*)(Cx.c+4))[2*K.c[(i)*8+4]]) +# define C5(K,i) (((u64_a1*)(Cx.c+3))[2*K.c[(i)*8+5]]) +# define C6(K,i) (((u64_a1*)(Cx.c+2))[2*K.c[(i)*8+6]]) +# define C7(K,i) (((u64_a1*)(Cx.c+1))[2*K.c[(i)*8+7]]) #endif static const @@ -531,7 +545,7 @@ void whirlpool_block(WHIRLPOOL_CTX *ctx, const void *inp, size_t n) } else # endif { - const u64 *pa = (const u64 *)p; + const u64_aX *pa = (const u64_aX *)p; S.q[0] = (K.q[0] = H->q[0]) ^ pa[0]; S.q[1] = (K.q[1] = H->q[1]) ^ pa[1]; S.q[2] = (K.q[2] = H->q[2]) ^ pa[2]; @@ -769,7 +783,7 @@ void 
whirlpool_block(WHIRLPOOL_CTX *ctx, const void *inp, size_t n) } else # endif { - const u64 *pa = (const u64 *)p; + const u64_aX *pa = (const u64_aX *)p; H->q[0] ^= S.q[0] ^ pa[0]; H->q[1] ^= S.q[1] ^ pa[1]; H->q[2] ^= S.q[2] ^ pa[2]; diff --git a/crypto/x509/x509_err.c b/crypto/x509/x509_err.c index c110d908090e..bdd1e67cd3fd 100644 --- a/crypto/x509/x509_err.c +++ b/crypto/x509/x509_err.c @@ -1,6 +1,6 @@ /* * Generated by util/mkerr.pl DO NOT EDIT - * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -79,6 +79,7 @@ static const ERR_STRING_DATA X509_str_functs[] = { {ERR_PACK(ERR_LIB_X509, X509_F_X509_PRINT_EX_FP, 0), "X509_print_ex_fp"}, {ERR_PACK(ERR_LIB_X509, X509_F_X509_PUBKEY_DECODE, 0), "x509_pubkey_decode"}, + {ERR_PACK(ERR_LIB_X509, X509_F_X509_PUBKEY_GET, 0), "X509_PUBKEY_get"}, {ERR_PACK(ERR_LIB_X509, X509_F_X509_PUBKEY_GET0, 0), "X509_PUBKEY_get0"}, {ERR_PACK(ERR_LIB_X509, X509_F_X509_PUBKEY_SET, 0), "X509_PUBKEY_set"}, {ERR_PACK(ERR_LIB_X509, X509_F_X509_REQ_CHECK_PRIVATE_KEY, 0), diff --git a/crypto/x509/x509_local.h b/crypto/x509/x509_local.h index c517a7745637..10807e1def04 100644 --- a/crypto/x509/x509_local.h +++ b/crypto/x509/x509_local.h @@ -1,5 +1,5 @@ /* - * Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -145,3 +145,5 @@ DEFINE_STACK_OF(STACK_OF_X509_NAME_ENTRY) void x509_set_signature_info(X509_SIG_INFO *siginf, const X509_ALGOR *alg, const ASN1_STRING *sig); +int x509_likely_issued(X509 *issuer, X509 *subject); +int x509_signing_allowed(const X509 *issuer, const X509 *subject); diff --git a/crypto/x509/x509_req.c b/crypto/x509/x509_req.c index 7ba0f26495f9..dd674926ddb5 100644 --- a/crypto/x509/x509_req.c +++ b/crypto/x509/x509_req.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -286,6 +286,18 @@ void X509_REQ_get0_signature(const X509_REQ *req, const ASN1_BIT_STRING **psig, *palg = &req->sig_alg; } +void X509_REQ_set0_signature(X509_REQ *req, ASN1_BIT_STRING *psig) +{ + if (req->signature) + ASN1_BIT_STRING_free(req->signature); + req->signature = psig; +} + +int X509_REQ_set1_signature_algo(X509_REQ *req, X509_ALGOR *palg) +{ + return X509_ALGOR_copy(&req->sig_alg, palg); +} + int X509_REQ_get_signature_nid(const X509_REQ *req) { return OBJ_obj2nid(req->sig_alg.algorithm); diff --git a/crypto/x509/x509_txt.c b/crypto/x509/x509_txt.c index 4755b39eb4eb..02bde640d8e8 100644 --- a/crypto/x509/x509_txt.c +++ b/crypto/x509/x509_txt.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2017 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. 
You can obtain a copy @@ -174,6 +174,8 @@ const char *X509_verify_cert_error_string(long n) return "OCSP verification failed"; case X509_V_ERR_OCSP_CERT_UNKNOWN: return "OCSP unknown cert"; + case X509_V_ERR_EC_KEY_EXPLICIT_PARAMS: + return "Certificate public key has explicit ECC parameters"; default: /* Printing an error number into a static buffer is not thread-safe */ diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c index 41625e75ad6a..801055f5a087 100644 --- a/crypto/x509/x509_vfy.c +++ b/crypto/x509/x509_vfy.c @@ -80,6 +80,7 @@ static int get_issuer_sk(X509 **issuer, X509_STORE_CTX *ctx, X509 *x); static int check_dane_issuer(X509_STORE_CTX *ctx, int depth); static int check_key_level(X509_STORE_CTX *ctx, X509 *cert); static int check_sig_level(X509_STORE_CTX *ctx, X509 *cert); +static int check_curve(X509 *cert); static int get_crl_score(X509_STORE_CTX *ctx, X509 **pissuer, unsigned int *preasons, X509_CRL *crl, X509 *x); @@ -104,7 +105,12 @@ static int null_callback(int ok, X509_STORE_CTX *e) return ok; } -/* Return 1 is a certificate is self signed */ +/* + * Return 1 if given cert is considered self-signed, 0 if not or on error. + * This does not verify self-signedness but relies on x509v3_cache_extensions() + * matching issuer and subject names (i.e., the cert being self-issued) and any + * present authority key identifier matching the subject key identifier, etc. 
+ */ static int cert_self_signed(X509 *x) { if (X509_check_purpose(x, -1, 0) != 1) @@ -131,10 +137,9 @@ static X509 *lookup_cert_match(X509_STORE_CTX *ctx, X509 *x) xtmp = sk_X509_value(certs, i); if (!X509_cmp(xtmp, x)) break; + xtmp = NULL; } - if (i < sk_X509_num(certs)) - X509_up_ref(xtmp); - else + if (xtmp != NULL && !X509_up_ref(xtmp)) xtmp = NULL; sk_X509_pop_free(certs, X509_free); return xtmp; @@ -267,17 +272,24 @@ int X509_verify_cert(X509_STORE_CTX *ctx) return -1; } + if (!X509_up_ref(ctx->cert)) { + X509err(X509_F_X509_VERIFY_CERT, ERR_R_INTERNAL_ERROR); + ctx->error = X509_V_ERR_UNSPECIFIED; + return -1; + } + /* * first we make sure the chain we are going to build is present and that * the first entry is in place */ - if (((ctx->chain = sk_X509_new_null()) == NULL) || - (!sk_X509_push(ctx->chain, ctx->cert))) { + if ((ctx->chain = sk_X509_new_null()) == NULL + || !sk_X509_push(ctx->chain, ctx->cert)) { + X509_free(ctx->cert); X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE); ctx->error = X509_V_ERR_OUT_OF_MEM; return -1; } - X509_up_ref(ctx->cert); + ctx->num_untrusted = 1; /* If the peer's public key is too weak, we can stop early. */ @@ -319,30 +331,26 @@ static X509 *find_issuer(X509_STORE_CTX *ctx, STACK_OF(X509) *sk, X509 *x) return rv; } -/* Given a possible certificate and issuer check them */ - +/* + * Check that the given certificate 'x' is issued by the certificate 'issuer' + * and the issuer is not yet in ctx->chain, where the exceptional case + * that 'x' is self-issued and ctx->chain has just one element is allowed. 
+ */ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer) { - int ret; - if (x == issuer) - return cert_self_signed(x); - ret = X509_check_issued(issuer, x); - if (ret == X509_V_OK) { + if (x509_likely_issued(issuer, x) != X509_V_OK) + return 0; + if ((x->ex_flags & EXFLAG_SI) == 0 || sk_X509_num(ctx->chain) != 1) { int i; X509 *ch; - /* Special case: single self signed certificate */ - if (cert_self_signed(x) && sk_X509_num(ctx->chain) == 1) - return 1; + for (i = 0; i < sk_X509_num(ctx->chain); i++) { ch = sk_X509_value(ctx->chain, i); - if (ch == issuer || !X509_cmp(ch, issuer)) { - ret = X509_V_ERR_PATH_LOOP; - break; - } + if (ch == issuer || X509_cmp(ch, issuer) == 0) + return 0; } } - - return (ret == X509_V_OK); + return 1; } /* Alternative lookup method: look from a STACK stored in other_ctx */ @@ -350,11 +358,15 @@ static int check_issued(X509_STORE_CTX *ctx, X509 *x, X509 *issuer) static int get_issuer_sk(X509 **issuer, X509_STORE_CTX *ctx, X509 *x) { *issuer = find_issuer(ctx, ctx->other_ctx, x); - if (*issuer) { - X509_up_ref(*issuer); - return 1; - } else - return 0; + + if (*issuer == NULL || !X509_up_ref(*issuer)) + goto err; + + return 1; + + err: + *issuer = NULL; + return 0; } static STACK_OF(X509) *lookup_certs_sk(X509_STORE_CTX *ctx, X509_NAME *nm) @@ -366,15 +378,21 @@ static STACK_OF(X509) *lookup_certs_sk(X509_STORE_CTX *ctx, X509_NAME *nm) for (i = 0; i < sk_X509_num(ctx->other_ctx); i++) { x = sk_X509_value(ctx->other_ctx, i); if (X509_NAME_cmp(nm, X509_get_subject_name(x)) == 0) { + if (!X509_up_ref(x)) { + sk_X509_pop_free(sk, X509_free); + X509err(X509_F_LOOKUP_CERTS_SK, ERR_R_INTERNAL_ERROR); + ctx->error = X509_V_ERR_UNSPECIFIED; + return NULL; + } if (sk == NULL) sk = sk_X509_new_null(); - if (sk == NULL || sk_X509_push(sk, x) == 0) { + if (sk == NULL || !sk_X509_push(sk, x)) { + X509_free(x); sk_X509_pop_free(sk, X509_free); X509err(X509_F_LOOKUP_CERTS_SK, ERR_R_MALLOC_FAILURE); ctx->error = X509_V_ERR_OUT_OF_MEM; 
return NULL; } - X509_up_ref(x); } } return sk; @@ -508,6 +526,14 @@ static int check_chain_extensions(X509_STORE_CTX *ctx) ret = 1; break; } + if ((ctx->param->flags & X509_V_FLAG_X509_STRICT) && num > 1) { + /* Check for presence of explicit elliptic curve parameters */ + ret = check_curve(x); + if (ret < 0) + ctx->error = X509_V_ERR_UNSPECIFIED; + else if (ret == 0) + ctx->error = X509_V_ERR_EC_KEY_EXPLICIT_PARAMS; + } if ((x->ex_flags & EXFLAG_CA) == 0 && x->ex_pathlen != -1 && (ctx->param->flags & X509_V_FLAG_X509_STRICT)) { @@ -1699,6 +1725,7 @@ int x509_check_cert_time(X509_STORE_CTX *ctx, X509 *x, int depth) return 1; } +/* verify the issuer signatures and cert times of ctx->chain */ static int internal_verify(X509_STORE_CTX *ctx) { int n = sk_X509_num(ctx->chain) - 1; @@ -1717,7 +1744,7 @@ static int internal_verify(X509_STORE_CTX *ctx) } if (ctx->check_issued(ctx, xi, xi)) - xs = xi; + xs = xi; /* the typical case: last cert in the chain is self-issued */ else { if (ctx->param->flags & X509_V_FLAG_PARTIAL_CHAIN) { xs = xi; @@ -1736,22 +1763,50 @@ static int internal_verify(X509_STORE_CTX *ctx) * is allowed to reset errors (at its own peril). */ while (n >= 0) { - EVP_PKEY *pkey; - /* - * Skip signature check for self signed certificates unless explicitly - * asked for. It doesn't add any security and just wastes time. If - * the issuer's public key is unusable, report the issuer certificate - * and its depth (rather than the depth of the subject). + * For each iteration of this loop: + * n is the subject depth + * xs is the subject cert, for which the signature is to be checked + * xi is the supposed issuer cert containing the public key to use + * Initially xs == xi if the last cert in the chain is self-issued. + * + * Skip signature check for self-signed certificates unless explicitly + * asked for because it does not add any security and just wastes time. 
*/ - if (xs != xi || (ctx->param->flags & X509_V_FLAG_CHECK_SS_SIGNATURE)) { + if (xs != xi || ((ctx->param->flags & X509_V_FLAG_CHECK_SS_SIGNATURE) + && (xi->ex_flags & EXFLAG_SS) != 0)) { + EVP_PKEY *pkey; + /* + * If the issuer's public key is not available or its key usage + * does not support issuing the subject cert, report the issuer + * cert and its depth (rather than n, the depth of the subject). + */ + int issuer_depth = n + (xs == xi ? 0 : 1); + /* + * According to https://tools.ietf.org/html/rfc5280#section-6.1.4 + * step (n) we must check any given key usage extension in a CA cert + * when preparing the verification of a certificate issued by it. + * According to https://tools.ietf.org/html/rfc5280#section-4.2.1.3 + * we must not verify a certifiate signature if the key usage of the + * CA certificate that issued the certificate prohibits signing. + * In case the 'issuing' certificate is the last in the chain and is + * not a CA certificate but a 'self-issued' end-entity cert (i.e., + * xs == xi && !(xi->ex_flags & EXFLAG_CA)) RFC 5280 does not apply + * (see https://tools.ietf.org/html/rfc6818#section-2) and thus + * we are free to ignore any key usage restrictions on such certs. + */ + int ret = xs == xi && (xi->ex_flags & EXFLAG_CA) == 0 + ? X509_V_OK : x509_signing_allowed(xi, xs); + + if (ret != X509_V_OK && !verify_cb_cert(ctx, xi, issuer_depth, ret)) + return 0; if ((pkey = X509_get0_pubkey(xi)) == NULL) { - if (!verify_cb_cert(ctx, xi, xi != xs ? 
n+1 : n, - X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY)) + ret = X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY; + if (!verify_cb_cert(ctx, xi, issuer_depth, ret)) return 0; } else if (X509_verify(xs, pkey) <= 0) { - if (!verify_cb_cert(ctx, xs, n, - X509_V_ERR_CERT_SIGNATURE_FAILURE)) + ret = X509_V_ERR_CERT_SIGNATURE_FAILURE; + if (!verify_cb_cert(ctx, xs, n, ret)) return 0; } } @@ -3158,7 +3213,16 @@ static int build_chain(X509_STORE_CTX *ctx) /* Drop this issuer from future consideration */ (void) sk_X509_delete_ptr(sktmp, xtmp); + if (!X509_up_ref(xtmp)) { + X509err(X509_F_BUILD_CHAIN, ERR_R_INTERNAL_ERROR); + trust = X509_TRUST_REJECTED; + ctx->error = X509_V_ERR_UNSPECIFIED; + search = 0; + continue; + } + if (!sk_X509_push(ctx->chain, xtmp)) { + X509_free(xtmp); X509err(X509_F_BUILD_CHAIN, ERR_R_MALLOC_FAILURE); trust = X509_TRUST_REJECTED; ctx->error = X509_V_ERR_OUT_OF_MEM; @@ -3166,7 +3230,7 @@ static int build_chain(X509_STORE_CTX *ctx) continue; } - X509_up_ref(x = xtmp); + x = xtmp; ++ctx->num_untrusted; ss = cert_self_signed(xtmp); @@ -3258,6 +3322,32 @@ static int check_key_level(X509_STORE_CTX *ctx, X509 *cert) } /* + * Check whether the public key of ``cert`` does not use explicit params + * for an elliptic curve. + * + * Returns 1 on success, 0 if check fails, -1 for other errors. + */ +static int check_curve(X509 *cert) +{ +#ifndef OPENSSL_NO_EC + EVP_PKEY *pkey = X509_get0_pubkey(cert); + + /* Unsupported or malformed key */ + if (pkey == NULL) + return -1; + + if (EVP_PKEY_id(pkey) == EVP_PKEY_EC) { + int ret; + + ret = EC_KEY_decoded_from_explicit_params(EVP_PKEY_get0_EC_KEY(pkey)); + return ret < 0 ? ret : !ret; + } +#endif + + return 1; +} + +/* * Check whether the signature digest algorithm of ``cert`` meets the security * level of ``ctx``. Should not be checked for trust anchors (whether * self-signed or otherwise). 
diff --git a/crypto/x509/x_pubkey.c b/crypto/x509/x_pubkey.c index 4f694b93fb00..9be7e9286571 100644 --- a/crypto/x509/x_pubkey.c +++ b/crypto/x509/x_pubkey.c @@ -1,5 +1,5 @@ /* - * Copyright 1995-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1995-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -169,8 +169,11 @@ EVP_PKEY *X509_PUBKEY_get0(X509_PUBKEY *key) EVP_PKEY *X509_PUBKEY_get(X509_PUBKEY *key) { EVP_PKEY *ret = X509_PUBKEY_get0(key); - if (ret != NULL) - EVP_PKEY_up_ref(ret); + + if (ret != NULL && !EVP_PKEY_up_ref(ret)) { + X509err(X509_F_X509_PUBKEY_GET, ERR_R_INTERNAL_ERROR); + ret = NULL; + } return ret; } diff --git a/crypto/x509v3/pcy_data.c b/crypto/x509v3/pcy_data.c index 073505951322..8c7bc69576a4 100644 --- a/crypto/x509v3/pcy_data.c +++ b/crypto/x509v3/pcy_data.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2018 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -52,6 +52,7 @@ X509_POLICY_DATA *policy_data_new(POLICYINFO *policy, ret = OPENSSL_zalloc(sizeof(*ret)); if (ret == NULL) { X509V3err(X509V3_F_POLICY_DATA_NEW, ERR_R_MALLOC_FAILURE); + ASN1_OBJECT_free(id); return NULL; } ret->expected_policy_set = sk_ASN1_OBJECT_new_null(); diff --git a/crypto/x509v3/v3_alt.c b/crypto/x509v3/v3_alt.c index 7ac2911b91af..4dce0041012e 100644 --- a/crypto/x509v3/v3_alt.c +++ b/crypto/x509v3/v3_alt.c @@ -1,5 +1,5 @@ /* - * Copyright 1999-2019 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the OpenSSL license (the "License"). 
You may not use * this file except in compliance with the License. You can obtain a copy @@ -275,6 +275,7 @@ static int copy_issuer(X509V3_CTX *ctx, GENERAL_NAMES *gens) num = sk_GENERAL_NAME_num(ialt); if (!sk_GENERAL_NAME_reserve(gens, num)) { X509V3err(X509V3_F_COPY_ISSUER, ERR_R_MALLOC_FAILURE); + sk_GENERAL_NAME_free(ialt); goto err; } diff --git a/crypto/x509v3/v3_purp.c b/crypto/x509v3/v3_purp.c index f023c6489548..2b06dba05398 100644 --- a/crypto/x509v3/v3_purp.c +++ b/crypto/x509v3/v3_purp.c @@ -13,6 +13,7 @@ #include <openssl/x509v3.h> #include <openssl/x509_vfy.h> #include "crypto/x509.h" +#include "../x509/x509_local.h" /* for x509_signing_allowed() */ #include "internal/tsan_assist.h" static void x509v3_cache_extensions(X509 *x); @@ -344,6 +345,21 @@ static int setup_crldp(X509 *x) return 1; } +/* Check that issuer public key algorithm matches subject signature algorithm */ +static int check_sig_alg_match(const EVP_PKEY *pkey, const X509 *subject) +{ + int pkey_nid; + + if (pkey == NULL) + return X509_V_ERR_NO_ISSUER_PUBLIC_KEY; + if (OBJ_find_sigid_algs(OBJ_obj2nid(subject->cert_info.signature.algorithm), + NULL, &pkey_nid) == 0) + return X509_V_ERR_UNSUPPORTED_SIGNATURE_ALGORITHM; + if (EVP_PKEY_type(pkey_nid) != EVP_PKEY_base_id(pkey)) + return X509_V_ERR_SIGNATURE_ALGORITHM_MISMATCH; + return X509_V_OK; +} + #define V1_ROOT (EXFLAG_V1|EXFLAG_SS) #define ku_reject(x, usage) \ (((x)->ex_flags & EXFLAG_KUSAGE) && !((x)->ex_kusage & (usage))) @@ -496,11 +512,11 @@ static void x509v3_cache_extensions(X509 *x) x->ex_flags |= EXFLAG_INVALID; /* Does subject name match issuer ? 
*/ if (!X509_NAME_cmp(X509_get_subject_name(x), X509_get_issuer_name(x))) { - x->ex_flags |= EXFLAG_SI; - /* If SKID matches AKID also indicate self signed */ - if (X509_check_akid(x, x->akid) == X509_V_OK && - !ku_reject(x, KU_KEY_CERT_SIGN)) - x->ex_flags |= EXFLAG_SS; + x->ex_flags |= EXFLAG_SI; /* cert is self-issued */ + if (X509_check_akid(x, x->akid) == X509_V_OK /* SKID matches AKID */ + /* .. and the signature alg matches the PUBKEY alg: */ + && check_sig_alg_match(X509_get0_pubkey(x), x) == X509_V_OK) + x->ex_flags |= EXFLAG_SS; /* indicate self-signed */ } x->altname = X509_get_ext_d2i(x, NID_subject_alt_name, &i, NULL); if (x->altname == NULL && i != -1) @@ -793,6 +809,23 @@ static int no_check(const X509_PURPOSE *xp, const X509 *x, int ca) } /*- + * Check if certificate I<issuer> is allowed to issue certificate I<subject> + * according to the B<keyUsage> field of I<issuer> if present + * depending on any proxyCertInfo extension of I<subject>. + * Returns 0 for OK, or positive for reason for rejection + * where reason codes match those for X509_verify_cert(). + */ +int x509_signing_allowed(const X509 *issuer, const X509 *subject) +{ + if (subject->ex_flags & EXFLAG_PROXY) { + if (ku_reject(issuer, KU_DIGITAL_SIGNATURE)) + return X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE; + } else if (ku_reject(issuer, KU_KEY_CERT_SIGN)) + return X509_V_ERR_KEYUSAGE_NO_CERTSIGN; + return X509_V_OK; +} + +/*- * Various checks to see if one certificate issued the second. * This can be used to prune a set of possible issuer certificates * which have been looked up using some simple method such as by @@ -800,13 +833,24 @@ static int no_check(const X509_PURPOSE *xp, const X509 *x, int ca) * These are: * 1. Check issuer_name(subject) == subject_name(issuer) * 2. If akid(subject) exists check it matches issuer - * 3. If key_usage(issuer) exists check it supports certificate signing + * 3. Check that issuer public key algorithm matches subject signature algorithm + * 4. 
If key_usage(issuer) exists check it supports certificate signing * returns 0 for OK, positive for reason for mismatch, reasons match * codes for X509_verify_cert() */ int X509_check_issued(X509 *issuer, X509 *subject) { + int ret; + + if ((ret = x509_likely_issued(issuer, subject)) != X509_V_OK) + return ret; + return x509_signing_allowed(issuer, subject); +} + +/* do the checks 1., 2., and 3. as described above for X509_check_issued() */ +int x509_likely_issued(X509 *issuer, X509 *subject) +{ if (X509_NAME_cmp(X509_get_subject_name(issuer), X509_get_issuer_name(subject))) return X509_V_ERR_SUBJECT_ISSUER_MISMATCH; @@ -824,12 +868,8 @@ int X509_check_issued(X509 *issuer, X509 *subject) return ret; } - if (subject->ex_flags & EXFLAG_PROXY) { - if (ku_reject(issuer, KU_DIGITAL_SIGNATURE)) - return X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE; - } else if (ku_reject(issuer, KU_KEY_CERT_SIGN)) - return X509_V_ERR_KEYUSAGE_NO_CERTSIGN; - return X509_V_OK; + /* check if the subject signature alg matches the issuer's PUBKEY alg */ + return check_sig_alg_match(X509_get0_pubkey(issuer), subject); } int X509_check_akid(X509 *issuer, AUTHORITY_KEYID *akid) |