diff options
Diffstat (limited to 'sys')
28 files changed, 2798 insertions, 518 deletions
diff --git a/sys/conf/files b/sys/conf/files index 1eea5e9572d5..018d77bfa84d 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3223,6 +3223,7 @@ libkern/strtoq.c standard libkern/strtoul.c standard libkern/strtouq.c standard libkern/strvalid.c standard +libkern/timingsafe_bcmp.c standard net/bpf.c standard net/bpf_buffer.c optional bpf net/bpf_jitter.c optional bpf_jitter @@ -3880,6 +3881,8 @@ opencrypto/cryptodev.c optional cryptodev opencrypto/cryptodev_if.m optional crypto opencrypto/cryptosoft.c optional crypto opencrypto/cryptodeflate.c optional crypto +opencrypto/gmac.c optional crypto +opencrypto/gfmult.c optional crypto opencrypto/rmd160.c optional crypto | ipsec opencrypto/skipjack.c optional crypto opencrypto/xform.c optional crypto diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 8e529563f516..bbbe82701938 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -129,9 +129,14 @@ amd64/pci/pci_cfgreg.c optional pci cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" crypto/aesni/aeskeys_amd64.S optional aesni crypto/aesni/aesni.c optional aesni +aesni_ghash.o optional aesni \ + dependency "$S/crypto/aesni/aesni_ghash.c" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ + no-implicit-rule \ + clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ - compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -maes ${.IMPSRC}" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/blowfish/bf_enc.c optional crypto | ipsec diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 86919861e8b0..96879b8b9d2c 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -116,9 +116,14 @@ bf_enc.o optional crypto | ipsec \ no-implicit-rule crypto/aesni/aeskeys_i386.S optional aesni crypto/aesni/aesni.c optional aesni +aesni_ghash.o optional aesni \ + dependency "$S/crypto/aesni/aesni_ghash.c" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC}" \ + no-implicit-rule \ + clean "aesni_ghash.o" aesni_wrap.o optional aesni \ dependency "$S/crypto/aesni/aesni_wrap.c" \ - compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -maes ${.IMPSRC}" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} -mmmx -msse -msse4 -maes ${.IMPSRC}" \ no-implicit-rule \ clean "aesni_wrap.o" crypto/des/arch/i386/des_enc.S optional crypto | ipsec | netsmb diff --git a/sys/crypto/aesni/aesni.c b/sys/crypto/aesni/aesni.c index 7d7a7405e031..25357509c739 100644 --- a/sys/crypto/aesni/aesni.c +++ b/sys/crypto/aesni/aesni.c @@ -1,8 +1,13 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek <pjd@FreeBSD.org> * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> + * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -39,8 +44,10 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/bus.h> #include <sys/uio.h> +#include <sys/mbuf.h> #include <crypto/aesni/aesni.h> #include <cryptodev_if.h> +#include <opencrypto/gmac.h> struct aesni_softc { int32_t cid; @@ -56,7 +63,7 @@ static void aesni_freesession_locked(struct aesni_softc *sc, static int aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini); static int aesni_cipher_process(struct aesni_session *ses, - struct cryptodesc *enccrd, struct cryptop *crp); + struct cryptodesc *enccrd, struct cryptodesc *authcrd, struct cryptop *crp); MALLOC_DEFINE(M_AESNI, "aesni_data", "AESNI Data"); @@ -79,12 +86,12 @@ aesni_probe(device_t dev) return (EINVAL); } - if ((cpu_feature & CPUID_SSE2) == 0) { - device_printf(dev, "No SSE2 support but AESNI!?!\n"); + if ((cpu_feature2 & CPUID2_SSE41) == 0) { + device_printf(dev, "No SSE4.1 support.\n"); return (EINVAL); } - device_set_desc_copy(dev, "AES-CBC,AES-XTS"); + device_set_desc_copy(dev, "AES-CBC,AES-XTS,AES-GCM,AES-ICM"); return (0); } @@ -105,6 +112,11 @@ aesni_attach(device_t dev) rw_init(&sc->lock, "aesni_lock"); crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_ICM, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_NIST_GCM_16, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_128_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_192_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_256_NIST_GMAC, 0, 0); crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0); return (0); } @@ -144,8 +156,10 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) struct cryptoini *encini; int error; - if (sidp == NULL || cri == NULL) + if (sidp == NULL || cri == NULL) { + CRYPTDEB("no sidp or cri"); return (EINVAL); + } sc = device_get_softc(dev); ses = NULL; @@ -153,17 +167,32 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) for (; cri != NULL; cri = cri->cri_next) { switch (cri->cri_alg) { case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: - if (encini != NULL) + case CRYPTO_AES_NIST_GCM_16: + if (encini != NULL) { + CRYPTDEB("encini already set"); return (EINVAL); + } encini = cri; break; + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + /* + * nothing to do here, maybe in the future cache some + * values for GHASH + */ + break; default: + CRYPTDEB("unhandled algorithm"); return (EINVAL); } } - if (encini == NULL) + if (encini == NULL) { + CRYPTDEB("no cipher"); return (EINVAL); + } rw_wlock(&sc->lock); /* @@ -195,6 +224,7 @@ aesni_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) error = aesni_cipher_setup(ses, encini); if (error != 0) { + CRYPTDEB("setup failed"); rw_wlock(&sc->lock); aesni_freesession_locked(sc, ses); rw_wunlock(&sc->lock); @@ -214,7 +244,7 @@ aesni_freesession_locked(struct aesni_softc *sc, struct aesni_session *ses) sid = ses->id; TAILQ_REMOVE(&sc->sessions, ses, next); ctx = ses->fpu_ctx; - bzero(ses, sizeof(*ses)); + *ses = (struct aesni_session){}; ses->id = sid; ses->fpu_ctx = ctx; TAILQ_INSERT_HEAD(&sc->sessions, ses, next); @@ -248,11 +278,13 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) { struct aesni_softc *sc = device_get_softc(dev); struct aesni_session *ses = NULL; - struct cryptodesc *crd, *enccrd; - int error; + struct cryptodesc *crd, *enccrd, *authcrd; + int error, needauth; error = 0; enccrd = NULL; + authcrd = NULL; + needauth = 0; /* Sanity check. */ if (crp == NULL) @@ -266,6 +298,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) for (crd = crp->crp_desc; crd != NULL; crd = crd->crd_next) { switch (crd->crd_alg) { case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: case CRYPTO_AES_XTS: if (enccrd != NULL) { error = EINVAL; @@ -273,11 +306,41 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) } enccrd = crd; break; + + case CRYPTO_AES_NIST_GCM_16: + if (enccrd != NULL) { + error = EINVAL; + goto out; + } + enccrd = crd; + needauth = 1; + break; + + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + if (authcrd != NULL) { + error = EINVAL; + goto out; + } + authcrd = crd; + needauth = 1; + break; + default: - return (EINVAL); + error = EINVAL; + goto out; } } - if (enccrd == NULL || (enccrd->crd_len % AES_BLOCK_LEN) != 0) { + + if (enccrd == NULL || (needauth && authcrd == NULL)) { + error = EINVAL; + goto out; + } + + /* CBC & XTS can only handle full blocks for now */ + if ((enccrd->crd_alg == CRYPTO_AES_CBC || enccrd->crd_alg == + CRYPTO_AES_XTS) && (enccrd->crd_len % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } @@ -293,7 +356,7 @@ aesni_process(device_t dev, struct cryptop *crp, int hint __unused) goto out; } - error = aesni_cipher_process(ses, enccrd, crp); + error = aesni_cipher_process(ses, enccrd, authcrd, crp); if (error != 0) goto out; @@ -307,21 +370,26 @@ uint8_t * aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, int *allocated) { + struct mbuf *m; struct uio *uio; struct iovec *iov; uint8_t *addr; - if (crp->crp_flags & CRYPTO_F_IMBUF) - goto alloc; - else if (crp->crp_flags & CRYPTO_F_IOV) { + if (crp->crp_flags & CRYPTO_F_IMBUF) { + m = (struct mbuf *)crp->crp_buf; + if (m->m_next != NULL) + goto alloc; + addr = mtod(m, uint8_t *); + } else if (crp->crp_flags & CRYPTO_F_IOV) { uio = (struct uio *)crp->crp_buf; if (uio->uio_iovcnt != 1) goto alloc; iov = uio->uio_iov; - addr = (u_char *)iov->iov_base + enccrd->crd_skip; + addr = (uint8_t *)iov->iov_base; } else - addr = (u_char *)crp->crp_buf; + addr = (uint8_t *)crp->crp_buf; *allocated = 0; + addr += enccrd->crd_skip; return (addr); alloc: @@ -376,18 +444,40 @@ aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) return (error); } +/* + * authcrd contains the associated date. + */ static int aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, - struct cryptop *crp) + struct cryptodesc *authcrd, struct cryptop *crp) { + uint8_t tag[GMAC_DIGEST_LEN]; struct thread *td; - uint8_t *buf; - int error, allocated; + uint8_t *buf, *authbuf; + int error, allocated, authallocated; + int ivlen, encflag; + + encflag = (enccrd->crd_flags & CRD_F_ENCRYPT) == CRD_F_ENCRYPT; + + if ((enccrd->crd_alg == CRYPTO_AES_ICM || + enccrd->crd_alg == CRYPTO_AES_NIST_GCM_16) && + (enccrd->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); buf = aesni_cipher_alloc(enccrd, crp, &allocated); if (buf == NULL) return (ENOMEM); + authbuf = NULL; + authallocated = 0; + if (authcrd != NULL) { + authbuf = aesni_cipher_alloc(authcrd, crp, &authallocated); + if (authbuf == NULL) { + error = ENOMEM; + goto out1; + } + } + td = curthread; error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); @@ -401,42 +491,91 @@ aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, goto out; } - if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { - if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) - bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); - if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) - crypto_copyback(crp->crp_flags, crp->crp_buf, - enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); - if (ses->algo == CRYPTO_AES_CBC) { + /* XXX - validate that enccrd and authcrd have/use same key? */ + switch (enccrd->crd_alg) { + case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: + ivlen = AES_BLOCK_LEN; + break; + case CRYPTO_AES_XTS: + ivlen = 8; + break; + case CRYPTO_AES_NIST_GCM_16: + ivlen = 12; /* should support arbitarily larger */ + break; + } + + /* Setup ses->iv */ + bzero(ses->iv, sizeof ses->iv); + if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) + bcopy(enccrd->crd_iv, ses->iv, ivlen); + else if (encflag && ((enccrd->crd_flags & CRD_F_IV_PRESENT) != 0)) + arc4rand(ses->iv, ivlen, 0); + else + crypto_copydata(crp->crp_flags, crp->crp_buf, + enccrd->crd_inject, ivlen, ses->iv); + + if (authcrd != NULL && !encflag) + crypto_copydata(crp->crp_flags, crp->crp_buf, + authcrd->crd_inject, GMAC_DIGEST_LEN, tag); + else + bzero(tag, sizeof tag); + + /* Do work */ + switch (ses->algo) { + case CRYPTO_AES_CBC: + if (encflag) aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, enccrd->crd_len, buf, buf, ses->iv); - } else /* if (ses->algo == CRYPTO_AES_XTS) */ { + else + aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, + enccrd->crd_len, buf, ses->iv); + break; + case CRYPTO_AES_ICM: + /* encryption & decryption are the same */ + aesni_encrypt_icm(ses->rounds, ses->enc_schedule, + enccrd->crd_len, buf, buf, ses->iv); + break; + case CRYPTO_AES_XTS: + if (encflag) aesni_encrypt_xts(ses->rounds, ses->enc_schedule, ses->xts_schedule, enccrd->crd_len, buf, buf, ses->iv); - } - } else { - if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) - bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); else - crypto_copydata(crp->crp_flags, crp->crp_buf, - enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); - if (ses->algo == CRYPTO_AES_CBC) { - aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, - enccrd->crd_len, buf, ses->iv); - } else /* if (ses->algo == CRYPTO_AES_XTS) */ { aesni_decrypt_xts(ses->rounds, ses->dec_schedule, ses->xts_schedule, enccrd->crd_len, buf, buf, ses->iv); + break; + case CRYPTO_AES_NIST_GCM_16: + if (encflag) + AES_GCM_encrypt(buf, buf, authbuf, ses->iv, tag, + enccrd->crd_len, authcrd->crd_len, ivlen, + ses->enc_schedule, ses->rounds); + else { + if (!AES_GCM_decrypt(buf, buf, authbuf, ses->iv, tag, + enccrd->crd_len, authcrd->crd_len, ivlen, + ses->enc_schedule, ses->rounds)) + error = EBADMSG; } + break; } + if (allocated) crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, enccrd->crd_len, buf); - if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) - crypto_copydata(crp->crp_flags, crp->crp_buf, - enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN, - AES_BLOCK_LEN, ses->iv); + + /* + * OpenBSD doesn't copy this back. This primes the IV for the next + * chain. Why do we not do it for decrypt? + */ + if (encflag && enccrd->crd_alg == CRYPTO_AES_CBC) + bcopy(buf + enccrd->crd_len - AES_BLOCK_LEN, ses->iv, AES_BLOCK_LEN); + + if (!error && authcrd != NULL) { + crypto_copyback(crp->crp_flags, crp->crp_buf, + authcrd->crd_inject, GMAC_DIGEST_LEN, tag); + } + out: fpu_kern_leave(td, ses->fpu_ctx); out1: @@ -444,5 +583,7 @@ out1: bzero(buf, enccrd->crd_len); free(buf, M_AESNI); } + if (authallocated) + free(authbuf, M_AESNI); return (error); } diff --git a/sys/crypto/aesni/aesni.h b/sys/crypto/aesni/aesni.h index ff1d1a2db551..5cd89252b859 100644 --- a/sys/crypto/aesni/aesni.h +++ b/sys/crypto/aesni/aesni.h @@ -88,6 +88,9 @@ void aesni_encrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to); void aesni_decrypt_ecb(int rounds, const void *key_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to); +void aesni_encrypt_icm(int rounds, const void *key_schedule /*__aligned(16)*/, + size_t len, const uint8_t *from, uint8_t *to, + const uint8_t iv[AES_BLOCK_LEN]); void aesni_encrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/, const void *tweak_schedule /*__aligned(16)*/, size_t len, @@ -96,6 +99,16 @@ void aesni_decrypt_xts(int rounds, const void *data_schedule /*__aligned(16)*/, const void *tweak_schedule /*__aligned(16)*/, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]); +/* GCM & GHASH functions */ +void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr); +int AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr); + int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, int keylen); uint8_t *aesni_cipher_alloc(struct cryptodesc *enccrd, struct cryptop *crp, diff --git a/sys/crypto/aesni/aesni_ghash.c b/sys/crypto/aesni/aesni_ghash.c new file mode 100644 index 000000000000..005ba81bc330 --- /dev/null +++ b/sys/crypto/aesni/aesni_ghash.c @@ -0,0 +1,803 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * $FreeBSD$ + * + */ + +/* + * Figure 5, 8 and 12 are copied from the Intel white paper: + * Intel® Carry-Less Multiplication Instruction and its Usage for + * Computing the GCM Mode + * + * and as such are: + * Copyright © 2010 Intel Corporation. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef _KERNEL +#include <crypto/aesni/aesni.h> +#else +#include <stdint.h> +#endif + +#include <wmmintrin.h> +#include <emmintrin.h> +#include <smmintrin.h> + +static inline int +m128icmp(__m128i a, __m128i b) +{ + __m128i cmp; + + cmp = _mm_cmpeq_epi32(a, b); + + return _mm_movemask_epi8(cmp) == 0xffff; +} + +#ifdef __i386__ +static inline __m128i +_mm_insert_epi64(__m128i a, int64_t b, const int ndx) +{ + + if (!ndx) { + a = _mm_insert_epi32(a, b, 0); + a = _mm_insert_epi32(a, b >> 32, 1); + } else { + a = _mm_insert_epi32(a, b, 2); + a = _mm_insert_epi32(a, b >> 32, 3); + } + + return a; +} +#endif + +/* some code from carry-less-multiplication-instruction-in-gcm-mode-paper.pdf */ + +/* Figure 5. Code Sample - Performing Ghash Using Algorithms 1 and 5 (C) */ +static void +gfmul(__m128i a, __m128i b, __m128i *res) +{ + __m128i tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9; + + tmp3 = _mm_clmulepi64_si128(a, b, 0x00); + tmp4 = _mm_clmulepi64_si128(a, b, 0x10); + tmp5 = _mm_clmulepi64_si128(a, b, 0x01); + tmp6 = _mm_clmulepi64_si128(a, b, 0x11); + + tmp4 = _mm_xor_si128(tmp4, tmp5); + tmp5 = _mm_slli_si128(tmp4, 8); + tmp4 = _mm_srli_si128(tmp4, 8); + tmp3 = _mm_xor_si128(tmp3, tmp5); + tmp6 = _mm_xor_si128(tmp6, tmp4); + + tmp7 = _mm_srli_epi32(tmp3, 31); + tmp8 = _mm_srli_epi32(tmp6, 31); + tmp3 = _mm_slli_epi32(tmp3, 1); + tmp6 = _mm_slli_epi32(tmp6, 1); + + tmp9 = _mm_srli_si128(tmp7, 12); + tmp8 = _mm_slli_si128(tmp8, 4); + tmp7 = _mm_slli_si128(tmp7, 4); + tmp3 = _mm_or_si128(tmp3, tmp7); + tmp6 = _mm_or_si128(tmp6, tmp8); + tmp6 = _mm_or_si128(tmp6, tmp9); + + tmp7 = _mm_slli_epi32(tmp3, 31); + tmp8 = _mm_slli_epi32(tmp3, 30); + tmp9 = _mm_slli_epi32(tmp3, 25); + + tmp7 = _mm_xor_si128(tmp7, tmp8); + tmp7 = _mm_xor_si128(tmp7, tmp9); + tmp8 = _mm_srli_si128(tmp7, 4); + tmp7 = _mm_slli_si128(tmp7, 12); + tmp3 = _mm_xor_si128(tmp3, tmp7); + + tmp2 = _mm_srli_epi32(tmp3, 1); + tmp4 = _mm_srli_epi32(tmp3, 2); + tmp5 = _mm_srli_epi32(tmp3, 7); + tmp2 = _mm_xor_si128(tmp2, tmp4); + tmp2 = _mm_xor_si128(tmp2, tmp5); + tmp2 = _mm_xor_si128(tmp2, tmp8); + tmp3 = _mm_xor_si128(tmp3, tmp2); + tmp6 = _mm_xor_si128(tmp6, tmp3); + + *res = tmp6; +} + +/* + * Figure 8. Code Sample - Performing Ghash Using an Aggregated Reduction + * Method */ +static void +reduce4(__m128i H1, __m128i H2, __m128i H3, __m128i H4, + __m128i X1, __m128i X2, __m128i X3, __m128i X4, __m128i *res) +{ + /*algorithm by Krzysztof Jankowski, Pierre Laurent - Intel*/ + __m128i H1_X1_lo, H1_X1_hi, H2_X2_lo, H2_X2_hi, H3_X3_lo, + H3_X3_hi, H4_X4_lo, H4_X4_hi, lo, hi; + __m128i tmp0, tmp1, tmp2, tmp3; + __m128i tmp4, tmp5, tmp6, tmp7; + __m128i tmp8, tmp9; + + H1_X1_lo = _mm_clmulepi64_si128(H1, X1, 0x00); + H2_X2_lo = _mm_clmulepi64_si128(H2, X2, 0x00); + H3_X3_lo = _mm_clmulepi64_si128(H3, X3, 0x00); + H4_X4_lo = _mm_clmulepi64_si128(H4, X4, 0x00); + + lo = _mm_xor_si128(H1_X1_lo, H2_X2_lo); + lo = _mm_xor_si128(lo, H3_X3_lo); + lo = _mm_xor_si128(lo, H4_X4_lo); + + H1_X1_hi = _mm_clmulepi64_si128(H1, X1, 0x11); + H2_X2_hi = _mm_clmulepi64_si128(H2, X2, 0x11); + H3_X3_hi = _mm_clmulepi64_si128(H3, X3, 0x11); + H4_X4_hi = _mm_clmulepi64_si128(H4, X4, 0x11); + + hi = _mm_xor_si128(H1_X1_hi, H2_X2_hi); + hi = _mm_xor_si128(hi, H3_X3_hi); + hi = _mm_xor_si128(hi, H4_X4_hi); + + tmp0 = _mm_shuffle_epi32(H1, 78); + tmp4 = _mm_shuffle_epi32(X1, 78); + tmp0 = _mm_xor_si128(tmp0, H1); + tmp4 = _mm_xor_si128(tmp4, X1); + tmp1 = _mm_shuffle_epi32(H2, 78); + tmp5 = _mm_shuffle_epi32(X2, 78); + tmp1 = _mm_xor_si128(tmp1, H2); + tmp5 = _mm_xor_si128(tmp5, X2); + tmp2 = _mm_shuffle_epi32(H3, 78); + tmp6 = _mm_shuffle_epi32(X3, 78); + tmp2 = _mm_xor_si128(tmp2, H3); + tmp6 = _mm_xor_si128(tmp6, X3); + tmp3 = _mm_shuffle_epi32(H4, 78); + tmp7 = _mm_shuffle_epi32(X4, 78); + tmp3 = _mm_xor_si128(tmp3, H4); + tmp7 = _mm_xor_si128(tmp7, X4); + + tmp0 = _mm_clmulepi64_si128(tmp0, tmp4, 0x00); + tmp1 = _mm_clmulepi64_si128(tmp1, tmp5, 0x00); + tmp2 = _mm_clmulepi64_si128(tmp2, tmp6, 0x00); + tmp3 = _mm_clmulepi64_si128(tmp3, tmp7, 0x00); + + tmp0 = _mm_xor_si128(tmp0, lo); + tmp0 = _mm_xor_si128(tmp0, hi); + tmp0 = _mm_xor_si128(tmp1, tmp0); + tmp0 = _mm_xor_si128(tmp2, tmp0); + tmp0 = _mm_xor_si128(tmp3, tmp0); + + tmp4 = _mm_slli_si128(tmp0, 8); + tmp0 = _mm_srli_si128(tmp0, 8); + + lo = _mm_xor_si128(tmp4, lo); + hi = _mm_xor_si128(tmp0, hi); + + tmp3 = lo; + tmp6 = hi; + + tmp7 = _mm_srli_epi32(tmp3, 31); + tmp8 = _mm_srli_epi32(tmp6, 31); + tmp3 = _mm_slli_epi32(tmp3, 1); + tmp6 = _mm_slli_epi32(tmp6, 1); + + tmp9 = _mm_srli_si128(tmp7, 12); + tmp8 = _mm_slli_si128(tmp8, 4); + tmp7 = _mm_slli_si128(tmp7, 4); + tmp3 = _mm_or_si128(tmp3, tmp7); + tmp6 = _mm_or_si128(tmp6, tmp8); + tmp6 = _mm_or_si128(tmp6, tmp9); + + tmp7 = _mm_slli_epi32(tmp3, 31); + tmp8 = _mm_slli_epi32(tmp3, 30); + tmp9 = _mm_slli_epi32(tmp3, 25); + + tmp7 = _mm_xor_si128(tmp7, tmp8); + tmp7 = _mm_xor_si128(tmp7, tmp9); + tmp8 = _mm_srli_si128(tmp7, 4); + tmp7 = _mm_slli_si128(tmp7, 12); + tmp3 = _mm_xor_si128(tmp3, tmp7); + + tmp2 = _mm_srli_epi32(tmp3, 1); + tmp4 = _mm_srli_epi32(tmp3, 2); + tmp5 = _mm_srli_epi32(tmp3, 7); + tmp2 = _mm_xor_si128(tmp2, tmp4); + tmp2 = _mm_xor_si128(tmp2, tmp5); + tmp2 = _mm_xor_si128(tmp2, tmp8); + tmp3 = _mm_xor_si128(tmp3, tmp2); + tmp6 = _mm_xor_si128(tmp6, tmp3); + + *res = tmp6; +} + +/* + * Figure 12. AES-GCM: Processing Four Blocks in Parallel with Aggregated + * Every Four Blocks + */ +/* + * per NIST SP-800-38D, 5.2.1.1, len(p) <= 2^39-256 (in bits), or + * 2^32-256*8*16 bytes. + */ +void +AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr) +{ + int i, j ,k; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i H, H2, H3, H4, Y, T; + __m128i *KEY = (__m128i*)key; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i last_block = _mm_setzero_si128(); + __m128i ONE = _mm_set_epi32(0, 1, 0, 0); + __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0); + __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6, + 7); + __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15); + __m128i X = _mm_setzero_si128(); + + if (ibytes == 96/8) { + Y = _mm_loadu_si128((__m128i*)ivec); + Y = _mm_insert_epi32(Y, 0x1000000, 3); + /*(Compute E[ZERO, KS] and E[Y0, KS] together*/ + tmp1 = _mm_xor_si128(X, KEY[0]); + tmp2 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]); + + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + T = _mm_aesenclast_si128(tmp2, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + } else { + tmp1 = _mm_xor_si128(X, KEY[0]); + for (j=1; j <nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + Y = _mm_setzero_si128(); + + for (i=0; i < ibytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + if (ibytes%16) { + for (j=0; j < ibytes%16; j++) + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, 0, 1); + + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /*Compute E(K, Y0)*/ + tmp1 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + T = _mm_aesenclast_si128(tmp1, KEY[nr]); + } + + gfmul(H,H,&H2); + gfmul(H,H2,&H3); + gfmul(H,H3,&H4); + + for (i=0; i<abytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i=i*4; i<abytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X,tmp1); + gfmul(X, H, &X); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<abytes%16; j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X,tmp1); + gfmul(X,H,&X); + } + + ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + ctr2 = _mm_add_epi64(ctr1, ONE); + ctr3 = _mm_add_epi64(ctr2, ONE); + ctr4 = _mm_add_epi64(ctr3, ONE); + ctr5 = _mm_add_epi64(ctr4, ONE); + ctr6 = _mm_add_epi64(ctr5, ONE); + ctr7 = _mm_add_epi64(ctr6, ONE); + ctr8 = _mm_add_epi64(ctr7, ONE); + + for (i=0; i<nbytes/16/8; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + + ctr1 = _mm_add_epi64(ctr1, EIGHT); + ctr2 = _mm_add_epi64(ctr2, EIGHT); + ctr3 = _mm_add_epi64(ctr3, EIGHT); + ctr4 = _mm_add_epi64(ctr4, EIGHT); + ctr5 = _mm_add_epi64(ctr5, EIGHT); + ctr6 = _mm_add_epi64(ctr6, EIGHT); + ctr7 = _mm_add_epi64(ctr7, EIGHT); + ctr8 = _mm_add_epi64(ctr8, EIGHT); + + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + + for (j=1; j<nr; j++) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + } + tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]); + tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]); + tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]); + tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]); + tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]); + tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]); + tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]); + tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]); + + tmp1 = _mm_xor_si128(tmp1, + _mm_loadu_si128(&((__m128i*)in)[i*8+0])); + tmp2 = _mm_xor_si128(tmp2, + _mm_loadu_si128(&((__m128i*)in)[i*8+1])); + tmp3 = _mm_xor_si128(tmp3, + _mm_loadu_si128(&((__m128i*)in)[i*8+2])); + tmp4 = _mm_xor_si128(tmp4, + _mm_loadu_si128(&((__m128i*)in)[i*8+3])); + tmp5 = _mm_xor_si128(tmp5, + _mm_loadu_si128(&((__m128i*)in)[i*8+4])); + tmp6 = _mm_xor_si128(tmp6, + _mm_loadu_si128(&((__m128i*)in)[i*8+5])); + tmp7 = _mm_xor_si128(tmp7, + _mm_loadu_si128(&((__m128i*)in)[i*8+6])); + tmp8 = _mm_xor_si128(tmp8, + _mm_loadu_si128(&((__m128i*)in)[i*8+7])); + + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + + tmp5 = _mm_xor_si128(X, tmp5); + reduce4(H, H2, H3, H4, tmp8, tmp7, tmp6, tmp5, &X); + } + for (k=i*8; k<nbytes/16; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + } + //If remains one incomplete block + if (nbytes%16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + last_block = tmp1; + for (j=0; j<nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + for ((void)j; j<16; j++) + ((unsigned char*)&last_block)[j] = 0; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X, H, &X); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1); + + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + _mm_storeu_si128((__m128i*)tag, T); +} + +/* My modification of _encrypt to be _decrypt */ +int +AES_GCM_decrypt(const unsigned char *in, unsigned char *out, + const unsigned char *addt, const unsigned char *ivec, + unsigned char *tag, uint32_t nbytes, uint32_t abytes, int ibytes, + const unsigned char *key, int nr) +{ + int i, j ,k; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i H, H2, H3, H4, Y, T; + __m128i *KEY = (__m128i*)key; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i last_block = _mm_setzero_si128(); + __m128i ONE = _mm_set_epi32(0, 1, 0, 0); + __m128i EIGHT = _mm_set_epi32(0, 8, 0, 0); + __m128i BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6, + 7); + __m128i BSWAP_MASK = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14, + 15); + __m128i X = _mm_setzero_si128(); + + if (ibytes == 96/8) { + Y = _mm_loadu_si128((__m128i*)ivec); + Y = _mm_insert_epi32(Y, 0x1000000, 3); + /*(Compute E[ZERO, KS] and E[Y0, KS] together*/ + tmp1 = _mm_xor_si128(X, KEY[0]); + tmp2 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[nr-1]); + + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + T = _mm_aesenclast_si128(tmp2, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + } else { + tmp1 = _mm_xor_si128(X, KEY[0]); + for (j=1; j <nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + H = _mm_aesenclast_si128(tmp1, KEY[nr]); + + H = _mm_shuffle_epi8(H, BSWAP_MASK); + Y = _mm_setzero_si128(); + + for (i=0; i < ibytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + if (ibytes%16) { + for (j=0; j < ibytes%16; j++) + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + } + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)ibytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, 0, 1); + + Y = _mm_xor_si128(Y, tmp1); + gfmul(Y, H, &Y); + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /*Compute E(K, Y0)*/ + tmp1 = _mm_xor_si128(Y, KEY[0]); + for (j=1; j < nr; j++) + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + T = _mm_aesenclast_si128(tmp1, KEY[nr]); + } + + gfmul(H,H,&H2); + gfmul(H,H2,&H3); + gfmul(H,H3,&H4); + + for (i=0; i<abytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)addt)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)addt)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)addt)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i=i*4; i<abytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X,tmp1); + gfmul(X, H, &X); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<abytes%16; j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X,tmp1); + gfmul(X,H,&X); + } + + /* This is where we validate the cipher text before decrypt */ + for (i = 0; i<nbytes/16/4; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)in)[i*4]); + tmp2 = _mm_loadu_si128(&((__m128i*)in)[i*4+1]); + tmp3 = _mm_loadu_si128(&((__m128i*)in)[i*4+2]); + tmp4 = _mm_loadu_si128(&((__m128i*)in)[i*4+3]); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + + tmp1 = _mm_xor_si128(X, tmp1); + + reduce4(H, H2, H3, H4, tmp4, tmp3, tmp2, tmp1, &X); + } + for (i = i*4; i<nbytes/16; i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)in)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + } + if (nbytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j<nbytes%16; j++) + ((unsigned char*)&last_block)[j] = in[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + gfmul(X, H, &X); + } + + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)nbytes*8, 0); + tmp1 = _mm_insert_epi64(tmp1, (uint64_t)abytes*8, 1); + + X = _mm_xor_si128(X, tmp1); + gfmul(X,H,&X); + X = _mm_shuffle_epi8(X, BSWAP_MASK); + T = _mm_xor_si128(X, T); + + if (!m128icmp(T, _mm_loadu_si128((__m128i*)tag))) + return 0; //in case the authentication failed + + ctr1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + ctr2 = _mm_add_epi64(ctr1, ONE); + ctr3 = _mm_add_epi64(ctr2, ONE); + ctr4 = _mm_add_epi64(ctr3, ONE); + ctr5 = _mm_add_epi64(ctr4, ONE); + ctr6 = _mm_add_epi64(ctr5, ONE); + ctr7 = _mm_add_epi64(ctr6, ONE); + ctr8 = _mm_add_epi64(ctr7, ONE); + + for (i=0; i<nbytes/16/8; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + + ctr1 = _mm_add_epi64(ctr1, EIGHT); + ctr2 = _mm_add_epi64(ctr2, EIGHT); + ctr3 = _mm_add_epi64(ctr3, EIGHT); + ctr4 = _mm_add_epi64(ctr4, EIGHT); + ctr5 = _mm_add_epi64(ctr5, EIGHT); + ctr6 = _mm_add_epi64(ctr6, EIGHT); + ctr7 = _mm_add_epi64(ctr7, EIGHT); + ctr8 = _mm_add_epi64(ctr8, EIGHT); + + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + + for (j=1; j<nr; j++) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + } + tmp1 =_mm_aesenclast_si128(tmp1, KEY[nr]); + tmp2 =_mm_aesenclast_si128(tmp2, KEY[nr]); + tmp3 =_mm_aesenclast_si128(tmp3, KEY[nr]); + tmp4 =_mm_aesenclast_si128(tmp4, KEY[nr]); + tmp5 =_mm_aesenclast_si128(tmp5, KEY[nr]); + tmp6 =_mm_aesenclast_si128(tmp6, KEY[nr]); + tmp7 =_mm_aesenclast_si128(tmp7, KEY[nr]); + tmp8 =_mm_aesenclast_si128(tmp8, KEY[nr]); + + tmp1 = _mm_xor_si128(tmp1, + _mm_loadu_si128(&((__m128i*)in)[i*8+0])); + tmp2 = _mm_xor_si128(tmp2, + _mm_loadu_si128(&((__m128i*)in)[i*8+1])); + tmp3 = _mm_xor_si128(tmp3, + _mm_loadu_si128(&((__m128i*)in)[i*8+2])); + tmp4 = _mm_xor_si128(tmp4, + _mm_loadu_si128(&((__m128i*)in)[i*8+3])); + tmp5 = _mm_xor_si128(tmp5, + _mm_loadu_si128(&((__m128i*)in)[i*8+4])); + tmp6 = _mm_xor_si128(tmp6, + _mm_loadu_si128(&((__m128i*)in)[i*8+5])); + tmp7 = _mm_xor_si128(tmp7, + _mm_loadu_si128(&((__m128i*)in)[i*8+6])); + tmp8 = _mm_xor_si128(tmp8, + _mm_loadu_si128(&((__m128i*)in)[i*8+7])); + + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); + _mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + } + for (k=i*8; k<nbytes/16; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi64(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + } + //If remains one incomplete block + if (nbytes%16) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + for (j=1; j<nr-1; j+=2) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[j+1]); + } + tmp1 = _mm_aesenc_si128(tmp1, KEY[nr-1]); + tmp1 = _mm_aesenclast_si128(tmp1, KEY[nr]); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + last_block = tmp1; + for (j=0; j<nbytes%16; j++) + out[k*16+j] = ((unsigned char*)&last_block)[j]; + } + return 1; //when sucessfull returns 1 +} diff --git a/sys/crypto/aesni/aesni_wrap.c b/sys/crypto/aesni/aesni_wrap.c index 39819a69d1f7..e5e2a6991d9a 100644 --- a/sys/crypto/aesni/aesni_wrap.c +++ b/sys/crypto/aesni/aesni_wrap.c @@ -3,8 +3,13 @@ * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org> + * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -29,15 +34,18 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); - + #include <sys/param.h> #include <sys/libkern.h> #include <sys/malloc.h> #include <sys/proc.h> #include <sys/systm.h> #include <crypto/aesni/aesni.h> - + +#include <opencrypto/gmac.h> + #include "aesencdec.h" +#include <smmintrin.h> MALLOC_DECLARE(M_AESNI); @@ -176,6 +184,104 @@ aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, } } +/* + * mixed endian increment, low 64bits stored in hi word to be compatible + * with _icm's BSWAP. + */ +static inline __m128i +nextc(__m128i x) +{ + const __m128i ONE = _mm_setr_epi32(0, 0, 1, 0); + const __m128i ZERO = _mm_setzero_si128(); + + x = _mm_add_epi64(x, ONE); + __m128i t = _mm_cmpeq_epi64(x, ZERO); + t = _mm_unpackhi_epi64(t, ZERO); + x = _mm_sub_epi64(x, t); + + return x; +} + +void +aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len, + const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) +{ + __m128i tot; + __m128i tmp1, tmp2, tmp3, tmp4; + __m128i tmp5, tmp6, tmp7, tmp8; + __m128i ctr1, ctr2, ctr3, ctr4; + __m128i ctr5, ctr6, ctr7, ctr8; + __m128i BSWAP_EPI64; + __m128i tout[8]; + struct blocks8 *top; + const struct blocks8 *blks; + size_t i, cnt; + + BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7); + + ctr1 = _mm_loadu_si128((__m128i*)iv); + ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + + cnt = len / AES_BLOCK_LEN / 8; + for (i = 0; i < cnt; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr2 = nextc(ctr1); + tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64); + ctr3 = nextc(ctr2); + tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64); + ctr4 = nextc(ctr3); + tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64); + ctr5 = nextc(ctr4); + tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64); + ctr6 = nextc(ctr5); + tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64); + ctr7 = nextc(ctr6); + tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64); + ctr8 = nextc(ctr7); + tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64); + ctr1 = nextc(ctr8); + + blks = (const struct blocks8 *)from; + top = (struct blocks8 *)to; + aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4, + tmp5, tmp6, tmp7, tmp8, tout); + + top->blk[0] = blks->blk[0] ^ tout[0]; + top->blk[1] = blks->blk[1] ^ tout[1]; + top->blk[2] = blks->blk[2] ^ tout[2]; + top->blk[3] = blks->blk[3] ^ tout[3]; + top->blk[4] = blks->blk[4] ^ tout[4]; + top->blk[5] = blks->blk[5] ^ tout[5]; + top->blk[6] = blks->blk[6] ^ tout[6]; + top->blk[7] = blks->blk[7] ^ tout[7]; + + from += AES_BLOCK_LEN * 8; + to += AES_BLOCK_LEN * 8; + } + i *= 8; + cnt = len / AES_BLOCK_LEN; + for (; i < cnt; i++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = nextc(ctr1); + + tot = aesni_enc(rounds - 1, key_schedule, tmp1); + + tot = tot ^ _mm_loadu_si128((const __m128i *)from); + _mm_storeu_si128((__m128i *)to, tot); + + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } + + /* handle remaining partial round */ + if (len % AES_BLOCK_LEN != 0) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tot = aesni_enc(rounds - 1, key_schedule, tmp1); + tot = tot ^ _mm_loadu_si128((const __m128i *)from); + memcpy(to, &tot, len % AES_BLOCK_LEN); + } +} + #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -333,8 +439,15 @@ int aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, int keylen) { + int decsched; + + decsched = 1; switch (ses->algo) { + case CRYPTO_AES_ICM: + case CRYPTO_AES_NIST_GCM_16: + decsched = 0; + /* FALLTHROUGH */ case CRYPTO_AES_CBC: switch (keylen) { case 128: @@ -347,6 +460,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, ses->rounds = AES256_ROUNDS; break; default: + CRYPTDEB("invalid CBC/ICM/GCM key length"); return (EINVAL); } break; @@ -359,6 +473,7 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, ses->rounds = AES256_ROUNDS; break; default: + CRYPTDEB("invalid XTS key length"); return (EINVAL); } break; @@ -367,13 +482,13 @@ aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, } aesni_set_enckey(key, ses->enc_schedule, ses->rounds); - aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds); - if (ses->algo == CRYPTO_AES_CBC) - arc4rand(ses->iv, sizeof(ses->iv), 0); - else /* if (ses->algo == CRYPTO_AES_XTS) */ { + if (decsched) + aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, + ses->rounds); + + if (ses->algo == CRYPTO_AES_XTS) aesni_set_enckey(key + keylen / 16, ses->xts_schedule, ses->rounds); - } return (0); } diff --git a/sys/crypto/via/padlock_hash.c b/sys/crypto/via/padlock_hash.c index 9dffc40a6824..c952b63503ea 100644 --- a/sys/crypto/via/padlock_hash.c +++ b/sys/crypto/via/padlock_hash.c @@ -75,7 +75,7 @@ struct padlock_sha_ctx { CTASSERT(sizeof(struct padlock_sha_ctx) <= sizeof(union authctx)); static void padlock_sha_init(struct padlock_sha_ctx *ctx); -static int padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, +static int padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize); static void padlock_sha1_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); @@ -83,16 +83,16 @@ static void padlock_sha256_final(uint8_t *hash, struct padlock_sha_ctx *ctx); static struct auth_hash padlock_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", 20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx), - (void (*)(void *))padlock_sha_init, - (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update, + (void (*)(void *))padlock_sha_init, NULL, NULL, + (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, (void (*)(uint8_t *, void *))padlock_sha1_final }; static struct auth_hash padlock_hmac_sha256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", 32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(struct padlock_sha_ctx), - (void (*)(void *))padlock_sha_init, - (int (*)(void *, uint8_t *, uint16_t))padlock_sha_update, + (void (*)(void *))padlock_sha_init, NULL, NULL, + (int (*)(void *, const uint8_t *, uint16_t))padlock_sha_update, (void (*)(uint8_t *, void *))padlock_sha256_final }; @@ -167,7 +167,7 @@ padlock_sha_init(struct padlock_sha_ctx *ctx) } static int -padlock_sha_update(struct padlock_sha_ctx *ctx, uint8_t *buf, uint16_t bufsize) +padlock_sha_update(struct padlock_sha_ctx *ctx, const uint8_t *buf, uint16_t bufsize) { if (ctx->psc_size - ctx->psc_offset < bufsize) { diff --git a/sys/geom/eli/g_eli_crypto.c b/sys/geom/eli/g_eli_crypto.c index 63d36f2c7891..43eabf49e3db 100644 --- a/sys/geom/eli/g_eli_crypto.c +++ b/sys/geom/eli/g_eli_crypto.c @@ -101,7 +101,7 @@ g_eli_crypto_cipher(u_int algo, int enc, u_char *data, size_t datasize, crp->crp_opaque = NULL; crp->crp_callback = g_eli_crypto_done; crp->crp_buf = (void *)data; - crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_REL; + crp->crp_flags = CRYPTO_F_CBIFSYNC; crp->crp_desc = crd; error = crypto_dispatch(crp); diff --git a/sys/geom/eli/g_eli_integrity.c b/sys/geom/eli/g_eli_integrity.c index 84b781ccbde5..f7bf1fdad713 100644 --- a/sys/geom/eli/g_eli_integrity.c +++ b/sys/geom/eli/g_eli_integrity.c @@ -479,7 +479,7 @@ g_eli_auth_run(struct g_eli_worker *wr, struct bio *bp) crp->crp_opaque = (void *)bp; crp->crp_buf = (void *)data; data += encr_secsize; - crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_REL; + crp->crp_flags = CRYPTO_F_CBIFSYNC; if (g_eli_batch) crp->crp_flags |= CRYPTO_F_BATCH; if (bp->bio_cmd == BIO_WRITE) { diff --git a/sys/geom/eli/g_eli_privacy.c b/sys/geom/eli/g_eli_privacy.c index 2e6a2112f487..a60efe8f1445 100644 --- a/sys/geom/eli/g_eli_privacy.c +++ b/sys/geom/eli/g_eli_privacy.c @@ -286,7 +286,7 @@ g_eli_crypto_run(struct g_eli_worker *wr, struct bio *bp) crp->crp_callback = g_eli_crypto_write_done; else /* if (bp->bio_cmd == BIO_READ) */ crp->crp_callback = g_eli_crypto_read_done; - crp->crp_flags = CRYPTO_F_CBIFSYNC | CRYPTO_F_REL; + crp->crp_flags = CRYPTO_F_CBIFSYNC; if (g_eli_batch) crp->crp_flags |= CRYPTO_F_BATCH; crp->crp_desc = crd; diff --git a/sys/libkern/timingsafe_bcmp.c b/sys/libkern/timingsafe_bcmp.c new file mode 100644 index 000000000000..ebada620d2d7 --- /dev/null +++ b/sys/libkern/timingsafe_bcmp.c @@ -0,0 +1,32 @@ +/* $OpenBSD: timingsafe_bcmp.c,v 1.2 2014/06/10 04:16:57 deraadt Exp $ */ +/* + * Copyright (c) 2010 Damien Miller. All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * $FreeBSD$ + * + */ + +#include <sys/libkern.h> + +int +timingsafe_bcmp(const void *b1, const void *b2, size_t n) +{ + const unsigned char *p1 = b1, *p2 = b2; + int ret = 0; + + for (; n > 0; n--) + ret |= *p1++ ^ *p2++; + return (ret != 0); +} diff --git a/sys/mips/rmi/dev/sec/rmisec.c b/sys/mips/rmi/dev/sec/rmisec.c index 6501f75f88f7..31098bc65f52 100644 --- a/sys/mips/rmi/dev/sec/rmisec.c +++ b/sys/mips/rmi/dev/sec/rmisec.c @@ -423,13 +423,8 @@ xlr_sec_process(device_t dev, struct cryptop *crp, int hint) cmd->op.source_buf = (uint64_t) (unsigned long)crp->crp_buf; cmd->op.source_buf_size = crp->crp_ilen; - if (crp->crp_flags & CRYPTO_F_REL) { - cmd->op.dest_buf = (uint64_t) (unsigned long)crp->crp_buf; - cmd->op.dest_buf_size = crp->crp_ilen; - } else { - cmd->op.dest_buf = (uint64_t) (unsigned long)crp->crp_buf; - cmd->op.dest_buf_size = crp->crp_ilen; - } + cmd->op.dest_buf = (uint64_t) (unsigned long)crp->crp_buf; + cmd->op.dest_buf_size = crp->crp_ilen; cmd->op.num_packets = 1; cmd->op.num_fragments = 1; diff --git a/sys/modules/aesni/Makefile b/sys/modules/aesni/Makefile index 26dbedc960e4..6fdfc7e32694 100644 --- a/sys/modules/aesni/Makefile +++ b/sys/modules/aesni/Makefile @@ -7,13 +7,18 @@ SRCS= aesni.c SRCS+= aeskeys_${MACHINE_CPUARCH}.S SRCS+= device_if.h bus_if.h opt_bus.h cryptodev_if.h -OBJS+= aesni_wrap.o +OBJS+= aesni_ghash.o aesni_wrap.o # Remove -nostdinc so we can get the intrinsics. +aesni_ghash.o: aesni_ghash.c + # XXX - gcc won't understand -mpclmul + ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} \ + -mmmx -msse -msse4 -maes -mpclmul ${.IMPSRC} + ${CTFCONVERT_CMD} + aesni_wrap.o: aesni_wrap.c ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} ${PROF} \ - -mmmx -msse -maes ${.IMPSRC} + -mmmx -msse -msse4 -maes ${.IMPSRC} ${CTFCONVERT_CMD} .include <bsd.kmod.mk> - diff --git a/sys/modules/crypto/Makefile b/sys/modules/crypto/Makefile index 0e3c8ebfddc7..9153445de7d6 100644 --- a/sys/modules/crypto/Makefile +++ b/sys/modules/crypto/Makefile @@ -18,6 +18,7 @@ SRCS += camellia.c camellia-api.c SRCS += des_ecb.c des_enc.c des_setkey.c SRCS += sha1.c sha2.c sha256c.c SRCS += siphash.c +SRCS += gmac.c gfmult.c SRCS += opt_param.h cryptodev_if.h bus_if.h device_if.h SRCS += opt_ddb.h diff --git a/sys/opencrypto/criov.c b/sys/opencrypto/criov.c index 96f0dfc59b66..f94631ec8ca0 100644 --- a/sys/opencrypto/criov.c +++ b/sys/opencrypto/criov.c @@ -99,35 +99,31 @@ cuio_copyback(struct uio* uio, int off, int len, caddr_t cp) } /* - * Return a pointer to iov/offset of location in iovec list. + * Return the index and offset of location in iovec list. */ -struct iovec * +int cuio_getptr(struct uio *uio, int loc, int *off) { - struct iovec *iov = uio->uio_iov; - int iol = uio->uio_iovcnt; + int ind, len; - while (loc >= 0) { - /* Normal end of search */ - if (loc < iov->iov_len) { + ind = 0; + while (loc >= 0 && ind < uio->uio_iovcnt) { + len = uio->uio_iov[ind].iov_len; + if (len > loc) { *off = loc; - return (iov); + return (ind); } + loc -= len; + ind++; + } - loc -= iov->iov_len; - if (iol == 0) { - if (loc == 0) { - /* Point at the end of valid data */ - *off = iov->iov_len; - return (iov); - } else - return (NULL); - } else { - iov++, iol--; - } - } + if (ind > 0 && loc == 0) { + ind--; + *off = uio->uio_iov[ind].iov_len; + return (ind); + } - return (NULL); + return (-1); } /* @@ -196,3 +192,47 @@ crypto_apply(int flags, caddr_t buf, int off, int len, error = (*f)(arg, buf + off, len); return (error); } + +void +crypto_mbuftoiov(struct mbuf *mbuf, struct iovec **iovptr, int *cnt, + int *allocated) +{ + struct iovec *iov; + struct mbuf *m, *mtmp; + int i, j; + + *allocated = 0; + iov = *iovptr; + if (iov == NULL) + *cnt = 0; + + m = mbuf; + i = 0; + while (m != NULL) { + if (i == *cnt) { + /* we need to allocate a larger array */ + j = 1; + mtmp = m; + while ((mtmp = mtmp->m_next) != NULL) + j++; + iov = malloc(sizeof *iov * (i + j), M_CRYPTO_DATA, + M_WAITOK); + *allocated = 1; + *cnt = i + j; + memcpy(iov, *iovptr, sizeof *iov * i); + } + + iov[i].iov_base = m->m_data; + iov[i].iov_len = m->m_len; + + i++; + m = m->m_next; + } + + if (*allocated) + KASSERT(*cnt == i, ("did not allocate correct amount: %d != %d", + *cnt, i)); + + *iovptr = iov; + *cnt = i; +} diff --git a/sys/opencrypto/crypto.c b/sys/opencrypto/crypto.c index d1df283f21a6..0ecdeb302dea 100644 --- a/sys/opencrypto/crypto.c +++ b/sys/opencrypto/crypto.c @@ -368,9 +368,8 @@ again: best = cap; } } - if (best != NULL) - return best; - if (match == CRYPTOCAP_F_HARDWARE && (flags & CRYPTOCAP_F_SOFTWARE)) { + if (best == NULL && match == CRYPTOCAP_F_HARDWARE && + (flags & CRYPTOCAP_F_SOFTWARE)) { /* sort of an Algol 68-style for loop */ match = CRYPTOCAP_F_SOFTWARE; goto again; @@ -421,9 +420,12 @@ crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int crid) (*sid) <<= 32; (*sid) |= (lid & 0xffffffff); cap->cc_sessions++; - } - } else + } else + CRYPTDEB("dev newsession failed"); + } else { + CRYPTDEB("no driver"); err = EINVAL; + } CRYPTO_DRIVER_UNLOCK(); return err; } diff --git a/sys/opencrypto/cryptodev.c b/sys/opencrypto/cryptodev.c index bf461381cfbd..14015048a52d 100644 --- a/sys/opencrypto/cryptodev.c +++ b/sys/opencrypto/cryptodev.c @@ -3,6 +3,12 @@ /*- * Copyright (c) 2001 Theo de Raadt * Copyright (c) 2002-2006 Sam Leffler, Errno Consulting + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -36,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include "opt_compat.h" +#include "opt_kdtrace.h" #include <sys/param.h> #include <sys/systm.h> @@ -55,10 +62,15 @@ __FBSDID("$FreeBSD$"); #include <sys/fcntl.h> #include <sys/bus.h> #include <sys/user.h> +#include <sys/sdt.h> #include <opencrypto/cryptodev.h> #include <opencrypto/xform.h> +SDT_PROVIDER_DECLARE(opencrypto); + +SDT_PROBE_DEFINE1(opencrypto, dev, ioctl, error, "int"/*line number*/); + #ifdef COMPAT_FREEBSD32 #include <sys/mount.h> #include <compat/freebsd32/freebsd32.h> @@ -315,6 +327,8 @@ static int csefree(struct csession *); static int cryptodev_op(struct csession *, struct crypt_op *, struct ucred *, struct thread *td); +static int cryptodev_aead(struct csession *, struct crypt_aead *, + struct ucred *, struct thread *); static int cryptodev_key(struct crypt_kop *); static int cryptodev_find(struct crypt_find_op *); @@ -324,15 +338,23 @@ static int cryptodev_find(struct crypt_find_op *); * by device name/class or through search constraints. */ static int -checkforsoftware(int crid) +checkforsoftware(int *cridp) { + int crid; + + crid = *cridp; if (!crypto_devallowsoft) { - if (crid & CRYPTOCAP_F_SOFTWARE) - return EINVAL; /* XXX */ + if (crid & CRYPTOCAP_F_SOFTWARE) { + if (crid & CRYPTOCAP_F_HARDWARE) { + *cridp = CRYPTOCAP_F_HARDWARE; + return 0; + } + return EINVAL; + } if ((crid & CRYPTOCAP_F_HARDWARE) == 0 && (crypto_getcaps(crid) & CRYPTOCAP_F_HARDWARE) == 0) - return EINVAL; /* XXX */ + return EINVAL; } return 0; } @@ -352,6 +374,7 @@ cryptof_ioctl( struct csession *cse; struct session_op *sop; struct crypt_op *cop; + struct crypt_aead *caead; struct enc_xform *txform = NULL; struct auth_hash *thash = NULL; struct crypt_kop *kop; @@ -412,7 +435,15 @@ cryptof_ioctl( case CRYPTO_CAMELLIA_CBC: txform = &enc_xform_camellia; break; + case CRYPTO_AES_ICM: + txform = &enc_xform_aes_icm; + break; + case CRYPTO_AES_NIST_GCM_16: + txform = &enc_xform_aes_nist_gcm; + break; + default: + CRYPTDEB("invalid cipher"); return (EINVAL); } @@ -437,6 +468,16 @@ cryptof_ioctl( case CRYPTO_RIPEMD160_HMAC: thash = &auth_hash_hmac_ripemd_160; break; + case CRYPTO_AES_128_NIST_GMAC: + thash = &auth_hash_nist_gmac_aes_128; + break; + case CRYPTO_AES_192_NIST_GMAC: + thash = &auth_hash_nist_gmac_aes_192; + break; + case CRYPTO_AES_256_NIST_GMAC: + thash = &auth_hash_nist_gmac_aes_256; + break; + #ifdef notdef case CRYPTO_MD5: thash = &auth_hash_md5; @@ -449,6 +490,7 @@ cryptof_ioctl( thash = &auth_hash_null; break; default: + CRYPTDEB("invalid mac"); return (EINVAL); } @@ -460,6 +502,7 @@ cryptof_ioctl( crie.cri_klen = sop->keylen * 8; if (sop->keylen > txform->maxkey || sop->keylen < txform->minkey) { + CRYPTDEB("invalid cipher parameters"); error = EINVAL; goto bail; } @@ -467,8 +510,10 @@ cryptof_ioctl( crie.cri_key = malloc(crie.cri_klen / 8, M_XDATA, M_WAITOK); if ((error = copyin(sop->key, crie.cri_key, - crie.cri_klen / 8))) + crie.cri_klen / 8))) { + CRYPTDEB("invalid key"); goto bail; + } if (thash) crie.cri_next = &cria; } @@ -477,6 +522,7 @@ cryptof_ioctl( cria.cri_alg = thash->type; cria.cri_klen = sop->mackeylen * 8; if (sop->mackeylen != thash->keysize) { + CRYPTDEB("invalid mac key length"); error = EINVAL; goto bail; } @@ -485,8 +531,10 @@ cryptof_ioctl( cria.cri_key = malloc(cria.cri_klen / 8, M_XDATA, M_WAITOK); if ((error = copyin(sop->mackey, cria.cri_key, - cria.cri_klen / 8))) + cria.cri_klen / 8))) { + CRYPTDEB("invalid mac key"); goto bail; + } } } @@ -497,14 +545,18 @@ cryptof_ioctl( #endif ) { crid = SES2(sop)->crid; - error = checkforsoftware(crid); - if (error) + error = checkforsoftware(&crid); + if (error) { + CRYPTDEB("checkforsoftware"); goto bail; + } } else crid = CRYPTOCAP_F_HARDWARE; error = crypto_newsession(&sid, (txform ? &crie : &cria), crid); - if (error) + if (error) { + CRYPTDEB("crypto_newsession"); goto bail; + } cse = csecreate(fcr, sid, crie.cri_key, crie.cri_klen, cria.cri_key, cria.cri_klen, sop->cipher, sop->mac, txform, @@ -513,6 +565,7 @@ cryptof_ioctl( if (cse == NULL) { crypto_freesession(sid); error = EINVAL; + CRYPTDEB("csecreate"); goto bail; } sop->ses = cse->ses; @@ -559,8 +612,10 @@ bail: #endif cop = (struct crypt_op *)data; cse = csefind(fcr, cop->ses); - if (cse == NULL) + if (cse == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } error = cryptodev_op(cse, cop, active_cred, td); #ifdef COMPAT_FREEBSD32 if (error == 0 && cmd == CIOCCRYPT32) @@ -614,6 +669,13 @@ bail: case CIOCFINDDEV: error = cryptodev_find((struct crypt_find_op *)data); break; + case CIOCCRYPTAEAD: + caead = (struct crypt_aead *)data; + cse = csefind(fcr, caead->ses); + if (cse == NULL) + return (EINVAL); + error = cryptodev_aead(cse, caead, active_cred, td); + break; default: error = EINVAL; break; @@ -636,12 +698,16 @@ cryptodev_op( struct cryptodesc *crde = NULL, *crda = NULL; int error; - if (cop->len > 256*1024-4) + if (cop->len > 256*1024-4) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (E2BIG); + } if (cse->txform) { - if (cop->len == 0 || (cop->len % cse->txform->blocksize) != 0) + if (cop->len == 0 || (cop->len % cse->txform->blocksize) != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); return (EINVAL); + } } cse->uio.uio_iov = &cse->iovec; @@ -661,6 +727,7 @@ cryptodev_op( crp = crypto_getreq((cse->txform != NULL) + (cse->thash != NULL)); if (crp == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = ENOMEM; goto bail; } @@ -673,13 +740,17 @@ cryptodev_op( if (cse->txform) crde = crp->crp_desc; else { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } } - if ((error = copyin(cop->src, cse->uio.uio_iov[0].iov_base, cop->len))) + if ((error = copyin(cop->src, cse->uio.uio_iov[0].iov_base, + cop->len))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (crda) { crda->crd_skip = 0; @@ -714,15 +785,20 @@ cryptodev_op( if (cop->iv) { if (crde == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } if (cse->cipher == CRYPTO_ARC4) { /* XXX use flag? */ + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } - if ((error = copyin(cop->iv, cse->tmp_iv, cse->txform->blocksize))) + if ((error = copyin(cop->iv, cse->tmp_iv, + cse->txform->blocksize))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } bcopy(cse->tmp_iv, crde->crd_iv, cse->txform->blocksize); crde->crd_flags |= CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; crde->crd_skip = 0; @@ -735,6 +811,7 @@ cryptodev_op( } if (cop->mac && crda == NULL) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = EINVAL; goto bail; } @@ -753,8 +830,10 @@ again: error = msleep(crp, &cse->lock, PWAIT, "crydev", 0); mtx_unlock(&cse->lock); - if (error != 0) + if (error != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (crp->crp_etype == EAGAIN) { crp->crp_etype = 0; @@ -763,23 +842,30 @@ again: } if (crp->crp_etype != 0) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = crp->crp_etype; goto bail; } if (cse->error) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); error = cse->error; goto bail; } if (cop->dst && - (error = copyout(cse->uio.uio_iov[0].iov_base, cop->dst, cop->len))) + (error = copyout(cse->uio.uio_iov[0].iov_base, cop->dst, + cop->len))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } if (cop->mac && (error = copyout((caddr_t)cse->uio.uio_iov[0].iov_base + cop->len, - cop->mac, cse->thash->hashsize))) + cop->mac, cse->thash->hashsize))) { + SDT_PROBE1(opencrypto, dev, ioctl, error, __LINE__); goto bail; + } bail: if (crp) @@ -791,6 +877,151 @@ bail: } static int +cryptodev_aead( + struct csession *cse, + struct crypt_aead *caead, + struct ucred *active_cred, + struct thread *td) +{ + struct uio *uio; + struct cryptop *crp = NULL; + struct cryptodesc *crde = NULL, *crda = NULL; + int error; + + if (caead->len > 256*1024-4 || caead->aadlen > 256*1024-4) + return (E2BIG); + + if (cse->txform == NULL || cse->thash == NULL || caead->tag == NULL || + (caead->len % cse->txform->blocksize) != 0) + return (EINVAL); + + uio = &cse->uio; + uio->uio_iov = &cse->iovec; + uio->uio_iovcnt = 1; + uio->uio_offset = 0; + uio->uio_resid = caead->len + caead->aadlen + cse->thash->hashsize; + uio->uio_segflg = UIO_SYSSPACE; + uio->uio_rw = UIO_WRITE; + uio->uio_td = td; + uio->uio_iov[0].iov_len = uio->uio_resid; + + uio->uio_iov[0].iov_base = malloc(uio->uio_iov[0].iov_len, + M_XDATA, M_WAITOK); + + crp = crypto_getreq(2); + if (crp == NULL) { + error = ENOMEM; + goto bail; + } + + crda = crp->crp_desc; + crde = crda->crd_next; + + if ((error = copyin(caead->src, cse->uio.uio_iov[0].iov_base, + caead->len))) + goto bail; + + if ((error = copyin(caead->aad, (char *)cse->uio.uio_iov[0].iov_base + + caead->len, caead->aadlen))) + goto bail; + + crda->crd_skip = caead->len; + crda->crd_len = caead->aadlen; + crda->crd_inject = caead->len + caead->aadlen; + + crda->crd_alg = cse->mac; + crda->crd_key = cse->mackey; + crda->crd_klen = cse->mackeylen * 8; + + if (caead->op == COP_ENCRYPT) + crde->crd_flags |= CRD_F_ENCRYPT; + else + crde->crd_flags &= ~CRD_F_ENCRYPT; + /* crde->crd_skip set below */ + crde->crd_len = caead->len; + crde->crd_inject = 0; + + crde->crd_alg = cse->cipher; + crde->crd_key = cse->key; + crde->crd_klen = cse->keylen * 8; + + crp->crp_ilen = caead->len + caead->aadlen; + crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIMM + | (caead->flags & COP_F_BATCH); + crp->crp_buf = (caddr_t)&cse->uio.uio_iov; + crp->crp_callback = (int (*) (struct cryptop *)) cryptodev_cb; + crp->crp_sid = cse->sid; + crp->crp_opaque = (void *)cse; + + if (caead->iv) { + if (caead->ivlen > sizeof cse->tmp_iv) { + error = EINVAL; + goto bail; + } + + if ((error = copyin(caead->iv, cse->tmp_iv, caead->ivlen))) + goto bail; + bcopy(cse->tmp_iv, crde->crd_iv, caead->ivlen); + crde->crd_flags |= CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; + crde->crd_skip = 0; + } else { + crde->crd_flags |= CRD_F_IV_PRESENT; + crde->crd_skip = cse->txform->blocksize; + crde->crd_len -= cse->txform->blocksize; + } + + if ((error = copyin(caead->tag, (caddr_t)cse->uio.uio_iov[0].iov_base + + caead->len + caead->aadlen, cse->thash->hashsize))) + goto bail; +again: + /* + * Let the dispatch run unlocked, then, interlock against the + * callback before checking if the operation completed and going + * to sleep. This insures drivers don't inherit our lock which + * results in a lock order reversal between crypto_dispatch forced + * entry and the crypto_done callback into us. + */ + error = crypto_dispatch(crp); + mtx_lock(&cse->lock); + if (error == 0 && (crp->crp_flags & CRYPTO_F_DONE) == 0) + error = msleep(crp, &cse->lock, PWAIT, "crydev", 0); + mtx_unlock(&cse->lock); + + if (error != 0) + goto bail; + + if (crp->crp_etype == EAGAIN) { + crp->crp_etype = 0; + crp->crp_flags &= ~CRYPTO_F_DONE; + goto again; + } + + if (crp->crp_etype != 0) { + error = crp->crp_etype; + goto bail; + } + + if (cse->error) { + error = cse->error; + goto bail; + } + + if (caead->dst && (error = copyout(cse->uio.uio_iov[0].iov_base, + caead->dst, caead->len))) + goto bail; + + if ((error = copyout((caddr_t)cse->uio.uio_iov[0].iov_base + + caead->len + caead->aadlen, caead->tag, cse->thash->hashsize))) + goto bail; + +bail: + crypto_freereq(crp); + free(cse->uio.uio_iov[0].iov_base, M_XDATA); + + return (error); +} + +static int cryptodev_cb(void *op) { struct cryptop *crp = (struct cryptop *) op; @@ -919,14 +1150,16 @@ static int cryptodev_find(struct crypt_find_op *find) { device_t dev; + size_t fnlen = sizeof find->name; if (find->crid != -1) { dev = crypto_find_device_byhid(find->crid); if (dev == NULL) return (ENOENT); - strlcpy(find->name, device_get_nameunit(dev), - sizeof(find->name)); + strncpy(find->name, device_get_nameunit(dev), fnlen); + find->name[fnlen - 1] = '\x0'; } else { + find->name[fnlen - 1] = '\x0'; find->crid = crypto_find_driver(find->name); if (find->crid == -1) return (ENOENT); diff --git a/sys/opencrypto/cryptodev.h b/sys/opencrypto/cryptodev.h index e2995221333d..f434843c289d 100644 --- a/sys/opencrypto/cryptodev.h +++ b/sys/opencrypto/cryptodev.h @@ -23,6 +23,12 @@ * PURPOSE. * * Copyright (c) 2001 Theo de Raadt + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -100,20 +106,23 @@ #define CAMELLIA_BLOCK_LEN 16 #define EALG_MAX_BLOCK_LEN AES_BLOCK_LEN /* Keep this updated */ +/* Maximum hash algorithm result length */ +#define AALG_MAX_RESULT_LEN 64 /* Keep this updated */ + #define CRYPTO_ALGORITHM_MIN 1 -#define CRYPTO_DES_CBC 1 -#define CRYPTO_3DES_CBC 2 -#define CRYPTO_BLF_CBC 3 -#define CRYPTO_CAST_CBC 4 -#define CRYPTO_SKIPJACK_CBC 5 -#define CRYPTO_MD5_HMAC 6 -#define CRYPTO_SHA1_HMAC 7 -#define CRYPTO_RIPEMD160_HMAC 8 -#define CRYPTO_MD5_KPDK 9 -#define CRYPTO_SHA1_KPDK 10 -#define CRYPTO_RIJNDAEL128_CBC 11 /* 128 bit blocksize */ -#define CRYPTO_AES_CBC 11 /* 128 bit blocksize -- the same as above */ -#define CRYPTO_ARC4 12 +#define CRYPTO_DES_CBC 1 +#define CRYPTO_3DES_CBC 2 +#define CRYPTO_BLF_CBC 3 +#define CRYPTO_CAST_CBC 4 +#define CRYPTO_SKIPJACK_CBC 5 +#define CRYPTO_MD5_HMAC 6 +#define CRYPTO_SHA1_HMAC 7 +#define CRYPTO_RIPEMD160_HMAC 8 +#define CRYPTO_MD5_KPDK 9 +#define CRYPTO_SHA1_KPDK 10 +#define CRYPTO_RIJNDAEL128_CBC 11 /* 128 bit blocksize */ +#define CRYPTO_AES_CBC 11 /* 128 bit blocksize -- the same as above */ +#define CRYPTO_ARC4 12 #define CRYPTO_MD5 13 #define CRYPTO_SHA1 14 #define CRYPTO_NULL_HMAC 15 @@ -122,9 +131,18 @@ #define CRYPTO_SHA2_256_HMAC 18 #define CRYPTO_SHA2_384_HMAC 19 #define CRYPTO_SHA2_512_HMAC 20 -#define CRYPTO_CAMELLIA_CBC 21 +#define CRYPTO_CAMELLIA_CBC 21 #define CRYPTO_AES_XTS 22 -#define CRYPTO_ALGORITHM_MAX 22 /* Keep updated - see below */ +#define CRYPTO_AES_ICM 23 /* commonly known as CTR mode */ +#define CRYPTO_AES_NIST_GMAC 24 /* cipher side */ +#define CRYPTO_AES_NIST_GCM_16 25 /* 16 byte ICV */ +#define CRYPTO_AES_128_NIST_GMAC 26 /* auth side */ +#define CRYPTO_AES_192_NIST_GMAC 27 /* auth side */ +#define CRYPTO_AES_256_NIST_GMAC 28 /* auth side */ +#define CRYPTO_ALGORITHM_MAX 28 /* Keep updated - see below */ + +#define CRYPTO_ALGO_VALID(x) ((x) >= CRYPTO_ALGORITHM_MIN && \ + (x) <= CRYPTO_ALGORITHM_MAX) /* Algorithm flags */ #define CRYPTO_ALG_FLAG_SUPPORTED 0x01 /* Algorithm is supported */ @@ -182,6 +200,20 @@ struct crypt_op { caddr_t iv; }; +/* op and flags the same as crypt_op */ +struct crypt_aead { + u_int32_t ses; + u_int16_t op; /* i.e. COP_ENCRYPT */ + u_int16_t flags; + u_int len; + u_int aadlen; + u_int ivlen; + caddr_t src, dst; /* become iov[] inside kernel */ + caddr_t aad; /* additional authenticated data */ + caddr_t tag; /* must fit for chosen TAG length */ + caddr_t iv; +}; + /* * Parameters for looking up a crypto driver/device by * device name or by id. The latter are returned for @@ -239,6 +271,7 @@ struct crypt_kop { #define CIOCGSESSION2 _IOWR('c', 106, struct session2_op) #define CIOCKEY2 _IOWR('c', 107, struct crypt_kop) #define CIOCFINDDEV _IOWR('c', 108, struct crypt_find_op) +#define CIOCCRYPTAEAD _IOWR('c', 109, struct crypt_aead) struct cryptotstat { struct timespec acc; /* total accumulated time */ @@ -269,6 +302,14 @@ struct cryptostats { }; #ifdef _KERNEL + +#if 0 +#define CRYPTDEB(s) do { printf("%s:%d: %s\n", __FILE__, __LINE__, s); \ + } while (0) +#else +#define CRYPTDEB(s) do { } while (0) +#endif + /* Standard initialization structure beginning */ struct cryptoini { int cri_alg; /* Algorithm to use */ @@ -292,14 +333,15 @@ struct cryptodesc { place, so don't copy. */ #define CRD_F_IV_EXPLICIT 0x04 /* IV explicitly provided */ #define CRD_F_DSA_SHA_NEEDED 0x08 /* Compute SHA-1 of buffer for DSA */ +#define CRD_F_COMP 0x0f /* Set when doing compression */ #define CRD_F_KEY_EXPLICIT 0x10 /* Key explicitly provided */ -#define CRD_F_COMP 0x0f /* Set when doing compression */ struct cryptoini CRD_INI; /* Initialization/context data */ -#define crd_iv CRD_INI.cri_iv -#define crd_key CRD_INI.cri_key -#define crd_alg CRD_INI.cri_alg -#define crd_klen CRD_INI.cri_klen +#define crd_esn CRD_INI.cri_esn +#define crd_iv CRD_INI.cri_iv +#define crd_key CRD_INI.cri_key +#define crd_alg CRD_INI.cri_alg +#define crd_klen CRD_INI.cri_klen struct cryptodesc *crd_next; }; @@ -324,9 +366,8 @@ struct cryptop { */ int crp_flags; -#define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains */ -#define CRYPTO_F_IOV 0x0002 /* Input/output are uio */ -#define CRYPTO_F_REL 0x0004 /* Must return data in same place */ +#define CRYPTO_F_IMBUF 0x0001 /* Input/output are mbuf chains */ +#define CRYPTO_F_IOV 0x0002 /* Input/output are uio */ #define CRYPTO_F_BATCH 0x0008 /* Batch op if possible */ #define CRYPTO_F_CBIMM 0x0010 /* Do callback immediately */ #define CRYPTO_F_DONE 0x0020 /* Operation completed */ @@ -341,12 +382,12 @@ struct cryptop { struct bintime crp_tstamp; /* performance time stamp */ }; -#define CRYPTO_BUF_CONTIG 0x0 -#define CRYPTO_BUF_IOV 0x1 -#define CRYPTO_BUF_MBUF 0x2 +#define CRYPTO_BUF_CONTIG 0x0 +#define CRYPTO_BUF_IOV 0x1 +#define CRYPTO_BUF_MBUF 0x2 -#define CRYPTO_OP_DECRYPT 0x0 -#define CRYPTO_OP_ENCRYPT 0x1 +#define CRYPTO_OP_DECRYPT 0x0 +#define CRYPTO_OP_ENCRYPT 0x1 /* * Hints passed to process methods. @@ -381,9 +422,9 @@ MALLOC_DECLARE(M_CRYPTO_DATA); extern int crypto_newsession(u_int64_t *sid, struct cryptoini *cri, int hard); extern int crypto_freesession(u_int64_t sid); -#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE -#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE -#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */ +#define CRYPTOCAP_F_HARDWARE CRYPTO_FLAG_HARDWARE +#define CRYPTOCAP_F_SOFTWARE CRYPTO_FLAG_SOFTWARE +#define CRYPTOCAP_F_SYNC 0x04000000 /* operates synchronously */ extern int32_t crypto_get_driverid(device_t dev, int flags); extern int crypto_find_driver(const char *); extern device_t crypto_find_device_byhid(int hid); @@ -418,10 +459,15 @@ extern int crypto_devallowsoft; /* only use hardware crypto */ struct uio; extern void cuio_copydata(struct uio* uio, int off, int len, caddr_t cp); extern void cuio_copyback(struct uio* uio, int off, int len, caddr_t cp); -extern struct iovec *cuio_getptr(struct uio *uio, int loc, int *off); +extern int cuio_getptr(struct uio *uio, int loc, int *off); extern int cuio_apply(struct uio *uio, int off, int len, int (*f)(void *, void *, u_int), void *arg); +struct mbuf; +struct iovec; +extern void crypto_mbuftoiov(struct mbuf *mbuf, struct iovec **iovptr, + int *cnt, int *allocated); + extern void crypto_copyback(int flags, caddr_t buf, int off, int size, caddr_t in); extern void crypto_copydata(int flags, caddr_t buf, int off, int size, diff --git a/sys/opencrypto/cryptosoft.c b/sys/opencrypto/cryptosoft.c index a0875ff5ac54..d769eea2ca21 100644 --- a/sys/opencrypto/cryptosoft.c +++ b/sys/opencrypto/cryptosoft.c @@ -9,6 +9,12 @@ * supported the development of this code. * * Copyright (c) 2000, 2001 Angelos D. Keromytis + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in @@ -37,6 +43,8 @@ __FBSDID("$FreeBSD$"); #include <sys/uio.h> #include <sys/lock.h> #include <sys/rwlock.h> +#include <sys/endian.h> +#include <sys/limits.h> #include <crypto/blowfish/blowfish.h> #include <crypto/sha1.h> @@ -64,6 +72,7 @@ u_int8_t hmac_opad_buffer[HMAC_MAX_BLOCK_LEN]; static int swcr_encdec(struct cryptodesc *, struct swcr_data *, caddr_t, int); static int swcr_authcompute(struct cryptodesc *, struct swcr_data *, caddr_t, int); +static int swcr_authenc(struct cryptop *crp); static int swcr_compdec(struct cryptodesc *, struct swcr_data *, caddr_t, int); static int swcr_freesession(device_t dev, u_int64_t tid); static int swcr_freesession_locked(device_t dev, u_int64_t tid); @@ -76,36 +85,48 @@ swcr_encdec(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf, int flags) { unsigned char iv[EALG_MAX_BLOCK_LEN], blk[EALG_MAX_BLOCK_LEN], *idat; - unsigned char *ivp, piv[EALG_MAX_BLOCK_LEN]; + unsigned char *ivp, *nivp, iv2[EALG_MAX_BLOCK_LEN]; struct enc_xform *exf; - int i, k, j, blks; + int i, j, k, blks, ind, count, ivlen; + struct uio *uio, uiolcl; + struct iovec iovlcl[4]; + struct iovec *iov; + int iovcnt, iovalloc; + int error; + + error = 0; exf = sw->sw_exf; blks = exf->blocksize; + ivlen = exf->ivsize; /* Check for non-padded data */ if (crd->crd_len % blks) return EINVAL; + if (crd->crd_alg == CRYPTO_AES_ICM && + (crd->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); + /* Initialize the IV */ if (crd->crd_flags & CRD_F_ENCRYPT) { /* IV explicitly provided ? */ if (crd->crd_flags & CRD_F_IV_EXPLICIT) - bcopy(crd->crd_iv, iv, blks); + bcopy(crd->crd_iv, iv, ivlen); else - arc4rand(iv, blks, 0); + arc4rand(iv, ivlen, 0); /* Do we need to write the IV */ if (!(crd->crd_flags & CRD_F_IV_PRESENT)) - crypto_copyback(flags, buf, crd->crd_inject, blks, iv); + crypto_copyback(flags, buf, crd->crd_inject, ivlen, iv); } else { /* Decryption */ - /* IV explicitly provided ? */ + /* IV explicitly provided ? */ if (crd->crd_flags & CRD_F_IV_EXPLICIT) - bcopy(crd->crd_iv, iv, blks); + bcopy(crd->crd_iv, iv, ivlen); else { /* Get IV off buf */ - crypto_copydata(flags, buf, crd->crd_inject, blks, iv); + crypto_copydata(flags, buf, crd->crd_inject, ivlen, iv); } } @@ -114,341 +135,184 @@ swcr_encdec(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf, if (sw->sw_kschedule) exf->zerokey(&(sw->sw_kschedule)); + error = exf->setkey(&sw->sw_kschedule, crd->crd_key, crd->crd_klen / 8); if (error) return (error); } + iov = iovlcl; + iovcnt = nitems(iovlcl); + iovalloc = 0; + uio = &uiolcl; + if ((flags & CRYPTO_F_IMBUF) != 0) { + crypto_mbuftoiov((struct mbuf *)buf, &iov, &iovcnt, + &iovalloc); + uio->uio_iov = iov; + uio->uio_iovcnt = iovcnt; + } else if ((flags & CRYPTO_F_IOV) != 0) + uio = (struct uio *)buf; + else { + iov[0].iov_base = buf; + iov[0].iov_len = crd->crd_skip + crd->crd_len; + uio->uio_iov = iov; + uio->uio_iovcnt = 1; + } + ivp = iv; - /* - * xforms that provide a reinit method perform all IV - * handling themselves. - */ - if (exf->reinit) + if (exf->reinit) { + /* + * xforms that provide a reinit method perform all IV + * handling themselves. + */ exf->reinit(sw->sw_kschedule, iv); + } - if (flags & CRYPTO_F_IMBUF) { - struct mbuf *m = (struct mbuf *) buf; + count = crd->crd_skip; + ind = cuio_getptr(uio, count, &k); + if (ind == -1) { + error = EINVAL; + goto out; + } - /* Find beginning of data */ - m = m_getptr(m, crd->crd_skip, &k); - if (m == NULL) - return EINVAL; + i = crd->crd_len; - i = crd->crd_len; - - while (i > 0) { - /* - * If there's insufficient data at the end of - * an mbuf, we have to do some copying. - */ - if (m->m_len < k + blks && m->m_len != k) { - m_copydata(m, k, blks, blk); - - /* Actual encryption/decryption */ - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - blk); - } else { - exf->decrypt(sw->sw_kschedule, - blk); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, blk); - - /* - * Keep encrypted block for XOR'ing - * with next block - */ - bcopy(blk, iv, blks); - ivp = iv; - } else { /* decrypt */ - /* - * Keep encrypted block for XOR'ing - * with next block - */ - if (ivp == iv) - bcopy(blk, piv, blks); - else - bcopy(blk, iv, blks); - - exf->decrypt(sw->sw_kschedule, blk); - - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; + while (i > 0) { + /* + * If there's insufficient data at the end of + * an iovec, we have to do some copying. + */ + if (uio->uio_iov[ind].iov_len < k + blks && + uio->uio_iov[ind].iov_len != k) { + cuio_copydata(uio, count, blks, blk); + + /* Actual encryption/decryption */ + if (exf->reinit) { + if (crd->crd_flags & CRD_F_ENCRYPT) { + exf->encrypt(sw->sw_kschedule, + blk); + } else { + exf->decrypt(sw->sw_kschedule, + blk); } - - /* Copy back decrypted block */ - m_copyback(m, k, blks, blk); - - /* Advance pointer */ - m = m_getptr(m, k + blks, &k); - if (m == NULL) - return EINVAL; - - i -= blks; - - /* Could be done... */ - if (i == 0) - break; + } else if (crd->crd_flags & CRD_F_ENCRYPT) { + /* XOR with previous block */ + for (j = 0; j < blks; j++) + blk[j] ^= ivp[j]; + + exf->encrypt(sw->sw_kschedule, blk); + + /* + * Keep encrypted block for XOR'ing + * with next block + */ + bcopy(blk, iv, blks); + ivp = iv; + } else { /* decrypt */ + /* + * Keep encrypted block for XOR'ing + * with next block + */ + nivp = (ivp == iv) ? iv2 : iv; + bcopy(blk, nivp, blks); + + exf->decrypt(sw->sw_kschedule, blk); + + /* XOR with previous block */ + for (j = 0; j < blks; j++) + blk[j] ^= ivp[j]; + + ivp = nivp; } - /* Skip possibly empty mbufs */ - if (k == m->m_len) { - for (m = m->m_next; m && m->m_len == 0; - m = m->m_next) - ; - k = 0; - } + /* Copy back decrypted block */ + cuio_copyback(uio, count, blks, blk); - /* Sanity check */ - if (m == NULL) - return EINVAL; - - /* - * Warning: idat may point to garbage here, but - * we only use it in the while() loop, only if - * there are indeed enough data. - */ - idat = mtod(m, unsigned char *) + k; - - while (m->m_len >= k + blks && i > 0) { - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - idat); - } else { - exf->decrypt(sw->sw_kschedule, - idat); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, idat); - ivp = idat; - } else { /* decrypt */ - /* - * Keep encrypted block to be used - * in next block's processing. - */ - if (ivp == iv) - bcopy(idat, piv, blks); - else - bcopy(idat, iv, blks); - - exf->decrypt(sw->sw_kschedule, idat); - - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; - } + count += blks; - idat += blks; - k += blks; - i -= blks; + /* Advance pointer */ + ind = cuio_getptr(uio, count, &k); + if (ind == -1) { + error = EINVAL; + goto out; } - } - return 0; /* Done with mbuf encryption/decryption */ - } else if (flags & CRYPTO_F_IOV) { - struct uio *uio = (struct uio *) buf; - struct iovec *iov; + i -= blks; - /* Find beginning of data */ - iov = cuio_getptr(uio, crd->crd_skip, &k); - if (iov == NULL) - return EINVAL; + /* Could be done... */ + if (i == 0) + break; + } - i = crd->crd_len; - - while (i > 0) { - /* - * If there's insufficient data at the end of - * an iovec, we have to do some copying. - */ - if (iov->iov_len < k + blks && iov->iov_len != k) { - cuio_copydata(uio, k, blks, blk); - - /* Actual encryption/decryption */ - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - blk); - } else { - exf->decrypt(sw->sw_kschedule, - blk); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, blk); - - /* - * Keep encrypted block for XOR'ing - * with next block - */ - bcopy(blk, iv, blks); - ivp = iv; - } else { /* decrypt */ - /* - * Keep encrypted block for XOR'ing - * with next block - */ - if (ivp == iv) - bcopy(blk, piv, blks); - else - bcopy(blk, iv, blks); - - exf->decrypt(sw->sw_kschedule, blk); - - /* XOR with previous block */ - for (j = 0; j < blks; j++) - blk[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; + /* + * Warning: idat may point to garbage here, but + * we only use it in the while() loop, only if + * there are indeed enough data. + */ + idat = (char *)uio->uio_iov[ind].iov_base + k; + + while (uio->uio_iov[ind].iov_len >= k + blks && i > 0) { + if (exf->reinit) { + if (crd->crd_flags & CRD_F_ENCRYPT) { + exf->encrypt(sw->sw_kschedule, + idat); + } else { + exf->decrypt(sw->sw_kschedule, + idat); } - - /* Copy back decrypted block */ - cuio_copyback(uio, k, blks, blk); - - /* Advance pointer */ - iov = cuio_getptr(uio, k + blks, &k); - if (iov == NULL) - return EINVAL; - - i -= blks; - - /* Could be done... */ - if (i == 0) - break; + } else if (crd->crd_flags & CRD_F_ENCRYPT) { + /* XOR with previous block/IV */ + for (j = 0; j < blks; j++) + idat[j] ^= ivp[j]; + + exf->encrypt(sw->sw_kschedule, idat); + ivp = idat; + } else { /* decrypt */ + /* + * Keep encrypted block to be used + * in next block's processing. + */ + nivp = (ivp == iv) ? iv2 : iv; + bcopy(idat, nivp, blks); + + exf->decrypt(sw->sw_kschedule, idat); + + /* XOR with previous block/IV */ + for (j = 0; j < blks; j++) + idat[j] ^= ivp[j]; + + ivp = nivp; } - /* - * Warning: idat may point to garbage here, but - * we only use it in the while() loop, only if - * there are indeed enough data. - */ - idat = (char *)iov->iov_base + k; - - while (iov->iov_len >= k + blks && i > 0) { - if (exf->reinit) { - if (crd->crd_flags & CRD_F_ENCRYPT) { - exf->encrypt(sw->sw_kschedule, - idat); - } else { - exf->decrypt(sw->sw_kschedule, - idat); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - exf->encrypt(sw->sw_kschedule, idat); - ivp = idat; - } else { /* decrypt */ - /* - * Keep encrypted block to be used - * in next block's processing. - */ - if (ivp == iv) - bcopy(idat, piv, blks); - else - bcopy(idat, iv, blks); - - exf->decrypt(sw->sw_kschedule, idat); - - /* XOR with previous block/IV */ - for (j = 0; j < blks; j++) - idat[j] ^= ivp[j]; - - if (ivp == iv) - bcopy(piv, iv, blks); - else - ivp = iv; - } - - idat += blks; - k += blks; - i -= blks; - } - if (k == iov->iov_len) { - iov++; - k = 0; - } + idat += blks; + count += blks; + k += blks; + i -= blks; } - return 0; /* Done with iovec encryption/decryption */ - } else { /* contiguous buffer */ - if (exf->reinit) { - for (i = crd->crd_skip; - i < crd->crd_skip + crd->crd_len; i += blks) { - if (crd->crd_flags & CRD_F_ENCRYPT) - exf->encrypt(sw->sw_kschedule, buf + i); - else - exf->decrypt(sw->sw_kschedule, buf + i); - } - } else if (crd->crd_flags & CRD_F_ENCRYPT) { - for (i = crd->crd_skip; - i < crd->crd_skip + crd->crd_len; i += blks) { - /* XOR with the IV/previous block, as appropriate. */ - if (i == crd->crd_skip) - for (k = 0; k < blks; k++) - buf[i + k] ^= ivp[k]; - else - for (k = 0; k < blks; k++) - buf[i + k] ^= buf[i + k - blks]; - exf->encrypt(sw->sw_kschedule, buf + i); - } - } else { /* Decrypt */ - /* - * Start at the end, so we don't need to keep the encrypted - * block as the IV for the next block. - */ - for (i = crd->crd_skip + crd->crd_len - blks; - i >= crd->crd_skip; i -= blks) { - exf->decrypt(sw->sw_kschedule, buf + i); - - /* XOR with the IV/previous block, as appropriate */ - if (i == crd->crd_skip) - for (k = 0; k < blks; k++) - buf[i + k] ^= ivp[k]; - else - for (k = 0; k < blks; k++) - buf[i + k] ^= buf[i + k - blks]; + /* + * Advance to the next iov if the end of the current iov + * is aligned with the end of a cipher block. + * Note that the code is equivalent to calling: + * ind = cuio_getptr(uio, count, &k); + */ + if (i > 0 && k == uio->uio_iov[ind].iov_len) { + k = 0; + ind++; + if (ind >= uio->uio_iovcnt) { + error = EINVAL; + goto out; } } - - return 0; /* Done with contiguous buffer encryption/decryption */ } - /* Unreachable */ - return EINVAL; +out: + if (iovalloc) + free(iov, M_CRYPTO_DATA); + + return (error); } static void @@ -583,6 +447,181 @@ swcr_authcompute(struct cryptodesc *crd, struct swcr_data *sw, caddr_t buf, return 0; } +CTASSERT(INT_MAX <= (1ll<<39) - 256); /* GCM: plain text < 2^39-256 */ +CTASSERT(INT_MAX <= (uint64_t)-1); /* GCM: associated data <= 2^64-1 */ + +/* + * Apply a combined encryption-authentication transformation + */ +static int +swcr_authenc(struct cryptop *crp) +{ + uint32_t blkbuf[howmany(EALG_MAX_BLOCK_LEN, sizeof(uint32_t))]; + u_char *blk = (u_char *)blkbuf; + u_char aalg[AALG_MAX_RESULT_LEN]; + u_char uaalg[AALG_MAX_RESULT_LEN]; + u_char iv[EALG_MAX_BLOCK_LEN]; + union authctx ctx; + struct cryptodesc *crd, *crda = NULL, *crde = NULL; + struct swcr_data *sw, *swa, *swe = NULL; + struct auth_hash *axf = NULL; + struct enc_xform *exf = NULL; + caddr_t buf = (caddr_t)crp->crp_buf; + uint32_t *blkp; + int aadlen, blksz, i, ivlen, len, iskip, oskip, r; + + ivlen = blksz = iskip = oskip = 0; + + for (crd = crp->crp_desc; crd; crd = crd->crd_next) { + for (sw = swcr_sessions[crp->crp_sid & 0xffffffff]; + sw && sw->sw_alg != crd->crd_alg; + sw = sw->sw_next) + ; + if (sw == NULL) + return (EINVAL); + + switch (sw->sw_alg) { + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_NIST_GMAC: + swe = sw; + crde = crd; + exf = swe->sw_exf; + ivlen = 12; + break; + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + swa = sw; + crda = crd; + axf = swa->sw_axf; + if (swa->sw_ictx == 0) + return (EINVAL); + bcopy(swa->sw_ictx, &ctx, axf->ctxsize); + blksz = axf->blocksize; + break; + default: + return (EINVAL); + } + } + if (crde == NULL || crda == NULL) + return (EINVAL); + + if (crde->crd_alg == CRYPTO_AES_NIST_GCM_16 && + (crde->crd_flags & CRD_F_IV_EXPLICIT) == 0) + return (EINVAL); + + if (crde->crd_klen != crda->crd_klen) + return (EINVAL); + + /* Initialize the IV */ + if (crde->crd_flags & CRD_F_ENCRYPT) { + /* IV explicitly provided ? */ + if (crde->crd_flags & CRD_F_IV_EXPLICIT) + bcopy(crde->crd_iv, iv, ivlen); + else + arc4rand(iv, ivlen, 0); + + /* Do we need to write the IV */ + if (!(crde->crd_flags & CRD_F_IV_PRESENT)) + crypto_copyback(crp->crp_flags, buf, crde->crd_inject, + ivlen, iv); + + } else { /* Decryption */ + /* IV explicitly provided ? */ + if (crde->crd_flags & CRD_F_IV_EXPLICIT) + bcopy(crde->crd_iv, iv, ivlen); + else { + /* Get IV off buf */ + crypto_copydata(crp->crp_flags, buf, crde->crd_inject, + ivlen, iv); + } + } + + /* Supply MAC with IV */ + if (axf->Reinit) + axf->Reinit(&ctx, iv, ivlen); + + /* Supply MAC with AAD */ + aadlen = crda->crd_len; + + for (i = iskip; i < crda->crd_len; i += blksz) { + len = MIN(crda->crd_len - i, blksz - oskip); + crypto_copydata(crp->crp_flags, buf, crda->crd_skip + i, len, + blk + oskip); + bzero(blk + len + oskip, blksz - len - oskip); + axf->Update(&ctx, blk, blksz); + oskip = 0; /* reset initial output offset */ + } + + if (exf->reinit) + exf->reinit(swe->sw_kschedule, iv); + + /* Do encryption/decryption with MAC */ + for (i = 0; i < crde->crd_len; i += blksz) { + len = MIN(crde->crd_len - i, blksz); + if (len < blksz) + bzero(blk, blksz); + crypto_copydata(crp->crp_flags, buf, crde->crd_skip + i, len, + blk); + if (crde->crd_flags & CRD_F_ENCRYPT) { + exf->encrypt(swe->sw_kschedule, blk); + axf->Update(&ctx, blk, len); + crypto_copyback(crp->crp_flags, buf, + crde->crd_skip + i, len, blk); + } else { + axf->Update(&ctx, blk, len); + } + } + + /* Do any required special finalization */ + switch (crda->crd_alg) { + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + /* length block */ + bzero(blk, blksz); + blkp = (uint32_t *)blk + 1; + *blkp = htobe32(aadlen * 8); + blkp = (uint32_t *)blk + 3; + *blkp = htobe32(crde->crd_len * 8); + axf->Update(&ctx, blk, blksz); + break; + } + + /* Finalize MAC */ + axf->Final(aalg, &ctx); + + /* Validate tag */ + if (!(crde->crd_flags & CRD_F_ENCRYPT)) { + crypto_copydata(crp->crp_flags, buf, crda->crd_inject, + axf->hashsize, uaalg); + + r = timingsafe_bcmp(aalg, uaalg, axf->hashsize); + if (r == 0) { + /* tag matches, decrypt data */ + for (i = 0; i < crde->crd_len; i += blksz) { + len = MIN(crde->crd_len - i, blksz); + if (len < blksz) + bzero(blk, blksz); + crypto_copydata(crp->crp_flags, buf, + crde->crd_skip + i, len, blk); + if (!(crde->crd_flags & CRD_F_ENCRYPT)) { + exf->decrypt(swe->sw_kschedule, blk); + } + crypto_copyback(crp->crp_flags, buf, + crde->crd_skip + i, len, blk); + } + } else + return (EBADMSG); + } else { + /* Inject the authentication data */ + crypto_copyback(crp->crp_flags, buf, crda->crd_inject, + axf->hashsize, aalg); + } + + return (0); +} + /* * Apply a compression/decompression algorithm */ @@ -747,6 +786,16 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) case CRYPTO_AES_XTS: txf = &enc_xform_aes_xts; goto enccommon; + case CRYPTO_AES_ICM: + txf = &enc_xform_aes_icm; + goto enccommon; + case CRYPTO_AES_NIST_GCM_16: + txf = &enc_xform_aes_nist_gcm; + goto enccommon; + case CRYPTO_AES_NIST_GMAC: + txf = &enc_xform_aes_nist_gmac; + (*swd)->sw_exf = txf; + break; case CRYPTO_CAMELLIA_CBC: txf = &enc_xform_camellia; goto enccommon; @@ -865,6 +914,31 @@ swcr_newsession(device_t dev, u_int32_t *sid, struct cryptoini *cri) (*swd)->sw_axf = axf; break; #endif + + case CRYPTO_AES_128_NIST_GMAC: + axf = &auth_hash_nist_gmac_aes_128; + goto auth4common; + + case CRYPTO_AES_192_NIST_GMAC: + axf = &auth_hash_nist_gmac_aes_192; + goto auth4common; + + case CRYPTO_AES_256_NIST_GMAC: + axf = &auth_hash_nist_gmac_aes_256; + auth4common: + (*swd)->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, + M_NOWAIT); + if ((*swd)->sw_ictx == NULL) { + swcr_freesession_locked(dev, i); + rw_runlock(&swcr_sessions_lock); + return ENOBUFS; + } + axf->Init((*swd)->sw_ictx); + axf->Setkey((*swd)->sw_ictx, cri->cri_key, + cri->cri_klen / 8); + (*swd)->sw_axf = axf; + break; + case CRYPTO_DEFLATE_COMP: cxf = &comp_algo_deflate; (*swd)->sw_cxf = cxf; @@ -925,6 +999,9 @@ swcr_freesession_locked(device_t dev, u_int64_t tid) case CRYPTO_SKIPJACK_CBC: case CRYPTO_RIJNDAEL128_CBC: case CRYPTO_AES_XTS: + case CRYPTO_AES_ICM: + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_NIST_GMAC: case CRYPTO_CAMELLIA_CBC: case CRYPTO_NULL_CBC: txf = swd->sw_exf; @@ -1050,6 +1127,7 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) case CRYPTO_SKIPJACK_CBC: case CRYPTO_RIJNDAEL128_CBC: case CRYPTO_AES_XTS: + case CRYPTO_AES_ICM: case CRYPTO_CAMELLIA_CBC: if ((crp->crp_etype = swcr_encdec(crd, sw, crp->crp_buf, crp->crp_flags)) != 0) @@ -1074,6 +1152,14 @@ swcr_process(device_t dev, struct cryptop *crp, int hint) goto done; break; + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_NIST_GMAC: + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + crp->crp_etype = swcr_authenc(crp); + goto done; + case CRYPTO_DEFLATE_COMP: if ((crp->crp_etype = swcr_compdec(crd, sw, crp->crp_buf, crp->crp_flags)) != 0) @@ -1144,6 +1230,12 @@ swcr_attach(device_t dev) REGISTER(CRYPTO_SHA1); REGISTER(CRYPTO_RIJNDAEL128_CBC); REGISTER(CRYPTO_AES_XTS); + REGISTER(CRYPTO_AES_ICM); + REGISTER(CRYPTO_AES_NIST_GCM_16); + REGISTER(CRYPTO_AES_NIST_GMAC); + REGISTER(CRYPTO_AES_128_NIST_GMAC); + REGISTER(CRYPTO_AES_192_NIST_GMAC); + REGISTER(CRYPTO_AES_256_NIST_GMAC); REGISTER(CRYPTO_CAMELLIA_CBC); REGISTER(CRYPTO_DEFLATE_COMP); #undef REGISTER diff --git a/sys/opencrypto/gfmult.c b/sys/opencrypto/gfmult.c new file mode 100644 index 000000000000..990a7ff5d60f --- /dev/null +++ b/sys/opencrypto/gfmult.c @@ -0,0 +1,275 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include "gfmult.h" + +#define REV_POLY_REDUCT 0xe1 /* 0x87 bit reversed */ + +/* reverse the bits of a nibble */ +static const uint8_t nib_rev[] = { + 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, + 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf, +}; + +/* calulate v * 2 */ +static inline struct gf128 +gf128_mulalpha(struct gf128 v) +{ + uint64_t mask; + + mask = !!(v.v[1] & 1); + mask = ~(mask - 1); + v.v[1] = (v.v[1] >> 1) | ((v.v[0] & 1) << 63); + v.v[0] = (v.v[0] >> 1) ^ ((mask & REV_POLY_REDUCT) << 56); + + return v; +} + +/* + * Generate a table for 0-16 * h. Store the results in the table w/ indexes + * bit reversed, and the words striped across the values. + */ +void +gf128_genmultable(struct gf128 h, struct gf128table *t) +{ + struct gf128 tbl[16]; + int i; + + tbl[0] = MAKE_GF128(0, 0); + tbl[1] = h; + + for (i = 2; i < 16; i += 2) { + tbl[i] = gf128_mulalpha(tbl[i / 2]); + tbl[i + 1] = gf128_add(tbl[i], h); + } + + for (i = 0; i < 16; i++) { + t->a[nib_rev[i]] = tbl[i].v[0] >> 32; + t->b[nib_rev[i]] = tbl[i].v[0]; + t->c[nib_rev[i]] = tbl[i].v[1] >> 32; + t->d[nib_rev[i]] = tbl[i].v[1]; + } +} + +/* + * Generate tables containing h, h^2, h^3 and h^4, starting at 0. + */ +void +gf128_genmultable4(struct gf128 h, struct gf128table4 *t) +{ + struct gf128 h2, h3, h4; + + gf128_genmultable(h, &t->tbls[0]); + + h2 = gf128_mul(h, &t->tbls[0]); + + gf128_genmultable(h2, &t->tbls[1]); + + h3 = gf128_mul(h, &t->tbls[1]); + gf128_genmultable(h3, &t->tbls[2]); + + h4 = gf128_mul(h2, &t->tbls[1]); + gf128_genmultable(h4, &t->tbls[3]); +} + +/* + * Read a row from the table. + */ +static inline struct gf128 +readrow(struct gf128table *tbl, unsigned bits) +{ + struct gf128 r; + + bits = bits % 16; + + r.v[0] = ((uint64_t)tbl->a[bits] << 32) | tbl->b[bits]; + r.v[1] = ((uint64_t)tbl->c[bits] << 32) | tbl->d[bits]; + + return r; +} + +/* + * These are the reduction values. Since we are dealing with bit reversed + * version, the values need to be bit reversed, AND the indexes are also + * bit reversed to make lookups quicker. + */ +static uint16_t reduction[] = { + 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0, + 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0, +}; + +/* + * Calculate: + * (x*2^4 + word[3,0]*h) * + * 2^4 + word[7,4]*h) * + * ... + * 2^4 + word[63,60]*h + */ +static struct gf128 +gfmultword(uint64_t word, struct gf128 x, struct gf128table *tbl) +{ + struct gf128 row; + unsigned bits; + unsigned redbits; + int i; + + for (i = 0; i < 64; i += 4) { + bits = word % 16; + + /* fetch row */ + row = readrow(tbl, bits); + + /* x * 2^4 */ + redbits = x.v[1] % 16; + x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60; + x.v[0] >>= 4; + x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16); + + word >>= 4; + + x = gf128_add(x, row); + } + + return x; +} + +/* + * Calculate + * (x*2^4 + worda[3,0]*h^4+wordb[3,0]*h^3+...+wordd[3,0]*h) * + * ... + * 2^4 + worda[63,60]*h^4+ ... + wordd[63,60]*h + * + * Passing/returning struct is .5% faster than passing in via pointer on + * amd64. + */ +static struct gf128 +gfmultword4(uint64_t worda, uint64_t wordb, uint64_t wordc, uint64_t wordd, + struct gf128 x, struct gf128table4 *tbl) +{ + struct gf128 rowa, rowb, rowc, rowd; + unsigned bitsa, bitsb, bitsc, bitsd; + unsigned redbits; + int i; + + /* + * XXX - nibble reverse words to save a shift? probably not as + * nibble reverse would take 20 ops (5 * 4) verse 16 + */ + + for (i = 0; i < 64; i += 4) { + bitsa = worda % 16; + bitsb = wordb % 16; + bitsc = wordc % 16; + bitsd = wordd % 16; + + /* fetch row */ + rowa = readrow(&tbl->tbls[3], bitsa); + rowb = readrow(&tbl->tbls[2], bitsb); + rowc = readrow(&tbl->tbls[1], bitsc); + rowd = readrow(&tbl->tbls[0], bitsd); + + /* x * 2^4 */ + redbits = x.v[1] % 16; + x.v[1] = (x.v[1] >> 4) | (x.v[0] % 16) << 60; + x.v[0] >>= 4; + x.v[0] ^= (uint64_t)reduction[redbits] << (64 - 16); + + worda >>= 4; + wordb >>= 4; + wordc >>= 4; + wordd >>= 4; + + x = gf128_add(x, gf128_add(rowa, gf128_add(rowb, + gf128_add(rowc, rowd)))); + } + + return x; +} + +struct gf128 +gf128_mul(struct gf128 v, struct gf128table *tbl) +{ + struct gf128 ret; + + ret = MAKE_GF128(0, 0); + + ret = gfmultword(v.v[1], ret, tbl); + ret = gfmultword(v.v[0], ret, tbl); + + return ret; +} + +/* + * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or: + * (((a*h+b)*h+c)*h+d)*h + */ +struct gf128 +gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, struct gf128 d, + struct gf128table4 *tbl) +{ + struct gf128 tmp; + + tmp = MAKE_GF128(0, 0); + + tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl); + tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl); + + return tmp; +} + +/* + * a = data[0..15] + r + * b = data[16..31] + * c = data[32..47] + * d = data[48..63] + * + * Calculate a*h^4 + b*h^3 + c*h^2 + d*h, or: + * (((a*h+b)*h+c)*h+d)*h + */ +struct gf128 +gf128_mul4b(struct gf128 r, const uint8_t *v, struct gf128table4 *tbl) +{ + struct gf128 a, b, c, d; + struct gf128 tmp; + + tmp = MAKE_GF128(0, 0); + + a = gf128_add(r, gf128_read(&v[0*16])); + b = gf128_read(&v[1*16]); + c = gf128_read(&v[2*16]); + d = gf128_read(&v[3*16]); + + tmp = gfmultword4(a.v[1], b.v[1], c.v[1], d.v[1], tmp, tbl); + tmp = gfmultword4(a.v[0], b.v[0], c.v[0], d.v[0], tmp, tbl); + + return tmp; +} diff --git a/sys/opencrypto/gfmult.h b/sys/opencrypto/gfmult.h new file mode 100644 index 000000000000..c385618ace37 --- /dev/null +++ b/sys/opencrypto/gfmult.h @@ -0,0 +1,128 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _GFMULT_H_ +#define _GFMULT_H_ + +#ifdef __APPLE__ +#define __aligned(x) __attribute__((__aligned__(x))) +#define be64dec(buf) __builtin_bswap64(*(uint64_t *)buf) +#define be64enc(buf, x) (*(uint64_t *)buf = __builtin_bswap64(x)) +#else +#include <sys/endian.h> +#endif + +#ifdef _KERNEL +#include <sys/types.h> +#else +#include <stdint.h> +#include <strings.h> +#endif + +#define REQ_ALIGN (16 * 4) +/* + * The rows are striped across cache lines. Note that the indexes + * are bit reversed to make accesses quicker. + */ +struct gf128table { + uint32_t a[16] __aligned(REQ_ALIGN); /* bits 0 - 31 */ + uint32_t b[16] __aligned(REQ_ALIGN); /* bits 63 - 32 */ + uint32_t c[16] __aligned(REQ_ALIGN); /* bits 95 - 64 */ + uint32_t d[16] __aligned(REQ_ALIGN); /* bits 127 - 96 */ +} __aligned(REQ_ALIGN); + +/* + * A set of tables that contain h, h^2, h^3, h^4. To be used w/ gf128_mul4. + */ +struct gf128table4 { + struct gf128table tbls[4]; +}; + +/* + * GCM per spec is bit reversed in memory. So byte 0 is really bit reversed + * and contains bits 0-7. We can deal w/ this by using right shifts and + * related math instead of having to bit reverse everything. This means that + * the low bits are in v[0] (bits 0-63) and reverse order, while the high + * bits are in v[1] (bits 64-127) and reverse order. The high bit of v[0] is + * bit 0, and the low bit of v[1] is bit 127. + */ +struct gf128 { + uint64_t v[2]; +}; + +/* Note that we don't bit reverse in MAKE_GF128. */ +#define MAKE_GF128(a, b) ((struct gf128){.v = { (a), (b) } }) +#define GF128_EQ(a, b) ((((a).v[0] ^ (b).v[0]) | \ + ((a).v[1] ^ (b).v[1])) == 0) + +static inline struct gf128 +gf128_read(const uint8_t *buf) +{ + struct gf128 r; + + r.v[0] = be64dec(buf); + buf += sizeof(uint64_t); + + r.v[1] = be64dec(buf); + + return r; +} + +static inline void +gf128_write(struct gf128 v, uint8_t *buf) +{ + uint64_t tmp; + + be64enc(buf, v.v[0]); + buf += sizeof tmp; + + be64enc(buf, v.v[1]); +} + +static inline struct gf128 __pure /* XXX - __pure2 instead */ +gf128_add(struct gf128 a, struct gf128 b) +{ + a.v[0] ^= b.v[0]; + a.v[1] ^= b.v[1]; + + return a; +} + +void gf128_genmultable(struct gf128 h, struct gf128table *t); +void gf128_genmultable4(struct gf128 h, struct gf128table4 *t); +struct gf128 gf128_mul(struct gf128 v, struct gf128table *tbl); +struct gf128 gf128_mul4(struct gf128 a, struct gf128 b, struct gf128 c, + struct gf128 d, struct gf128table4 *tbl); +struct gf128 gf128_mul4b(struct gf128 r, const uint8_t *v, + struct gf128table4 *tbl); + +#endif /* _GFMULT_H_ */ diff --git a/sys/opencrypto/gmac.c b/sys/opencrypto/gmac.c new file mode 100644 index 000000000000..402092c18fda --- /dev/null +++ b/sys/opencrypto/gmac.c @@ -0,0 +1,119 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#include <sys/types.h> +#include <sys/systm.h> +#include <opencrypto/gfmult.h> +#include <opencrypto/gmac.h> + +void +AES_GMAC_Init(struct aes_gmac_ctx *agc) +{ + + bzero(agc, sizeof *agc); +} + +void +AES_GMAC_Setkey(struct aes_gmac_ctx *agc, const uint8_t *key, uint16_t klen) +{ + const uint8_t zeros[GMAC_BLOCK_LEN] = {}; + struct gf128 h; + uint8_t hbuf[GMAC_BLOCK_LEN]; + + agc->rounds = rijndaelKeySetupEnc(agc->keysched, key, klen * 8); + + rijndaelEncrypt(agc->keysched, agc->rounds, zeros, hbuf); + + h = gf128_read(hbuf); + gf128_genmultable4(h, &agc->ghashtbl); + + explicit_bzero(&h, sizeof h); + explicit_bzero(hbuf, sizeof hbuf); +} + +void +AES_GMAC_Reinit(struct aes_gmac_ctx *agc, const uint8_t *iv, uint16_t ivlen) +{ + + KASSERT(ivlen <= sizeof agc->counter, ("passed ivlen too large!")); + bcopy(iv, agc->counter, ivlen); +} + +int +AES_GMAC_Update(struct aes_gmac_ctx *agc, const uint8_t *data, uint16_t len) +{ + struct gf128 v; + uint8_t buf[GMAC_BLOCK_LEN] = {}; + int i; + + v = agc->hash; + + while (len > 0) { + if (len >= 4*GMAC_BLOCK_LEN) { + i = 4*GMAC_BLOCK_LEN; + v = gf128_mul4b(v, data, &agc->ghashtbl); + } else if (len >= GMAC_BLOCK_LEN) { + i = GMAC_BLOCK_LEN; + v = gf128_add(v, gf128_read(data)); + v = gf128_mul(v, &agc->ghashtbl.tbls[0]); + } else { + i = len; + bcopy(data, buf, i); + v = gf128_add(v, gf128_read(&buf[0])); + v = gf128_mul(v, &agc->ghashtbl.tbls[0]); + explicit_bzero(buf, sizeof buf); + } + len -= i; + data += i; + } + + agc->hash = v; + explicit_bzero(&v, sizeof v); + + return (0); +} + +void +AES_GMAC_Final(uint8_t digest[GMAC_DIGEST_LEN], struct aes_gmac_ctx *agc) +{ + uint8_t enccntr[GMAC_BLOCK_LEN]; + struct gf128 a; + + /* XXX - zero additional bytes? */ + agc->counter[GMAC_BLOCK_LEN - 1] = 1; + + rijndaelEncrypt(agc->keysched, agc->rounds, agc->counter, enccntr); + a = gf128_add(agc->hash, gf128_read(enccntr)); + gf128_write(a, digest); + + explicit_bzero(enccntr, sizeof enccntr); +} diff --git a/sys/opencrypto/gmac.h b/sys/opencrypto/gmac.h new file mode 100644 index 000000000000..79a9e118f614 --- /dev/null +++ b/sys/opencrypto/gmac.h @@ -0,0 +1,55 @@ +/*- + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by John-Mark Gurney under + * the sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _GMAC_H_ + +#include "gfmult.h" +#include <crypto/rijndael/rijndael.h> + +#define GMAC_BLOCK_LEN 16 +#define GMAC_DIGEST_LEN 16 + +struct aes_gmac_ctx { + struct gf128table4 ghashtbl; + struct gf128 hash; + uint32_t keysched[4*(RIJNDAEL_MAXNR + 1)]; + uint8_t counter[GMAC_BLOCK_LEN]; + int rounds; +}; + +void AES_GMAC_Init(struct aes_gmac_ctx *); +void AES_GMAC_Setkey(struct aes_gmac_ctx *, const uint8_t *, uint16_t); +void AES_GMAC_Reinit(struct aes_gmac_ctx *, const uint8_t *, uint16_t); +int AES_GMAC_Update(struct aes_gmac_ctx *, const uint8_t *, uint16_t); +void AES_GMAC_Final(uint8_t [GMAC_DIGEST_LEN], struct aes_gmac_ctx *); + +#endif /* _GMAC_H_ */ diff --git a/sys/opencrypto/xform.c b/sys/opencrypto/xform.c index bfb061b5417b..508a64652b50 100644 --- a/sys/opencrypto/xform.c +++ b/sys/opencrypto/xform.c @@ -24,6 +24,12 @@ * Copyright (C) 2001, Angelos D. Keromytis. * * Copyright (C) 2008, Damien Miller + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software with or without fee * is hereby granted, provided that this entire notice is included in @@ -76,6 +82,7 @@ static int blf_setkey(u_int8_t **, u_int8_t *, int); static int cast5_setkey(u_int8_t **, u_int8_t *, int); static int skipjack_setkey(u_int8_t **, u_int8_t *, int); static int rijndael128_setkey(u_int8_t **, u_int8_t *, int); +static int aes_icm_setkey(u_int8_t **, u_int8_t *, int); static int aes_xts_setkey(u_int8_t **, u_int8_t *, int); static int cml_setkey(u_int8_t **, u_int8_t *, int); @@ -99,6 +106,8 @@ static void rijndael128_decrypt(caddr_t, u_int8_t *); static void aes_xts_decrypt(caddr_t, u_int8_t *); static void cml_decrypt(caddr_t, u_int8_t *); +static void aes_icm_crypt(caddr_t, u_int8_t *); + static void null_zerokey(u_int8_t **); static void des1_zerokey(u_int8_t **); static void des3_zerokey(u_int8_t **); @@ -106,103 +115,145 @@ static void blf_zerokey(u_int8_t **); static void cast5_zerokey(u_int8_t **); static void skipjack_zerokey(u_int8_t **); static void rijndael128_zerokey(u_int8_t **); +static void aes_icm_zerokey(u_int8_t **); static void aes_xts_zerokey(u_int8_t **); static void cml_zerokey(u_int8_t **); +static void aes_icm_reinit(caddr_t, u_int8_t *); static void aes_xts_reinit(caddr_t, u_int8_t *); +static void aes_gcm_reinit(caddr_t, u_int8_t *); static void null_init(void *); -static int null_update(void *, u_int8_t *, u_int16_t); +static void null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len); +static int null_update(void *, const u_int8_t *, u_int16_t); static void null_final(u_int8_t *, void *); -static int MD5Update_int(void *, u_int8_t *, u_int16_t); +static int MD5Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Init_int(void *); -static int SHA1Update_int(void *, u_int8_t *, u_int16_t); +static int SHA1Update_int(void *, const u_int8_t *, u_int16_t); static void SHA1Final_int(u_int8_t *, void *); -static int RMD160Update_int(void *, u_int8_t *, u_int16_t); -static int SHA256Update_int(void *, u_int8_t *, u_int16_t); -static int SHA384Update_int(void *, u_int8_t *, u_int16_t); -static int SHA512Update_int(void *, u_int8_t *, u_int16_t); +static int RMD160Update_int(void *, const u_int8_t *, u_int16_t); +static int SHA256Update_int(void *, const u_int8_t *, u_int16_t); +static int SHA384Update_int(void *, const u_int8_t *, u_int16_t); +static int SHA512Update_int(void *, const u_int8_t *, u_int16_t); static u_int32_t deflate_compress(u_int8_t *, u_int32_t, u_int8_t **); static u_int32_t deflate_decompress(u_int8_t *, u_int32_t, u_int8_t **); +#define AESICM_BLOCKSIZE 16 + +struct aes_icm_ctx { + u_int32_t ac_ek[4*(RIJNDAEL_MAXNR + 1)]; + /* ac_block is initalized to IV */ + u_int8_t ac_block[AESICM_BLOCKSIZE]; + int ac_nr; +}; + MALLOC_DEFINE(M_XDATA, "xform", "xform data buffers"); /* Encryption instances */ struct enc_xform enc_xform_null = { CRYPTO_NULL_CBC, "NULL", /* NB: blocksize of 4 is to generate a properly aligned ESP header */ - NULL_BLOCK_LEN, 0, 256, /* 2048 bits, max key */ + NULL_BLOCK_LEN, NULL_BLOCK_LEN, 0, 256, /* 2048 bits, max key */ null_encrypt, null_decrypt, null_setkey, null_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_des = { CRYPTO_DES_CBC, "DES", - DES_BLOCK_LEN, 8, 8, + DES_BLOCK_LEN, DES_BLOCK_LEN, 8, 8, des1_encrypt, des1_decrypt, des1_setkey, des1_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_3des = { CRYPTO_3DES_CBC, "3DES", - DES3_BLOCK_LEN, 24, 24, + DES3_BLOCK_LEN, DES3_BLOCK_LEN, 24, 24, des3_encrypt, des3_decrypt, des3_setkey, des3_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_blf = { CRYPTO_BLF_CBC, "Blowfish", - BLOWFISH_BLOCK_LEN, 5, 56 /* 448 bits, max key */, + BLOWFISH_BLOCK_LEN, BLOWFISH_BLOCK_LEN, 5, 56 /* 448 bits, max key */, blf_encrypt, blf_decrypt, blf_setkey, blf_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_cast5 = { CRYPTO_CAST_CBC, "CAST-128", - CAST128_BLOCK_LEN, 5, 16, + CAST128_BLOCK_LEN, CAST128_BLOCK_LEN, 5, 16, cast5_encrypt, cast5_decrypt, cast5_setkey, cast5_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_skipjack = { CRYPTO_SKIPJACK_CBC, "Skipjack", - SKIPJACK_BLOCK_LEN, 10, 10, + SKIPJACK_BLOCK_LEN, SKIPJACK_BLOCK_LEN, 10, 10, skipjack_encrypt, - skipjack_decrypt, - skipjack_setkey, + skipjack_decrypt, skipjack_setkey, skipjack_zerokey, - NULL + NULL, }; struct enc_xform enc_xform_rijndael128 = { CRYPTO_RIJNDAEL128_CBC, "Rijndael-128/AES", - RIJNDAEL128_BLOCK_LEN, 8, 32, + RIJNDAEL128_BLOCK_LEN, RIJNDAEL128_BLOCK_LEN, 16, 32, rijndael128_encrypt, rijndael128_decrypt, rijndael128_setkey, rijndael128_zerokey, - NULL + NULL, +}; + +struct enc_xform enc_xform_aes_icm = { + CRYPTO_AES_ICM, "AES-ICM", + RIJNDAEL128_BLOCK_LEN, RIJNDAEL128_BLOCK_LEN, 16, 32, + aes_icm_crypt, + aes_icm_crypt, + aes_icm_setkey, + rijndael128_zerokey, + aes_icm_reinit, +}; + +struct enc_xform enc_xform_aes_nist_gcm = { + CRYPTO_AES_NIST_GCM_16, "AES-GCM", + 1, 12, 16, 32, + aes_icm_crypt, + aes_icm_crypt, + aes_icm_setkey, + aes_icm_zerokey, + aes_gcm_reinit, +}; + +struct enc_xform enc_xform_aes_nist_gmac = { + CRYPTO_AES_NIST_GMAC, "AES-GMAC", + 1, 12, 16, 32, + NULL, + NULL, + NULL, + NULL, + NULL, }; struct enc_xform enc_xform_aes_xts = { CRYPTO_AES_XTS, "AES-XTS", - RIJNDAEL128_BLOCK_LEN, 32, 64, + RIJNDAEL128_BLOCK_LEN, 8, 32, 64, aes_xts_encrypt, aes_xts_decrypt, aes_xts_setkey, @@ -212,85 +263,115 @@ struct enc_xform enc_xform_aes_xts = { struct enc_xform enc_xform_arc4 = { CRYPTO_ARC4, "ARC4", - 1, 1, 32, + 1, 1, 1, 32, + NULL, NULL, NULL, NULL, NULL, - NULL }; struct enc_xform enc_xform_camellia = { CRYPTO_CAMELLIA_CBC, "Camellia", - CAMELLIA_BLOCK_LEN, 8, 32, + CAMELLIA_BLOCK_LEN, CAMELLIA_BLOCK_LEN, 8, 32, cml_encrypt, cml_decrypt, cml_setkey, cml_zerokey, - NULL + NULL, }; /* Authentication instances */ -struct auth_hash auth_hash_null = { +struct auth_hash auth_hash_null = { /* NB: context isn't used */ CRYPTO_NULL_HMAC, "NULL-HMAC", - 0, NULL_HASH_LEN, NULL_HMAC_BLOCK_LEN, sizeof(int), /* NB: context isn't used */ - null_init, null_update, null_final + 0, NULL_HASH_LEN, sizeof(int), NULL_HMAC_BLOCK_LEN, + null_init, null_reinit, null_reinit, null_update, null_final }; struct auth_hash auth_hash_hmac_md5 = { CRYPTO_MD5_HMAC, "HMAC-MD5", - 16, MD5_HASH_LEN, MD5_HMAC_BLOCK_LEN, sizeof(MD5_CTX), - (void (*) (void *)) MD5Init, MD5Update_int, + 16, MD5_HASH_LEN, sizeof(MD5_CTX), MD5_HMAC_BLOCK_LEN, + (void (*) (void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*) (u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_hmac_sha1 = { CRYPTO_SHA1_HMAC, "HMAC-SHA1", - 20, SHA1_HASH_LEN, SHA1_HMAC_BLOCK_LEN, sizeof(SHA1_CTX), - SHA1Init_int, SHA1Update_int, SHA1Final_int + 20, SHA1_HASH_LEN, sizeof(SHA1_CTX), SHA1_HMAC_BLOCK_LEN, + SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_ripemd_160 = { CRYPTO_RIPEMD160_HMAC, "HMAC-RIPEMD-160", - 20, RIPEMD160_HASH_LEN, RIPEMD160_HMAC_BLOCK_LEN, sizeof(RMD160_CTX), - (void (*)(void *)) RMD160Init, RMD160Update_int, + 20, RIPEMD160_HASH_LEN, sizeof(RMD160_CTX), RIPEMD160_HMAC_BLOCK_LEN, + (void (*)(void *)) RMD160Init, NULL, NULL, RMD160Update_int, (void (*)(u_int8_t *, void *)) RMD160Final }; struct auth_hash auth_hash_key_md5 = { CRYPTO_MD5_KPDK, "Keyed MD5", - 0, MD5_KPDK_HASH_LEN, 0, sizeof(MD5_CTX), - (void (*)(void *)) MD5Init, MD5Update_int, + 0, MD5_KPDK_HASH_LEN, sizeof(MD5_CTX), 0, + (void (*)(void *)) MD5Init, NULL, NULL, MD5Update_int, (void (*)(u_int8_t *, void *)) MD5Final }; struct auth_hash auth_hash_key_sha1 = { CRYPTO_SHA1_KPDK, "Keyed SHA1", - 0, SHA1_KPDK_HASH_LEN, 0, sizeof(SHA1_CTX), - SHA1Init_int, SHA1Update_int, SHA1Final_int + 0, SHA1_KPDK_HASH_LEN, sizeof(SHA1_CTX), 0, + SHA1Init_int, NULL, NULL, SHA1Update_int, SHA1Final_int }; struct auth_hash auth_hash_hmac_sha2_256 = { CRYPTO_SHA2_256_HMAC, "HMAC-SHA2-256", - 32, SHA2_256_HASH_LEN, SHA2_256_HMAC_BLOCK_LEN, sizeof(SHA256_CTX), - (void (*)(void *)) SHA256_Init, SHA256Update_int, + 32, SHA2_256_HASH_LEN, sizeof(SHA256_CTX), SHA2_256_HMAC_BLOCK_LEN, + (void (*)(void *)) SHA256_Init, NULL, NULL, SHA256Update_int, (void (*)(u_int8_t *, void *)) SHA256_Final }; struct auth_hash auth_hash_hmac_sha2_384 = { CRYPTO_SHA2_384_HMAC, "HMAC-SHA2-384", - 48, SHA2_384_HASH_LEN, SHA2_384_HMAC_BLOCK_LEN, sizeof(SHA384_CTX), - (void (*)(void *)) SHA384_Init, SHA384Update_int, + 48, SHA2_384_HASH_LEN, sizeof(SHA384_CTX), SHA2_384_HMAC_BLOCK_LEN, + (void (*)(void *)) SHA384_Init, NULL, NULL, SHA384Update_int, (void (*)(u_int8_t *, void *)) SHA384_Final }; struct auth_hash auth_hash_hmac_sha2_512 = { CRYPTO_SHA2_512_HMAC, "HMAC-SHA2-512", - 64, SHA2_512_HASH_LEN, SHA2_512_HMAC_BLOCK_LEN, sizeof(SHA512_CTX), - (void (*)(void *)) SHA512_Init, SHA512Update_int, + 64, SHA2_512_HASH_LEN, sizeof(SHA512_CTX), SHA2_512_HMAC_BLOCK_LEN, + (void (*)(void *)) SHA512_Init, NULL, NULL, SHA512Update_int, (void (*)(u_int8_t *, void *)) SHA512_Final }; +struct auth_hash auth_hash_nist_gmac_aes_128 = { + CRYPTO_AES_128_NIST_GMAC, "GMAC-AES-128", + 16, 16, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, + (void (*)(void *)) AES_GMAC_Init, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, + (void (*)(u_int8_t *, void *)) AES_GMAC_Final +}; + +struct auth_hash auth_hash_nist_gmac_aes_192 = { + CRYPTO_AES_192_NIST_GMAC, "GMAC-AES-192", + 24, 16, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, + (void (*)(void *)) AES_GMAC_Init, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, + (void (*)(u_int8_t *, void *)) AES_GMAC_Final +}; + +struct auth_hash auth_hash_nist_gmac_aes_256 = { + CRYPTO_AES_256_NIST_GMAC, "GMAC-AES-256", + 32, 16, sizeof(struct aes_gmac_ctx), GMAC_BLOCK_LEN, + (void (*)(void *)) AES_GMAC_Init, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Setkey, + (void (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Reinit, + (int (*)(void *, const u_int8_t *, u_int16_t)) AES_GMAC_Update, + (void (*)(u_int8_t *, void *)) AES_GMAC_Final +}; + /* Compression instance */ struct comp_algo comp_algo_deflate = { CRYPTO_DEFLATE_COMP, "Deflate", @@ -579,6 +660,74 @@ rijndael128_zerokey(u_int8_t **sched) *sched = NULL; } +void +aes_icm_reinit(caddr_t key, u_int8_t *iv) +{ + struct aes_icm_ctx *ctx; + + ctx = (struct aes_icm_ctx *)key; + bcopy(iv, ctx->ac_block, AESICM_BLOCKSIZE); +} + +void +aes_gcm_reinit(caddr_t key, u_int8_t *iv) +{ + struct aes_icm_ctx *ctx; + + aes_icm_reinit(key, iv); + + ctx = (struct aes_icm_ctx *)key; + /* GCM starts with 2 as counter 1 is used for final xor of tag. */ + bzero(&ctx->ac_block[AESICM_BLOCKSIZE - 4], 4); + ctx->ac_block[AESICM_BLOCKSIZE - 1] = 2; +} + +void +aes_icm_crypt(caddr_t key, u_int8_t *data) +{ + struct aes_icm_ctx *ctx; + u_int8_t keystream[AESICM_BLOCKSIZE]; + int i; + + ctx = (struct aes_icm_ctx *)key; + rijndaelEncrypt(ctx->ac_ek, ctx->ac_nr, ctx->ac_block, keystream); + for (i = 0; i < AESICM_BLOCKSIZE; i++) + data[i] ^= keystream[i]; + explicit_bzero(keystream, sizeof(keystream)); + + /* increment counter */ + for (i = AESICM_BLOCKSIZE - 1; + i >= 0; i--) + if (++ctx->ac_block[i]) /* continue on overflow */ + break; +} + +int +aes_icm_setkey(u_int8_t **sched, u_int8_t *key, int len) +{ + struct aes_icm_ctx *ctx; + + *sched = malloc(sizeof(struct aes_icm_ctx), M_CRYPTO_DATA, + M_NOWAIT | M_ZERO); + if (*sched == NULL) + return ENOMEM; + + ctx = (struct aes_icm_ctx *)*sched; + ctx->ac_nr = rijndaelKeySetupEnc(ctx->ac_ek, (u_char *)key, len * 8); + if (ctx->ac_nr == 0) + return EINVAL; + return 0; +} + +void +aes_icm_zerokey(u_int8_t **sched) +{ + + bzero(*sched, sizeof(struct aes_icm_ctx)); + free(*sched, M_CRYPTO_DATA); + *sched = NULL; +} + #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ @@ -728,8 +877,13 @@ null_init(void *ctx) { } +static void +null_reinit(void *ctx, const u_int8_t *buf, u_int16_t len) +{ +} + static int -null_update(void *ctx, u_int8_t *buf, u_int16_t len) +null_update(void *ctx, const u_int8_t *buf, u_int16_t len) { return 0; } @@ -742,14 +896,14 @@ null_final(u_int8_t *buf, void *ctx) } static int -RMD160Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +RMD160Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { RMD160Update(ctx, buf, len); return 0; } static int -MD5Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +MD5Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { MD5Update(ctx, buf, len); return 0; @@ -762,7 +916,7 @@ SHA1Init_int(void *ctx) } static int -SHA1Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA1Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA1Update(ctx, buf, len); return 0; @@ -775,21 +929,21 @@ SHA1Final_int(u_int8_t *blk, void *ctx) } static int -SHA256Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA256Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA256_Update(ctx, buf, len); return 0; } static int -SHA384Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA384Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA384_Update(ctx, buf, len); return 0; } static int -SHA512Update_int(void *ctx, u_int8_t *buf, u_int16_t len) +SHA512Update_int(void *ctx, const u_int8_t *buf, u_int16_t len) { SHA512_Update(ctx, buf, len); return 0; diff --git a/sys/opencrypto/xform.h b/sys/opencrypto/xform.h index 8df7b07e8bbd..e0b639796b83 100644 --- a/sys/opencrypto/xform.h +++ b/sys/opencrypto/xform.h @@ -9,6 +9,12 @@ * supported the development of this code. * * Copyright (c) 2000 Angelos D. Keromytis + * Copyright (c) 2014 The FreeBSD Foundation + * All rights reserved. + * + * Portions of this software were developed by John-Mark Gurney + * under sponsorship of the FreeBSD Foundation and + * Rubicon Communications, LLC (Netgate). * * Permission to use, copy, and modify this software without fee * is hereby granted, provided that this entire notice is included in @@ -29,6 +35,7 @@ #include <crypto/sha1.h> #include <crypto/sha2/sha2.h> #include <opencrypto/rmd160.h> +#include <opencrypto/gmac.h> /* Declarations */ struct auth_hash { @@ -36,10 +43,12 @@ struct auth_hash { char *name; u_int16_t keysize; u_int16_t hashsize; - u_int16_t blocksize; u_int16_t ctxsize; + u_int16_t blocksize; void (*Init) (void *); - int (*Update) (void *, u_int8_t *, u_int16_t); + void (*Setkey) (void *, const u_int8_t *, u_int16_t); + void (*Reinit) (void *, const u_int8_t *, u_int16_t); + int (*Update) (void *, const u_int8_t *, u_int16_t); void (*Final) (u_int8_t *, void *); }; @@ -50,6 +59,7 @@ struct enc_xform { int type; char *name; u_int16_t blocksize; + u_int16_t ivsize; u_int16_t minkey, maxkey; void (*encrypt) (caddr_t, u_int8_t *); void (*decrypt) (caddr_t, u_int8_t *); @@ -73,6 +83,7 @@ union authctx { SHA256_CTX sha256ctx; SHA384_CTX sha384ctx; SHA512_CTX sha512ctx; + struct aes_gmac_ctx aes_gmac_ctx; }; extern struct enc_xform enc_xform_null; @@ -82,6 +93,9 @@ extern struct enc_xform enc_xform_blf; extern struct enc_xform enc_xform_cast5; extern struct enc_xform enc_xform_skipjack; extern struct enc_xform enc_xform_rijndael128; +extern struct enc_xform enc_xform_aes_icm; +extern struct enc_xform enc_xform_aes_nist_gcm; +extern struct enc_xform enc_xform_aes_nist_gmac; extern struct enc_xform enc_xform_aes_xts; extern struct enc_xform enc_xform_arc4; extern struct enc_xform enc_xform_camellia; @@ -95,6 +109,9 @@ extern struct auth_hash auth_hash_hmac_ripemd_160; extern struct auth_hash auth_hash_hmac_sha2_256; extern struct auth_hash auth_hash_hmac_sha2_384; extern struct auth_hash auth_hash_hmac_sha2_512; +extern struct auth_hash auth_hash_nist_gmac_aes_128; +extern struct auth_hash auth_hash_nist_gmac_aes_192; +extern struct auth_hash auth_hash_nist_gmac_aes_256; extern struct comp_algo comp_algo_deflate; diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h index ec5f761e7a86..5d683e5242a8 100644 --- a/sys/sys/libkern.h +++ b/sys/sys/libkern.h @@ -80,6 +80,7 @@ struct malloc_type; uint32_t arc4random(void); void arc4rand(void *ptr, u_int len, int reseed); int bcmp(const void *, const void *, size_t); +int timingsafe_bcmp(const void *, const void *, size_t); void *bsearch(const void *, const void *, size_t, size_t, int (*)(const void *, const void *)); #ifndef HAVE_INLINE_FFS diff --git a/sys/sys/param.h b/sys/sys/param.h index 8a3ea14ec305..d25cdef69462 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1100049 /* Master, propagated to newvers */ +#define __FreeBSD_version 1100050 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, |