Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6

359ea2f1 · Linus Torvalds · 960b8466 · e1d5dea1 · 359ea2f1 · 359ea2f1
Commit 359ea2f1 authored Jul 06, 2005 by Linus Torvalds
16 changed files
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -65,7 +65,9 @@ CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
 head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o

 libs-y 					+= arch/x86_64/lib/
-core-y					+= arch/x86_64/kernel/ arch/x86_64/mm/
+core-y					+= arch/x86_64/kernel/ \
+					   arch/x86_64/mm/ \
+					   arch/x86_64/crypto/
 core-$(CONFIG_IA32_EMULATION)		+= arch/x86_64/ia32/
 drivers-$(CONFIG_PCI)			+= arch/x86_64/pci/
 drivers-$(CONFIG_OPROFILE)		+= arch/x86_64/oprofile/

--- a/arch/x86_64/crypto/Makefile
+++ b/arch/x86_64/crypto/Makefile
+# 
+# x86_64/crypto/Makefile 
+# 
+# Arch-specific CryptoAPI modules.
+# 
+
+obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
+
+aes-x86_64-y := aes-x86_64-asm.o aes.o
--- a/arch/x86_64/crypto/aes-x86_64-asm.S
+++ b/arch/x86_64/crypto/aes-x86_64-asm.S
+/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
+ *
+ * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
+ *
+ * License:
+ * This code can be distributed under the terms of the GNU General Public
+ * License (GPL) Version 2 provided that the above header down to and
+ * including this sentence is retained in full.
+ */
+
+.extern aes_ft_tab
+.extern aes_it_tab
+.extern aes_fl_tab
+.extern aes_il_tab
+
+.text
+
+#define R1	%rax
+#define R1E	%eax
+#define R1X	%ax
+#define R1H	%ah
+#define R1L	%al
+#define R2	%rbx
+#define R2E	%ebx
+#define R2X	%bx
+#define R2H	%bh
+#define R2L	%bl
+#define R3	%rcx
+#define R3E	%ecx
+#define R3X	%cx
+#define R3H	%ch
+#define R3L	%cl
+#define R4	%rdx
+#define R4E	%edx
+#define R4X	%dx
+#define R4H	%dh
+#define R4L	%dl
+#define R5	%rsi
+#define R5E	%esi
+#define R6	%rdi
+#define R6E	%edi
+#define R7	%rbp
+#define R7E	%ebp
+#define R8	%r8
+#define R9	%r9
+#define R10	%r10
+#define R11	%r11
+
+#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+	.global	FUNC;			\
+	.type	FUNC,@function;		\
+	.align	8;			\
+FUNC:	movq	r1,r2;			\
+	movq	r3,r4;			\
+	leaq	BASE+52(r8),r9;		\
+	movq	r10,r11;		\
+	movl	(r7),r5 ## E;		\
+	movl	4(r7),r1 ## E;		\
+	movl	8(r7),r6 ## E;		\
+	movl	12(r7),r7 ## E;		\
+	movl	(r8),r10 ## E;		\
+	xorl	-48(r9),r5 ## E;	\
+	xorl	-44(r9),r1 ## E;	\
+	xorl	-40(r9),r6 ## E;	\
+	xorl	-36(r9),r7 ## E;	\
+	cmpl	$24,r10 ## E;		\
+	jb	B128;			\
+	leaq	32(r9),r9;		\
+	je	B192;			\
+	leaq	32(r9),r9;
+
+#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+	movq	r1,r2;			\
+	movq	r3,r4;			\
+	movl	r5 ## E,(r9);		\
+	movl	r6 ## E,4(r9);		\
+	movl	r7 ## E,8(r9);		\
+	movl	r8 ## E,12(r9);		\
+	ret;
+
+#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
+	movzbl	r2 ## H,r5 ## E;	\
+	movzbl	r2 ## L,r6 ## E;	\
+	movl	TAB+1024(,r5,4),r5 ## E;\
+	movw	r4 ## X,r2 ## X;	\
+	movl	TAB(,r6,4),r6 ## E;	\
+	roll	$16,r2 ## E;		\
+	shrl	$16,r4 ## E;		\
+	movzbl	r4 ## H,r7 ## E;	\
+	movzbl	r4 ## L,r4 ## E;	\
+	xorl	OFFSET(r8),ra ## E;	\
+	xorl	OFFSET+4(r8),rb ## E;	\
+	xorl	TAB+3072(,r7,4),r5 ## E;\
+	xorl	TAB+2048(,r4,4),r6 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r4 ## E;	\
+	movl	TAB+1024(,r4,4),r4 ## E;\
+	movw	r3 ## X,r1 ## X;	\
+	roll	$16,r1 ## E;		\
+	shrl	$16,r3 ## E;		\
+	xorl	TAB(,r7,4),r5 ## E;	\
+	movzbl	r3 ## H,r7 ## E;	\
+	movzbl	r3 ## L,r3 ## E;	\
+	xorl	TAB+3072(,r7,4),r4 ## E;\
+	xorl	TAB+2048(,r3,4),r5 ## E;\
+	movzbl	r1 ## H,r7 ## E;	\
+	movzbl	r1 ## L,r3 ## E;	\
+	shrl	$16,r1 ## E;		\
+	xorl	TAB+3072(,r7,4),r6 ## E;\
+	movl	TAB+2048(,r3,4),r3 ## E;\
+	movzbl	r1 ## H,r7 ## E;	\
+	movzbl	r1 ## L,r1 ## E;	\
+	xorl	TAB+1024(,r7,4),r6 ## E;\
+	xorl	TAB(,r1,4),r3 ## E;	\
+	movzbl	r2 ## H,r1 ## E;	\
+	movzbl	r2 ## L,r7 ## E;	\
+	shrl	$16,r2 ## E;		\
+	xorl	TAB+3072(,r1,4),r3 ## E;\
+	xorl	TAB+2048(,r7,4),r4 ## E;\
+	movzbl	r2 ## H,r1 ## E;	\
+	movzbl	r2 ## L,r2 ## E;	\
+	xorl	OFFSET+8(r8),rc ## E;	\
+	xorl	OFFSET+12(r8),rd ## E;	\
+	xorl	TAB+1024(,r1,4),r3 ## E;\
+	xorl	TAB(,r2,4),r4 ## E;
+
+#define move_regs(r1,r2,r3,r4) \
+	movl	r3 ## E,r1 ## E;	\
+	movl	r4 ## E,r2 ## E;
+
+#define entry(FUNC,BASE,B128,B192) \
+	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+
+#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
+
+#define encrypt_round(TAB,OFFSET) \
+	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
+	move_regs(R1,R2,R5,R6)
+
+#define encrypt_final(TAB,OFFSET) \
+	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
+
+#define decrypt_round(TAB,OFFSET) \
+	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
+	move_regs(R1,R2,R5,R6)
+
+#define decrypt_final(TAB,OFFSET) \
+	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
+
+/* void aes_encrypt(void *ctx, u8 *out, const u8 *in) */
+
+	entry(aes_encrypt,0,enc128,enc192)
+	encrypt_round(aes_ft_tab,-96)
+	encrypt_round(aes_ft_tab,-80)
+enc192:	encrypt_round(aes_ft_tab,-64)
+	encrypt_round(aes_ft_tab,-48)
+enc128:	encrypt_round(aes_ft_tab,-32)
+	encrypt_round(aes_ft_tab,-16)
+	encrypt_round(aes_ft_tab,  0)
+	encrypt_round(aes_ft_tab, 16)
+	encrypt_round(aes_ft_tab, 32)
+	encrypt_round(aes_ft_tab, 48)
+	encrypt_round(aes_ft_tab, 64)
+	encrypt_round(aes_ft_tab, 80)
+	encrypt_round(aes_ft_tab, 96)
+	encrypt_final(aes_fl_tab,112)
+	return
+
+/* void aes_decrypt(void *ctx, u8 *out, const u8 *in) */
+
+	entry(aes_decrypt,240,dec128,dec192)
+	decrypt_round(aes_it_tab,-96)
+	decrypt_round(aes_it_tab,-80)
+dec192:	decrypt_round(aes_it_tab,-64)
+	decrypt_round(aes_it_tab,-48)
+dec128:	decrypt_round(aes_it_tab,-32)
+	decrypt_round(aes_it_tab,-16)
+	decrypt_round(aes_it_tab,  0)
+	decrypt_round(aes_it_tab, 16)
+	decrypt_round(aes_it_tab, 32)
+	decrypt_round(aes_it_tab, 48)
+	decrypt_round(aes_it_tab, 64)
+	decrypt_round(aes_it_tab, 80)
+	decrypt_round(aes_it_tab, 96)
+	decrypt_final(aes_il_tab,112)
+	return
--- a/arch/x86_64/crypto/aes.c
+++ b/arch/x86_64/crypto/aes.c
+/*
+ * Cryptographic API.
+ *
+ * AES Cipher Algorithm.
+ *
+ * Based on Brian Gladman's code.
+ *
+ * Linux developers:
+ *  Alexander Kjeldaas <astor@fast.no>
+ *  Herbert Valerio Riedel <hvr@hvrlab.org>
+ *  Kyle McMartin <kyle@debian.org>
+ *  Adam J. Richter <adam@yggdrasil.com> (conversion to 2.5 API).
+ *  Andreas Steinmetz <ast@domdv.de> (adapted to x86_64 assembler)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK.
+ * All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software in both source and binary
+ * form is allowed (with or without changes) provided that:
+ *
+ *   1. distributions of this source code include the above copyright
+ *      notice, this list of conditions and the following disclaimer;
+ *
+ *   2. distributions in binary form include the above copyright
+ *      notice, this list of conditions and the following disclaimer
+ *      in the documentation and/or other associated materials;
+ *
+ *   3. the copyright holder's name is not used to endorse products
+ *      built using this software without specific written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this product
+ * may be distributed under the terms of the GNU General Public License (GPL),
+ * in which case the provisions of the GPL apply INSTEAD OF those given above.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ */
+
+/* Some changes from the Gladman version:
+    s/RIJNDAEL(e_key)/E_KEY/g
+    s/RIJNDAEL(d_key)/D_KEY/g
+*/
+
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define AES_MIN_KEY_SIZE	16
+#define AES_MAX_KEY_SIZE	32
+
+#define AES_BLOCK_SIZE		16
+
+/*
+ * #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
+ */
+static inline u8 byte(const u32 x, const unsigned n)
+{
+	return x >> (n << 3);
+}
+
+#define u32_in(x) le32_to_cpu(*(const __le32 *)(x))
+
+struct aes_ctx
+{
+	u32 key_length;
+	u32 E[60];
+	u32 D[60];
+};
+
+#define E_KEY ctx->E
+#define D_KEY ctx->D
+
+static u8 pow_tab[256] __initdata;
+static u8 log_tab[256] __initdata;
+static u8 sbx_tab[256] __initdata;
+static u8 isb_tab[256] __initdata;
+static u32 rco_tab[10];
+u32 aes_ft_tab[4][256];
+u32 aes_it_tab[4][256];
+
+u32 aes_fl_tab[4][256];
+u32 aes_il_tab[4][256];
+
+static inline u8 f_mult(u8 a, u8 b)
+{
+	u8 aa = log_tab[a], cc = aa + log_tab[b];
+
+	return pow_tab[cc + (cc < aa ? 1 : 0)];
+}
+
+#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0)
+
+#define ls_box(x)				\
+	(aes_fl_tab[0][byte(x, 0)] ^		\
+	 aes_fl_tab[1][byte(x, 1)] ^		\
+	 aes_fl_tab[2][byte(x, 2)] ^		\
+	 aes_fl_tab[3][byte(x, 3)])
+
+static void __init gen_tabs(void)
+{
+	u32 i, t;
+	u8 p, q;
+
+	/* log and power tables for GF(2**8) finite field with
+	   0x011b as modular polynomial - the simplest primitive
+	   root is 0x03, used here to generate the tables */
+
+	for (i = 0, p = 1; i < 256; ++i) {
+		pow_tab[i] = (u8)p;
+		log_tab[p] = (u8)i;
+
+		p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
+	}
+
+	log_tab[1] = 0;
+
+	for (i = 0, p = 1; i < 10; ++i) {
+		rco_tab[i] = p;
+
+		p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
+	}
+
+	for (i = 0; i < 256; ++i) {
+		p = (i ? pow_tab[255 - log_tab[i]] : 0);
+		q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
+		p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
+		sbx_tab[i] = p;
+		isb_tab[p] = (u8)i;
+	}
+
+	for (i = 0; i < 256; ++i) {
+		p = sbx_tab[i];
+
+		t = p;
+		aes_fl_tab[0][i] = t;
+		aes_fl_tab[1][i] = rol32(t, 8);
+		aes_fl_tab[2][i] = rol32(t, 16);
+		aes_fl_tab[3][i] = rol32(t, 24);
+
+		t = ((u32)ff_mult(2, p)) |
+		    ((u32)p << 8) |
+		    ((u32)p << 16) | ((u32)ff_mult(3, p) << 24);
+
+		aes_ft_tab[0][i] = t;
+		aes_ft_tab[1][i] = rol32(t, 8);
+		aes_ft_tab[2][i] = rol32(t, 16);
+		aes_ft_tab[3][i] = rol32(t, 24);
+
+		p = isb_tab[i];
+
+		t = p;
+		aes_il_tab[0][i] = t;
+		aes_il_tab[1][i] = rol32(t, 8);
+		aes_il_tab[2][i] = rol32(t, 16);
+		aes_il_tab[3][i] = rol32(t, 24);
+
+		t = ((u32)ff_mult(14, p)) |
+		    ((u32)ff_mult(9, p) << 8) |
+		    ((u32)ff_mult(13, p) << 16) |
+		    ((u32)ff_mult(11, p) << 24);
+
+		aes_it_tab[0][i] = t;
+		aes_it_tab[1][i] = rol32(t, 8);
+		aes_it_tab[2][i] = rol32(t, 16);
+		aes_it_tab[3][i] = rol32(t, 24);
+	}
+}
+
+#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
+
+#define imix_col(y, x)			\
+	u    = star_x(x);		\
+	v    = star_x(u);		\
+	w    = star_x(v);		\
+	t    = w ^ (x);			\
+	(y)  = u ^ v ^ w;		\
+	(y) ^= ror32(u ^ t,  8) ^	\
+	       ror32(v ^ t, 16) ^	\
+	       ror32(t, 24)
+
+/* initialise the key schedule from the user supplied key */
+
+#define loop4(i)					\
+{							\
+	t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];	\
+	t ^= E_KEY[4 * i];     E_KEY[4 * i + 4] = t;	\
+	t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t;	\
+	t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t;	\
+	t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t;	\
+}
+
+#define loop6(i)					\
+{							\
+	t = ror32(t,  8); t = ls_box(t) ^ rco_tab[i];	\
+	t ^= E_KEY[6 * i];     E_KEY[6 * i + 6] = t;	\
+	t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t;	\
+	t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t;	\
+	t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t;	\
+	t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t;	\
+	t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t;	\
+}
+
+#define loop8(i)					\
+{							\
+	t = ror32(t,  8); ; t = ls_box(t) ^ rco_tab[i];	\
+	t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;	\
+	t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;	\
+	t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;	\
+	t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;	\
+	t  = E_KEY[8 * i + 4] ^ ls_box(t);		\
+	E_KEY[8 * i + 12] = t;				\
+	t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t;	\
+	t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t;	\
+	t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;	\
+}
+
+static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len,
+		       u32 *flags)
+{
+	struct aes_ctx *ctx = ctx_arg;
+	u32 i, j, t, u, v, w;
+
+	if (key_len != 16 && key_len != 24 && key_len != 32) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	ctx->key_length = key_len;
+
+	D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key);
+	D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4);
+	D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8);
+	D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12);
+
+	switch (key_len) {
+	case 16:
+		t = E_KEY[3];
+		for (i = 0; i < 10; ++i)
+			loop4(i);
+		break;
+
+	case 24:
+		E_KEY[4] = u32_in(in_key + 16);
+		t = E_KEY[5] = u32_in(in_key + 20);
+		for (i = 0; i < 8; ++i)
+			loop6 (i);
+		break;
+
+	case 32:
+		E_KEY[4] = u32_in(in_key + 16);
+		E_KEY[5] = u32_in(in_key + 20);
+		E_KEY[6] = u32_in(in_key + 24);
+		t = E_KEY[7] = u32_in(in_key + 28);
+		for (i = 0; i < 7; ++i)
+			loop8(i);
+		break;
+	}
+
+	D_KEY[0] = E_KEY[key_len + 24];
+	D_KEY[1] = E_KEY[key_len + 25];
+	D_KEY[2] = E_KEY[key_len + 26];
+	D_KEY[3] = E_KEY[key_len + 27];
+
+	for (i = 4; i < key_len + 24; ++i) {
+		j = key_len + 24 - (i & ~3) + (i & 3);
+		imix_col(D_KEY[j], E_KEY[i]);
+	}
+
+	return 0;
+}
+
+extern void aes_encrypt(void *ctx_arg, u8 *out, const u8 *in);
+extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in);
+
+static struct crypto_alg aes_alg = {
+	.cra_name		=	"aes",
+	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
+	.cra_blocksize		=	AES_BLOCK_SIZE,
+	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_module		=	THIS_MODULE,
+	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
+	.cra_u			=	{
+		.cipher = {
+			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
+			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
+			.cia_setkey	   	= 	aes_set_key,
+			.cia_encrypt	 	=	aes_encrypt,
+			.cia_decrypt	  	=	aes_decrypt
+		}
+	}
+};
+
+static int __init aes_init(void)
+{
+	gen_tabs();
+	return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+	crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm");
+MODULE_LICENSE("GPL");
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -146,7 +146,7 @@ config CRYPTO_SERPENT

 config CRYPTO_AES
 	tristate "AES cipher algorithms"
-	depends on CRYPTO && !((X86 || UML_X86) && !64BIT)
+	depends on CRYPTO && !(X86 || UML_X86)
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
 	  algorithm.
@@ -184,6 +184,26 @@ config CRYPTO_AES_586

 	  See <http://csrc.nist.gov/encryption/aes/> for more information.

+config CRYPTO_AES_X86_64
+	tristate "AES cipher algorithms (x86_64)"
+	depends on CRYPTO && ((X86 || UML_X86) && 64BIT)
+	help
+	  AES cipher algorithms (FIPS-197). AES uses the Rijndael 
+	  algorithm.
+
+	  Rijndael appears to be consistently a very good performer in
+	  both hardware and software across a wide range of computing 
+	  environments regardless of its use in feedback or non-feedback 
+	  modes. Its key setup time is excellent, and its key agility is 
+	  good. Rijndael's very low memory requirements make it very well 
+	  suited for restricted-space environments, in which it also 
+	  demonstrates excellent performance. Rijndael's operations are 
+	  among the easiest to defend against power and timing attacks.	
+
+	  The AES specifies three key sizes: 128, 192 and 256 bits	  
+
+	  See <http://csrc.nist.gov/encryption/aes/> for more information.
+
 config CRYPTO_CAST5
 	tristate "CAST5 (CAST-128) cipher algorithm"
 	depends on CRYPTO

--- a/crypto/api.c
+++ b/crypto/api.c
@@ -13,9 +13,12 @@
 * any later version.
 *
 */
+
+#include <linux/compiler.h>
 #include <linux/init.h>
 #include <linux/crypto.h>
 #include <linux/errno.h>
+#include <linux/kmod.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include "internal.h"
@@ -33,7 +36,7 @@ static inline void crypto_alg_put(struct crypto_alg *alg)
 	module_put(alg->cra_module);
 }

-struct crypto_alg *crypto_alg_lookup(const char *name)
+static struct crypto_alg *crypto_alg_lookup(const char *name)
 {
 	struct crypto_alg *q, *alg = NULL;

@@ -54,6 +57,13 @@ struct crypto_alg *crypto_alg_lookup(const char *name)
 	return alg;
 }

+/* A far more intelligent version of this is planned.  For now, just
+ * try an exact match on the name of the algorithm. */
+static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name)
+{
+	return try_then_request_module(crypto_alg_lookup(name), name);
+}
+
 static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags)
 {
 	tfm->crt_flags = 0;
@@ -117,20 +127,46 @@ static void crypto_exit_ops(struct crypto_tfm *tfm)
 	}
 }

+static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags)
+{
+	unsigned int len;
+
+	switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) {
+	default:
+		BUG();
+
+	case CRYPTO_ALG_TYPE_CIPHER:
+		len = crypto_cipher_ctxsize(alg, flags);
+		break;
+		
+	case CRYPTO_ALG_TYPE_DIGEST:
+		len = crypto_digest_ctxsize(alg, flags);
+		break;
+		
+	case CRYPTO_ALG_TYPE_COMPRESS:
+		len = crypto_compress_ctxsize(alg, flags);
+		break;
+	}
+
+	return len + alg->cra_alignmask;
+}
+
 struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags)
 {
 	struct crypto_tfm *tfm = NULL;
 	struct crypto_alg *alg;
+	unsigned int tfm_size;

 	alg = crypto_alg_mod_lookup(name);
 	if (alg == NULL)
 		goto out;
-	
-	tfm = kmalloc(sizeof(*tfm) + alg->cra_ctxsize, GFP_KERNEL);
+
+	tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, flags);
+	tfm = kmalloc(tfm_size, GFP_KERNEL);
 	if (tfm == NULL)
 		goto out_put;

-	memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize);
+	memset(tfm, 0, tfm_size);
 	
 	tfm->__crt_alg = alg;
 	
@@ -155,8 +191,14 @@ out:

 void crypto_free_tfm(struct crypto_tfm *tfm)
 {
-	struct crypto_alg *alg = tfm->__crt_alg;
-	int size = sizeof(*tfm) + alg->cra_ctxsize;
+	struct crypto_alg *alg;
+	int size;
+
+	if (unlikely(!tfm))
+		return;
+
+	alg = tfm->__crt_alg;
+	size = sizeof(*tfm) + alg->cra_ctxsize;

 	crypto_exit_ops(tfm);
 	crypto_alg_put(alg);
@@ -168,6 +210,12 @@ int crypto_register_alg(struct crypto_alg *alg)
 {
 	int ret = 0;
 	struct crypto_alg *q;
+
+	if (alg->cra_alignmask & (alg->cra_alignmask + 1))
+		return -EINVAL;
+
+	if (alg->cra_alignmask > PAGE_SIZE)
+		return -EINVAL;
 	
 	down_write(&crypto_alg_sem);
 	

--- a/crypto/cipher.c
+++ b/crypto/cipher.c
--- a/crypto/des.c
+++ b/crypto/des.c
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -49,8 +49,7 @@ int crypto_alloc_hmac_block(struct crypto_tfm *tfm)

 void crypto_free_hmac_block(struct crypto_tfm *tfm)
 {
-	if (tfm->crt_digest.dit_hmac_block)
-		kfree(tfm->crt_digest.dit_hmac_block);
+	kfree(tfm->crt_digest.dit_hmac_block);
 }

 void crypto_hmac_init(struct crypto_tfm *tfm, u8 *key, unsigned int *keylen)

--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -16,7 +16,7 @@
 #include <linux/highmem.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
-#include <linux/kmod.h>
+#include <linux/kernel.h>
 #include <asm/kmap_types.h>

 extern enum km_type crypto_km_types[];
@@ -42,20 +42,6 @@ static inline void crypto_yield(struct crypto_tfm *tfm)
 		cond_resched();
 }

-static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
-{
-	return (void *)&tfm[1];
-}
-
-struct crypto_alg *crypto_alg_lookup(const char *name);
-
-/* A far more intelligent version of this is planned.  For now, just
- * try an exact match on the name of the algorithm. */
-static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name)
-{
-	return try_then_request_module(crypto_alg_lookup(name), name);
-}
-
 #ifdef CONFIG_CRYPTO_HMAC
 int crypto_alloc_hmac_block(struct crypto_tfm *tfm);
 void crypto_free_hmac_block(struct crypto_tfm *tfm);
@@ -76,6 +62,33 @@ static inline void crypto_init_proc(void)
 { }
 #endif

+static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg,
+						 int flags)
+{
+	return alg->cra_ctxsize;
+}
+
+static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg,
+						 int flags)
+{
+	unsigned int len = alg->cra_ctxsize;
+	
+	switch (flags & CRYPTO_TFM_MODE_MASK) {
+	case CRYPTO_TFM_MODE_CBC:
+		len = ALIGN(len, alg->cra_alignmask + 1);
+		len += alg->cra_blocksize;
+		break;
+	}
+
+	return len;
+}
+
+static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg,
+						   int flags)
+{
+	return alg->cra_ctxsize;
+}
+
 int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags);
 int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags);

--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -100,7 +100,7 @@ void scatterwalk_done(struct scatter_walk *walk, int out, int more)
 int scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
 			   size_t nbytes, int out)
 {
-	do {
+	while (nbytes > walk->len_this_page) {
 		memcpy_dir(buf, walk->data, walk->len_this_page, out);
 		buf += walk->len_this_page;
 		nbytes -= walk->len_this_page;
@@ -108,7 +108,7 @@ int scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
 		scatterwalk_unmap(walk, out);
 		scatterwalk_pagedone(walk, out, 1);
 		scatterwalk_map(walk, out);
-	} while (nbytes > walk->len_this_page);
+	}

 	memcpy_dir(buf, walk->data, nbytes, out);
 	return nbytes;

--- a/crypto/scatterwalk.h
+++ b/crypto/scatterwalk.h
@@ -40,10 +40,10 @@ static inline int scatterwalk_samebuf(struct scatter_walk *walk_in,
 	       walk_in->offset == walk_out->offset;
 }

-static inline int scatterwalk_across_pages(struct scatter_walk *walk,
-					   unsigned int nbytes)
+static inline unsigned int scatterwalk_clamp(struct scatter_walk *walk,
+					     unsigned int nbytes)
 {
-	return nbytes > walk->len_this_page;
+	return nbytes > walk->len_this_page ? walk->len_this_page : nbytes;
 }

 static inline void scatterwalk_advance(struct scatter_walk *walk,
@@ -55,6 +55,12 @@ static inline void scatterwalk_advance(struct scatter_walk *walk,
 	walk->len_this_segment -= nbytes;
 }

+static inline unsigned int scatterwalk_aligned(struct scatter_walk *walk,
+					       unsigned int alignmask)
+{
+	return !(walk->offset & alignmask);
+}
+
 void scatterwalk_start(struct scatter_walk *walk, struct scatterlist *sg);
 int scatterwalk_copychunks(void *buf, struct scatter_walk *walk, size_t nbytes, int out);
 void scatterwalk_map(struct scatter_walk *walk, int out);

--- a/crypto/serpent.c
+++ b/crypto/serpent.c
@@ -210,7 +210,6 @@
 	x4 ^= x2;

 struct serpent_ctx {
-	u8 iv[SERPENT_BLOCK_SIZE];
 	u32 expkey[SERPENT_EXPKEY_WORDS];
 };


--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -49,6 +49,7 @@
 #include <linux/errno.h>
 #include <linux/crypto.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <asm/byteorder.h>
 #include "padlock.h"

@@ -59,8 +60,12 @@
 #define AES_EXTENDED_KEY_SIZE_B	(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))

 struct aes_ctx {
-	uint32_t e_data[AES_EXTENDED_KEY_SIZE+4];
-	uint32_t d_data[AES_EXTENDED_KEY_SIZE+4];
+	uint32_t e_data[AES_EXTENDED_KEY_SIZE];
+	uint32_t d_data[AES_EXTENDED_KEY_SIZE];
+	struct {
+		struct cword encrypt;
+		struct cword decrypt;
+	} cword;
 	uint32_t *E;
 	uint32_t *D;
 	int key_length;
@@ -280,10 +285,15 @@ aes_hw_extkey_available(uint8_t key_len)
 	return 0;
 }

+static inline struct aes_ctx *aes_ctx(void *ctx)
+{
+	return (struct aes_ctx *)ALIGN((unsigned long)ctx, PADLOCK_ALIGNMENT);
+}
+
 static int
 aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t *flags)
 {
-	struct aes_ctx *ctx = ctx_arg;
+	struct aes_ctx *ctx = aes_ctx(ctx_arg);
 	uint32_t i, t, u, v, w;
 	uint32_t P[AES_EXTENDED_KEY_SIZE];
 	uint32_t rounds;
@@ -295,25 +305,36 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t

 	ctx->key_length = key_len;

+	/*
+	 * If the hardware is capable of generating the extended key
+	 * itself we must supply the plain key for both encryption
+	 * and decryption.
+	 */
 	ctx->E = ctx->e_data;
-	ctx->D = ctx->d_data;
-
-	/* Ensure 16-Bytes alignmentation of keys for VIA PadLock. */
-	if ((int)(ctx->e_data) & 0x0F)
-		ctx->E += 4 - (((int)(ctx->e_data) & 0x0F) / sizeof (ctx->e_data[0]));
-
-	if ((int)(ctx->d_data) & 0x0F)
-		ctx->D += 4 - (((int)(ctx->d_data) & 0x0F) / sizeof (ctx->d_data[0]));
+	ctx->D = ctx->e_data;

 	E_KEY[0] = uint32_t_in (in_key);
 	E_KEY[1] = uint32_t_in (in_key + 4);
 	E_KEY[2] = uint32_t_in (in_key + 8);
 	E_KEY[3] = uint32_t_in (in_key + 12);

+	/* Prepare control words. */
+	memset(&ctx->cword, 0, sizeof(ctx->cword));
+
+	ctx->cword.decrypt.encdec = 1;
+	ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4;
+	ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds;
+	ctx->cword.encrypt.ksize = (key_len - 16) / 8;
+	ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize;
+
 	/* Don't generate extended keys if the hardware can do it. */
 	if (aes_hw_extkey_available(key_len))
 		return 0;

+	ctx->D = ctx->d_data;
+	ctx->cword.encrypt.keygen = 1;
+	ctx->cword.decrypt.keygen = 1;
+
 	switch (key_len) {
 	case 16:
 		t = E_KEY[3];
@@ -369,10 +390,9 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t

 /* ====== Encryption/decryption routines ====== */

-/* This is the real call to PadLock. */
-static inline void
-padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key,
-		   void *control_word, uint32_t count)
+/* These are the real call to PadLock. */
+static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key,
+				      void *control_word, u32 count)
 {
 	asm volatile ("pushfl; popfl");		/* enforce key reload. */
 	asm volatile (".byte 0xf3,0x0f,0xa7,0xc8"	/* rep xcryptecb */
@@ -380,60 +400,70 @@ padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key,
 		      : "d"(control_word), "b"(key), "c"(count));
 }

-static void
-aes_padlock(void *ctx_arg, uint8_t *out_arg, const uint8_t *in_arg, int encdec)
+static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key,
+				     u8 *iv, void *control_word, u32 count)
 {
-	/* Don't blindly modify this structure - the items must 
-	   fit on 16-Bytes boundaries! */
-	struct padlock_xcrypt_data {
-		uint8_t buf[AES_BLOCK_SIZE];
-		union cword cword;
-	};
-
-	struct aes_ctx *ctx = ctx_arg;
-	char bigbuf[sizeof(struct padlock_xcrypt_data) + 16];
-	struct padlock_xcrypt_data *data;
-	void *key;
-
-	/* Place 'data' at the first 16-Bytes aligned address in 'bigbuf'. */
-	if (((long)bigbuf) & 0x0F)
-		data = (void*)(bigbuf + 16 - ((long)bigbuf & 0x0F));
-	else
-		data = (void*)bigbuf;
-
-	/* Prepare Control word. */
-	memset (data, 0, sizeof(struct padlock_xcrypt_data));
-	data->cword.b.encdec = !encdec;	/* in the rest of cryptoapi ENC=1/DEC=0 */
-	data->cword.b.rounds = 10 + (ctx->key_length - 16) / 4;
-	data->cword.b.ksize = (ctx->key_length - 16) / 8;
-
-	/* Is the hardware capable to generate the extended key? */
-	if (!aes_hw_extkey_available(ctx->key_length))
-		data->cword.b.keygen = 1;
-
-	/* ctx->E starts with a plain key - if the hardware is capable
-	   to generate the extended key itself we must supply
-	   the plain key for both Encryption and Decryption. */
-	if (encdec == CRYPTO_DIR_ENCRYPT || data->cword.b.keygen == 0)
-		key = ctx->E;
-	else
-		key = ctx->D;
-	
-	memcpy(data->buf, in_arg, AES_BLOCK_SIZE);
-	padlock_xcrypt_ecb(data->buf, data->buf, key, &data->cword, 1);
-	memcpy(out_arg, data->buf, AES_BLOCK_SIZE);
+	/* Enforce key reload. */
+	asm volatile ("pushfl; popfl");
+	/* rep xcryptcbc */
+	asm volatile (".byte 0xf3,0x0f,0xa7,0xd0"
+		      : "+S" (input), "+D" (output), "+a" (iv)
+		      : "d" (control_word), "b" (key), "c" (count));
+	return iv;
 }

 static void
 aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
 {
-	aes_padlock(ctx_arg, out, in, CRYPTO_DIR_ENCRYPT);
+	struct aes_ctx *ctx = aes_ctx(ctx_arg);
+	padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, 1);
 }

 static void
 aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
 {
-	aes_padlock(ctx_arg, out, in, CRYPTO_DIR_DECRYPT);
+	struct aes_ctx *ctx = aes_ctx(ctx_arg);
+	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1);
+}
+
+static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out,
+				    const u8 *in, unsigned int nbytes)
+{
+	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt,
+			   nbytes / AES_BLOCK_SIZE);
+	return nbytes & ~(AES_BLOCK_SIZE - 1);
+}
+
+static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out,
+				    const u8 *in, unsigned int nbytes)
+{
+	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt,
+			   nbytes / AES_BLOCK_SIZE);
+	return nbytes & ~(AES_BLOCK_SIZE - 1);
+}
+
+static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out,
+				    const u8 *in, unsigned int nbytes)
+{
+	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	u8 *iv;
+
+	iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info,
+				&ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE);
+	memcpy(desc->info, iv, AES_BLOCK_SIZE);
+
+	return nbytes & ~(AES_BLOCK_SIZE - 1);
+}
+
+static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out,
+				    const u8 *in, unsigned int nbytes)
+{
+	struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm));
+	padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt,
+			   nbytes / AES_BLOCK_SIZE);
+	return nbytes & ~(AES_BLOCK_SIZE - 1);
 }

 static struct crypto_alg aes_alg = {
@@ -441,6 +471,7 @@ static struct crypto_alg aes_alg = {
 	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
 	.cra_blocksize		=	AES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct aes_ctx),
+	.cra_alignmask		=	PADLOCK_ALIGNMENT - 1,
 	.cra_module		=	THIS_MODULE,
 	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_u			=	{
@@ -449,7 +480,11 @@ static struct crypto_alg aes_alg = {
 			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
 			.cia_setkey	   	= 	aes_set_key,
 			.cia_encrypt	 	=	aes_encrypt,
-			.cia_decrypt	  	=	aes_decrypt
+			.cia_decrypt	  	=	aes_decrypt,
+			.cia_encrypt_ecb 	=	aes_encrypt_ecb,
+			.cia_decrypt_ecb  	=	aes_decrypt_ecb,
+			.cia_encrypt_cbc 	=	aes_encrypt_cbc,
+			.cia_decrypt_cbc  	=	aes_decrypt_cbc,
 		}
 	}
 };

--- a/drivers/crypto/padlock.h
+++ b/drivers/crypto/padlock.h
@@ -13,18 +13,18 @@
 #ifndef _CRYPTO_PADLOCK_H
 #define _CRYPTO_PADLOCK_H

+#define PADLOCK_ALIGNMENT 16
+
 /* Control word. */
-union cword {
-	uint32_t cword[4];
-	struct {
-		int rounds:4;
-		int algo:3;
-		int keygen:1;
-		int interm:1;
-		int encdec:1;
-		int ksize:2;
-	} b;
-};
+struct cword {
+	int __attribute__ ((__packed__))
+		rounds:4,
+		algo:3,
+		keygen:1,
+		interm:1,
+		encdec:1,
+		ksize:2;
+} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT)));

 #define PFX	"padlock: "


--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -61,6 +61,15 @@
 #define CRYPTO_DIR_DECRYPT		0

 struct scatterlist;
+struct crypto_tfm;
+
+struct cipher_desc {
+	struct crypto_tfm *tfm;
+	void (*crfn)(void *ctx, u8 *dst, const u8 *src);
+	unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst,
+			     const u8 *src, unsigned int nbytes);
+	void *info;
+};

 /*
 * Algorithms: modular crypto algorithm implementations, managed
@@ -73,6 +82,19 @@ struct cipher_alg {
 	                  unsigned int keylen, u32 *flags);
 	void (*cia_encrypt)(void *ctx, u8 *dst, const u8 *src);
 	void (*cia_decrypt)(void *ctx, u8 *dst, const u8 *src);
+
+	unsigned int (*cia_encrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_ecb)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_encrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
+	unsigned int (*cia_decrypt_cbc)(const struct cipher_desc *desc,
+					u8 *dst, const u8 *src,
+					unsigned int nbytes);
 };

 struct digest_alg {
@@ -102,6 +124,7 @@ struct crypto_alg {
 	u32 cra_flags;
 	unsigned int cra_blocksize;
 	unsigned int cra_ctxsize;
+	unsigned int cra_alignmask;
 	const char cra_name[CRYPTO_MAX_ALG_NAME];

 	union {
@@ -136,7 +159,6 @@ static inline int crypto_alg_available(const char *name, u32 flags)
 * and core processing logic.  Managed via crypto_alloc_tfm() and
 * crypto_free_tfm(), as well as the various helpers below.
 */
-struct crypto_tfm;

 struct cipher_tfm {
 	void *cit_iv;
@@ -266,6 +288,16 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm)
 	return tfm->__crt_alg->cra_digest.dia_digestsize;
 }

+static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_alignmask;
+}
+
+static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
+{
+	return (void *)&tfm[1];
+}
+
 /*
 * API wrappers.
 */