#include <machine/asm.h>

/*
 * ossl_vaes_vpclmulqdq_capable
 *
 * Takes no register arguments.  Loads the 64-bit word at
 * OPENSSL_ia32cap_P+8 and tests it against the mask 0x600C0030000
 * (bits 16, 17, 30, 31, 41 and 42 of that word).
 *
 * Returns in %rax:
 *   the mask value itself (non-zero) when every one of those bits is set,
 *   0 otherwise.
 *
 * NOTE(review): per the OPENSSL_ia32cap documentation these bits presumably
 * correspond to AVX512F/DQ/BW/VL plus VAES and VPCLMULQDQ - confirm against
 * the capability-vector layout used by this build.
 *
 * Clobbers: %rcx, %rdx, flags.
 */
.globl	ossl_vaes_vpclmulqdq_capable
.type	ossl_vaes_vpclmulqdq_capable,@function
.p2align	5
ossl_vaes_vpclmulqdq_capable:
	/* default answer: not capable */
	xorl	%eax,%eax
	/* required-feature mask (same value as decimal 6600291188736) */
	movq	$0x600C0030000,%rdx
	/* capability word under test */
	movq	OPENSSL_ia32cap_P+8(%rip),%rcx

	/* keep only the required bits, then demand that all of them survived */
	andq	%rdx,%rcx
	cmpq	%rdx,%rcx
	cmoveq	%rcx,%rax
	/* encoded "rep ret" */
	.byte	0xf3,0xc3
.size	ossl_vaes_vpclmulqdq_capable, .-ossl_vaes_vpclmulqdq_capable
.text	
/*
 * ossl_aes_gcm_init_avx512
 *
 * Inputs, as used by the code below:
 *   %rdi - base of an AES key schedule: 16-byte round keys are read from
 *          0(%rdi) upward and a 32-bit round selector from 240(%rdi).
 *   %rsi - base of the output area; this routine writes offsets 96..351.
 *
 * Operation:
 *   1. Encrypt an all-zero 128-bit block with the round keys.  Selector
 *      values 9, 11 and 13 choose 9, 11 or 13 vaesenc rounds followed by one
 *      vaesenclast.  Any other selector skips the encryption, so the table
 *      is then derived from an all-zero value.
 *   2. Shuffle the bytes of the result with SHUF_MASK, shift it left by one
 *      bit as a 128-bit quantity and conditionally XOR in POLY.  Call the
 *      result K.
 *   3. Using the carry-less multiply + POLY2 reduction sequence repeated
 *      below, compute K^2..K^16 and store all sixteen 128-bit values so that
 *      K^n sits at (336 - 16*(n-1))(%rsi): K^1 at 336 down to K^16 at 96.
 *
 * SHUF_MASK, TWOONE, POLY and POLY2 are constants defined outside this view.
 * NOTE(review): given the function name, K is presumably the GHASH key in
 * the pre-shifted form this implementation multiplies with - confirm against
 * the consumers of the table.
 *
 * Clobbers: %eax, flags, zmm0-zmm5, zmm16.  vzeroupper is executed before
 * returning.
 */
.globl	ossl_aes_gcm_init_avx512
.type	ossl_aes_gcm_init_avx512,@function
.align	32
ossl_aes_gcm_init_avx512:
.cfi_startproc	
/* bytes F3 0F 1E FA = endbr64 */
.byte	243,15,30,250
	/* xmm16 = all-zero block to be encrypted */
	vpxorq	%xmm16,%xmm16,%xmm16


	/* dispatch on the round selector stored at offset 240 of the key schedule */
	movl	240(%rdi),%eax
	cmpl	$9,%eax
	je	.Laes_128_0
	cmpl	$11,%eax
	je	.Laes_192_0
	cmpl	$13,%eax
	je	.Laes_256_0
	/* unrecognised selector: continue with xmm16 still zero */
	jmp	.Lexit_aes_0
.align	32
/* selector 9: whitening key, 9 vaesenc rounds, final round key at offset 160 */
.Laes_128_0:
	vpxorq	0(%rdi),%xmm16,%xmm16

	vaesenc	16(%rdi),%xmm16,%xmm16

	vaesenc	32(%rdi),%xmm16,%xmm16

	vaesenc	48(%rdi),%xmm16,%xmm16

	vaesenc	64(%rdi),%xmm16,%xmm16

	vaesenc	80(%rdi),%xmm16,%xmm16

	vaesenc	96(%rdi),%xmm16,%xmm16

	vaesenc	112(%rdi),%xmm16,%xmm16

	vaesenc	128(%rdi),%xmm16,%xmm16

	vaesenc	144(%rdi),%xmm16,%xmm16

	vaesenclast	160(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_0
.align	32
/* selector 11: whitening key, 11 vaesenc rounds, final round key at offset 192 */
.Laes_192_0:
	vpxorq	0(%rdi),%xmm16,%xmm16

	vaesenc	16(%rdi),%xmm16,%xmm16

	vaesenc	32(%rdi),%xmm16,%xmm16

	vaesenc	48(%rdi),%xmm16,%xmm16

	vaesenc	64(%rdi),%xmm16,%xmm16

	vaesenc	80(%rdi),%xmm16,%xmm16

	vaesenc	96(%rdi),%xmm16,%xmm16

	vaesenc	112(%rdi),%xmm16,%xmm16

	vaesenc	128(%rdi),%xmm16,%xmm16

	vaesenc	144(%rdi),%xmm16,%xmm16

	vaesenc	160(%rdi),%xmm16,%xmm16

	vaesenc	176(%rdi),%xmm16,%xmm16

	vaesenclast	192(%rdi),%xmm16,%xmm16
	jmp	.Lexit_aes_0
.align	32
/* selector 13: whitening key, 13 vaesenc rounds, final round key at offset 224 */
.Laes_256_0:
	vpxorq	0(%rdi),%xmm16,%xmm16

	vaesenc	16(%rdi),%xmm16,%xmm16

	vaesenc	32(%rdi),%xmm16,%xmm16

	vaesenc	48(%rdi),%xmm16,%xmm16

	vaesenc	64(%rdi),%xmm16,%xmm16

	vaesenc	80(%rdi),%xmm16,%xmm16

	vaesenc	96(%rdi),%xmm16,%xmm16

	vaesenc	112(%rdi),%xmm16,%xmm16

	vaesenc	128(%rdi),%xmm16,%xmm16

	vaesenc	144(%rdi),%xmm16,%xmm16

	vaesenc	160(%rdi),%xmm16,%xmm16

	vaesenc	176(%rdi),%xmm16,%xmm16

	vaesenc	192(%rdi),%xmm16,%xmm16

	vaesenc	208(%rdi),%xmm16,%xmm16

	vaesenclast	224(%rdi),%xmm16,%xmm16
	/* redundant jump to the immediately following label (kept as generated) */
	jmp	.Lexit_aes_0
.Lexit_aes_0:

	/* reorder the bytes of the block (presumably a byte reversal - SHUF_MASK is defined elsewhere) */
	vpshufb	SHUF_MASK(%rip),%xmm16,%xmm16

	/*
	 * 128-bit shift left by one: shift each qword left by 1, then carry the
	 * bit shifted out of the low qword into the high qword.  xmm1 ends up
	 * holding the bit shifted out of the high qword, in its lowest bit.
	 */
	vmovdqa64	%xmm16,%xmm2
	vpsllq	$1,%xmm16,%xmm16
	vpsrlq	$63,%xmm2,%xmm2
	vmovdqa	%xmm2,%xmm1
	vpslldq	$8,%xmm2,%xmm2
	vpsrldq	$8,%xmm1,%xmm1
	vporq	%xmm2,%xmm16,%xmm16

	/*
	 * Copy the carried-out bit into dwords 0 and 3, compare per dword with
	 * TWOONE to form a mask, and XOR the masked POLY into the shifted value.
	 */
	vpshufd	$36,%xmm1,%xmm2
	vpcmpeqd	TWOONE(%rip),%xmm2,%xmm2
	vpand	POLY(%rip),%xmm2,%xmm2
	vpxorq	%xmm2,%xmm16,%xmm16

	/* table[336] = K^1 */
	vmovdqu64	%xmm16,336(%rsi)
	/* ymm4 = ymm3 = K in both 128-bit lanes */
	vshufi32x4	$0x00,%ymm16,%ymm16,%ymm4
	vmovdqa	%ymm4,%ymm3

	/*
	 * Multiply-and-reduce, ymm3 = ymm3 * ymm4 per 128-bit lane (K * K):
	 * four carry-less partial products (high*high, low*low and the two
	 * cross terms); the cross terms are summed and split across the
	 * high (ymm0) and low (ymm3) halves of the 256-bit product.
	 */
	vpclmulqdq	$0x11,%ymm4,%ymm3,%ymm0
	vpclmulqdq	$0x00,%ymm4,%ymm3,%ymm1
	vpclmulqdq	$0x01,%ymm4,%ymm3,%ymm2
	vpclmulqdq	$0x10,%ymm4,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm3,%ymm3

	vpsrldq	$8,%ymm3,%ymm2
	vpslldq	$8,%ymm3,%ymm3
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm3,%ymm3



	/* reduction, first phase: fold the low half using POLY2 */
	vmovdqu64	POLY2(%rip),%ymm2

	vpclmulqdq	$0x01,%ymm3,%ymm2,%ymm1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm3,%ymm3



	/* reduction, second phase; the final three-way XOR (imm 0x96) merges in the high half */
	vpclmulqdq	$0x00,%ymm3,%ymm2,%ymm1
	vpsrldq	$4,%ymm1,%ymm1
	vpclmulqdq	$0x10,%ymm3,%ymm2,%ymm3
	vpslldq	$4,%ymm3,%ymm3

	vpternlogq	$0x96,%ymm1,%ymm0,%ymm3

	/* table[320] = K^2 (both lanes of ymm3 hold K^2) */
	vmovdqu64	%xmm3,320(%rsi)
	/* ymm4 = ymm5 = { lane0: K^2, lane1: K^1 } */
	vinserti64x2	$1,%xmm16,%ymm3,%ymm4
	vmovdqa64	%ymm4,%ymm5

	/* same multiply-and-reduce: ymm4 = ymm4 * ymm3 = { K^4, K^3 } */
	vpclmulqdq	$0x11,%ymm3,%ymm4,%ymm0
	vpclmulqdq	$0x00,%ymm3,%ymm4,%ymm1
	vpclmulqdq	$0x01,%ymm3,%ymm4,%ymm2
	vpclmulqdq	$0x10,%ymm3,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm4,%ymm4

	vpsrldq	$8,%ymm4,%ymm2
	vpslldq	$8,%ymm4,%ymm4
	vpxorq	%ymm2,%ymm0,%ymm0
	vpxorq	%ymm1,%ymm4,%ymm4



	vmovdqu64	POLY2(%rip),%ymm2

	vpclmulqdq	$0x01,%ymm4,%ymm2,%ymm1
	vpslldq	$8,%ymm1,%ymm1
	vpxorq	%ymm1,%ymm4,%ymm4



	vpclmulqdq	$0x00,%ymm4,%ymm2,%ymm1
	vpsrldq	$4,%ymm1,%ymm1
	vpclmulqdq	$0x10,%ymm4,%ymm2,%ymm4
	vpslldq	$4,%ymm4,%ymm4

	vpternlogq	$0x96,%ymm1,%ymm0,%ymm4

	/* table[288] = K^4, table[304] = K^3 */
	vmovdqu64	%ymm4,288(%rsi)

	/* zmm4 = { K^4, K^3, K^2, K^1 } (lane 0 first) */
	vinserti64x4	$1,%ymm5,%zmm4,%zmm4


	/* zmm3 = K^4 in all four lanes; zmm5 keeps a copy of { K^4..K^1 } */
	vshufi64x2	$0x00,%zmm4,%zmm4,%zmm3
	vmovdqa64	%zmm4,%zmm5

	/* same multiply-and-reduce: zmm4 = zmm4 * zmm3 = { K^8, K^7, K^6, K^5 } */
	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm0
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm1
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm2
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm2
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm2

	vpclmulqdq	$0x01,%zmm4,%zmm2,%zmm1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm2,%zmm1
	vpsrldq	$4,%zmm1,%zmm1
	vpclmulqdq	$0x10,%zmm4,%zmm2,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm1,%zmm0,%zmm4

	/* table[224..287] = K^8..K^5 */
	vmovdqu64	%zmm4,224(%rsi)
	/* zmm3 = K^8 in all four lanes */
	vshufi64x2	$0x00,%zmm4,%zmm4,%zmm3

	/* same multiply-and-reduce: zmm5 = { K^4..K^1 } * K^8 = { K^12, K^11, K^10, K^9 } */
	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm0
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm1
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm2
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm2
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm2

	vpclmulqdq	$0x01,%zmm5,%zmm2,%zmm1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm2,%zmm1
	vpsrldq	$4,%zmm1,%zmm1
	vpclmulqdq	$0x10,%zmm5,%zmm2,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm1,%zmm0,%zmm5

	/* table[160..223] = K^12..K^9 */
	vmovdqu64	%zmm5,160(%rsi)

	/* same multiply-and-reduce: zmm4 = { K^8..K^5 } * K^8 = { K^16, K^15, K^14, K^13 } */
	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm0
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm1
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm2
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm2
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm2,%zmm0,%zmm0
	vpxorq	%zmm1,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm2

	vpclmulqdq	$0x01,%zmm4,%zmm2,%zmm1
	vpslldq	$8,%zmm1,%zmm1
	vpxorq	%zmm1,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm2,%zmm1
	vpsrldq	$4,%zmm1,%zmm1
	vpclmulqdq	$0x10,%zmm4,%zmm2,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm1,%zmm0,%zmm4

	/* table[96..159] = K^16..K^13 */
	vmovdqu64	%zmm4,96(%rsi)
	/* clear upper vector state before returning to the caller */
	vzeroupper
/* no branch in this function targets .Labort_init */
.Labort_init:
	/* encoded "rep ret" */
	.byte	0xf3,0xc3
.cfi_endproc	
.size	ossl_aes_gcm_init_avx512, .-ossl_aes_gcm_init_avx512
.globl	ossl_aes_gcm_setiv_avx512
.type	ossl_aes_gcm_setiv_avx512,@function
.align	32
ossl_aes_gcm_setiv_avx512:
.cfi_startproc	
.Lsetiv_seh_begin:
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lsetiv_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lsetiv_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lsetiv_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lsetiv_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lsetiv_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lsetiv_seh_push_r15:










	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lsetiv_seh_setfp:

.Lsetiv_seh_prolog_end:
	subq	$820,%rsp
	andq	$(-64),%rsp
	cmpq	$12,%rcx
	je	iv_len_12_init_IV
	vpxor	%xmm2,%xmm2,%xmm2
	movq	%rdx,%r10
	movq	%rcx,%r11
	orq	%r11,%r11
	jz	.L_CALC_AAD_done_1

	xorq	%rbx,%rbx
	vmovdqa64	SHUF_MASK(%rip),%zmm16

.L_get_AAD_loop48x16_1:
	cmpq	$768,%r11
	jl	.L_exit_AAD_loop48x16_1
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_2

	vmovdqu64	288(%rsi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rsi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rsi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rsi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,192(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,128(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,64(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,0(%rsp)
.L_skip_hkeys_precomputation_2:
	movq	$1,%rbx
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	0(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	64(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	192(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	512(%r10),%zmm11
	vmovdqu64	576(%r10),%zmm3
	vmovdqu64	640(%r10),%zmm4
	vmovdqu64	704(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2

	subq	$768,%r11
	je	.L_CALC_AAD_done_1

	addq	$768,%r10
	jmp	.L_get_AAD_loop48x16_1

.L_exit_AAD_loop48x16_1:

	cmpq	$512,%r11
	jl	.L_less_than_32x16_1

	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_3

	vmovdqu64	288(%rsi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rsi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rsi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rsi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)
.L_skip_hkeys_precomputation_3:
	movq	$1,%rbx
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2

	subq	$512,%r11
	je	.L_CALC_AAD_done_1

	addq	$512,%r10
	jmp	.L_less_than_16x16_1

.L_less_than_32x16_1:
	cmpq	$256,%r11
	jl	.L_less_than_16x16_1

	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	96(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	160(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	224(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	288(%rsi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm2

	subq	$256,%r11
	je	.L_CALC_AAD_done_1

	addq	$256,%r10

.L_less_than_16x16_1:

	leaq	byte64_len_to_mask_table(%rip),%r12
	leaq	(%r12,%r11,8),%r12


	addl	$15,%r11d
	shrl	$4,%r11d
	cmpl	$2,%r11d
	jb	.L_AAD_blocks_1_1
	je	.L_AAD_blocks_2_1
	cmpl	$4,%r11d
	jb	.L_AAD_blocks_3_1
	je	.L_AAD_blocks_4_1
	cmpl	$6,%r11d
	jb	.L_AAD_blocks_5_1
	je	.L_AAD_blocks_6_1
	cmpl	$8,%r11d
	jb	.L_AAD_blocks_7_1
	je	.L_AAD_blocks_8_1
	cmpl	$10,%r11d
	jb	.L_AAD_blocks_9_1
	je	.L_AAD_blocks_10_1
	cmpl	$12,%r11d
	jb	.L_AAD_blocks_11_1
	je	.L_AAD_blocks_12_1
	cmpl	$14,%r11d
	jb	.L_AAD_blocks_13_1
	je	.L_AAD_blocks_14_1
	cmpl	$15,%r11d
	je	.L_AAD_blocks_15_1
.L_AAD_blocks_16_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	96(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	160(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	224(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm9,%zmm11,%zmm1
	vpternlogq	$0x96,%zmm10,%zmm3,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm12,%zmm11,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm3,%zmm8
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_15_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	112(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	176(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	240(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
.L_AAD_blocks_14_1:
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%ymm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%ymm16,%ymm5,%ymm5
	vpxorq	%zmm2,%zmm11,%zmm11
	vmovdqu64	128(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	192(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	256(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm5,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm5,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm5,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm5,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_13_1 -- tail handler for the 13-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads three full 64-byte vectors plus one masked 16-byte vector from
 * (%r10), multiplies them lane-wise (carry-less) by the values stored at
 * 144..351(%rsi), sums the products together with the incoming accumulator
 * %zmm2, and leaves the reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 144..351(%rsi) presumably holds precomputed hash-key powers,
 * highest power first -- confirm against the context layout.
 */
.L_AAD_blocks_13_1:
	/*
	 * Step %r12 back by 1536 bytes (3 * 512) and load a 64-bit byte mask.
	 * NOTE(review): presumably one 512-byte group of mask-table entries is
	 * skipped per full 64-byte vector -- confirm against the table.
	 */
	subq	$1536,%r12
	kmovq	(%r12),%k1
	/* The last load is zero-masked: bytes whose mask bit is clear read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%xmm5{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%xmm16,%xmm5,%xmm5
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	144(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	208(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	/* vpternlogq imm 0x96 is a three-input XOR: accumulate the products. */
	vmovdqu64	272(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	/*
	 * Final 16-byte vector. Writing an xmm destination zeroes the upper
	 * lanes of the zmm register, so the 512-bit XORs below stay valid.
	 */
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm5,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm5,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm5,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_12_1 -- tail handler for the 12-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads two full 64-byte vectors plus one masked 64-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 160..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 160..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_12_1:
	/* Step %r12 back by 1024 bytes (2 * 512) and load the 64-bit byte mask. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	/* The third load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	160(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	224(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	/* vpternlogq imm 0x96 is a three-input XOR: accumulate the products. */
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product (-> %zmm1) and the lower 8 bytes, shifted
	 * up, into the low product (-> %zmm6).
	 */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_11_1 -- tail handler for the 11-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads two full 64-byte vectors plus one masked 64-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 176..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 176..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_11_1:
	/* Step %r12 back by 1024 bytes (2 * 512) and load the 64-bit byte mask. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	/* The third load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	176(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	240(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	/* Sum the products of the first two vectors. */
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	/*
	 * Build a three-lane multiplier: 32 bytes from 304(%rsi) in lanes 0-1
	 * (the ymm load zeroes lanes 2-3), then 16 bytes from 336(%rsi) in
	 * lane 2. Lane 3 stays zero.
	 */
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_10_1 -- tail handler for the 10-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads two full 64-byte vectors plus one masked 32-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 192..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 192..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_10_1:
	/* Step %r12 back by 1024 bytes (2 * 512) and load the 64-bit byte mask. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	/* The third load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%ymm4{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%ymm16,%ymm4,%ymm4
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	192(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	256(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	/* Sum the products of the first two vectors. */
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	/*
	 * Final 32-byte vector. Writing a ymm destination zeroes the upper
	 * lanes of the zmm register, so the 512-bit XORs below stay valid.
	 */
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm4,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm4,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm4,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm4,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_9_1 -- tail handler for the 9-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads two full 64-byte vectors plus one masked 16-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 208..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 208..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_9_1:
	/* Step %r12 back by 1024 bytes (2 * 512) and load the 64-bit byte mask. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	/* The third load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%xmm4{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%xmm16,%xmm4,%xmm4
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	208(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	272(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	/* Sum the products of the first two vectors. */
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	/*
	 * Final 16-byte vector. Writing an xmm destination zeroes the upper
	 * lanes of the zmm register, so the 512-bit XORs below stay valid.
	 */
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm4,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm4,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm4,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_8_1 -- tail handler for the 8-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one full 64-byte vector plus one masked 64-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 224..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 224..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_8_1:
	/* Step %r12 back by 512 bytes and load the 64-bit byte mask. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	/* The second load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	224(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	/* Sum the products of the two vectors. */
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product (-> %zmm1) and the lower 8 bytes, shifted
	 * up, into the low product (-> %zmm6).
	 */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_7_1 -- tail handler for the 7-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one full 64-byte vector plus one masked 64-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 240..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 240..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_7_1:
	/* Step %r12 back by 512 bytes and load the 64-bit byte mask. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	/* The second load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	240(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	/*
	 * Build a three-lane multiplier: 32 bytes from 304(%rsi) in lanes 0-1
	 * (the ymm load zeroes lanes 2-3), then 16 bytes from 336(%rsi) in
	 * lane 2. Lane 3 stays zero.
	 */
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm6

	/* Sum the products of the two vectors. */
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_6_1 -- tail handler for the 6-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one full 64-byte vector plus one masked 32-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 256..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 256..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_6_1:
	/* Step %r12 back by 512 bytes and load the 64-bit byte mask. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	/* The second load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%ymm3{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%ymm16,%ymm3,%ymm3
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	256(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	/*
	 * Final 32-byte vector. Writing a ymm destination zeroes the upper
	 * lanes of the zmm register, so the 512-bit XORs below stay valid.
	 */
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm3,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm3,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm3,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm3,%ymm6

	/* Sum the products of the two vectors. */
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_5_1 -- tail handler for the 5-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one full 64-byte vector plus one masked 16-byte vector from (%r10),
 * multiplies them lane-wise (carry-less) by the values at 272..351(%rsi),
 * sums the products with the incoming accumulator %zmm2 and leaves the
 * reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 272..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_5_1:
	/* Step %r12 back by 512 bytes and load the 64-bit byte mask. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	/* The second load is zero-masked: bytes with a clear mask bit read as 0. */
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%xmm3{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%xmm16,%xmm3,%xmm3
	/* Fold the incoming accumulator into the first vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	272(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	/*
	 * Final 16-byte vector. Writing an xmm destination zeroes the upper
	 * lanes of the zmm register, so the 512-bit XORs below stay valid.
	 */
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm3,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm3,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm3,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm3,%xmm6

	/* Sum the products of the two vectors. */
	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/*
	 * Merge the two cross-term sums, then add the upper 8 bytes of each
	 * lane into the high product and the lower 8 bytes (shifted up) into
	 * the low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_4_1 -- tail handler for the 4-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one masked 64-byte vector from (%r10), multiplies it lane-wise
 * (carry-less) by the 64 bytes at 288(%rsi) after folding in the incoming
 * accumulator %zmm2, and leaves the reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 288..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_4_1:
	/* Load the byte mask at (%r12); unselected bytes of the data read as 0. */
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	/* Fold the incoming accumulator into the vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Per-lane carry-less products: imm 0x11 = high*high, 0x00 = low*low,
	 * 0x01 and 0x10 = the two cross terms.
	 */
	vmovdqu64	288(%rsi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13

	/*
	 * Merge the two cross terms, then add the upper 8 bytes of each lane
	 * into the high product (-> %zmm1) and the lower 8 bytes, shifted up,
	 * into the low product (-> %zmm6).
	 */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_3_1 -- tail handler for the 3-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one masked 64-byte vector from (%r10), multiplies it lane-wise
 * (carry-less) by the 48 bytes at 304(%rsi) after folding in the incoming
 * accumulator %zmm2, and leaves the reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 304..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_3_1:
	/* Load the byte mask at (%r12); unselected bytes of the data read as 0. */
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	/* Fold the incoming accumulator into the vector. */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * Build a three-lane multiplier: 32 bytes from 304(%rsi) in lanes 0-1
	 * (the ymm load zeroes lanes 2-3), then 16 bytes from 336(%rsi) in
	 * lane 2. Lane 3 stays zero.
	 */
	vmovdqu64	304(%rsi),%ymm15
	vinserti64x2	$2,336(%rsi),%zmm15,%zmm15
	/* imm 0x01/0x10 = cross terms, 0x11 = high*high, 0x00 = low*low. */
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6

	/*
	 * Merge the two cross terms, then add the upper 8 bytes of each lane
	 * into the high product and the lower 8 bytes (shifted up) into the
	 * low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_2_1 -- tail handler for the 2-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one masked 32-byte vector from (%r10), multiplies it lane-wise
 * (carry-less) by the 32 bytes at 320(%rsi) after folding in the incoming
 * accumulator %zmm2, and leaves the reduced 128-bit result in %xmm2.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 320..351(%rsi) presumably holds precomputed hash-key powers
 * -- confirm against the context layout.
 */
.L_AAD_blocks_2_1:
	/* Load the byte mask at (%r12); unselected bytes of the data read as 0. */
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%ymm11{%k1}{z}
	/* Permute bytes inside every 16-byte lane with the control in %ymm16. */
	vpshufb	%ymm16,%ymm11,%ymm11
	/* Fold the incoming accumulator into the vector (512-bit XOR). */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * 256-bit multiplies: imm 0x01/0x10 = cross terms, 0x11 = high*high,
	 * 0x00 = low*low. Writing ymm destinations zeroes the upper lanes of
	 * the zmm registers, so the 512-bit operations below stay valid.
	 */
	vmovdqu64	320(%rsi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm11,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm11,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm11,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm11,%ymm6

	/*
	 * Merge the two cross terms, then add the upper 8 bytes of each lane
	 * into the high product and the lower 8 bytes (shifted up) into the
	 * low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

	jmp	.L_CALC_AAD_done_1
/*
 * .L_AAD_blocks_1_1 -- tail handler for the 1-block case (per the label;
 * the dispatch that selects it is outside this section).
 * Reads one masked 16-byte vector from (%r10), multiplies it (carry-less)
 * by the 16 bytes at 336(%rsi) after folding in the incoming accumulator
 * %zmm2, and leaves the reduced 128-bit result in %xmm2. Execution then
 * falls through to the label that follows this section.
 * %r10, %r12, %rsi, %zmm2 and %zmm16 are set up before this section.
 * NOTE(review): 336(%rsi) presumably holds the first hash-key power --
 * confirm against the context layout.
 */
.L_AAD_blocks_1_1:
	/* Load the byte mask at (%r12); unselected bytes of the data read as 0. */
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%xmm11{%k1}{z}
	/* Permute the 16 bytes with the control in %xmm16. */
	vpshufb	%xmm16,%xmm11,%xmm11
	/* Fold the incoming accumulator into the vector (512-bit XOR). */
	vpxorq	%zmm2,%zmm11,%zmm11
	/*
	 * 128-bit multiplies: imm 0x01/0x10 = cross terms, 0x11 = high*high,
	 * 0x00 = low*low. Writing xmm destinations zeroes the upper lanes of
	 * the zmm registers, so the 512-bit operations below stay valid.
	 */
	vmovdqu64	336(%rsi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm11,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm11,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm11,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm11,%xmm6

	/*
	 * Merge the two cross terms, then add the upper 8 bytes of each lane
	 * into the high product and the lower 8 bytes (shifted up) into the
	 * low product.
	 */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	/* XOR-fold the four 128-bit lanes of %zmm1 and of %zmm6 into lane 0. */
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/*
	 * Reduce the 256-bit value %xmm1:%xmm6 (high:low) to 128 bits with the
	 * POLY2 constant (defined elsewhere in this file); result in %xmm2.
	 */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm2
	vpslldq	$4,%xmm2,%xmm2
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm2

/*
 * .L_CALC_AAD_done_1 -- common exit of the tail handlers above.
 * XORs %rcx * 8 into the low 64 bits of the accumulator %xmm2, multiplies
 * the result (carry-less) by the 16 bytes at 336(%rsi), reduces it with
 * POLY2, byte-shuffles it with SHUF_MASK and jumps to
 * skip_iv_len_12_init_IV with the value in %xmm2.
 * NOTE(review): %rcx * 8 is presumably the input length converted from
 * bytes to bits (the function entry is not visible here) -- confirm.
 */
.L_CALC_AAD_done_1:
	/* %r10 = %rcx << 3, placed in the low quadword of %xmm3 (upper half 0). */
	movq	%rcx,%r10
	shlq	$3,%r10
	vmovq	%r10,%xmm3


	vpxorq	%xmm2,%xmm3,%xmm2

	vmovdqu64	336(%rsi),%xmm1

	/*
	 * 128 x 128 carry-less multiply of %xmm2 by %xmm1:
	 * %xmm11 = high*high, %xmm3 = low*low, %xmm2 = sum of both cross terms.
	 */
	vpclmulqdq	$0x11,%xmm1,%xmm2,%xmm11
	vpclmulqdq	$0x00,%xmm1,%xmm2,%xmm3
	vpclmulqdq	$0x01,%xmm1,%xmm2,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm2,%xmm2
	vpxorq	%xmm4,%xmm2,%xmm2

	/* Split the middle term: upper 8 bytes -> high half, lower -> low half. */
	vpsrldq	$8,%xmm2,%xmm4
	vpslldq	$8,%xmm2,%xmm2
	vpxorq	%xmm4,%xmm11,%xmm11
	vpxorq	%xmm3,%xmm2,%xmm2



	/* Reduce %xmm11:%xmm2 (high:low) to 128 bits with the POLY2 constant. */
	vmovdqu64	POLY2(%rip),%xmm4

	vpclmulqdq	$0x01,%xmm2,%xmm4,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm2,%xmm2



	vpclmulqdq	$0x00,%xmm2,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm2,%xmm4,%xmm2
	vpslldq	$4,%xmm2,%xmm2

	/* imm 0x96 = three-input XOR of both reduction parts and the high half. */
	vpternlogq	$0x96,%xmm3,%xmm11,%xmm2

	/* Permute the bytes of the result with the SHUF_MASK constant. */
	vpshufb	SHUF_MASK(%rip),%xmm2,%xmm2
	jmp	skip_iv_len_12_init_IV
/*
 * iv_len_12_init_IV -- builds %xmm2 directly from a 12-byte input:
 * bytes 0..11 come from the buffer at %rdx, bytes 12..15 keep the value of
 * the ONEf constant (defined elsewhere in this file). Falls through to the
 * label that follows.
 * NOTE(review): %rdx is presumably the IV pointer argument -- the function
 * entry and the branch that selects this path are not visible here.
 */
iv_len_12_init_IV:

	vmovdqu8	ONEf(%rip),%xmm2
	movq	%rdx,%r11
	/* Mask 0xfff selects the low 12 bytes; movl zero-extends into %r10. */
	movl	$0x0000000000000fff,%r10d
	kmovq	%r10,%k1
	/* Merge-masked load: only the 12 selected bytes of %xmm2 are replaced. */
	vmovdqu8	(%r11),%xmm2{%k1}
/*
 * skip_iv_len_12_init_IV -- copies %xmm2 to %xmm1 and selects the AES
 * encryption path for %xmm1 from the 32-bit value at 240(%rdi):
 * 9 -> .Laes_128_4, 11 -> .Laes_192_4, 13 -> .Laes_256_4.
 * Any other value jumps straight to .Lexit_aes_4 with %xmm1 unencrypted.
 */
skip_iv_len_12_init_IV:
	vmovdqu	%xmm2,%xmm1


	movl	240(%rdi),%r10d
	cmpl	$9,%r10d
	je	.Laes_128_4
	cmpl	$11,%r10d
	je	.Laes_192_4
	cmpl	$13,%r10d
	je	.Laes_256_4
	jmp	.Lexit_aes_4
/*
 * .Laes_128_4 -- encrypts the block in %xmm1 in place: XOR with the 16 bytes
 * at 0(%rdi), nine vaesenc rounds with the round keys at 16..144(%rdi) and
 * a final vaesenclast with the key at 160(%rdi).
 */
.align	32
.Laes_128_4:
	vpxorq	0(%rdi),%xmm1,%xmm1

	vaesenc	16(%rdi),%xmm1,%xmm1

	vaesenc	32(%rdi),%xmm1,%xmm1

	vaesenc	48(%rdi),%xmm1,%xmm1

	vaesenc	64(%rdi),%xmm1,%xmm1

	vaesenc	80(%rdi),%xmm1,%xmm1

	vaesenc	96(%rdi),%xmm1,%xmm1

	vaesenc	112(%rdi),%xmm1,%xmm1

	vaesenc	128(%rdi),%xmm1,%xmm1

	vaesenc	144(%rdi),%xmm1,%xmm1

	vaesenclast	160(%rdi),%xmm1,%xmm1
	jmp	.Lexit_aes_4
/*
 * .Laes_192_4 -- encrypts the block in %xmm1 in place: XOR with the 16 bytes
 * at 0(%rdi), eleven vaesenc rounds with the round keys at 16..176(%rdi)
 * and a final vaesenclast with the key at 192(%rdi).
 */
.align	32
.Laes_192_4:
	vpxorq	0(%rdi),%xmm1,%xmm1

	vaesenc	16(%rdi),%xmm1,%xmm1

	vaesenc	32(%rdi),%xmm1,%xmm1

	vaesenc	48(%rdi),%xmm1,%xmm1

	vaesenc	64(%rdi),%xmm1,%xmm1

	vaesenc	80(%rdi),%xmm1,%xmm1

	vaesenc	96(%rdi),%xmm1,%xmm1

	vaesenc	112(%rdi),%xmm1,%xmm1

	vaesenc	128(%rdi),%xmm1,%xmm1

	vaesenc	144(%rdi),%xmm1,%xmm1

	vaesenc	160(%rdi),%xmm1,%xmm1

	vaesenc	176(%rdi),%xmm1,%xmm1

	vaesenclast	192(%rdi),%xmm1,%xmm1
	jmp	.Lexit_aes_4
/*
 * .Laes_256_4 -- encrypts the block in %xmm1 in place: XOR with the 16 bytes
 * at 0(%rdi), thirteen vaesenc rounds with the round keys at 16..208(%rdi)
 * and a final vaesenclast with the key at 224(%rdi).
 */
.align	32
.Laes_256_4:
	vpxorq	0(%rdi),%xmm1,%xmm1

	vaesenc	16(%rdi),%xmm1,%xmm1

	vaesenc	32(%rdi),%xmm1,%xmm1

	vaesenc	48(%rdi),%xmm1,%xmm1

	vaesenc	64(%rdi),%xmm1,%xmm1

	vaesenc	80(%rdi),%xmm1,%xmm1

	vaesenc	96(%rdi),%xmm1,%xmm1

	vaesenc	112(%rdi),%xmm1,%xmm1

	vaesenc	128(%rdi),%xmm1,%xmm1

	vaesenc	144(%rdi),%xmm1,%xmm1

	vaesenc	160(%rdi),%xmm1,%xmm1

	vaesenc	176(%rdi),%xmm1,%xmm1

	vaesenc	192(%rdi),%xmm1,%xmm1

	vaesenc	208(%rdi),%xmm1,%xmm1

	vaesenclast	224(%rdi),%xmm1,%xmm1
	/* The target label is the very next line, so this jump is redundant. */
	jmp	.Lexit_aes_4
/*
 * .Lexit_aes_4 -- result store and epilogue of ossl_aes_gcm_setiv_avx512.
 * Stores %xmm1 at 32(%rsi) and the SHUF_MASK-shuffled %xmm2 at 0(%rsi),
 * optionally wipes 768 bytes of stack scratch, then restores the six
 * callee-saved registers and returns.
 * NOTE(review): 32(%rsi) and 0(%rsi) are presumably the encrypted initial
 * counter block and the current counter block of the GCM context -- confirm
 * against the context layout.
 */
.Lexit_aes_4:

	vmovdqu	%xmm1,32(%rsi)


	vpshufb	SHUF_MASK(%rip),%xmm2,%xmm2
	vmovdqu	%xmm2,0(%rsi)
	/*
	 * Zero 0..767(%rsp) only when %rcx > 256 (unsigned compare).
	 * NOTE(review): per the label this clears hash-key material that the
	 * long-input path spilled to the stack -- that path is not visible here.
	 */
	cmpq	$256,%rcx
	jbe	.Lskip_hkeys_cleanup_5
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_5:
	/* Clear the upper vector state, then restore %rsp from %rbp. */
	vzeroupper
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
.Labort_setiv:
	/* Bytes 0xf3,0xc3 encode "rep ret". */
	.byte	0xf3,0xc3
.Lsetiv_seh_end:
.cfi_endproc	
.size	ossl_aes_gcm_setiv_avx512, .-ossl_aes_gcm_setiv_avx512
/*
 * ossl_aes_gcm_update_aad_avx512 -- entry and prologue.
 * In (as used by the visible code):
 *   %rdi = context; 16 bytes at 64(%rdi) are loaded as the running
 *          accumulator and 96..351(%rdi) is read as a multiplier table
 *   %rsi = input data pointer (copied to %r10)
 *   %rdx = input length in bytes (copied to %r11); zero skips all work
 * Saves %rbx, %rbp, %r12-%r15, uses %rbp as frame pointer and reserves a
 * 64-byte-aligned scratch area of at least 768 bytes below it.
 * NOTE(review): the end of this function is outside this section, so its
 * outputs are not documented here.
 */
.globl	ossl_aes_gcm_update_aad_avx512
.type	ossl_aes_gcm_update_aad_avx512,@function
.align	32
ossl_aes_gcm_update_aad_avx512:
.cfi_startproc	
.Lghash_seh_begin:
/* Bytes f3 0f 1e fa encode endbr64. */
.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lghash_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lghash_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lghash_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lghash_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lghash_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lghash_seh_push_r15:










	/* %rbp = %rsp after the pushes; it stays fixed while %rsp is realigned. */
	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lghash_seh_setfp:

.Lghash_seh_prolog_end:
	/* Reserve 820 bytes, then round %rsp down to a 64-byte boundary. */
	subq	$820,%rsp
	andq	$(-64),%rsp
	/* %xmm14 = running accumulator (the xmm load zeroes the upper lanes). */
	vmovdqu64	64(%rdi),%xmm14
	movq	%rsi,%r10
	movq	%rdx,%r11
	/* Nothing to do for a zero length. */
	orq	%r11,%r11
	jz	.L_CALC_AAD_done_6

	/* %rbx = 0: the stack multiplier table has not been built yet. */
	xorq	%rbx,%rbx
	vmovdqa64	SHUF_MASK(%rip),%zmm16

/*
 * .L_get_AAD_loop48x16_6 -- loop head and one-time table build.
 * Leaves the loop when fewer than 768 bytes remain in %r11. Otherwise loads
 * and byte-shuffles the first 256 input bytes at (%r10) and, only while
 * %rbx is zero, fills 0..767(%rsp) with twelve 64-byte multiplier vectors:
 *   512..767(%rsp) = copy of 96..351(%rdi)
 *   448..0(%rsp)   = repeated reduced products of those vectors with the
 *                    broadcast 16 bytes at 224(%rdi)
 * NOTE(review): the table is presumably 48 consecutive hash-key powers,
 * highest power at 0(%rsp) -- confirm against the context layout.
 */
.L_get_AAD_loop48x16_6:
	/*
	 * NOTE(review): jl is a signed comparison applied to a byte count, so
	 * a length of 2^63 or more would be treated as negative -- presumably
	 * unreachable, confirm with callers.
	 */
	cmpq	$768,%r11
	jl	.L_exit_AAD_loop48x16_6
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	/* Permute bytes inside every 16-byte lane with the control in %zmm16. */
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	/* Skip the table build when an earlier pass already did it. */
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_7

	/* Copy the four stored vectors at 96..351(%rdi) to 512..767(%rsp). */
	vmovdqu64	288(%rdi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rdi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	/* Broadcast lane 0 of %zmm9 (the 16 bytes at 224(%rdi)) to all lanes. */
	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rdi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rdi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	/*
	 * %zmm10 = reduce(%zmm10 * %zmm9) per lane, stored at 448(%rsp).
	 * Products: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms;
	 * the middle term is split across the halves and the result is
	 * reduced with POLY2. The seven stages below repeat this pattern,
	 * alternating between %zmm10 and %zmm12.
	 */
	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	/* %zmm12 = reduce(%zmm12 * %zmm9), stored at 384(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	/* %zmm10 = reduce(%zmm10 * %zmm9), stored at 320(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	/* %zmm12 = reduce(%zmm12 * %zmm9), stored at 256(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)

	/* %zmm10 = reduce(%zmm10 * %zmm9), stored at 192(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,192(%rsp)

	/* %zmm12 = reduce(%zmm12 * %zmm9), stored at 128(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,128(%rsp)

	/* %zmm10 = reduce(%zmm10 * %zmm9), stored at 64(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,64(%rsp)

	/* %zmm12 = reduce(%zmm12 * %zmm9), stored at 0(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,0(%rsp)
/*
 * .L_skip_hkeys_precomputation_7 -- body of the 768-byte loop.
 * Multiplies twelve byte-shuffled 64-byte input vectors (the first four
 * were loaded at the loop head into %zmm11, %zmm3, %zmm4 and %zmm5) by the
 * twelve table vectors at 0..704(%rsp), XOR-accumulates the products,
 * reduces the sum to 128 bits in %xmm14 and advances to the next 768 bytes.
 * Accumulators: %zmm6 = high*high products, %zmm8 = low*low products,
 * %zmm7 = cross terms. vpternlogq imm 0x96 is a three-input XOR.
 */
.L_skip_hkeys_precomputation_7:
	/* Mark the stack table as built so later passes skip the build. */
	movq	$1,%rbx
	/* Fold the running accumulator into the first input vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Input bytes 0..255 against table vectors 0..192(%rsp). */
	vmovdqu64	0(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	64(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	/* First use of the accumulators: plain XORs initialise them. */
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	128(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	192(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	/* Input bytes 256..511 against table vectors 256..448(%rsp). */
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	/* Input bytes 512..767 against table vectors 512..704(%rsp). */
	vmovdqu64	512(%r10),%zmm11
	vmovdqu64	576(%r10),%zmm3
	vmovdqu64	640(%r10),%zmm4
	vmovdqu64	704(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	/*
	 * Add the upper 8 bytes of each cross-term lane into the high sum and
	 * the lower 8 bytes (shifted up) into the low sum.
	 */
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	/* XOR-fold the four 128-bit lanes of %zmm6 and of %zmm8 into lane 0. */
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	/*
	 * Reduce the 256-bit value %xmm6:%xmm8 (high:low) to 128 bits with the
	 * POLY2 constant; the new running accumulator lands in %xmm14.
	 */
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14

	/* 768 bytes consumed: finish if nothing is left, else advance and loop. */
	subq	$768,%r11
	je	.L_CALC_AAD_done_6

	addq	$768,%r10
	jmp	.L_get_AAD_loop48x16_6

/*
 * 32-block (512-byte) step.
 * NOTE(review): presumably reached once fewer than 768 bytes remain; the
 * loop test that branches here is outside this excerpt.
 * Uses: r10 = input pointer, r11 = bytes remaining, rdi = context pointer,
 *       zmm14 = running accumulator, zmm16 = vpshufb control,
 *       rbx = non-zero when the stack table at 256..767(%rsp) is populated.
 * Leaves the updated accumulator in xmm14.
 */
.L_exit_AAD_loop48x16_6:

	/* Fewer than 512 bytes left (signed compare): try the 16-block step. */
	cmpq	$512,%r11
	jl	.L_less_than_32x16_6

	/* Load the first 256 input bytes and permute them with the zmm16 control. */
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	/* Skip the table build when rbx says it was already done. */
	testq	%rbx,%rbx
	jnz	.L_skip_hkeys_precomputation_8

	/*
	 * Populate the stack table. Four 64-byte entries are copied verbatim
	 * from the context: 288/224/160/96(%rdi) -> 704/640/576/512(%rsp).
	 * NOTE(review): these are presumably precomputed hash-key powers.
	 */
	vmovdqu64	288(%rdi),%zmm1
	vmovdqu64	%zmm1,704(%rsp)

	vmovdqu64	224(%rdi),%zmm9
	vmovdqu64	%zmm9,640(%rsp)


	/* Broadcast the lowest 128-bit lane of that entry to all four lanes. */
	vshufi64x2	$0x00,%zmm9,%zmm9,%zmm9

	vmovdqu64	160(%rdi),%zmm10
	vmovdqu64	%zmm10,576(%rsp)

	vmovdqu64	96(%rdi),%zmm12
	vmovdqu64	%zmm12,512(%rsp)

	/*
	 * Round 1: zmm10 = zmm10 * zmm9 per 128-bit lane (carry-less multiply
	 * followed by the POLY2 reduction); stored at 448(%rsp).
	 */
	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,448(%rsp)

	/* Round 2: zmm12 = zmm12 * zmm9, same sequence; stored at 384(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,384(%rsp)

	/* Round 3: multiply the round-1 result by zmm9 again; stored at 320(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm10,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm10,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm10,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm10,%zmm10

	vpsrldq	$8,%zmm10,%zmm17
	vpslldq	$8,%zmm10,%zmm10
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm10,%zmm10



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm10,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm10,%zmm10



	vpclmulqdq	$0x00,%zmm10,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm10,%zmm17,%zmm10
	vpslldq	$4,%zmm10,%zmm10

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm10

	vmovdqu64	%zmm10,320(%rsp)

	/* Round 4: multiply the round-2 result by zmm9 again; stored at 256(%rsp). */
	vpclmulqdq	$0x11,%zmm9,%zmm12,%zmm13
	vpclmulqdq	$0x00,%zmm9,%zmm12,%zmm15
	vpclmulqdq	$0x01,%zmm9,%zmm12,%zmm17
	vpclmulqdq	$0x10,%zmm9,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm12,%zmm12

	vpsrldq	$8,%zmm12,%zmm17
	vpslldq	$8,%zmm12,%zmm12
	vpxorq	%zmm17,%zmm13,%zmm13
	vpxorq	%zmm15,%zmm12,%zmm12



	vmovdqu64	POLY2(%rip),%zmm17

	vpclmulqdq	$0x01,%zmm12,%zmm17,%zmm15
	vpslldq	$8,%zmm15,%zmm15
	vpxorq	%zmm15,%zmm12,%zmm12



	vpclmulqdq	$0x00,%zmm12,%zmm17,%zmm15
	vpsrldq	$4,%zmm15,%zmm15
	vpclmulqdq	$0x10,%zmm12,%zmm17,%zmm12
	vpslldq	$4,%zmm12,%zmm12

	vpternlogq	$0x96,%zmm15,%zmm13,%zmm12

	vmovdqu64	%zmm12,256(%rsp)
.L_skip_hkeys_precomputation_8:
	/* Mark the stack table as populated. */
	movq	$1,%rbx
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Multiply data vectors 0..3 by the table entries at 256..448(%rsp).
	 * Partial products are collected as: zmm6 = high x high ($0x11),
	 * zmm8 = low x low ($0x00), zmm7 = both cross terms ($0x01, $0x10).
	 * vpternlogq $0x96 is a three-input XOR.
	 */
	vmovdqu64	256(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	320(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	384(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	448(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	/* Second 256 input bytes, multiplied by the entries at 512..704(%rsp). */
	vmovdqu64	256(%r10),%zmm11
	vmovdqu64	320(%r10),%zmm3
	vmovdqu64	384(%r10),%zmm4
	vmovdqu64	448(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	vmovdqu64	512(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	576(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	640(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	704(%rsp),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	/*
	 * Split the cross terms across the high (zmm6) and low (zmm8) sums,
	 * then XOR the four 128-bit lanes of each sum down to one xmm value.
	 */
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	/* Two-phase reduction with the POLY2 constant; new accumulator in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14

	/* 512 bytes consumed; finish if nothing is left. */
	subq	$512,%r11
	je	.L_CALC_AAD_done_6

	/* Otherwise advance the input and go straight to the under-16-block tail code. */
	addq	$512,%r10
	jmp	.L_less_than_16x16_6

/*
 * 16-block (256-byte) step. Skipped when fewer than 256 bytes remain
 * (signed compare). Multiplies directly by the four 64-byte table entries
 * held in the context at 96..288(%rdi); no stack table is needed here.
 * Uses r10 = input, r11 = bytes remaining, zmm14 = accumulator,
 * zmm16 = vpshufb control. Falls through into .L_less_than_16x16_6.
 */
.L_less_than_32x16_6:
	cmpq	$256,%r11
	jl	.L_less_than_16x16_6

	/* Load 256 input bytes and permute them with the zmm16 control. */
	vmovdqu64	0(%r10),%zmm11
	vmovdqu64	64(%r10),%zmm3
	vmovdqu64	128(%r10),%zmm4
	vmovdqu64	192(%r10),%zmm5
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Partial products: zmm6 = high x high ($0x11), zmm8 = low x low ($0x00),
	 * zmm7 = cross terms ($0x01, $0x10). vpternlogq $0x96 is a 3-input XOR.
	 */
	vmovdqu64	96(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm11,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm11,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm11,%zmm12
	vmovdqu64	160(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm3,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm3,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm3,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm3,%zmm18
	vpxorq	%zmm17,%zmm10,%zmm7
	vpxorq	%zmm13,%zmm1,%zmm6
	vpxorq	%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7
	vmovdqu64	224(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm19,%zmm4,%zmm9
	vpclmulqdq	$0x01,%zmm19,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm19,%zmm4,%zmm12
	vmovdqu64	288(%rdi),%zmm19
	vpclmulqdq	$0x11,%zmm19,%zmm5,%zmm13
	vpclmulqdq	$0x00,%zmm19,%zmm5,%zmm15
	vpclmulqdq	$0x01,%zmm19,%zmm5,%zmm17
	vpclmulqdq	$0x10,%zmm19,%zmm5,%zmm18

	vpternlogq	$0x96,%zmm17,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm1,%zmm6
	vpternlogq	$0x96,%zmm15,%zmm9,%zmm8
	vpternlogq	$0x96,%zmm18,%zmm12,%zmm7

	/* Split the cross terms into the high/low sums, then XOR the lanes together. */
	vpsrldq	$8,%zmm7,%zmm1
	vpslldq	$8,%zmm7,%zmm9
	vpxorq	%zmm1,%zmm6,%zmm6
	vpxorq	%zmm9,%zmm8,%zmm8
	vextracti64x4	$1,%zmm6,%ymm1
	vpxorq	%ymm1,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm1
	vpxorq	%xmm1,%xmm6,%xmm6
	vextracti64x4	$1,%zmm8,%ymm9
	vpxorq	%ymm9,%ymm8,%ymm8
	vextracti32x4	$1,%ymm8,%xmm9
	vpxorq	%xmm9,%xmm8,%xmm8
	/* Two-phase reduction with the POLY2 constant; new accumulator in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm10


	vpclmulqdq	$0x01,%xmm8,%xmm10,%xmm1
	vpslldq	$8,%xmm1,%xmm1
	vpxorq	%xmm1,%xmm8,%xmm1


	vpclmulqdq	$0x00,%xmm1,%xmm10,%xmm9
	vpsrldq	$4,%xmm9,%xmm9
	vpclmulqdq	$0x10,%xmm1,%xmm10,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm6,%xmm9,%xmm14

	/* 256 bytes consumed; finish if nothing is left, else advance the input. */
	subq	$256,%r11
	je	.L_CALC_AAD_done_6

	addq	$256,%r10

/*
 * Tail dispatch for the remaining input.
 * NOTE(review): callers in this excerpt arrive with 1..255 bytes in r11
 * (zero-length cases branch to .L_CALC_AAD_done_6 first) - confirm for
 * entries from outside the excerpt.
 */
.L_less_than_16x16_6:

	/*
	 * r12 = &byte64_len_to_mask_table[r11], 8 bytes per entry. This must be
	 * computed before r11 is turned into a block count below.
	 */
	leaq	byte64_len_to_mask_table(%rip),%r12
	leaq	(%r12,%r11,8),%r12


	/* r11d = ceil(bytes / 16): number of 16-byte blocks, counting a partial one. */
	addl	$15,%r11d
	shrl	$4,%r11d
	/* Compare against even values; jb takes the odd count below, je the even one. */
	cmpl	$2,%r11d
	jb	.L_AAD_blocks_1_6
	je	.L_AAD_blocks_2_6
	cmpl	$4,%r11d
	jb	.L_AAD_blocks_3_6
	je	.L_AAD_blocks_4_6
	cmpl	$6,%r11d
	jb	.L_AAD_blocks_5_6
	je	.L_AAD_blocks_6_6
	cmpl	$8,%r11d
	jb	.L_AAD_blocks_7_6
	je	.L_AAD_blocks_8_6
	cmpl	$10,%r11d
	jb	.L_AAD_blocks_9_6
	je	.L_AAD_blocks_10_6
	cmpl	$12,%r11d
	jb	.L_AAD_blocks_11_6
	je	.L_AAD_blocks_12_6
	cmpl	$14,%r11d
	jb	.L_AAD_blocks_13_6
	je	.L_AAD_blocks_14_6
	cmpl	$15,%r11d
	je	.L_AAD_blocks_15_6
	/* Anything else falls through to the 16-block handler that follows. */
/*
 * Tail handler for 16 blocks (fall-through case of the block-count dispatch).
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_16_6:
	/* Step back 192 entries (192 * 8 bytes): k1 then masks the fourth vector. */
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Multiply the four data vectors by the 64-byte table entries at
	 * 96/160/224/288(%rdi). $0x11 = high x high, $0x00 = low x low,
	 * $0x01/$0x10 = cross terms; vpternlogq $0x96 is a 3-input XOR.
	 */
	vmovdqu64	96(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	160(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	224(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm9,%zmm11,%zmm1
	vpternlogq	$0x96,%zmm10,%zmm3,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm12,%zmm11,%zmm7
	vpternlogq	$0x96,%zmm13,%zmm3,%zmm8
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 15 blocks.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_15_6:
	/* Step back 192 entries (192 * 8 bytes): k1 then masks the fourth vector. */
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%zmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%zmm16,%zmm5,%zmm5
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Table entries start 16 bytes later than in the 16-block case:
	 * 112/176/240(%rdi), plus a final three-entry vector built below.
	 */
	vmovdqu64	112(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	176(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	240(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	/* 32-byte load plus a 16-byte insert into lane 2: only 48 bytes are read. */
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm5,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm5,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm5,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm5,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 14 blocks: three full 64-byte vectors plus a masked
 * 32-byte (ymm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_14_6:
	/* Step back 192 entries (192 * 8 bytes): k1 then masks the last vector. */
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%ymm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%ymm16,%ymm5,%ymm5
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entries at 128/192/256(%rdi), then a 32-byte entry at 320(%rdi). */
	vmovdqu64	128(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	192(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	256(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	/* ymm-width products; writing a ymm destination zeroes the upper zmm half. */
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm5,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm5,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm5,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm5,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 13 blocks: three full 64-byte vectors plus a masked
 * 16-byte (xmm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_13_6:
	/* Step back 192 entries (192 * 8 bytes): k1 then masks the last vector. */
	subq	$1536,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4
	vmovdqu8	192(%r10),%xmm5{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	vpshufb	%xmm16,%xmm5,%xmm5
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entries at 144/208/272(%rdi), then a 16-byte entry at 336(%rdi). */
	vmovdqu64	144(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	208(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	272(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13
	/* xmm-width products; writing an xmm destination zeroes the upper zmm lanes. */
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm5,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm5,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm5,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm5,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 12 blocks: two full 64-byte vectors plus a masked third.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_12_6:
	/* Step back 128 entries (128 * 8 bytes): k1 then masks the third vector. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Multiply by the table entries at 160/224/288(%rdi). $0x11 = high x
	 * high, $0x00 = low x low, $0x01/$0x10 = cross terms; vpternlogq $0x96
	 * is a 3-input XOR.
	 */
	vmovdqu64	160(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	224(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm1,%zmm11,%zmm9
	vpternlogq	$0x96,%zmm6,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm11
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm3
	vpternlogq	$0x96,%zmm7,%zmm11,%zmm12
	vpternlogq	$0x96,%zmm8,%zmm3,%zmm13

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 11 blocks: two full 64-byte vectors plus a masked third.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_11_6:
	/* Step back 128 entries (128 * 8 bytes): k1 then masks the third vector. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%zmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%zmm16,%zmm4,%zmm4
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entries at 176/240(%rdi), plus a three-entry vector built below. */
	vmovdqu64	176(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	240(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	/* 32-byte load plus a 16-byte insert into lane 2: only 48 bytes are read. */
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm4,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm4,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm4,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm4,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 10 blocks: two full 64-byte vectors plus a masked
 * 32-byte (ymm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_10_6:
	/* Step back 128 entries (128 * 8 bytes): k1 then masks the last vector. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%ymm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%ymm16,%ymm4,%ymm4
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entries at 192/256(%rdi), then a 32-byte entry at 320(%rdi). */
	vmovdqu64	192(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	256(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	/* ymm-width products; writing a ymm destination zeroes the upper zmm half. */
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm4,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm4,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm4,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm4,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 9 blocks: two full 64-byte vectors plus a masked
 * 16-byte (xmm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_9_6:
	/* Step back 128 entries (128 * 8 bytes): k1 then masks the last vector. */
	subq	$1024,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3
	vmovdqu8	128(%r10),%xmm4{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	vpshufb	%xmm16,%xmm4,%xmm4
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entries at 208/272(%rdi), then a 16-byte entry at 336(%rdi). */
	vmovdqu64	208(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	272(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13
	/* xmm-width products; writing an xmm destination zeroes the upper zmm lanes. */
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm4,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm4,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm4,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm4,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 8 blocks: one full 64-byte vector plus a masked second.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_8_6:
	/* Step back 64 entries (64 * 8 bytes): k1 then masks the second vector. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Multiply by the table entries at 224/288(%rdi). $0x11 = high x high,
	 * $0x00 = low x low, $0x01/$0x10 = cross terms.
	 */
	vmovdqu64	224(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm13
	vpxorq	%zmm9,%zmm1,%zmm9
	vpxorq	%zmm10,%zmm6,%zmm10
	vpxorq	%zmm12,%zmm7,%zmm12
	vpxorq	%zmm13,%zmm8,%zmm13

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 7 blocks: one full 64-byte vector plus a masked second.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_7_6:
	/* Step back 64 entries (64 * 8 bytes): k1 then masks the second vector. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%zmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%zmm16,%zmm3,%zmm3
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entry at 240(%rdi), plus a three-entry vector built below. */
	vmovdqu64	240(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	/* 32-byte load plus a 16-byte insert into lane 2: only 48 bytes are read. */
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm3,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm3,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm3,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm3,%zmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 6 blocks: one full 64-byte vector plus a masked
 * 32-byte (ymm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_6_6:
	/* Step back 64 entries (64 * 8 bytes): k1 then masks the last vector. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%ymm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%ymm16,%ymm3,%ymm3
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entry at 256(%rdi), then a 32-byte entry at 320(%rdi). */
	vmovdqu64	256(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	/* ymm-width products; writing a ymm destination zeroes the upper zmm half. */
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm3,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm3,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm3,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm3,%ymm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 5 blocks: one full 64-byte vector plus a masked
 * 16-byte (xmm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count, rdi = context, zmm14 = accumulator, zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_5_6:
	/* Step back 64 entries (64 * 8 bytes): k1 then masks the last vector. */
	subq	$512,%r12
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11
	vmovdqu8	64(%r10),%xmm3{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	vpshufb	%xmm16,%xmm3,%xmm3
	/* Fold the running accumulator into the first data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* Table entry at 272(%rdi), then a 16-byte entry at 336(%rdi). */
	vmovdqu64	272(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13
	/* xmm-width products; writing an xmm destination zeroes the upper zmm lanes. */
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm3,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm3,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm3,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm3,%xmm6

	vpxorq	%zmm12,%zmm7,%zmm7
	vpxorq	%zmm13,%zmm8,%zmm8
	vpxorq	%zmm9,%zmm1,%zmm1
	vpxorq	%zmm10,%zmm6,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 4 blocks: a single masked 64-byte vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count (used unadjusted), rdi = context, zmm14 = accumulator,
 *      zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_4_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	/* Fold the running accumulator into the data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/*
	 * Multiply by the table entry at 288(%rdi): $0x11 = high x high,
	 * $0x00 = low x low, $0x01/$0x10 = cross terms.
	 */
	vmovdqu64	288(%rdi),%zmm15
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm9
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm10
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm12
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm13

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm13,%zmm12,%zmm12
	vpsrldq	$8,%zmm12,%zmm7
	vpslldq	$8,%zmm12,%zmm8
	vpxorq	%zmm7,%zmm9,%zmm1
	vpxorq	%zmm8,%zmm10,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 3 blocks: a single masked 64-byte vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count (used unadjusted), rdi = context, zmm14 = accumulator,
 *      zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_3_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%zmm11{%k1}{z}
	vpshufb	%zmm16,%zmm11,%zmm11
	/* Fold the running accumulator into the data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* 32-byte load plus a 16-byte insert into lane 2: only 48 bytes are read. */
	vmovdqu64	304(%rdi),%ymm15
	vinserti64x2	$2,336(%rdi),%zmm15,%zmm15
	vpclmulqdq	$0x01,%zmm15,%zmm11,%zmm7
	vpclmulqdq	$0x10,%zmm15,%zmm11,%zmm8
	vpclmulqdq	$0x11,%zmm15,%zmm11,%zmm1
	vpclmulqdq	$0x00,%zmm15,%zmm11,%zmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 2 blocks: a single masked 32-byte (ymm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count (used unadjusted), rdi = context, zmm14 = accumulator,
 *      zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator.
 */
.L_AAD_blocks_2_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%ymm11{%k1}{z}
	vpshufb	%ymm16,%ymm11,%ymm11
	/* Fold the running accumulator into the data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* ymm-width products with the 32-byte table entry at 320(%rdi). */
	vmovdqu64	320(%rdi),%ymm15
	vpclmulqdq	$0x01,%ymm15,%ymm11,%ymm7
	vpclmulqdq	$0x10,%ymm15,%ymm11,%ymm8
	vpclmulqdq	$0x11,%ymm15,%ymm11,%ymm1
	vpclmulqdq	$0x00,%ymm15,%ymm11,%ymm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

	jmp	.L_CALC_AAD_done_6
/*
 * Tail handler for 1 block: a single masked 16-byte (xmm) vector.
 * In:  r10 = input, r12 = mask-table entry address for the remaining byte
 *      count (used unadjusted), rdi = context, zmm14 = accumulator,
 *      zmm16 = vpshufb control.
 * Out: xmm14 = updated accumulator. Falls through to .L_CALC_AAD_done_6.
 */
.L_AAD_blocks_1_6:
	kmovq	(%r12),%k1
	vmovdqu8	0(%r10),%xmm11{%k1}{z}
	vpshufb	%xmm16,%xmm11,%xmm11
	/* Fold the running accumulator into the data vector. */
	vpxorq	%zmm14,%zmm11,%zmm11
	/* xmm-width products with the 16-byte table entry at 336(%rdi). */
	vmovdqu64	336(%rdi),%xmm15
	vpclmulqdq	$0x01,%xmm15,%xmm11,%xmm7
	vpclmulqdq	$0x10,%xmm15,%xmm11,%xmm8
	vpclmulqdq	$0x11,%xmm15,%xmm11,%xmm1
	vpclmulqdq	$0x00,%xmm15,%xmm11,%xmm6

	/* Merge the cross terms, split them into high/low, then XOR the lanes together. */
	vpxorq	%zmm8,%zmm7,%zmm7
	vpsrldq	$8,%zmm7,%zmm12
	vpslldq	$8,%zmm7,%zmm13
	vpxorq	%zmm12,%zmm1,%zmm1
	vpxorq	%zmm13,%zmm6,%zmm6
	vextracti64x4	$1,%zmm1,%ymm12
	vpxorq	%ymm12,%ymm1,%ymm1
	vextracti32x4	$1,%ymm1,%xmm12
	vpxorq	%xmm12,%xmm1,%xmm1
	vextracti64x4	$1,%zmm6,%ymm13
	vpxorq	%ymm13,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm13
	vpxorq	%xmm13,%xmm6,%xmm6
	/* Two-phase reduction with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm15


	vpclmulqdq	$0x01,%xmm6,%xmm15,%xmm7
	vpslldq	$8,%xmm7,%xmm7
	vpxorq	%xmm7,%xmm6,%xmm7


	vpclmulqdq	$0x00,%xmm7,%xmm15,%xmm8
	vpsrldq	$4,%xmm8,%xmm8
	vpclmulqdq	$0x10,%xmm7,%xmm15,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm1,%xmm8,%xmm14

/*
 * Common exit of the AAD hashing paths: publish the accumulator, optionally
 * wipe the stack scratch area, then unwind the frame and return.
 */
.L_CALC_AAD_done_6:
	/* Store the 16-byte accumulator at offset 64 of the context. */
	vmovdqu64	%xmm14,64(%rdi)
	/*
	 * Skip the wipe when rdx <= 256 (unsigned compare).
	 * NOTE(review): rdx is presumably the caller-supplied input length - confirm.
	 */
	cmpq	$256,%rdx
	jbe	.Lskip_hkeys_cleanup_9
	/* Zero 768 bytes at 0..767(%rsp) with twelve aligned 64-byte stores. */
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%zmm0,0(%rsp)
	vmovdqa64	%zmm0,64(%rsp)
	vmovdqa64	%zmm0,128(%rsp)
	vmovdqa64	%zmm0,192(%rsp)
	vmovdqa64	%zmm0,256(%rsp)
	vmovdqa64	%zmm0,320(%rsp)
	vmovdqa64	%zmm0,384(%rsp)
	vmovdqa64	%zmm0,448(%rsp)
	vmovdqa64	%zmm0,512(%rsp)
	vmovdqa64	%zmm0,576(%rsp)
	vmovdqa64	%zmm0,640(%rsp)
	vmovdqa64	%zmm0,704(%rsp)
.Lskip_hkeys_cleanup_9:
	vzeroupper
	/* Restore rsp from the frame pointer, then pop the six saved registers. */
	leaq	(%rbp),%rsp
.cfi_def_cfa_register	%rsp
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	%r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbp
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	%rbx
.Lexit_update_aad:
	/* 0xf3,0xc3 is the encoding of "rep ret". */
	.byte	0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc	
.size	ossl_aes_gcm_update_aad_avx512, .-ossl_aes_gcm_update_aad_avx512
/*
 * ossl_aes_gcm_encrypt_avx512 -- entry point, prologue and key-size dispatch.
 *
 * Register usage as seen in the 128-bit path that follows this prologue
 * (the 192/256-bit paths and the epilogue are outside this excerpt):
 *   rdi - base of the AES round keys; 240(%rdi) is read below as a 32-bit
 *         selector (presumably the round count -- confirm against the key
 *         schedule layout)
 *   rsi - context block; 16-byte fields at offsets 0, 16, 64 and 336 and
 *         64-byte groups at 96..288 are read, offsets 16 and 64 are written
 *   rdx - pointer to a 64-bit byte count that is treated as the length of
 *         a pending partial block
 *   rcx - input pointer
 *   r8  - number of bytes to process
 *   r9  - output pointer
 * NOTE(review): the C prototype is not visible here; confirm the argument
 * meanings against the caller.
 */
.globl	ossl_aes_gcm_encrypt_avx512
.type	ossl_aes_gcm_encrypt_avx512,@function
.align	32
ossl_aes_gcm_encrypt_avx512:
.cfi_startproc	
.Lencrypt_seh_begin:
/* Bytes f3 0f 1e fa encode endbr64. */
.byte	243,15,30,250
	/*
	 * Save rbx, rbp and r12-r15; each push is followed by its CFI record.
	 * The .Lencrypt_seh_* labels mark positions inside the prologue
	 * (presumably for unwind tables defined elsewhere -- not visible here).
	 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
.Lencrypt_seh_push_rbx:
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
.Lencrypt_seh_push_rbp:
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
.Lencrypt_seh_push_r12:
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
.Lencrypt_seh_push_r13:
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
.Lencrypt_seh_push_r14:
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lencrypt_seh_push_r15:










	/* rbp = rsp after the pushes; the CFA is tracked through rbp from here on. */
	leaq	0(%rsp),%rbp
.cfi_def_cfa_register	%rbp
.Lencrypt_seh_setfp:

.Lencrypt_seh_prolog_end:
	/*
	 * Reserve at least 1588 bytes of scratch space, then round rsp down
	 * to a multiple of 64 so that the aligned 64-byte accesses to
	 * N(%rsp) used later are legal.
	 */
	subq	$1588,%rsp
	andq	$(-64),%rsp


	/*
	 * Dispatch on the 32-bit value at 240(%rdi): 9, 11 and 13 select the
	 * 128-, 192- and 256-bit code paths respectively.
	 */
	movl	240(%rdi),%eax
	cmpl	$9,%eax
	je	.Laes_gcm_encrypt_128_avx512
	cmpl	$11,%eax
	je	.Laes_gcm_encrypt_192_avx512
	cmpl	$13,%eax
	je	.Laes_gcm_encrypt_256_avx512
	/* Any other value: set eax to 0 and leave through the common exit. */
	xorl	%eax,%eax
	jmp	.Lexit_gcm_encrypt
.align	32
/*
 * 128-bit path, first stage: finish a partial block pending from an
 * earlier call, if there is one.
 *
 * On leaving this region r11 holds the number of input bytes consumed
 * here; later code uses it as the running offset into rcx / r9.
 */
.Laes_gcm_encrypt_128_avx512:
	/* Nothing to do for a zero length. */
	orq	%r8,%r8
	je	.L_enc_dec_done_10
	/* r14 = 0; later code tests r14 before a precomputation and sets it to 1 afterwards. */
	xorq	%r14,%r14
	/* xmm14 = 16 bytes at offset 64 of the context (the running hash value, written back below). */
	vmovdqu64	64(%rsi),%xmm14

	/* r11 = pending byte count at (%rdx); zero means no partial block is pending. */
	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_11
	/* r10 = min(r8, 16), unsigned: cmovc fires when r8 < 16. */
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	/* k1 = 16-bit mask taken from table entry r10 (two bytes per entry). */
	kmovw	(%r12,%r10,2),%k1
	/* Masked load of at most 16 input bytes; unselected bytes are zeroed. */
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	/*
	 * xmm3 = 16 bytes at offset 16 of the context, xmm4 = 16 bytes at
	 * offset 336.  NOTE(review): presumably the saved keystream block and
	 * the hash key -- confirm against the context layout.
	 */
	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	/* xmm5 = shuffle control read from SHIFT_MASK + r11; shuffle xmm3 with it, then XOR in the input bytes. */
	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm0,%xmm3,%xmm3


	/*
	 * r13 = r8 + r11 - 16.  If it is negative (signed) the block is still
	 * not full, and the mask pointer r12 is advanced by the number of
	 * bytes that are still missing (r12 -= r13).
	 */
	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_11
	subq	%r13,%r12
.L_no_extra_mask_11:



	/* Mask xmm3 with the 16 bytes at 16(%r12), apply the SHUF_MASK and xmm5 shuffles, and XOR the result into xmm14. */
	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm14,%xmm14
	/* Block still incomplete: skip the multiplication. */
	cmpq	$0,%r13
	jl	.L_partial_incomplete_11

	/*
	 * Carry-less multiply of xmm14 by xmm4: four 64x64 partial products
	 * (high*high in xmm7, low*low in xmm10, the two cross terms summed in
	 * xmm14).
	 */
	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	/* Split the cross-term sum across the two halves: xmm7 = high 128 bits, xmm14 = low 128 bits of the product. */
	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	/* Two-step fold of the product with the constant at POLY2. */
	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	/* Immediate 0x96 is a three-way XOR: xmm14 ^= xmm7 ^ xmm10. */
	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	/* The pending block is complete: reset the stored byte count. */
	movq	$0,(%rdx)

	/* r11 = 16 - previous pending count, i.e. the input bytes consumed here. */
	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_11

.L_partial_incomplete_11:
	/* Still incomplete: add r8 to the stored count; every input byte was consumed. */
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_11:


	/* k1 = byte mask for r11 bytes. */
	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	/* Write the hash value back to offset 64 of the context. */
	vmovdqu64	%xmm14,64(%rsi)

	/* Apply the SHUF_MASK and xmm5 shuffles to xmm3 again, then store r11 bytes of it at the output pointer. */
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_11:
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_10
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_10

	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_12
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_12
.L_next_16_overflow_12:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_12:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_13

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_13:
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_10



	cmpb	$240,%r15b
	jae	.L_next_16_overflow_14
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_14
.L_next_16_overflow_14:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_14:
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_15
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
.L_skip_hkeys_precomputation_15:
	movq	$1,%r14
	addq	$512,%r11
	subq	$512,%r8

	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_10
.L_encrypt_big_nblocks_10:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_16
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_16
.L_16_blocks_overflow_16:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_16:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_17
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_17
.L_16_blocks_overflow_17:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_17:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_18
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_18
.L_16_blocks_overflow_18:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_18:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	vmovdqa64	%zmm6,%zmm14

	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_10

.L_no_more_big_nblocks_10:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_10

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_10
/*
 * Multiply the 16 buffered blocks at 768..1023(%rsp) by table entries taken
 * from the stack, accumulate the partial products, then dispatch on the
 * number of 16-byte blocks that remain in r8.
 * On the fall-through path from the length checks just above, r8 < 256.
 */
.L_encrypt_0_blocks_ghash_32_10:
	/*
	 * ebx = 256 - (r8 rounded down to a multiple of 16): byte offset into
	 * the table that starts at 0(%rsp).  The 32-bit write also clears the
	 * upper half of rbx, which is used as the index register below.
	 */
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	/* First group of four blocks, with the running hash zmm14 XORed in. */
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Second group. */
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/*
	 * Start the accumulators: zmm24 collects the 0x11 (high) products,
	 * zmm25 the 0x00 (low) products, zmm26 both cross products.
	 * Immediate 0x96 makes vpternlogq a three-way XOR.
	 */
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Third group. */
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Fourth group. */
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	/* Fold the third and fourth groups into the same three accumulators. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Advance the table offset by 256 bytes for the tail handlers. */
	addl	$256,%ebx
	/*
	 * r10d = (r8 + 15) >> 4: number of 16-byte blocks left, counting a
	 * trailing partial block.  The shift sets ZF when the result is zero.
	 */
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_19

	/*
	 * Decision tree over the block counts 1..16.  Each cmpl sets the flags
	 * once and the conditional jumps that follow it reuse them.
	 */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_19
	jb	.L_last_num_blocks_is_7_1_19


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_19
	jb	.L_last_num_blocks_is_11_9_19


	/* 13..16 remain. */
	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_19
	ja	.L_last_num_blocks_is_16_19
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_19
	jmp	.L_last_num_blocks_is_13_19

.L_last_num_blocks_is_11_9_19:

	/* 9..11 remain. */
	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_19
	ja	.L_last_num_blocks_is_11_19
	jmp	.L_last_num_blocks_is_9_19

.L_last_num_blocks_is_7_1_19:
	/* 1..7 remain. */
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_19
	jb	.L_last_num_blocks_is_3_1_19

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_19
	je	.L_last_num_blocks_is_6_19
	jmp	.L_last_num_blocks_is_5_19

.L_last_num_blocks_is_3_1_19:

	/* 1..3 remain; a count of 1 falls through to the handler that follows. */
	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_19
	je	.L_last_num_blocks_is_2_19
.L_last_num_blocks_is_1_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_20
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_20

.L_16_blocks_overflow_20:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_20:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_21





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_21
.L_small_initial_partial_block_21:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_21
.L_small_initial_compute_done_21:
.L_after_reduction_21:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_2_19
 *
 * Handles a final run of two 16-byte blocks, the second of which may be
 * shorter than 16 bytes.  Three activities are interleaved:
 *   - AES on two counter blocks held in %ymm0, using the eleven 16-byte
 *     round keys at 0..160(%rdi): initial XOR, nine vaesenc, one
 *     vaesenclast;
 *   - carry-less multiplies of the four 64-byte vectors at
 *     0/64/128/192(%rsp,%rbx,1) by the four vectors at
 *     1024/1088/1152/1216(%rsp), XORed into the values already held in
 *     %zmm24 (0x11 products), %zmm25 (0x00 products) and %zmm26
 *     (0x01 and 0x10 products);
 *   - folding the new output blocks into those sums and reducing with
 *     POLY2, leaving the 128-bit result in %xmm14.
 *
 * Reads:  %r8 (byte count, also the index into byte64_len_to_mask_table),
 *         %rcx+%r11 (input), %r9+%r11 (output), %zmm2, %ymm28, %zmm29,
 *         %r15d, %rbx, %rdi, %rsi, %rdx.
 * Writes: output bytes selected by %k1, (%rdx), 16(%rsi) on the partial
 *         path, %xmm14, %zmm2, %xmm7, %xmm11, %r8, %r10, %rax, %k1 and
 *         the scratch vectors used below.
 *
 * NOTE(review): the following are inferred, not shown here - confirm:
 *   %zmm29 looks like a byte-reversal shuffle mask, %zmm28 like per-lane
 *   counter increments, %r15d like the low counter byte, 320/336(%rsi)
 *   like stored hash-key powers, and (%rdx) / 16(%rsi) like the saved
 *   partial-block length and block.
 */
.L_last_num_blocks_is_2_19:
	/* %k1 = 8-byte table entry number %r8: byte mask for the masked load/store. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* %r15d < 254 (unsigned): build the counter blocks with a plain add. */
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_22
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_22

.L_16_blocks_overflow_22:
	/* Otherwise: shuffle %zmm2 with %zmm29, add ddq_add_1234, shuffle the sum back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_22:




	/* Round key 0 to %zmm30; first multiply operands; %zmm2 = lane 1 of %zmm0 copied to all four lanes. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Initial key XOR on the counter blocks; the next round key is always fetched one step ahead. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Operand pair 1: four partial products into %zmm14, %zmm7, %zmm10, %zmm11. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Operand pair 2: partial products into %zmm15, %zmm16, %zmm12, %zmm13. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Operand pair 3: partial products into %zmm20, %zmm21, %zmm17, %zmm19. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 by product type. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zero-filling load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Operand pair 4, then fold everything into the running sums %zmm24/%zmm25/%zmm26. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	/* Output = AES result XOR input.  %xmm11 keeps the unmasked last output block. */
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside %k1, shuffle the output with %ymm29 for hashing; %xmm7 = last shuffled block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	/* %r8 = bytes belonging to the last block. */
	subq	$16 * (2 - 1),%r8


	/* Signed compare: fewer than 16 bytes means the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_23





	/* Full last block: store 0 to (%rdx) and multiply both blocks by the 32 bytes at 320(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	/* Add the running sums. */
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Split the cross-product sum by 8-byte shifts into the 0x11 sum (%zmm0) and the 0x00 sum (%zmm3). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction step 1 with POLY2 on the 0x00 sum. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction step 2; %xmm14 = step-2 terms XOR 0x11 sum (%xmm0). */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_23
.L_small_initial_partial_block_23:








	/* Partial last block: store %r8 (below 16) to (%rdx) and the unmasked last output block to 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only the first block (low lane of %zmm17) by 336(%rsi); the last block is handled via %xmm7 below. */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Same split, lane fold and POLY2 reduction as on the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_23:

	/* %r8 == 0: nothing pending.  Otherwise XOR the shuffled last block (%xmm7) into %xmm14 without multiplying it. */
	orq	%r8,%r8
	je	.L_after_reduction_23
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_23:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_3_19
 *
 * Handles a final run of three 16-byte blocks, the last of which may be
 * shorter than 16 bytes.  Three activities are interleaved:
 *   - AES on the counter lanes of %zmm0, using the eleven 16-byte round
 *     keys at 0..160(%rdi): initial XOR, nine vaesenc, one vaesenclast;
 *   - carry-less multiplies of the four 64-byte vectors at
 *     0/64/128/192(%rsp,%rbx,1) by the four vectors at
 *     1024/1088/1152/1216(%rsp), XORed into the values already held in
 *     %zmm24 (0x11 products), %zmm25 (0x00 products) and %zmm26
 *     (0x01 and 0x10 products);
 *   - folding the new output blocks into those sums and reducing with
 *     POLY2, leaving the 128-bit result in %xmm14.
 *
 * Reads:  %r8 (byte count, also the index into byte64_len_to_mask_table),
 *         %rcx+%r11 (input), %r9+%r11 (output), %zmm2, %zmm28, %zmm29,
 *         %r15d, %rbx, %rdi, %rsi, %rdx.
 * Writes: output bytes selected by %k1, (%rdx), 16(%rsi) on the partial
 *         path, %xmm14, %zmm2, %xmm7, %xmm11, %r8, %r10, %rax, %k1 and
 *         the scratch vectors used below.
 *
 * NOTE(review): the following are inferred, not shown here - confirm:
 *   %zmm29 looks like a byte-reversal shuffle mask, %zmm28 like per-lane
 *   counter increments, %r15d like the low counter byte, 304..336(%rsi)
 *   like stored hash-key powers, (%rdx) / 16(%rsi) like the saved
 *   partial-block length and block, and %k1 presumably selects at most
 *   48 bytes so that lane 3 of %zmm0 is never stored.
 */
.L_last_num_blocks_is_3_19:
	/* %k1 = 8-byte table entry number %r8: byte mask for the masked load/store. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* %r15d < 253 (unsigned): build the counter blocks with a plain add. */
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_24
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_24

.L_16_blocks_overflow_24:
	/* Otherwise: shuffle %zmm2 with %zmm29, add ddq_add_1234, shuffle the sum back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_24:




	/* Round key 0 to %zmm30; first multiply operands; %zmm2 = lane 2 of %zmm0 copied to all four lanes. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Initial key XOR on the counter blocks; the next round key is always fetched one step ahead. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Operand pair 1: four partial products into %zmm14, %zmm7, %zmm10, %zmm11. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Operand pair 2: partial products into %zmm15, %zmm16, %zmm12, %zmm13. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Operand pair 3: partial products into %zmm20, %zmm21, %zmm17, %zmm19. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 by product type. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zero-filling load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Operand pair 4, then fold everything into the running sums %zmm24/%zmm25/%zmm26. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* Output = AES result XOR input.  %xmm11 keeps the unmasked last output block. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside %k1, shuffle the output with %zmm29 for hashing; %xmm7 = last shuffled block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	/* %r8 = bytes belonging to the last block. */
	subq	$16 * (3 - 1),%r8


	/* Signed compare: fewer than 16 bytes means the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_25





	/* Full last block: store 0 to (%rdx); multiplier lanes 0-1 from 304(%rsi), lane 2 from 336(%rsi), lane 3 zero. */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	/* Add the running sums. */
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Split the cross-product sum by 8-byte shifts into the 0x11 sum (%zmm0) and the 0x00 sum (%zmm3). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction step 1 with POLY2 on the 0x00 sum. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction step 2; %xmm14 = step-2 terms XOR 0x11 sum (%xmm0). */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_25
.L_small_initial_partial_block_25:








	/* Partial last block: store %r8 (below 16) to (%rdx) and the unmasked last output block to 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only the first two blocks by the 32 bytes at 320(%rsi); the last block is handled via %xmm7 below. */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Same split, lane fold and POLY2 reduction as on the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_25:

	/* %r8 == 0: nothing pending.  Otherwise XOR the shuffled last block (%xmm7) into %xmm14 without multiplying it. */
	orq	%r8,%r8
	je	.L_after_reduction_25
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_25:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_4_19
 *
 * Handles a final run of four 16-byte blocks, the last of which may be
 * shorter than 16 bytes.  Three activities are interleaved:
 *   - AES on the four counter lanes of %zmm0, using the eleven 16-byte
 *     round keys at 0..160(%rdi): initial XOR, nine vaesenc, one
 *     vaesenclast;
 *   - carry-less multiplies of the four 64-byte vectors at
 *     0/64/128/192(%rsp,%rbx,1) by the four vectors at
 *     1024/1088/1152/1216(%rsp), XORed into the values already held in
 *     %zmm24 (0x11 products), %zmm25 (0x00 products) and %zmm26
 *     (0x01 and 0x10 products);
 *   - folding the new output blocks into those sums and reducing with
 *     POLY2, leaving the 128-bit result in %xmm14.
 *
 * Reads:  %r8 (byte count, also the index into byte64_len_to_mask_table),
 *         %rcx+%r11 (input), %r9+%r11 (output), %zmm2, %zmm28, %zmm29,
 *         %r15d, %rbx, %rdi, %rsi, %rdx.
 * Writes: output bytes selected by %k1, (%rdx), 16(%rsi) on the partial
 *         path, %xmm14, %zmm2, %xmm7, %xmm11, %r8, %r10, %rax, %k1 and
 *         the scratch vectors used below.
 *
 * NOTE(review): the following are inferred, not shown here - confirm:
 *   %zmm29 looks like a byte-reversal shuffle mask, %zmm28 like per-lane
 *   counter increments, %r15d like the low counter byte, 288..336(%rsi)
 *   like stored hash-key powers, and (%rdx) / 16(%rsi) like the saved
 *   partial-block length and block.
 */
.L_last_num_blocks_is_4_19:
	/* %k1 = 8-byte table entry number %r8: byte mask for the masked load/store. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* %r15d < 252 (unsigned): build the counter blocks with a plain add. */
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_26
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_26

.L_16_blocks_overflow_26:
	/* Otherwise: shuffle %zmm2 with %zmm29, add ddq_add_1234, shuffle the sum back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_26:




	/* Round key 0 to %zmm30; first multiply operands; %zmm2 = lane 3 of %zmm0 copied to all four lanes. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Initial key XOR on the counter blocks; the next round key is always fetched one step ahead. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Operand pair 1: four partial products into %zmm14, %zmm7, %zmm10, %zmm11. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Operand pair 2: partial products into %zmm15, %zmm16, %zmm12, %zmm13. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Operand pair 3: partial products into %zmm20, %zmm21, %zmm17, %zmm19. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 by product type. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zero-filling load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Operand pair 4, then fold everything into the running sums %zmm24/%zmm25/%zmm26. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* Output = AES result XOR input.  %xmm11 keeps the unmasked last output block. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside %k1, shuffle the output with %zmm29 for hashing; %xmm7 = last shuffled block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	/* %r8 = bytes belonging to the last block. */
	subq	$16 * (4 - 1),%r8


	/* Signed compare: fewer than 16 bytes means the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_27





	/* Full last block: store 0 to (%rdx) and multiply all four blocks by the 64 bytes at 288(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Add the running sums. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the cross-product sum by 8-byte shifts into the 0x11 sum (%zmm0) and the 0x00 sum (%zmm3). */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction step 1 with POLY2 on the 0x00 sum. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction step 2; %xmm14 = step-2 terms XOR 0x11 sum (%xmm0). */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_27
.L_small_initial_partial_block_27:








	/* Partial last block: store %r8 (below 16) to (%rdx) and the unmasked last output block to 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiplier lanes 0-1 from 304(%rsi), lane 2 from 336(%rsi), lane 3 zero: the last block is handled via %xmm7 below. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Same split, lane fold and POLY2 reduction as on the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_27:

	/* %r8 == 0: nothing pending.  Otherwise XOR the shuffled last block (%xmm7) into %xmm14 without multiplying it. */
	orq	%r8,%r8
	je	.L_after_reduction_27
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_27:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_5_19
 *
 * Handles a final run of five 16-byte blocks, the last of which may be
 * shorter than 16 bytes.  Blocks 1-4 live in %zmm0 and are loaded and
 * stored unmasked; block 5 lives in %xmm3 and is loaded and stored under
 * %k1 at offset 64.  Three activities are interleaved:
 *   - AES on both counter vectors, using the eleven 16-byte round keys
 *     at 0..160(%rdi): initial XOR, nine vaesenc, one vaesenclast;
 *   - carry-less multiplies of the four 64-byte vectors at
 *     0/64/128/192(%rsp,%rbx,1) by the four vectors at
 *     1024/1088/1152/1216(%rsp), XORed into the values already held in
 *     %zmm24 (0x11 products), %zmm25 (0x00 products) and %zmm26
 *     (0x01 and 0x10 products);
 *   - folding the new output blocks into those sums and reducing with
 *     POLY2, leaving the 128-bit result in %xmm14.
 *
 * Reads:  %r8 (byte count; %r8 - 64 indexes byte64_len_to_mask_table),
 *         %rcx+%r11 (input), %r9+%r11 (output), %zmm2, %zmm27, %zmm28,
 *         %zmm29, %r15d, %rbx, %rdi, %rsi, %rdx.
 * Writes: 64 output bytes plus those selected by %k1, (%rdx), 16(%rsi)
 *         on the partial path, %xmm14, %zmm2, %xmm7, %xmm11, %r8, %r10,
 *         %rax, %k1 and the scratch vectors used below.
 *
 * NOTE(review): the following are inferred, not shown here - confirm:
 *   %zmm29 looks like a byte-reversal shuffle mask, %zmm28 / %zmm27 like
 *   per-lane counter increments, %r15d like the low counter byte,
 *   272..336(%rsi) like stored hash-key powers, and (%rdx) / 16(%rsi)
 *   like the saved partial-block length and block.
 */
.L_last_num_blocks_is_5_19:
	/* %k1 = 8-byte table entry number (%r8 - 64): byte mask for the second vector only. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* %r15d < 251 (unsigned): build both counter vectors with plain adds. */
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_28
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_28

.L_16_blocks_overflow_28:
	/* Otherwise: shuffle %zmm2 with %zmm29, add ddq_add_1234 and then ddq_add_4444, shuffle both sums back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_28:




	/* Round key 0 to %zmm30; first multiply operands; %zmm2 = lane 0 of %zmm3 copied to all four lanes. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Initial key XOR on both counter vectors; the next round key is always fetched one step ahead. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Operand pair 1: four partial products into %zmm14, %zmm7, %zmm10, %zmm11. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Operand pair 2: partial products into %zmm15, %zmm16, %zmm12, %zmm13. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Operand pair 3: partial products into %zmm20, %zmm21, %zmm17, %zmm19. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 by product type. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: first 64 bytes unmasked, the rest as a masked, zero-filling load under %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Operand pair 4, then fold everything into the running sums %zmm24/%zmm25/%zmm26. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	/* Output = AES result XOR input.  %xmm11 keeps the unmasked last output block. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm3 outside %k1, shuffle both outputs for hashing; %xmm7 = last shuffled block. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	/* %r8 = bytes belonging to the last block. */
	subq	$16 * (5 - 1),%r8


	/* Signed compare: fewer than 16 bytes means the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_29





	/* Full last block: store 0 to (%rdx); blocks 1-4 times 272(%rsi), block 5 times 336(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Combine both product sets with the running sums. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross-product sum by 8-byte shifts into the 0x11 sum (%zmm0) and the 0x00 sum (%zmm3). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction step 1 with POLY2 on the 0x00 sum. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction step 2; %xmm14 = step-2 terms XOR 0x11 sum (%xmm0). */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_29
.L_small_initial_partial_block_29:








	/* Partial last block: store %r8 (below 16) to (%rdx) and the unmasked last output block to 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only blocks 1-4 by the 64 bytes at 288(%rsi); the last block is handled via %xmm7 below. */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Same split, lane fold and POLY2 reduction as on the full-block path. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_29:

	/* %r8 == 0: nothing pending.  Otherwise XOR the shuffled last block (%xmm7) into %xmm14 without multiplying it. */
	orq	%r8,%r8
	je	.L_after_reduction_29
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_29:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_6_19
 *
 * Handles a final run of six 16-byte blocks, the last of which may be
 * shorter than 16 bytes.  Blocks 1-4 live in %zmm0 and are loaded and
 * stored unmasked; blocks 5-6 live in %ymm3 and are loaded and stored
 * under %k1 at offset 64.  Three activities are interleaved:
 *   - AES on both counter vectors, using the eleven 16-byte round keys
 *     at 0..160(%rdi): initial XOR, nine vaesenc, one vaesenclast;
 *   - carry-less multiplies of the four 64-byte vectors at
 *     0/64/128/192(%rsp,%rbx,1) by the four vectors at
 *     1024/1088/1152/1216(%rsp), XORed into the values already held in
 *     %zmm24 (0x11 products), %zmm25 (0x00 products) and %zmm26
 *     (0x01 and 0x10 products);
 *   - folding the new output blocks into those sums and reducing with
 *     POLY2, leaving the 128-bit result in %xmm14.
 *
 * Reads:  %r8 (byte count; %r8 - 64 indexes byte64_len_to_mask_table),
 *         %rcx+%r11 (input), %r9+%r11 (output), %zmm2, %ymm27, %zmm28,
 *         %zmm29, %r15d, %rbx, %rdi, %rsi, %rdx.
 * Writes: 64 output bytes plus those selected by %k1, (%rdx), 16(%rsi)
 *         on the partial path, %xmm14, %zmm2, %xmm7, %xmm11, %r8, %r10,
 *         %rax, %k1 and the scratch vectors used below.
 *
 * NOTE(review): the following are inferred, not shown here - confirm:
 *   %zmm29 looks like a byte-reversal shuffle mask, %zmm28 / %zmm27 like
 *   per-lane counter increments, %r15d like the low counter byte,
 *   256..336(%rsi) like stored hash-key powers, and (%rdx) / 16(%rsi)
 *   like the saved partial-block length and block.
 */
.L_last_num_blocks_is_6_19:
	/* %k1 = 8-byte table entry number (%r8 - 64): byte mask for the second vector only. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* %r15d < 250 (unsigned): build both counter vectors with plain adds. */
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_30
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_30

.L_16_blocks_overflow_30:
	/* Otherwise: shuffle %zmm2 with %zmm29, add ddq_add_1234 and then ddq_add_4444, shuffle both sums back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_30:




	/* Round key 0 to %zmm30; first multiply operands; %zmm2 = lane 1 of %zmm3 copied to all four lanes. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Initial key XOR on both counter vectors; the next round key is always fetched one step ahead. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Operand pair 1: four partial products into %zmm14, %zmm7, %zmm10, %zmm11. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Operand pair 2: partial products into %zmm15, %zmm16, %zmm12, %zmm13. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Operand pair 3: partial products into %zmm20, %zmm21, %zmm17, %zmm19. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 by product type. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: first 64 bytes unmasked, the rest as a masked, zero-filling load under %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Operand pair 4, then fold everything into the running sums %zmm24/%zmm25/%zmm26. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	/* Output = AES result XOR input.  %xmm11 keeps the unmasked last output block. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm3 outside %k1, shuffle both outputs for hashing; %xmm7 = last shuffled block. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	/* %r8 = bytes belonging to the last block. */
	subq	$16 * (6 - 1),%r8


	/* Signed compare: fewer than 16 bytes means the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_31





	/* Full last block: store 0 to (%rdx); blocks 1-4 times 256(%rsi), blocks 5-6 times 320(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Combine both product sets with the running sums. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross-product sum by 8-byte shifts into the 0x11 sum (%zmm0) and the 0x00 sum (%zmm3). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction step 1 with POLY2 on the 0x00 sum. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction step 2; %xmm14 = step-2 terms XOR 0x11 sum (%xmm0). */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_31
.L_small_initial_partial_block_31:








	/* Partial last block: store %r8 (below 16) to (%rdx) and the unmasked last output block to 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Blocks 1-4 times 272(%rsi), block 5 times 336(%rsi); the last block is handled via %xmm7 below. */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Same split, lane fold and POLY2 reduction as on the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_31:

	/* %r8 == 0: nothing pending.  Otherwise XOR the shuffled last block (%xmm7) into %xmm14 without multiplying it. */
	orq	%r8,%r8
	je	.L_after_reduction_31
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_31:
	jmp	.L_last_blocks_done_19
.L_last_num_blocks_is_7_19:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_32
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_32

.L_16_blocks_overflow_32:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_32:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_33





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_33
/*
 * Partial-final-block path of the 7-block tail case.  Reached by the signed
 * "cmpq $16,%r8 / jl" above, i.e. when r8 (after 16 * (7 - 1) was subtracted)
 * is below 16.
 *
 * Visible effects:
 *   - r8 is stored to (%rdx) and xmm11 to 16(%rsi)
 *     (presumably the partial-block length and saved block state -- TODO
 *     confirm against the context structure definition).
 *   - zmm17 is multiplied by the 64 bytes at 256(%rsi) and the low 256 bits of
 *     zmm19 by the 32 bytes at 320(%rsi); the third 128-bit lane of zmm19
 *     (copied to xmm7 earlier) is not multiplied here.
 *   - The products are XOR-combined with the accumulators zmm24/zmm25/zmm26,
 *     folded down to 128 bits and reduced with POLY2 into xmm14.
 */
.L_small_initial_partial_block_33:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

/* vpternlogq with immediate 0x96 is a three-way XOR of its operands. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

/* Split the cross-product sum into the high (zmm0) and low (zmm3) halves, */
/* then XOR the four 128-bit lanes of each together into xmm0 / xmm3. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

/* Two-step reduction of xmm0:xmm3 with the POLY2 constant; result in xmm14. */

	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Common exit of the 7-block case (also reached from the full-block path). */
.L_small_initial_compute_done_33:

/* If r8 is non-zero, XOR the not-yet-multiplied last block (xmm7) into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_33
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_33:
	jmp	.L_last_blocks_done_19
/*
 * Tail case for (per its label) 8 remaining blocks: two 64-byte vectors, the
 * second one byte-masked.  The dispatch that selects this label is outside
 * this view.
 *
 * Registers as used by the visible code:
 *   rdi        base of the round keys; keys at 0..160(%rdi) are used
 *              (one XOR, nine vaesenc, one vaesenclast -- the same layout the
 *              .Laes_128_0 path at the top of the file walks)
 *   rsi        base of a table read at 224..336(%rsi) for the multiplies and
 *              written at 16(%rsi) on the partial path
 *   rdx        pointer that receives 0 (full last block) or r8 (partial)
 *   rcx + r11  input address, r9 + r11 output address
 *   r8         remaining byte count; r15d is compared against 248 to pick the
 *              counter-increment path
 *   zmm2       counter block source, zmm29 byte-shuffle mask
 *   zmm24/25/26 running XOR accumulators of the 0x11 / 0x00 / cross products
 *   xmm14      reduced 128-bit result on exit
 */
.L_last_num_blocks_is_8_19:
/* k1 = byte64_len_to_mask_table[r8 - 64]: byte mask for the second vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
/* r15d >= 248 selects the path that shuffles the counter with zmm29 before */
/* adding (presumably r15d mirrors the low counter byte and 8 more increments */
/* would carry out of it -- TODO confirm). */
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_34
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_34

.L_16_blocks_overflow_34:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_34:

/* From here on AES rounds on zmm0/zmm3 are interleaved with carry-less */
/* multiplies of four 64-byte vector pairs read from the stack */
/* (1024..1216(%rsp) against 0..192(%rsp,%rbx)). */


	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
/* Replicate the last counter block (lane 3 of zmm3) into every lane of zmm2. */
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31

/* vpternlogq 0x96 = three-way XOR: merge the first three product sets. */

	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
/* Load the input: first vector whole, second vector masked with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
/* Fold everything into the running accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
/* XOR the keystream with the input and store the result at r9 + r11. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
/* xmm11 = last output block before the mask is applied (lane 3 of zmm3). */
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
/* Zero the bytes of zmm3 outside the mask, then byte-shuffle both output */
/* vectors with zmm29 into zmm17 / zmm19 for the multiplies below. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
/* xmm7 = shuffled last block (lane 3 of zmm19). */
	vextracti32x4	$3,%zmm19,%xmm7
/* r8 = number of bytes in the last block. */
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_35

/* Last block is complete: store 0 at (%rdx) and multiply all 8 blocks by */
/* the 128 bytes of table data at 224(%rsi) and 288(%rsi). */



	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

/* Split the cross-product sum into high / low halves, then XOR the four */
/* 128-bit lanes together into xmm0 (high) and xmm3 (low). */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

/* Two-step reduction with POLY2; result in xmm14. */

	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_35
.L_small_initial_partial_block_35:

/* Last block is shorter than 16 bytes: record r8 at (%rdx) and xmm11 at */
/* 16(%rsi), then multiply only the first 7 blocks.  The table vector for */
/* zmm19 is built from 304(%rsi) and 336(%rsi) with its top lane left zero, */
/* so lane 3 of zmm19 contributes nothing here. */





	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_35:

/* r8 != 0 only on the partial path: XOR the unmultiplied last block (xmm7) */
/* into the reduced value. */
	orq	%r8,%r8
	je	.L_after_reduction_35
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_35:
	jmp	.L_last_blocks_done_19
/*
 * Tail case for (per its label) 9 remaining blocks: two full 64-byte vectors
 * (zmm0, zmm3) plus one byte-masked 16-byte block (xmm4).  The dispatch that
 * selects this label is outside this view.
 *
 * Registers as used by the visible code:
 *   rdi        round-key base; keys at 0..160(%rdi) are applied
 *   rsi        table base read at 208..336(%rsi); 16(%rsi) written on the
 *              partial path
 *   rdx        receives 0 (full last block) or r8 (partial last block)
 *   rcx + r11  input address, r9 + r11 output address
 *   r8         remaining byte count; r15d compared against 247 to choose the
 *              counter-increment path
 *   zmm2       counter source, zmm29 byte-shuffle mask
 *   zmm24/25/26 running XOR accumulators; xmm14 holds the reduced result
 */
.L_last_num_blocks_is_9_19:
/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
/* r15d >= 247 selects the shuffled-add path (presumably to handle a carry */
/* out of the low counter byte -- TODO confirm). */
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_36
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_36

.L_16_blocks_overflow_36:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_36:

/* AES rounds on zmm0/zmm3/xmm4 are interleaved below with carry-less */
/* multiplies of four 64-byte vector pairs read from the stack. */


	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
/* Replicate the last counter block (lane 0 of zmm4) into every lane of zmm2. */
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31

/* vpternlogq 0x96 = three-way XOR: merge the first three product sets. */

	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
/* Load the input: two whole vectors and one masked, zero-filled block. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
/* Fold everything into the running accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
/* XOR the keystream with the input and store the result at r9 + r11. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
/* xmm11 = last output block before the mask is applied (lane 0 of zmm4). */
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
/* Zero the bytes of zmm4 outside the mask, then byte-shuffle the outputs */
/* with zmm29 into zmm17 / zmm19 / xmm20 for the multiplies below. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
/* xmm7 = shuffled last block (lane 0 of zmm20). */
	vextracti32x4	$0,%zmm20,%xmm7
/* r8 = number of bytes in the last block. */
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_37

/* Last block is complete: store 0 at (%rdx) and multiply all 9 blocks by */
/* the table data at 208(%rsi), 272(%rsi) and 336(%rsi). */



	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

/* Split the cross-product sum into high / low halves, then XOR the four */
/* 128-bit lanes together into xmm0 (high) and xmm3 (low). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

/* Two-step reduction with POLY2; result in xmm14. */

	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_37
.L_small_initial_partial_block_37:

/* Last block is shorter than 16 bytes: record r8 at (%rdx) and xmm11 at */
/* 16(%rsi), then multiply only the first 8 blocks (zmm17, zmm19) by the */
/* table data at 224(%rsi) and 288(%rsi).  xmm20 is not multiplied here. */





	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_37:

/* r8 != 0 only on the partial path: XOR the unmultiplied last block (xmm7) */
/* into the reduced value. */
	orq	%r8,%r8
	je	.L_after_reduction_37
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_37:
	jmp	.L_last_blocks_done_19
/*
 * Tail case for (per its label) 10 remaining blocks: two full 64-byte vectors
 * (zmm0, zmm3) plus a byte-masked 32-byte vector (ymm4).  The dispatch that
 * selects this label is outside this view.
 *
 * Registers as used by the visible code:
 *   rdi        round-key base; keys at 0..160(%rdi) are applied
 *   rsi        table base read at 192..336(%rsi); 16(%rsi) written on the
 *              partial path
 *   rdx        receives 0 (full last block) or r8 (partial last block)
 *   rcx + r11  input address, r9 + r11 output address
 *   r8         remaining byte count; r15d compared against 246 to choose the
 *              counter-increment path
 *   zmm2       counter source, zmm29 byte-shuffle mask
 *   zmm24/25/26 running XOR accumulators; xmm14 holds the reduced result
 */
.L_last_num_blocks_is_10_19:
/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
/* r15d >= 246 selects the shuffled-add path (presumably to handle a carry */
/* out of the low counter byte -- TODO confirm). */
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_38
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_38

.L_16_blocks_overflow_38:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_38:

/* AES rounds on zmm0/zmm3/ymm4 are interleaved below with carry-less */
/* multiplies of four 64-byte vector pairs read from the stack. */


	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
/* Replicate the last counter block (lane 1 of zmm4) into every lane of zmm2. */
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31

/* vpternlogq 0x96 = three-way XOR: merge the first three product sets. */

	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
/* Load the input: two whole vectors and one masked, zero-filled 32 bytes. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
/* Fold everything into the running accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
/* XOR the keystream with the input and store the result at r9 + r11. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
/* xmm11 = last output block before the mask is applied (lane 1 of zmm4). */
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
/* Zero the bytes of zmm4 outside the mask, then byte-shuffle the outputs */
/* with zmm29 into zmm17 / zmm19 / ymm20 for the multiplies below. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
/* xmm7 = shuffled last block (lane 1 of zmm20). */
	vextracti32x4	$1,%zmm20,%xmm7
/* r8 = number of bytes in the last block. */
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_39

/* Last block is complete: store 0 at (%rdx) and multiply all 10 blocks by */
/* the table data at 192(%rsi), 256(%rsi) and 320(%rsi). */



	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

/* Split the cross-product sum into high / low halves, then XOR the four */
/* 128-bit lanes together into xmm0 (high) and xmm3 (low). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

/* Two-step reduction with POLY2; result in xmm14. */

	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_39
.L_small_initial_partial_block_39:

/* Last block is shorter than 16 bytes: record r8 at (%rdx) and xmm11 at */
/* 16(%rsi), then multiply only the first 9 blocks: zmm17, zmm19 and the low */
/* 128 bits of zmm20 against 208(%rsi), 272(%rsi) and 336(%rsi). */





	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_39:

/* r8 != 0 only on the partial path: XOR the unmultiplied last block (xmm7) */
/* into the reduced value. */
	orq	%r8,%r8
	je	.L_after_reduction_39
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_39:
	jmp	.L_last_blocks_done_19
/*
 * Tail case for (per its label) 11 remaining blocks: two full 64-byte vectors
 * (zmm0, zmm3) plus a byte-masked third vector (zmm4).  The dispatch that
 * selects this label is outside this view.
 *
 * Registers as used by the visible code:
 *   rdi        round-key base; keys at 0..160(%rdi) are applied
 *   rsi        table base read at 176..336(%rsi); 16(%rsi) written on the
 *              partial path
 *   rdx        receives 0 (full last block) or r8 (partial last block)
 *   rcx + r11  input address, r9 + r11 output address
 *   r8         remaining byte count; r15d compared against 245 to choose the
 *              counter-increment path
 *   zmm2       counter source, zmm29 byte-shuffle mask
 *   zmm24/25/26 running XOR accumulators; xmm14 holds the reduced result
 */
.L_last_num_blocks_is_11_19:
/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
/* r15d >= 245 selects the shuffled-add path (presumably to handle a carry */
/* out of the low counter byte -- TODO confirm). */
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_40
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_40

.L_16_blocks_overflow_40:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_40:

/* AES rounds on zmm0/zmm3/zmm4 are interleaved below with carry-less */
/* multiplies of four 64-byte vector pairs read from the stack. */


	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
/* Replicate the last counter block (lane 2 of zmm4) into every lane of zmm2. */
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31

/* vpternlogq 0x96 = three-way XOR: merge the first three product sets. */

	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
/* Load the input: two whole vectors and one masked, zero-filled vector. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
/* Fold everything into the running accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
/* XOR the keystream with the input and store the result at r9 + r11. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
/* xmm11 = last output block before the mask is applied (lane 2 of zmm4). */
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
/* Zero the bytes of zmm4 outside the mask, then byte-shuffle the outputs */
/* with zmm29 into zmm17 / zmm19 / zmm20 for the multiplies below. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
/* xmm7 = shuffled last block (lane 2 of zmm20). */
	vextracti32x4	$2,%zmm20,%xmm7
/* r8 = number of bytes in the last block. */
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_41

/* Last block is complete: store 0 at (%rdx) and multiply all 11 blocks by */
/* the table data at 176(%rsi), 240(%rsi) and 304(%rsi)/336(%rsi). */



	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
/* Build a three-entry table vector: 32 bytes from 304(%rsi) plus 16 bytes */
/* from 336(%rsi) in lane 2; lane 3 stays zero. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

/* Split the cross-product sum into high / low halves, then XOR the four */
/* 128-bit lanes together into xmm0 (high) and xmm3 (low). */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

/* Two-step reduction with POLY2; result in xmm14. */

	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_41
.L_small_initial_partial_block_41:

/* Last block is shorter than 16 bytes: record r8 at (%rdx) and xmm11 at */
/* 16(%rsi), then multiply only the first 10 blocks: zmm17, zmm19 and the */
/* low 256 bits of zmm20 against 192(%rsi), 256(%rsi) and 320(%rsi). */





	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_41:

/* r8 != 0 only on the partial path: XOR the unmultiplied last block (xmm7) */
/* into the reduced value. */
	orq	%r8,%r8
	je	.L_after_reduction_41
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_41:
	jmp	.L_last_blocks_done_19
/*
 * Tail case for (per its label) 12 remaining blocks: two full 64-byte vectors
 * (zmm0, zmm3) plus a byte-masked third vector (zmm4).  This span covers the
 * case entry, the cipher rounds and the full-last-block multiply; the
 * partial-last-block path follows at .L_small_initial_partial_block_43.
 *
 * Registers as used by the visible code:
 *   rdi        round-key base; keys at 0..160(%rdi) are applied
 *   rsi        table base read at 160..288(%rsi) on the full path
 *   rdx        receives 0 on the full path
 *   rcx + r11  input address, r9 + r11 output address
 *   r8         remaining byte count; r15d compared against 244 to choose the
 *              counter-increment path
 *   zmm2       counter source, zmm29 byte-shuffle mask
 *   zmm24/25/26 running XOR accumulators; xmm14 holds the reduced result
 */
.L_last_num_blocks_is_12_19:
/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
/* r15d >= 244 selects the shuffled-add path (presumably to handle a carry */
/* out of the low counter byte -- TODO confirm). */
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_42
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_42

.L_16_blocks_overflow_42:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_42:

/* AES rounds on zmm0/zmm3/zmm4 are interleaved below with carry-less */
/* multiplies of four 64-byte vector pairs read from the stack. */


	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
/* Replicate the last counter block (lane 3 of zmm4) into every lane of zmm2. */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31

/* vpternlogq 0x96 = three-way XOR: merge the first three product sets. */

	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
/* Load the input: two whole vectors and one masked, zero-filled vector. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
/* Fold everything into the running accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
/* XOR the keystream with the input and store the result at r9 + r11. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
/* xmm11 = last output block before the mask is applied (lane 3 of zmm4). */
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
/* Zero the bytes of zmm4 outside the mask, then byte-shuffle the outputs */
/* with zmm29 into zmm17 / zmm19 / zmm20 for the multiplies below. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
/* xmm7 = shuffled last block (lane 3 of zmm20). */
	vextracti32x4	$3,%zmm20,%xmm7
/* r8 = number of bytes in the last block. */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_43

/* Last block is complete: store 0 at (%rdx) and multiply all 12 blocks by */
/* the 192 bytes of table data at 160(%rsi), 224(%rsi) and 288(%rsi). */



	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
/* zmm17 / zmm19 are reused as scratch for the third vector's products, each */
/* pair being XOR-merged with the first two vectors' products right away. */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

/* Split the cross-product sum into high / low halves, then XOR the four */
/* 128-bit lanes together into xmm0 (high) and xmm3 (low). */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

/* Two-step reduction with POLY2; result in xmm14. */

	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_43
.L_small_initial_partial_block_43:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_43:

	orq	%r8,%r8
	je	.L_after_reduction_43
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_43:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_13_19 -- tail arm for a final group of 13 blocks
 * (per the label name): three 64-byte vectors (zmm0/zmm3/zmm4) plus one
 * 16-byte vector (xmm5).
 *
 * Register use visible in this arm:
 *   %rdi       round keys, broadcast from offsets 0..160
 *   %rsi       table read at offsets 144..336; offset 16 is written on the
 *              partial-block path
 *   %rdx       pointer through which the leftover byte count is stored
 *   %rcx, %r9  load base and store base, both indexed by %r11
 *   %r8        remaining length in bytes
 *   %zmm24/%zmm25/%zmm26  running XOR accumulators of the multiply results
 *   %xmm14     reduced 128-bit result left for .L_last_blocks_done_19
 * NOTE(review): %zmm2 is presumably the running counter block and %zmm29 a
 * byte-swap mask; both are set up outside this view -- confirm.
 */
.L_last_num_blocks_is_13_19:
	/* k1 = byte64_len_to_mask_table[%r8 - 192]; it masks the 4th 64-byte slot. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 243 (unsigned) selects the slow path below.
	 * NOTE(review): 243 = 256 - 13, presumably a low-byte carry guard -- confirm.
	 */
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_44
	/* Fast path: zmm0 = zmm2 + zmm28, then each next vector adds zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_44

.L_16_blocks_overflow_44:
	/* Slow path: shuffle by zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_44:
	/*
	 * From here two streams are interleaved:
	 *  - AES on zmm0/zmm3/zmm4/xmm5: XOR with the key at 0(%rdi), nine vaesenc
	 *    with keys 16..144(%rdi), vaesenclast with 160(%rdi); keys alternate
	 *    between zmm30 and zmm31.
	 *  - carry-less multiplies of the four vectors at 0..192(%rsp,%rbx) by the
	 *    four vectors at 1024..1216(%rsp).
	 */




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = 128-bit lane 0 of zmm5 (the 13th block) replicated to all lanes. */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products per operand pair: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with imm 0x96 is a three-input XOR: fold the first three products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the data to be XORed with the cipher output; the last 16-byte slot is masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR this group's high/low/middle sums into the accumulators zmm24/zmm25/zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	/* XOR the cipher output with the loaded data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	/* xmm11 = unmasked last output block, saved before zmm5 is masked below. */
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that lie outside k1, then shuffle all results by zmm29. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	/* xmm7 = shuffled, masked last block. */
	vextracti32x4	$0,%zmm21,%xmm7
	/* r8 = byte count of the 13th block after removing the 12 full blocks before it. */
	subq	$16 * (13 - 1),%r8


	/* Fewer than 16 bytes left: the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_45





	/*
	 * Full last block: store 0 through %rdx and multiply all 13 shuffled
	 * blocks by the table entries at 144..336(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Merge with the running accumulators zmm24 (high), zmm25 (low), zmm26 (middle). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_45
.L_small_initial_partial_block_45:
	/*
	 * Partial last block: store the leftover byte count through %rdx, save
	 * xmm11 at 16(%rsi), and multiply only the first 12 blocks
	 * (zmm17/zmm19/zmm20) by the table entries at 160..336(%rsi).
	 */








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Merge with the running accumulators. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Same POLY2 reduction as on the full-block path; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_45:

	/* If %r8 is non-zero, XOR the not-yet-multiplied last block (xmm7) into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_45
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_45:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_14_19 -- tail arm for a final group of 14 blocks
 * (per the label name): three 64-byte vectors (zmm0/zmm3/zmm4) plus one
 * 32-byte vector (ymm5).
 *
 * Register use visible in this arm:
 *   %rdi       round keys, broadcast from offsets 0..160
 *   %rsi       table read at offsets 128..336; offset 16 is written on the
 *              partial-block path
 *   %rdx       pointer through which the leftover byte count is stored
 *   %rcx, %r9  load base and store base, both indexed by %r11
 *   %r8        remaining length in bytes
 *   %zmm24/%zmm25/%zmm26  running XOR accumulators of the multiply results
 *   %xmm14     reduced 128-bit result left for .L_last_blocks_done_19
 * NOTE(review): %zmm2 is presumably the running counter block and %zmm29 a
 * byte-swap mask; both are set up outside this view -- confirm.
 */
.L_last_num_blocks_is_14_19:
	/* k1 = byte64_len_to_mask_table[%r8 - 192]; it masks the 4th 64-byte slot. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 242 (unsigned) selects the slow path below.
	 * NOTE(review): 242 = 256 - 14, presumably a low-byte carry guard -- confirm.
	 */
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_46
	/* Fast path: zmm0 = zmm2 + zmm28, then each next vector adds zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_46

.L_16_blocks_overflow_46:
	/* Slow path: shuffle by zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_46:
	/*
	 * From here two streams are interleaved:
	 *  - AES on zmm0/zmm3/zmm4/ymm5: XOR with the key at 0(%rdi), nine vaesenc
	 *    with keys 16..144(%rdi), vaesenclast with 160(%rdi); keys alternate
	 *    between zmm30 and zmm31.
	 *  - carry-less multiplies of the four vectors at 0..192(%rsp,%rbx) by the
	 *    four vectors at 1024..1216(%rsp).
	 */




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = 128-bit lane 1 of zmm5 (the 14th block) replicated to all lanes. */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products per operand pair: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with imm 0x96 is a three-input XOR: fold the first three products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the data to be XORed with the cipher output; the last 32-byte slot is masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR this group's high/low/middle sums into the accumulators zmm24/zmm25/zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	/* XOR the cipher output with the loaded data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	/* xmm11 = unmasked last output block, saved before zmm5 is masked below. */
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that lie outside k1, then shuffle all results by zmm29. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	/* xmm7 = shuffled, masked last block. */
	vextracti32x4	$1,%zmm21,%xmm7
	/* r8 = byte count of the 14th block after removing the 13 full blocks before it. */
	subq	$16 * (14 - 1),%r8


	/* Fewer than 16 bytes left: the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_47





	/*
	 * Full last block: store 0 through %rdx and multiply all 14 shuffled
	 * blocks by the table entries at 128..336(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Merge with the running accumulators zmm24 (high), zmm25 (low), zmm26 (middle). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_47
.L_small_initial_partial_block_47:
	/*
	 * Partial last block: store the leftover byte count through %rdx, save
	 * xmm11 at 16(%rsi), and multiply only the first 13 blocks
	 * (zmm17/zmm19/zmm20 and lane 0 of zmm21) by the table entries at
	 * 144..336(%rsi).
	 */








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Merge with the running accumulators. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Same POLY2 reduction as on the full-block path; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_47:

	/* If %r8 is non-zero, XOR the not-yet-multiplied last block (xmm7) into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_47
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_47:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_15_19 -- tail arm for a final group of 15 blocks
 * (per the label name): four 64-byte vectors (zmm0/zmm3/zmm4/zmm5), of which
 * the fourth is loaded and stored under the byte mask k1.
 *
 * Register use visible in this arm:
 *   %rdi       round keys, broadcast from offsets 0..160
 *   %rsi       table read at offsets 112..336; offset 16 is written on the
 *              partial-block path
 *   %rdx       pointer through which the leftover byte count is stored
 *   %rcx, %r9  load base and store base, both indexed by %r11
 *   %r8        remaining length in bytes
 *   %zmm24/%zmm25/%zmm26  running XOR accumulators of the multiply results
 *   %xmm14     reduced 128-bit result left for .L_last_blocks_done_19
 * NOTE(review): %zmm2 is presumably the running counter block and %zmm29 a
 * byte-swap mask; both are set up outside this view -- confirm.
 */
.L_last_num_blocks_is_15_19:
	/* k1 = byte64_len_to_mask_table[%r8 - 192]; it masks the 4th 64-byte slot. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 241 (unsigned) selects the slow path below.
	 * NOTE(review): 241 = 256 - 15, presumably a low-byte carry guard -- confirm.
	 */
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_48
	/* Fast path: zmm0 = zmm2 + zmm28, then each next vector adds zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_48

.L_16_blocks_overflow_48:
	/* Slow path: shuffle by zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_48:
	/*
	 * From here two streams are interleaved:
	 *  - AES on zmm0/zmm3/zmm4/zmm5: XOR with the key at 0(%rdi), nine vaesenc
	 *    with keys 16..144(%rdi), vaesenclast with 160(%rdi); keys alternate
	 *    between zmm30 and zmm31.
	 *  - carry-less multiplies of the four vectors at 0..192(%rsp,%rbx) by the
	 *    four vectors at 1024..1216(%rsp).
	 */




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = 128-bit lane 2 of zmm5 (the 15th block) replicated to all lanes. */
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products per operand pair: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with imm 0x96 is a three-input XOR: fold the first three products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the data to be XORed with the cipher output; the last 64-byte slot is masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR this group's high/low/middle sums into the accumulators zmm24/zmm25/zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the cipher output with the loaded data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = unmasked last output block, saved before zmm5 is masked below. */
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that lie outside k1, then shuffle all results by zmm29. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled, masked last block. */
	vextracti32x4	$2,%zmm21,%xmm7
	/* r8 = byte count of the 15th block after removing the 14 full blocks before it. */
	subq	$16 * (15 - 1),%r8


	/* Fewer than 16 bytes left: the last block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_49





	/*
	 * Full last block: store 0 through %rdx and multiply all 15 shuffled
	 * blocks by the table entries at 112..336(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Build a three-entry multiplier: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Merge with the running accumulators zmm24 (high), zmm25 (low), zmm26 (middle). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_49
.L_small_initial_partial_block_49:
	/*
	 * Partial last block: store the leftover byte count through %rdx, save
	 * xmm11 at 16(%rsi), and multiply only the first 14 blocks
	 * (zmm17/zmm19/zmm20 and the low 32 bytes of zmm21) by the table entries
	 * at 128..336(%rsi).
	 */








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Merge with the running accumulators. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Same POLY2 reduction as on the full-block path; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_49:

	/* If %r8 is non-zero, XOR the not-yet-multiplied last block (xmm7) into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_49
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_49:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_16_19 -- tail arm for a final group of 16 blocks
 * (per the label name): four 64-byte vectors (zmm0/zmm3/zmm4/zmm5), of which
 * the fourth is loaded and stored under the byte mask k1.
 *
 * Unlike the 13/14/15-block arms there is no "full last block" branch: the
 * code always falls through into .L_small_initial_partial_block_51, which
 * multiplies the first 15 blocks and then unconditionally XORs the 16th
 * (xmm7) into the result.
 *
 * Register use visible in this arm:
 *   %rdi       round keys, broadcast from offsets 0..160
 *   %rsi       table read at offsets 112..336; offset 16 is written
 *   %rdx       pointer through which the leftover byte count is stored
 *   %rcx, %r9  load base and store base, both indexed by %r11
 *   %r8        remaining length in bytes
 *   %zmm24/%zmm25/%zmm26  running XOR accumulators of the multiply results
 *   %xmm14     128-bit result left for .L_last_blocks_done_19
 * NOTE(review): %zmm2 is presumably the running counter block and %zmm29 a
 * byte-swap mask; both are set up outside this view -- confirm.
 */
.L_last_num_blocks_is_16_19:
	/* k1 = byte64_len_to_mask_table[%r8 - 192]; it masks the 4th 64-byte slot. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 240 (unsigned) selects the slow path below.
	 * NOTE(review): 240 = 256 - 16, presumably a low-byte carry guard -- confirm.
	 */
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_50
	/* Fast path: zmm0 = zmm2 + zmm28, then each next vector adds zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_50

.L_16_blocks_overflow_50:
	/* Slow path: shuffle by zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_50:
	/*
	 * From here two streams are interleaved:
	 *  - AES on zmm0/zmm3/zmm4/zmm5: XOR with the key at 0(%rdi), nine vaesenc
	 *    with keys 16..144(%rdi), vaesenclast with 160(%rdi); keys alternate
	 *    between zmm30 and zmm31.
	 *  - carry-less multiplies of the four vectors at 0..192(%rsp,%rbx) by the
	 *    four vectors at 1024..1216(%rsp).
	 */




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = 128-bit lane 3 of zmm5 (the 16th block) replicated to all lanes. */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products per operand pair: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with imm 0x96 is a three-input XOR: fold the first three products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the data to be XORed with the cipher output; the last 64-byte slot is masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR this group's high/low/middle sums into the accumulators zmm24/zmm25/zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the cipher output with the loaded data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = unmasked last output block, saved before zmm5 is masked below. */
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that lie outside k1, then shuffle all results by zmm29. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled, masked last block. */
	vextracti32x4	$3,%zmm21,%xmm7
	/* r8 = byte count of the 16th block after removing the 15 full blocks before it. */
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_51:
	/*
	 * Always taken for 16 blocks: store the leftover byte count through %rdx,
	 * save xmm11 at 16(%rsi), and multiply the first 15 blocks
	 * (zmm17/zmm19/zmm20 and lanes 0..2 of zmm21) by the table entries at
	 * 112..336(%rsi).
	 */








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Build a three-entry multiplier: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Merge with the running accumulators zmm24 (high), zmm25 (low), zmm26 (middle). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_51:
	/* Unconditionally XOR the not-yet-multiplied 16th block (xmm7) into xmm14. */
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_51:
	jmp	.L_last_blocks_done_19
/*
 * .L_last_num_blocks_is_0_19
 * Path taken when no further blocks are to be processed.  The sixteen
 * 128-bit values held at 1024..1279(%rsp) are carry-less multiplied by the
 * sixteen values at 0..255(%rsp,%rbx,1); the products are XORed into the
 * running accumulators zmm24 (high halves), zmm25 (low halves) and zmm26
 * (cross terms), which are then folded and reduced to 128 bits in xmm14.
 * Execution falls through into .L_last_blocks_done_19.
 * NOTE(review): the stack slots presumably hold byte-reflected ciphertext
 * and hash-key powers, and POLY2 the GHASH reduction constant -- confirm
 * against the generator script.
 */
.L_last_num_blocks_is_0_19:
	/* Blocks 0-3: four partial products per 128-bit lane (0x11 high*high, 0x00 low*low, 0x01/0x10 cross). */
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Blocks 4-7. */
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* vpternlogq with immediate 0x96 is a three-input XOR: accumulate the products. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Blocks 8-11. */
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Blocks 12-15. */
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* Split the cross-term accumulator across the high and low accumulators. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	/* XOR the four 128-bit lanes of each accumulator down to one lane. */
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	/* First reduction step: multiply the low half by POLY2 and fold it back. */
	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	/* Second reduction step; the three-way XOR leaves the 128-bit result in xmm14. */
	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

/*
 * .L_last_blocks_done_19
 * Common exit of the "_19" tail handlers: permute the bytes of xmm2 with
 * the shuffle mask in xmm29 and continue at .L_ghash_done_10.
 * NOTE(review): xmm2 presumably carries the last counter block and xmm29
 * a byte-reversal mask -- neither is established in this part of the file.
 */
.L_last_blocks_done_19:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10
/*
 * .L_encrypt_32_blocks_10 -- first half (blocks 0..15 of 32)
 * Builds sixteen counter blocks in zmm0/zmm3/zmm4/zmm5, runs them through
 * the round keys at 0..160(%rdi) (XOR, nine vaesenc, one vaesenclast),
 * XORs the result with 256 bytes read from (%rcx,%r11,1) and stores it to
 * (%r9,%r11,1).  The output is then byte-shuffled with zmm29 and saved at
 * 1280..1535(%rsp).
 * Interleaved with the AES rounds, the sixteen values at 768..1023(%rsp)
 * (first one XORed with zmm14) are carry-less multiplied by the values at
 * 0..255(%rsp); the sums of the products initialise zmm24 (high), zmm25
 * (low) and zmm26 (cross terms).
 * NOTE(review): %rdi is presumably the AES key schedule, %rcx/%r9 the
 * input/output buffers, %r15b the low byte of the block counter, and the
 * stack slots previously saved data plus hash-key powers -- confirm against
 * the function prologue, which is outside this part of the file.
 */
.L_encrypt_32_blocks_10:
	/* If %r15b >= 240, adding 16 would wrap the byte: take the slow counter path. */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_52
	/* Fast path: zmm0 = zmm2 + zmm28, then each next group adds zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_52
.L_16_blocks_overflow_52:
	/* Slow path: shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle the results back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_52:
	vbroadcastf64x2	0(%rdi),%zmm30
	/* The current hash value in zmm14 is XORed into the first group of saved blocks. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	/* Broadcast lane 3 of zmm5 (the last counter block) into zmm2 for the next pass. */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	/* Initial XOR with the 16 bytes at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	/* Multiply group 0 (768(%rsp) ^ zmm14) by 0(%rsp). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	/* AES round using 16(%rdi); the key for the round after next is loaded each time. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	/* Multiply group 1 (832(%rsp)) by 64(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	/* Multiply group 2 (896(%rsp)) by 128(%rsp). */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* Three-way XOR (imm 0x96): sum the partial products of groups 0..2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load the 256 input bytes at offset %r11 from %rcx. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	/* Multiply group 3 (960(%rsp)) by 192(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Start fresh accumulators (plain XOR, not accumulate): zmm26 cross, zmm24 high, zmm25 low. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final round uses the 16 bytes at 160(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* XOR the keystream with the input data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* Store 256 output bytes at offset %r11 from %r9. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	/* Shuffle the output bytes with zmm29 and park them at 1280..1535(%rsp) for later hashing. */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	/*
	 * Second half of the 32-block pass (blocks 16..31).
	 * Same shape as the first half: sixteen counter blocks are generated
	 * from zmm2, run through the round keys at 0..160(%rdi), XORed with
	 * the input at 256..511(%rcx,%r11,1) and stored to 256..511(%r9,%r11,1).
	 * The byte-shuffled output is saved at 768..1023(%rsp).
	 * Interleaved with the AES rounds, the values at 1024..1279(%rsp) are
	 * multiplied by the values at 256..511(%rsp) and XOR-accumulated into
	 * zmm24 / zmm25 / zmm26 (no hash value is XORed in on this pass).
	 * NOTE(review): register roles are inferred from use; see the function
	 * prologue (not visible here) for the authoritative assignment.
	 */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_53
	/* Fast path: plain 32-bit lane additions of zmm28 / zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_53
.L_16_blocks_overflow_53:
	/* Slow path: shuffle with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_53:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	/* Keep the last counter block (lane 3 of zmm5) broadcast in zmm2. */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	/* Initial XOR with the 16 bytes at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	/* Multiply 1024(%rsp) by 256(%rsp). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	/* Multiply 1088(%rsp) by 320(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	/* Multiply 1152(%rsp) by 384(%rsp). */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* Three-way XOR (imm 0x96) of the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load the second 256 input bytes. */
	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	/* Multiply 1216(%rsp) by 448(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unlike the first half, these XOR INTO the existing zmm26 / zmm24 / zmm25 accumulators. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final round uses the 16 bytes at 160(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* XOR the keystream with the input data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* Store the second 256 output bytes. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	/* Shuffle the output with zmm29 and save it at 768..1023(%rsp); the tail handlers hash it. */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	/*
	 * End of the 32-block pass:
	 *  1. multiply the sixteen values saved at 1280..1535(%rsp) by the
	 *     values at 512..767(%rsp) and XOR the products into
	 *     zmm24 / zmm25 / zmm26;
	 *  2. fold and reduce the accumulators to 128 bits in xmm14;
	 *  3. account for the 512 bytes just processed (%r8 -= 512,
	 *     %r11 += 512) and derive %ebx and the remaining block count;
	 *  4. dispatch on the block count to .L_last_num_blocks_is_N_54.
	 * NOTE(review): POLY2 is presumably the GHASH reduction constant and
	 * %r8 the remaining byte count -- confirm against the generator.
	 */
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* Three-way XOR (imm 0x96) into the running accumulators. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* Split the cross terms across the high and low accumulators, then XOR four lanes down to one. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	/* First reduction step with POLY2. */
	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	/* Second reduction step; the result lands in xmm14. */
	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	/* 512 bytes consumed: shrink the length, advance the data offset. */
	subq	$512,%r8
	addq	$512,%r11
	/* %ebx = 512 - (%r8d rounded down to a multiple of 16); used as an offset into the stack table. */
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	/* %r10d = (%r8d + 15) / 16: number of 16-byte blocks left, rounding up. */
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	/* ZF comes from the shift: zero blocks left. */
	je	.L_last_num_blocks_is_0_54

	/* Compare tree on %r10d selecting the handler for 1..16 remaining blocks. */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_54
	jb	.L_last_num_blocks_is_7_1_54


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_54
	jb	.L_last_num_blocks_is_11_9_54


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_54
	ja	.L_last_num_blocks_is_16_54
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_54
	jmp	.L_last_num_blocks_is_13_54

.L_last_num_blocks_is_11_9_54:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_54
	ja	.L_last_num_blocks_is_11_54
	jmp	.L_last_num_blocks_is_9_54

.L_last_num_blocks_is_7_1_54:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_54
	jb	.L_last_num_blocks_is_3_1_54

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_54
	je	.L_last_num_blocks_is_6_54
	jmp	.L_last_num_blocks_is_5_54

.L_last_num_blocks_is_3_1_54:

	/* Values below 2 fall through to the one-block handler that follows. */
	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_54
	je	.L_last_num_blocks_is_2_54
/*
 * .L_last_num_blocks_is_1_54
 * Tail handler for one remaining block, which may be shorter than 16
 * bytes.  It
 *   - loads a byte mask into %k1 from byte64_len_to_mask_table[%r8];
 *   - derives one counter block in xmm0 from zmm2 and encrypts it with the
 *     round keys at 0..160(%rdi);
 *   - meanwhile multiplies the sixteen values at 768..1023(%rsp) (first
 *     group XORed with zmm14) by the values at 0..255(%rsp,%rbx,1),
 *     producing zmm24 (high), zmm25 (low), zmm26 (cross terms);
 *   - does a masked load from (%rcx,%r11,1), XOR, masked store to
 *     (%r9,%r11,1);
 *   - if %r8 >= 16 the block is complete: it is multiplied by 336(%rsi),
 *     added to the accumulators and everything is reduced into xmm14;
 *     otherwise %r8 is written to (%rdx), the unmasked output block to
 *     16(%rsi), only the accumulators are reduced, and the shuffled,
 *     zero-padded block is XORed into xmm14.
 * NOTE(review): (%rdx) is presumably the stored partial-block length and
 * 16(%rsi)/336(%rsi) fields of the GCM context (saved block / hash key) --
 * confirm against the context layout.
 */
.L_last_num_blocks_is_1_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Take the slow counter path when %r15d >= 255. */
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_55
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_55

.L_16_blocks_overflow_55:
	/* Shuffle with zmm29, add ddq_add_1234, shuffle the one needed lane back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_55:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast lane 0 of zmm0 (the counter block just used) into every lane of zmm2. */
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products for the saved groups are interleaved with the AES rounds on xmm0. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load: bytes outside %k1 are not read and become zero. */
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Accumulators for the saved blocks: zmm24 high, zmm25 low, zmm26 cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	/* xmm11 keeps the output block before the bytes outside the mask are cleared. */
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside %k1, then shuffle with xmm29; xmm7 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_56





	/* Complete block: record 0 at (%rdx) and fold the block into the hash. */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Fold the cross terms into the high/low sums and XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* First reduction step with POLY2. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Second reduction step; the result lands in xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_56
.L_small_initial_partial_block_56:








	/* Partial block: record its length at (%rdx) and save the raw output block at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	/* Only the previously accumulated products are reduced here. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	/* XOR the shuffled partial block into the hash without multiplying it. */
	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_56
.L_small_initial_compute_done_56:
.L_after_reduction_56:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_2_54
 * Tail handler for two remaining blocks; the second may be partial.
 * Same scheme as the one-block handler, at ymm width:
 *   - %k1 is loaded from byte64_len_to_mask_table[%r8];
 *   - two counter blocks in ymm0 are encrypted with the round keys at
 *     0..160(%rdi) while the sixteen values at 768..1023(%rsp) are
 *     multiplied by 0..255(%rsp,%rbx,1) into zmm24 / zmm25 / zmm26;
 *   - masked load from (%rcx,%r11,1), XOR, masked store to (%r9,%r11,1);
 *   - with a complete last block both new blocks are multiplied by the
 *     32 bytes at 320(%rsi); with a partial last block only the first is
 *     multiplied (by 336(%rsi)) and the shuffled partial block in xmm7 is
 *     XORed into xmm14 afterwards.
 * NOTE(review): the meaning of (%rdx), 16(%rsi) and 320/336(%rsi) is
 * inferred (partial length, saved block, hash-key powers) -- confirm
 * against the context layout.
 */
.L_last_num_blocks_is_2_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Take the slow counter path when %r15d >= 254. */
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_57
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_57

.L_16_blocks_overflow_57:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_57:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast lane 1 of zmm0 (the last counter block used) into zmm2. */
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products for the saved groups are interleaved with the AES rounds on ymm0. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of up to 32 input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Accumulators for the saved blocks: zmm24 high, zmm25 low, zmm26 cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	/* xmm11 = last output block before masking; xmm7 = last block after masking and shuffling. */
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	/* %r8 now counts only the bytes of the last block. */
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_58





	/* Complete last block: record 0 at (%rdx) and multiply both blocks by 320(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Fold the cross terms into the high/low sums and XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two reduction steps with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_58
.L_small_initial_partial_block_58:








	/* Partial last block: record its length, save the raw block, hash only the first block. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_58:

	/* Non-zero %r8 means a partial block is pending: XOR it (xmm7) into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_58
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_58:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_3_54
 * Tail handler for three remaining blocks; the third may be partial.
 * Same scheme as the smaller handlers, at zmm width with a byte mask:
 *   - %k1 is loaded from byte64_len_to_mask_table[%r8];
 *   - the counter blocks in zmm0 are encrypted with the round keys at
 *     0..160(%rdi) while the sixteen values at 768..1023(%rsp) are
 *     multiplied by 0..255(%rsp,%rbx,1) into zmm24 / zmm25 / zmm26;
 *   - masked load from (%rcx,%r11,1), XOR, masked store to (%r9,%r11,1);
 *   - with a complete last block all three new blocks are multiplied by
 *     the 48 bytes at 304(%rsi); with a partial last block only the first
 *     two are multiplied (by 320(%rsi)) and the shuffled partial block in
 *     xmm7 is XORed into xmm14 afterwards.
 * NOTE(review): the meaning of (%rdx), 16(%rsi) and 304..351(%rsi) is
 * inferred (partial length, saved block, hash-key powers) -- confirm
 * against the context layout.
 */
.L_last_num_blocks_is_3_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Take the slow counter path when %r15d >= 253. */
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_59
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_59

.L_16_blocks_overflow_59:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_59:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast lane 2 of zmm0 (the last counter block used) into zmm2. */
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products for the saved groups are interleaved with the AES rounds on zmm0. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Accumulators for the saved blocks: zmm24 high, zmm25 low, zmm26 cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = last output block before masking; xmm7 = last block after masking and shuffling. */
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	/* %r8 now counts only the bytes of the last block. */
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_60





	/* Complete last block: record 0 at (%rdx) and multiply all three blocks by 304..351(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Fold the cross terms into the high/low sums and XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two reduction steps with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_60
.L_small_initial_partial_block_60:








	/* Partial last block: record its length, save the raw block, hash only the first two blocks. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_60:

	/* Non-zero %r8 means a partial block is pending: XOR it (xmm7) into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_60
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_60:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_4_54
 * Tail handler for four remaining blocks; the fourth may be partial.
 * Same scheme as the smaller handlers, using one full zmm register:
 *   - %k1 is loaded from byte64_len_to_mask_table[%r8];
 *   - four counter blocks in zmm0 are encrypted with the round keys at
 *     0..160(%rdi) while the sixteen values at 768..1023(%rsp) are
 *     multiplied by 0..255(%rsp,%rbx,1) into zmm24 / zmm25 / zmm26;
 *   - masked load from (%rcx,%r11,1), XOR, masked store to (%r9,%r11,1);
 *   - with a complete last block all four new blocks are multiplied by
 *     the 64 bytes at 288(%rsi); with a partial last block the multiplier
 *     is the 48 bytes at 304(%rsi) (upper lane zero) and the shuffled
 *     partial block in xmm7 is XORed into xmm14 afterwards.
 * NOTE(review): the meaning of (%rdx), 16(%rsi) and 288..351(%rsi) is
 * inferred (partial length, saved block, hash-key powers) -- confirm
 * against the context layout.
 */
.L_last_num_blocks_is_4_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Take the slow counter path when %r15d >= 252. */
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_61
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_61

.L_16_blocks_overflow_61:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_61:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast lane 3 of zmm0 (the last counter block used) into zmm2. */
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products for the saved groups are interleaved with the AES rounds on zmm0. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Accumulators for the saved blocks: zmm24 high, zmm25 low, zmm26 cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = last output block before masking; xmm7 = last block after masking and shuffling. */
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	/* %r8 now counts only the bytes of the last block. */
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_62





	/* Complete last block: record 0 at (%rdx) and multiply all four blocks by 288..351(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Fold the cross terms into the high/low sums and XOR the lanes down to 128 bits. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two reduction steps with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_62
.L_small_initial_partial_block_62:








	/* Partial last block: record its length, save the raw block, hash only the first three blocks. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_62:

	/* Non-zero %r8 means a partial block is pending: XOR it (xmm7) into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_62
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_62:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_5_54:
	/*
	 * Tail handler for a final group of 5 blocks: four blocks in a full
	 * zmm vector plus one block in an xmm lane, which may be partial.
	 * NOTE(review): register roles are inferred from usage in this
	 * section only; confirm against the function prologue:
	 *   %rdi = AES round keys, %rsi = context holding the multiplier
	 *   table and saved partial block, %rdx = where the partial-block
	 *   length is stored, %rcx / %r9 = input / output base, %r11 = data
	 *   offset, %r8 = bytes remaining, %zmm14 = running hash value.
	 */
	/* k1 = mask table entry for (r8 - 64), used for the second vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Build the counter blocks in zmm0 / xmm3.  The slow path is taken
	 * when r15d >= 251 (presumably the low counter byte would wrap
	 * while adding 5 - confirm).
	 */
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_63
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_63

.L_16_blocks_overflow_63:
	/* Slow path: shuffle zmm2 with zmm29, add, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_63:




	/*
	 * Ten AES rounds (round keys at 0..160(%rdi)) on the counter blocks,
	 * interleaved with carry-less multiplies of the data at
	 * 768..1023(%rsp) (first vector XORed with zmm14) by the operands at
	 * 0..255(%rsp,%rbx).  The partial products end up in zmm24 (high),
	 * zmm25 (low) and zmm26 (middle).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 0 of zmm3 (the last counter block) in all four lanes */
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold the partial products together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 64 full bytes, then a k1-masked zero-filled tail */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	/* XOR the keystream with the input; xmm11 = last output block */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	/* Write the output; the tail store is limited by k1 */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 that lie outside the k1 mask */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the output with zmm29 for hashing; xmm7 = last shuffled block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	/* r8 = number of bytes in the last (5th) block */
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_64





	/*
	 * Last block is complete: store 0 at (%rdx), multiply all 5
	 * shuffled blocks by the values at 272..351(%rsi) (presumably
	 * hash-key powers - confirm), add zmm24/25/26 and reduce with POLY2.
	 * The result is left in xmm14.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_64
.L_small_initial_partial_block_64:








	/*
	 * Last block is partial: store its byte count at (%rdx) and save
	 * xmm11 at 16(%rsi), then multiply only the 4 complete blocks
	 * (zmm17) by the values at 288..351(%rsi) and reduce as above.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_64:

	/* If r8 is nonzero, XOR the last shuffled block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_64
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_64:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_6_54:
	/*
	 * Tail handler for a final group of 6 blocks: four blocks in a full
	 * zmm vector plus two blocks in a ymm register, the last of which
	 * may be partial.
	 * NOTE(review): register roles are inferred from usage in this
	 * section only; confirm against the function prologue:
	 *   %rdi = AES round keys, %rsi = context holding the multiplier
	 *   table and saved partial block, %rdx = where the partial-block
	 *   length is stored, %rcx / %r9 = input / output base, %r11 = data
	 *   offset, %r8 = bytes remaining, %zmm14 = running hash value.
	 */
	/* k1 = mask table entry for (r8 - 64), used for the second vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Build the counter blocks in zmm0 / ymm3.  The slow path is taken
	 * when r15d >= 250 (presumably the low counter byte would wrap
	 * while adding 6 - confirm).
	 */
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_65
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_65

.L_16_blocks_overflow_65:
	/* Slow path: shuffle zmm2 with zmm29, add, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_65:




	/*
	 * Ten AES rounds (round keys at 0..160(%rdi)) on the counter blocks,
	 * interleaved with carry-less multiplies of the data at
	 * 768..1023(%rsp) (first vector XORed with zmm14) by the operands at
	 * 0..255(%rsp,%rbx).  The partial products end up in zmm24 (high),
	 * zmm25 (low) and zmm26 (middle).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 1 of zmm3 (the last counter block) in all four lanes */
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold the partial products together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 64 full bytes, then a k1-masked zero-filled tail */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	/* XOR the keystream with the input; xmm11 = last output block */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	/* Write the output; the tail store is limited by k1 */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 that lie outside the k1 mask */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the output with zmm29 for hashing; xmm7 = last shuffled block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	/* r8 = number of bytes in the last (6th) block */
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_66





	/*
	 * Last block is complete: store 0 at (%rdx), multiply all 6
	 * shuffled blocks by the values at 256..351(%rsi) (presumably
	 * hash-key powers - confirm), add zmm24/25/26 and reduce with POLY2.
	 * The result is left in xmm14.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_66
.L_small_initial_partial_block_66:








	/*
	 * Last block is partial: store its byte count at (%rdx) and save
	 * xmm11 at 16(%rsi), then multiply only the 5 complete blocks
	 * (zmm17 and the low lane of zmm19) by the values at
	 * 272..351(%rsi) and reduce as above.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_66:

	/* If r8 is nonzero, XOR the last shuffled block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_66
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_66:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_7_54:
	/*
	 * Tail handler for a final group of 7 blocks: four blocks in a full
	 * zmm vector plus three blocks in a second zmm register, the last
	 * of which may be partial.
	 * NOTE(review): register roles are inferred from usage in this
	 * section only; confirm against the function prologue:
	 *   %rdi = AES round keys, %rsi = context holding the multiplier
	 *   table and saved partial block, %rdx = where the partial-block
	 *   length is stored, %rcx / %r9 = input / output base, %r11 = data
	 *   offset, %r8 = bytes remaining, %zmm14 = running hash value.
	 */
	/* k1 = mask table entry for (r8 - 64), used for the second vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Build the counter blocks in zmm0 / zmm3.  The slow path is taken
	 * when r15d >= 249 (presumably the low counter byte would wrap
	 * while adding 7 - confirm).
	 */
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_67
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_67

.L_16_blocks_overflow_67:
	/* Slow path: shuffle zmm2 with zmm29, add, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_67:




	/*
	 * Ten AES rounds (round keys at 0..160(%rdi)) on the counter blocks,
	 * interleaved with carry-less multiplies of the data at
	 * 768..1023(%rsp) (first vector XORed with zmm14) by the operands at
	 * 0..255(%rsp,%rbx).  The partial products end up in zmm24 (high),
	 * zmm25 (low) and zmm26 (middle).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 2 of zmm3 (the last counter block) in all four lanes */
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold the partial products together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 64 full bytes, then a k1-masked zero-filled tail */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the keystream with the input; xmm11 = last output block */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$2,%zmm3,%xmm11
	/* Write the output; the tail store is limited by k1 */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 that lie outside the k1 mask */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the output with zmm29 for hashing; xmm7 = last shuffled block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	/* r8 = number of bytes in the last (7th) block */
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_68





	/*
	 * Last block is complete: store 0 at (%rdx), multiply all 7
	 * shuffled blocks by the values at 240..351(%rsi) (presumably
	 * hash-key powers - confirm), add zmm24/25/26 and reduce with POLY2.
	 * The result is left in xmm14.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Three multipliers: 32 bytes from 304(%rsi), then 336(%rsi) in lane 2 */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_68
.L_small_initial_partial_block_68:








	/*
	 * Last block is partial: store its byte count at (%rdx) and save
	 * xmm11 at 16(%rsi), then multiply only the 6 complete blocks
	 * (zmm17 and the low two lanes of zmm19) by the values at
	 * 256..351(%rsi) and reduce as above.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_68:

	/* If r8 is nonzero, XOR the last shuffled block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_68
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_68:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_8_54:
	/*
	 * Tail handler for a final group of 8 blocks: two zmm vectors of
	 * four blocks each; the last block of the second vector may be
	 * partial.
	 * NOTE(review): register roles are inferred from usage in this
	 * section only; confirm against the function prologue:
	 *   %rdi = AES round keys, %rsi = context holding the multiplier
	 *   table and saved partial block, %rdx = where the partial-block
	 *   length is stored, %rcx / %r9 = input / output base, %r11 = data
	 *   offset, %r8 = bytes remaining, %zmm14 = running hash value.
	 */
	/* k1 = mask table entry for (r8 - 64), used for the second vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Build the counter blocks in zmm0 / zmm3.  The slow path is taken
	 * when r15d >= 248 (presumably the low counter byte would wrap
	 * while adding 8 - confirm).
	 */
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_69
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_69

.L_16_blocks_overflow_69:
	/* Slow path: shuffle zmm2 with zmm29, add, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_69:




	/*
	 * Ten AES rounds (round keys at 0..160(%rdi)) on the counter blocks,
	 * interleaved with carry-less multiplies of the data at
	 * 768..1023(%rsp) (first vector XORed with zmm14) by the operands at
	 * 0..255(%rsp,%rbx).  The partial products end up in zmm24 (high),
	 * zmm25 (low) and zmm26 (middle).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 3 of zmm3 (the last counter block) in all four lanes */
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold the partial products together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 64 full bytes, then a k1-masked zero-filled tail */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the keystream with the input; xmm11 = last output block */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	/* Write the output; the tail store is limited by k1 */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 that lie outside the k1 mask */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the output with zmm29 for hashing; xmm7 = last shuffled block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	/* r8 = number of bytes in the last (8th) block */
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_70





	/*
	 * Last block is complete: store 0 at (%rdx), multiply all 8
	 * shuffled blocks by the values at 224..351(%rsi) (presumably
	 * hash-key powers - confirm), add zmm24/25/26 and reduce with POLY2.
	 * The result is left in xmm14.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the products of both vectors term by term */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle term across the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_70
.L_small_initial_partial_block_70:








	/*
	 * Last block is partial: store its byte count at (%rdx) and save
	 * xmm11 at 16(%rsi), then multiply only the 7 complete blocks
	 * (zmm17 and the low three lanes of zmm19) by the values at
	 * 240..351(%rsi) and reduce as above.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Three multipliers: 32 bytes from 304(%rsi), then 336(%rsi) in lane 2 */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_70:

	/* If r8 is nonzero, XOR the last shuffled block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_70
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_70:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_9_54:
	/*
	 * Tail handler for a final group of 9 blocks: two full zmm vectors
	 * of four blocks each plus one block in an xmm lane, which may be
	 * partial.
	 * NOTE(review): register roles are inferred from usage in this
	 * section only; confirm against the function prologue:
	 *   %rdi = AES round keys, %rsi = context holding the multiplier
	 *   table and saved partial block, %rdx = where the partial-block
	 *   length is stored, %rcx / %r9 = input / output base, %r11 = data
	 *   offset, %r8 = bytes remaining, %zmm14 = running hash value.
	 */
	/* k1 = mask table entry for (r8 - 128), used for the third vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Build the counter blocks in zmm0 / zmm3 / xmm4.  The slow path is
	 * taken when r15d >= 247 (presumably the low counter byte would
	 * wrap while adding 9 - confirm).
	 */
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_71
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_71

.L_16_blocks_overflow_71:
	/* Slow path: shuffle zmm2 with zmm29, add, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_71:




	/*
	 * Ten AES rounds (round keys at 0..160(%rdi)) on the counter blocks,
	 * interleaved with carry-less multiplies of the data at
	 * 768..1023(%rsp) (first vector XORed with zmm14) by the operands at
	 * 0..255(%rsp,%rbx).  The partial products end up in zmm24 (high),
	 * zmm25 (low) and zmm26 (middle).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 0 of zmm4 (the last counter block) in all four lanes */
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold the partial products together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 128 full bytes, then a k1-masked zero-filled tail */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	/* XOR the keystream with the input; xmm11 = last output block */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	/* Write the output; the tail store is limited by k1 */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 that lie outside the k1 mask */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output with zmm29 for hashing; xmm7 = last shuffled block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	/* r8 = number of bytes in the last (9th) block */
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_72





	/*
	 * Last block is complete: store 0 at (%rdx), multiply all 9
	 * shuffled blocks by the values at 208..351(%rsi) (presumably
	 * hash-key powers - confirm), add zmm24/25/26 and reduce with POLY2.
	 * The result is left in xmm14.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the products of the first two vectors term by term */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_72
.L_small_initial_partial_block_72:








	/*
	 * Last block is partial: store its byte count at (%rdx) and save
	 * xmm11 at 16(%rsi), then multiply only the 8 complete blocks
	 * (zmm17 and zmm19) by the values at 224..351(%rsi) and reduce as
	 * above.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_72:

	/* If r8 is nonzero, XOR the last shuffled block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_72
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_72:
	jmp	.L_last_blocks_done_54
.L_last_num_blocks_is_10_54:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_73
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_73

.L_16_blocks_overflow_73:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_73:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_74





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_74
.L_small_initial_partial_block_74:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_74:

	orq	%r8,%r8
	je	.L_after_reduction_74
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_74:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_11_54 - tail case that handles 11 blocks of 16 bytes
 * (4 + 4 + 3 across zmm0, zmm3, zmm4); the 11th block may be shorter than 16.
 *
 * Register use visible in this block:
 *   %rdi        base of the 16-byte round keys; offsets 0..160 are read
 *   %rcx / %r9  input / output base, both indexed by byte offset %r11
 *   %r8         byte count: indexes the mask table on entry, reduced to the
 *               size of the last block later on
 *   %rsi        table of multiplier values read at fixed offsets; 16(%rsi)
 *               is written on the partial-block path
 *   %rdx        pointer to a qword that receives 0 (last block full) or the
 *               leftover byte count (last block partial)
 *   %rsp, %rbx  stack area holding earlier data (768..960(%rsp)) and the
 *               values they are multiplied by ((%rsp,%rbx))
 *   %zmm2       counter state in; last counter block broadcast on exit
 *   %zmm14      accumulator in; %xmm14 holds the reduced result on exit
 *   %zmm27/28   counter increments, %zmm29 byte-shuffle mask
 * NOTE(review): the carry-less multiply / POLY2 sequence is presumably the
 * GHASH update of AES-GCM - confirm against the generator script.
 */
.L_last_num_blocks_is_11_54:
	/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third 64-byte vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 245 = 256 - 11. NOTE(review): r15d presumably tracks the low counter byte, so */
	/* values >= 245 could carry out of that byte while adding 11 - confirm. */
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_75
	/* Fast path: derive the three counter vectors by plain dword adds */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_75

.L_16_blocks_overflow_75:
	/* Slow path: shuffle zmm2 with zmm29, add the ddq_add_* constants, shuffle each result back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_75:




	/* From here AES rounds on zmm0/zmm3/zmm4 are interleaved with carry-less */
	/* multiplies of the data saved at 768..960(%rsp). Round keys alternate */
	/* between zmm30 and zmm31 so the next key loads while the current is used. */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the incoming accumulator into the first saved vector */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast 128-bit lane 2 of zmm4 (the 11th counter block) to all lanes of zmm2 */
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* First saved vector: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Second saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Third saved vector */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: sum the partial products of the first three vectors */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: two full vectors, then the third with masked-off bytes zeroed */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Fourth saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Totals carried into the final sum: zmm24 = high, zmm25 = low, zmm26 = cross products */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Last round uses the key at 160(%rdi): the same 0..160 key sequence as the */
	/* .Laes_128_0 path at the top of this file */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* XOR the cipher output with the loaded input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* Keep lane 2 (the 11th block) before masking; the partial path stores it to 16(%rsi) */
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	/* Store the result; the third vector is written under mask k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 beyond the mask before they are multiplied */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output blocks with the zmm29 mask for the multiply stage */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* xmm7 = shuffled 11th block, XORed into the result below when that block is partial */
	vextracti32x4	$2,%zmm20,%xmm7
	/* r8 = bytes belonging to the 11th block */
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_76





	/* Full 11th block: nothing is left over, so store 0 through %rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 11 blocks by the values at 176(%rsi)..351(%rsi) */
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Three multipliers for the last vector: 32 bytes at 304(%rsi) plus 16 bytes at 336(%rsi) */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross products across the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the four 128-bit lanes of each half into xmm0 / xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_76
.L_small_initial_partial_block_76:








	/* Partial 11th block: record its length through %rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only the first 10 blocks here (multipliers at 192(%rsi)..351(%rsi)); */
	/* the partial block is added unmultiplied after the reduction */
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross products, then horizontally XOR lanes into xmm0 / xmm3 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_76:

	/* r8 == 0 only after the full-block path; otherwise XOR the partial block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_76
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_76:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_12_54 - tail case that handles 12 blocks of 16 bytes
 * (4 + 4 + 4 across zmm0, zmm3, zmm4); the 12th block may be shorter than 16.
 *
 * Register use visible in this block:
 *   %rdi        base of the 16-byte round keys; offsets 0..160 are read
 *   %rcx / %r9  input / output base, both indexed by byte offset %r11
 *   %r8         byte count: indexes the mask table on entry, reduced to the
 *               size of the last block later on
 *   %rsi        table of multiplier values read at fixed offsets; 16(%rsi)
 *               is written on the partial-block path
 *   %rdx        pointer to a qword that receives 0 (last block full) or the
 *               leftover byte count (last block partial)
 *   %rsp, %rbx  stack area holding earlier data (768..960(%rsp)) and the
 *               values they are multiplied by ((%rsp,%rbx))
 *   %zmm2       counter state in; last counter block broadcast on exit
 *   %zmm14      accumulator in; %xmm14 holds the reduced result on exit
 *   %zmm27/28   counter increments, %zmm29 byte-shuffle mask
 * NOTE(review): the carry-less multiply / POLY2 sequence is presumably the
 * GHASH update of AES-GCM - confirm against the generator script.
 */
.L_last_num_blocks_is_12_54:
	/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third 64-byte vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 244 = 256 - 12. NOTE(review): r15d presumably tracks the low counter byte, so */
	/* values >= 244 could carry out of that byte while adding 12 - confirm. */
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_77
	/* Fast path: derive the three counter vectors by plain dword adds */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_77

.L_16_blocks_overflow_77:
	/* Slow path: shuffle zmm2 with zmm29, add the ddq_add_* constants, shuffle each result back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_77:




	/* From here AES rounds on zmm0/zmm3/zmm4 are interleaved with carry-less */
	/* multiplies of the data saved at 768..960(%rsp). Round keys alternate */
	/* between zmm30 and zmm31 so the next key loads while the current is used. */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the incoming accumulator into the first saved vector */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast 128-bit lane 3 of zmm4 (the 12th counter block) to all lanes of zmm2 */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* First saved vector: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Second saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Third saved vector */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: sum the partial products of the first three vectors */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: two full vectors, then the third with masked-off bytes zeroed */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Fourth saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Totals carried into the final sum: zmm24 = high, zmm25 = low, zmm26 = cross products */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Last round uses the key at 160(%rdi): the same 0..160 key sequence as the */
	/* .Laes_128_0 path at the top of this file */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* XOR the cipher output with the loaded input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* Keep lane 3 (the 12th block) before masking; the partial path stores it to 16(%rsi) */
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	/* Store the result; the third vector is written under mask k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 beyond the mask before they are multiplied */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output blocks with the zmm29 mask for the multiply stage */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* xmm7 = shuffled 12th block, XORed into the result below when that block is partial */
	vextracti32x4	$3,%zmm20,%xmm7
	/* r8 = bytes belonging to the 12th block */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_78





	/* Full 12th block: nothing is left over, so store 0 through %rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 12 blocks by the values at 160(%rsi)..351(%rsi) */
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	/* zmm17/zmm19 are reused as scratch for the third vector's products */
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the cross products across the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* Horizontal XOR of the four 128-bit lanes of each half into xmm0 / xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_78
.L_small_initial_partial_block_78:








	/* Partial 12th block: record its length through %rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply the first 11 blocks by the values at 176(%rsi)..351(%rsi). */
	/* The inserted multiplier leaves lane 3 of zmm1 zero, so the 12th block's product is zero; */
	/* that block is added unmultiplied after the reduction */
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Three multipliers for the last vector: 32 bytes at 304(%rsi) plus 16 bytes at 336(%rsi) */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross products, then horizontally XOR lanes into xmm0 / xmm3 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_78:

	/* r8 == 0 only after the full-block path; otherwise XOR the partial block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_78
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_78:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_13_54 - tail case that handles 13 blocks of 16 bytes
 * (4 + 4 + 4 + 1 across zmm0, zmm3, zmm4, xmm5); the 13th block may be
 * shorter than 16 bytes.
 *
 * Register use visible in this block:
 *   %rdi        base of the 16-byte round keys; offsets 0..160 are read
 *   %rcx / %r9  input / output base, both indexed by byte offset %r11
 *   %r8         byte count: indexes the mask table on entry, reduced to the
 *               size of the last block later on
 *   %rsi        table of multiplier values read at fixed offsets; 16(%rsi)
 *               is written on the partial-block path
 *   %rdx        pointer to a qword that receives 0 (last block full) or the
 *               leftover byte count (last block partial)
 *   %rsp, %rbx  stack area holding earlier data (768..960(%rsp)) and the
 *               values they are multiplied by ((%rsp,%rbx))
 *   %zmm2       counter state in; last counter block broadcast on exit
 *   %zmm14      accumulator in; %xmm14 holds the reduced result on exit
 *   %zmm27/28   counter increments, %zmm29 byte-shuffle mask
 * NOTE(review): the carry-less multiply / POLY2 sequence is presumably the
 * GHASH update of AES-GCM - confirm against the generator script.
 */
.L_last_num_blocks_is_13_54:
	/* k1 = byte64_len_to_mask_table[r8 - 192]: byte mask for the fourth vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 243 = 256 - 13. NOTE(review): r15d presumably tracks the low counter byte, so */
	/* values >= 243 could carry out of that byte while adding 13 - confirm. */
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_79
	/* Fast path: derive the counter vectors by plain dword adds (only one block in xmm5) */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_79

.L_16_blocks_overflow_79:
	/* Slow path: shuffle zmm2 with zmm29, add the ddq_add_* constants, shuffle each result back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_79:




	/* From here AES rounds on zmm0/zmm3/zmm4/xmm5 are interleaved with carry-less */
	/* multiplies of the data saved at 768..960(%rsp). Round keys alternate */
	/* between zmm30 and zmm31 so the next key loads while the current is used. */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the incoming accumulator into the first saved vector */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast 128-bit lane 0 of zmm5 (the 13th counter block) to all lanes of zmm2 */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* First saved vector: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Second saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Third saved vector */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: sum the partial products of the first three vectors */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: three full vectors, then one block with masked-off bytes zeroed */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Fourth saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Totals carried into the final sum: zmm24 = high, zmm25 = low, zmm26 = cross products */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	/* Last round uses the key at 160(%rdi): the same 0..160 key sequence as the */
	/* .Laes_128_0 path at the top of this file */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	/* XOR the cipher output with the loaded input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	/* Keep lane 0 (the 13th block) before masking; the partial path stores it to 16(%rsi) */
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	/* Store the result; the final block is written under mask k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 beyond the mask before they are multiplied */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with the zmm29 mask for the multiply stage */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	/* xmm7 = shuffled 13th block, XORed into the result below when that block is partial */
	vextracti32x4	$0,%zmm21,%xmm7
	/* r8 = bytes belonging to the 13th block */
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_80





	/* Full 13th block: nothing is left over, so store 0 through %rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 13 blocks by the values at 144(%rsi)..351(%rsi) */
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	/* zmm17/zmm19 are reused as scratch for the third vector's products */
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* The single block in xmm21 is multiplied by the 16-byte value at 336(%rsi) */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross products across the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the four 128-bit lanes of each half into xmm0 / xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_80
.L_small_initial_partial_block_80:








	/* Partial 13th block: record its length through %rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only the first 12 blocks here (multipliers at 160(%rsi)..351(%rsi)); */
	/* the partial block is added unmultiplied after the reduction */
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	/* zmm17/zmm19 are reused as scratch for the third vector's products */
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the cross products, then horizontally XOR lanes into xmm0 / xmm3 */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_80:

	/* r8 == 0 only after the full-block path; otherwise XOR the partial block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_80
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_80:
	jmp	.L_last_blocks_done_54
/*
 * .L_last_num_blocks_is_14_54 - tail case that handles 14 blocks of 16 bytes
 * (4 + 4 + 4 + 2 across zmm0, zmm3, zmm4, ymm5); the 14th block may be
 * shorter than 16 bytes.
 *
 * Register use visible in this block:
 *   %rdi        base of the 16-byte round keys; offsets 0..160 are read
 *   %rcx / %r9  input / output base, both indexed by byte offset %r11
 *   %r8         byte count: indexes the mask table on entry, reduced to the
 *               size of the last block later on
 *   %rsi        table of multiplier values read at fixed offsets; 16(%rsi)
 *               is written on the partial-block path
 *   %rdx        pointer to a qword that receives 0 (last block full) or the
 *               leftover byte count (last block partial)
 *   %rsp, %rbx  stack area holding earlier data (768..960(%rsp)) and the
 *               values they are multiplied by ((%rsp,%rbx))
 *   %zmm2       counter state in; last counter block broadcast on exit
 *   %zmm14      accumulator in; %xmm14 holds the reduced result on exit
 *   %zmm27/28   counter increments, %zmm29 byte-shuffle mask
 * NOTE(review): the carry-less multiply / POLY2 sequence is presumably the
 * GHASH update of AES-GCM - confirm against the generator script.
 */
.L_last_num_blocks_is_14_54:
	/* k1 = byte64_len_to_mask_table[r8 - 192]: byte mask for the fourth vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 242 = 256 - 14. NOTE(review): r15d presumably tracks the low counter byte, so */
	/* values >= 242 could carry out of that byte while adding 14 - confirm. */
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_81
	/* Fast path: derive the counter vectors by plain dword adds (two blocks in ymm5) */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_81

.L_16_blocks_overflow_81:
	/* Slow path: shuffle zmm2 with zmm29, add the ddq_add_* constants, shuffle each result back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_81:




	/* From here AES rounds on zmm0/zmm3/zmm4/ymm5 are interleaved with carry-less */
	/* multiplies of the data saved at 768..960(%rsp). Round keys alternate */
	/* between zmm30 and zmm31 so the next key loads while the current is used. */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the incoming accumulator into the first saved vector */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Broadcast 128-bit lane 1 of zmm5 (the 14th counter block) to all lanes of zmm2 */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* First saved vector: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Second saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Third saved vector */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: sum the partial products of the first three vectors */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: three full vectors, then two blocks with masked-off bytes zeroed */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Fourth saved vector */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Totals carried into the final sum: zmm24 = high, zmm25 = low, zmm26 = cross products */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	/* Last round uses the key at 160(%rdi): the same 0..160 key sequence as the */
	/* .Laes_128_0 path at the top of this file */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	/* XOR the cipher output with the loaded input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	/* Keep lane 1 (the 14th block) before masking; the partial path stores it to 16(%rsi) */
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	/* Store the result; the final vector is written under mask k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 beyond the mask before they are multiplied */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with the zmm29 mask for the multiply stage */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	/* xmm7 = shuffled 14th block, XORed into the result below when that block is partial */
	vextracti32x4	$1,%zmm21,%xmm7
	/* r8 = bytes belonging to the 14th block */
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_82





	/* Full 14th block: nothing is left over, so store 0 through %rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 14 blocks by the values at 128(%rsi)..351(%rsi) */
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	/* zmm17/zmm19 are reused as scratch for the third vector's products */
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* The two blocks in ymm21 are multiplied by the 32 bytes at 320(%rsi) */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross products across the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the four 128-bit lanes of each half into xmm0 / xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_82
.L_small_initial_partial_block_82:








	/* Partial 14th block: record its length through %rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only the first 13 blocks here (multipliers at 144(%rsi)..351(%rsi)); */
	/* the partial block is added unmultiplied after the reduction */
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	/* zmm17/zmm19 are reused as scratch for the third vector's products */
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Only the low block of ymm21 (the 13th) is multiplied, by the 16 bytes at 336(%rsi) */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Add the totals from the interleaved stage (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross products, then horizontally XOR lanes into xmm0 / xmm3 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Fold xmm0:xmm3 down to 128 bits in xmm14 using the constant at POLY2 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_82:

	/* r8 == 0 only after the full-block path; otherwise XOR the partial block (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_82
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_82:
	jmp	.L_last_blocks_done_54
/*
 * Tail handler: 15 blocks remain (the last one may be partial).
 *
 * Register use as seen in this block:
 *   %rdi  - AES round keys (0..160(%rdi) are consumed, i.e. 10 rounds)
 *   %rcx  - input base, %r9 - output base, %r11 - running byte offset
 *   %r8   - bytes remaining
 *   %rdx  - receives the length of a trailing partial block (or 0)
 *   %rsi  - context; 16(%rsi) receives the last output block, and
 *           112..351(%rsi) are read as GHASH multipliers
 *           (presumably precomputed hash-key powers - layout not visible here)
 *   %zmm2 - counter block(s), %zmm14 - GHASH accumulator
 *   %zmm29 - vpshufb control (presumably a byte-reflection mask)
 *   %zmm24/%zmm25/%zmm26 - unreduced GHASH high/low/middle sums
 */
.L_last_num_blocks_is_15_54:
	/* k1 = byte mask for the 4th 64-byte chunk: table[%r8 - 192] */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 241 = 256 - 15: NOTE(review) %r15d presumably tracks the low counter
	   byte, so >= 241 means adding 15 would carry out of it - confirm */
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_83
	/* Fast path: add the increment vectors held in %zmm28 / %zmm27 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_83

.L_16_blocks_overflow_83:
	/* Slow path: shuffle the counter, add ddq_add_1234 / ddq_add_4444,
	   then shuffle each resulting counter vector back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_83:




	/* From here AES rounds on %zmm0/3/4/5 are interleaved with GHASH of the
	   64-byte groups saved at 768..1023(%rsp), multiplied by the values at
	   0..255(%rsp,%rbx,1). Round keys alternate between %zmm30 and %zmm31. */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running GHASH value into the first saved group */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last counter used (lane 2 of %zmm5 = 15th block) in all lanes of %zmm2 */
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* AES round 0: XOR with the first round key */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* GHASH group 0: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* GHASH group 1 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* GHASH group 2 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq $0x96 is a three-way XOR: accumulate groups 0..2 */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input; the 4th chunk is masked by k1, bytes past the end read as zero */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* GHASH group 3 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums of the four groups: %zmm24 = high, %zmm25 = low, %zmm26 = middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final AES round with the key at 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the keystream with the input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = last (15th) output block, taken before masking */
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	/* Masked store: only the valid bytes of the last chunk are written */
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes past the message end so they do not enter the hash */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with %zmm29 to form the GHASH input */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled last block, used below only on the partial-block path */
	vextracti32x4	$2,%zmm21,%xmm7
	/* %r8 = bytes belonging to the 15th block */
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_84





	/* Last block is complete: record 0 partial bytes and hash all 15 blocks */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Last three multipliers: 32 bytes at 304(%rsi) plus 16 bytes at 336(%rsi) */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Merge with the sums carried in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit product (xmm0:xmm3) to 128 bits using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* xmm14 = reduced GHASH value */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_84
.L_small_initial_partial_block_84:








	/* Last block is partial: record its length and save the last output block
	   (presumably so a later call can finish the block - confirm against caller) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Hash only the 14 complete blocks; multipliers start 16 bytes later (128(%rsi)) */
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Only two blocks of the last group are multiplied here (ymm width) */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Same lane fold and POLY2 reduction as on the full-block path */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_84:

	/* %r8 != 0 only on the partial path: XOR the shuffled partial block into
	   the hash value without multiplying it */
	orq	%r8,%r8
	je	.L_after_reduction_84
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_84:
	jmp	.L_last_blocks_done_54
/*
 * Tail handler: 16 blocks remain (the last one may be partial).
 *
 * Register use as seen in this block:
 *   %rdi  - AES round keys (0..160(%rdi) are consumed, i.e. 10 rounds)
 *   %rcx  - input base, %r9 - output base, %r11 - running byte offset
 *   %r8   - bytes remaining; %rdx - receives the trailing byte count
 *   %rsi  - context; 16(%rsi) receives the last output block, and
 *           112..351(%rsi) are read as GHASH multipliers
 *           (presumably precomputed hash-key powers - layout not visible here)
 *   %zmm2 - counter block(s), %zmm14 - GHASH accumulator
 *   %zmm29 - vpshufb control (presumably a byte-reflection mask)
 *   %zmm24/%zmm25/%zmm26 - unreduced GHASH high/low/middle sums
 */
.L_last_num_blocks_is_16_54:
	/* k1 = byte mask for the 4th 64-byte chunk: table[%r8 - 192] */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 240 = 256 - 16: NOTE(review) %r15d presumably tracks the low counter
	   byte, so >= 240 means adding 16 would carry out of it - confirm */
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_85
	/* Fast path: add the increment vectors held in %zmm28 / %zmm27 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_85

.L_16_blocks_overflow_85:
	/* Slow path: shuffle the counter, add ddq_add_1234 / ddq_add_4444,
	   then shuffle each resulting counter vector back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_85:




	/* From here AES rounds on %zmm0/3/4/5 are interleaved with GHASH of the
	   64-byte groups saved at 768..1023(%rsp), multiplied by the values at
	   0..255(%rsp,%rbx,1). Round keys alternate between %zmm30 and %zmm31. */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running GHASH value into the first saved group */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last counter used (lane 3 of %zmm5 = 16th block) in all lanes of %zmm2 */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* AES round 0: XOR with the first round key */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* GHASH group 0: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* GHASH group 1 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* GHASH group 2 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq $0x96 is a three-way XOR: accumulate groups 0..2 */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input; the 4th chunk is masked by k1, bytes past the end read as zero */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* GHASH group 3 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums of the four groups: %zmm24 = high, %zmm25 = low, %zmm26 = middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final AES round with the key at 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the keystream with the input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = last (16th) output block, taken before masking */
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	/* Masked store: only the valid bytes of the last chunk are written */
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes past the message end so they do not enter the hash */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with %zmm29 to form the GHASH input */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled 16th block, XORed into the hash after the reduction */
	vextracti32x4	$3,%zmm21,%xmm7
	/* %r8 = bytes belonging to the 16th block */
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_86:








	/* Unlike the smaller tail cases there is no "cmpq $16" split here: the
	   16th block is always handled as the trailing block. Record its byte
	   count and save the last output block in the context. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply the first 15 blocks by the values at 112..351(%rsi) */
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Last three multipliers: 32 bytes at 304(%rsi) plus 16 bytes at 336(%rsi);
	   lane 3 of %zmm1 is zero after the ymm load, so the 16th block contributes 0 here */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Merge with the sums carried in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit product (xmm0:xmm3) to 128 bits using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* xmm14 = reduced GHASH value */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_86:
	/* XOR the shuffled 16th block into the hash value without multiplying it */
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_86:
	jmp	.L_last_blocks_done_54
/*
 * Tail handler: no blocks remain to be encrypted. Only the GHASH of the
 * four 64-byte groups saved at 768..1023(%rsp) is finished here: each group
 * is multiplied by the matching 64 bytes at 0..255(%rsp,%rbx,1)
 * (presumably hash-key powers - confirm against the code that fills the
 * stack frame), the products are summed, and the sum is reduced into
 * %xmm14. Execution then falls through to .L_last_blocks_done_54.
 */
.L_last_num_blocks_is_0_54:
	/* Group 0, with the running GHASH value (%zmm14) folded in first */
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	/* 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Group 1 */
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* Start the sums: %zmm24 = high, %zmm25 = low, %zmm26 = middle (all four cross terms) */
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Group 2 */
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Group 3 */
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	/* vpternlogq $0x96 is a three-way XOR: add groups 2 and 3 to the sums */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* Split the middle term across high/low, then XOR the four 128-bit lanes together */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	/* Reduce the 256-bit product (xmm24:xmm25) to 128 bits using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* xmm14 = reduced GHASH value */
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

/* Common exit of the "_54" tail handlers: shuffle the counter block in
   %xmm2 with the %xmm29 control, then continue at .L_ghash_done_10. */
.L_last_blocks_done_54:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10
/*
 * Encrypt 16 full blocks (256 bytes) and advance the GHASH pipeline.
 *
 * Steps visible in this block:
 *   1. Build 16 counter blocks in %zmm0/3/4/5 from %zmm2.
 *   2. Run 10 AES rounds (keys at 0..160(%rdi)), interleaved with GHASH of
 *      the groups saved at 768..1023(%rsp) against 0..255(%rsp).
 *   3. XOR with the input at (%rcx,%r11), store to (%r9,%r11), and save the
 *      shuffled output at 1280..1535(%rsp) for later hashing.
 *   4. Add the GHASH of 1024..1279(%rsp) against 256..511(%rsp) to the
 *      unreduced sums in %zmm24/%zmm25/%zmm26.
 *   5. Consume 256 bytes (%r8 -= 256, %r11 += 256) and dispatch on the
 *      number of 16-byte blocks left, rounded up.
 *
 * NOTE(review): the multiplier areas on the stack are presumably hash-key
 * powers; they are filled outside this block - confirm.
 */
.L_encrypt_16_blocks_10:
	/* 240 = 256 - 16: NOTE(review) %r15b presumably tracks the low counter
	   byte, so >= 240 means adding 16 would carry out of it - confirm */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_87
	/* Fast path: add the increment vectors held in %zmm28 / %zmm27 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_87
.L_16_blocks_overflow_87:
	/* Slow path: shuffle the counter, add ddq_add_1234 / ddq_add_4444,
	   then shuffle each resulting counter vector back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_87:
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running GHASH value into the first saved group */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	/* Broadcast lane 3 of %zmm5 (the 16th counter) into %zmm2 and advance %r15b by 16 */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	/* AES round 0: XOR with the first round key */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	/* GHASH group 0: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	/* GHASH group 1 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	/* GHASH group 2 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* vpternlogq $0x96 is a three-way XOR: accumulate groups 0..2 */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load 256 bytes of input (no masking: all 16 blocks are complete) */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	/* GHASH group 3 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums of the four groups: %zmm26 = middle, %zmm24 = high, %zmm25 = low */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final AES round with the key at 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* XOR the keystream with the input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	/* Shuffle the output with %zmm29 and save it at 1280..1535(%rsp) for the next GHASH pass */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	/* GHASH the groups saved at 1024..1279(%rsp) against 256..511(%rsp) and
	   add the products to %zmm24/%zmm25/%zmm26 (no reduction yet) */
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* 256 bytes consumed */
	subq	$256,%r8
	addq	$256,%r11
	/* %r10d = (%r8d + 15) >> 4 = blocks left, rounded up; ZF from shrl selects the 0 case */
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_88

	/* Compare-tree dispatch on the block count (1..16) */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_88
	jb	.L_last_num_blocks_is_7_1_88


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_88
	jb	.L_last_num_blocks_is_11_9_88


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_88
	ja	.L_last_num_blocks_is_16_88
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_88
	jmp	.L_last_num_blocks_is_13_88

/* Dispatch subtree: %r10d (remaining block count) is 9, 10 or 11 here */
.L_last_num_blocks_is_11_9_88:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_88
	ja	.L_last_num_blocks_is_11_88
	jmp	.L_last_num_blocks_is_9_88

/* Dispatch subtree: %r10d (remaining block count) is in 1..7 here */
.L_last_num_blocks_is_7_1_88:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_88
	jb	.L_last_num_blocks_is_3_1_88

	/* 5, 6 or 7 blocks */
	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_88
	je	.L_last_num_blocks_is_6_88
	jmp	.L_last_num_blocks_is_5_88

/* Dispatch subtree: %r10d (remaining block count) is 1, 2 or 3 here;
   the 1-block case is reached by falling through to the next label */
.L_last_num_blocks_is_3_1_88:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_88
	je	.L_last_num_blocks_is_2_88
/*
 * Tail handler: 1 block remains (possibly partial).
 *
 * While the single counter block is encrypted (xmm width, 10 AES rounds
 * with the keys at 0..160(%rdi)), the 16 blocks saved at 1280..1535(%rsp)
 * are multiplied by the values at 512..767(%rsp) (presumably hash-key
 * powers - confirm), merged with the unreduced sums in
 * %zmm24/%zmm25/%zmm26, and reduced into %xmm14. The new output block is
 * then either hashed in full (complete block) or only XORed into %xmm14
 * (partial block).
 */
.L_last_num_blocks_is_1_88:
	/* k1 = byte mask for the remaining bytes: table[%r8] */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 255 = 256 - 1: NOTE(review) %r15d presumably tracks the low counter
	   byte, so 255 means the increment would carry out of it - confirm */
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_89
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_89

.L_16_blocks_overflow_89:
	/* Slow path: shuffle the counter, add ddq_add_1234, shuffle lane 0 back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_89:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the counter just used (lane 0) in all lanes of %zmm2 */
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* AES round 0: XOR with the first round key */
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* GHASH group 0: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* GHASH group 1 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* GHASH group 2 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq $0x96 is a three-way XOR: accumulate groups 0..2 */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked input load: bytes past the end read as zero */
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* GHASH group 3 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Merge with the carried sums: %zmm14 = high, %zmm7 = low, %zmm10 = middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term across high/low */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	/* XOR the four 128-bit lanes together, then reduce with the POLY2 constant */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = reduced GHASH value of everything hashed so far */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round, then XOR the keystream with the input */
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	/* xmm11 = output block, taken before masking */
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	/* Masked store: only the valid bytes are written */
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes past the message end so they do not enter the hash */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* Shuffle the output block with %xmm29 to form the GHASH input */
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_90





	/* Complete block: record 0 partial bytes, fold the block into the hash
	   value and multiply by the 16 bytes at 336(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	/* Same middle-term split, lane fold and POLY2 reduction as above */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_90
.L_small_initial_partial_block_90:








	/* Partial block: record its length and save the output block in the
	   context (presumably so a later call can finish it - confirm) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	/* XOR the shuffled partial block into the hash value without multiplying it */
	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_90
.L_small_initial_compute_done_90:
.L_after_reduction_90:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler: 2 blocks remain (the second may be partial).
 *
 * While the two counter blocks are encrypted (ymm width, 10 AES rounds
 * with the keys at 0..160(%rdi)), the 16 blocks saved at 1280..1535(%rsp)
 * are multiplied by the values at 512..767(%rsp) (presumably hash-key
 * powers - confirm), merged with the unreduced sums in
 * %zmm24/%zmm25/%zmm26, and reduced into %xmm14. The new output is then
 * hashed: both blocks if the second is complete, otherwise only the first,
 * with the partial block XORed in afterwards.
 */
.L_last_num_blocks_is_2_88:
	/* k1 = byte mask for the remaining bytes: table[%r8] */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 254 = 256 - 2: NOTE(review) %r15d presumably tracks the low counter
	   byte, so >= 254 means adding 2 would carry out of it - confirm */
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_91
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_91

.L_16_blocks_overflow_91:
	/* Slow path: shuffle the counter, add ddq_add_1234, shuffle the low two lanes back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_91:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter used (lane 1) in all lanes of %zmm2 */
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* AES round 0: XOR with the first round key */
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* GHASH group 0: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* GHASH group 1 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* GHASH group 2 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq $0x96 is a three-way XOR: accumulate groups 0..2 */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked input load: bytes past the end read as zero */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* GHASH group 3 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Merge with the carried sums: %zmm14 = high, %zmm7 = low, %zmm10 = middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term across high/low */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	/* XOR the four 128-bit lanes together, then reduce with the POLY2 constant */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = reduced GHASH value of everything hashed so far */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round, then XOR the keystream with the input */
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	/* xmm11 = last (2nd) output block, taken before masking */
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	/* Masked store: only the valid bytes are written */
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes past the message end so they do not enter the hash */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* Shuffle the output blocks with %ymm29 to form the GHASH input */
	vpshufb	%ymm29,%ymm0,%ymm17
	/* xmm7 = shuffled last block, used below only on the partial-block path */
	vextracti32x4	$1,%zmm17,%xmm7
	/* %r8 = bytes belonging to the 2nd block */
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_92





	/* Last block is complete: record 0 partial bytes, fold the hash value
	   into the first block and multiply both blocks by 320..351(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	/* Same middle-term split, lane fold and POLY2 reduction as above */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_92
.L_small_initial_partial_block_92:








	/* Last block is partial: record its length and save the last output block
	   in the context (presumably so a later call can finish it - confirm) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Hash only the first (complete) block: xmm-width multiply by 336(%rsi) */
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_92:

	/* %r8 != 0 only on the partial path: XOR the shuffled partial block into
	   the hash value without multiplying it */
	orq	%r8,%r8
	je	.L_after_reduction_92
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_92:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler labelled "3 blocks".
 *
 * One ZMM of counter blocks (%zmm0) is run through the AES rounds using the
 * round keys at 0..160(%rdi).  Interleaved with those rounds, the values at
 * 1280..1472(%rsp) are carry-less multiplied with the values at
 * 512..704(%rsp), summed together with %zmm24/%zmm25/%zmm26, reduced with
 * POLY2 and left in %xmm14.  The keystream is XORed with the masked input at
 * (%rcx,%r11) and stored, masked, at (%r9,%r11).  Finally the byte-shuffled
 * result is multiplied by values read from (%rsi) and reduced into %xmm14.
 *
 * NOTE(review): %r8 is presumably the number of bytes left to process and
 * %r15d the low byte of the block counter; %zmm28 presumably holds the
 * per-lane counter increments and %zmm29 a byte-swap shuffle mask.  These
 * are set up outside this excerpt - confirm against the caller.
 */
.L_last_num_blocks_is_3_88:
	/* %k1 = 64-bit byte mask read from byte64_len_to_mask_table[%r8]. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Unsigned test: %r15d >= 253 takes the slower path below. */
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_93
	/* Fast path: counter blocks = %zmm2 + %zmm28 (dword adds). */
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_93

.L_16_blocks_overflow_93:
	/* Shuffle %zmm2 with %zmm29, add ddq_add_1234, shuffle the sum back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_93:




	/* Round key 0 broadcast to all lanes; first pair of multiply operands. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Replicate lane 2 of the new counter vector into every lane of %zmm2. */
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial AddRoundKey: XOR the counters with round key 0. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: high (0x11), low (0x00) and the two cross products. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES round with the key from 16(%rdi); the next key is prefetched. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the pair 1 sums. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold pair 4 and the incoming %zmm24/%zmm25/%zmm26 values into the sums. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle sum into halves that line up with high and low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	/* NOTE(review): POLY2 is presumably the GHASH reduction constant. */
	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	/* Horizontal XOR of the four lanes: high sum -> %xmm14, low -> %xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduction, first phase: multiply the low half by POLY2 and fold. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	/* Reduction, second phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 = high sum ^ both second-phase terms (upper lanes zeroed). */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	/* Keep lane 2 of the unmasked result; the partial path stores it. */
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	/* Masked store of the output, then zero the bytes outside %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* Byte-shuffle the masked result with %zmm29; lane 2 goes to %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	/* Remove the two leading 16-byte blocks from the length. */
	subq	$16 * (3 - 1),%r8


	/* Signed compare: fewer than 16 bytes left means a partial last block. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_94





	/* Full last block: consume 16 more bytes and store 0 at (%rdx). */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running %xmm14 into lane 0 of the shuffled result. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Three 16-byte multipliers: 304(%rsi), 320(%rsi) and 336(%rsi). */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the lanes: high -> %xmm0, low -> %xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_94
.L_small_initial_partial_block_94:








	/* Partial last block: save %r8 at (%rdx) and %xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Only the first two lanes are multiplied here (YMM-width operands). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	/* The YMM writes above zeroed the upper halves, so ZMM ops are safe. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_94:

	/* If bytes remain (%r8 != 0), XOR the last shuffled block into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_94
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_94:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler labelled "4 blocks".
 *
 * Same structure as the neighbouring tail handlers: one ZMM of counter
 * blocks (%zmm0) goes through the AES rounds keyed from 0..160(%rdi) while
 * the values at 1280..1472(%rsp) are carry-less multiplied with the values
 * at 512..704(%rsp), summed with %zmm24/%zmm25/%zmm26 and reduced into
 * %xmm14.  The keystream is XORed with the masked input at (%rcx,%r11) and
 * stored, masked, at (%r9,%r11); the shuffled result is then multiplied by
 * values read from (%rsi) and reduced into %xmm14.
 *
 * NOTE(review): %r8 is presumably the number of bytes left, %r15d the low
 * counter byte, %zmm28 the per-lane counter increments and %zmm29 a
 * byte-swap shuffle mask - all set up outside this excerpt; confirm.
 */
.L_last_num_blocks_is_4_88:
	/* %k1 = 64-bit byte mask read from byte64_len_to_mask_table[%r8]. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Unsigned test: %r15d >= 252 takes the slower path below. */
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_95
	/* Fast path: counter blocks = %zmm2 + %zmm28 (dword adds). */
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_95

.L_16_blocks_overflow_95:
	/* Shuffle %zmm2 with %zmm29, add ddq_add_1234, shuffle the sum back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_95:




	/* Round key 0 broadcast to all lanes; first pair of multiply operands. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Replicate lane 3 of the new counter vector into every lane of %zmm2. */
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial AddRoundKey: XOR the counters with round key 0. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: high (0x11), low (0x00) and the two cross products. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES round with the key from 16(%rdi); the next key is prefetched. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the pair 1 sums. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold pair 4 and the incoming %zmm24/%zmm25/%zmm26 values into the sums. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle sum into halves that line up with high and low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	/* NOTE(review): POLY2 is presumably the GHASH reduction constant. */
	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	/* Horizontal XOR of the four lanes: high sum -> %xmm14, low -> %xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduction, first phase: multiply the low half by POLY2 and fold. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	/* Reduction, second phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 = high sum ^ both second-phase terms (upper lanes zeroed). */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	/* Keep lane 3 of the unmasked result; the partial path stores it. */
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	/* Masked store of the output, then zero the bytes outside %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* Byte-shuffle the masked result with %zmm29; lane 3 goes to %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	/* Remove the three leading 16-byte blocks from the length. */
	subq	$16 * (4 - 1),%r8


	/* Signed compare: fewer than 16 bytes left means a partial last block. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_96





	/* Full last block: consume 16 more bytes and store 0 at (%rdx). */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running %xmm14 into lane 0 of the shuffled result. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Four 16-byte multipliers read from 288(%rsi)..351(%rsi). */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* Horizontal XOR of the lanes: high -> %xmm0, low -> %xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_96
.L_small_initial_partial_block_96:








	/* Partial last block: save %r8 at (%rdx) and %xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Three multipliers (304, 320, 336 off %rsi); lane 3 of %zmm1 is zero. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_96:

	/* If bytes remain (%r8 != 0), XOR the last shuffled block into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_96
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_96:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler labelled "5 blocks".
 *
 * Two counter vectors are encrypted here: a full ZMM (%zmm0) plus one XMM
 * lane (%xmm3), both keyed from 0..160(%rdi).  Interleaved with the AES
 * rounds, the values at 1280..1472(%rsp) are carry-less multiplied with the
 * values at 512..704(%rsp), summed with %zmm24/%zmm25/%zmm26 and reduced
 * into %xmm14.  The first 64 input bytes at (%rcx,%r11) are processed
 * unmasked, the bytes at 64(%rcx,%r11) under mask %k1; output goes to
 * (%r9,%r11).  The shuffled results are then multiplied by values read from
 * (%rsi) and reduced into %xmm14.
 *
 * NOTE(review): %r8 is presumably the number of bytes left, %r15d the low
 * counter byte, %zmm28/%zmm27 the per-lane counter increments and %zmm29 a
 * byte-swap shuffle mask - all set up outside this excerpt; confirm.
 */
.L_last_num_blocks_is_5_88:
	/* %k1 = mask from byte64_len_to_mask_table[%r8 - 64]: second vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Unsigned test: %r15d >= 251 takes the slower path below. */
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_97
	/* Fast path: %zmm0 = %zmm2 + %zmm28, %xmm3 = %xmm0 + %xmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_97

.L_16_blocks_overflow_97:
	/* Shuffle %zmm2, add ddq_add_1234 then ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_97:




	/* Round key 0 broadcast to all lanes; first pair of multiply operands. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Replicate lane 0 of the second counter vector into all of %zmm2. */
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial AddRoundKey on both counter vectors. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: high (0x11), low (0x00) and the two cross products. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES round with the key from 16(%rdi); the next key is prefetched. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the pair 1 sums. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 unmasked bytes, then up to 16 more bytes under %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold pair 4 and the incoming %zmm24/%zmm25/%zmm26 values into the sums. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle sum into halves that line up with high and low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	/* NOTE(review): POLY2 is presumably the GHASH reduction constant. */
	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	/* Horizontal XOR of the four lanes: high sum -> %xmm14, low -> %xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduction, first phase: multiply the low half by POLY2 and fold. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	/* Reduction, second phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 = high sum ^ both second-phase terms (upper lanes zeroed). */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Keep lane 0 of the unmasked second result for the partial path. */
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	/* Store 64 bytes unmasked, then the masked remainder at offset 64. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of the second result that lie outside %k1. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both results with %zmm29; last block goes to %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	/* Remove the four leading 16-byte blocks from the length. */
	subq	$16 * (5 - 1),%r8


	/* Signed compare: fewer than 16 bytes left means a partial last block. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_98





	/* Full last block: consume 16 more bytes and store 0 at (%rdx). */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running %xmm14 into lane 0 of the first shuffled vector. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four multipliers at 272(%rsi). */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Fifth block times the multiplier at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Merge the two sets of partial products. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the lanes: high -> %xmm0, low -> %xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_98
.L_small_initial_partial_block_98:








	/* Partial last block: save %r8 at (%rdx) and %xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Only the first four blocks are multiplied here (288(%rsi)). */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_98:

	/* If bytes remain (%r8 != 0), XOR the last shuffled block into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_98
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_98:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler labelled "6 blocks".
 *
 * Two counter vectors are encrypted here: a full ZMM (%zmm0) plus a YMM
 * (%ymm3, two lanes), both keyed from 0..160(%rdi).  Interleaved with the
 * AES rounds, the values at 1280..1472(%rsp) are carry-less multiplied with
 * the values at 512..704(%rsp), summed with %zmm24/%zmm25/%zmm26 and
 * reduced into %xmm14.  The first 64 input bytes at (%rcx,%r11) are
 * processed unmasked, the bytes at 64(%rcx,%r11) under mask %k1; output
 * goes to (%r9,%r11).  The shuffled results are then multiplied by values
 * read from (%rsi) and reduced into %xmm14.
 *
 * NOTE(review): %r8 is presumably the number of bytes left, %r15d the low
 * counter byte, %zmm28/%zmm27 the per-lane counter increments and %zmm29 a
 * byte-swap shuffle mask - all set up outside this excerpt; confirm.
 */
.L_last_num_blocks_is_6_88:
	/* %k1 = mask from byte64_len_to_mask_table[%r8 - 64]: second vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Unsigned test: %r15d >= 250 takes the slower path below. */
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_99
	/* Fast path: %zmm0 = %zmm2 + %zmm28, %ymm3 = %ymm0 + %ymm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_99

.L_16_blocks_overflow_99:
	/* Shuffle %zmm2, add ddq_add_1234 then ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_99:




	/* Round key 0 broadcast to all lanes; first pair of multiply operands. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Replicate lane 1 of the second counter vector into all of %zmm2. */
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial AddRoundKey on both counter vectors. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: high (0x11), low (0x00) and the two cross products. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES round with the key from 16(%rdi); the next key is prefetched. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the pair 1 sums. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 unmasked bytes, then up to 32 more bytes under %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold pair 4 and the incoming %zmm24/%zmm25/%zmm26 values into the sums. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle sum into halves that line up with high and low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	/* NOTE(review): POLY2 is presumably the GHASH reduction constant. */
	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	/* Horizontal XOR of the four lanes: high sum -> %xmm14, low -> %xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduction, first phase: multiply the low half by POLY2 and fold. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	/* Reduction, second phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 = high sum ^ both second-phase terms (upper lanes zeroed). */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	/* Keep lane 1 of the unmasked second result for the partial path. */
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	/* Store 64 bytes unmasked, then the masked remainder at offset 64. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of the second result that lie outside %k1. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both results with %zmm29; last block goes to %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	/* Remove the five leading 16-byte blocks from the length. */
	subq	$16 * (6 - 1),%r8


	/* Signed compare: fewer than 16 bytes left means a partial last block. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_100





	/* Full last block: consume 16 more bytes and store 0 at (%rdx). */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running %xmm14 into lane 0 of the first shuffled vector. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four multipliers at 256(%rsi). */
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Blocks five and six times the two multipliers at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Merge the two sets of partial products. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the lanes: high -> %xmm0, low -> %xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_100
.L_small_initial_partial_block_100:








	/* Partial last block: save %r8 at (%rdx) and %xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four multipliers at 272(%rsi). */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Only lane 0 of the second vector is multiplied here (336(%rsi)). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Merge the two sets of partial products. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_100:

	/* If bytes remain (%r8 != 0), XOR the last shuffled block into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_100
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_100:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler labelled "7 blocks".
 *
 * Two full-width counter vectors (%zmm0 and %zmm3) are encrypted here, both
 * keyed from 0..160(%rdi).  Interleaved with the AES rounds, the values at
 * 1280..1472(%rsp) are carry-less multiplied with the values at
 * 512..704(%rsp), summed with %zmm24/%zmm25/%zmm26 and reduced into
 * %xmm14.  The first 64 input bytes at (%rcx,%r11) are processed unmasked,
 * the bytes at 64(%rcx,%r11) under mask %k1; output goes to (%r9,%r11).
 * The shuffled results are then multiplied by values read from (%rsi) and
 * reduced into %xmm14.
 *
 * NOTE(review): %r8 is presumably the number of bytes left, %r15d the low
 * counter byte, %zmm28/%zmm27 the per-lane counter increments and %zmm29 a
 * byte-swap shuffle mask - all set up outside this excerpt; confirm.
 */
.L_last_num_blocks_is_7_88:
	/* %k1 = mask from byte64_len_to_mask_table[%r8 - 64]: second vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Unsigned test: %r15d >= 249 takes the slower path below. */
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_101
	/* Fast path: %zmm0 = %zmm2 + %zmm28, %zmm3 = %zmm0 + %zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_101

.L_16_blocks_overflow_101:
	/* Shuffle %zmm2, add ddq_add_1234 then ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_101:




	/* Round key 0 broadcast to all lanes; first pair of multiply operands. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Replicate lane 2 of the second counter vector into all of %zmm2. */
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial AddRoundKey on both counter vectors. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: high (0x11), low (0x00) and the two cross products. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES round with the key from 16(%rdi); the next key is prefetched. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the pair 1 sums. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 unmasked bytes, then up to 64 more bytes under %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold pair 4 and the incoming %zmm24/%zmm25/%zmm26 values into the sums. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle sum into halves that line up with high and low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	/* NOTE(review): POLY2 is presumably the GHASH reduction constant. */
	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the four lanes: high sum -> %xmm14, low -> %xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduction, first phase: multiply the low half by POLY2 and fold. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	/* Reduction, second phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 = high sum ^ both second-phase terms (upper lanes zeroed). */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* Keep lane 2 of the unmasked second result for the partial path. */
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	/* Store 64 bytes unmasked, then the masked remainder at offset 64. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of the second result that lie outside %k1. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both results with %zmm29; last block goes to %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	/* Remove the six leading 16-byte blocks from the length. */
	subq	$16 * (7 - 1),%r8


	/* Signed compare: fewer than 16 bytes left means a partial last block. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_102





	/* Full last block: consume 16 more bytes and store 0 at (%rdx). */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running %xmm14 into lane 0 of the first shuffled vector. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four multipliers at 240(%rsi). */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Last three blocks times the multipliers at 304, 320 and 336(%rsi). */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Merge the two sets of partial products. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the lanes: high -> %xmm0, low -> %xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_102
.L_small_initial_partial_block_102:








	/* Partial last block: save %r8 at (%rdx) and %xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four multipliers at 256(%rsi). */
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Only two lanes of the second vector are multiplied here (320(%rsi)). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Merge the two sets of partial products. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross products and align them with high/low. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_102:

	/* If bytes remain (%r8 != 0), XOR the last shuffled block into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_102
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_102:
	jmp	.L_last_blocks_done_88
.L_last_num_blocks_is_8_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_103
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_103

.L_16_blocks_overflow_103:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_103:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_104





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_104
.L_small_initial_partial_block_104:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_104:

	orq	%r8,%r8
	je	.L_after_reduction_104
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_104:
	jmp	.L_last_blocks_done_88
/*
 * Tail case for 9 remaining blocks: two full 64-byte vectors (zmm0, zmm3)
 * plus a single block in xmm4. r8 is used as the remaining byte count
 * (presumably 129..144 on entry -- confirm against the dispatch code).
 * AES rounds on the counter blocks are interleaved with carry-less
 * multiplies over data read from the stack frame (presumably earlier
 * ciphertext and hash-key powers -- confirm), folding in zmm24/zmm25/zmm26.
 * Result of the hash computation is left in xmm14; zmm2 is updated to the
 * last counter block replicated to all lanes.
 */
.L_last_num_blocks_is_9_88:
	/* k1 = byte64_len_to_mask_table[r8 - 128] (8-byte entries): byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 247 (unsigned) selects the slow path; presumably the low counter byte would wrap within 9 increments -- confirm. */
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_105
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, xmm4 = xmm3 + xmm27 (dword adds). */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_105

.L_16_blocks_overflow_105:
	/* Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 / ddq_add_4444, then shuffle the counter vectors back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_105:




	/* Round keys are broadcast from 0(%rdi)..160(%rdi), alternating zmm30 / zmm31. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = lane 0 of zmm4 (the 9th counter block) replicated to all four lanes. */
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products of 1280(%rsp) and 512(%rsp): zmm14 = high*high, zmm7 = low*low, zmm10 / zmm11 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of 1344(%rsp) and 576(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of 1408(%rsp) and 640(%rsp). */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) accumulates the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input at (rcx,r11); the third vector is loaded under k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of 1472(%rsp) and 704(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in zmm24 / zmm25 / zmm26 (presumably sums carried in from earlier code -- confirm). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the cross-term sum zmm10 into halves for the high (zmm14) and low (zmm7) accumulators. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	/* XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduce xmm14:xmm7 using the constant loaded from POLY2; result in xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the loaded input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	/* xmm11 = lane 0 of zmm4 (9th output block) taken before bytes outside k1 are cleared. */
	vextracti32x4	$0,%zmm4,%xmm11
	/* Store the output at (r9,r11); the third vector is written under mask k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1, then shuffle the output through zmm29 for hashing. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	/* xmm7 = shuffled 9th block. */
	vextracti32x4	$0,%zmm20,%xmm7
	/* r8 -= 128, leaving the byte count of the 9th block. */
	subq	$16 * (9 - 1),%r8


	/* Fewer than 16 bytes (signed compare): the 9th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_106





	/* Complete last block: r8 -= 16, store 0 at (rdx), and multiply all 9 blocks. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the reduced value xmm14 into the first blocks before multiplying. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Multipliers come from 208 / 272 / 336(%rsi) (presumably hash-key powers -- confirm against the context layout). */
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_106
.L_small_initial_partial_block_106:








	/* Partial last block: store its byte count at (rdx) and save xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Only the first 8 blocks (zmm17, zmm19) are multiplied here, using 224 / 288(%rsi). */
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_106:

	/* r8 != 0 (partial block pending): XOR the shuffled last block xmm7 into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_106
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_106:
	jmp	.L_last_blocks_done_88
/*
 * Tail case for 10 remaining blocks: two full 64-byte vectors (zmm0, zmm3)
 * plus two blocks in ymm4. r8 is used as the remaining byte count
 * (presumably 145..160 on entry -- confirm against the dispatch code).
 * AES rounds on the counter blocks are interleaved with carry-less
 * multiplies over data read from the stack frame (presumably earlier
 * ciphertext and hash-key powers -- confirm), folding in zmm24/zmm25/zmm26.
 * Result of the hash computation is left in xmm14; zmm2 is updated to the
 * last counter block replicated to all lanes.
 */
.L_last_num_blocks_is_10_88:
	/* k1 = byte64_len_to_mask_table[r8 - 128] (8-byte entries): byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 246 (unsigned) selects the slow path; presumably the low counter byte would wrap within 10 increments -- confirm. */
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_107
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, ymm4 = ymm3 + ymm27 (dword adds). */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_107

.L_16_blocks_overflow_107:
	/* Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 / ddq_add_4444, then shuffle the counter vectors back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_107:




	/* Round keys are broadcast from 0(%rdi)..160(%rdi), alternating zmm30 / zmm31. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = lane 1 of zmm4 (the 10th counter block) replicated to all four lanes. */
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products of 1280(%rsp) and 512(%rsp): zmm14 = high*high, zmm7 = low*low, zmm10 / zmm11 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of 1344(%rsp) and 576(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of 1408(%rsp) and 640(%rsp). */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) accumulates the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input at (rcx,r11); the third vector is loaded under k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of 1472(%rsp) and 704(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in zmm24 / zmm25 / zmm26 (presumably sums carried in from earlier code -- confirm). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the cross-term sum zmm10 into halves for the high (zmm14) and low (zmm7) accumulators. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	/* XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduce xmm14:xmm7 using the constant loaded from POLY2; result in xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the loaded input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	/* xmm11 = lane 1 of zmm4 (10th output block) taken before bytes outside k1 are cleared. */
	vextracti32x4	$1,%zmm4,%xmm11
	/* Store the output at (r9,r11); the third vector is written under mask k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1, then shuffle the output through zmm29 for hashing. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	/* xmm7 = shuffled 10th block. */
	vextracti32x4	$1,%zmm20,%xmm7
	/* r8 -= 144, leaving the byte count of the 10th block. */
	subq	$16 * (10 - 1),%r8


	/* Fewer than 16 bytes (signed compare): the 10th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_108





	/* Complete last block: r8 -= 16, store 0 at (rdx), and multiply all 10 blocks. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the reduced value xmm14 into the first blocks before multiplying. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Multipliers come from 192 / 256 / 320(%rsi) (presumably hash-key powers -- confirm against the context layout). */
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_108
.L_small_initial_partial_block_108:








	/* Partial last block: store its byte count at (rdx) and save xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Only the first 9 blocks (zmm17, zmm19, xmm20) are multiplied here, using 208 / 272 / 336(%rsi). */
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_108:

	/* r8 != 0 (partial block pending): XOR the shuffled last block xmm7 into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_108
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_108:
	jmp	.L_last_blocks_done_88
/*
 * Tail case for 11 remaining blocks: two full 64-byte vectors (zmm0, zmm3)
 * plus three blocks in zmm4. r8 is used as the remaining byte count
 * (presumably 161..176 on entry -- confirm against the dispatch code).
 * AES rounds on the counter blocks are interleaved with carry-less
 * multiplies over data read from the stack frame (presumably earlier
 * ciphertext and hash-key powers -- confirm), folding in zmm24/zmm25/zmm26.
 * Result of the hash computation is left in xmm14; zmm2 is updated to the
 * last counter block replicated to all lanes.
 */
.L_last_num_blocks_is_11_88:
	/* k1 = byte64_len_to_mask_table[r8 - 128] (8-byte entries): byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 245 (unsigned) selects the slow path; presumably the low counter byte would wrap within 11 increments -- confirm. */
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_109
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, zmm4 = zmm3 + zmm27 (dword adds). */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_109

.L_16_blocks_overflow_109:
	/* Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 / ddq_add_4444, then shuffle the counter vectors back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_109:




	/* Round keys are broadcast from 0(%rdi)..160(%rdi), alternating zmm30 / zmm31. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = lane 2 of zmm4 (the 11th counter block) replicated to all four lanes. */
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products of 1280(%rsp) and 512(%rsp): zmm14 = high*high, zmm7 = low*low, zmm10 / zmm11 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of 1344(%rsp) and 576(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of 1408(%rsp) and 640(%rsp). */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) accumulates the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input at (rcx,r11); the third vector is loaded under k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of 1472(%rsp) and 704(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in zmm24 / zmm25 / zmm26 (presumably sums carried in from earlier code -- confirm). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the cross-term sum zmm10 into halves for the high (zmm14) and low (zmm7) accumulators. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduce xmm14:xmm7 using the constant loaded from POLY2; result in xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the loaded input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* xmm11 = lane 2 of zmm4 (11th output block) taken before bytes outside k1 are cleared. */
	vextracti32x4	$2,%zmm4,%xmm11
	/* Store the output at (r9,r11); the third vector is written under mask k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1, then shuffle the output through zmm29 for hashing. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* xmm7 = shuffled 11th block. */
	vextracti32x4	$2,%zmm20,%xmm7
	/* r8 -= 160, leaving the byte count of the 11th block. */
	subq	$16 * (11 - 1),%r8


	/* Fewer than 16 bytes (signed compare): the 11th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_110





	/* Complete last block: r8 -= 16, store 0 at (rdx), and multiply all 11 blocks. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the reduced value xmm14 into the first blocks before multiplying. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Multipliers come from 176 / 240 / 304 / 336(%rsi) (presumably hash-key powers -- confirm against the context layout). */
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Build a three-lane multiplier: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_110
.L_small_initial_partial_block_110:








	/* Partial last block: store its byte count at (rdx) and save xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Only the first 10 blocks (zmm17, zmm19, ymm20) are multiplied here, using 192 / 256 / 320(%rsi). */
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_110:

	/* r8 != 0 (partial block pending): XOR the shuffled last block xmm7 into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_110
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_110:
	jmp	.L_last_blocks_done_88
/*
 * Tail case for 12 remaining blocks: three 64-byte vectors (zmm0, zmm3,
 * zmm4), the last of which is loaded and stored under mask k1. r8 is used
 * as the remaining byte count (presumably 177..192 on entry -- confirm
 * against the dispatch code).
 * AES rounds on the counter blocks are interleaved with carry-less
 * multiplies over data read from the stack frame (presumably earlier
 * ciphertext and hash-key powers -- confirm), folding in zmm24/zmm25/zmm26.
 * Result of the hash computation is left in xmm14; zmm2 is updated to the
 * last counter block replicated to all lanes.
 */
.L_last_num_blocks_is_12_88:
	/* k1 = byte64_len_to_mask_table[r8 - 128] (8-byte entries): byte mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 244 (unsigned) selects the slow path; presumably the low counter byte would wrap within 12 increments -- confirm. */
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_111
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, zmm4 = zmm3 + zmm27 (dword adds). */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_111

.L_16_blocks_overflow_111:
	/* Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 / ddq_add_4444, then shuffle the counter vectors back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_111:




	/* Round keys are broadcast from 0(%rdi)..160(%rdi), alternating zmm30 / zmm31. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = lane 3 of zmm4 (the 12th counter block) replicated to all four lanes. */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Products of 1280(%rsp) and 512(%rsp): zmm14 = high*high, zmm7 = low*low, zmm10 / zmm11 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of 1344(%rsp) and 576(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of 1408(%rsp) and 640(%rsp). */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) accumulates the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input at (rcx,r11); the third vector is loaded under k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of 1472(%rsp) and 704(%rsp). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in zmm24 / zmm25 / zmm26 (presumably sums carried in from earlier code -- confirm). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the cross-term sum zmm10 into halves for the high (zmm14) and low (zmm7) accumulators. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* XOR the four 128-bit lanes of zmm14 and of zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* Reduce xmm14:xmm7 using the constant loaded from POLY2; result in xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round with the key at 160(%rdi), then XOR with the loaded input. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* xmm11 = lane 3 of zmm4 (12th output block) taken before bytes outside k1 are cleared. */
	vextracti32x4	$3,%zmm4,%xmm11
	/* Store the output at (r9,r11); the third vector is written under mask k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1, then shuffle the output through zmm29 for hashing. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* xmm7 = shuffled 12th block. */
	vextracti32x4	$3,%zmm20,%xmm7
	/* r8 -= 176, leaving the byte count of the 12th block. */
	subq	$16 * (12 - 1),%r8


	/* Fewer than 16 bytes (signed compare): the 12th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_112





	/* Complete last block: r8 -= 16, store 0 at (rdx), and multiply all 12 blocks. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the reduced value xmm14 into the first blocks before multiplying. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Multipliers come from 160 / 224 / 288(%rsi) (presumably hash-key powers -- confirm against the context layout). */
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	/* zmm17 / zmm19 are reused as temporaries; three-way XOR merges the products of all three vectors. */
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_112
.L_small_initial_partial_block_112:








	/* Partial last block: store its byte count at (rdx) and save xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Only 11 multiplier blocks are loaded here (176 / 240 / 304 / 336(%rsi)); lane 3 of the third multiplier is left zero by the ymm load. */
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine cross terms, fold the lanes, and reduce with POLY2 into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_112:

	/* r8 != 0 (partial block pending): XOR the shuffled last block xmm7 into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_112
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_112:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler for 13 blocks: three full 64-byte vectors plus one
 * 16-byte lane.
 *   k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries), the byte
 *        mask later applied to the fourth vector.
 *   zmm0, zmm3, zmm4, xmm5 = counter blocks derived from zmm2.
 * NOTE(review): r15d is presumably the low byte of the counter, so that
 * 243 = 256 - 13 detects a carry out of that byte; zmm28/zmm27 presumably
 * hold the per-lane increments for the no-carry case - confirm.
 */
.L_last_num_blocks_is_13_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_113
	/* fast path: add the increments directly, no byte shuffles */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_113

/*
 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
 * ddq_add_4444 repeatedly, and shuffle every result back with zmm29.
 */
.L_16_blocks_overflow_113:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5	/* only lane 0 of the fourth vector is kept */
/*
 * 13-block tail: encrypt the counter blocks in zmm0/zmm3/zmm4/xmm5 with
 * the round keys at 0..160(%rdi) (initial XOR, nine vaesenc, one
 * vaesenclast), XOR them with the input at (%rcx,%r11) and store the
 * result at (%r9,%r11); the fourth vector is loaded and stored under k1.
 * Interleaved with the AES rounds, the data at 1280..1472(%rsp) is
 * carry-less multiplied by the data at 512..704(%rsp), combined with
 * zmm24/zmm25/zmm26 and reduced with POLY2 into xmm14.
 * NOTE(review): presumably those are 16 earlier byte-reflected blocks and
 * the matching hash-key powers - confirm.
 * Afterwards the new output is byte-shuffled and, when at least 16 bytes
 * remain for the 13th block, all 13 blocks are hashed into xmm14.
 */
.L_16_blocks_ok_113:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = last counter block used (lane 0 of zmm5) copied to all lanes */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2

	/* round 0: XOR with the key at 0(%rdi) */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30

	/* first stack vector pair: high, low and the two cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31

	/* second stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30

	/* third stack vector pair */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31

	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the running sums */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth vector is masked by k1 with zeroing */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31

	/* fourth stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* fold in pair 4 and the accumulators zmm24 (high), zmm25 (low), zmm26 (cross) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* split the cross term into the parts belonging to the high and low sums */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	/* XOR the four lanes of the high (zmm14) and low (zmm7) sums into lane 0 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* two-phase reduction with POLY2 (xmm16); result lands in xmm14 */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* final AES round, then XOR the keystream with the input */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm11	/* xmm11 = 13th output block before masking */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}	/* zero the bytes of zmm5 outside k1 */
	/* byte-shuffle the output with zmm29 into zmm17/zmm19/zmm20/xmm21 */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7	/* xmm7 = shuffled 13th block */
	subq	$16 * (13 - 1),%r8	/* drop the 12 preceding blocks from r8 */

	/* fewer than 16 left (signed compare): the 13th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_114

	/*
	 * Full 13th block: clear the value at (%rdx) and hash all 13 blocks
	 * with the tables at 144, 208, 272 and 336(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1	/* single table entry for the 13th block */
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* combine the cross terms, fold the lanes, then reduce with POLY2 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_114
/*
 * 13-block tail, partial 13th block (reached when r8 < 16).
 * Stores r8 at (%rdx) and the unmasked 13th output block (xmm11) at
 * 16(%rsi), then hashes only the twelve complete blocks held in
 * zmm17/zmm19/zmm20 with the tables at 160, 224 and 288(%rsi) and reduces
 * with POLY2 into xmm14.
 * NOTE(review): (%rdx) is presumably the saved partial-block length and
 * 16(%rsi) the saved partial block in the GCM context - confirm.
 */
.L_small_initial_partial_block_114:




	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* combine the cross terms and split them across high (zmm0) and low (zmm3) */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

	/* reduction, first phase */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4

	/* reduction, second phase: result in xmm14 */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point of the 13-block tail handler. A non-zero r8 means the last
 * block was partial, in which case its shuffled, zero-masked image
 * (xmm7) is XORed into xmm14. Then jump to the common exit of the tail
 * handlers.
 */
.L_small_initial_compute_done_114:

	orq	%r8,%r8	/* sets ZF when r8 == 0; r8 itself is unchanged */
	je	.L_after_reduction_114
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_114:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler for 14 blocks: three full 64-byte vectors plus two
 * 16-byte lanes.
 *   k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries), the byte
 *        mask later applied to the fourth vector.
 *   zmm0, zmm3, zmm4, ymm5 = counter blocks derived from zmm2.
 * NOTE(review): r15d is presumably the low byte of the counter, so that
 * 242 = 256 - 14 detects a carry out of that byte; zmm28/zmm27 presumably
 * hold the per-lane increments for the no-carry case - confirm.
 */
.L_last_num_blocks_is_14_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_115
	/* fast path: add the increments directly, no byte shuffles */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_115

/*
 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
 * ddq_add_4444 repeatedly, and shuffle every result back with zmm29.
 */
.L_16_blocks_overflow_115:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5	/* only lanes 0-1 of the fourth vector are kept */
/*
 * 14-block tail: encrypt the counter blocks in zmm0/zmm3/zmm4/ymm5 with
 * the round keys at 0..160(%rdi) (initial XOR, nine vaesenc, one
 * vaesenclast), XOR them with the input at (%rcx,%r11) and store the
 * result at (%r9,%r11); the fourth vector is loaded and stored under k1.
 * Interleaved with the AES rounds, the data at 1280..1472(%rsp) is
 * carry-less multiplied by the data at 512..704(%rsp), combined with
 * zmm24/zmm25/zmm26 and reduced with POLY2 into xmm14.
 * NOTE(review): presumably those are 16 earlier byte-reflected blocks and
 * the matching hash-key powers - confirm.
 * Afterwards the new output is byte-shuffled and, when at least 16 bytes
 * remain for the 14th block, all 14 blocks are hashed into xmm14.
 */
.L_16_blocks_ok_115:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = last counter block used (lane 1 of zmm5) copied to all lanes */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2

	/* round 0: XOR with the key at 0(%rdi) */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30

	/* first stack vector pair: high, low and the two cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31

	/* second stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30

	/* third stack vector pair */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31

	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the running sums */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth vector is masked by k1 with zeroing */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31

	/* fourth stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* fold in pair 4 and the accumulators zmm24 (high), zmm25 (low), zmm26 (cross) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* split the cross term into the parts belonging to the high and low sums */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	/* XOR the four lanes of the high (zmm14) and low (zmm7) sums into lane 0 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* two-phase reduction with POLY2 (xmm16); result lands in xmm14 */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* final AES round, then XOR the keystream with the input */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	vextracti32x4	$1,%zmm5,%xmm11	/* xmm11 = 14th output block before masking */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}	/* zero the bytes of zmm5 outside k1 */
	/* byte-shuffle the output with zmm29 into zmm17/zmm19/zmm20/ymm21 */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7	/* xmm7 = shuffled 14th block */
	subq	$16 * (14 - 1),%r8	/* drop the 13 preceding blocks from r8 */

	/* fewer than 16 left (signed compare): the 14th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_116

	/*
	 * Full 14th block: clear the value at (%rdx) and hash all 14 blocks
	 * with the tables at 128, 192, 256 and 320(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1	/* two table entries for blocks 13 and 14 */
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* combine the cross terms, fold the lanes, then reduce with POLY2 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_116
/*
 * 14-block tail, partial 14th block (reached when r8 < 16).
 * Stores r8 at (%rdx) and the unmasked 14th output block (xmm11) at
 * 16(%rsi), then hashes only the thirteen complete blocks held in
 * zmm17/zmm19/zmm20/xmm21 with the tables at 144, 208, 272 and 336(%rsi)
 * and reduces with POLY2 into xmm14.
 * NOTE(review): (%rdx) is presumably the saved partial-block length and
 * 16(%rsi) the saved partial block in the GCM context - confirm.
 */
.L_small_initial_partial_block_116:




	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* 13th block only: xmm-wide multiply leaves the partial 14th block out */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* combine the cross terms and split them across high (zmm0) and low (zmm3) */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

	/* reduction, first phase */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4

	/* reduction, second phase: result in xmm14 */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point of the 14-block tail handler. A non-zero r8 means the last
 * block was partial, in which case its shuffled, zero-masked image
 * (xmm7) is XORed into xmm14. Then jump to the common exit of the tail
 * handlers.
 */
.L_small_initial_compute_done_116:

	orq	%r8,%r8	/* sets ZF when r8 == 0; r8 itself is unchanged */
	je	.L_after_reduction_116
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_116:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler for 15 blocks: three full 64-byte vectors plus three
 * 16-byte lanes.
 *   k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries), the byte
 *        mask later applied to the fourth vector.
 *   zmm0, zmm3, zmm4, zmm5 = counter blocks derived from zmm2 (all four
 *        lanes of zmm5 are computed; lane 2 is the last one consumed).
 * NOTE(review): r15d is presumably the low byte of the counter, so that
 * 241 = 256 - 15 detects a carry out of that byte; zmm28/zmm27 presumably
 * hold the per-lane increments for the no-carry case - confirm.
 */
.L_last_num_blocks_is_15_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_117
	/* fast path: add the increments directly, no byte shuffles */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_117

/*
 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
 * ddq_add_4444 repeatedly, and shuffle every result back with zmm29.
 */
.L_16_blocks_overflow_117:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * 15-block tail: encrypt the counter blocks in zmm0/zmm3/zmm4/zmm5 with
 * the round keys at 0..160(%rdi) (initial XOR, nine vaesenc, one
 * vaesenclast), XOR them with the input at (%rcx,%r11) and store the
 * result at (%r9,%r11); the fourth vector is loaded and stored under k1.
 * Interleaved with the AES rounds, the data at 1280..1472(%rsp) is
 * carry-less multiplied by the data at 512..704(%rsp), combined with
 * zmm24/zmm25/zmm26 and reduced with POLY2 into xmm14.
 * NOTE(review): presumably those are 16 earlier byte-reflected blocks and
 * the matching hash-key powers - confirm.
 * Afterwards the new output is byte-shuffled and, when at least 16 bytes
 * remain for the 15th block, all 15 blocks are hashed into xmm14.
 */
.L_16_blocks_ok_117:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = last counter block used (lane 2 of zmm5) copied to all lanes */
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2

	/* round 0: XOR with the key at 0(%rdi) */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30

	/* first stack vector pair: high, low and the two cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31

	/* second stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30

	/* third stack vector pair */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31

	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the running sums */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth vector is masked by k1 with zeroing */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31

	/* fourth stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* fold in pair 4 and the accumulators zmm24 (high), zmm25 (low), zmm26 (cross) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* split the cross term into the parts belonging to the high and low sums */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* XOR the four lanes of the high (zmm14) and low (zmm7) sums into lane 0 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* two-phase reduction with POLY2 (xmm16); result lands in xmm14 */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* final AES round, then XOR the keystream with the input */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11	/* xmm11 = 15th output block before masking */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}	/* zero the bytes of zmm5 outside k1 */
	/* byte-shuffle the output with zmm29 into zmm17/zmm19/zmm20/zmm21 */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7	/* xmm7 = shuffled 15th block */
	subq	$16 * (15 - 1),%r8	/* drop the 14 preceding blocks from r8 */

	/* fewer than 16 left (signed compare): the 15th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_118

	/*
	 * Full 15th block: clear the value at (%rdx) and hash all 15 blocks
	 * with the tables at 112, 176, 240 and 304..351(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* 32-byte load plus one inserted lane: three table entries, top lane zero */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* combine the cross terms, fold the lanes, then reduce with POLY2 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_118
/*
 * 15-block tail, partial 15th block (reached when r8 < 16).
 * Stores r8 at (%rdx) and the unmasked 15th output block (xmm11) at
 * 16(%rsi), then hashes only the fourteen complete blocks held in
 * zmm17/zmm19/zmm20/ymm21 with the tables at 128, 192, 256 and 320(%rsi)
 * and reduces with POLY2 into xmm14.
 * NOTE(review): (%rdx) is presumably the saved partial-block length and
 * 16(%rsi) the saved partial block in the GCM context - confirm.
 */
.L_small_initial_partial_block_118:




	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* blocks 13-14 only: ymm-wide multiply leaves the partial 15th block out */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* combine the cross terms and split them across high (zmm0) and low (zmm3) */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

	/* reduction, first phase */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4

	/* reduction, second phase: result in xmm14 */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point of the 15-block tail handler. A non-zero r8 means the last
 * block was partial, in which case its shuffled, zero-masked image
 * (xmm7) is XORed into xmm14. Then jump to the common exit of the tail
 * handlers.
 */
.L_small_initial_compute_done_118:

	orq	%r8,%r8	/* sets ZF when r8 == 0; r8 itself is unchanged */
	je	.L_after_reduction_118
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_118:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler for 16 blocks: four 64-byte vectors.
 *   k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries), the byte
 *        mask later applied to the fourth vector.
 *   zmm0, zmm3, zmm4, zmm5 = counter blocks derived from zmm2.
 * NOTE(review): r15d is presumably the low byte of the counter, so that
 * 240 = 256 - 16 detects a carry out of that byte; zmm28/zmm27 presumably
 * hold the per-lane increments for the no-carry case - confirm.
 */
.L_last_num_blocks_is_16_88:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_119
	/* fast path: add the increments directly, no byte shuffles */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_119

/*
 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
 * ddq_add_4444 repeatedly, and shuffle every result back with zmm29.
 */
.L_16_blocks_overflow_119:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * 16-block tail: encrypt the counter blocks in zmm0/zmm3/zmm4/zmm5 with
 * the round keys at 0..160(%rdi) (initial XOR, nine vaesenc, one
 * vaesenclast), XOR them with the input at (%rcx,%r11) and store the
 * result at (%r9,%r11); the fourth vector is loaded and stored under k1.
 * Interleaved with the AES rounds, the data at 1280..1472(%rsp) is
 * carry-less multiplied by the data at 512..704(%rsp), combined with
 * zmm24/zmm25/zmm26 and reduced with POLY2 into xmm14.
 * NOTE(review): presumably those are 16 earlier byte-reflected blocks and
 * the matching hash-key powers - confirm.
 * Execution then falls through into the code that follows; unlike the
 * 13..15-block handlers there is no "remaining >= 16" test here.
 */
.L_16_blocks_ok_119:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* zmm2 = last counter block used (lane 3 of zmm5) copied to all lanes */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2

	/* round 0: XOR with the key at 0(%rdi) */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30

	/* first stack vector pair: high, low and the two cross products */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31

	/* second stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30

	/* third stack vector pair */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31

	/* 0x96 is a three-way XOR: fold pairs 2 and 3 into the running sums */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth vector is masked by k1 with zeroing */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31

	/* fourth stack vector pair */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* fold in pair 4 and the accumulators zmm24 (high), zmm25 (low), zmm26 (cross) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* split the cross term into the parts belonging to the high and low sums */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* XOR the four lanes of the high (zmm14) and low (zmm7) sums into lane 0 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	/* two-phase reduction with POLY2 (xmm16); result lands in xmm14 */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* final AES round, then XOR the keystream with the input */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$3,%zmm5,%xmm11	/* xmm11 = 16th output block before masking */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}	/* zero the bytes of zmm5 outside k1 */
	/* byte-shuffle the output with zmm29 into zmm17/zmm19/zmm20/zmm21 */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7	/* xmm7 = shuffled 16th block */
	subq	$16 * (16 - 1),%r8	/* drop the 15 preceding blocks from r8 */
/*
 * 16-block tail, last-block bookkeeping. This label is entered by
 * fall-through from the code above, with no length test in front of it.
 * Stores r8 at (%rdx) and the unmasked 16th output block (xmm11) at
 * 16(%rsi), then hashes the first fifteen blocks held in
 * zmm17/zmm19/zmm20/zmm21 with the tables at 112, 176, 240 and
 * 304..351(%rsi) and reduces with POLY2 into xmm14. The fourth table
 * vector has a zero top lane, so the 16th block is left out here.
 * NOTE(review): (%rdx) is presumably the saved partial-block length and
 * 16(%rsi) the saved partial block in the GCM context - confirm.
 */
.L_small_initial_partial_block_120:




	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17	/* fold xmm14 into the first block */
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* 32-byte load plus one inserted lane: three table entries, top lane zero */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* combine the cross terms and split them across high (zmm0) and low (zmm3) */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1

	/* reduction, first phase */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4

	/* reduction, second phase: result in xmm14 */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point of the 16-block tail handler. xmm7 (the shuffled, masked
 * 16th block) is XORed into xmm14 unconditionally - there is no r8 test
 * here - and control jumps to the common exit of the tail handlers.
 */
.L_small_initial_compute_done_120:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_120:
	jmp	.L_last_blocks_done_88
/*
 * Tail handler for zero remaining blocks: no encryption, hash only.
 * The four vectors at 1280, 1344, 1408 and 1472(%rsp) are carry-less
 * multiplied by the four vectors at 512, 576, 640 and 704(%rsp); the
 * products are XORed into zmm24 (high), zmm25 (low) and zmm26 (cross
 * terms), folded to 128 bits and reduced with POLY2 into xmm14.
 * NOTE(review): presumably the stack vectors are 16 pending
 * byte-reflected blocks and the matching hash-key powers - confirm.
 * Falls through into the code after this block.
 */
.L_last_num_blocks_is_0_88:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* 0x96 is a three-way XOR: accumulate the first two vector pairs */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	/* accumulate the last two vector pairs */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* split the cross term (zmm26) across the high and low sums */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	/* XOR the four 128-bit lanes of each sum down to one lane */
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4

	/* reduction, first phase */
	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0

	/* reduction, second phase: xmm14 = xmm14 ^ xmm3 ^ xmm24 */
	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

/* Common exit of the "_88" final-blocks handlers: byte-shuffle xmm2
   with the control mask in xmm29 and continue at .L_ghash_done_10.
   NOTE(review): xmm2 presumably holds the last counter block and xmm29 a
   byte-swap mask - confirm against the register setup outside this chunk. */
.L_last_blocks_done_88:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10

/* Entry for the "below 32 blocks" path.
   Accounts for 256 bytes (r8 -= 256, r11 += 256) and keeps a copy of the
   low 32 bits of r8 in r10d for the dispatch code that follows.
   If r14 is non-zero the table extension below is skipped.  Otherwise:
     - lane 0 of the vector at 640(%rsp) is broadcast to all lanes of zmm3;
     - the vectors at 576(%rsp) and 512(%rsp) are each multiplied by zmm3,
       reduced with POLY2 and stored at 448(%rsp) and 384(%rsp);
     - those two results are multiplied by zmm3 once more, reduced and
       stored at 320(%rsp) and 256(%rsp).
   NOTE(review): this looks like extending an on-stack table of hash-key
   powers downward, with r14 as the "already computed" flag - confirm
   against the frame layout defined outside this chunk. */
.L_message_below_32_blocks_10:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_121
	vmovdqu64	640(%rsp),%zmm3


	/* imm 0x00 with identical sources replicates 128-bit lane 0. */
	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	/* zmm4 = reduce(zmm4 * zmm3) -> 448(%rsp) */
	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	/* zmm5 = reduce(zmm5 * zmm3) -> 384(%rsp) */
	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	/* zmm4 = reduce(zmm4 * zmm3) again -> 320(%rsp) */
	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	/* zmm5 = reduce(zmm5 * zmm3) again -> 256(%rsp) */
	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
/* Dispatch on the number of remaining 16-byte blocks.
   - r14 is set to 1 (the flag tested before the table extension above).
   - ebx = 512 - (r10d & ~15), where r10d still holds the low 32 bits of
     r8 copied at .L_message_below_32_blocks_10; the handlers use rbx as
     a byte offset into the stack frame.
   - r10d = (r8d + 15) >> 4, i.e. the byte count rounded up to blocks.
   The shift's zero flag selects the 0-block handler; values 1..16 are
   resolved by the compare tree below (8, then 12/4, then 15/14/10/6/2),
   using unsigned jb/ja.  Count 1 falls through past the end of this
   region into .L_last_num_blocks_is_1_122. */
.L_skip_hkeys_precomputation_121:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_122

	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_122
	jb	.L_last_num_blocks_is_7_1_122


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_122
	jb	.L_last_num_blocks_is_11_9_122


	/* 13..16 */
	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_122
	ja	.L_last_num_blocks_is_16_122
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_122
	jmp	.L_last_num_blocks_is_13_122

.L_last_num_blocks_is_11_9_122:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_122
	ja	.L_last_num_blocks_is_11_122
	jmp	.L_last_num_blocks_is_9_122

.L_last_num_blocks_is_7_1_122:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_122
	jb	.L_last_num_blocks_is_3_1_122

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_122
	je	.L_last_num_blocks_is_6_122
	jmp	.L_last_num_blocks_is_5_122

.L_last_num_blocks_is_3_1_122:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_122
	je	.L_last_num_blocks_is_2_122
/* One remaining block: build the byte mask and the counter block.
   k1 = 64-bit entry r8 of byte64_len_to_mask_table; the code that
   follows uses it to mask the final load and store.
   If r15d < 255 the counter block is produced directly
   (xmm0 = xmm2 + xmm28).  Otherwise zmm2 is byte-shuffled through zmm29,
   ddq_add_1234 is added, and the low lane is shuffled back.
   NOTE(review): r15d presumably tracks the low counter byte so the slow
   path handles a carry out of it - confirm against the counter setup
   outside this chunk. */
.L_last_num_blocks_is_1_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_123
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_123

.L_16_blocks_overflow_123:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
/* One remaining block: AES rounds interleaved with the multiply of the
   buffered vectors.
   AES: xmm0 is XORed with the key at 0(%rdi), run through nine vaesenc
   rounds (keys at 16..144(%rdi)) and one vaesenclast (key at 160(%rdi)).
   Multiply: (zmm14 ^ 768(%rsp)), 832(%rsp), 896(%rsp), 960(%rsp) are
   multiplied by the vectors at 0/64/128/192(%rsp,%rbx); the XOR-sums go
   to zmm24 (imm 0x11), zmm25 (imm 0x00) and zmm26 (imm 0x01/0x10).
   Data: input bytes are loaded from (%rcx,%r11) under k1, XORed with the
   cipher output and stored to (%r9,%r11) under k1.
   Lane 0 of the pre-encryption counter vector is replicated into zmm2.
   Tail: if r8 >= 16 (signed compare) the block is complete: r8 -= 16,
   0 is written to (%rdx), the byte-shuffled output (zmm17) is multiplied
   by the value at 336(%rsi), merged with the sums, and reduced with
   POLY2 into xmm14.  Otherwise control goes to the partial-block path.
   NOTE(review): (%rdx) is presumably the stored partial-block length and
   336(%rsi) the first hash-key power in the context - confirm. */
.L_16_blocks_ok_123:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the remaining input bytes */
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	/* xmm11 keeps the output block before masked-off bytes are cleared */
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	/* complete (>= 16 bytes) versus partial final block */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_124





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* reduce xmm0:xmm3 with POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_124
/* One remaining block, shorter than 16 bytes.
   Writes r8 to (%rdx) and xmm11 to 16(%rsi), then folds and reduces the
   sums already in zmm24/zmm25/zmm26 with POLY2 into xmm14.  The
   byte-shuffled partial block in xmm7 is XORed into xmm14 afterwards
   without being multiplied here.
   NOTE(review): (%rdx) / 16(%rsi) presumably are the saved partial-block
   length and partial-block state consumed by a later call - confirm. */
.L_small_initial_partial_block_124:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_124
/* Both one-block paths meet here and leave through the common exit. */
.L_small_initial_compute_done_124:
.L_after_reduction_124:
	jmp	.L_last_blocks_done_122
/* Two remaining blocks: build the byte mask and the counter blocks.
   k1 = 64-bit entry r8 of byte64_len_to_mask_table.
   If r15d < 254 the counters are produced directly
   (ymm0 = ymm2 + ymm28); otherwise zmm2 is byte-shuffled through zmm29,
   ddq_add_1234 is added, and the low two lanes are shuffled back.
   NOTE(review): r15d presumably tracks the low counter byte - confirm. */
.L_last_num_blocks_is_2_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_125
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_125

.L_16_blocks_overflow_125:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
/* Two remaining blocks: AES rounds on ymm0 (keys at 0..160(%rdi): XOR,
   nine vaesenc, one vaesenclast) interleaved with the multiply of
   (zmm14 ^ 768(%rsp)), 832/896/960(%rsp) by the vectors at
   0/64/128/192(%rsp,%rbx); sums go to zmm24 (0x11), zmm25 (0x00),
   zmm26 (0x01/0x10).
   Input is loaded from (%rcx,%r11) under k1, XORed with the cipher
   output and stored to (%r9,%r11) under k1.  Lane 1 of the counter
   vector is replicated into zmm2; xmm11 / xmm7 keep lane 1 of the raw
   and of the masked, byte-shuffled output.
   r8 is reduced by 16 for the one block known to be complete.  If the
   last block is also complete (r8 >= 16, signed) r8 -= 16, 0 goes to
   (%rdx), both output blocks (ymm17) are multiplied by the 32 bytes at
   320(%rsi), merged with the sums and reduced with POLY2 into xmm14. */
.L_16_blocks_ok_125:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the remaining input bytes */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	subq	$16 * (2 - 1),%r8


	/* complete (>= 16 bytes) versus partial final block */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_126





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* reduce xmm0:xmm3 with POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_126
/* Two remaining blocks, the last one shorter than 16 bytes.
   Writes r8 to (%rdx) and xmm11 to 16(%rsi).  Only the low lane of the
   byte-shuffled output (xmm17) is multiplied, by the 16 bytes at
   336(%rsi); the product is merged with the sums in zmm24/25/26 and
   reduced with POLY2 into xmm14.  The partial block (xmm7) is XORed in
   by the code that follows this region. */
.L_small_initial_partial_block_126:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Join point of the two-block paths.  When r8 is non-zero (bytes of a
   partial block remain) xmm7 is XORed into xmm14; when r8 == 0 that XOR
   is skipped.  Then leave through the common exit. */
.L_small_initial_compute_done_126:

	orq	%r8,%r8
	je	.L_after_reduction_126
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_126:
	jmp	.L_last_blocks_done_122
/* Three remaining blocks: build the byte mask and the counter blocks.
   k1 = 64-bit entry r8 of byte64_len_to_mask_table.
   If r15d < 253 the counters are produced directly
   (zmm0 = zmm2 + zmm28); otherwise zmm2 is byte-shuffled through zmm29,
   ddq_add_1234 is added, and the result is shuffled back.
   NOTE(review): r15d presumably tracks the low counter byte - confirm. */
.L_last_num_blocks_is_3_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_127
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_127

.L_16_blocks_overflow_127:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
/* Three remaining blocks: AES rounds on zmm0 (keys at 0..160(%rdi):
   XOR, nine vaesenc, one vaesenclast) interleaved with the multiply of
   (zmm14 ^ 768(%rsp)), 832/896/960(%rsp) by the vectors at
   0/64/128/192(%rsp,%rbx); sums go to zmm24 (0x11), zmm25 (0x00),
   zmm26 (0x01/0x10).
   Input is loaded from (%rcx,%r11) under k1, XORed with the cipher
   output and stored to (%r9,%r11) under k1.  Lane 2 of the counter
   vector is replicated into zmm2; xmm11 / xmm7 keep lane 2 of the raw
   and of the masked, byte-shuffled output.
   r8 is reduced by 32 for the two blocks known to be complete.  If the
   last block is also complete (r8 >= 16, signed) r8 -= 16, 0 goes to
   (%rdx), the output (zmm17) is multiplied by the 48 bytes gathered
   from 304(%rsi) and 336(%rsi), merged with the sums and reduced with
   POLY2 into xmm14. */
.L_16_blocks_ok_127:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the remaining input bytes */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	subq	$16 * (3 - 1),%r8


	/* complete (>= 16 bytes) versus partial final block */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_128





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* reduce xmm0:xmm3 with POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_128
/* Three remaining blocks, the last one shorter than 16 bytes.
   Writes r8 to (%rdx) and xmm11 to 16(%rsi).  Only the two low lanes of
   the byte-shuffled output (ymm17) are multiplied, by the 32 bytes at
   320(%rsi); the products are merged with the sums in zmm24/25/26 and
   reduced with POLY2 into xmm14.  The partial block (xmm7) is XORed in
   by the code that follows this region. */
.L_small_initial_partial_block_128:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Join point of the three-block paths.  When r8 is non-zero (bytes of a
   partial block remain) xmm7 is XORed into xmm14; when r8 == 0 that XOR
   is skipped.  Then leave through the common exit. */
.L_small_initial_compute_done_128:

	orq	%r8,%r8
	je	.L_after_reduction_128
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_128:
	jmp	.L_last_blocks_done_122
/* Four remaining blocks: build the byte mask and the counter blocks.
   k1 = 64-bit entry r8 of byte64_len_to_mask_table.
   If r15d < 252 the counters are produced directly
   (zmm0 = zmm2 + zmm28); otherwise zmm2 is byte-shuffled through zmm29,
   ddq_add_1234 is added, and the result is shuffled back.
   NOTE(review): r15d presumably tracks the low counter byte - confirm. */
.L_last_num_blocks_is_4_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_129
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_129

.L_16_blocks_overflow_129:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
/* Four remaining blocks: AES rounds on zmm0 (keys at 0..160(%rdi):
   XOR, nine vaesenc, one vaesenclast) interleaved with the multiply of
   (zmm14 ^ 768(%rsp)), 832/896/960(%rsp) by the vectors at
   0/64/128/192(%rsp,%rbx); sums go to zmm24 (0x11), zmm25 (0x00),
   zmm26 (0x01/0x10).
   Input is loaded from (%rcx,%r11) under k1, XORed with the cipher
   output and stored to (%r9,%r11) under k1.  Lane 3 of the counter
   vector is replicated into zmm2; xmm11 / xmm7 keep lane 3 of the raw
   and of the masked, byte-shuffled output.
   r8 is reduced by 48 for the three blocks known to be complete.  If
   the last block is also complete (r8 >= 16, signed) r8 -= 16, 0 goes
   to (%rdx), the output (zmm17) is multiplied by the 64 bytes at
   288(%rsi), merged with the sums and reduced with POLY2 into xmm14. */
.L_16_blocks_ok_129:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the remaining input bytes */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	subq	$16 * (4 - 1),%r8


	/* complete (>= 16 bytes) versus partial final block */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_130





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* reduce xmm0:xmm3 with POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_130
/* Four remaining blocks, the last one shorter than 16 bytes.
   Writes r8 to (%rdx) and xmm11 to 16(%rsi).  The byte-shuffled output
   (zmm17) is multiplied by a vector whose low three lanes are gathered
   from 304(%rsi) and 336(%rsi); the products are merged with the sums
   in zmm24/25/26 and reduced with POLY2 into xmm14.  The partial block
   (xmm7) is XORed in by the code that follows this region.
   NOTE(review): the top lane of zmm1 is not written by the two loads
   here; the ymm load presumably zeroes it so lane 3 contributes nothing
   - confirm. */
.L_small_initial_partial_block_130:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Join point of the four-block paths.  When r8 is non-zero (bytes of a
   partial block remain) xmm7 is XORed into xmm14; when r8 == 0 that XOR
   is skipped.  Then leave through the common exit. */
.L_small_initial_compute_done_130:

	orq	%r8,%r8
	je	.L_after_reduction_130
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_130:
	jmp	.L_last_blocks_done_122
/* Five remaining blocks: build the byte mask and the counter blocks.
   The first 64 bytes are handled unmasked, so the mask index is r8 - 64:
   k1 = 64-bit entry (r8 - 64) of byte64_len_to_mask_table.
   If r15d < 251 the counters are produced directly: zmm0 = zmm2 + zmm28
   (four blocks) and xmm3 = xmm0 + xmm27 (fifth block).  Otherwise zmm2
   is byte-shuffled through zmm29, ddq_add_1234 gives zmm0, adding
   ddq_add_4444 to that gives zmm3, and both are shuffled back.
   NOTE(review): r15d presumably tracks the low counter byte - confirm. */
.L_last_num_blocks_is_5_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_131
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_131

.L_16_blocks_overflow_131:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
/* Five remaining blocks: AES rounds on zmm0 (four blocks) and xmm3
   (fifth block) with keys at 0..160(%rdi) (XOR, nine vaesenc, one
   vaesenclast), interleaved with the multiply of (zmm14 ^ 768(%rsp)),
   832/896/960(%rsp) by the vectors at 0/64/128/192(%rsp,%rbx); sums go
   to zmm24 (0x11), zmm25 (0x00), zmm26 (0x01/0x10).
   The first 64 input bytes are loaded unmasked from (%rcx,%r11); the
   bytes at offset 64 are loaded under k1.  Output is stored to
   (%r9,%r11) the same way.  Lane 0 of the second counter vector is
   replicated into zmm2; xmm11 / xmm7 keep lane 0 of the raw and of the
   masked, byte-shuffled second output vector.
   r8 is reduced by 64 for the four blocks known to be complete.  If the
   last block is also complete (r8 >= 16, signed) r8 -= 16, 0 goes to
   (%rdx), zmm17 is multiplied by the 64 bytes at 272(%rsi) and xmm19 by
   the 16 bytes at 336(%rsi); everything is merged with the sums and
   reduced with POLY2 into xmm14. */
.L_16_blocks_ok_131:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* 64 unmasked input bytes, then the masked remainder */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	subq	$16 * (5 - 1),%r8


	/* complete (>= 16 bytes) versus partial final block */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_132





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* reduce xmm0:xmm3 with POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_132
/* Five remaining blocks, the last one shorter than 16 bytes.
   Writes r8 to (%rdx) and xmm11 to 16(%rsi).  Only the four complete
   blocks (zmm17) are multiplied, by the 64 bytes at 288(%rsi); the
   products are merged with the sums in zmm24/25/26 and reduced with
   POLY2 into xmm14.  The partial block (xmm7) is XORed in by the code
   that follows this region. */
.L_small_initial_partial_block_132:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Join point of the five-block paths.  When r8 is non-zero (bytes of a
   partial block remain) xmm7 is XORed into xmm14; when r8 == 0 that XOR
   is skipped.  Then leave through the common exit. */
.L_small_initial_compute_done_132:

	orq	%r8,%r8
	je	.L_after_reduction_132
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_132:
	jmp	.L_last_blocks_done_122
/* Six remaining blocks: build the byte mask and the counter blocks.
   The first 64 bytes are handled unmasked, so the mask index is r8 - 64:
   k1 = 64-bit entry (r8 - 64) of byte64_len_to_mask_table.
   If r15d < 250 the counters are produced directly: zmm0 = zmm2 + zmm28
   (four blocks) and ymm3 = ymm0 + ymm27 (blocks five and six).
   Otherwise zmm2 is byte-shuffled through zmm29, ddq_add_1234 gives
   zmm0, adding ddq_add_4444 to that gives zmm3, and both are shuffled
   back.
   NOTE(review): r15d presumably tracks the low counter byte - confirm. */
.L_last_num_blocks_is_6_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_133
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_133

.L_16_blocks_overflow_133:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
/* Six remaining blocks: AES rounds on zmm0 (four blocks) and ymm3
   (blocks five and six) with keys at 0..160(%rdi) (XOR, nine vaesenc,
   one vaesenclast), interleaved with the multiply of
   (zmm14 ^ 768(%rsp)), 832/896/960(%rsp) by the vectors at
   0/64/128/192(%rsp,%rbx); sums go to zmm24 (0x11), zmm25 (0x00),
   zmm26 (0x01/0x10).
   The first 64 input bytes are loaded unmasked from (%rcx,%r11); the
   bytes at offset 64 are loaded under k1.  Output is stored to
   (%r9,%r11) the same way.  Lane 1 of the second counter vector is
   replicated into zmm2; xmm11 / xmm7 keep lane 1 of the raw and of the
   masked, byte-shuffled second output vector.
   r8 is reduced by 80 for the five blocks known to be complete.  If the
   last block is also complete (r8 >= 16, signed) r8 -= 16, 0 goes to
   (%rdx), zmm17 is multiplied by the 64 bytes at 256(%rsi) and ymm19 by
   the 32 bytes at 320(%rsi); everything is merged with the sums and
   reduced with POLY2 into xmm14. */
.L_16_blocks_ok_133:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* 64 unmasked input bytes, then the masked remainder */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	/* complete (>= 16 bytes) versus partial final block */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_134





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* reduce xmm0:xmm3 with POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_134
.L_small_initial_partial_block_134:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_134:

	orq	%r8,%r8
	je	.L_after_reduction_134
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_134:
	jmp	.L_last_blocks_done_122
/*
 * .L_last_num_blocks_is_7_122
 *
 * Tail handler for the case where the remaining data occupies 7 16-byte
 * blocks: one full 64-byte vector (zmm0) plus a second vector (zmm3) that
 * is loaded and stored under byte mask k1.
 *
 * Registers as used by the code below:
 *   rdi        base of the 16-byte values broadcast into zmm30/zmm31 and fed
 *              to vaesenc/vaesenclast (offsets 0..160)
 *   rsi        base of a table read at offsets 240..336; 16(%rsi) is written
 *              on the partial-block path
 *              NOTE(review): looks like a context holding hash-key powers - confirm
 *   rdx        pointer to a 64-bit slot that receives 0 or the leftover length
 *   rcx + r11  source bytes;  r9 + r11  destination bytes
 *   r8         remaining length in bytes (indexes byte64_len_to_mask_table)
 *   r15d       compared against 249 (= 256 - 7) to pick the counter path
 *              NOTE(review): presumably the low counter byte - confirm
 *   zmm2       counter input; on exit every lane holds the 7th counter block
 *   zmm14      running hash value in; xmm14 holds the reduced result on exit
 *   zmm29      vpshufb control mask
 *
 * The AES rounds are interleaved with carry-less multiplies over data read
 * from the stack (0..192(%rsp,%rbx) against 768..960(%rsp)); instruction
 * order matters because zmm17/zmm19/zmm30/zmm31 are reused across both.
 */
.L_last_num_blocks_is_7_122:
	/* k1 = byte64_len_to_mask_table[r8 - 64]: byte mask for the second vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 249: take the path that shuffles the counter before adding */
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_135
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27 (no shuffles) */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_135

.L_16_blocks_overflow_135:
	/* Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_135:




	/* zmm30 = 16 bytes at 0(%rdi) in every lane (XORed into the counters below) */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = 768(%rsp) ^ zmm14: fold the running hash into the first operand */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep lane 2 of zmm3 (the 7th counter block) replicated in zmm2 */
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR on both counter vectors */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of zmm8 x zmm1: 0x11 -> zmm14, 0x00 -> zmm7, 0x01 -> zmm10, 0x10 -> zmm11 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	/* AES round with the value from 16(%rdi); next value is prefetched into zmm31 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of zmm22 x zmm18 into zmm15/zmm16/zmm12/zmm13 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of zmm8 x zmm1 into zmm20/zmm21/zmm17/zmm19 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 = three-way XOR: accumulate the products gathered so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load source bytes: 64 unmasked, then the second vector zero-masked by k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Last set of partial products (zmm22 x zmm18) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums kept for later: zmm24 (0x11 terms), zmm25 (0x00 terms), zmm26 (cross terms) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* Final round uses the value from 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the cipher output with the source bytes */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* xmm11 = lane 2 of the result, captured before masking (saved on the partial path) */
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	/* Store: 64 bytes unmasked, second vector under k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 outside k1 before hashing */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the results with zmm29 for hashing; xmm7 = shuffled 7th block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	/* r8 = length belonging to the 7th block only */
	subq	$16 * (7 - 1),%r8


	/* Fewer than 16 bytes left: the 7th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_136





	/* Full 7th block: r8 -= 16 and record 0 at (%rdx) */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 7 blocks: zmm17 x 240(%rsi), zmm19 x {304(%rsi) (32 bytes), 336(%rsi) (16 bytes)} */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes together: result in xmm0 and xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 to 128 bits using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_136
.L_small_initial_partial_block_136:








	/* Partial 7th block: record its length at (%rdx) and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only 6 blocks here (table offsets shifted by 16); xmm7 is XORed in after the reduction */
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms, then XOR the lanes together as on the full-block path */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_136:

	/* r8 != 0 means a partial block is pending: XOR its shuffled bytes (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_136
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_136:
	jmp	.L_last_blocks_done_122
/*
 * .L_last_num_blocks_is_8_122
 *
 * Tail handler for the case where the remaining data occupies 8 16-byte
 * blocks: one full 64-byte vector (zmm0) plus a second vector (zmm3) that
 * is loaded and stored under byte mask k1.
 *
 * Registers as used by the code below:
 *   rdi        base of the 16-byte values broadcast into zmm30/zmm31 and fed
 *              to vaesenc/vaesenclast (offsets 0..160)
 *   rsi        base of a table read at offsets 224..336; 16(%rsi) is written
 *              on the partial-block path
 *              NOTE(review): looks like a context holding hash-key powers - confirm
 *   rdx        pointer to a 64-bit slot that receives 0 or the leftover length
 *   rcx + r11  source bytes;  r9 + r11  destination bytes
 *   r8         remaining length in bytes (indexes byte64_len_to_mask_table)
 *   r15d       compared against 248 (= 256 - 8) to pick the counter path
 *              NOTE(review): presumably the low counter byte - confirm
 *   zmm2       counter input; on exit every lane holds the 8th counter block
 *   zmm14      running hash value in; xmm14 holds the reduced result on exit
 *   zmm29      vpshufb control mask
 *
 * The AES rounds are interleaved with carry-less multiplies over data read
 * from the stack (0..192(%rsp,%rbx) against 768..960(%rsp)); instruction
 * order matters because zmm17/zmm19/zmm30/zmm31 are reused across both.
 */
.L_last_num_blocks_is_8_122:
	/* k1 = byte64_len_to_mask_table[r8 - 64]: byte mask for the second vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 248: take the path that shuffles the counter before adding */
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_137
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27 (no shuffles) */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_137

.L_16_blocks_overflow_137:
	/* Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_137:




	/* zmm30 = 16 bytes at 0(%rdi) in every lane (XORed into the counters below) */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = 768(%rsp) ^ zmm14: fold the running hash into the first operand */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep lane 3 of zmm3 (the 8th counter block) replicated in zmm2 */
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR on both counter vectors */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of zmm8 x zmm1: 0x11 -> zmm14, 0x00 -> zmm7, 0x01 -> zmm10, 0x10 -> zmm11 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	/* AES round with the value from 16(%rdi); next value is prefetched into zmm31 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of zmm22 x zmm18 into zmm15/zmm16/zmm12/zmm13 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of zmm8 x zmm1 into zmm20/zmm21/zmm17/zmm19 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 = three-way XOR: accumulate the products gathered so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load source bytes: 64 unmasked, then the second vector zero-masked by k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Last set of partial products (zmm22 x zmm18) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums kept for later: zmm24 (0x11 terms), zmm25 (0x00 terms), zmm26 (cross terms) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* Final round uses the value from 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the cipher output with the source bytes */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* xmm11 = lane 3 of the result, captured before masking (saved on the partial path) */
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	/* Store: 64 bytes unmasked, second vector under k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 outside k1 before hashing */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the results with zmm29 for hashing; xmm7 = shuffled 8th block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	/* r8 = length belonging to the 8th block only */
	subq	$16 * (8 - 1),%r8


	/* Fewer than 16 bytes left: the 8th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_138





	/* Full 8th block: r8 -= 16 and record 0 at (%rdx) */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 8 blocks: zmm17 x 224(%rsi), zmm19 x 288(%rsi) */
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Sum matching partial products of the two vectors */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes together: result in xmm0 and xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 to 128 bits using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_138
.L_small_initial_partial_block_138:








	/* Partial 8th block: record its length at (%rdx) and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only 7 blocks here (table offsets shifted by 16); xmm7 is XORed in after the reduction */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms, then XOR the lanes together as on the full-block path */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_138:

	/* r8 != 0 means a partial block is pending: XOR its shuffled bytes (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_138
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_138:
	jmp	.L_last_blocks_done_122
/*
 * .L_last_num_blocks_is_9_122
 *
 * Tail handler for the case where the remaining data occupies 9 16-byte
 * blocks: two full 64-byte vectors (zmm0, zmm3) plus a single 16-byte
 * block (xmm4) that is loaded and stored under byte mask k1.
 *
 * Registers as used by the code below:
 *   rdi        base of the 16-byte values broadcast into zmm30/zmm31 and fed
 *              to vaesenc/vaesenclast (offsets 0..160)
 *   rsi        base of a table read at offsets 208..336; 16(%rsi) is written
 *              on the partial-block path
 *              NOTE(review): looks like a context holding hash-key powers - confirm
 *   rdx        pointer to a 64-bit slot that receives 0 or the leftover length
 *   rcx + r11  source bytes;  r9 + r11  destination bytes
 *   r8         remaining length in bytes (indexes byte64_len_to_mask_table)
 *   r15d       compared against 247 (= 256 - 9) to pick the counter path
 *              NOTE(review): presumably the low counter byte - confirm
 *   zmm2       counter input; on exit every lane holds the 9th counter block
 *   zmm14      running hash value in; xmm14 holds the reduced result on exit
 *   zmm29      vpshufb control mask
 *
 * The AES rounds are interleaved with carry-less multiplies over data read
 * from the stack (0..192(%rsp,%rbx) against 768..960(%rsp)); instruction
 * order matters because zmm17/zmm19/zmm20/zmm30/zmm31 are reused across both.
 */
.L_last_num_blocks_is_9_122:
	/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 247: take the path that shuffles the counter before adding */
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_139
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, xmm4 = xmm3 + xmm27 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_139

.L_16_blocks_overflow_139:
	/* Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_139:




	/* zmm30 = 16 bytes at 0(%rdi) in every lane (XORed into the counters below) */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = 768(%rsp) ^ zmm14: fold the running hash into the first operand */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep lane 0 of zmm4 (the 9th counter block) replicated in zmm2 */
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR on all three counter vectors */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of zmm8 x zmm1: 0x11 -> zmm14, 0x00 -> zmm7, 0x01 -> zmm10, 0x10 -> zmm11 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	/* AES round with the value from 16(%rdi); next value is prefetched into zmm31 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of zmm22 x zmm18 into zmm15/zmm16/zmm12/zmm13 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of zmm8 x zmm1 into zmm20/zmm21/zmm17/zmm19 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 = three-way XOR: accumulate the products gathered so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load source bytes: 128 unmasked, then the 9th block zero-masked by k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Last set of partial products (zmm22 x zmm18) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums kept for later: zmm24 (0x11 terms), zmm25 (0x00 terms), zmm26 (cross terms) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	/* Final round uses the value from 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	/* XOR the cipher output with the source bytes */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	/* xmm11 = lane 0 of the third result, captured before masking (saved on the partial path) */
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	/* Store: 128 bytes unmasked, 9th block under k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before hashing */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the results with zmm29 for hashing; xmm7 = shuffled 9th block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	/* r8 = length belonging to the 9th block only */
	subq	$16 * (9 - 1),%r8


	/* Fewer than 16 bytes left: the 9th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_140





	/* Full 9th block: r8 -= 16 and record 0 at (%rdx) */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 9 blocks: zmm17 x 208(%rsi), zmm19 x 272(%rsi), xmm20 x 336(%rsi) */
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Sum matching partial products of the first two vectors */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes together: result in xmm0 and xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 to 128 bits using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_140
.L_small_initial_partial_block_140:








	/* Partial 9th block: record its length at (%rdx) and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only the 8 full blocks here (table offsets shifted by 16); xmm7 is XORed in after the reduction */
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Sum matching partial products of the two vectors */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the cross terms, then XOR the lanes together as on the full-block path */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_140:

	/* r8 != 0 means a partial block is pending: XOR its shuffled bytes (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_140
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_140:
	jmp	.L_last_blocks_done_122
/*
 * .L_last_num_blocks_is_10_122
 *
 * Tail handler for the case where the remaining data occupies 10 16-byte
 * blocks: two full 64-byte vectors (zmm0, zmm3) plus a 32-byte vector
 * (ymm4) that is loaded and stored under byte mask k1.
 *
 * Registers as used by the code below:
 *   rdi        base of the 16-byte values broadcast into zmm30/zmm31 and fed
 *              to vaesenc/vaesenclast (offsets 0..160)
 *   rsi        base of a table read at offsets 192..336; 16(%rsi) is written
 *              on the partial-block path
 *              NOTE(review): looks like a context holding hash-key powers - confirm
 *   rdx        pointer to a 64-bit slot that receives 0 or the leftover length
 *   rcx + r11  source bytes;  r9 + r11  destination bytes
 *   r8         remaining length in bytes (indexes byte64_len_to_mask_table)
 *   r15d       compared against 246 (= 256 - 10) to pick the counter path
 *              NOTE(review): presumably the low counter byte - confirm
 *   zmm2       counter input; on exit every lane holds the 10th counter block
 *   zmm14      running hash value in; xmm14 holds the reduced result on exit
 *   zmm29      vpshufb control mask
 *
 * The AES rounds are interleaved with carry-less multiplies over data read
 * from the stack (0..192(%rsp,%rbx) against 768..960(%rsp)); instruction
 * order matters because zmm17/zmm19/zmm20/zmm30/zmm31 are reused across both.
 */
.L_last_num_blocks_is_10_122:
	/* k1 = byte64_len_to_mask_table[r8 - 128]: byte mask for the third vector */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 246: take the path that shuffles the counter before adding */
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_141
	/* Fast path: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27, ymm4 = ymm3 + ymm27 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_141

.L_16_blocks_overflow_141:
	/* Shuffle zmm2 with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_141:




	/* zmm30 = 16 bytes at 0(%rdi) in every lane (XORed into the counters below) */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = 768(%rsp) ^ zmm14: fold the running hash into the first operand */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep lane 1 of zmm4 (the 10th counter block) replicated in zmm2 */
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Initial key XOR on all three counter vectors */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of zmm8 x zmm1: 0x11 -> zmm14, 0x00 -> zmm7, 0x01 -> zmm10, 0x10 -> zmm11 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	/* AES round with the value from 16(%rdi); next value is prefetched into zmm31 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of zmm22 x zmm18 into zmm15/zmm16/zmm12/zmm13 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of zmm8 x zmm1 into zmm20/zmm21/zmm17/zmm19 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 = three-way XOR: accumulate the products gathered so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load source bytes: 128 unmasked, then the third vector zero-masked by k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Last set of partial products (zmm22 x zmm18) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums kept for later: zmm24 (0x11 terms), zmm25 (0x00 terms), zmm26 (cross terms) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	/* Final round uses the value from 160(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	/* XOR the cipher output with the source bytes */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	/* xmm11 = lane 1 of the third result, captured before masking (saved on the partial path) */
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	/* Store: 128 bytes unmasked, third vector under k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before hashing */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the results with zmm29 for hashing; xmm7 = shuffled 10th block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	/* r8 = length belonging to the 10th block only */
	subq	$16 * (10 - 1),%r8


	/* Fewer than 16 bytes left: the 10th block is partial */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_142





	/* Full 10th block: r8 -= 16 and record 0 at (%rdx) */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all 10 blocks: zmm17 x 192(%rsi), zmm19 x 256(%rsi), ymm20 x 320(%rsi) */
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Sum matching partial products of the first two vectors */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes together: result in xmm0 and xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0:xmm3 to 128 bits using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_142
.L_small_initial_partial_block_142:








	/* Partial 10th block: record its length at (%rdx) and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply only 9 blocks here (table offsets shifted by 16); xmm7 is XORed in after the reduction */
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Sum matching partial products of the first two vectors */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	/* Add in the sums saved in zmm24/zmm25/zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms, then XOR the lanes together as on the full-block path */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce using the constant at POLY2; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_142:

	/* r8 != 0 means a partial block is pending: XOR its shuffled bytes (xmm7) into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_142
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_142:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_11_122:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_143
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_143

.L_16_blocks_overflow_143:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_143:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_144





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_144
.L_small_initial_partial_block_144:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_144:

	orq	%r8,%r8
	je	.L_after_reduction_144
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_144:
	jmp	.L_last_blocks_done_122
/*
 * Tail case: 12 blocks remain, held in three ZMM registers; the third
 * register is loaded and stored under byte mask %k1.
 *
 * Register roles as used in this block (inferred from usage only;
 * NOTE(review): confirm against the function prologue):
 *   %rdi        base of eleven 16-byte round keys at offsets 0..160
 *               (ten rounds, which is consistent with AES-128)
 *   %rsi        base of a table of 16-byte multiplier values read at
 *               160..351; 16(%rsi) receives %xmm11 on the partial path
 *   %rdx        pointer to a quadword that receives 0 or the leftover
 *               byte count
 *   %rcx, %r9   input and output base pointers, both indexed by %r11
 *   %r8         remaining byte count
 *   %r15d       compared against 244 to choose the counter-increment path
 *               (presumably the low counter byte - TODO confirm)
 *   %zmm2       counter state in, next counter state out
 *   %zmm29      byte-shuffle mask
 *   %xmm14      hash accumulator in and out
 *   %rsp+%rbx   stack area holding multiplier values, paired with the
 *               data saved at 768..1023(%rsp)
 */
.L_last_num_blocks_is_12_122:
	/* %k1 = byte mask for the third 64-byte register, table index %r8 - 128 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 244 = 256 - 12; at or above it the shuffled add path is taken */
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_145
	/* direct add path: three registers of counter blocks from %zmm2 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_145

.L_16_blocks_overflow_145:
	/* shuffle with %zmm29, add the ddq_add_* constants, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_145:




	/*
	 * From here the AES rounds on %zmm0/%zmm3/%zmm4 are interleaved with
	 * carry-less multiplies of the data saved at 768..1023(%rsp) by the
	 * values at 0..255(%rsp,%rbx). Round keys alternate between %zmm30
	 * and %zmm31, each loaded one round ahead of its use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* fold the incoming accumulator into the first saved 64 bytes */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* lane 3 of %zmm4 (12th counter block) is broadcast into %zmm2 */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* initial key XOR (key at offset 0) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* four partial products: high*high, low*low and the two cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: accumulate the partial products so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the third register is zero-masked by %k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* totals for the saved data: %zmm24 high, %zmm25 low, %zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* final round uses the key at offset 160 */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* XOR the keystream with the input to form the output */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* keep the last output block (lane 3) for the partial path below */
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* zero the bytes beyond the mask before the output is hashed */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* byte-shuffled output blocks are the multiplicands below */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* shuffled last block, XORed in after reduction when it is partial */
	vextracti32x4	$3,%zmm20,%xmm7
	/* %r8 = byte count of the 12th block */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_146





	/*
	 * Last block is complete: %r8 becomes 0, (%rdx) is cleared, and all
	 * 12 blocks are multiplied by the table entries starting at 160(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* split the middle term across the high and low halves */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_146
.L_small_initial_partial_block_146:








	/*
	 * Last block is partial: its byte count goes to (%rdx) and the last
	 * output block to 16(%rsi). The table window starts one entry later
	 * (176(%rsi)), so only the first 11 blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/*
	 * Three multipliers only: the ymm load zeroes the upper half and the
	 * insert fills lane 2, so lane 3 stays zero and the partial block in
	 * lane 3 of %zmm20 contributes nothing to these products.
	 */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the middle term across the high and low halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_146:

	/*
	 * %r8 is 0 after the full-block path and non-zero after the partial
	 * path; only in the latter is the shuffled partial block (%xmm7)
	 * XORed into the accumulator.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_146
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_146:
	jmp	.L_last_blocks_done_122
/*
 * Tail case: 13 blocks remain. Three full ZMM registers carry blocks
 * 1-12 and the 13th block is carried at XMM width in %xmm5 / %xmm21,
 * loaded and stored under byte mask %k1.
 *
 * Register roles as used in this block (inferred from usage only;
 * NOTE(review): confirm against the function prologue):
 *   %rdi        base of eleven 16-byte round keys at offsets 0..160
 *               (ten rounds, which is consistent with AES-128)
 *   %rsi        base of a table of 16-byte multiplier values read at
 *               144..351; 16(%rsi) receives %xmm11 on the partial path
 *   %rdx        pointer to a quadword that receives 0 or the leftover
 *               byte count
 *   %rcx, %r9   input and output base pointers, both indexed by %r11
 *   %r8         remaining byte count
 *   %r15d       compared against 243 to choose the counter-increment path
 *               (presumably the low counter byte - TODO confirm)
 *   %zmm2       counter state in, next counter state out
 *   %zmm29      byte-shuffle mask
 *   %xmm14      hash accumulator in and out
 */
.L_last_num_blocks_is_13_122:
	/* %k1 = byte mask for the fourth register, table index %r8 - 192 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 243 = 256 - 13; at or above it the shuffled add path is taken */
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_147
	/* direct add path; the fourth counter register is only XMM wide */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_147

.L_16_blocks_overflow_147:
	/* shuffle with %zmm29, add the ddq_add_* constants, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_147:




	/*
	 * From here the AES rounds on %zmm0/%zmm3/%zmm4/%xmm5 are interleaved
	 * with carry-less multiplies of the data saved at 768..1023(%rsp) by
	 * the values at 0..255(%rsp,%rbx). Round keys alternate between
	 * %zmm30 and %zmm31, each loaded one round ahead of its use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* fold the incoming accumulator into the first saved 64 bytes */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* lane 0 of %zmm5 (13th counter block) is broadcast into %zmm2 */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* initial key XOR (key at offset 0) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* four partial products: high*high, low*low and the two cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: accumulate the partial products so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth register is zero-masked by %k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* totals for the saved data: %zmm24 high, %zmm25 low, %zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	/* final round uses the key at offset 160 */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	/* XOR the keystream with the input to form the output */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	/* keep the last output block (lane 0) for the partial path below */
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	/* zero the bytes beyond the mask before the output is hashed */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* byte-shuffled output blocks are the multiplicands below */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	/* shuffled last block, XORed in after reduction when it is partial */
	vextracti32x4	$0,%zmm21,%xmm7
	/* %r8 = byte count of the 13th block */
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_148





	/*
	 * Last block is complete: %r8 becomes 0, (%rdx) is cleared, and all
	 * 13 blocks are multiplied by the table entries starting at 144(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* 13th block times the table entry at 336(%rsi), XMM width only */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the middle term across the high and low halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_148
.L_small_initial_partial_block_148:








	/*
	 * Last block is partial: its byte count goes to (%rdx) and the last
	 * output block to 16(%rsi). Only the 12 complete blocks in
	 * %zmm17/%zmm19/%zmm20 are multiplied, with the table window starting
	 * at 160(%rsi); %xmm21 is not multiplied on this path.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* split the middle term across the high and low halves */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_148:

	/*
	 * %r8 is 0 after the full-block path and non-zero after the partial
	 * path; only in the latter is the shuffled partial block (%xmm7)
	 * XORed into the accumulator.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_148
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_148:
	jmp	.L_last_blocks_done_122
/*
 * Tail case: 14 blocks remain. Three full ZMM registers carry blocks
 * 1-12 and blocks 13-14 are carried at YMM width in %ymm5 / %ymm21,
 * loaded and stored under byte mask %k1.
 *
 * Register roles as used in this block (inferred from usage only;
 * NOTE(review): confirm against the function prologue):
 *   %rdi        base of eleven 16-byte round keys at offsets 0..160
 *               (ten rounds, which is consistent with AES-128)
 *   %rsi        base of a table of 16-byte multiplier values read at
 *               128..351; 16(%rsi) receives %xmm11 on the partial path
 *   %rdx        pointer to a quadword that receives 0 or the leftover
 *               byte count
 *   %rcx, %r9   input and output base pointers, both indexed by %r11
 *   %r8         remaining byte count
 *   %r15d       compared against 242 to choose the counter-increment path
 *               (presumably the low counter byte - TODO confirm)
 *   %zmm2       counter state in, next counter state out
 *   %zmm29      byte-shuffle mask
 *   %xmm14      hash accumulator in and out
 */
.L_last_num_blocks_is_14_122:
	/* %k1 = byte mask for the fourth register, table index %r8 - 192 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 242 = 256 - 14; at or above it the shuffled add path is taken */
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_149
	/* direct add path; the fourth counter register is only YMM wide */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_149

.L_16_blocks_overflow_149:
	/* shuffle with %zmm29, add the ddq_add_* constants, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_149:




	/*
	 * From here the AES rounds on %zmm0/%zmm3/%zmm4/%ymm5 are interleaved
	 * with carry-less multiplies of the data saved at 768..1023(%rsp) by
	 * the values at 0..255(%rsp,%rbx). Round keys alternate between
	 * %zmm30 and %zmm31, each loaded one round ahead of its use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* fold the incoming accumulator into the first saved 64 bytes */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* lane 1 of %zmm5 (14th counter block) is broadcast into %zmm2 */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* initial key XOR (key at offset 0) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* four partial products: high*high, low*low and the two cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: accumulate the partial products so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth register is zero-masked by %k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* totals for the saved data: %zmm24 high, %zmm25 low, %zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	/* final round uses the key at offset 160 */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	/* XOR the keystream with the input to form the output */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	/* keep the last output block (lane 1) for the partial path below */
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* zero the bytes beyond the mask before the output is hashed */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* byte-shuffled output blocks are the multiplicands below */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	/* shuffled last block, XORed in after reduction when it is partial */
	vextracti32x4	$1,%zmm21,%xmm7
	/* %r8 = byte count of the 14th block */
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_150





	/*
	 * Last block is complete: %r8 becomes 0, (%rdx) is cleared, and all
	 * 14 blocks are multiplied by the table entries starting at 128(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* blocks 13-14 times the two table entries at 320(%rsi), YMM width */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the middle term across the high and low halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_150
.L_small_initial_partial_block_150:








	/*
	 * Last block is partial: its byte count goes to (%rdx) and the last
	 * output block to 16(%rsi). The table window starts one entry later
	 * (144(%rsi)), so only the first 13 blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/*
	 * XMM-width multiply touches lane 0 of %zmm21 only (13th block);
	 * the partial 14th block in lane 1 is left out of the products.
	 */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the middle term across the high and low halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_150:

	/*
	 * %r8 is 0 after the full-block path and non-zero after the partial
	 * path; only in the latter is the shuffled partial block (%xmm7)
	 * XORed into the accumulator.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_150
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_150:
	jmp	.L_last_blocks_done_122
/*
 * Tail case: 15 blocks remain, held in four ZMM registers; the fourth
 * register (%zmm5 / %zmm21) is loaded and stored under byte mask %k1.
 *
 * Register roles as used in this block (inferred from usage only;
 * NOTE(review): confirm against the function prologue):
 *   %rdi        base of eleven 16-byte round keys at offsets 0..160
 *               (ten rounds, which is consistent with AES-128)
 *   %rsi        base of a table of 16-byte multiplier values read at
 *               112..351; 16(%rsi) receives %xmm11 on the partial path
 *   %rdx        pointer to a quadword that receives 0 or the leftover
 *               byte count
 *   %rcx, %r9   input and output base pointers, both indexed by %r11
 *   %r8         remaining byte count
 *   %r15d       compared against 241 to choose the counter-increment path
 *               (presumably the low counter byte - TODO confirm)
 *   %zmm2       counter state in, next counter state out
 *   %zmm29      byte-shuffle mask
 *   %xmm14      hash accumulator in and out
 */
.L_last_num_blocks_is_15_122:
	/* %k1 = byte mask for the fourth register, table index %r8 - 192 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 241 = 256 - 15; at or above it the shuffled add path is taken */
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_151
	/* direct add path: four registers of counter blocks from %zmm2 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_151

.L_16_blocks_overflow_151:
	/* shuffle with %zmm29, add the ddq_add_* constants, shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_151:




	/*
	 * From here the AES rounds on %zmm0/%zmm3/%zmm4/%zmm5 are interleaved
	 * with carry-less multiplies of the data saved at 768..1023(%rsp) by
	 * the values at 0..255(%rsp,%rbx). Round keys alternate between
	 * %zmm30 and %zmm31, each loaded one round ahead of its use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* fold the incoming accumulator into the first saved 64 bytes */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* lane 2 of %zmm5 (15th counter block) is broadcast into %zmm2 */
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* initial key XOR (key at offset 0) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* four partial products: high*high, low*low and the two cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: accumulate the partial products so far */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* load the input; the fourth register is zero-masked by %k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* totals for the saved data: %zmm24 high, %zmm25 low, %zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* final round uses the key at offset 160 */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the keystream with the input to form the output */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* keep the last output block (lane 2) for the partial path below */
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* zero the bytes beyond the mask before the output is hashed */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* byte-shuffled output blocks are the multiplicands below */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* shuffled last block, XORed in after reduction when it is partial */
	vextracti32x4	$2,%zmm21,%xmm7
	/* %r8 = byte count of the 15th block */
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_152





	/*
	 * Last block is complete: %r8 becomes 0, (%rdx) is cleared, and all
	 * 15 blocks are multiplied by the table entries starting at 112(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/*
	 * Three multipliers for blocks 13-15: the ymm load zeroes the upper
	 * half and the insert fills lane 2, leaving lane 3 zero.
	 */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the middle term across the high and low halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_152
.L_small_initial_partial_block_152:








	/*
	 * Last block is partial: its byte count goes to (%rdx) and the last
	 * output block to 16(%rsi). The table window starts one entry later
	 * (128(%rsi)), so only the first 14 blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/*
	 * YMM-width multiply touches lanes 0-1 of %zmm21 only (blocks 13-14);
	 * the partial 15th block in lane 2 is left out of the products.
	 */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* merge with the totals kept in %zmm24/%zmm25/%zmm26 */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the middle term across the high and low halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to one */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* two-step fold with the POLY2 constant; 128-bit result in %xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_152:

	/*
	 * %r8 is 0 after the full-block path and non-zero after the partial
	 * path; only in the latter is the shuffled partial block (%xmm7)
	 * XORed into the accumulator.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_152
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_152:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_16_122:
/*
 * Tail handler for 16 remaining blocks (the 16th may be partial).
 * Runs the AES rounds for four counter vectors (zmm0/zmm3/zmm4/zmm5)
 * interleaved with carry-less multiplies over vectors read from the
 * stack, then multiplies the freshly produced blocks and reduces the
 * result into xmm14.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = bytes left, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context with key powers, rdx = slot for the
 * partial-block length) - confirm against the function prologue.
 */
/* k1 = byte mask for the fourth 64-byte vector; table index is r8 - 192. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
/* r15d >= 240: take the byte-swapped add path (presumably the low
 * counter byte could wrap when 16 is added - TODO confirm). */
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_153
/* Fast path: add the increments held in zmm28/zmm27 directly. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_153

.L_16_blocks_overflow_153:
/* Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
 * ddq_add_4444 three times, and shuffle the four results back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_153:




/* zmm30/zmm31 alternate as round-key registers, each broadcast from
 * the schedule at (%rdi). zmm8 = first stacked vector XOR zmm14. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
/* Keep the last counter block, replicated to all lanes, in zmm2. */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
/* Round 0: XOR the counter blocks with the first round key. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


/* Four partial products (0x11 high, 0x00 low, 0x01/0x10 cross) of the
 * first stacked pair; presumably buffered blocks times matching hash
 * key powers - TODO confirm what 768(%rsp) and (%rsp,%rbx) hold. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


/* Partial products of the second stacked pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


/* Partial products of the third stacked pair. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


/* vpternlogq 0x96 is a three-way XOR: fold the products of the first
 * three pairs into zmm14 (high), zmm7 (low), zmm11/zmm10 (cross). */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
/* Load 4 x 64 input bytes; the last vector is masked and zero-filled. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


/* Partial products of the fourth stacked pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
/* Unreduced sums so far: zmm24 = high, zmm25 = low, zmm26 = cross. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
/* Final round uses the key at 160(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
/* Keystream XOR input = output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
/* xmm11 = last output lane, taken before the tail bytes are zeroed. */
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
/* Zero the bytes beyond the message end in the last vector. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
/* Shuffle the output blocks with zmm29 for hashing; xmm7 = last one. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
/* r8 = byte count belonging to the 16th block. */
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_154:
/* Unlike the smaller cases there is no full-block shortcut: the 16th
 * block is always recorded as pending and only 15 blocks are
 * multiplied here. */








/* Record the pending length at (%rdx) and xmm11 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
/* Multiply blocks 1-4 by the four values at 112(%rsi). */
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
/* Blocks 5-8 by the values at 176(%rsi). */
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
/* Blocks 9-12 by the values at 240(%rsi), folded into the sums above. */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
/* Blocks 13-15: three values assembled from 304(%rsi) and 336(%rsi);
 * the top lane of zmm1 is zero, so the 16th block adds nothing. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

/* Merge with the earlier sums in zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

/* Split the cross term across the high (zmm0) and low (zmm3) halves,
 * then XOR the four 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
/* Two-phase reduction of xmm0:xmm3 with the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_154:
/* XOR the pending last block into the reduced value (unconditional). */
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_154:
	jmp	.L_last_blocks_done_122
.L_last_num_blocks_is_0_122:
/*
 * Tail handler for zero remaining blocks: no AES work, only the four
 * stacked vectors at 768..960(%rsp) are multiplied by the vectors at
 * 0..192(%rsp,%rbx) and reduced into xmm14. Execution then continues
 * past the end of this block.
 * NOTE(review): presumably buffered blocks times matching hash key
 * powers - confirm what the two stack regions hold.
 */
/* First pair; zmm14 is XORed into the first stacked vector. */
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
/* Second pair. */
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
/* Start the sums: zmm24 = high, zmm25 = low, zmm26 = cross terms. */
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
/* Third pair. */
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
/* Fourth pair. */
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

/* Three-way XORs (0x96) fold the last two pairs into the sums. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

/* Split the cross term across high/low, then XOR the four 128-bit
 * lanes of each sum down to a single lane. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
/* Two-phase reduction of xmm24:xmm25 with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

.L_last_blocks_done_122:
/* Common exit of the tail cases: shuffle the counter block in xmm2
 * with the mask in xmm29, then continue at .L_ghash_done_10. */
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_10

.L_message_below_equal_16_blocks_10:
/*
 * Dispatch on the block count r12 = (r8d + 15) >> 4, i.e. the byte
 * length in r8 rounded up to whole 16-byte blocks. The compare tree
 * splits at 8, 12 and 4; a count matching none of the explicit
 * compares in the lowest group falls through to the code that
 * follows the last compare.
 */


	movl	%r8d,%r12d
	addl	$15,%r12d
	shrl	$4,%r12d
	cmpq	$8,%r12
	je	.L_small_initial_num_blocks_is_8_155
	jl	.L_small_initial_num_blocks_is_7_1_155


/* Here r12 > 8. */
	cmpq	$12,%r12
	je	.L_small_initial_num_blocks_is_12_155
	jl	.L_small_initial_num_blocks_is_11_9_155


/* Here r12 > 12: 16, 15, 14, otherwise 13. */
	cmpq	$16,%r12
	je	.L_small_initial_num_blocks_is_16_155
	cmpq	$15,%r12
	je	.L_small_initial_num_blocks_is_15_155
	cmpq	$14,%r12
	je	.L_small_initial_num_blocks_is_14_155
	jmp	.L_small_initial_num_blocks_is_13_155

.L_small_initial_num_blocks_is_11_9_155:

/* 9 <= r12 <= 11. */
	cmpq	$11,%r12
	je	.L_small_initial_num_blocks_is_11_155
	cmpq	$10,%r12
	je	.L_small_initial_num_blocks_is_10_155
	jmp	.L_small_initial_num_blocks_is_9_155

.L_small_initial_num_blocks_is_7_1_155:
/* r12 < 8. */
	cmpq	$4,%r12
	je	.L_small_initial_num_blocks_is_4_155
	jl	.L_small_initial_num_blocks_is_3_1_155

/* 5 <= r12 <= 7. */
	cmpq	$7,%r12
	je	.L_small_initial_num_blocks_is_7_155
	cmpq	$6,%r12
	je	.L_small_initial_num_blocks_is_6_155
	jmp	.L_small_initial_num_blocks_is_5_155

.L_small_initial_num_blocks_is_3_1_155:

/* r12 < 4: 3 and 2 branch away, anything else falls through. */
	cmpq	$3,%r12
	je	.L_small_initial_num_blocks_is_3_155
	cmpq	$2,%r12
	je	.L_small_initial_num_blocks_is_2_155





.L_small_initial_num_blocks_is_1_155:
/*
 * Small-message case: one block, possibly partial. Encrypts a single
 * counter block, XORs it with up to 16 masked input bytes, stores the
 * result and updates the running value in xmm14.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = byte length, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context, rdx = partial-length slot) -
 * confirm against the function prologue.
 */
	vmovdqa64	SHUF_MASK(%rip),%xmm29
/* Counter for this block = xmm2 + ONE. */
	vpaddd	ONE(%rip),%xmm2,%xmm0
/* k1 = byte mask looked up by length r8. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
/* Keep the updated counter in xmm2. */
	vextracti32x4	$0,%zmm0,%xmm2
	vpshufb	%xmm29,%xmm0,%xmm0
/* Masked, zero-filling load of the input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%xmm6{%k1}{z}
/* Round keys at 0..160(%rdi): initial XOR, nine vaesenc rounds and a
 * final vaesenclast. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%xmm15,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%xmm15,%xmm0,%xmm0
/* Keystream XOR input = output; xmm12 keeps it before tail zeroing. */
	vpxorq	%xmm6,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
/* Zero bytes beyond the message, then shuffle for hashing. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm6
	vextracti32x4	$0,%zmm6,%xmm13


/* Fewer than 16 bytes: the block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_156





/* Full block: clear the pending length, XOR xmm14 into the block and
 * multiply by the value at 336(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

/* Split the cross term across high (zmm0) / low (zmm3) and XOR the
 * 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_156
.L_small_initial_partial_block_156:








/* Partial block: record its length at (%rdx) and xmm12 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)











/* No multiply here; the block is only XORed into xmm14. */
	vpxorq	%xmm13,%xmm14,%xmm14

	jmp	.L_after_reduction_156
.L_small_initial_compute_done_156:
.L_after_reduction_156:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_2_155:
/*
 * Small-message case: two blocks, the second possibly partial. Both
 * counter blocks are processed in one ymm register.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = byte length, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context, rdx = partial-length slot) -
 * confirm against the function prologue.
 */
	vmovdqa64	SHUF_MASK(%rip),%ymm29
/* Replicate the counter and add the ddq_add_1234 increments. */
	vshufi64x2	$0,%ymm2,%ymm2,%ymm0
	vpaddd	ddq_add_1234(%rip),%ymm0,%ymm0
/* k1 = byte mask looked up by length r8. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
/* Keep the last counter used (lane 1) in xmm2. */
	vextracti32x4	$1,%zmm0,%xmm2
	vpshufb	%ymm29,%ymm0,%ymm0
/* Masked, zero-filling load of the input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%ymm6{%k1}{z}
/* Round keys at 0..160(%rdi): initial XOR, nine vaesenc rounds and a
 * final vaesenclast. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%ymm15,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%ymm15,%ymm0,%ymm0
/* Keystream XOR input = output; xmm12 keeps the last output block
 * before its tail bytes are zeroed. */
	vpxorq	%ymm6,%ymm0,%ymm0
	vextracti32x4	$1,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
/* Zero bytes beyond the message, then shuffle for hashing;
 * xmm13 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm6
	vextracti32x4	$1,%zmm6,%xmm13
/* r8 = byte count belonging to the last block. */
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_157





/* Last block is full: clear the pending length and multiply both
 * blocks by the two values at 320(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

/* Split the cross term across high (zmm0) / low (zmm3) and XOR the
 * 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_157
.L_small_initial_partial_block_157:








/* Last block is partial: record its length at (%rdx) and xmm12 at
 * 16(%rsi); only the first block is multiplied (value at 336(%rsi)). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm6,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm6,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm6,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm6,%xmm3

/* Same cross-term split, lane fold and POLY2 reduction as above. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_157:

/* r8 != 0 means a partial block is pending: XOR it into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_157
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_157:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_3_155:
/*
 * Small-message case: three blocks, the third possibly partial. All
 * counter blocks are processed in one zmm register.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = byte length, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context, rdx = partial-length slot) -
 * confirm against the function prologue.
 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
/* Replicate the counter and add the ddq_add_1234 increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
/* k1 = byte mask looked up by length r8. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
/* Keep the last counter used (lane 2) in xmm2. */
	vextracti32x4	$2,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
/* Masked, zero-filling load of the input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
/* Round keys at 0..160(%rdi): initial XOR, nine vaesenc rounds and a
 * final vaesenclast. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
/* Keystream XOR input = output; xmm12 keeps the last output block
 * before its tail bytes are zeroed. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$2,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
/* Zero bytes beyond the message, then shuffle for hashing;
 * xmm13 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$2,%zmm6,%xmm13
/* r8 = byte count belonging to the last block. */
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_158





/* Last block is full: clear the pending length and multiply all three
 * blocks by the values assembled from 304(%rsi) and 336(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

/* Split the cross term across high (zmm0) / low (zmm3) and XOR the
 * 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_158
.L_small_initial_partial_block_158:








/* Last block is partial: record its length at (%rdx) and xmm12 at
 * 16(%rsi); only the first two blocks are multiplied (320(%rsi)). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm6,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm6,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm6,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm6,%ymm3

/* Same cross-term split, lane fold and POLY2 reduction as above. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_158:

/* r8 != 0 means a partial block is pending: XOR it into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_158
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_158:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_4_155:
/*
 * Small-message case: four blocks, the fourth possibly partial. All
 * counter blocks are processed in one zmm register.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = byte length, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context, rdx = partial-length slot) -
 * confirm against the function prologue.
 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
/* Replicate the counter and add the ddq_add_1234 increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
/* k1 = byte mask looked up by length r8. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	kmovq	(%r10,%r15,8),%k1
/* Keep the last counter used (lane 3) in xmm2. */
	vextracti32x4	$3,%zmm0,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
/* Masked, zero-filling load of the input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6{%k1}{z}
/* Round keys at 0..160(%rdi): initial XOR, nine vaesenc rounds and a
 * final vaesenclast. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
/* Keystream XOR input = output; xmm12 keeps the last output block
 * before its tail bytes are zeroed. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vextracti32x4	$3,%zmm0,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
/* Zero bytes beyond the message, then shuffle for hashing;
 * xmm13 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vextracti32x4	$3,%zmm6,%xmm13
/* r8 = byte count belonging to the last block. */
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_159





/* Last block is full: clear the pending length and multiply all four
 * blocks by the four values at 288(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

/* Split the cross term across high (zmm0) / low (zmm3) and XOR the
 * 128-bit lanes of each down to one. */
	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_159
.L_small_initial_partial_block_159:








/* Last block is partial: record its length at (%rdx) and xmm12 at
 * 16(%rsi); only the first three blocks are multiplied, by the values
 * assembled from 304(%rsi) and 336(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3

/* Same cross-term split, lane fold and POLY2 reduction as above. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_159:

/* r8 != 0 means a partial block is pending: XOR it into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_159
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_159:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_5_155:
/*
 * Small-message case: five blocks, the fifth possibly partial. Blocks
 * 1-4 are processed in zmm0 and block 5 in xmm3.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = byte length, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context, rdx = partial-length slot) -
 * confirm against the function prologue.
 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
/* Replicate the counter; add ddq_add_1234 / ddq_add_5678 increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
/* k1 = byte mask for the second vector; table index is r8 - 64. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
/* Keep the last counter used (lane 0 of zmm3) in xmm2. */
	vextracti32x4	$0,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
/* First 64 input bytes unmasked; the rest masked and zero-filled. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%xmm7{%k1}{z}
/* Round keys at 0..160(%rdi): initial XOR, nine vaesenc rounds and a
 * final vaesenclast, applied to both vectors. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%xmm15,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%xmm15,%xmm3,%xmm3
/* Keystream XOR input = output; xmm12 keeps the last output block
 * before its tail bytes are zeroed. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%xmm7,%xmm3,%xmm3
	vextracti32x4	$0,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
/* Zero bytes beyond the message, then shuffle for hashing;
 * xmm13 = shuffled last block. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%xmm29,%xmm3,%xmm7
	vextracti32x4	$0,%zmm7,%xmm13
/* r8 = byte count belonging to the last block. */
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_160





/* Last block is full: clear the pending length and multiply blocks
 * 1-4 by the values at 272(%rsi) and block 5 by the one at 336(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

/* Add the two groups of partial products term by term. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

/* Split the cross term across high (zmm0) / low (zmm3) and XOR the
 * 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_160
.L_small_initial_partial_block_160:








/* Last block is partial: record its length at (%rdx) and xmm12 at
 * 16(%rsi); only blocks 1-4 are multiplied (values at 288(%rsi)). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19

/* Same cross-term split, lane fold and POLY2 reduction as above. */
	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_160:

/* r8 != 0 means a partial block is pending: XOR it into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_160
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_160:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_6_155:
/*
 * Small-message case: six blocks, the sixth possibly partial. Blocks
 * 1-4 are processed in zmm0 and blocks 5-6 in ymm3.
 * NOTE(review): register roles are inferred from usage in this block
 * (r8 = byte length, rcx/r9 = input/output base, r11 = offset,
 * rdi = round keys, rsi = context, rdx = partial-length slot) -
 * confirm against the function prologue.
 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
/* Replicate the counter; add ddq_add_1234 / ddq_add_5678 increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
/* k1 = byte mask for the second vector; table index is r8 - 64. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
/* Keep the last counter used (lane 1 of zmm3) in xmm2. */
	vextracti32x4	$1,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
/* First 64 input bytes unmasked; the rest masked and zero-filled. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%ymm7{%k1}{z}
/* Round keys at 0..160(%rdi): initial XOR, nine vaesenc rounds and a
 * final vaesenclast, applied to both vectors. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%ymm15,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%ymm15,%ymm3,%ymm3
/* Keystream XOR input = output; xmm12 keeps the last output block
 * before its tail bytes are zeroed. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%ymm7,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
/* Zero bytes beyond the message, then shuffle for hashing;
 * xmm13 = shuffled last block. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%ymm29,%ymm3,%ymm7
	vextracti32x4	$1,%zmm7,%xmm13
/* r8 = byte count belonging to the last block. */
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_161





/* Last block is full: clear the pending length and multiply blocks
 * 1-4 by the values at 256(%rsi) and blocks 5-6 by those at 320(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

/* Add the two groups of partial products term by term. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

/* Split the cross term across high (zmm0) / low (zmm3) and XOR the
 * 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_161
.L_small_initial_partial_block_161:








/* Last block is partial: record its length at (%rdx) and xmm12 at
 * 16(%rsi); only blocks 1-5 are multiplied (values at 272(%rsi) and
 * 336(%rsi)). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm7,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm7,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm7,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm7,%xmm3

/* Add the two groups of partial products term by term. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

/* Same cross-term split, lane fold and POLY2 reduction as above. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_161:

/* r8 != 0 means a partial block is pending: XOR it into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_161
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_161:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_7_155:
	/*
	 * Seven-block case of the small-initial-blocks dispatch.
	 * Registers as used below:
	 *   %rdi       round keys, read at offsets 0..160
	 *   %rsi       table read at 240..336 for the multiplies; 16(%rsi)
	 *              receives the last output block on the partial path
	 *   %rdx       receives the leftover byte count (0 on the full path)
	 *   %rcx+%r11  source bytes, %r9+%r11 destination bytes
	 *   %r8        byte count for this tail (presumably 97..112 here;
	 *              TODO confirm against the dispatcher)
	 *   zmm2       counter block, xmm14 running hash value (presumably the
	 *              GHASH accumulator; confirm against the caller)
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Broadcast lane 0 of zmm2 and derive two vectors of counter blocks. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	/* k1 = byte mask for the second 64-byte group, table index %r8 - 64. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep the counter of the 7th block (lane 2 of zmm3) in xmm2. */
	vextracti32x4	$2,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	/* Load the input; bytes beyond the k1 mask are read as zero. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	/* Round 0: XOR with the first round key broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	/* Nine vaesenc rounds with the keys at 16..144(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	/* Final round with the key at 160(%rdi): 11 round keys in total. */
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	/* xmm12 = last output block before its tail bytes are masked off. */
	vextracti32x4	$2,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle the output blocks into zmm6/zmm7 for the multiplies. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	/* xmm13 = shuffled last block, folded in after reduction if partial. */
	vextracti32x4	$2,%zmm7,%xmm13
	/* %r8 = bytes belonging to the last (7th) block. */
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_162





	/* Last block is complete: hash all 7 blocks, no leftover recorded. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value into the first group of blocks. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 240(%rsi) (presumably hash-key powers; confirm layout). */
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	/* Blocks 5-7: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2. */
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* 0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_162
.L_small_initial_partial_block_162:








	/*
	 * Last block is short: record its byte count at (%rdx) and save the
	 * unmasked last output block at 16(%rsi) (presumably so a later call
	 * can complete it; confirm against the update routine). Only the 6
	 * complete blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 256(%rsi). */
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	/* Blocks 5-6 times the 32 bytes at 320(%rsi); ymm destinations zero the upper lanes. */
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm7,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm7,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm7,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm7,%ymm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_162:

	/* If bytes of a partial block remain, XOR that block (xmm13) into xmm14 unmultiplied. */
	orq	%r8,%r8
	je	.L_after_reduction_162
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_162:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_8_155:
	/*
	 * Eight-block case of the small-initial-blocks dispatch.
	 * Registers as used below:
	 *   %rdi       round keys, read at offsets 0..160
	 *   %rsi       table read at 224..336 for the multiplies; 16(%rsi)
	 *              receives the last output block on the partial path
	 *   %rdx       receives the leftover byte count (0 on the full path)
	 *   %rcx+%r11  source bytes, %r9+%r11 destination bytes
	 *   %r8        byte count for this tail (presumably 113..128 here;
	 *              TODO confirm against the dispatcher)
	 *   zmm2       counter block, xmm14 running hash value (presumably the
	 *              GHASH accumulator; confirm against the caller)
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Broadcast lane 0 of zmm2 and derive two vectors of counter blocks. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	/* k1 = byte mask for the second 64-byte group, table index %r8 - 64. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$64,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep the counter of the 8th block (lane 3 of zmm3) in xmm2. */
	vextracti32x4	$3,%zmm3,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	/* Load the input; bytes beyond the k1 mask are read as zero. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7{%k1}{z}
	/* Round 0: XOR with the first round key broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	/* Nine vaesenc rounds with the keys at 16..144(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	/* Final round with the key at 160(%rdi): 11 round keys in total. */
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	/* xmm12 = last output block before its tail bytes are masked off. */
	vextracti32x4	$3,%zmm3,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle the output blocks into zmm6/zmm7 for the multiplies. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	/* xmm13 = shuffled last block, folded in after reduction if partial. */
	vextracti32x4	$3,%zmm7,%xmm13
	/* %r8 = bytes belonging to the last (8th) block. */
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_163





	/* Last block is complete: hash all 8 blocks, no leftover recorded. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value into the first group of blocks. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 224(%rsi) (presumably hash-key powers; confirm layout). */
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 288(%rsi). */
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	/* Sum the partial products: zmm15 high, zmm16 low, zmm17/zmm19 cross terms. */
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	/* Split the combined cross term; high half ends in zmm0, low half in zmm3. */
	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* 0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_163
.L_small_initial_partial_block_163:








	/*
	 * Last block is short: record its byte count at (%rdx) and save the
	 * unmasked last output block at 16(%rsi) (presumably so a later call
	 * can complete it; confirm against the update routine). Only the 7
	 * complete blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 240(%rsi). */
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm19
	/* Blocks 5-7: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2. */
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_163:

	/* If bytes of a partial block remain, XOR that block (xmm13) into xmm14 unmultiplied. */
	orq	%r8,%r8
	je	.L_after_reduction_163
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_163:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_9_155:
	/*
	 * Nine-block case of the small-initial-blocks dispatch. Two full
	 * 64-byte vectors (zmm0, zmm3) plus one 16-byte block in xmm4.
	 * Registers as used below:
	 *   %rdi       round keys, read at offsets 0..160
	 *   %rsi       table read at 208..336 for the multiplies; 16(%rsi)
	 *              receives the last output block on the partial path
	 *   %rdx       receives the leftover byte count (0 on the full path)
	 *   %rcx+%r11  source bytes, %r9+%r11 destination bytes
	 *   %r8        byte count for this tail (presumably 129..144 here;
	 *              TODO confirm against the dispatcher)
	 *   zmm2       counter block, xmm14 running hash value (presumably the
	 *              GHASH accumulator; confirm against the caller)
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Broadcast lane 0 of zmm2 and derive three vectors of counter blocks. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	/* k1 = byte mask for the third 64-byte group, table index %r8 - 128. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep the counter of the 9th block (lane 0 of zmm4) in xmm2. */
	vextracti32x4	$0,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
	/* Load the input; bytes beyond the k1 mask are read as zero. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%xmm10{%k1}{z}
	/* Round 0: XOR with the first round key broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%xmm15,%xmm4,%xmm4
	/* Nine vaesenc rounds with the keys at 16..144(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%xmm15,%xmm4,%xmm4
	/* Final round with the key at 160(%rdi): 11 round keys in total. */
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%xmm15,%xmm4,%xmm4
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%xmm10,%xmm4,%xmm4
	/* xmm12 = last output block before its tail bytes are masked off. */
	vextracti32x4	$0,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks into zmm6/zmm7/xmm10 for the multiplies. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%xmm29,%xmm4,%xmm10
	/* xmm13 = shuffled last block, folded in after reduction if partial. */
	vextracti32x4	$0,%zmm10,%xmm13
	/* %r8 = bytes belonging to the last (9th) block. */
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_164





	/* Last block is complete: hash all 9 blocks, no leftover recorded. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value into the first group of blocks. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 208(%rsi) (presumably hash-key powers; confirm layout). */
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 272(%rsi). */
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	/* Block 9 times the 16 bytes at 336(%rsi); xmm destinations zero the upper lanes. */
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* 0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_164
.L_small_initial_partial_block_164:








	/*
	 * Last block is short: record its byte count at (%rdx) and save the
	 * unmasked last output block at 16(%rsi) (presumably so a later call
	 * can complete it; confirm against the update routine). Only the 8
	 * complete blocks in zmm6/zmm7 are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 224(%rsi). */
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 288(%rsi). */
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	/* Sum the partial products: zmm15 high, zmm16 low, zmm17/zmm19 cross terms. */
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19

	/* Split the combined cross term; high half ends in zmm0, low half in zmm3. */
	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_164:

	/* If bytes of a partial block remain, XOR that block (xmm13) into xmm14 unmultiplied. */
	orq	%r8,%r8
	je	.L_after_reduction_164
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_164:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_10_155:
	/*
	 * Ten-block case of the small-initial-blocks dispatch. Two full
	 * 64-byte vectors (zmm0, zmm3) plus two 16-byte blocks in ymm4.
	 * Registers as used below:
	 *   %rdi       round keys, read at offsets 0..160
	 *   %rsi       table read at 192..336 for the multiplies; 16(%rsi)
	 *              receives the last output block on the partial path
	 *   %rdx       receives the leftover byte count (0 on the full path)
	 *   %rcx+%r11  source bytes, %r9+%r11 destination bytes
	 *   %r8        byte count for this tail (presumably 145..160 here;
	 *              TODO confirm against the dispatcher)
	 *   zmm2       counter block, xmm14 running hash value (presumably the
	 *              GHASH accumulator; confirm against the caller)
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Broadcast lane 0 of zmm2 and derive three vectors of counter blocks. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	/* k1 = byte mask for the third 64-byte group, table index %r8 - 128. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep the counter of the 10th block (lane 1 of zmm4) in xmm2. */
	vextracti32x4	$1,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
	/* Load the input; bytes beyond the k1 mask are read as zero. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%ymm10{%k1}{z}
	/* Round 0: XOR with the first round key broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%ymm15,%ymm4,%ymm4
	/* Nine vaesenc rounds with the keys at 16..144(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%ymm15,%ymm4,%ymm4
	/* Final round with the key at 160(%rdi): 11 round keys in total. */
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%ymm15,%ymm4,%ymm4
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%ymm10,%ymm4,%ymm4
	/* xmm12 = last output block before its tail bytes are masked off. */
	vextracti32x4	$1,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks into zmm6/zmm7/ymm10 for the multiplies. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%ymm29,%ymm4,%ymm10
	/* xmm13 = shuffled last block, folded in after reduction if partial. */
	vextracti32x4	$1,%zmm10,%xmm13
	/* %r8 = bytes belonging to the last (10th) block. */
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_165





	/* Last block is complete: hash all 10 blocks, no leftover recorded. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value into the first group of blocks. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 192(%rsi) (presumably hash-key powers; confirm layout). */
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 256(%rsi). */
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	/* Blocks 9-10 times the 32 bytes at 320(%rsi); ymm destinations zero the upper lanes. */
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* 0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_165
.L_small_initial_partial_block_165:








	/*
	 * Last block is short: record its byte count at (%rdx) and save the
	 * unmasked last output block at 16(%rsi) (presumably so a later call
	 * can complete it; confirm against the update routine). Only the 9
	 * complete blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 208(%rsi). */
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 272(%rsi). */
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	/* Block 9 (lane 0 of zmm10) times the 16 bytes at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm10,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm10,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm10,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm10,%xmm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_165:

	/* If bytes of a partial block remain, XOR that block (xmm13) into xmm14 unmultiplied. */
	orq	%r8,%r8
	je	.L_after_reduction_165
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_165:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_11_155:
	/*
	 * Eleven-block case of the small-initial-blocks dispatch. Two full
	 * 64-byte vectors (zmm0, zmm3) plus three blocks in a masked zmm4.
	 * Registers as used below:
	 *   %rdi       round keys, read at offsets 0..160
	 *   %rsi       table read at 176..336 for the multiplies; 16(%rsi)
	 *              receives the last output block on the partial path
	 *   %rdx       receives the leftover byte count (0 on the full path)
	 *   %rcx+%r11  source bytes, %r9+%r11 destination bytes
	 *   %r8        byte count for this tail (presumably 161..176 here;
	 *              TODO confirm against the dispatcher)
	 *   zmm2       counter block, xmm14 running hash value (presumably the
	 *              GHASH accumulator; confirm against the caller)
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Broadcast lane 0 of zmm2 and derive three vectors of counter blocks. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	/* k1 = byte mask for the third 64-byte group, table index %r8 - 128. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep the counter of the 11th block (lane 2 of zmm4) in xmm2. */
	vextracti32x4	$2,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	/* Load the input; bytes beyond the k1 mask are read as zero. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	/* Round 0: XOR with the first round key broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	/* Nine vaesenc rounds with the keys at 16..144(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	/* Final round with the key at 160(%rdi): 11 round keys in total. */
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	/* xmm12 = last output block before its tail bytes are masked off. */
	vextracti32x4	$2,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks into zmm6/zmm7/zmm10 for the multiplies. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	/* xmm13 = shuffled last block, folded in after reduction if partial. */
	vextracti32x4	$2,%zmm10,%xmm13
	/* %r8 = bytes belonging to the last (11th) block. */
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_166





	/* Last block is complete: hash all 11 blocks, no leftover recorded. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value into the first group of blocks. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 176(%rsi) (presumably hash-key powers; confirm layout). */
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 240(%rsi). */
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	/* Blocks 9-11: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2. */
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* 0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_166
.L_small_initial_partial_block_166:








	/*
	 * Last block is short: record its byte count at (%rdx) and save the
	 * unmasked last output block at 16(%rsi) (presumably so a later call
	 * can complete it; confirm against the update routine). Only the 10
	 * complete blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 192(%rsi). */
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 256(%rsi). */
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	/* Blocks 9-10 (low 32 bytes of zmm10) times the 32 bytes at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm10,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm10,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm10,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm10,%ymm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_166:

	/* If bytes of a partial block remain, XOR that block (xmm13) into xmm14 unmultiplied. */
	orq	%r8,%r8
	je	.L_after_reduction_166
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_166:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_12_155:
	/*
	 * Twelve-block case of the small-initial-blocks dispatch. Two full
	 * 64-byte vectors (zmm0, zmm3) plus four blocks in a masked zmm4.
	 * Registers as used below:
	 *   %rdi       round keys, read at offsets 0..160
	 *   %rsi       table read at 160..336 for the multiplies; 16(%rsi)
	 *              receives the last output block on the partial path
	 *   %rdx       receives the leftover byte count (0 on the full path)
	 *   %rcx+%r11  source bytes, %r9+%r11 destination bytes
	 *   %r8        byte count for this tail (presumably 177..192 here;
	 *              TODO confirm against the dispatcher)
	 *   zmm2       counter block, xmm14 running hash value (presumably the
	 *              GHASH accumulator; confirm against the caller)
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Broadcast lane 0 of zmm2 and derive three vectors of counter blocks. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	/* k1 = byte mask for the third 64-byte group, table index %r8 - 128. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$128,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep the counter of the 12th block (lane 3 of zmm4) in xmm2. */
	vextracti32x4	$3,%zmm4,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	/* Load the input; bytes beyond the k1 mask are read as zero. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10{%k1}{z}
	/* Round 0: XOR with the first round key broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	/* Nine vaesenc rounds with the keys at 16..144(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	/* Final round with the key at 160(%rdi): 11 round keys in total. */
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	/* xmm12 = last output block before its tail bytes are masked off. */
	vextracti32x4	$3,%zmm4,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks into zmm6/zmm7/zmm10 for the multiplies. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	/* xmm13 = shuffled last block, folded in after reduction if partial. */
	vextracti32x4	$3,%zmm10,%xmm13
	/* %r8 = bytes belonging to the last (12th) block. */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_167





	/* Last block is complete: hash all 12 blocks, no leftover recorded. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value into the first group of blocks. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 160(%rsi) (presumably hash-key powers; confirm layout). */
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 224(%rsi). */
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	/*
	 * Blocks 9-12 times the 64 bytes at 288(%rsi). zmm6/zmm7 are reused as
	 * scratch, and each three-way XOR (0x96) merges all three groups:
	 * zmm15 high, zmm16 low, zmm17/zmm19 cross terms.
	 */
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	/* Split the combined cross term; high half ends in zmm0, low half in zmm3. */
	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* 0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_167
.L_small_initial_partial_block_167:








	/*
	 * Last block is short: record its byte count at (%rdx) and save the
	 * unmasked last output block at 16(%rsi) (presumably so a later call
	 * can complete it; confirm against the update routine). Only the 11
	 * complete blocks are multiplied here.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	/* Blocks 1-4 times the 64 bytes at 176(%rsi). */
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	/* Blocks 5-8 times the 64 bytes at 240(%rsi). */
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vpxorq	%zmm15,%zmm0,%zmm15
	vpxorq	%zmm16,%zmm3,%zmm16
	vpxorq	%zmm17,%zmm4,%zmm17
	vpxorq	%zmm19,%zmm5,%zmm19
	/* Blocks 9-11: 32 bytes from 304(%rsi) plus 16 bytes from 336(%rsi) in lane 2; lane 3 of zmm20 is zero. */
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm3

	/* Sum the partial products: zmm0 high, zmm3 low, zmm4/zmm5 cross terms. */
	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Split the combined cross term across the high and low halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to a single lane. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant; result goes to xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_167:

	/* If bytes of a partial block remain, XOR that block (xmm13) into xmm14 unmultiplied. */
	orq	%r8,%r8
	je	.L_after_reduction_167
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_167:
	jmp	.L_small_initial_blocks_encrypted_155
.L_small_initial_num_blocks_is_13_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	vextracti32x4	$0,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%xmm11{%k1}{z}
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%xmm15,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%xmm15,%xmm5,%xmm5
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%xmm11,%xmm5,%xmm5
	vextracti32x4	$0,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%xmm29,%xmm5,%xmm11
	vextracti32x4	$0,%zmm11,%xmm13
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_168





	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_168
.L_small_initial_partial_block_168:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	160(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	224(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	288(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19

	vpxorq	%zmm19,%zmm17,%zmm17
	vpsrldq	$8,%zmm17,%zmm4
	vpslldq	$8,%zmm17,%zmm5
	vpxorq	%zmm4,%zmm15,%zmm0
	vpxorq	%zmm5,%zmm16,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_168:

	orq	%r8,%r8
	je	.L_after_reduction_168
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_168:
	jmp	.L_small_initial_blocks_encrypted_155
/*
 * Small-message path, 14-block case.
 * Derives counter vectors from lane 0 of zmm2, runs them through the round
 * keys at 0..160(%rdi), XORs the result with the input at (%rcx,%r11) and
 * stores it at (%r9,%r11).  Three full ZMM vectors plus the low YMM of a
 * fourth are processed; the fourth is loaded/stored under byte mask k1.
 * The byte-shuffled output is then multiplied (carry-less) by the values
 * held at fixed offsets from %rsi and reduced into xmm14.
 * NOTE(review): register roles (rdi = expanded AES key, rsi = GCM context
 * holding hash-key powers, r8 = bytes remaining, rdx = pointer to the
 * partial-block byte count) are inferred from usage here - confirm against
 * the function prologue, which is outside this section.
 */
.L_small_initial_num_blocks_is_14_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Replicate lane 0 of zmm2 into all four lanes, then add per-lane increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	/* k1 = 64-bit byte mask looked up by (r8 - 192); table entries are 8 bytes each. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep lane 1 of zmm5 (the last counter value used in this case) in xmm2. */
	vextracti32x4	$1,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
	/* Load the input; the trailing vector is masked with zeroing so no bytes past the message are read. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%ymm11{%k1}{z}
	/* Initial key XOR with the 16 bytes at 0(%rdi), broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%ymm15,%ymm5,%ymm5
	/* Nine vaesenc rounds with keys at 16..144(%rdi), then vaesenclast with 160(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%ymm15,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%ymm15,%ymm5,%ymm5
	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%ymm11,%ymm5,%ymm5
	/* xmm12 = lane 1 of the last output vector, taken before the masked zeroing below. */
	vextracti32x4	$1,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 not selected by k1 so they do not enter the hash. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Byte-shuffle the output with SHUF_MASK into zmm6/7/10/11 for hashing. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%ymm29,%ymm5,%ymm11
	/* xmm13 = shuffled last block (lane 1), used after the reduction if r8 is non-zero. */
	vextracti32x4	$1,%zmm11,%xmm13
	/* r8 = byte count left for the 14th block. */
	subq	$16 * (14 - 1),%r8


	/* Fewer than 16 bytes (signed compare) in the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_169





	/* Last block is full: clear the count at (%rdx) and hash all 14 blocks. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value (zmm14) into the first data vector. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/*
	 * Each data vector is multiplied by the vector loaded from %rsi using
	 * four vpclmulqdq products: 0x11 high*high, 0x00 low*low, 0x01/0x10 the
	 * two cross terms.  vpternlogq $0x96 is a three-input XOR accumulating
	 * the products.
	 */
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	/* Last two blocks: 32 bytes of multiplier from 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Combine the cross terms and split them across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR-fold the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-phase reduction of xmm0:xmm3 with the constant POLY2; result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_169
.L_small_initial_partial_block_169:








	/*
	 * Partial last block: record its byte count at (%rdx) and save xmm12 at
	 * 16(%rsi), then hash only the first 13 blocks.  The multiplier offsets
	 * are therefore 16 bytes higher than on the full-block path.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	144(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	208(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	272(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	/* 13th block only: a single 16-byte multiplier from 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm20
	vpclmulqdq	$0x01,%xmm20,%xmm11,%xmm4
	vpclmulqdq	$0x10,%xmm20,%xmm11,%xmm5
	vpclmulqdq	$0x11,%xmm20,%xmm11,%xmm0
	vpclmulqdq	$0x00,%xmm20,%xmm11,%xmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Same POLY2 reduction as on the full-block path. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_169:

	/* If r8 is non-zero (a partial block is pending), XOR the shuffled partial block into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_169
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_169:
	jmp	.L_small_initial_blocks_encrypted_155
/*
 * Small-message path, 15-block case.
 * Same structure as the other small-block cases: four full ZMM counter
 * vectors are encrypted with the round keys at 0..160(%rdi), the trailing
 * vector is loaded/stored under byte mask k1, and the byte-shuffled output
 * is hashed into xmm14.  Lane 2 of the fourth vector is the last block.
 * NOTE(review): register roles (rdi = expanded AES key, rsi = GCM context,
 * r8 = bytes remaining, rdx = pointer to partial-block byte count) are
 * inferred from usage in this section - confirm against the prologue.
 */
.L_small_initial_num_blocks_is_15_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Replicate lane 0 of zmm2 into all four lanes, then add per-lane increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	/* k1 = 64-bit byte mask looked up by (r8 - 192); table entries are 8 bytes each. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep lane 2 of zmm5 (the last counter value used in this case) in xmm2. */
	vextracti32x4	$2,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	/* Load the input; the trailing vector is masked with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	/* Initial key XOR with the 16 bytes at 0(%rdi), broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	/* Nine vaesenc rounds with keys at 16..144(%rdi), then vaesenclast with 160(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	/* xmm12 = lane 2 of the last output vector, taken before the masked zeroing below. */
	vextracti32x4	$2,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 not selected by k1 so they do not enter the hash. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Byte-shuffle the output with SHUF_MASK into zmm6/7/10/11 for hashing. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	/* xmm13 = shuffled last block (lane 2), used after the reduction if r8 is non-zero. */
	vextracti32x4	$2,%zmm11,%xmm13
	/* r8 = byte count left for the 15th block. */
	subq	$16 * (15 - 1),%r8


	/* Fewer than 16 bytes (signed compare) in the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_170





	/* Last block is full: clear the count at (%rdx) and hash all 15 blocks. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Fold the running hash value (zmm14) into the first data vector. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/*
	 * Four vpclmulqdq products per vector (0x11 high*high, 0x00 low*low,
	 * 0x01/0x10 cross terms); vpternlogq $0x96 is a three-input XOR.
	 */
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	/*
	 * Last three blocks: the 48-byte multiplier is assembled from a 32-byte
	 * load at 304(%rsi) plus the 16 bytes at 336(%rsi) inserted as lane 2.
	 * Lane 3 of zmm20 is zeroed by the EVEX ymm load.
	 */
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Combine the cross terms and split them across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR-fold the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-phase reduction of xmm0:xmm3 with the constant POLY2; result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_170
.L_small_initial_partial_block_170:








	/*
	 * Partial last block: record its byte count at (%rdx) and save xmm12 at
	 * 16(%rsi), then hash only the first 14 blocks.  The multiplier offsets
	 * are therefore 16 bytes higher than on the full-block path.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	vpxorq	%zmm14,%zmm6,%zmm6
	vmovdqu64	128(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	192(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	256(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	/* Blocks 13 and 14 only: 32 bytes of multiplier from 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm20
	vpclmulqdq	$0x01,%ymm20,%ymm11,%ymm4
	vpclmulqdq	$0x10,%ymm20,%ymm11,%ymm5
	vpclmulqdq	$0x11,%ymm20,%ymm11,%ymm0
	vpclmulqdq	$0x00,%ymm20,%ymm11,%ymm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Same POLY2 reduction as on the full-block path. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_170:

	/* If r8 is non-zero (a partial block is pending), XOR the shuffled partial block into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_170
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_170:
	jmp	.L_small_initial_blocks_encrypted_155
/*
 * Small-message path, 16-block case.
 * Encrypts four full ZMM counter vectors with the round keys at
 * 0..160(%rdi); the trailing vector is loaded/stored under byte mask k1.
 * Unlike the 13/14/15-block cases there is no full-vs-partial branch: the
 * code always falls into the "partial block" sequence, which hashes the
 * first 15 blocks and then XORs the 16th (shuffled) block into xmm14
 * unconditionally.
 * NOTE(review): register roles (rdi = expanded AES key, rsi = GCM context,
 * r8 = bytes remaining, rdx = pointer to partial-block byte count) are
 * inferred from usage in this section - confirm against the prologue.
 */
.L_small_initial_num_blocks_is_16_155:
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	/* Replicate lane 0 of zmm2 into all four lanes, then add per-lane increments. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	ddq_add_5678(%rip),%zmm2,%zmm3
	vpaddd	ddq_add_8888(%rip),%zmm0,%zmm4
	vpaddd	ddq_add_8888(%rip),%zmm3,%zmm5
	/* k1 = 64-bit byte mask looked up by (r8 - 192); table entries are 8 bytes each. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%r15
	subq	$192,%r15
	kmovq	(%r10,%r15,8),%k1
	/* Keep lane 3 of zmm5 (the last counter value) in xmm2. */
	vextracti32x4	$3,%zmm5,%xmm2
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	/* Load the input; the trailing vector is masked with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm6
	vmovdqu8	64(%rcx,%r11,1),%zmm7
	vmovdqu8	128(%rcx,%r11,1),%zmm10
	vmovdqu8	192(%rcx,%r11,1),%zmm11{%k1}{z}
	/* Initial key XOR with the 16 bytes at 0(%rdi), broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm15
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm15,%zmm3,%zmm3
	vpxorq	%zmm15,%zmm4,%zmm4
	vpxorq	%zmm15,%zmm5,%zmm5
	/* Nine vaesenc rounds with keys at 16..144(%rdi), then vaesenclast with 160(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm15
	vaesenc	%zmm15,%zmm0,%zmm0
	vaesenc	%zmm15,%zmm3,%zmm3
	vaesenc	%zmm15,%zmm4,%zmm4
	vaesenc	%zmm15,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm15
	vaesenclast	%zmm15,%zmm0,%zmm0
	vaesenclast	%zmm15,%zmm3,%zmm3
	vaesenclast	%zmm15,%zmm4,%zmm4
	vaesenclast	%zmm15,%zmm5,%zmm5
	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm6,%zmm0,%zmm0
	vpxorq	%zmm7,%zmm3,%zmm3
	vpxorq	%zmm10,%zmm4,%zmm4
	vpxorq	%zmm11,%zmm5,%zmm5
	/* xmm12 = lane 3 of the last output vector, taken before the masked zeroing below. */
	vextracti32x4	$3,%zmm5,%xmm12
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 not selected by k1 so they do not enter the hash. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Byte-shuffle the output with SHUF_MASK into zmm6/7/10/11 for hashing. */
	vpshufb	%zmm29,%zmm0,%zmm6
	vpshufb	%zmm29,%zmm3,%zmm7
	vpshufb	%zmm29,%zmm4,%zmm10
	vpshufb	%zmm29,%zmm5,%zmm11
	/* xmm13 = shuffled 16th block (lane 3), XORed into the hash after the reduction. */
	vextracti32x4	$3,%zmm11,%xmm13
	/* r8 = byte count of the 16th block. */
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_171:








	/* Record the last block's byte count at (%rdx) and save xmm12 at 16(%rsi). */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm12,16(%rsi)
	/* Fold the running hash value (zmm14) into the first data vector. */
	vpxorq	%zmm14,%zmm6,%zmm6
	/*
	 * Hash the first 15 blocks.  Four vpclmulqdq products per vector
	 * (0x11 high*high, 0x00 low*low, 0x01/0x10 cross terms);
	 * vpternlogq $0x96 is a three-input XOR.
	 */
	vmovdqu64	112(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm6,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm6,%zmm3
	vpclmulqdq	$0x01,%zmm20,%zmm6,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm6,%zmm5
	vmovdqu64	176(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm7,%zmm15
	vpclmulqdq	$0x00,%zmm20,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm7,%zmm17
	vpclmulqdq	$0x10,%zmm20,%zmm7,%zmm19
	vmovdqu64	240(%rsi),%zmm20
	vpclmulqdq	$0x11,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x00,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm0,%zmm6,%zmm15
	vpternlogq	$0x96,%zmm3,%zmm7,%zmm16
	vpclmulqdq	$0x01,%zmm20,%zmm10,%zmm6
	vpclmulqdq	$0x10,%zmm20,%zmm10,%zmm7
	vpternlogq	$0x96,%zmm4,%zmm6,%zmm17
	vpternlogq	$0x96,%zmm5,%zmm7,%zmm19
	/*
	 * Blocks 13..15: 48-byte multiplier from 304(%rsi) (32 bytes) plus
	 * 336(%rsi) inserted as lane 2.  Lane 3 of zmm20 is zero after the EVEX
	 * ymm load, so lane 3 of zmm11 (the 16th block) contributes nothing here.
	 */
	vmovdqu64	304(%rsi),%ymm20
	vinserti64x2	$2,336(%rsi),%zmm20,%zmm20
	vpclmulqdq	$0x01,%zmm20,%zmm11,%zmm4
	vpclmulqdq	$0x10,%zmm20,%zmm11,%zmm5
	vpclmulqdq	$0x11,%zmm20,%zmm11,%zmm0
	vpclmulqdq	$0x00,%zmm20,%zmm11,%zmm3

	vpxorq	%zmm17,%zmm4,%zmm4
	vpxorq	%zmm19,%zmm5,%zmm5
	vpxorq	%zmm15,%zmm0,%zmm0
	vpxorq	%zmm16,%zmm3,%zmm3

	/* Combine the cross terms and split them across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm17
	vpslldq	$8,%zmm4,%zmm19
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* XOR-fold the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm17
	vpxorq	%ymm17,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm17
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm19
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm19
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Two-phase reduction of xmm0:xmm3 with the constant POLY2; result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm20


	vpclmulqdq	$0x01,%xmm3,%xmm20,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm20,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm20,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_171:
	/* Always XOR the shuffled 16th block into the hash value (no r8 test in this case). */
	vpxorq	%xmm13,%xmm14,%xmm14
.L_after_reduction_171:
/*
 * Common exit of the small-block cases: write xmm2 back to 0(%rsi) and the
 * hash value xmm14 back to 64(%rsi), then leave through the shared
 * .Lexit_gcm_encrypt label.
 * NOTE(review): xmm2 is the last counter lane extracted by each case above;
 * 0(%rsi) is presumably the saved counter block - confirm against the
 * context layout.
 */
.L_small_initial_blocks_encrypted_155:
.L_ghash_done_10:
	vmovdqu64	%xmm2,0(%rsi)
	vmovdqu64	%xmm14,64(%rsi)
.L_enc_dec_done_10:
	jmp	.Lexit_gcm_encrypt
.align	32
/*
 * Entry of the code path that uses round keys 0..192(%rdi) (twelve rounds
 * after the initial key XOR).  This first part deals with a partial block
 * left over from an earlier call, then dispatches on the remaining length.
 * NOTE(review): register roles inferred from usage - rsi = GCM context
 * (0: counter block, 16: saved partial-block data, 64: hash value,
 * 336: hash key), rdx = pointer to the pending partial-block byte count,
 * rcx = input, r9 = output, r8 = length in bytes.  Confirm against the
 * function prologue, which is outside this section.
 */
.Laes_gcm_encrypt_192_avx512:
	/* Nothing to do for a zero-length request. */
	orq	%r8,%r8
	je	.L_enc_dec_done_172
	/* r14 = 0; it is tested later, before the hash-key copy/precomputation. */
	xorq	%r14,%r14
	vmovdqu64	64(%rsi),%xmm14

	/* r11 = pending byte count from (%rdx); zero means no partial block to finish. */
	movq	(%rdx),%r11
	orq	%r11,%r11
	je	.L_partial_block_done_173
	/* r10 = min(16, r8) (unsigned); k1 = byte mask for r10 bytes (2-byte table entries). */
	movl	$16,%r10d
	leaq	byte_len_to_mask_table(%rip),%r12
	cmpq	%r10,%r8
	cmovcq	%r8,%r10
	kmovw	(%r12,%r10,2),%k1
	vmovdqu8	(%rcx),%xmm0{%k1}{z}

	vmovdqu64	16(%rsi),%xmm3
	vmovdqu64	336(%rsi),%xmm4



	/* xmm5 = shuffle control at SHIFT_MASK + r11; shuffle xmm3 with it, then XOR with the input. */
	leaq	SHIFT_MASK(%rip),%r12
	addq	%r11,%r12
	vmovdqu64	(%r12),%xmm5
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm0,%xmm3,%xmm3


	/* r13 = r8 + r11 - 16; negative means the block is still incomplete after this call. */
	leaq	(%r8,%r11,1),%r13
	subq	$16,%r13
	jge	.L_no_extra_mask_173
	/* Still incomplete: move r12 forward by the shortfall (-r13) before loading the byte mask. */
	subq	%r13,%r12
.L_no_extra_mask_173:



	/* Keep only the newly produced bytes, shuffle them, and XOR them into the hash value. */
	vmovdqu64	16(%r12),%xmm0
	vpand	%xmm0,%xmm3,%xmm3
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm14,%xmm14
	cmpq	$0,%r13
	jl	.L_partial_incomplete_173

	/* Block completed: multiply xmm14 by xmm4 (four carry-less products) ... */
	vpclmulqdq	$0x11,%xmm4,%xmm14,%xmm7
	vpclmulqdq	$0x00,%xmm4,%xmm14,%xmm10
	vpclmulqdq	$0x01,%xmm4,%xmm14,%xmm11
	vpclmulqdq	$0x10,%xmm4,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm14,%xmm14

	vpsrldq	$8,%xmm14,%xmm11
	vpslldq	$8,%xmm14,%xmm14
	vpxorq	%xmm11,%xmm7,%xmm7
	vpxorq	%xmm10,%xmm14,%xmm14



	/* ... and reduce the 256-bit product with the constant POLY2. */
	vmovdqu64	POLY2(%rip),%xmm11

	vpclmulqdq	$0x01,%xmm14,%xmm11,%xmm10
	vpslldq	$8,%xmm10,%xmm10
	vpxorq	%xmm10,%xmm14,%xmm14



	vpclmulqdq	$0x00,%xmm14,%xmm11,%xmm10
	vpsrldq	$4,%xmm10,%xmm10
	vpclmulqdq	$0x10,%xmm14,%xmm11,%xmm14
	vpslldq	$4,%xmm14,%xmm14

	vpternlogq	$0x96,%xmm10,%xmm7,%xmm14

	/* No partial block pending any more. */
	movq	$0,(%rdx)

	/* r11 = 16 - (old pending count) = number of bytes consumed from this call's input. */
	movq	%r11,%r12
	movq	$16,%r11
	subq	%r12,%r11
	jmp	.L_enc_dec_done_173

.L_partial_incomplete_173:
	/* Block still incomplete: add r8 to the pending count; all r8 input bytes were consumed. */
	addq	%r8,(%rdx)
	movq	%r8,%r11

.L_enc_dec_done_173:


	/* k1 = byte mask for the r11 bytes consumed; persist the hash value. */
	leaq	byte_len_to_mask_table(%rip),%r12
	kmovw	(%r12,%r11,2),%k1
	vmovdqu64	%xmm14,64(%rsi)

	/* Apply the two shuffles to xmm3 again and store r11 bytes of it to the output at (%r9). */
	vpshufb	SHUF_MASK(%rip),%xmm3,%xmm3
	vpshufb	%xmm5,%xmm3,%xmm3
	movq	%r9,%r12
	vmovdqu8	%xmm3,(%r12){%k1}
.L_partial_block_done_173:
	/* xmm2 = 16 bytes at 0(%rsi); r8 -= bytes already consumed (r11 is 0 if no partial block). */
	vmovdqu64	0(%rsi),%xmm2
	subq	%r11,%r8
	je	.L_enc_dec_done_172
	/* 256 bytes or fewer (unsigned): handled by the small-message code. */
	cmpq	$256,%r8
	jbe	.L_message_below_equal_16_blocks_172

	/*
	 * First batch of 16 blocks (256 bytes) on the 0..192(%rdi) key path:
	 * build 16 counter blocks in zmm7/10/11/12, encrypt them, XOR with the
	 * input at (%rcx,%r11), store to (%r9,%r11), and park the byte-shuffled
	 * output at 768..960(%rsp) for later hashing.
	 * NOTE(review): the vmovdqa64 stores need those stack addresses to be
	 * 64-byte aligned; the frame setup is outside this section - confirm.
	 */
	vmovdqa64	SHUF_MASK(%rip),%zmm29
	vmovdqa64	ddq_addbe_4444(%rip),%zmm27
	vmovdqa64	ddq_addbe_1234(%rip),%zmm28






	/* r15 = low byte of dword 0 of xmm2, tracked to detect byte wrap when adding increments. */
	vmovd	%xmm2,%r15d
	andl	$255,%r15d

	/* Replicate lane 0 of zmm2 to all lanes and byte-shuffle it with SHUF_MASK. */
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2
	vpshufb	%zmm29,%zmm2,%zmm2



	/* If the tracked byte is >= 240 (unsigned), take the overflow path. */
	cmpb	$240,%r15b
	jae	.L_next_16_overflow_174
	/* Fast path: add the ddq_addbe_* increments directly to the shuffled counter. */
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_174
.L_next_16_overflow_174:
	/* Overflow path: undo the shuffle, add the ddq_add_* increments, shuffle each result back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_174:
	/* zmm2 = lane 3 of zmm12 (the 16th counter) replicated to all lanes; advance the tracked byte. */
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	/* Load 256 bytes of input. */
	vmovdqu8	0(%rcx,%r11,1),%zmm0
	vmovdqu8	64(%rcx,%r11,1),%zmm3
	vmovdqu8	128(%rcx,%r11,1),%zmm4
	vmovdqu8	192(%rcx,%r11,1),%zmm5


	/* Initial key XOR with the 16 bytes at 0(%rdi), broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	/* Eleven vaesenc rounds with keys at 16..176(%rdi), then vaesenclast with 192(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	/* Store 256 bytes of output. */
	movq	%r9,%r10
	vmovdqu8	%zmm7,0(%r10,%r11,1)
	vmovdqu8	%zmm10,64(%r10,%r11,1)
	vmovdqu8	%zmm11,128(%r10,%r11,1)
	vmovdqu8	%zmm12,192(%r10,%r11,1)

	/* Byte-shuffle the output and keep it on the stack at 768..960(%rsp). */
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,768(%rsp)
	vmovdqa64	%zmm10,832(%rsp)
	vmovdqa64	%zmm11,896(%rsp)
	vmovdqa64	%zmm12,960(%rsp)
	/*
	 * Unless r14 is non-zero, copy four 64-byte vectors from
	 * 96/160/224/288(%rsi) to 512/576/640/704(%rsp).
	 * NOTE(review): these are presumably the stored hash-key powers being
	 * staged into the stack frame - confirm against the context layout.
	 */
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_175

	vmovdqu64	288(%rsi),%zmm0
	vmovdqu64	%zmm0,704(%rsp)

	vmovdqu64	224(%rsi),%zmm3
	vmovdqu64	%zmm3,640(%rsp)


	/* Replicate lane 0 of zmm3 into all four lanes (after the unshuffled copy was stored). */
	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	160(%rsi),%zmm4
	vmovdqu64	%zmm4,576(%rsp)

	vmovdqu64	96(%rsi),%zmm5
	vmovdqu64	%zmm5,512(%rsp)
.L_skip_hkeys_precomputation_175:
	/*
	 * Second batch of 16 blocks on the 0..192(%rdi) key path.  Taken only when
	 * at least 512 bytes remain (unsigned compare); otherwise control goes to
	 * .L_message_below_32_blocks_172.  Input/output offsets are 256..448 past
	 * (%rcx,%r11) / (%r9,%r11), and the byte-shuffled output is parked at
	 * 1024..1216(%rsp).
	 */
	cmpq	$512,%r8
	jb	.L_message_below_32_blocks_172



	/* If the tracked counter byte is >= 240 (unsigned), take the overflow path. */
	cmpb	$240,%r15b
	jae	.L_next_16_overflow_176
	/* Fast path: add the ddq_addbe_* increments (zmm28/zmm27) directly. */
	vpaddd	%zmm28,%zmm2,%zmm7
	vpaddd	%zmm27,%zmm7,%zmm10
	vpaddd	%zmm27,%zmm10,%zmm11
	vpaddd	%zmm27,%zmm11,%zmm12
	jmp	.L_next_16_ok_176
.L_next_16_overflow_176:
	/* Overflow path: undo the shuffle, add the ddq_add_* increments, shuffle each result back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm12
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm7
	vpaddd	%zmm12,%zmm7,%zmm10
	vpaddd	%zmm12,%zmm10,%zmm11
	vpaddd	%zmm12,%zmm11,%zmm12
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
.L_next_16_ok_176:
	/* zmm2 = lane 3 of zmm12 (the last counter) replicated to all lanes; advance the tracked byte. */
	vshufi64x2	$255,%zmm12,%zmm12,%zmm2
	addb	$16,%r15b

	/* Load the next 256 bytes of input. */
	vmovdqu8	256(%rcx,%r11,1),%zmm0
	vmovdqu8	320(%rcx,%r11,1),%zmm3
	vmovdqu8	384(%rcx,%r11,1),%zmm4
	vmovdqu8	448(%rcx,%r11,1),%zmm5


	/* Initial key XOR with the 16 bytes at 0(%rdi), broadcast to every lane. */
	vbroadcastf64x2	0(%rdi),%zmm6
	vpxorq	%zmm6,%zmm7,%zmm7
	vpxorq	%zmm6,%zmm10,%zmm10
	vpxorq	%zmm6,%zmm11,%zmm11
	vpxorq	%zmm6,%zmm12,%zmm12
	/* Eleven vaesenc rounds with keys at 16..176(%rdi), then vaesenclast with 192(%rdi). */
	vbroadcastf64x2	16(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	32(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	48(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	64(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	80(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	96(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	112(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	128(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	144(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	160(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	176(%rdi),%zmm6
	vaesenc	%zmm6,%zmm7,%zmm7
	vaesenc	%zmm6,%zmm10,%zmm10
	vaesenc	%zmm6,%zmm11,%zmm11
	vaesenc	%zmm6,%zmm12,%zmm12
	vbroadcastf64x2	192(%rdi),%zmm6
	vaesenclast	%zmm6,%zmm7,%zmm7
	vaesenclast	%zmm6,%zmm10,%zmm10
	vaesenclast	%zmm6,%zmm11,%zmm11
	vaesenclast	%zmm6,%zmm12,%zmm12


	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm0,%zmm7,%zmm7
	vpxorq	%zmm3,%zmm10,%zmm10
	vpxorq	%zmm4,%zmm11,%zmm11
	vpxorq	%zmm5,%zmm12,%zmm12


	/* Store 256 bytes of output. */
	movq	%r9,%r10
	vmovdqu8	%zmm7,256(%r10,%r11,1)
	vmovdqu8	%zmm10,320(%r10,%r11,1)
	vmovdqu8	%zmm11,384(%r10,%r11,1)
	vmovdqu8	%zmm12,448(%r10,%r11,1)

	/* Byte-shuffle the output and keep it on the stack at 1024..1216(%rsp). */
	vpshufb	%zmm29,%zmm7,%zmm7
	vpshufb	%zmm29,%zmm10,%zmm10
	vpshufb	%zmm29,%zmm11,%zmm11
	vpshufb	%zmm29,%zmm12,%zmm12
	vmovdqa64	%zmm7,1024(%rsp)
	vmovdqa64	%zmm10,1088(%rsp)
	vmovdqa64	%zmm11,1152(%rsp)
	vmovdqa64	%zmm12,1216(%rsp)
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_177
	vmovdqu64	640(%rsp),%zmm3


	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,192(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,128(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,64(%rsp)

	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,0(%rsp)
/*
 * Join point after the hash-key precomputation that precedes this label
 * (its last store is to 0(%rsp)). The label name suggests the precomputation
 * can also be bypassed by jumping straight here.
 */
.L_skip_hkeys_precomputation_177:
	/* NOTE(review): r14 = 1 looks like a "hash keys are on the stack" flag; confirm at its readers. */
	movq	$1,%r14
	/* Advance the data offset (r11) and shrink the remaining byte count (r8) by 512 bytes. */
	addq	$512,%r11
	subq	$512,%r8

	/* The main loop below consumes 768 bytes per pass; bypass it when fewer remain. */
	cmpq	$768,%r8
	jb	.L_no_more_big_nblocks_172
/*
 * Top of the 768-byte main loop (three batches of 16 blocks per pass).
 * Builds four ZMM vectors of counter blocks (zmm0, zmm3, zmm4, zmm5) from
 * the counter vector in zmm2.
 */
.L_encrypt_big_nblocks_172:
	/*
	 * r15b is advanced by 16 per batch below. When it is >= 240 the add
	 * could carry out of the low byte, so take the slow path.
	 * NOTE(review): presumably r15b mirrors the low byte of the counter.
	 */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_178
	/* Fast path: add the increment vectors held in zmm28 / zmm27 directly. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_178
/*
 * Slow path: byte-shuffle zmm2 with the zmm29 mask, add the ddq_add_1234 /
 * ddq_add_4444 constants, then shuffle all four results back with zmm29.
 */
.L_16_blocks_overflow_178:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * Batch 1 of the main loop: run the AES rounds on 16 counter blocks
 * (zmm0, zmm3, zmm4, zmm5) while multiplying the saved data vectors at
 * 768..960(%rsp) by the table entries at 0..192(%rsp) (hash-key powers,
 * per the precomputation that stores into this area). The products START
 * the accumulators zmm24 (high), zmm25 (low) and zmm26 (middle).
 * Round keys are read from 0(%rdi)..192(%rdi): 11 vaesenc rounds plus
 * vaesenclast.
 */
.L_16_blocks_ok_178:
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash (zmm14) into the first saved data vector. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	/* Broadcast the top 128-bit lane of zmm5 (last counter block) as the next counter base. */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	/* Carry-less products: high*high (0x11), low*low (0x00) and the two cross terms (0x01, 0x10). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* vpternlogq with 0x96 is a three-input XOR: sum the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load 256 bytes of input from rcx + r11. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* First batch: the accumulators are overwritten (plain XOR of two inputs), not added to. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* XOR the keystream with the loaded input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* Write 256 bytes of output to r9 + r11. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	/* Save byte-shuffled copies of the output at 1280..1472(%rsp); batch 3 multiplies them. */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	/* Counter blocks for batch 2: same fast/slow split as at the loop top. */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_179
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_179
.L_16_blocks_overflow_179:
	/* Slow path: shuffle with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * Batch 2 of the main loop: same AES/multiply interleave as batch 1, using
 * the saved data vectors at 1024..1216(%rsp) and the table entries at
 * 256..448(%rsp). The products are ADDED (XOR) into the accumulators
 * zmm24 / zmm25 / zmm26 started by batch 1. Input and output use offsets
 * 256..448 from r11.
 */
.L_16_blocks_ok_179:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	/* Broadcast the last counter block as the next counter base. */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* Three-input XORs (imm 0x96) summing the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load the second 256 bytes of input. */
	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add this batch into the running accumulators (destination is also an XOR input). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* XOR the keystream with the loaded input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* Write the second 256 bytes of output to r9 + r11. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	/* Save byte-shuffled output at 768..960(%rsp); batch 1 of the next pass multiplies it. */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	/* Counter blocks for batch 3: same fast/slow split as before. */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_180
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_180
.L_16_blocks_overflow_180:
	/* Slow path: shuffle with zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * Batch 3 of the main loop: AES on 16 more counter blocks, interleaved with
 * the multiply of the saved data at 1280..1472(%rsp) by the table entries
 * at 512..704(%rsp) (these entries are filled outside this chunk). This
 * batch also finishes the hash for the pass: it merges the accumulators
 * zmm24 / zmm25 / zmm26, folds the four 128-bit lanes together, reduces
 * with the POLY2 constant and leaves the 128-bit result in xmm6, which is
 * copied to zmm14 as the new running hash.
 */
.L_16_blocks_ok_180:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1




	/* Broadcast the last counter block as the next counter base. */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22







	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* Three-input XORs (imm 0x96) summing the partial products so far. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load the third 256 bytes of input. */
	vmovdqu8	512(%rcx,%r11,1),%zmm17
	vmovdqu8	576(%rcx,%r11,1),%zmm19
	vmovdqu8	640(%rcx,%r11,1),%zmm20
	vmovdqu8	704(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30


	/* Total middle term in zmm10 = this batch cross products + accumulated middle (zmm26). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10

	/* Split the middle term: upper half joins the high sum, lower half joins the low sum. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* zmm6 = full high sum, zmm7 = full low sum (including accumulators zmm24 / zmm25). */
	vpternlogq	$0x96,%zmm15,%zmm12,%zmm6
	vpxorq	%zmm24,%zmm6,%zmm6
	vpternlogq	$0x96,%zmm10,%zmm13,%zmm7
	vpxorq	%zmm25,%zmm7,%zmm7
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	/* Horizontal XOR: fold the four 128-bit lanes of zmm6 and zmm7 into xmm6 and xmm7. */
	vextracti64x4	$1,%zmm6,%ymm12
	vpxorq	%ymm12,%ymm6,%ymm6
	vextracti32x4	$1,%ymm6,%xmm12
	vpxorq	%xmm12,%xmm6,%xmm6
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Reduction, first phase: multiply the low half by POLY2 and fold it back. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Reduction, second phase; the final three-way XOR leaves the reduced hash in xmm6. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm6
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* XOR the keystream with the loaded input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* Write the third 256 bytes of output to r9 + r11. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,512(%r10,%r11,1)
	vmovdqu8	%zmm3,576(%r10,%r11,1)
	vmovdqu8	%zmm4,640(%r10,%r11,1)
	vmovdqu8	%zmm5,704(%r10,%r11,1)
	/* Save byte-shuffled output at 1024..1216(%rsp) for a later multiply. */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1024(%rsp)
	vmovdqa64	%zmm3,1088(%rsp)
	vmovdqa64	%zmm4,1152(%rsp)
	vmovdqa64	%zmm5,1216(%rsp)
	/* New running hash. */
	vmovdqa64	%zmm6,%zmm14

	/* 768 bytes consumed; loop while at least another 768 remain. */
	addq	$768,%r11
	subq	$768,%r8
	cmpq	$768,%r8
	jae	.L_encrypt_big_nblocks_172

/*
 * Fewer than 768 bytes remain (r8). Pick a tail handler by remaining size:
 * >= 512 and >= 256 go to dedicated paths; anything smaller falls through
 * to the code that follows this block.
 */
.L_no_more_big_nblocks_172:

	cmpq	$512,%r8
	jae	.L_encrypt_32_blocks_172

	cmpq	$256,%r8
	jae	.L_encrypt_16_blocks_172
/*
 * Fewer than 256 bytes remain. Multiply the 16 saved data blocks at
 * 768..960(%rsp) (first one XORed with the running hash zmm14) by table
 * entries read at 0(%rsp,%rbx) and start the accumulators zmm24 (high),
 * zmm25 (low), zmm26 (middle). Then dispatch on the number of remaining
 * 16-byte blocks, counting a trailing partial block as one.
 */
.L_encrypt_0_blocks_ghash_32_172:
	/* ebx = 256 - (r8 rounded down to a multiple of 16): byte offset into the table at 0(%rsp). */
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$256,%ebx
	subl	%r10d,%ebx
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* First two vectors initialise the accumulators. */
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	/* Last two vectors are added (three-way XOR) into the accumulators. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Step the table offset 256 bytes further; the tail handlers index with rbx. */
	addl	$256,%ebx
	/* r10d = ceil(r8 / 16) = number of blocks left, 0..16. */
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_181

	/* Compare tree on the block count (unsigned compares). */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_181
	jb	.L_last_num_blocks_is_7_1_181


	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_181
	jb	.L_last_num_blocks_is_11_9_181


	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_181
	ja	.L_last_num_blocks_is_16_181
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_181
	jmp	.L_last_num_blocks_is_13_181

/* Dispatch subtree for a block count (r10d) of 9, 10 or 11. */
.L_last_num_blocks_is_11_9_181:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_181
	ja	.L_last_num_blocks_is_11_181
	jmp	.L_last_num_blocks_is_9_181

/* Dispatch subtree for a block count of 1..7. */
.L_last_num_blocks_is_7_1_181:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_181
	jb	.L_last_num_blocks_is_3_1_181

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_181
	je	.L_last_num_blocks_is_6_181
	jmp	.L_last_num_blocks_is_5_181

/* Dispatch subtree for a block count of 1..3; a count of 1 falls through to the handler that follows. */
.L_last_num_blocks_is_3_1_181:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_181
	je	.L_last_num_blocks_is_2_181
/*
 * Tail handler for one remaining block (possibly partial).
 * k1 = 64-bit byte mask read from byte64_len_to_mask_table[r8] (8-byte
 * entries), used for the masked load/store of the final data.
 */
.L_last_num_blocks_is_1_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Slow counter path when r15d >= 255, i.e. 256 minus the block count (compare the 240 threshold for 16 blocks). */
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_182
	/* Fast path: one counter block, add the zmm28 increment in the low lane only. */
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_182

.L_16_blocks_overflow_182:
	/* Slow path: shuffle with zmm29, add ddq_add_1234, shuffle the used lane back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
/*
 * One-block tail: run the AES rounds on the single counter block in xmm0
 * while multiplying the 16 saved data blocks at 1024..1216(%rsp) by the
 * table entries at 0..192(%rsp,%rbx); products are added into zmm24 /
 * zmm25 / zmm26. Then XOR with the masked input, store the masked output,
 * and, if the block is a full 16 bytes, hash it and reduce.
 */
.L_16_blocks_ok_182:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last used counter block (lane 0) broadcast in zmm2. */
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the final input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add the products for these 16 saved blocks into the accumulators. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	/* xmm11 = copy of the last output block before masking (stored at 16(%rsi) on the partial path). */
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside the mask, then byte-shuffle for hashing; xmm7 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	/* Fewer than 16 bytes in the last block: take the partial-block path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_183





	/* Full block: record 0 at (%rdx), multiply the block by the 128-bit key at 336(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Merge the middle term into high/low, then fold the four lanes to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_183
/*
 * One-block tail, partial case (r8 < 16): record the partial length at
 * (%rdx) and the last output block at 16(%rsi), reduce the accumulators
 * WITHOUT multiplying in the new block, then XOR the shuffled partial
 * block (xmm7) into the reduced hash in xmm14.
 * NOTE(review): presumably the multiply for this block is deferred until
 * the block is completed or finalised elsewhere; confirm at the readers
 * of (%rdx) and 16(%rsi).
 */
.L_small_initial_partial_block_183:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	/* Merge the middle accumulator into high/low and fold four lanes to 128 bits. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_183
/* Both one-block paths join here and leave for the common exit. */
.L_small_initial_compute_done_183:
.L_after_reduction_183:
	jmp	.L_last_blocks_done_181
/*
 * Tail handler for two remaining blocks (the second may be partial).
 * k1 = byte mask from byte64_len_to_mask_table[r8] for the final load/store.
 */
.L_last_num_blocks_is_2_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Slow counter path when r15d >= 254, i.e. 256 minus the block count. */
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_184
	/* Fast path: two counter blocks in ymm0. */
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_184

.L_16_blocks_overflow_184:
	/* Slow path: shuffle with zmm29, add ddq_add_1234, shuffle the used lanes back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
/*
 * Two-block tail: AES rounds on the two counter blocks in ymm0, interleaved
 * with the multiply of the 16 saved data blocks at 1024..1216(%rsp) by the
 * table entries at 0..192(%rsp,%rbx) (added into zmm24 / zmm25 / zmm26).
 * Then XOR with the masked input, masked store, and hashing of the new
 * blocks when the last one is full.
 */
.L_16_blocks_ok_184:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last used counter block (lane 1) broadcast in zmm2. */
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the final input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add the products for these 16 saved blocks into the accumulators. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	vpxorq	%ymm17,%ymm0,%ymm0
	/* xmm11 = copy of the last output block (lane 1) before masking. */
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside the mask, byte-shuffle for hashing; xmm7 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	/* r8 = byte length of the last block only. */
	subq	$16 * (2 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_185





	/* Last block full: record 0 at (%rdx), multiply both blocks by the 256-bit key pair at 320(%rsi). */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Merge the middle term into high/low, then fold the four lanes to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_185
/*
 * Two-block tail, partial case (last block < 16 bytes): record the partial
 * length at (%rdx) and the last output block at 16(%rsi). Only the first
 * (full) block is multiplied here, by the 128-bit key at 336(%rsi); the
 * shuffled partial block (xmm7) is XORed into the hash after reduction.
 */
.L_small_initial_partial_block_185:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Merge the middle term into high/low, then fold the four lanes to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Join of the full and partial paths. */
.L_small_initial_compute_done_185:

	/* r8 == 0 means the last block was full (already hashed): skip the XOR of the partial block. */
	orq	%r8,%r8
	je	.L_after_reduction_185
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_185:
	jmp	.L_last_blocks_done_181
/*
 * Tail handler for three remaining blocks (the third may be partial).
 * k1 = byte mask from byte64_len_to_mask_table[r8] for the final load/store.
 */
.L_last_num_blocks_is_3_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* Slow counter path when r15d >= 253, i.e. 256 minus the block count. */
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_186
	/* Fast path: counter blocks built across the full zmm0 (three lanes are used). */
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_186

.L_16_blocks_overflow_186:
	/* Slow path: shuffle with zmm29, add ddq_add_1234, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
/*
 * Three-block tail: AES rounds on the counter blocks in zmm0, interleaved
 * with the multiply of the 16 saved data blocks at 1024..1216(%rsp) by the
 * table entries at 0..192(%rsp,%rbx) (added into zmm24 / zmm25 / zmm26).
 * Then XOR with the masked input, masked store, and hashing of the new
 * blocks when the last one is full.
 */
.L_16_blocks_ok_186:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last used counter block (lane 2) broadcast in zmm2. */
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the final input bytes. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add the products for these 16 saved blocks into the accumulators. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = copy of the last output block (lane 2) before masking. */
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes outside the mask, byte-shuffle for hashing; xmm7 = shuffled last block. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	/* r8 = byte length of the last block only. */
	subq	$16 * (3 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_187





	/* Last block full: record 0 at (%rdx); key vector = 304(%rsi) (two lanes) plus 336(%rsi) in lane 2. */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Merge the middle term into high/low, then fold the four lanes to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_187
/*
 * Three-block tail, partial case (last block < 16 bytes): record the
 * partial length at (%rdx) and the last output block at 16(%rsi). Only the
 * first two (full) blocks are multiplied here, by the 256-bit key pair at
 * 320(%rsi); the shuffled partial block (xmm7) is XORed into the hash
 * after reduction.
 */
.L_small_initial_partial_block_187:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Merge the middle term into high/low, then fold the four lanes to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/* Join of the full and partial paths. */
.L_small_initial_compute_done_187:

	/* r8 == 0 means the last block was full (already hashed): skip the XOR of the partial block. */
	orq	%r8,%r8
	je	.L_after_reduction_187
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_187:
	jmp	.L_last_blocks_done_181
/*
 * .L_last_num_blocks_is_4_181 -- tail case for four trailing 16-byte blocks
 * (per the label name); the fourth block may be shorter than 16 bytes.
 *
 * Register use as visible in this block:
 *   rdi          base of the round keys read at offsets 0..192
 *   rsi          base for the multiplier loads at 288/304/336 and for the
 *                16-byte store at offset 16
 *   rdx          address that receives the leftover length of the last block
 *   rcx+r11      bytes loaded as input; r9+r11 receives the output bytes
 *   r8           byte length handled here, reduced to the last-block leftover
 *   zmm2         counter state; zmm28 is added to it to form the counters
 *   zmm29        vpshufb control (presumably a byte-reflection mask -- confirm)
 *   zmm24/25/26  running high / low / middle carry-less product sums
 *   xmm14        reduced 128-bit result left for .L_last_blocks_done_181
 */
.L_last_num_blocks_is_4_181:
	/* k1 = 64-bit byte mask taken from the table entry indexed by r8 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 252 (= 256 - 4) selects the slower counter path below */
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_188
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_188

.L_16_blocks_overflow_188:
	/* Shuffle zmm2 through zmm29, add ddq_add_1234, shuffle the sum back. */
	/* NOTE(review): presumably done so the dword add can carry correctly -- confirm */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_188:




	/* Round key 0, first pair of multiply operands from the stack frame */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Replicate 128-bit lane 3 of zmm0 (the last counter block) into zmm2 */
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	/* Initial whitening: counters XOR round key 0 */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Multiply group 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Multiply group 2 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Multiply group 3 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: fold groups 1-3 together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zero-filling load of the input bytes selected by k1 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Multiply group 4 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR everything into the running sums: zmm24 high, zmm25 low, zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining rounds; the key at offset 192 is applied with vaesenclast */
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* output = keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = lane 3 of the output, taken before the mask is applied */
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	/* Store only the bytes selected by k1, then zero the unselected bytes in zmm0 */
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* zmm17 = shuffled output used as multiplicand below; xmm7 = its lane 3 */
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	/* r8 = bytes belonging to the fourth block */
	subq	$16 * (4 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_189





	/* Last block is complete: r8 becomes 0 and 0 is written through rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* Multiply all four lanes of zmm17 by the 64 bytes at 288(%rsi) */
	/* NOTE(review): presumably four hash-key powers -- confirm against the key table layout */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Add the running sums */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_189
.L_small_initial_partial_block_189:








	/* Last block is short: record its length through rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Build a three-lane multiplier; lane 3 of zmm1 stays zero, so the */
	/* short fourth block adds nothing to the products here */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	/* Add the running sums */
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_189:

	/* r8 != 0 only on the short-block path: XOR the shuffled last block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_189
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_189:
	jmp	.L_last_blocks_done_181
/*
 * .L_last_num_blocks_is_5_181 -- tail case for five trailing 16-byte blocks
 * (per the label name); the fifth block may be shorter than 16 bytes.
 * Blocks 1-4 travel in zmm0, block 5 in xmm3.
 *
 * Register use as visible in this block:
 *   rdi          base of the round keys read at offsets 0..192
 *   rsi          base for the multiplier loads at 272/288/336 and for the
 *                16-byte store at offset 16
 *   rdx          address that receives the leftover length of the last block
 *   rcx+r11      bytes loaded as input; r9+r11 receives the output bytes
 *   r8           byte length handled here, reduced to the last-block leftover
 *   zmm2         counter state; zmm28 and zmm27 are added to form the counters
 *   zmm29        vpshufb control (presumably a byte-reflection mask -- confirm)
 *   zmm24/25/26  running high / low / middle carry-less product sums
 *   xmm14        reduced 128-bit result left for .L_last_blocks_done_181
 */
.L_last_num_blocks_is_5_181:
	/* k1 = byte mask for the second vector: table entry indexed by r8 - 64 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 251 (= 256 - 5) selects the slower counter path below */
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_190
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_190

.L_16_blocks_overflow_190:
	/* Shuffle zmm2 through zmm29, add the increments, shuffle the sums back. */
	/* NOTE(review): presumably done so the dword adds can carry correctly -- confirm */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_190:




	/* Round key 0, first pair of multiply operands from the stack frame */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Replicate 128-bit lane 0 of zmm3 (the last counter block) into zmm2 */
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	/* Initial whitening: counters XOR round key 0 */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Multiply group 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Multiply group 2 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Multiply group 3 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: fold groups 1-3 together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 full bytes, then a masked zero-filling load for the fifth block */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Multiply group 4 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR everything into the running sums: zmm24 high, zmm25 low, zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining rounds; the key at offset 192 is applied with vaesenclast */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	/* output = keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	/* xmm11 = lane 0 of the second output vector, taken before the mask is applied */
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	/* Full store of the first 64 bytes, masked store of the rest, then zero the unselected bytes in zmm3 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* zmm17/xmm19 = shuffled output used as multiplicands below; xmm7 = shuffled last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	/* r8 = bytes belonging to the fifth block */
	subq	$16 * (5 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_191





	/* Last block is complete: r8 becomes 0 and 0 is written through rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* zmm17 times the 64 bytes at 272(%rsi), xmm19 times the 16 bytes at 336(%rsi) */
	/* NOTE(review): presumably hash-key powers -- confirm against the key table layout */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Combine both product sets with the running sums */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_191
.L_small_initial_partial_block_191:








	/* Last block is short: record its length through rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Only the four full blocks in zmm17 are multiplied; xmm19 is not used on this path */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Add the running sums */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_191:

	/* r8 != 0 only on the short-block path: XOR the shuffled last block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_191
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_191:
	jmp	.L_last_blocks_done_181
/*
 * .L_last_num_blocks_is_6_181 -- tail case for six trailing 16-byte blocks
 * (per the label name); the sixth block may be shorter than 16 bytes.
 * Blocks 1-4 travel in zmm0, blocks 5-6 in ymm3.
 *
 * Register use as visible in this block:
 *   rdi          base of the round keys read at offsets 0..192
 *   rsi          base for the multiplier loads at 256/272/320/336 and for the
 *                16-byte store at offset 16
 *   rdx          address that receives the leftover length of the last block
 *   rcx+r11      bytes loaded as input; r9+r11 receives the output bytes
 *   r8           byte length handled here, reduced to the last-block leftover
 *   zmm2         counter state; zmm28 and zmm27 are added to form the counters
 *   zmm29        vpshufb control (presumably a byte-reflection mask -- confirm)
 *   zmm24/25/26  running high / low / middle carry-less product sums
 *   xmm14        reduced 128-bit result left for .L_last_blocks_done_181
 */
.L_last_num_blocks_is_6_181:
	/* k1 = byte mask for the second vector: table entry indexed by r8 - 64 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 250 (= 256 - 6) selects the slower counter path below */
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_192
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_192

.L_16_blocks_overflow_192:
	/* Shuffle zmm2 through zmm29, add the increments, shuffle the sums back. */
	/* NOTE(review): presumably done so the dword adds can carry correctly -- confirm */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_192:




	/* Round key 0, first pair of multiply operands from the stack frame */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Replicate 128-bit lane 1 of zmm3 (the last counter block) into zmm2 */
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	/* Initial whitening: counters XOR round key 0 */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Multiply group 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Multiply group 2 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Multiply group 3 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: fold groups 1-3 together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 full bytes, then a masked zero-filling load for blocks 5-6 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Multiply group 4 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR everything into the running sums: zmm24 high, zmm25 low, zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining rounds; the key at offset 192 is applied with vaesenclast */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	/* output = keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	/* xmm11 = lane 1 of the second output vector, taken before the mask is applied */
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	/* Full store of the first 64 bytes, masked store of the rest, then zero the unselected bytes in zmm3 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* zmm17/ymm19 = shuffled output used as multiplicands below; xmm7 = shuffled last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	/* r8 = bytes belonging to the sixth block */
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_193





	/* Last block is complete: r8 becomes 0 and 0 is written through rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* zmm17 times the 64 bytes at 256(%rsi), ymm19 times the 32 bytes at 320(%rsi) */
	/* NOTE(review): presumably hash-key powers -- confirm against the key table layout */
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Combine both product sets with the running sums */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_193
.L_small_initial_partial_block_193:








	/* Last block is short: record its length through rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Five blocks are multiplied: zmm17 and only the low 128 bits of zmm19; */
	/* the short sixth block adds nothing to the products here */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Combine both product sets with the running sums */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_193:

	/* r8 != 0 only on the short-block path: XOR the shuffled last block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_193
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_193:
	jmp	.L_last_blocks_done_181
/*
 * .L_last_num_blocks_is_7_181 -- tail case for seven trailing 16-byte blocks
 * (per the label name); the seventh block may be shorter than 16 bytes.
 * Blocks 1-4 travel in zmm0, blocks 5-7 in the low three lanes of zmm3.
 *
 * Register use as visible in this block:
 *   rdi          base of the round keys read at offsets 0..192
 *   rsi          base for the multiplier loads at 240/256/304/320/336 and for
 *                the 16-byte store at offset 16
 *   rdx          address that receives the leftover length of the last block
 *   rcx+r11      bytes loaded as input; r9+r11 receives the output bytes
 *   r8           byte length handled here, reduced to the last-block leftover
 *   zmm2         counter state; zmm28 and zmm27 are added to form the counters
 *   zmm29        vpshufb control (presumably a byte-reflection mask -- confirm)
 *   zmm24/25/26  running high / low / middle carry-less product sums
 *   xmm14        reduced 128-bit result left for .L_last_blocks_done_181
 */
.L_last_num_blocks_is_7_181:
	/* k1 = byte mask for the second vector: table entry indexed by r8 - 64 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 249 (= 256 - 7) selects the slower counter path below */
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_194
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_194

.L_16_blocks_overflow_194:
	/* Shuffle zmm2 through zmm29, add the increments, shuffle the sums back. */
	/* NOTE(review): presumably done so the dword adds can carry correctly -- confirm */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_194:




	/* Round key 0, first pair of multiply operands from the stack frame */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Replicate 128-bit lane 2 of zmm3 (the last counter block) into zmm2 */
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	/* Initial whitening: counters XOR round key 0 */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Multiply group 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Multiply group 2 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Multiply group 3 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: fold groups 1-3 together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 full bytes, then a masked zero-filling load for blocks 5-7 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Multiply group 4 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR everything into the running sums: zmm24 high, zmm25 low, zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining rounds; the key at offset 192 is applied with vaesenclast */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* output = keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* xmm11 = lane 2 of the second output vector, taken before the mask is applied */
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	/* Full store of the first 64 bytes, masked store of the rest, then zero the unselected bytes in zmm3 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* zmm17/zmm19 = shuffled output used as multiplicands below; xmm7 = shuffled last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	/* r8 = bytes belonging to the seventh block */
	subq	$16 * (7 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_195





	/* Last block is complete: r8 becomes 0 and 0 is written through rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* zmm17 times the 64 bytes at 240(%rsi); zmm19 times a three-lane */
	/* multiplier built from 304(%rsi) and 336(%rsi), whose lane 3 is zero */
	/* NOTE(review): presumably hash-key powers -- confirm against the key table layout */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Combine both product sets with the running sums */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_195
.L_small_initial_partial_block_195:








	/* Last block is short: record its length through rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Six blocks are multiplied: zmm17 and only the low 256 bits of zmm19; */
	/* the short seventh block adds nothing to the products here */
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Combine both product sets with the running sums */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_195:

	/* r8 != 0 only on the short-block path: XOR the shuffled last block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_195
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_195:
	jmp	.L_last_blocks_done_181
/*
 * .L_last_num_blocks_is_8_181 -- tail case for eight trailing 16-byte blocks
 * (per the label name); the eighth block may be shorter than 16 bytes.
 * Blocks 1-4 travel in zmm0, blocks 5-8 in zmm3.
 *
 * Register use as visible in this block:
 *   rdi          base of the round keys read at offsets 0..192
 *   rsi          base for the multiplier loads at 224/240/288/304/336 and for
 *                the 16-byte store at offset 16
 *   rdx          address that receives the leftover length of the last block
 *   rcx+r11      bytes loaded as input; r9+r11 receives the output bytes
 *   r8           byte length handled here, reduced to the last-block leftover
 *   zmm2         counter state; zmm28 and zmm27 are added to form the counters
 *   zmm29        vpshufb control (presumably a byte-reflection mask -- confirm)
 *   zmm24/25/26  running high / low / middle carry-less product sums
 *   xmm14        reduced 128-bit result left for .L_last_blocks_done_181
 */
.L_last_num_blocks_is_8_181:
	/* k1 = byte mask for the second vector: table entry indexed by r8 - 64 */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 248 (= 256 - 8) selects the slower counter path below */
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_196
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_196

.L_16_blocks_overflow_196:
	/* Shuffle zmm2 through zmm29, add the increments, shuffle the sums back. */
	/* NOTE(review): presumably done so the dword adds can carry correctly -- confirm */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_196:




	/* Round key 0, first pair of multiply operands from the stack frame */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Replicate 128-bit lane 3 of zmm3 (the last counter block) into zmm2 */
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	/* Initial whitening: counters XOR round key 0 */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Multiply group 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Multiply group 2 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Multiply group 3 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: fold groups 1-3 together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 full bytes, then a masked zero-filling load for blocks 5-8 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Multiply group 4 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR everything into the running sums: zmm24 high, zmm25 low, zmm26 middle */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining rounds; the key at offset 192 is applied with vaesenclast */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* output = keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* xmm11 = lane 3 of the second output vector, taken before the mask is applied */
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	/* Full store of the first 64 bytes, masked store of the rest, then zero the unselected bytes in zmm3 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* zmm17/zmm19 = shuffled output used as multiplicands below; xmm7 = shuffled last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	/* r8 = bytes belonging to the eighth block */
	subq	$16 * (8 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_197





	/* Last block is complete: r8 becomes 0 and 0 is written through rdx */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* zmm17 times the 64 bytes at 224(%rsi), zmm19 times the 64 bytes at 288(%rsi) */
	/* NOTE(review): presumably hash-key powers -- confirm against the key table layout */
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Merge the two product sets term by term */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Add the running sums */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_197
.L_small_initial_partial_block_197:








	/* Last block is short: record its length through rdx and save xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Seven blocks are multiplied: zmm17 by 240(%rsi), zmm19 by a three-lane */
	/* multiplier whose lane 3 is zero, so the short eighth block adds nothing here */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Combine both product sets with the running sums */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high and low halves, then XOR the four lanes into one */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 to 128 bits in xmm14 using the POLY2 constant */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_197:

	/* r8 != 0 only on the short-block path: XOR the shuffled last block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_197
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_197:
	jmp	.L_last_blocks_done_181
.L_last_num_blocks_is_9_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_198
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_198

.L_16_blocks_overflow_198:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_198:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_199





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_199
/*
 * 9-block tail, last block shorter than 16 bytes.
 * Stores the leftover byte count (r8) to (%rdx) and the last output block
 * (xmm11) to 16(%rsi), then multiplies only the first 8 shuffled output
 * blocks (zmm17, zmm19) by the multipliers at 224..351(%rsi), adds the
 * running sums zmm24 (high) / zmm25 (low) / zmm26 (middle), folds the four
 * 128-bit lanes together and reduces with POLY2 into xmm14.
 * NOTE(review): the (%rsi) multipliers are presumably precomputed hash-key
 * powers and the two stores presumably let a later call finish the partial
 * block - confirm against the context layout.
 */
.L_small_initial_partial_block_199:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Add the running sums: zmm26 -> middle, zmm24 -> high, zmm25 -> low. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	/* vpternlogq $0x96 is a three-way XOR: xmm14 ^= xmm5 ^ xmm0. */
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * End of the 9-block tail. The full-block path arrives with r8 reduced by
 * 16, the partial-block path with r8 unchanged. When r8 is non-zero, xmm7
 * (the last output block after the zmm29 shuffle) is XORed into the reduced
 * value in xmm14.
 * NOTE(review): presumably the multiplication of a partial last block is
 * deferred to later code - confirm at .L_last_blocks_done_181.
 */
.L_small_initial_compute_done_199:

	/* orq sets ZF from r8 without changing it. */
	orq	%r8,%r8
	je	.L_after_reduction_199
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_199:
	jmp	.L_last_blocks_done_181
/*
 * Tail case: 10 blocks (9 full 16-byte blocks plus a last block that may be
 * shorter; see the subq/cmpq on r8 further down).
 * k1 = 8-byte entry number (r8 - 128) of byte64_len_to_mask_table; k1 later
 * masks the load/store of the third vector at byte offset 128.
 * Fast counter setup: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27,
 * ymm4 = ymm3 + ymm27.
 * NOTE(review): r15d presumably tracks the low counter byte, so
 * r15d >= 246 (256 - 10) would mean a carry out of that byte; that case
 * takes .L_16_blocks_overflow_200 instead. Confirm where r15d is maintained.
 */
.L_last_num_blocks_is_10_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_200
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_200

/*
 * Alternate counter setup for the 10-block tail: zmm2 is byte-shuffled with
 * zmm29 first, the increments come from ddq_add_1234 (first vector) and
 * ddq_add_4444 (each following vector), and the results are shuffled back
 * with zmm29. Only the low 256 bits of zmm4 are shuffled back because this
 * case uses two blocks of the third vector.
 */
.L_16_blocks_overflow_200:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
/*
 * 10-block tail, main body.
 * Runs the counter blocks in zmm0 / zmm3 / ymm4 through the round keys at
 * 0..192(%rdi) (initial XOR, eleven vaesenc rounds, vaesenclast), XORs the
 * result with the input at (%rcx,%r11) and stores it to (%r9,%r11); the
 * third vector is loaded and stored under mask k1.
 * Interleaved with the AES rounds, the four vectors at 1024..1216(%rsp) are
 * carry-less multiplied with the four vectors at 0..192(%rsp,%rbx) and the
 * partial products are XOR-accumulated into zmm24 (0x11 products),
 * zmm25 (0x00 products) and zmm26 (0x01 and 0x10 products).
 * vpternlogq $0x96 is a three-way XOR.
 * NOTE(review): the stack vectors are presumably previously saved data
 * blocks and hash-key powers - confirm where they are written.
 */
.L_16_blocks_ok_200:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = last counter block used here (lane 1 of zmm4) in all lanes. */
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: two full vectors, then up to 32 bytes selected by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold the last products into the accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	/* Output = AES result XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	/* xmm11 = last output block (lane 1), taken before the k1 zeroing below. */
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 that are not selected by k1. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output blocks with zmm29; these feed the multiplies below. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	/* xmm7 = shuffled last block. */
	vextracti32x4	$1,%zmm20,%xmm7
	/* r8 = byte count left for the last (10th) block. */
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_201





	/*
	 * Last block is complete: r8 -= 16, store 0 to (%rdx), and multiply all
	 * 10 shuffled output blocks by the multipliers at 192..351(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	/* Add the running sums zmm26 (middle), zmm24 (high), zmm25 (low). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_201
/*
 * 10-block tail, last block shorter than 16 bytes.
 * Stores the leftover byte count (r8) to (%rdx) and the last output block
 * (xmm11) to 16(%rsi), then multiplies only the first 9 shuffled output
 * blocks (zmm17, zmm19, xmm20) by the multipliers at 208..351(%rsi), adds
 * the running sums zmm24 / zmm25 / zmm26, folds the lanes and reduces with
 * POLY2 into xmm14.
 * NOTE(review): the (%rsi) multipliers are presumably precomputed hash-key
 * powers and the two stores presumably let a later call finish the partial
 * block - confirm against the context layout.
 */
.L_small_initial_partial_block_201:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Ninth block only: 128-bit multiply of lane 0 of zmm20. */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	/* Add the running sums zmm26 (middle), zmm24 (high), zmm25 (low). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * End of the 10-block tail. The full-block path arrives with r8 reduced by
 * 16, the partial-block path with r8 unchanged. When r8 is non-zero, xmm7
 * (the last output block after the zmm29 shuffle) is XORed into the reduced
 * value in xmm14.
 * NOTE(review): presumably the multiplication of a partial last block is
 * deferred to later code - confirm at .L_last_blocks_done_181.
 */
.L_small_initial_compute_done_201:

	/* orq sets ZF from r8 without changing it. */
	orq	%r8,%r8
	je	.L_after_reduction_201
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_201:
	jmp	.L_last_blocks_done_181
/*
 * Tail case: 11 blocks (10 full 16-byte blocks plus a last block that may be
 * shorter; see the subq/cmpq on r8 further down).
 * k1 = 8-byte entry number (r8 - 128) of byte64_len_to_mask_table; k1 later
 * masks the load/store of the third vector at byte offset 128.
 * Fast counter setup: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27,
 * zmm4 = zmm3 + zmm27.
 * NOTE(review): r15d presumably tracks the low counter byte, so
 * r15d >= 245 (256 - 11) would mean a carry out of that byte; that case
 * takes .L_16_blocks_overflow_202 instead. Confirm where r15d is maintained.
 */
.L_last_num_blocks_is_11_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_202
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_202

/*
 * Alternate counter setup for the 11-block tail: zmm2 is byte-shuffled with
 * zmm29 first, the increments come from ddq_add_1234 (first vector) and
 * ddq_add_4444 (each following vector), and the three results are shuffled
 * back with zmm29.
 */
.L_16_blocks_overflow_202:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
/*
 * 11-block tail, main body.
 * Runs the counter blocks in zmm0 / zmm3 / zmm4 through the round keys at
 * 0..192(%rdi) (initial XOR, eleven vaesenc rounds, vaesenclast), XORs the
 * result with the input at (%rcx,%r11) and stores it to (%r9,%r11); the
 * third vector is loaded and stored under mask k1.
 * Interleaved with the AES rounds, the four vectors at 1024..1216(%rsp) are
 * carry-less multiplied with the four vectors at 0..192(%rsp,%rbx) and the
 * partial products are XOR-accumulated into zmm24 (0x11 products),
 * zmm25 (0x00 products) and zmm26 (0x01 and 0x10 products).
 * vpternlogq $0x96 is a three-way XOR.
 * NOTE(review): the stack vectors are presumably previously saved data
 * blocks and hash-key powers - confirm where they are written.
 */
.L_16_blocks_ok_202:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = last counter block used here (lane 2 of zmm4) in all lanes. */
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: two full vectors, then up to 64 bytes selected by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold the last products into the accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* Output = AES result XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* xmm11 = last output block (lane 2), taken before the k1 zeroing below. */
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 that are not selected by k1. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output blocks with zmm29; these feed the multiplies below. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* xmm7 = shuffled last block. */
	vextracti32x4	$2,%zmm20,%xmm7
	/* r8 = byte count left for the last (11th) block. */
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_203





	/*
	 * Last block is complete: r8 -= 16, store 0 to (%rdx), and multiply all
	 * 11 shuffled output blocks by the multipliers at 176..351(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Three multipliers: 32 bytes at 304 plus 16 bytes at 336 into lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	/* Add the running sums zmm26 (middle), zmm24 (high), zmm25 (low). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_203
/*
 * 11-block tail, last block shorter than 16 bytes.
 * Stores the leftover byte count (r8) to (%rdx) and the last output block
 * (xmm11) to 16(%rsi), then multiplies only the first 10 shuffled output
 * blocks (zmm17, zmm19, ymm20) by the multipliers at 192..351(%rsi), adds
 * the running sums zmm24 / zmm25 / zmm26, folds the lanes and reduces with
 * POLY2 into xmm14.
 * NOTE(review): the (%rsi) multipliers are presumably precomputed hash-key
 * powers and the two stores presumably let a later call finish the partial
 * block - confirm against the context layout.
 */
.L_small_initial_partial_block_203:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Blocks 9 and 10 only: 256-bit multiply of the low half of zmm20. */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	/* Add the running sums zmm26 (middle), zmm24 (high), zmm25 (low). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * End of the 11-block tail. The full-block path arrives with r8 reduced by
 * 16, the partial-block path with r8 unchanged. When r8 is non-zero, xmm7
 * (the last output block after the zmm29 shuffle) is XORed into the reduced
 * value in xmm14.
 * NOTE(review): presumably the multiplication of a partial last block is
 * deferred to later code - confirm at .L_last_blocks_done_181.
 */
.L_small_initial_compute_done_203:

	/* orq sets ZF from r8 without changing it. */
	orq	%r8,%r8
	je	.L_after_reduction_203
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_203:
	jmp	.L_last_blocks_done_181
/*
 * Tail case: 12 blocks (11 full 16-byte blocks plus a last block that may be
 * shorter; see the subq/cmpq on r8 further down).
 * k1 = 8-byte entry number (r8 - 128) of byte64_len_to_mask_table; k1 later
 * masks the load/store of the third vector at byte offset 128.
 * Fast counter setup: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27,
 * zmm4 = zmm3 + zmm27.
 * NOTE(review): r15d presumably tracks the low counter byte, so
 * r15d >= 244 (256 - 12) would mean a carry out of that byte; that case
 * takes .L_16_blocks_overflow_204 instead. Confirm where r15d is maintained.
 */
.L_last_num_blocks_is_12_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_204
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_204

/*
 * Alternate counter setup for the 12-block tail: zmm2 is byte-shuffled with
 * zmm29 first, the increments come from ddq_add_1234 (first vector) and
 * ddq_add_4444 (each following vector), and the three results are shuffled
 * back with zmm29.
 */
.L_16_blocks_overflow_204:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
/*
 * 12-block tail, main body.
 * Runs the counter blocks in zmm0 / zmm3 / zmm4 through the round keys at
 * 0..192(%rdi) (initial XOR, eleven vaesenc rounds, vaesenclast), XORs the
 * result with the input at (%rcx,%r11) and stores it to (%r9,%r11); the
 * third vector is loaded and stored under mask k1.
 * Interleaved with the AES rounds, the four vectors at 1024..1216(%rsp) are
 * carry-less multiplied with the four vectors at 0..192(%rsp,%rbx) and the
 * partial products are XOR-accumulated into zmm24 (0x11 products),
 * zmm25 (0x00 products) and zmm26 (0x01 and 0x10 products).
 * vpternlogq $0x96 is a three-way XOR.
 * NOTE(review): the stack vectors are presumably previously saved data
 * blocks and hash-key powers - confirm where they are written.
 */
.L_16_blocks_ok_204:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = last counter block used here (lane 3 of zmm4) in all lanes. */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: two full vectors, then up to 64 bytes selected by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold the last products into the accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* Output = AES result XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* xmm11 = last output block (lane 3), taken before the k1 zeroing below. */
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 that are not selected by k1. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output blocks with zmm29; these feed the multiplies below. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	/* xmm7 = shuffled last block. */
	vextracti32x4	$3,%zmm20,%xmm7
	/* r8 = byte count left for the last (12th) block. */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_205





	/*
	 * Last block is complete: r8 -= 16, store 0 to (%rdx), and multiply all
	 * 12 shuffled output blocks by the multipliers at 160..351(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* zmm17 / zmm19 are reused as scratch for the third vector's products. */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Add the running sums: zmm26 -> middle, zmm24 -> high, zmm25 -> low. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_205
/*
 * 12-block tail, last block shorter than 16 bytes.
 * Stores the leftover byte count (r8) to (%rdx) and the last output block
 * (xmm11) to 16(%rsi), then multiplies only the first 11 shuffled output
 * blocks by the multipliers at 176..351(%rsi), adds the running sums
 * zmm24 / zmm25 / zmm26, folds the lanes and reduces with POLY2 into xmm14.
 * NOTE(review): the (%rsi) multipliers are presumably precomputed hash-key
 * powers and the two stores presumably let a later call finish the partial
 * block - confirm against the context layout.
 */
.L_small_initial_partial_block_205:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/*
	 * Three multipliers: 32 bytes at 304 plus 16 bytes at 336 into lane 2.
	 * The ymm load clears the upper half of zmm1, so lane 3 stays zero and
	 * the 12th block in lane 3 of zmm20 contributes nothing here.
	 */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	/* Add the running sums zmm26 (middle), zmm24 (high), zmm25 (low). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * End of the 12-block tail. The full-block path arrives with r8 reduced by
 * 16, the partial-block path with r8 unchanged. When r8 is non-zero, xmm7
 * (the last output block after the zmm29 shuffle) is XORed into the reduced
 * value in xmm14.
 * NOTE(review): presumably the multiplication of a partial last block is
 * deferred to later code - confirm at .L_last_blocks_done_181.
 */
.L_small_initial_compute_done_205:

	/* orq sets ZF from r8 without changing it. */
	orq	%r8,%r8
	je	.L_after_reduction_205
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_205:
	jmp	.L_last_blocks_done_181
/*
 * Tail case: 13 blocks (12 full 16-byte blocks plus a last block that may be
 * shorter; see the subq/cmpq on r8 further down).
 * k1 = 8-byte entry number (r8 - 192) of byte64_len_to_mask_table; k1 later
 * masks the load/store of the fourth vector at byte offset 192.
 * Fast counter setup: zmm0 = zmm2 + zmm28, zmm3 = zmm0 + zmm27,
 * zmm4 = zmm3 + zmm27, xmm5 = xmm4 + xmm27.
 * NOTE(review): r15d presumably tracks the low counter byte, so
 * r15d >= 243 (256 - 13) would mean a carry out of that byte; that case
 * takes .L_16_blocks_overflow_206 instead. Confirm where r15d is maintained.
 */
.L_last_num_blocks_is_13_181:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_206
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_206

/*
 * Alternate counter setup for the 13-block tail: zmm2 is byte-shuffled with
 * zmm29 first, the increments come from ddq_add_1234 (first vector) and
 * ddq_add_4444 (each following vector; zmm5 holds the constant and then
 * receives the fourth counter vector), and the results are shuffled back
 * with zmm29. Only the low 128 bits of zmm5 are shuffled back because this
 * case uses one block of the fourth vector.
 */
.L_16_blocks_overflow_206:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
/*
 * 13-block tail, main body.
 * Runs the counter blocks in zmm0 / zmm3 / zmm4 / xmm5 through the round
 * keys at 0..192(%rdi) (initial XOR, eleven vaesenc rounds, vaesenclast),
 * XORs the result with the input at (%rcx,%r11) and stores it to
 * (%r9,%r11); the fourth vector is loaded and stored under mask k1.
 * Interleaved with the AES rounds, the four vectors at 1024..1216(%rsp) are
 * carry-less multiplied with the four vectors at 0..192(%rsp,%rbx) and the
 * partial products are XOR-accumulated into zmm24 (0x11 products),
 * zmm25 (0x00 products) and zmm26 (0x01 and 0x10 products).
 * vpternlogq $0x96 is a three-way XOR.
 * NOTE(review): the stack vectors are presumably previously saved data
 * blocks and hash-key powers - confirm where they are written.
 */
.L_16_blocks_ok_206:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = last counter block used here (lane 0 of zmm5) in all lanes. */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: three full vectors, then up to 16 bytes selected by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold the last products into the accumulators zmm24 / zmm25 / zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	/* Output = AES result XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	/* xmm11 = last output block (lane 0), taken before the k1 zeroing below. */
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that are not selected by k1. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with zmm29; these feed the multiplies below. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	/* xmm7 = shuffled last block. */
	vextracti32x4	$0,%zmm21,%xmm7
	/* r8 = byte count left for the last (13th) block. */
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_207





	/*
	 * Last block is complete: r8 -= 16, store 0 to (%rdx), and multiply all
	 * 13 shuffled output blocks by the multipliers at 144..351(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* zmm17 / zmm19 are reused as scratch for the third vector's products. */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* 13th block: 128-bit multiply of xmm21. */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Add the running sums zmm26 (middle), zmm24 (high), zmm25 (low). */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_207
/*
 * 13-block tail, last block shorter than 16 bytes.
 * Stores the leftover byte count (r8) to (%rdx) and the last output block
 * (xmm11) to 16(%rsi), then multiplies only the first 12 shuffled output
 * blocks (zmm17, zmm19, zmm20) by the multipliers at 160..351(%rsi), adds
 * the running sums zmm24 / zmm25 / zmm26, folds the lanes and reduces with
 * POLY2 into xmm14.
 * NOTE(review): the (%rsi) multipliers are presumably precomputed hash-key
 * powers and the two stores presumably let a later call finish the partial
 * block - confirm against the context layout.
 */
.L_small_initial_partial_block_207:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* zmm17 / zmm19 are reused as scratch for the third vector's products. */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Add the running sums: zmm26 -> middle, zmm24 -> high, zmm25 -> low. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the middle sum across the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of zmm0 and of zmm3 down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce xmm0 (high) : xmm3 (low) with the constant POLY2 -> xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_207:

	orq	%r8,%r8
	je	.L_after_reduction_207
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_207:
	jmp	.L_last_blocks_done_181
/*
 * Tail handler for 14 remaining blocks (three full 64-byte vectors plus a
 * fourth vector holding two blocks, the last of which may be partial).
 * r8 is used as the remaining byte count.  The code encrypts the counter
 * blocks with the key schedule at rdi (rounds at offsets 0..192, i.e. the
 * AES-192 layout), XORs them with data read from rcx+r11, stores to r9+r11,
 * and interleaves carry-less multiplies of values saved on the stack.
 * NOTE(review): the stack operands look like earlier byte-reflected blocks
 * and matching hash-key powers -- confirm against the code that fills them.
 * The reduced hash value ends up in xmm14.
 */
.L_last_num_blocks_is_14_181:
	/* k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries): byte mask */
	/* for the fourth vector, i.e. the bytes past the first 192. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 242 = 256 - 14.  NOTE(review): presumably r15d tracks the low counter */
	/* byte, so at or above 242 adding 14 would carry out of it -- confirm. */
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_208
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_208

.L_16_blocks_overflow_208:
	/* Slow path: shuffle the counter with zmm29, add ddq_add_1234 and */
	/* ddq_add_4444 steps, then shuffle every counter vector back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_208:




	/* Round key 0 broadcast to all lanes; first stack operand pair loaded. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 1 of zmm5 (the 14th counter block) copied to all lanes. */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Round 0: XOR the counter blocks with round key 0. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Carry-less products of stack pair 0: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	/* AES round with the key from rdi+16. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of stack pair 1. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	/* AES round with the key from rdi+32. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of stack pair 2. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	/* AES round with the key from rdi+48. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) merges the products of pairs 0, 1 and 2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	/* AES round with the key from rdi+64. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the fourth (ymm) load is zero-masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	/* AES round with the key from rdi+80. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of stack pair 3. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	/* AES round with the key from rdi+96. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold everything into the running accumulators: */
	/* zmm24 (high), zmm25 (low), zmm26 (cross terms). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds: keys from rdi+112 .. rdi+176, last round rdi+192. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	/* xmm11 = last output block (lane 1), taken before the mask is applied. */
	vextracti32x4	$1,%zmm5,%xmm11
	/* Store the result at r9+r11; the fourth store is masked by k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that are outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with zmm29 for hashing. */
	/* NOTE(review): presumably a byte-reflection mask -- confirm. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	/* xmm7 = shuffled last block (lane 1). */
	vextracti32x4	$1,%zmm21,%xmm7
	/* r8 = byte count of the last block only. */
	subq	$16 * (14 - 1),%r8


	/* Fewer than 16 bytes in the last block: take the partial-block path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_209





	/* Full last block: r8 becomes 0 and 0 is stored at (rdx).  All 14 blocks */
	/* are multiplied by the 14 table entries at rsi+128 .. rsi+351. */
	/* NOTE(review): table presumably holds hash-key powers -- confirm. */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Fourth vector: two blocks, so ymm-wide products. */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Merge with the running accumulators zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves, */
	/* then XOR the four 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 using the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	/* First reduction phase. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Second reduction phase; the reduced hash is left in xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_209
.L_small_initial_partial_block_209:








	/* Partial last block: store its byte count at (rdx) and save xmm11 at */
	/* rsi+16.  Only the first 13 blocks are multiplied here (13 table */
	/* entries at rsi+144 .. rsi+351); xmm7 is XORed in after the reduction. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Fourth vector: only its first block (xmm-wide products). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Merge with the running accumulators zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms and fold the lanes, as on the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_209:

	/* If a partial block is pending (r8 != 0), XOR its shuffled bytes (xmm7) */
	/* into the hash value; its multiplication is not done in this handler. */
	orq	%r8,%r8
	je	.L_after_reduction_209
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_209:
	jmp	.L_last_blocks_done_181
/*
 * Tail handler for 15 remaining blocks (three full 64-byte vectors plus a
 * fourth vector holding three blocks, the last of which may be partial).
 * r8 is used as the remaining byte count.  Same structure as the other
 * tail handlers: AES rounds with keys at rdi+0 .. rdi+192 (AES-192 layout)
 * interleaved with carry-less multiplies of values saved on the stack.
 * NOTE(review): the stack operands look like earlier byte-reflected blocks
 * and matching hash-key powers -- confirm against the code that fills them.
 * The reduced hash value ends up in xmm14.
 */
.L_last_num_blocks_is_15_181:
	/* k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries): byte mask */
	/* for the fourth vector, i.e. the bytes past the first 192. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 241 = 256 - 15.  NOTE(review): presumably r15d tracks the low counter */
	/* byte, so at or above 241 adding 15 would carry out of it -- confirm. */
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_210
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_210

.L_16_blocks_overflow_210:
	/* Slow path: shuffle the counter with zmm29, add ddq_add_1234 and */
	/* ddq_add_4444 steps, then shuffle every counter vector back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_210:




	/* Round key 0 broadcast to all lanes; first stack operand pair loaded. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 2 of zmm5 (the 15th counter block) copied to all lanes. */
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Round 0: XOR the counter blocks with round key 0. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Carry-less products of stack pair 0: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	/* AES round with the key from rdi+16. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of stack pair 1. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	/* AES round with the key from rdi+32. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of stack pair 2. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	/* AES round with the key from rdi+48. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) merges the products of pairs 0, 1 and 2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	/* AES round with the key from rdi+64. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the fourth load is zero-masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	/* AES round with the key from rdi+80. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of stack pair 3. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	/* AES round with the key from rdi+96. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold everything into the running accumulators: */
	/* zmm24 (high), zmm25 (low), zmm26 (cross terms). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds: keys from rdi+112 .. rdi+176, last round rdi+192. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = last output block (lane 2), taken before the mask is applied. */
	vextracti32x4	$2,%zmm5,%xmm11
	/* Store the result at r9+r11; the fourth store is masked by k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that are outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with zmm29 for hashing. */
	/* NOTE(review): presumably a byte-reflection mask -- confirm. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled last block (lane 2). */
	vextracti32x4	$2,%zmm21,%xmm7
	/* r8 = byte count of the last block only. */
	subq	$16 * (15 - 1),%r8


	/* Fewer than 16 bytes in the last block: take the partial-block path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_211





	/* Full last block: r8 becomes 0 and 0 is stored at (rdx).  All 15 blocks */
	/* are multiplied by the 15 table entries at rsi+112 .. rsi+351. */
	/* NOTE(review): table presumably holds hash-key powers -- confirm. */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Fourth vector: three table entries.  The ymm load clears the upper */
	/* lanes, then the third entry is inserted into lane 2 (lane 3 stays 0). */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Merge with the running accumulators zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves, */
	/* then XOR the four 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 using the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	/* First reduction phase. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Second reduction phase; the reduced hash is left in xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_211
.L_small_initial_partial_block_211:








	/* Partial last block: store its byte count at (rdx) and save xmm11 at */
	/* rsi+16.  Only the first 14 blocks are multiplied here (14 table */
	/* entries at rsi+128 .. rsi+351); xmm7 is XORed in after the reduction. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Fourth vector: only its first two blocks (ymm-wide products). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Merge with the running accumulators zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms and fold the lanes, as on the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; result in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_211:

	/* If a partial block is pending (r8 != 0), XOR its shuffled bytes (xmm7) */
	/* into the hash value; its multiplication is not done in this handler. */
	orq	%r8,%r8
	je	.L_after_reduction_211
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_211:
	jmp	.L_last_blocks_done_181
/*
 * Tail handler for 16 remaining blocks (four 64-byte vectors, the very last
 * block possibly partial).  r8 is used as the remaining byte count.  Same
 * structure as the other tail handlers: AES rounds with keys at rdi+0 ..
 * rdi+192 (AES-192 layout) interleaved with carry-less multiplies of values
 * saved on the stack.  Unlike the 14/15-block handlers there is no
 * full-versus-partial branch: the last block is always left out of the
 * multiplication and XORed into the hash value afterwards.
 * NOTE(review): the stack operands look like earlier byte-reflected blocks
 * and matching hash-key powers -- confirm against the code that fills them.
 */
.L_last_num_blocks_is_16_181:
	/* k1 = byte64_len_to_mask_table[r8 - 192] (8-byte entries): byte mask */
	/* for the fourth vector, i.e. the bytes past the first 192. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 240 = 256 - 16.  NOTE(review): presumably r15d tracks the low counter */
	/* byte, so at or above 240 adding 16 would carry out of it -- confirm. */
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_212
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_212

.L_16_blocks_overflow_212:
	/* Slow path: shuffle the counter with zmm29, add ddq_add_1234 and */
	/* ddq_add_4444 steps, then shuffle every counter vector back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_212:




	/* Round key 0 broadcast to all lanes; first stack operand pair loaded. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 3 of zmm5 (the 16th counter block) copied to all lanes. */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	/* Round 0: XOR the counter blocks with round key 0. */
	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	1088(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Carry-less products of stack pair 0: high, low and the two cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	1152(%rsp),%zmm8
	/* AES round with the key from rdi+16. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Products of stack pair 1. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	1216(%rsp),%zmm22
	/* AES round with the key from rdi+32. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Products of stack pair 2. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	/* AES round with the key from rdi+48. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) merges the products of pairs 0, 1 and 2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	/* AES round with the key from rdi+64. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the fourth load is zero-masked by k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	/* AES round with the key from rdi+80. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Products of stack pair 3. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	/* AES round with the key from rdi+96. */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold everything into the running accumulators: */
	/* zmm24 (high), zmm25 (low), zmm26 (cross terms). */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm14,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds: keys from rdi+112 .. rdi+176, last round rdi+192. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the encrypted counters with the input data. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = last output block (lane 3), taken before the mask is applied. */
	vextracti32x4	$3,%zmm5,%xmm11
	/* Store the result at r9+r11; the fourth store is masked by k1. */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that are outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output blocks with zmm29 for hashing. */
	/* NOTE(review): presumably a byte-reflection mask -- confirm. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled last block (lane 3). */
	vextracti32x4	$3,%zmm21,%xmm7
	/* r8 = byte count of the last block only; no compare against 16 here, */
	/* execution always falls through into the partial-block path. */
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_213:








	/* Store the last-block byte count at (rdx) and save xmm11 at rsi+16. */
	/* The first 15 blocks are multiplied by the 15 table entries at */
	/* rsi+112 .. rsi+351; the 16th block (xmm7) is XORed in afterwards. */
	/* NOTE(review): table presumably holds hash-key powers -- confirm. */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Fourth vector: three table entries in lanes 0..2.  The ymm load clears */
	/* the upper lanes, so lane 3 of zmm1 is zero and the product for the */
	/* last block (lane 3 of zmm21) is zero. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Merge with the running accumulators zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves, */
	/* then XOR the four 128-bit lanes of each down to one. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 using the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	/* First reduction phase. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Second reduction phase; the reduced hash is left in xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_213:
	/* Unconditionally XOR the shuffled last block into the hash value. */
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_213:
	jmp	.L_last_blocks_done_181
/*
 * Tail handler for zero remaining blocks: nothing is encrypted here.  It
 * multiplies the four 64-byte operand pairs saved on the stack
 * (rsp+1024.. with rsp+rbx+0..), adds the products to the running
 * accumulators zmm24 (high), zmm25 (low) and zmm26 (cross terms), and
 * reduces the total to a 128-bit value in xmm14.
 * NOTE(review): the stack operands look like earlier byte-reflected blocks
 * and matching hash-key powers -- confirm against the code that fills them.
 */
.L_last_num_blocks_is_0_181:
	/* Pair 0: high, low and the two cross-term products. */
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Pair 1. */
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* Three-way XOR (imm 0x96) of pairs 0 and 1 into the accumulators. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Pair 2. */
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Pair 3. */
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	/* Pairs 2 and 3 into the accumulators. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* Split the cross terms (zmm26) into the high (zmm24) and low (zmm25) */
	/* halves, then XOR the four 128-bit lanes of each down to one. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	/* Reduce the 256-bit value xmm24:xmm25 using the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm4


	/* First reduction phase. */
	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	/* Second reduction phase; the reduced hash is left in xmm14. */
	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

/*
 * Common exit of the tail handlers: shuffle the counter block in xmm2 with
 * the xmm29 mask and continue at .L_ghash_done_172.
 * NOTE(review): presumably this returns the counter to the byte order the
 * caller keeps it in -- confirm against the code at .L_ghash_done_172.
 */
.L_last_blocks_done_181:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172
.L_encrypt_32_blocks_172:
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_214
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_214
.L_16_blocks_overflow_214:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_214:
	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_215
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_215
.L_16_blocks_overflow_215:
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * 16-block body. Four ZMM vectors of counter blocks (zmm0, zmm3, zmm4, zmm5)
 * are encrypted with the round keys read from 0..192(%rdi): one XOR with the
 * key at offset 0, eleven vaesenc rounds (offsets 16..176) and a final
 * vaesenclast (offset 192). The AES rounds are interleaved with carry-less
 * multiplications of values saved on the stack, which are accumulated into
 * zmm24 (high halves), zmm25 (low halves) and zmm26 (cross terms), then
 * folded to 128 bits and reduced with POLY2 into xmm14.
 * NOTE(review): 256..704(%rsp) presumably hold hash-key powers and
 * 1024..1472(%rsp) previously produced blocks awaiting hashing - confirm
 * against the code that fills the stack frame.
 */
.L_16_blocks_ok_215:
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1024(%rsp),%zmm8
	vmovdqu64	256(%rsp),%zmm1




	/* zmm2 = lane 3 of zmm5 (last counter block of this group) in all lanes */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	/* advance the byte-wide block counter kept in r15b by 16 */
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	320(%rsp),%zmm18
	vmovdqa64	1088(%rsp),%zmm22







	/* XOR the key at offset 0 into all four counter vectors */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	/* 1024(%rsp) x 256(%rsp): 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	384(%rsp),%zmm1
	vmovdqa64	1152(%rsp),%zmm8



	/* AES round with key at offset 16 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	/* 1088(%rsp) x 320(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	448(%rsp),%zmm18
	vmovdqa64	1216(%rsp),%zmm22



	/* AES round with key at offset 32 */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	/* 1152(%rsp) x 384(%rsp) */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	/* AES round with key at offset 48 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* vpternlogq 0x96 is a three-way XOR: sum the first three product sets */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	/* AES round with key at offset 64 */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* load 256 bytes of input from rcx + r11 + 256 */
	vmovdqu8	256(%rcx,%r11,1),%zmm17
	vmovdqu8	320(%rcx,%r11,1),%zmm19
	vmovdqu8	384(%rcx,%r11,1),%zmm20
	vmovdqu8	448(%rcx,%r11,1),%zmm21



	/* AES round with key at offset 80 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	/* 1216(%rsp) x 448(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	/* AES round with key at offset 96 */
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* XOR this group's sums into the running accumulators zmm26/zmm24/zmm25 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm15,%zmm10,%zmm26
	vpternlogq	$0x96,%zmm12,%zmm6,%zmm24
	vpternlogq	$0x96,%zmm13,%zmm7,%zmm25
	/* remaining AES rounds: keys at offsets 112..176, last round key at 192 */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* output = encrypted counter blocks XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* store 256 bytes of output at r9 + r11 + 256 */
	movq	%r9,%r10
	vmovdqu8	%zmm0,256(%r10,%r11,1)
	vmovdqu8	%zmm3,320(%r10,%r11,1)
	vmovdqu8	%zmm4,384(%r10,%r11,1)
	vmovdqu8	%zmm5,448(%r10,%r11,1)
	/* save the byte-shuffled output blocks at 768..960(%rsp) for later hashing */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,768(%rsp)
	vmovdqa64	%zmm3,832(%rsp)
	vmovdqa64	%zmm4,896(%rsp)
	vmovdqa64	%zmm5,960(%rsp)
	/* multiply 1280..1472(%rsp) by 512..704(%rsp) and accumulate as above */
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* split the cross-term sum: upper 8 bytes into zmm24, lower 8 bytes into zmm25 */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	/* horizontal XOR of the four 128-bit lanes: 512 -> 256 -> 128 bits */
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm4


	/* first reduction step: fold xmm25 (low half) using POLY2 */
	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	/* second reduction step; xmm14 = xmm14 ^ xmm3 ^ xmm24 (high half) */
	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

	/*
	 * Account for the 512 bytes just processed, then dispatch on the number
	 * of 16-byte blocks (rounded up) that remain.
	 *   r8  -= 512   remaining length in bytes
	 *   r11 += 512   running offset used with rcx (input) and r9 (output)
	 *   ebx  = 512 - (r8d & ~15)   byte offset later used to index (%rsp,%rbx)
	 *   r10d = (r8d + 15) >> 4     number of remaining blocks, 0..16 handled
	 * NOTE(review): ebx presumably selects the hash-key powers matching the
	 * number of remaining whole blocks - confirm against the stack layout.
	 */
	subq	$512,%r8
	addq	$512,%r11
	movl	%r8d,%r10d
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	/* ZF from the shift: nothing left to process */
	je	.L_last_num_blocks_is_0_216

	/* three-way split around 8, using unsigned conditions */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_216
	jb	.L_last_num_blocks_is_7_1_216


	/* here r10d > 8 */
	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_216
	jb	.L_last_num_blocks_is_11_9_216


	/* here r10d > 12: 13, 14, 15, or above 15 (handled as 16) */
	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_216
	ja	.L_last_num_blocks_is_16_216
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_216
	jmp	.L_last_num_blocks_is_13_216

.L_last_num_blocks_is_11_9_216:

	/* r10d is 9, 10 or 11 */
	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_216
	ja	.L_last_num_blocks_is_11_216
	jmp	.L_last_num_blocks_is_9_216

.L_last_num_blocks_is_7_1_216:
	/* r10d is 1..7 */
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_216
	jb	.L_last_num_blocks_is_3_1_216

	/* r10d is 5, 6 or 7 */
	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_216
	je	.L_last_num_blocks_is_6_216
	jmp	.L_last_num_blocks_is_5_216

.L_last_num_blocks_is_3_1_216:

	/* r10d is 1, 2 or 3; the value 1 falls through to the code that follows */
	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_216
	je	.L_last_num_blocks_is_2_216
/*
 * Tail handler for 1 remaining block (1..16 bytes in r8).
 * Encrypts one counter block in xmm0, interleaved with the carry-less
 * multiplication of the blocks saved at 768..960(%rsp) by the values at
 * 0..192(%rsp,%rbx); XORs the result with a masked load of the input and
 * writes it with a masked store. The hash state ends up in xmm14.
 * Writes (%rdx) and, for a short last block, 16(%rsi).
 * NOTE(review): (%rdx) presumably is the partial-block length and 16(%rsi)
 * a saved partial block in the GCM context - confirm with the caller.
 */
.L_last_num_blocks_is_1_216:
	/* k1 = 8-byte table entry indexed by r8: byte mask for the final bytes */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* fast path adds xmm28 directly when r15d < 255; otherwise shuffle, add, shuffle */
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_217
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_217

.L_16_blocks_overflow_217:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_217:




	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved blocks at 768(%rsp) with the running hash zmm14 XORed in */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 0 of zmm0 (the counter block used here) in all lanes */
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* XOR the key at offset 0 into the counter block */
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the input bytes at rcx + r11 */
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* accumulators: zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	/* output = encrypted counter block XOR input */
	vpxorq	%xmm17,%xmm0,%xmm0
	/* xmm11 = output block before the bytes beyond the mask are cleared */
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	/* masked store at r9 + r11, then zero the bytes outside k1 in zmm0 */
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* xmm17 / xmm7 = byte-shuffled output block used as hash input */
	vpshufb	%xmm29,%xmm0,%xmm17
	vextracti32x4	$0,%zmm17,%xmm7


	/* fewer than 16 bytes left: the block is partial (signed compare) */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_218





	/* full block: store 0 at (%rdx), multiply xmm17 by the value at 336(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* split the cross terms into the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* horizontal XOR of the four 128-bit lanes */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* two-step reduction with POLY2; result in xmm14 */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_218
.L_small_initial_partial_block_218:








	/* partial block: store the byte count at (%rdx) and xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)


	/* no new full block to multiply: only fold and reduce the accumulators */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	vmovdqa64	POLY2(%rip),%xmm0


	vpclmulqdq	$0x01,%xmm25,%xmm0,%xmm3
	vpslldq	$8,%xmm3,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm3


	vpclmulqdq	$0x00,%xmm3,%xmm0,%xmm4
	vpsrldq	$4,%xmm4,%xmm4
	vpclmulqdq	$0x10,%xmm3,%xmm0,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm4,%xmm14












	/* XOR the shuffled partial block into the hash state without multiplying */
	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_218
.L_small_initial_compute_done_218:
.L_after_reduction_218:
	jmp	.L_last_blocks_done_216
/*
 * Tail handler for 2 remaining blocks (17..32 bytes in r8).
 * Same structure as the 1-block handler but on YMM width: two counter
 * blocks are encrypted in ymm0 while the blocks saved at 768..960(%rsp) are
 * multiplied by the values at 0..192(%rsp,%rbx). The hash state ends up in
 * xmm14. Writes (%rdx) and, for a short last block, 16(%rsi).
 * NOTE(review): 320(%rsi)/336(%rsi) presumably are hash-key powers in the
 * GCM context - confirm with the context layout.
 */
.L_last_num_blocks_is_2_216:
	/* k1 = 8-byte table entry indexed by r8: byte mask for the final bytes */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* fast path adds ymm28 directly when r15d < 254; otherwise shuffle, add, shuffle */
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_219
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_219

.L_16_blocks_overflow_219:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_219:




	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved blocks at 768(%rsp) with the running hash zmm14 XORed in */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 1 of zmm0 (the last counter block used here) in all lanes */
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* XOR the key at offset 0 into the counter blocks */
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the input bytes at rcx + r11 */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* accumulators: zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vaesenclast	%ymm30,%ymm0,%ymm0
	/* output = encrypted counter blocks XOR input */
	vpxorq	%ymm17,%ymm0,%ymm0
	/* xmm11 = last output block before the bytes beyond the mask are cleared */
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	/* masked store at r9 + r11, then zero the bytes outside k1 in zmm0 */
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* ymm17 = byte-shuffled output used as hash input; xmm7 = its last block */
	vpshufb	%ymm29,%ymm0,%ymm17
	vextracti32x4	$1,%zmm17,%xmm7
	/* r8 = bytes belonging to the last block */
	subq	$16 * (2 - 1),%r8


	/* fewer than 16 bytes left: the last block is partial (signed compare) */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_220





	/* both blocks full: store 0 at (%rdx), multiply ymm17 by the 32 bytes at 320(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* split the cross terms into the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* horizontal XOR of the four 128-bit lanes */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* two-step reduction with POLY2; result in xmm14 */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_220
.L_small_initial_partial_block_220:








	/* partial last block: store the byte count at (%rdx) and xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* only the first (full) block is multiplied, by the value at 336(%rsi) */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_220:

	/* r8 != 0 only on the partial path: XOR the shuffled partial block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_220
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_220:
	jmp	.L_last_blocks_done_216
/*
 * Tail handler for 3 remaining blocks (33..48 bytes in r8).
 * Three counter blocks are encrypted in zmm0 while the blocks saved at
 * 768..960(%rsp) are multiplied by the values at 0..192(%rsp,%rbx). The
 * hash state ends up in xmm14. Writes (%rdx) and, for a short last block,
 * 16(%rsi).
 * NOTE(review): 304/320/336(%rsi) presumably are hash-key powers in the
 * GCM context - confirm with the context layout.
 */
.L_last_num_blocks_is_3_216:
	/* k1 = 8-byte table entry indexed by r8: byte mask for the final bytes */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* fast path adds zmm28 directly when r15d < 253; otherwise shuffle, add, shuffle */
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_221
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_221

.L_16_blocks_overflow_221:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_221:




	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved blocks at 768(%rsp) with the running hash zmm14 XORed in */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 2 of zmm0 (the last counter block used here) in all lanes */
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* XOR the key at offset 0 into the counter blocks */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the input bytes at rcx + r11 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* accumulators: zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* output = encrypted counter blocks XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = last output block before the bytes beyond the mask are cleared */
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	/* masked store at r9 + r11, then zero the bytes outside k1 in zmm0 */
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* zmm17 = byte-shuffled output used as hash input; xmm7 = its last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$2,%zmm17,%xmm7
	/* r8 = bytes belonging to the last block */
	subq	$16 * (3 - 1),%r8


	/* fewer than 16 bytes left: the last block is partial (signed compare) */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_222





	/* all three blocks full: store 0 at (%rdx); zmm1 = 304(%rsi) (32 bytes) + 336(%rsi) in lane 2 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	/* split the cross terms into the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* horizontal XOR of the four 128-bit lanes */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* two-step reduction with POLY2; result in xmm14 */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_222
.L_small_initial_partial_block_222:








	/* partial last block: store the byte count at (%rdx) and xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* only the first two (full) blocks are multiplied, by the 32 bytes at 320(%rsi) */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_222:

	/* r8 != 0 only on the partial path: XOR the shuffled partial block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_222
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_222:
	jmp	.L_last_blocks_done_216
/*
 * Tail handler for 4 remaining blocks (49..64 bytes in r8).
 * Four counter blocks are encrypted in zmm0 while the blocks saved at
 * 768..960(%rsp) are multiplied by the values at 0..192(%rsp,%rbx). The
 * hash state ends up in xmm14. Writes (%rdx) and, for a short last block,
 * 16(%rsi).
 * NOTE(review): 288/304/336(%rsi) presumably are hash-key powers in the
 * GCM context - confirm with the context layout.
 */
.L_last_num_blocks_is_4_216:
	/* k1 = 8-byte table entry indexed by r8: byte mask for the final bytes */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* fast path adds zmm28 directly when r15d < 252; otherwise shuffle, add, shuffle */
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_223
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_223

.L_16_blocks_overflow_223:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_223:




	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved blocks at 768(%rsp) with the running hash zmm14 XORed in */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 3 of zmm0 (the last counter block used here) in all lanes */
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* XOR the key at offset 0 into the counter blocks */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* masked, zero-filling load of the input bytes at rcx + r11 */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* accumulators: zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* output = encrypted counter blocks XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = last output block before the bytes beyond the mask are cleared */
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	/* masked store at r9 + r11, then zero the bytes outside k1 in zmm0 */
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* zmm17 = byte-shuffled output used as hash input; xmm7 = its last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	/* r8 = bytes belonging to the last block */
	subq	$16 * (4 - 1),%r8


	/* fewer than 16 bytes left: the last block is partial (signed compare) */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_224





	/* all four blocks full: store 0 at (%rdx), multiply zmm17 by the 64 bytes at 288(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* split the cross terms into the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* horizontal XOR of the four 128-bit lanes */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* two-step reduction with POLY2; result in xmm14 */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_224
.L_small_initial_partial_block_224:








	/* partial last block: store the byte count at (%rdx) and xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* only the first three (full) blocks are multiplied: 304(%rsi) (32 bytes) + 336(%rsi) in lane 2 */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpxorq	%zmm26,%zmm4,%zmm4
	vpxorq	%zmm24,%zmm0,%zmm0
	vpxorq	%zmm25,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_224:

	/* r8 != 0 only on the partial path: XOR the shuffled partial block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_224
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_224:
	jmp	.L_last_blocks_done_216
/*
 * Tail handler for 5 remaining blocks (65..80 bytes in r8).
 * Four counter blocks are encrypted in zmm0 and a fifth in xmm3, while the
 * blocks saved at 768..960(%rsp) are multiplied by the values at
 * 0..192(%rsp,%rbx). The first 64 bytes are loaded and stored unmasked; the
 * fifth block uses the k1 byte mask. The hash state ends up in xmm14.
 * Writes (%rdx) and, for a short last block, 16(%rsi).
 * NOTE(review): 272/288/336(%rsi) presumably are hash-key powers in the
 * GCM context, and zmm27/zmm28 counter increments set up outside this
 * chunk - confirm.
 */
.L_last_num_blocks_is_5_216:
	/* k1 = table entry indexed by (r8 - 64): byte mask for the second 64-byte chunk */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/* fast path adds zmm28 / xmm27 directly when r15d < 251; otherwise shuffle, add, shuffle */
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_225
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_225

.L_16_blocks_overflow_225:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_225:




	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved blocks at 768(%rsp) with the running hash zmm14 XORed in */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* zmm2 = lane 0 of zmm3 (the fifth, last counter block) in all lanes */
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* XOR the key at offset 0 into the counter blocks */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* input at rcx + r11: first 64 bytes unmasked, fifth block masked and zero-filled */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* accumulators: zmm24 = high halves, zmm25 = low halves, zmm26 = cross terms */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	/* output = encrypted counter blocks XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	/* xmm11 = last output block before the bytes beyond the mask are cleared */
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	/* store at r9 + r11: 64 bytes unmasked, fifth block masked; then zero bytes outside k1 */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* zmm17 / xmm19 = byte-shuffled output used as hash input; xmm7 = last block */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	/* r8 = bytes belonging to the last block */
	subq	$16 * (5 - 1),%r8


	/* fewer than 16 bytes left: the last block is partial (signed compare) */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_226





	/* all five blocks full: store 0 at (%rdx); multiply zmm17 by 272(%rsi), xmm19 by 336(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* combine both product sets with the accumulators (three-way XORs) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* split the cross terms into the high (zmm0) and low (zmm3) sums */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* horizontal XOR of the four 128-bit lanes */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* two-step reduction with POLY2; result in xmm14 */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_226
.L_small_initial_partial_block_226:








	/* partial last block: store the byte count at (%rdx) and xmm11 at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* only the first four (full) blocks are multiplied, by the 64 bytes at 288(%rsi) */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_226:

	/* r8 != 0 only on the partial path: XOR the shuffled partial block into xmm14 */
	orq	%r8,%r8
	je	.L_after_reduction_226
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_226:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_6_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_227
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_227

.L_16_blocks_overflow_227:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_227:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	subq	$16 * (6 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_228





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_228
.L_small_initial_partial_block_228:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_228:

	orq	%r8,%r8
	je	.L_after_reduction_228
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_228:
	jmp	.L_last_blocks_done_216
/*
 * Tail case selected when 7 blocks are left (per the label name and the
 * "16 * (7 - 1)" adjustment below).  The 7th block may be short: it is
 * loaded and stored under byte mask k1.
 *
 * Registers as used by the code in this block:
 *   %rdi        base of the AES round keys; 0..192(%rdi) are read
 *   %rsi        context: 16(%rsi) receives the last output block on the
 *               partial path; 240..351(%rsi) are read as GHASH multipliers
 *               (NOTE(review): presumably powers of the hash key - confirm)
 *   %rdx        points at the qword that receives the partial-block length
 *   %rcx/%r9    input / output base, both indexed by %r11
 *   %r8         NOTE(review): presumably the number of bytes left - confirm
 *   %r15d       compared with 249 to pick the counter-overflow path
 *   %rbx        offset applied to the 0/64/128/192(%rsp) operands
 *   zmm2        counter base on entry; broadcast of the last counter on exit
 *   zmm14       GHASH accumulator on entry; xmm14 holds the result on exit
 *   zmm27/28/29 set up outside this block (NOTE(review): presumably counter
 *               increments and the byte-reflection shuffle mask - confirm)
 */
.L_last_num_blocks_is_7_216:
	/* k1 = byte64_len_to_mask_table[r8 - 64] (8-byte entries). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Fast path when r15d is below 249 (unsigned): derive the counter
	 * blocks directly with zmm0 = zmm2 + zmm28 and zmm3 = zmm0 + zmm27.
	 */
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_229
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_229

.L_16_blocks_overflow_229:
	/*
	 * Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 and then
	 * ddq_add_4444, and shuffle both results back through zmm29.
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_229:




	/*
	 * From here the AES rounds on zmm0/zmm3 are interleaved with a GHASH
	 * multiply of four 64-byte operand pairs taken from the stack:
	 * 768/832/896/960(%rsp) against 0/64/128/192(%rsp,%rbx).  Round keys
	 * alternate between zmm30 and zmm31 so that the next key is loaded
	 * while the current one is in use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash zmm14 into the first stack operand. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Lane 2 of zmm3 is the 7th counter block; broadcast it into zmm2. */
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 per product kind. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 64 full bytes, then the k1-masked rest (zeroed elsewhere). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums: zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds; the key at 192(%rdi) is the final round key. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* Keep the 7th output block (still unmasked) in xmm11. */
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 outside k1 before they are hashed. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the output through zmm29 to get the GHASH operands zmm17/zmm19. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	/* xmm7 = shuffled 7th block, used only at the very end. */
	vextracti32x4	$2,%zmm19,%xmm7
	/* r8 -= 96, leaving the byte count that belongs to the 7th block. */
	subq	$16 * (7 - 1),%r8


	/* Fewer than 16 bytes (signed compare) means the 7th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_230





	/*
	 * Full 7th block: clear the stored partial length and multiply all
	 * seven blocks by the values at 240..351(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* 48 bytes of multiplier: 32 via ymm1 plus 16 inserted as lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Add the new products to the sums kept in zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_230
.L_small_initial_partial_block_230:








	/*
	 * Partial 7th block: record its length at (%rdx), save the last
	 * output block at 16(%rsi), and multiply only the first six blocks
	 * (zmm17 and the low half of zmm19) by the values at 256..351(%rsi).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Same accumulation, lane fold and POLY2 reduction as the full path. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_230:

	/* If r8 is non-zero, XOR the shuffled last block xmm7 into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_230
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_230:
	jmp	.L_last_blocks_done_216
/*
 * Tail case selected when 8 blocks are left (per the label name and the
 * "16 * (8 - 1)" adjustment below).  The 8th block may be short: it is
 * loaded and stored under byte mask k1.
 *
 * Registers as used by the code in this block:
 *   %rdi        base of the AES round keys; 0..192(%rdi) are read
 *   %rsi        context: 16(%rsi) receives the last output block on the
 *               partial path; 224..351(%rsi) are read as GHASH multipliers
 *               (NOTE(review): presumably powers of the hash key - confirm)
 *   %rdx        points at the qword that receives the partial-block length
 *   %rcx/%r9    input / output base, both indexed by %r11
 *   %r8         NOTE(review): presumably the number of bytes left - confirm
 *   %r15d       compared with 248 to pick the counter-overflow path
 *   %rbx        offset applied to the 0/64/128/192(%rsp) operands
 *   zmm2        counter base on entry; broadcast of the last counter on exit
 *   zmm14       GHASH accumulator on entry; xmm14 holds the result on exit
 *   zmm27/28/29 set up outside this block (NOTE(review): presumably counter
 *               increments and the byte-reflection shuffle mask - confirm)
 */
.L_last_num_blocks_is_8_216:
	/* k1 = byte64_len_to_mask_table[r8 - 64] (8-byte entries). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Fast path when r15d is below 248 (unsigned): derive the counter
	 * blocks directly with zmm0 = zmm2 + zmm28 and zmm3 = zmm0 + zmm27.
	 */
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_231
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_231

.L_16_blocks_overflow_231:
	/*
	 * Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 and then
	 * ddq_add_4444, and shuffle both results back through zmm29.
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_231:




	/*
	 * From here the AES rounds on zmm0/zmm3 are interleaved with a GHASH
	 * multiply of four 64-byte operand pairs taken from the stack:
	 * 768/832/896/960(%rsp) against 0/64/128/192(%rsp,%rbx).  Round keys
	 * alternate between zmm30 and zmm31 so that the next key is loaded
	 * while the current one is in use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash zmm14 into the first stack operand. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Lane 3 of zmm3 is the 8th counter block; broadcast it into zmm2. */
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 per product kind. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 64 full bytes, then the k1-masked rest (zeroed elsewhere). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums: zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds; the key at 192(%rdi) is the final round key. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* Keep the 8th output block (still unmasked) in xmm11. */
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm3 outside k1 before they are hashed. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Shuffle the output through zmm29 to get the GHASH operands zmm17/zmm19. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	/* xmm7 = shuffled 8th block, used only at the very end. */
	vextracti32x4	$3,%zmm19,%xmm7
	/* r8 -= 112, leaving the byte count that belongs to the 8th block. */
	subq	$16 * (8 - 1),%r8


	/* Fewer than 16 bytes (signed compare) means the 8th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_232





	/*
	 * Full 8th block: clear the stored partial length and multiply all
	 * eight blocks by the values at 224..351(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the two groups of products per product kind. */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Add the sums kept in zmm26 (cross), zmm24 (high) and zmm25 (low). */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* XOR the four 128-bit lanes of each half down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_232
.L_small_initial_partial_block_232:








	/*
	 * Partial 8th block: record its length at (%rdx), save the last
	 * output block at 16(%rsi), and multiply only the first seven blocks
	 * by the values at 240..351(%rsi).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* 48 bytes of multiplier: 32 via ymm1 plus 16 inserted as lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Add the new products to the sums kept in zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Cross-term split, lane fold and POLY2 reduction into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_232:

	/* If r8 is non-zero, XOR the shuffled last block xmm7 into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_232
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_232:
	jmp	.L_last_blocks_done_216
/*
 * Tail case selected when 9 blocks are left (per the label name and the
 * "16 * (9 - 1)" adjustment below).  Blocks 1-8 live in zmm0/zmm3 and the
 * 9th in xmm4; the 9th may be short and is loaded and stored under k1.
 *
 * Registers as used by the code in this block:
 *   %rdi        base of the AES round keys; 0..192(%rdi) are read
 *   %rsi        context: 16(%rsi) receives the last output block on the
 *               partial path; 208..351(%rsi) are read as GHASH multipliers
 *               (NOTE(review): presumably powers of the hash key - confirm)
 *   %rdx        points at the qword that receives the partial-block length
 *   %rcx/%r9    input / output base, both indexed by %r11
 *   %r8         NOTE(review): presumably the number of bytes left - confirm
 *   %r15d       compared with 247 to pick the counter-overflow path
 *   %rbx        offset applied to the 0/64/128/192(%rsp) operands
 *   zmm2        counter base on entry; broadcast of the last counter on exit
 *   zmm14       GHASH accumulator on entry; xmm14 holds the result on exit
 *   zmm27/28/29 set up outside this block (NOTE(review): presumably counter
 *               increments and the byte-reflection shuffle mask - confirm)
 */
.L_last_num_blocks_is_9_216:
	/* k1 = byte64_len_to_mask_table[r8 - 128] (8-byte entries). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Fast path when r15d is below 247 (unsigned): zmm0 = zmm2 + zmm28,
	 * zmm3 = zmm0 + zmm27, xmm4 = xmm3 + xmm27.
	 */
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_233
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_233

.L_16_blocks_overflow_233:
	/*
	 * Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 and then
	 * ddq_add_4444 twice, and shuffle the results back through zmm29.
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_233:




	/*
	 * From here the AES rounds on zmm0/zmm3/xmm4 are interleaved with a
	 * GHASH multiply of four 64-byte operand pairs taken from the stack:
	 * 768/832/896/960(%rsp) against 0/64/128/192(%rsp,%rbx).  Round keys
	 * alternate between zmm30 and zmm31 so that the next key is loaded
	 * while the current one is in use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash zmm14 into the first stack operand. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Lane 0 of zmm4 is the 9th counter block; broadcast it into zmm2. */
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 per product kind. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 128 full bytes, then the k1-masked rest (zeroed elsewhere). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums: zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds; the key at 192(%rdi) is the final round key. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	/* Keep the 9th output block (still unmasked) in xmm11. */
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before they are hashed. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output through zmm29 to get the GHASH operands zmm17/19/20. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	/* xmm7 = shuffled 9th block, used only at the very end. */
	vextracti32x4	$0,%zmm20,%xmm7
	/* r8 -= 128, leaving the byte count that belongs to the 9th block. */
	subq	$16 * (9 - 1),%r8


	/* Fewer than 16 bytes (signed compare) means the 9th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_234





	/*
	 * Full 9th block: clear the stored partial length and multiply all
	 * nine blocks by the values at 208..351(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the first two groups of products per product kind. */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Third group: the single 9th block against the value at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	/* Add everything to the sums kept in zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_234
.L_small_initial_partial_block_234:








	/*
	 * Partial 9th block: record its length at (%rdx), save the last
	 * output block at 16(%rsi), and multiply only the first eight blocks
	 * (zmm17 and zmm19) by the values at 224..351(%rsi).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the two groups of products per product kind. */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Add the sums kept in zmm26 (cross), zmm24 (high) and zmm25 (low). */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Cross-term split, lane fold and POLY2 reduction into xmm14. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_234:

	/* If r8 is non-zero, XOR the shuffled last block xmm7 into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_234
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_234:
	jmp	.L_last_blocks_done_216
/*
 * Tail case selected when 10 blocks are left (per the label name and the
 * "16 * (10 - 1)" adjustment below).  Blocks 1-8 live in zmm0/zmm3 and
 * blocks 9-10 in ymm4; the 10th may be short, so the third group is loaded
 * and stored under byte mask k1.
 *
 * Registers as used by the code in this block:
 *   %rdi        base of the AES round keys; 0..192(%rdi) are read
 *   %rsi        context: 16(%rsi) receives the last output block on the
 *               partial path; 192..351(%rsi) are read as GHASH multipliers
 *               (NOTE(review): presumably powers of the hash key - confirm)
 *   %rdx        points at the qword that receives the partial-block length
 *   %rcx/%r9    input / output base, both indexed by %r11
 *   %r8         NOTE(review): presumably the number of bytes left - confirm
 *   %r15d       compared with 246 to pick the counter-overflow path
 *   %rbx        offset applied to the 0/64/128/192(%rsp) operands
 *   zmm2        counter base on entry; broadcast of the last counter on exit
 *   zmm14       GHASH accumulator on entry; xmm14 holds the result on exit
 *   zmm27/28/29 set up outside this block (NOTE(review): presumably counter
 *               increments and the byte-reflection shuffle mask - confirm)
 */
.L_last_num_blocks_is_10_216:
	/* k1 = byte64_len_to_mask_table[r8 - 128] (8-byte entries). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * Fast path when r15d is below 246 (unsigned): zmm0 = zmm2 + zmm28,
	 * zmm3 = zmm0 + zmm27, ymm4 = ymm3 + ymm27.
	 */
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_235
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_235

.L_16_blocks_overflow_235:
	/*
	 * Slow path: shuffle zmm2 through zmm29, add ddq_add_1234 and then
	 * ddq_add_4444 twice, and shuffle the results back through zmm29.
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_235:




	/*
	 * From here the AES rounds on zmm0/zmm3/ymm4 are interleaved with a
	 * GHASH multiply of four 64-byte operand pairs taken from the stack:
	 * 768/832/896/960(%rsp) against 0/64/128/192(%rsp,%rbx).  Round keys
	 * alternate between zmm30 and zmm31 so that the next key is loaded
	 * while the current one is in use.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash zmm14 into the first stack operand. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Lane 1 of zmm4 is the 10th counter block; broadcast it into zmm2. */
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: merge pairs 1-3 per product kind. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: 128 full bytes, then the k1-masked rest (zeroed elsewhere). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Unreduced sums: zmm24 = high parts, zmm25 = low parts, zmm26 = cross terms. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds; the key at 192(%rdi) is the final round key. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	/* XOR the keystream with the input to form the output blocks. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	/* Keep the 10th output block (still unmasked) in xmm11. */
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before they are hashed. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Shuffle the output through zmm29 to get the GHASH operands zmm17/19/20. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	/* xmm7 = shuffled 10th block, used only at the very end. */
	vextracti32x4	$1,%zmm20,%xmm7
	/* r8 -= 144, leaving the byte count that belongs to the 10th block. */
	subq	$16 * (10 - 1),%r8


	/* Fewer than 16 bytes (signed compare) means the 10th block is partial. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_236





	/*
	 * Full 10th block: clear the stored partial length and multiply all
	 * ten blocks by the values at 192..351(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the first two groups of products per product kind. */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Third group: blocks 9-10 against the 32 bytes at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	/* Add everything to the sums kept in zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the cross terms into the high (zmm0) and low (zmm3) halves. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of each half down to xmm0 / xmm3. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Reduce the 256-bit value xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_236
.L_small_initial_partial_block_236:








	/*
	 * Partial 10th block: record its length at (%rdx), save the last
	 * output block at 16(%rsi), and multiply only the first nine blocks
	 * (zmm17, zmm19 and xmm20) by the values at 208..351(%rsi).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Combine the first two groups of products per product kind. */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Third group: the 9th block against the value at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	/* Add everything to the sums kept in zmm24/zmm25/zmm26. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Cross-term split, lane fold and POLY2 reduction into xmm14. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_236:

	/* If r8 is non-zero, XOR the shuffled last block xmm7 into the hash. */
	orq	%r8,%r8
	je	.L_after_reduction_236
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_236:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_11_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_237
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_237

.L_16_blocks_overflow_237:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_237:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_238





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_238
.L_small_initial_partial_block_238:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_238:

	orq	%r8,%r8
	je	.L_after_reduction_238
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_238:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_12_216:
	/*
	 * Tail case "12 blocks left": processes three 64-byte vectors of data,
	 * the third one under byte mask k1, while finishing a carry-less
	 * multiply pass over data previously stashed on the stack.
	 *
	 * Registers as used below:
	 *   rdi        round keys, 16 bytes apart, offsets 0..192
	 *   rsi        table multiplied against the byte-shuffled output
	 *              (presumably precomputed hash-key powers, confirm against
	 *              the context layout); 16(%rsi) receives the last block
	 *   rdx        location that receives the leftover byte count
	 *   rcx / r9   source / destination base pointers, r11 = offset
	 *   r8         remaining byte count
	 *   zmm2       current counter block(s); zmm29 = vpshufb control
	 *   xmm14      running hash on entry and on exit
	 */
	/* k1 = table entry indexed by (r8 - 128): mask for the third vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * 244 = 256 - 12. NOTE(review): r15d presumably tracks the low counter
	 * byte, so values >= 244 would wrap when 12 is added; confirm.
	 */
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_239
	/*
	 * Fast path: add directly to the counter in its current byte order.
	 * zmm28/zmm27 are presumably the pre-shuffled forms of ddq_add_1234
	 * and ddq_add_4444; they are set up outside this block.
	 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_239

.L_16_blocks_overflow_239:
	/*
	 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
	 * ddq_add_4444 twice, and shuffle the three results back.
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_239:




	/*
	 * From here the AES rounds on zmm0/zmm3/zmm4 are interleaved with the
	 * multiplies of the stashed data. zmm30 and zmm31 alternate as the
	 * broadcast round key so the next key loads while the current is used.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash (zmm14) into the first stashed vector. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last counter block (lane 3 of zmm4) broadcast in zmm2. */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the first round key. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/*
	 * Stashed vector 0: four 64x64 carry-less products per lane
	 * (0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms).
	 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Stashed vector 1. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Stashed vector 2. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: sum the partial products of vectors 0..2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the source data; bytes outside k1 in the third vector read as 0. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Stashed vector 3. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Unreduced accumulators for the stashed data:
	 * zmm24 = high*high sums, zmm25 = low*low sums, zmm26 = cross sums.
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Final round uses the key at 192(%rdi): 11 vaesenc + 1 vaesenclast. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* XOR the keystream with the source data to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* xmm11 = last output block (lane 3), taken before masking. */
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	/* Store the output; only the bytes selected by k1 in the third vector. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	/* r8 = byte count belonging to the 12th (last) block. */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_240





	/*
	 * Last block is complete: record 0 leftover bytes and multiply all 12
	 * blocks by the table entries at 160/224/288(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Add the accumulators carried over from the stashed data. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/*
	 * Split the cross-term sum across the high and low halves, then XOR the
	 * four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	 */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_240
.L_small_initial_partial_block_240:








	/*
	 * Last block is partial: save its length at (%rdx) and the last output
	 * block at 16(%rsi). Only the first 11 blocks are multiplied here: the
	 * ymm load zeroes the upper half of zmm1 and the insert fills lane 2
	 * only, so lane 3 (the partial block) is multiplied by zero.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	/* Combine with the accumulators carried over from the stashed data. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Same fold, lane-wise XOR and POLY2 reduction as the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_240:

	/*
	 * r8 is zero only after the full-block path. Otherwise XOR the shuffled
	 * partial block (xmm7) into the hash; its multiply is not done here.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_240
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_240:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_13_216:
	/*
	 * Tail case "13 blocks left": processes three full 64-byte vectors plus
	 * one 16-byte block (xmm5) under byte mask k1, while finishing a
	 * carry-less multiply pass over data previously stashed on the stack.
	 *
	 * Registers as used below:
	 *   rdi        round keys, 16 bytes apart, offsets 0..192
	 *   rsi        table multiplied against the byte-shuffled output
	 *              (presumably precomputed hash-key powers, confirm against
	 *              the context layout); 16(%rsi) receives the last block
	 *   rdx        location that receives the leftover byte count
	 *   rcx / r9   source / destination base pointers, r11 = offset
	 *   r8         remaining byte count
	 *   zmm2       current counter block(s); zmm29 = vpshufb control
	 *   xmm14      running hash on entry and on exit
	 */
	/* k1 = table entry indexed by (r8 - 192): mask for the fourth vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * 243 = 256 - 13. NOTE(review): r15d presumably tracks the low counter
	 * byte, so values >= 243 would wrap when 13 is added; confirm.
	 */
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_241
	/*
	 * Fast path: add directly to the counter in its current byte order.
	 * zmm28/zmm27 are presumably the pre-shuffled forms of ddq_add_1234
	 * and ddq_add_4444; they are set up outside this block.
	 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_241

.L_16_blocks_overflow_241:
	/*
	 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
	 * ddq_add_4444 repeatedly, and shuffle the results back (only the low
	 * 128 bits of zmm5 are needed, so only xmm5 is shuffled).
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
.L_16_blocks_ok_241:




	/*
	 * From here the AES rounds on zmm0/zmm3/zmm4/xmm5 are interleaved with
	 * the multiplies of the stashed data. zmm30 and zmm31 alternate as the
	 * broadcast round key so the next key loads while the current is used.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash (zmm14) into the first stashed vector. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last counter block (lane 0 of zmm5) broadcast in zmm2. */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the first round key. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/*
	 * Stashed vector 0: four 64x64 carry-less products per lane
	 * (0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms).
	 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Stashed vector 1. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Stashed vector 2. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: sum the partial products of vectors 0..2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the source data; bytes outside k1 in the last block read as 0. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Stashed vector 3. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Unreduced accumulators for the stashed data:
	 * zmm24 = high*high sums, zmm25 = low*low sums, zmm26 = cross sums.
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	/* Final round uses the key at 192(%rdi): 11 vaesenc + 1 vaesenclast. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	/* XOR the keystream with the source data to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	/* xmm11 = last output block (lane 0 of zmm5), taken before masking. */
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	/* Store the output; only the bytes selected by k1 in the last block. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Byte-shuffle the output blocks for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	/* r8 = byte count belonging to the 13th (last) block. */
	subq	$16 * (13 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_242





	/*
	 * Last block is complete: record 0 leftover bytes and multiply all 13
	 * blocks by the table entries at 144/208/272(%rsi) and 336(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* 13th block: 128-bit multiply by the entry at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Combine with the accumulators carried over from the stashed data. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/*
	 * Split the cross-term sum across the high and low halves, then XOR the
	 * four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_242
.L_small_initial_partial_block_242:








	/*
	 * Last block is partial: save its length at (%rdx) and the last output
	 * block at 16(%rsi). Only the 12 complete blocks (zmm17/zmm19/zmm20)
	 * are multiplied here; the partial block in zmm21 is left out.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Add the accumulators carried over from the stashed data. */
	vpxorq	%zmm26,%zmm30,%zmm30
	vpxorq	%zmm24,%zmm8,%zmm8
	vpxorq	%zmm25,%zmm22,%zmm22

	/* Same fold, lane-wise XOR and POLY2 reduction as the full-block path. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_242:

	/*
	 * r8 is zero only after the full-block path. Otherwise XOR the shuffled
	 * partial block (xmm7) into the hash; its multiply is not done here.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_242
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_242:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_14_216:
	/*
	 * Tail case "14 blocks left": processes three full 64-byte vectors plus
	 * one 32-byte vector (ymm5) under byte mask k1, while finishing a
	 * carry-less multiply pass over data previously stashed on the stack.
	 *
	 * Registers as used below:
	 *   rdi        round keys, 16 bytes apart, offsets 0..192
	 *   rsi        table multiplied against the byte-shuffled output
	 *              (presumably precomputed hash-key powers, confirm against
	 *              the context layout); 16(%rsi) receives the last block
	 *   rdx        location that receives the leftover byte count
	 *   rcx / r9   source / destination base pointers, r11 = offset
	 *   r8         remaining byte count
	 *   zmm2       current counter block(s); zmm29 = vpshufb control
	 *   xmm14      running hash on entry and on exit
	 */
	/* k1 = table entry indexed by (r8 - 192): mask for the fourth vector. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * 242 = 256 - 14. NOTE(review): r15d presumably tracks the low counter
	 * byte, so values >= 242 would wrap when 14 is added; confirm.
	 */
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_243
	/*
	 * Fast path: add directly to the counter in its current byte order.
	 * zmm28/zmm27 are presumably the pre-shuffled forms of ddq_add_1234
	 * and ddq_add_4444; they are set up outside this block.
	 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_243

.L_16_blocks_overflow_243:
	/*
	 * Slow path: shuffle the counter with zmm29, add ddq_add_1234 and then
	 * ddq_add_4444 repeatedly, and shuffle the results back (only the low
	 * 256 bits of zmm5 are needed, so only ymm5 is shuffled).
	 */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
.L_16_blocks_ok_243:




	/*
	 * From here the AES rounds on zmm0/zmm3/zmm4/ymm5 are interleaved with
	 * the multiplies of the stashed data. zmm30 and zmm31 alternate as the
	 * broadcast round key so the next key loads while the current is used.
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* Fold the running hash (zmm14) into the first stashed vector. */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last counter block (lane 1 of zmm5) broadcast in zmm2. */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Round 0: XOR the counter blocks with the first round key. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/*
	 * Stashed vector 0: four 64x64 carry-less products per lane
	 * (0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms).
	 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Stashed vector 1. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Stashed vector 2. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* 0x96 is a three-way XOR: sum the partial products of vectors 0..2. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the source data; bytes outside k1 in the last vector read as 0. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Stashed vector 3. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Unreduced accumulators for the stashed data:
	 * zmm24 = high*high sums, zmm25 = low*low sums, zmm26 = cross sums.
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	/* Final round uses the key at 192(%rdi): 11 vaesenc + 1 vaesenclast. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	/* XOR the keystream with the source data to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	/* xmm11 = last output block (lane 1 of zmm5), taken before masking. */
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	/* Store the output; only the bytes selected by k1 in the last vector. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes outside k1 so they do not enter the hash. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Byte-shuffle the output blocks for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	/* r8 = byte count belonging to the 14th (last) block. */
	subq	$16 * (14 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_244





	/*
	 * Last block is complete: record 0 leftover bytes and multiply all 14
	 * blocks by the table entries at 128/192/256(%rsi) and 320(%rsi).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Blocks 13 and 14: 256-bit multiply by the two entries at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Combine with the accumulators carried over from the stashed data. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/*
	 * Split the cross-term sum across the high and low halves, then XOR the
	 * four 128-bit lanes together: xmm0 = high half, xmm3 = low half.
	 */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction of xmm0:xmm3 with the POLY2 constant into xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_244
.L_small_initial_partial_block_244:








	/*
	 * Last block is partial: save its length at (%rdx) and the last output
	 * block at 16(%rsi). Only the first 13 blocks are multiplied here: the
	 * fourth vector uses a 128-bit multiply on lane 0 of zmm21, so the
	 * partial block in lane 1 is left out.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Combine with the accumulators carried over from the stashed data. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Same fold, lane-wise XOR and POLY2 reduction as the full-block path. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_244:

	/*
	 * r8 is zero only after the full-block path. Otherwise XOR the shuffled
	 * partial block (xmm7) into the hash; its multiply is not done here.
	 */
	orq	%r8,%r8
	je	.L_after_reduction_244
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_244:
	jmp	.L_last_blocks_done_216
.L_last_num_blocks_is_15_216:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_245
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_245

.L_16_blocks_overflow_245:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_245:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	subq	$16 * (15 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_246





	subq	$16,%r8
	movq	$0,(%rdx)
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_246
.L_small_initial_partial_block_246:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_246:

	orq	%r8,%r8
	je	.L_after_reduction_246
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_246:
	jmp	.L_last_blocks_done_216
/*
 * .L_last_num_blocks_is_16_216
 *
 * Tail case that processes four 64-byte vectors (16 x 16-byte blocks); the
 * fourth vector is loaded and stored under byte mask k1.
 *
 * What the code below does:
 *   - derives four counter vectors zmm0/zmm3/zmm4/zmm5 from zmm2;
 *   - encrypts them with the round keys broadcast from 0..192(%rdi)
 *     (whitening XOR, 11 x vaesenc, vaesenclast);
 *   - interleaved with the AES rounds, carry-less multiplies the vectors at
 *     768..960(%rsp) (the first one XORed with zmm14) by the vectors at
 *     0..192(%rsp,%rbx) and collects the partial products in zmm24/25/26;
 *   - XORs the keystream with the input at (%rcx,%r11) and stores the
 *     result at (%r9,%r11);
 *   - multiplies the shuffled output blocks by values read from (%rsi),
 *     adds the partial products above and reduces with POLY2 into xmm14.
 *
 * NOTE(review): register roles are inferred from usage in this block only
 * (r8 = remaining byte count, r15d = low counter byte, rsi = table of hash
 * key powers, rdx = pointer to a stored partial-block length) -- confirm
 * against the function prologue, which is outside this view.
 */
.L_last_num_blocks_is_16_216:
	/* k1 = 64-bit entry [r8 - 192] of byte64_len_to_mask_table (8-byte entries) */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 240 selects the overflow path below, which shuffles zmm2
	   through zmm29 before the additions and shuffles the sums back */
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_247
	/* Fast path: zmm0 = zmm2 + zmm28, then each next vector adds zmm27 */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_247

.L_16_blocks_overflow_247:
	/* Overflow path: same additions using the ddq_add_1234 / ddq_add_4444
	   constants, performed on the zmm29-shuffled form of the counter */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_247:




	/* zmm30/zmm31 alternate as the current and the next broadcast round key */
	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved vector at 768(%rsp) XOR running hash in zmm14 */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	/* Keep the last counter block (lane 3 of zmm5) broadcast in zmm2 */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	/* Whitening: XOR the counter vectors with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products (hi*hi, lo*lo and the two cross terms) of zmm8 x zmm1 */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of the 832(%rsp) vector x 64(%rsp,%rbx) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of the 896(%rsp) vector x 128(%rsp,%rbx) */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) folds the first three products together */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input; the fourth vector is masked by k1 with zeroing */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Partial products of the 960(%rsp) vector x 192(%rsp,%rbx) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Accumulated sums: zmm24 = high, zmm25 = low, zmm26 = middle terms */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	/* Remaining AES rounds with the keys at 112..176(%rdi) */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final round uses the key at 192(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* Keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* xmm11 = last output block, taken before the masked-off bytes are zeroed */
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm5 that fall outside mask k1 */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output through zmm29 for hashing
	   (presumably a byte-reflection mask -- confirm) */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	/* xmm7 = shuffled last block, XORed into xmm14 after the reduction */
	vextracti32x4	$3,%zmm21,%xmm7
	/* r8 -= 240: bytes belonging to the 16th block */
	subq	$16 * (16 - 1),%r8
.L_small_initial_partial_block_248:








	/* Store r8 at (%rdx) and the last output block at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	/* Multiply the first three vectors by the values at 112/176/240(%rsi) */
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Only lanes 0..2 of zmm1 are filled (ymm load zeroes the upper half,
	   then lane 2 is inserted); lane 3 stays zero, so the last block of
	   zmm21 contributes nothing to these products */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Add the sums accumulated during the AES phase (zmm24/25/26) */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpternlogq	$0x96,%zmm31,%zmm26,%zmm5
	vpternlogq	$0x96,%zmm8,%zmm24,%zmm0
	vpternlogq	$0x96,%zmm22,%zmm25,%zmm3

	/* Split the middle term across the high (zmm0) and low (zmm3) halves */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR of the four 128-bit lanes down to xmm0 and xmm3 */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_248:
	/* Unconditionally XOR the shuffled last block into the hash */
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_248:
	jmp	.L_last_blocks_done_216
/*
 * .L_last_num_blocks_is_0_216
 *
 * Tail case with nothing left to encrypt: only the hash is finished.
 * The four vectors saved at 768..960(%rsp) (the first XORed with the
 * running hash in zmm14) are carry-less multiplied by the four vectors at
 * 0..192(%rsp,%rbx), the products are summed, and the sum is reduced with
 * POLY2 into xmm14.  Falls through into .L_last_blocks_done_216.
 * NOTE(review): the stack slots presumably hold buffered data blocks and
 * hash key powers -- confirm against the code that fills them.
 */
.L_last_num_blocks_is_0_216:
	/* Products of vector 0 (with zmm14 folded in) -> zmm0/zmm3/zmm4/zmm5 */
	vmovdqa64	768(%rsp),%zmm13
	vpxorq	%zmm14,%zmm13,%zmm13
	vmovdqu64	0(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	/* Products of vector 1 -> zmm6/zmm7/zmm10/zmm11 */
	vmovdqa64	832(%rsp),%zmm13
	vmovdqu64	64(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* Start the sums: zmm24 = high, zmm25 = low, zmm26 = middle terms */
	vpxorq	%zmm10,%zmm4,%zmm26
	vpxorq	%zmm6,%zmm0,%zmm24
	vpxorq	%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/* Products of vectors 2 and 3 */
	vmovdqa64	896(%rsp),%zmm13
	vmovdqu64	128(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	960(%rsp),%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	/* Fold them into the running sums (imm 0x96 = three-way XOR) */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* Split the middle term across the high and low halves */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	/* Horizontal XOR of the four lanes down to xmm24 and xmm25 */
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

/*
 * Common exit of the _216 tail cases: shuffle the saved counter block in
 * xmm2 through the mask in xmm29, then continue at .L_ghash_done_172.
 */
.L_last_blocks_done_216:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172
/*
 * .L_encrypt_16_blocks_172
 *
 * Processes four full 64-byte vectors (16 blocks, no masking):
 *   - derives counter vectors zmm0/zmm3/zmm4/zmm5 from zmm2 and adds 16 to
 *     the byte counter r15b;
 *   - encrypts them with the round keys broadcast from 0..192(%rdi);
 *   - interleaved with the AES rounds, multiplies the vectors saved at
 *     768..960(%rsp) (first one XORed with zmm14) by 0..192(%rsp);
 *   - XORs the keystream with the input at (%rcx,%r11), stores the result
 *     at (%r9,%r11) and saves the shuffled output at 1280..1472(%rsp);
 *   - multiplies the vectors at 1024..1216(%rsp) by 256..448(%rsp) and
 *     folds the products into zmm24/zmm25/zmm26 (unreduced).
 * Execution continues into the length update and tail dispatch that follow.
 * NOTE(review): the meaning of the stack slots (buffered blocks versus
 * hash key powers) is inferred from how they are multiplied -- confirm.
 */
.L_encrypt_16_blocks_172:
	/* r15b >= 240 (unsigned byte compare) selects the overflow path */
	cmpb	$240,%r15b
	jae	.L_16_blocks_overflow_249
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_249
.L_16_blocks_overflow_249:
	/* Same additions on the zmm29-shuffled counter, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
.L_16_blocks_ok_249:
	vbroadcastf64x2	0(%rdi),%zmm30
	/* zmm8 = saved vector at 768(%rsp) XOR running hash in zmm14 */
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp),%zmm1




	/* zmm2 = lane 3 of zmm5 (last counter block) replicated to all lanes */
	vshufi64x2	$255,%zmm5,%zmm5,%zmm2
	addb	$16,%r15b


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp),%zmm18
	vmovdqa64	832(%rsp),%zmm22







	/* Whitening: XOR the counter vectors with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30



	/* Partial products of vector 0 -> zmm6 (hi), zmm7 (lo), zmm10/zmm11 (cross) */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm6
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp),%zmm1
	vmovdqa64	896(%rsp),%zmm8



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31



	/* Partial products of vector 1 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp),%zmm18
	vmovdqa64	960(%rsp),%zmm22



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30



	/* Partial products of vector 2 */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19


	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31



	/* Three-way XOR (imm 0x96) of the products of vectors 0..2 */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm6
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10



	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30



	/* Load four full input vectors (unmasked) */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21



	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31



	/* Partial products of vector 3 */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13


	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Sums so far: zmm26 = middle, zmm24 = high, zmm25 = low */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm15,%zmm10,%zmm26
	vpxorq	%zmm12,%zmm6,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	/* Remaining AES rounds with the keys at 112..176(%rdi) */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30

	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* Final round uses the key at 192(%rdi) */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5



	/* Keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5



	/* Store the 256 output bytes at (%r9,%r11) */
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1)
	/* Save the zmm29-shuffled output at 1280..1472(%rsp); the tail cases
	   that follow read these slots back for hashing */
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
	vmovdqa64	%zmm0,1280(%rsp)
	vmovdqa64	%zmm3,1344(%rsp)
	vmovdqa64	%zmm4,1408(%rsp)
	vmovdqa64	%zmm5,1472(%rsp)
	/* Multiply the vectors at 1024..1216(%rsp) by 256..448(%rsp) and fold
	   the products into zmm24 (high), zmm25 (low), zmm26 (middle) */
	vmovdqa64	1024(%rsp),%zmm13
	vmovdqu64	256(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1088(%rsp),%zmm13
	vmovdqu64	320(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1152(%rsp),%zmm13
	vmovdqu64	384(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1216(%rsp),%zmm13
	vmovdqu64	448(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	/*
	 * Tail dispatch (suffix _250).
	 * Account for the 256 bytes just processed (r8 -= 256, r11 += 256),
	 * compute r10d = (r8d + 15) >> 4, i.e. the number of 16-byte blocks
	 * needed to cover r8 bytes rounded up, and branch to the matching
	 * .L_last_num_blocks_is_N_250 handler through a compare tree.
	 * The je right after shrl tests the zero flag set by the shift, so a
	 * count of 0 goes to .L_last_num_blocks_is_0_250.  A count of 1 falls
	 * through past the end of this tree into the 1-block handler.
	 */
	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	addl	$15,%r10d
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_250

	/* Split at 8: equal -> 8, below -> 1..7 subtree */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_250
	jb	.L_last_num_blocks_is_7_1_250


	/* 9..16: split at 12 */
	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_250
	jb	.L_last_num_blocks_is_11_9_250


	/* 13..16 */
	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_250
	ja	.L_last_num_blocks_is_16_250
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_250
	jmp	.L_last_num_blocks_is_13_250

.L_last_num_blocks_is_11_9_250:

	/* 9..11 */
	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_250
	ja	.L_last_num_blocks_is_11_250
	jmp	.L_last_num_blocks_is_9_250

.L_last_num_blocks_is_7_1_250:
	/* 1..7: split at 4 */
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_250
	jb	.L_last_num_blocks_is_3_1_250

	/* 5..7 */
	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_250
	je	.L_last_num_blocks_is_6_250
	jmp	.L_last_num_blocks_is_5_250

.L_last_num_blocks_is_3_1_250:

	/* 1..3: a count of 1 falls through */
	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_250
	je	.L_last_num_blocks_is_2_250
/*
 * .L_last_num_blocks_is_1_250
 *
 * Tail case for a single (possibly partial) block, handled in xmm0 under
 * byte mask k1.
 *   - Encrypts one counter block with the round keys from 0..192(%rdi).
 *   - Meanwhile multiplies the four shuffled vectors saved at
 *     1280..1472(%rsp) by 512..704(%rsp), adds the sums already held in
 *     zmm24/zmm25/zmm26 and reduces with POLY2 into xmm14.
 *   - XORs the keystream with the input at (%rcx,%r11) and stores the
 *     result at (%r9,%r11) under k1.
 *   - If r8 >= 16 the block is complete: it is XORed into the hash,
 *     multiplied by the value at 336(%rsi) and reduced.  Otherwise r8 and
 *     the output block are stored and the block is only XORed into xmm14.
 * NOTE(review): register roles (r8 = remaining bytes, rdx = pointer to a
 * partial-block length, rsi = hash key table) are inferred from usage.
 */
.L_last_num_blocks_is_1_250:
	/* k1 = 64-bit entry [r8] of byte64_len_to_mask_table */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 255 selects the overflow path for the counter increment */
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_251
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_251

.L_16_blocks_overflow_251:
	/* Add on the zmm29-shuffled counter, then shuffle the used lane back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_251:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the counter block just used (lane 0) broadcast in zmm2 */
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Whitening XOR with the key at 0(%rdi) */
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Partial products of the saved vector at 1280(%rsp) */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of the saved vector at 1344(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of the saved vector at 1408(%rsp) */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the first three product sets */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input block */
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Partial products of the saved vector at 1472(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add the sums carried in zmm24 (high), zmm25 (low), zmm26 (middle) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term across the high (zmm14) and low (zmm7) halves */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%xmm31,%xmm0,%xmm0
	/* Horizontal XOR of the four lanes down to xmm14 and xmm7 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-phase reduction with POLY2 (in xmm16), interleaved with AES */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = reduced hash of the previously saved blocks */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%xmm30,%xmm0,%xmm0
	/* Keystream XOR input */
	vpxorq	%xmm17,%xmm0,%xmm0
	/* xmm11 = output block before the masked-off bytes are zeroed */
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm0 that fall outside mask k1 */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%xmm29,%xmm0,%xmm17
	/* xmm7 = shuffled block, used by the partial path below */
	vextracti32x4	$0,%zmm17,%xmm7


	/* Fewer than 16 bytes left (signed compare): partial-block path */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_252





	/* Complete block: store 0 at (%rdx), then hash the block */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_252
.L_small_initial_partial_block_252:








	/* Partial block: store r8 at (%rdx) and the output block at 16(%rsi) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)











	/* Only XOR the shuffled partial block into the hash; no multiply here */
	vpxorq	%xmm7,%xmm14,%xmm14

	jmp	.L_after_reduction_252
.L_small_initial_compute_done_252:
.L_after_reduction_252:
	jmp	.L_last_blocks_done_250
/*
 * .L_last_num_blocks_is_2_250
 *
 * Tail case for two blocks (the second possibly partial), handled in ymm0
 * under byte mask k1.  Same structure as the 1-block case:
 *   - encrypt two counter blocks with the round keys from 0..192(%rdi);
 *   - meanwhile multiply the vectors saved at 1280..1472(%rsp) by
 *     512..704(%rsp), add zmm24/zmm25/zmm26 and reduce into xmm14;
 *   - XOR with the input at (%rcx,%r11), store at (%r9,%r11) under k1;
 *   - hash the new blocks: both blocks (values at 320(%rsi)) when the last
 *     one is complete, otherwise only the first (value at 336(%rsi)) and
 *     then XOR the shuffled partial block into xmm14.
 * NOTE(review): register roles (r8, rdx, rsi) are inferred from usage.
 */
.L_last_num_blocks_is_2_250:
	/* k1 = 64-bit entry [r8] of byte64_len_to_mask_table */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 254 selects the overflow path for the counter increment */
	cmpl	$254,%r15d
	jae	.L_16_blocks_overflow_253
	vpaddd	%ymm28,%ymm2,%ymm0
	jmp	.L_16_blocks_ok_253

.L_16_blocks_overflow_253:
	/* Add on the zmm29-shuffled counter, then shuffle the used lanes back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%ymm29,%ymm0,%ymm0
.L_16_blocks_ok_253:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter block used (lane 1) broadcast in zmm2 */
	vextracti32x4	$1,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Whitening XOR with the key at 0(%rdi) */
	vpxorq	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Partial products of the saved vector at 1280(%rsp) */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of the saved vector at 1344(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of the saved vector at 1408(%rsp) */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the first three product sets */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input blocks */
	vmovdqu8	0(%rcx,%r11,1),%ymm17{%k1}{z}
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Partial products of the saved vector at 1472(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add the sums carried in zmm24 (high), zmm25 (low), zmm26 (middle) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%ymm31,%ymm0,%ymm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term across the high (zmm14) and low (zmm7) halves */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%ymm31,%ymm0,%ymm0
	/* Horizontal XOR of the four lanes down to xmm14 and xmm7 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-phase reduction with POLY2 (in xmm16), interleaved with AES */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%ymm30,%ymm0,%ymm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%ymm31,%ymm0,%ymm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = reduced hash of the previously saved blocks */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%ymm30,%ymm0,%ymm0
	/* Keystream XOR input */
	vpxorq	%ymm17,%ymm0,%ymm0
	/* xmm11 = last output block before the masked-off bytes are zeroed */
	vextracti32x4	$1,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%ymm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm0 that fall outside mask k1 */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%ymm29,%ymm0,%ymm17
	/* xmm7 = shuffled last block, used after the partial path below */
	vextracti32x4	$1,%zmm17,%xmm7
	/* r8 -= 16: bytes belonging to the second block */
	subq	$16 * (2 - 1),%r8


	/* Fewer than 16 bytes in the last block (signed compare): partial path */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_254





	/* Last block complete: store 0 at (%rdx), hash both blocks with the
	   two values at 320(%rsi) */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_254
.L_small_initial_partial_block_254:








	/* Partial last block: store r8 at (%rdx) and the last output block at
	   16(%rsi); multiply only the first block (value at 336(%rsi)) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm17,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm17,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm17,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm17,%xmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_254:

	/* r8 != 0 here means a partial block remains: XOR it into the hash
	   (orq only sets the flags; r8 is unchanged) */
	orq	%r8,%r8
	je	.L_after_reduction_254
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_254:
	jmp	.L_last_blocks_done_250
/*
 * .L_last_num_blocks_is_3_250
 *
 * Tail case for three blocks (the third possibly partial), handled in zmm0
 * under byte mask k1.  Same structure as the 1- and 2-block cases:
 *   - encrypt the counter blocks with the round keys from 0..192(%rdi);
 *   - meanwhile multiply the vectors saved at 1280..1472(%rsp) by
 *     512..704(%rsp), add zmm24/zmm25/zmm26 and reduce into xmm14;
 *   - XOR with the input at (%rcx,%r11), store at (%r9,%r11) under k1;
 *   - hash the new blocks: all three (values at 304..351(%rsi)) when the
 *     last one is complete, otherwise the first two (values at 320(%rsi))
 *     and then XOR the shuffled partial block into xmm14.
 * NOTE(review): register roles (r8, rdx, rsi) are inferred from usage.
 */
.L_last_num_blocks_is_3_250:
	/* k1 = 64-bit entry [r8] of byte64_len_to_mask_table */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/* r15d >= 253 selects the overflow path for the counter increment */
	cmpl	$253,%r15d
	jae	.L_16_blocks_overflow_255
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_255

.L_16_blocks_overflow_255:
	/* Add on the zmm29-shuffled counter, then shuffle back */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_255:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter block used (lane 2) broadcast in zmm2 */
	vextracti32x4	$2,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Whitening XOR with the key at 0(%rdi) */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Partial products of the saved vector at 1280(%rsp) */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Partial products of the saved vector at 1344(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Partial products of the saved vector at 1408(%rsp) */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* Three-way XOR (imm 0x96) of the first three product sets */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input blocks */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Partial products of the saved vector at 1472(%rsp) */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Add the sums carried in zmm24 (high), zmm25 (low), zmm26 (middle) */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term across the high (zmm14) and low (zmm7) halves */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	/* Horizontal XOR of the four lanes down to xmm14 and xmm7 */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-phase reduction with POLY2 (in xmm16), interleaved with AES */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = reduced hash of the previously saved blocks */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* Keystream XOR input */
	vpxorq	%zmm17,%zmm0,%zmm0
	/* xmm11 = last output block before the masked-off bytes are zeroed */
	vextracti32x4	$2,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm0 that fall outside mask k1 */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	vpshufb	%zmm29,%zmm0,%zmm17
	/* xmm7 = shuffled last block, used after the partial path below */
	vextracti32x4	$2,%zmm17,%xmm7
	/* r8 -= 32: bytes belonging to the third block */
	subq	$16 * (3 - 1),%r8


	/* Fewer than 16 bytes in the last block (signed compare): partial path */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_256





	/* Last block complete: store 0 at (%rdx), hash all three blocks.
	   zmm1 lanes 0..1 come from 304(%rsi), lane 2 from 336(%rsi);
	   lane 3 is zero after the ymm load */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_256
.L_small_initial_partial_block_256:








	/* Partial last block: store r8 at (%rdx) and the last output block at
	   16(%rsi); multiply only the first two blocks (values at 320(%rsi)) */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm17,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm17,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm17,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm17,%ymm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction using the POLY2 constant; result in xmm14 */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_256:

	/* r8 != 0 here means a partial block remains: XOR it into the hash
	   (orq only sets the flags; r8 is unchanged) */
	orq	%r8,%r8
	je	.L_after_reduction_256
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_256:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for the "4 blocks" case (numbering taken from the label; the
 * code that branches here lies outside this section).
 *
 * Steps performed below:
 *   - load a byte mask into %k1 from byte64_len_to_mask_table, indexed by %r8
 *     (presumably the number of bytes left to process -- TODO confirm);
 *   - derive one 512-bit vector of counter blocks in %zmm0 from %zmm2;
 *   - run the AES rounds on %zmm0 with the round keys at 0..192(%rdi), while
 *     carry-less multiplying operand pairs read from the stack frame;
 *   - XOR the cipher output with the masked input at (%rcx,%r11) and store
 *     the result, masked by %k1, at (%r9,%r11);
 *   - multiply the byte-shuffled output by values read from (%rsi)
 *     (presumably precomputed hash-key powers -- verify) and reduce the
 *     product to 128 bits in %xmm14.
 * Control leaves through .L_last_blocks_done_250.
 */
.L_last_num_blocks_is_4_250:
	/* %k1 = 64-bit mask taken from table entry number %r8 (8-byte entries). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 252 selects the path that byte-shuffles the counter before
	 * adding (presumably to handle a carry out of the low counter byte --
	 * TODO confirm what %r15d tracks).
	 */
	cmpl	$252,%r15d
	jae	.L_16_blocks_overflow_257
	/* Direct path: %zmm0 = %zmm2 + %zmm28 (32-bit lane add). */
	vpaddd	%zmm28,%zmm2,%zmm0
	jmp	.L_16_blocks_ok_257

.L_16_blocks_overflow_257:
	/* Shuffle with %zmm29, add ddq_add_1234, then shuffle the sum back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%zmm29,%zmm0,%zmm0
.L_16_blocks_ok_257:




	/* Round key at 0(%rdi) and the first operand pair from the stack. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Copy 128-bit lane 3 of %zmm0 into every lane of %zmm2. */
	vextracti32x4	$3,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial key XOR of the counter blocks with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of the first operand pair (%zmm8 and %zmm1). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES rounds alternate between %zmm31 and %zmm30 as the key register. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Four partial products of the second operand pair (%zmm22 and %zmm18). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Four partial products of the third operand pair. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with immediate 0x96 is a three-input XOR into the destination. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Masked, zeroing load of the input bytes selected by %k1. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Four partial products of the fourth operand pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Fold everything into %zmm14, %zmm7 and %zmm10. %zmm24, %zmm25 and
	 * %zmm26 are set outside this section (presumably running sums from
	 * earlier blocks -- verify).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split %zmm10 per 128-bit lane: upper qword to %zmm15, lower qword shifted up. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	/* XOR the four 128-bit lanes of %zmm14 together into %xmm14. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	/* Same horizontal XOR for %zmm7 into %xmm7. */
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First multiply by the POLY2 constant (reduction step, interleaved with AES). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	/* Second pair of multiplies by POLY2, shifted 4 bytes right and left. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 ^= %xmm15 ^ %xmm12: reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	/* XOR the cipher output with the input loaded earlier. */
	vpxorq	%zmm17,%zmm0,%zmm0
	/* Keep lane 3 of the result in %xmm11 before the masking below. */
	vextracti32x4	$3,%zmm0,%xmm11
	movq	%r9,%r10
	/* Store only the bytes selected by %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm0 that are outside the mask. */
	vmovdqu8	%zmm0,%zmm0{%k1}{z}
	/* Byte-shuffle the masked output with %zmm29; lane 3 is kept in %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vextracti32x4	$3,%zmm17,%xmm7
	/* Drop the first three 16-byte blocks from the count in %r8. */
	subq	$16 * (4 - 1),%r8


	/* Fewer than 16 bytes left for the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_258





	/* Full last block: consume it and store 0 through %rdx. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the value reduced above (low lane of %zmm14) into the shuffled output. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* Four multiplier values from 288(%rsi); four partial products. */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Merge the two cross products and distribute them over high and low parts. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* Horizontal XOR of the lanes of %zmm0 and of %zmm3 down to 128 bits each. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_258
.L_small_initial_partial_block_258:








	/*
	 * Partial last block: store the leftover byte count through %rdx and
	 * the saved lane (%xmm11) at 16(%rsi). Only three multiplier values are
	 * loaded here (304(%rsi) plus 336(%rsi)); lane 3 of %zmm1 stays zero.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_258:

	/* If %r8 is non-zero, XOR the saved shuffled last lane (%xmm7) into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_258
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_258:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for the "5 blocks" case (numbering taken from the label; the
 * code that branches here lies outside this section).
 *
 * Two counter vectors are processed side by side: %zmm0 (64 bytes, always
 * stored in full) and %xmm3 (one further 16-byte block, loaded and stored
 * under the byte mask %k1). The mask comes from byte64_len_to_mask_table at
 * index %r8 - 64 (presumably %r8 is the number of bytes left -- TODO
 * confirm). AES round keys are read from 0..192(%rdi); the multiplier values
 * for the final multiply are read from (%rsi) (presumably precomputed
 * hash-key powers -- verify). The reduced 128-bit result is left in %xmm14
 * and control leaves through .L_last_blocks_done_250.
 */
.L_last_num_blocks_is_5_250:
	/* %k1 = mask table entry number (%r8 - 64). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 251 selects the path that byte-shuffles the counter before
	 * adding (presumably a carry out of the low counter byte -- TODO confirm).
	 */
	cmpl	$251,%r15d
	jae	.L_16_blocks_overflow_259
	/* Direct path: %zmm0 = %zmm2 + %zmm28, %xmm3 = %xmm0 + %xmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%xmm27,%xmm0,%xmm3
	jmp	.L_16_blocks_ok_259

.L_16_blocks_overflow_259:
	/* Shuffle with %zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%xmm29,%xmm3,%xmm3
.L_16_blocks_ok_259:




	/* Round key at 0(%rdi) and the first operand pair from the stack. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Copy 128-bit lane 0 of %zmm3 into every lane of %zmm2. */
	vextracti32x4	$0,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial key XOR of both counter vectors with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of the first operand pair (%zmm8 and %zmm1). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES rounds alternate between %zmm31 and %zmm30 as the key register. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Four partial products of the second operand pair (%zmm22 and %zmm18). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Four partial products of the third operand pair. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with immediate 0x96 is a three-input XOR into the destination. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 bytes in full, then one more block under the %k1 byte mask. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%xmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Four partial products of the fourth operand pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Fold everything into %zmm14, %zmm7 and %zmm10. %zmm24, %zmm25 and
	 * %zmm26 are set outside this section (presumably running sums from
	 * earlier blocks -- verify).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split %zmm10 per 128-bit lane: upper qword to %zmm15, lower qword shifted up. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	/* XOR the four 128-bit lanes of %zmm14 together into %xmm14. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	/* Same horizontal XOR for %zmm7 into %xmm7. */
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First multiply by the POLY2 constant (reduction step, interleaved with AES). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%xmm30,%xmm3,%xmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%xmm31,%xmm3,%xmm3
	/* Second pair of multiplies by POLY2, shifted 4 bytes right and left. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 ^= %xmm15 ^ %xmm12: reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%xmm30,%xmm3,%xmm3
	/* XOR the cipher output with the input loaded earlier. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%xmm19,%xmm3,%xmm3
	/* Keep lane 0 of the second vector in %xmm11 before the masking below. */
	vextracti32x4	$0,%zmm3,%xmm11
	movq	%r9,%r10
	/* First 64 bytes are stored in full; the extra block is stored under %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%xmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm3 that are outside the mask. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both outputs with %zmm29; the last lane is kept in %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%xmm29,%xmm3,%xmm19
	vextracti32x4	$0,%zmm19,%xmm7
	/* Drop the first four 16-byte blocks from the count in %r8. */
	subq	$16 * (5 - 1),%r8


	/* Fewer than 16 bytes left for the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_260





	/* Full last block: consume it and store 0 through %rdx. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the value reduced above (low lane of %zmm14) into the shuffled output. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four values at 272(%rsi). */
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Fifth block times the single value at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Add the two sets of partial products together. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_260
.L_small_initial_partial_block_260:








	/*
	 * Partial last block: store the leftover byte count through %rdx and
	 * the saved lane (%xmm11) at 16(%rsi). Only the first four blocks
	 * (%zmm17) are multiplied here, using the values at 288(%rsi).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_260:

	/* If %r8 is non-zero, XOR the saved shuffled last lane (%xmm7) into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_260
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_260:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for the "6 blocks" case (numbering taken from the label; the
 * code that branches here lies outside this section).
 *
 * Two counter vectors are processed side by side: %zmm0 (64 bytes, always
 * stored in full) and %ymm3 (two further 16-byte blocks, loaded and stored
 * under the byte mask %k1). The mask comes from byte64_len_to_mask_table at
 * index %r8 - 64 (presumably %r8 is the number of bytes left -- TODO
 * confirm). AES round keys are read from 0..192(%rdi); the multiplier values
 * for the final multiply are read from (%rsi) (presumably precomputed
 * hash-key powers -- verify). The reduced 128-bit result is left in %xmm14
 * and control leaves through .L_last_blocks_done_250.
 */
.L_last_num_blocks_is_6_250:
	/* %k1 = mask table entry number (%r8 - 64). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 250 selects the path that byte-shuffles the counter before
	 * adding (presumably a carry out of the low counter byte -- TODO confirm).
	 */
	cmpl	$250,%r15d
	jae	.L_16_blocks_overflow_261
	/* Direct path: %zmm0 = %zmm2 + %zmm28, %ymm3 = %ymm0 + %ymm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%ymm27,%ymm0,%ymm3
	jmp	.L_16_blocks_ok_261

.L_16_blocks_overflow_261:
	/* Shuffle with %zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%ymm29,%ymm3,%ymm3
.L_16_blocks_ok_261:




	/* Round key at 0(%rdi) and the first operand pair from the stack. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Copy 128-bit lane 1 of %zmm3 into every lane of %zmm2. */
	vextracti32x4	$1,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial key XOR of both counter vectors with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of the first operand pair (%zmm8 and %zmm1). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES rounds alternate between %zmm31 and %zmm30 as the key register. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Four partial products of the second operand pair (%zmm22 and %zmm18). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Four partial products of the third operand pair. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with immediate 0x96 is a three-input XOR into the destination. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 bytes in full, then up to 32 more under the %k1 byte mask. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%ymm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Four partial products of the fourth operand pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Fold everything into %zmm14, %zmm7 and %zmm10. %zmm24, %zmm25 and
	 * %zmm26 are set outside this section (presumably running sums from
	 * earlier blocks -- verify).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split %zmm10 per 128-bit lane: upper qword to %zmm15, lower qword shifted up. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	/* XOR the four 128-bit lanes of %zmm14 together into %xmm14. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	/* Same horizontal XOR for %zmm7 into %xmm7. */
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First multiply by the POLY2 constant (reduction step, interleaved with AES). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%ymm30,%ymm3,%ymm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%ymm31,%ymm3,%ymm3
	/* Second pair of multiplies by POLY2, shifted 4 bytes right and left. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 ^= %xmm15 ^ %xmm12: reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%ymm30,%ymm3,%ymm3
	/* XOR the cipher output with the input loaded earlier. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%ymm19,%ymm3,%ymm3
	/* Keep lane 1 of the second vector in %xmm11 before the masking below. */
	vextracti32x4	$1,%zmm3,%xmm11
	movq	%r9,%r10
	/* First 64 bytes are stored in full; the remainder is stored under %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%ymm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm3 that are outside the mask. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both outputs with %zmm29; the last lane is kept in %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%ymm29,%ymm3,%ymm19
	vextracti32x4	$1,%zmm19,%xmm7
	/* Drop the first five 16-byte blocks from the count in %r8. */
	subq	$16 * (6 - 1),%r8


	/* Fewer than 16 bytes left for the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_262





	/* Full last block: consume it and store 0 through %rdx. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the value reduced above (low lane of %zmm14) into the shuffled output. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four values at 256(%rsi). */
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Last two blocks times the two values at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Add the two sets of partial products together. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_262
.L_small_initial_partial_block_262:








	/*
	 * Partial last block: store the leftover byte count through %rdx and
	 * the saved lane (%xmm11) at 16(%rsi). The multiply below covers %zmm17
	 * (values at 272(%rsi)) and only the low 128 bits of the second vector
	 * (value at 336(%rsi)).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm19,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm19,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm19,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm19,%xmm3

	/* Add the two sets of partial products together. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_262:

	/* If %r8 is non-zero, XOR the saved shuffled last lane (%xmm7) into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_262
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_262:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for the "7 blocks" case (numbering taken from the label; the
 * code that branches here lies outside this section).
 *
 * Two counter vectors are processed side by side: %zmm0 (64 bytes, always
 * stored in full) and %zmm3 (further blocks, loaded and stored under the
 * byte mask %k1; lane 2 is treated as the last block). The mask comes from
 * byte64_len_to_mask_table at index %r8 - 64 (presumably %r8 is the number
 * of bytes left -- TODO confirm). AES round keys are read from
 * 0..192(%rdi); the multiplier values for the final multiply are read from
 * (%rsi) (presumably precomputed hash-key powers -- verify). The reduced
 * 128-bit result is left in %xmm14 and control leaves through
 * .L_last_blocks_done_250.
 */
.L_last_num_blocks_is_7_250:
	/* %k1 = mask table entry number (%r8 - 64). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 249 selects the path that byte-shuffles the counter before
	 * adding (presumably a carry out of the low counter byte -- TODO confirm).
	 */
	cmpl	$249,%r15d
	jae	.L_16_blocks_overflow_263
	/* Direct path: %zmm0 = %zmm2 + %zmm28, %zmm3 = %zmm0 + %zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_263

.L_16_blocks_overflow_263:
	/* Shuffle with %zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_263:




	/* Round key at 0(%rdi) and the first operand pair from the stack. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Copy 128-bit lane 2 of %zmm3 into every lane of %zmm2. */
	vextracti32x4	$2,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial key XOR of both counter vectors with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of the first operand pair (%zmm8 and %zmm1). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES rounds alternate between %zmm31 and %zmm30 as the key register. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Four partial products of the second operand pair (%zmm22 and %zmm18). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Four partial products of the third operand pair. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with immediate 0x96 is a three-input XOR into the destination. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 bytes in full, then the remainder under the %k1 byte mask. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Four partial products of the fourth operand pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Fold everything into %zmm14, %zmm7 and %zmm10. %zmm24, %zmm25 and
	 * %zmm26 are set outside this section (presumably running sums from
	 * earlier blocks -- verify).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split %zmm10 per 128-bit lane: upper qword to %zmm15, lower qword shifted up. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of %zmm14 together into %xmm14. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	/* Same horizontal XOR for %zmm7 into %xmm7. */
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First multiply by the POLY2 constant (reduction step, interleaved with AES). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* Second pair of multiplies by POLY2, shifted 4 bytes right and left. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 ^= %xmm15 ^ %xmm12: reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the cipher output with the input loaded earlier. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* Keep lane 2 of the second vector in %xmm11 before the masking below. */
	vextracti32x4	$2,%zmm3,%xmm11
	movq	%r9,%r10
	/* First 64 bytes are stored in full; the remainder is stored under %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm3 that are outside the mask. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both outputs with %zmm29; the last lane is kept in %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$2,%zmm19,%xmm7
	/* Drop the first six 16-byte blocks from the count in %r8. */
	subq	$16 * (7 - 1),%r8


	/* Fewer than 16 bytes left for the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_264





	/* Full last block: consume it and store 0 through %rdx. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the value reduced above (low lane of %zmm14) into the shuffled output. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four values at 240(%rsi). */
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	/* Last three blocks times three values (304(%rsi) plus 336(%rsi)). */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Add the two sets of partial products together. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_264
.L_small_initial_partial_block_264:








	/*
	 * Partial last block: store the leftover byte count through %rdx and
	 * the saved lane (%xmm11) at 16(%rsi). The multiply below covers %zmm17
	 * (values at 256(%rsi)) and only the low 256 bits of the second vector
	 * (values at 320(%rsi)).
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm19,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm19,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm19,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm19,%ymm3

	/* Add the two sets of partial products together. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_264:

	/* If %r8 is non-zero, XOR the saved shuffled last lane (%xmm7) into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_264
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_264:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for the "8 blocks" case (numbering taken from the label; the
 * code that branches here lies outside this section).
 *
 * Two counter vectors are processed side by side: %zmm0 (64 bytes, always
 * stored in full) and %zmm3 (further blocks, loaded and stored under the
 * byte mask %k1; lane 3 is treated as the last block). The mask comes from
 * byte64_len_to_mask_table at index %r8 - 64 (presumably %r8 is the number
 * of bytes left -- TODO confirm). AES round keys are read from
 * 0..192(%rdi); the multiplier values for the final multiply are read from
 * (%rsi) (presumably precomputed hash-key powers -- verify). The reduced
 * 128-bit result is left in %xmm14 and control leaves through
 * .L_last_blocks_done_250.
 */
.L_last_num_blocks_is_8_250:
	/* %k1 = mask table entry number (%r8 - 64). */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$64,%rax
	kmovq	(%r10,%rax,8),%k1
	/*
	 * %r15d >= 248 selects the path that byte-shuffles the counter before
	 * adding (presumably a carry out of the low counter byte -- TODO confirm).
	 */
	cmpl	$248,%r15d
	jae	.L_16_blocks_overflow_265
	/* Direct path: %zmm0 = %zmm2 + %zmm28, %zmm3 = %zmm0 + %zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	jmp	.L_16_blocks_ok_265

.L_16_blocks_overflow_265:
	/* Shuffle with %zmm29, add ddq_add_1234 / ddq_add_4444, shuffle back. */
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
.L_16_blocks_ok_265:




	/* Round key at 0(%rdi) and the first operand pair from the stack. */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Copy 128-bit lane 3 of %zmm3 into every lane of %zmm2. */
	vextracti32x4	$3,%zmm3,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* Initial key XOR of both counter vectors with the key at 0(%rdi). */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Four partial products of the first operand pair (%zmm8 and %zmm1). */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	/* AES rounds alternate between %zmm31 and %zmm30 as the key register. */
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Four partial products of the second operand pair (%zmm22 and %zmm18). */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Four partial products of the third operand pair. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with immediate 0x96 is a three-input XOR into the destination. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Input: 64 bytes in full, then the remainder under the %k1 byte mask. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Four partial products of the fourth operand pair. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Fold everything into %zmm14, %zmm7 and %zmm10. %zmm24, %zmm25 and
	 * %zmm26 are set outside this section (presumably running sums from
	 * earlier blocks -- verify).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split %zmm10 per 128-bit lane: upper qword to %zmm15, lower qword shifted up. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* XOR the four 128-bit lanes of %zmm14 together into %xmm14. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	/* Same horizontal XOR for %zmm7 into %xmm7. */
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First multiply by the POLY2 constant (reduction step, interleaved with AES). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	/* Second pair of multiplies by POLY2, shifted 4 bytes right and left. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* %xmm14 ^= %xmm15 ^ %xmm12: reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi). */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	/* XOR the cipher output with the input loaded earlier. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	/* Keep lane 3 of the second vector in %xmm11 before the masking below. */
	vextracti32x4	$3,%zmm3,%xmm11
	movq	%r9,%r10
	/* First 64 bytes are stored in full; the remainder is stored under %k1. */
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm3 that are outside the mask. */
	vmovdqu8	%zmm3,%zmm3{%k1}{z}
	/* Byte-shuffle both outputs with %zmm29; the last lane is kept in %xmm7. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vextracti32x4	$3,%zmm19,%xmm7
	/* Drop the first seven 16-byte blocks from the count in %r8. */
	subq	$16 * (8 - 1),%r8


	/* Fewer than 16 bytes left for the last block: take the partial path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_266





	/* Full last block: consume it and store 0 through %rdx. */
	subq	$16,%r8
	movq	$0,(%rdx)
	/* XOR the value reduced above (low lane of %zmm14) into the shuffled output. */
	vpxorq	%zmm14,%zmm17,%zmm17
	/* First four blocks times the four values at 224(%rsi). */
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	/* Last four blocks times the four values at 288(%rsi). */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Add the two sets of partial products together. */
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_266
.L_small_initial_partial_block_266:








	/*
	 * Partial last block: store the leftover byte count through %rdx and
	 * the saved lane (%xmm11) at 16(%rsi). The multiply below covers %zmm17
	 * (values at 240(%rsi)) and three lanes of the second vector (values at
	 * 304(%rsi) plus 336(%rsi)); lane 3 of %zmm1 stays zero.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm3

	/* Add the two sets of partial products together. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Merge the cross products and distribute them over high and low parts. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	/* Horizontal XOR down to 128 bits. */
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	/* Reduction with POLY2, first step. */
	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	/* Reduction with POLY2, second step; result is left in %xmm14. */
	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_266:

	/* If %r8 is non-zero, XOR the saved shuffled last lane (%xmm7) into %xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_266
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_266:
	jmp	.L_last_blocks_done_250
/*
 * Tail case: 9 blocks remain (two full 64-byte vectors in zmm0/zmm3 plus one
 * 16-byte lane in xmm4; the 9th block may be partial).
 *
 * Visible steps:
 *   1. k1 <- byte64_len_to_mask_table[r8 - 128], the byte mask used for the
 *      third (last) vector load/store.
 *   2. Derive 9 counter blocks from zmm2 into zmm0/zmm3/xmm4.
 *   3. AES rounds with round keys broadcast from 0..192(%rdi), interleaved
 *      with carry-less multiplies of four operand pairs read from the stack.
 *   4. XOR the keystream with the input at (%rcx,%r11) and store the result
 *      at (%r9,%r11).
 *   5. Hash the byte-shuffled output blocks into xmm14.
 *
 * NOTE(review): r8 is presumably the remaining byte count and r15d the low
 * byte of the block counter; both are set up outside this chunk -- confirm.
 */
.L_last_num_blocks_is_9_250:
	/* rax = r8 - 128 indexes the 8-byte mask table entries; result goes to k1. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 247 = 256 - 9: at or above this value take the path that byte-swaps first. */
	cmpl	$247,%r15d
	jae	.L_16_blocks_overflow_267
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%xmm27,%xmm3,%xmm4
	jmp	.L_16_blocks_ok_267

	/*
	 * Slow path: shuffle zmm2 with the mask in zmm29, add ddq_add_1234 and
	 * ddq_add_4444, then shuffle each counter vector back with zmm29.
	 */
.L_16_blocks_overflow_267:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%xmm29,%xmm4,%xmm4
.L_16_blocks_ok_267:




	/*
	 * zmm30/zmm31 alternate as the current round key (broadcast to all lanes).
	 * zmm8/zmm1 and zmm22/zmm18 are multiplier operand pairs loaded from the
	 * stack (presumably blocks and key powers stashed earlier -- confirm).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Lane 0 of zmm4 (the 9th counter block) is broadcast into zmm2. */
	vextracti32x4	$0,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* XOR the counter blocks with the round key at offset 0. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1 products: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: accumulate pairs 1-3 into zmm14/zmm7/zmm11/zmm10. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: two full vectors, then the last 16-byte lane masked by k1 (zeroing). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%xmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Merge pair 4 and the values in zmm24/zmm25/zmm26 (presumably sums
	 * accumulated before this chunk -- confirm) into high (zmm14),
	 * low (zmm7) and middle (zmm10).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term: per 128-bit lane, its upper 8 bytes go to high and lower 8 bytes to low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	/* Horizontal XOR of the four 128-bit lanes of zmm14 and zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First reduction phase with the POLY2 constant (xmm16). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%xmm30,%xmm4,%xmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%xmm31,%xmm4,%xmm4
	/* Second reduction phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = xmm14 ^ xmm12 ^ xmm15: the reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi), the same offset as the AES-192 path of the init routine. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%xmm30,%xmm4,%xmm4
	/* Keystream XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%xmm20,%xmm4,%xmm4
	/* Keep the unmasked last output block in xmm11; the partial-block path stores it at 16(%rsi). */
	vextracti32x4	$0,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%xmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before they are hashed. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%xmm29,%xmm4,%xmm20
	vextracti32x4	$0,%zmm20,%xmm7
	/* r8 -= 128: what is left is the length of the 9th block. */
	subq	$16 * (9 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_268





	/*
	 * Last block is complete: store 0 at (%rdx) and hash all 9 blocks.
	 * xmm14 is folded into the first block, then the blocks are multiplied by
	 * the values at 208/272/336(%rsi) (presumably precomputed hash-key
	 * powers -- confirm).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* 9th block times the single 128-bit value at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_268
.L_small_initial_partial_block_268:








	/*
	 * Last block is partial: store its byte count at (%rdx) and xmm11 at
	 * 16(%rsi), then hash only the 8 complete blocks using the values at
	 * 224/288(%rsi). The partial block (xmm7) is XORed into xmm14 afterwards.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_268:

	/* r8 != 0 only on the partial path: XOR the not-yet-multiplied last block into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_268
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_268:
	jmp	.L_last_blocks_done_250
/*
 * Tail case: 10 blocks remain (two full 64-byte vectors in zmm0/zmm3 plus two
 * 16-byte lanes in ymm4; the 10th block may be partial).
 *
 * Same structure as the other tail cases: build k1 from
 * byte64_len_to_mask_table[r8 - 128], derive the counter blocks from zmm2,
 * run the AES rounds interleaved with carry-less multiplies of operand pairs
 * read from the stack, XOR with the input at (%rcx,%r11), store at
 * (%r9,%r11), and hash the byte-shuffled output blocks into xmm14.
 *
 * NOTE(review): r8 is presumably the remaining byte count and r15d the low
 * byte of the block counter; both are set up outside this chunk -- confirm.
 */
.L_last_num_blocks_is_10_250:
	/* rax = r8 - 128 indexes the 8-byte mask table entries; result goes to k1. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 246 = 256 - 10: at or above this value take the path that byte-swaps first. */
	cmpl	$246,%r15d
	jae	.L_16_blocks_overflow_269
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%ymm27,%ymm3,%ymm4
	jmp	.L_16_blocks_ok_269

	/*
	 * Slow path: shuffle zmm2 with the mask in zmm29, add ddq_add_1234 and
	 * ddq_add_4444, then shuffle each counter vector back with zmm29.
	 */
.L_16_blocks_overflow_269:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%ymm29,%ymm4,%ymm4
.L_16_blocks_ok_269:




	/*
	 * zmm30/zmm31 alternate as the current round key (broadcast to all lanes).
	 * zmm8/zmm1 and zmm22/zmm18 are multiplier operand pairs loaded from the
	 * stack (presumably blocks and key powers stashed earlier -- confirm).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Lane 1 of zmm4 (the 10th counter block) is broadcast into zmm2. */
	vextracti32x4	$1,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* XOR the counter blocks with the round key at offset 0. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1 products: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: accumulate pairs 1-3 into zmm14/zmm7/zmm11/zmm10. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: two full vectors, then the last 32 bytes masked by k1 (zeroing). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%ymm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Merge pair 4 and the values in zmm24/zmm25/zmm26 (presumably sums
	 * accumulated before this chunk -- confirm) into high (zmm14),
	 * low (zmm7) and middle (zmm10).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term: per 128-bit lane, its upper 8 bytes go to high and lower 8 bytes to low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	/* Horizontal XOR of the four 128-bit lanes of zmm14 and zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First reduction phase with the POLY2 constant (xmm16). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%ymm30,%ymm4,%ymm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%ymm31,%ymm4,%ymm4
	/* Second reduction phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = xmm14 ^ xmm12 ^ xmm15: the reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi), the same offset as the AES-192 path of the init routine. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%ymm30,%ymm4,%ymm4
	/* Keystream XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%ymm20,%ymm4,%ymm4
	/* Keep the unmasked last output block in xmm11; the partial-block path stores it at 16(%rsi). */
	vextracti32x4	$1,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%ymm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before they are hashed. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%ymm29,%ymm4,%ymm20
	vextracti32x4	$1,%zmm20,%xmm7
	/* r8 -= 144: what is left is the length of the 10th block. */
	subq	$16 * (10 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_270





	/*
	 * Last block is complete: store 0 at (%rdx) and hash all 10 blocks.
	 * xmm14 is folded into the first block, then the blocks are multiplied by
	 * the values at 192/256/320(%rsi) (presumably precomputed hash-key
	 * powers -- confirm).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Blocks 9-10 times the two 128-bit values at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_270
.L_small_initial_partial_block_270:








	/*
	 * Last block is partial: store its byte count at (%rdx) and xmm11 at
	 * 16(%rsi), then hash only the 9 complete blocks using the values at
	 * 208/272/336(%rsi). The partial block (xmm7) is XORed into xmm14 afterwards.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* 9th block times the single 128-bit value at 336(%rsi). */
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm20,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm20,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm20,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm20,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_270:

	/* r8 != 0 only on the partial path: XOR the not-yet-multiplied last block into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_270
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_270:
	jmp	.L_last_blocks_done_250
/*
 * Tail case: 11 blocks remain (two full 64-byte vectors in zmm0/zmm3 plus
 * three 16-byte lanes of zmm4, selected by k1; the 11th block may be partial).
 *
 * Same structure as the other tail cases: build k1 from
 * byte64_len_to_mask_table[r8 - 128], derive the counter blocks from zmm2,
 * run the AES rounds interleaved with carry-less multiplies of operand pairs
 * read from the stack, XOR with the input at (%rcx,%r11), store at
 * (%r9,%r11), and hash the byte-shuffled output blocks into xmm14.
 *
 * NOTE(review): r8 is presumably the remaining byte count and r15d the low
 * byte of the block counter; both are set up outside this chunk -- confirm.
 */
.L_last_num_blocks_is_11_250:
	/* rax = r8 - 128 indexes the 8-byte mask table entries; result goes to k1. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 245 = 256 - 11: at or above this value take the path that byte-swaps first. */
	cmpl	$245,%r15d
	jae	.L_16_blocks_overflow_271
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_271

	/*
	 * Slow path: shuffle zmm2 with the mask in zmm29, add ddq_add_1234 and
	 * ddq_add_4444, then shuffle each counter vector back with zmm29.
	 */
.L_16_blocks_overflow_271:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_271:




	/*
	 * zmm30/zmm31 alternate as the current round key (broadcast to all lanes).
	 * zmm8/zmm1 and zmm22/zmm18 are multiplier operand pairs loaded from the
	 * stack (presumably blocks and key powers stashed earlier -- confirm).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Lane 2 of zmm4 (the 11th counter block) is broadcast into zmm2. */
	vextracti32x4	$2,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* XOR the counter blocks with the round key at offset 0. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1 products: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: accumulate pairs 1-3 into zmm14/zmm7/zmm11/zmm10. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: two full vectors, then the third vector masked by k1 (zeroing). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Merge pair 4 and the values in zmm24/zmm25/zmm26 (presumably sums
	 * accumulated before this chunk -- confirm) into high (zmm14),
	 * low (zmm7) and middle (zmm10).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term: per 128-bit lane, its upper 8 bytes go to high and lower 8 bytes to low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Horizontal XOR of the four 128-bit lanes of zmm14 and zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First reduction phase with the POLY2 constant (xmm16). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Second reduction phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = xmm14 ^ xmm12 ^ xmm15: the reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi), the same offset as the AES-192 path of the init routine. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* Keystream XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* Keep the unmasked last output block in xmm11; the partial-block path stores it at 16(%rsi). */
	vextracti32x4	$2,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before they are hashed. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$2,%zmm20,%xmm7
	/* r8 -= 160: what is left is the length of the 11th block. */
	subq	$16 * (11 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_272





	/*
	 * Last block is complete: store 0 at (%rdx) and hash all 11 blocks.
	 * xmm14 is folded into the first block, then the blocks are multiplied by
	 * the values at 176/240/304/336(%rsi) (presumably precomputed hash-key
	 * powers -- confirm).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Three multipliers for blocks 9-11: 32 bytes from 304(%rsi), plus 336(%rsi) inserted as lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_272
.L_small_initial_partial_block_272:








	/*
	 * Last block is partial: store its byte count at (%rdx) and xmm11 at
	 * 16(%rsi), then hash only the 10 complete blocks using the values at
	 * 192/256/320(%rsi). The partial block (xmm7) is XORed into xmm14 afterwards.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Blocks 9-10 times the two 128-bit values at 320(%rsi). */
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm20,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm20,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm20,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm20,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_272:

	/* r8 != 0 only on the partial path: XOR the not-yet-multiplied last block into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_272
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_272:
	jmp	.L_last_blocks_done_250
/*
 * Tail case: 12 blocks remain (three 64-byte vectors in zmm0/zmm3/zmm4, the
 * third one masked by k1; the 12th block may be partial).
 *
 * Same structure as the other tail cases: build k1 from
 * byte64_len_to_mask_table[r8 - 128], derive the counter blocks from zmm2,
 * run the AES rounds interleaved with carry-less multiplies of operand pairs
 * read from the stack, XOR with the input at (%rcx,%r11), store at
 * (%r9,%r11), and hash the byte-shuffled output blocks into xmm14.
 *
 * NOTE(review): r8 is presumably the remaining byte count and r15d the low
 * byte of the block counter; both are set up outside this chunk -- confirm.
 */
.L_last_num_blocks_is_12_250:
	/* rax = r8 - 128 indexes the 8-byte mask table entries; result goes to k1. */
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$128,%rax
	kmovq	(%r10,%rax,8),%k1
	/* 244 = 256 - 12: at or above this value take the path that byte-swaps first. */
	cmpl	$244,%r15d
	jae	.L_16_blocks_overflow_273
	/* Fast path: add the increments held in zmm28/zmm27 directly to zmm2. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	jmp	.L_16_blocks_ok_273

	/*
	 * Slow path: shuffle zmm2 with the mask in zmm29, add ddq_add_1234 and
	 * ddq_add_4444, then shuffle each counter vector back with zmm29.
	 */
.L_16_blocks_overflow_273:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
.L_16_blocks_ok_273:




	/*
	 * zmm30/zmm31 alternate as the current round key (broadcast to all lanes).
	 * zmm8/zmm1 and zmm22/zmm18 are multiplier operand pairs loaded from the
	 * stack (presumably blocks and key powers stashed earlier -- confirm).
	 */
	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Lane 3 of zmm4 (the 12th counter block) is broadcast into zmm2. */
	vextracti32x4	$3,%zmm4,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	/* XOR the counter blocks with the round key at offset 0. */
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	32(%rdi),%zmm30


	/* Pair 1 products: 0x11 = high*high, 0x00 = low*low, 0x01/0x10 = cross terms. */
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	48(%rdi),%zmm31


	/* Pair 2 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	64(%rdi),%zmm30


	/* Pair 3 products. */
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq 0x96 is a three-way XOR: accumulate pairs 1-3 into zmm14/zmm7/zmm11/zmm10. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input: two full vectors, then the third vector masked by k1 (zeroing). */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	112(%rdi),%zmm31


	/* Pair 4 products. */
	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	128(%rdi),%zmm30
	/*
	 * Merge pair 4 and the values in zmm24/zmm25/zmm26 (presumably sums
	 * accumulated before this chunk -- confirm) into high (zmm14),
	 * low (zmm7) and middle (zmm10).
	 */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term: per 128-bit lane, its upper 8 bytes go to high and lower 8 bytes to low. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Horizontal XOR of the four 128-bit lanes of zmm14 and zmm7 down to xmm14 / xmm7. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* First reduction phase with the POLY2 constant (xmm16). */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	/* Second reduction phase. */
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	/* xmm14 = xmm14 ^ xmm12 ^ xmm15: the reduced 128-bit value. */
	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	/* Final AES round uses the key at 192(%rdi), the same offset as the AES-192 path of the init routine. */
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	/* Keystream XOR input. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	/* Keep the unmasked last output block in xmm11; the partial-block path stores it at 16(%rsi). */
	vextracti32x4	$3,%zmm4,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1){%k1}
	/* Zero the bytes of zmm4 outside k1 before they are hashed. */
	vmovdqu8	%zmm4,%zmm4{%k1}{z}
	/* Byte-shuffle the output blocks with zmm29 for hashing; xmm7 = shuffled last block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vextracti32x4	$3,%zmm20,%xmm7
	/* r8 -= 176: what is left is the length of the 12th block. */
	subq	$16 * (12 - 1),%r8


	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_274





	/*
	 * Last block is complete: store 0 at (%rdx) and hash all 12 blocks.
	 * xmm14 is folded into the first block, then the three vectors are
	 * multiplied by the values at 160/224/288(%rsi) (presumably precomputed
	 * hash-key powers -- confirm).
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	/* Third vector: zmm17/zmm19 are reused as scratch and merged with three-way XORs. */
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_274
.L_small_initial_partial_block_274:








	/*
	 * Last block is partial: store its byte count at (%rdx) and xmm11 at
	 * 16(%rsi), then hash only the 11 complete blocks using the values at
	 * 176/240/304/336(%rsi). The partial block (xmm7) is XORed into xmm14
	 * afterwards.
	 */
	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vpxorq	%zmm8,%zmm0,%zmm8
	vpxorq	%zmm22,%zmm3,%zmm22
	vpxorq	%zmm30,%zmm4,%zmm30
	vpxorq	%zmm31,%zmm5,%zmm31
	/* Three multipliers for blocks 9-11: 32 bytes from 304(%rsi), plus 336(%rsi) inserted as lane 2. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	/* Combine the cross terms, split them into high/low, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-phase reduction with POLY2; the result lands in xmm14. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

.L_small_initial_compute_done_274:

	/* r8 != 0 only on the partial path: XOR the not-yet-multiplied last block into xmm14. */
	orq	%r8,%r8
	je	.L_after_reduction_274
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_274:
	jmp	.L_last_blocks_done_250
.L_last_num_blocks_is_13_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$243,%r15d
	jae	.L_16_blocks_overflow_275
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%xmm27,%xmm4,%xmm5
	jmp	.L_16_blocks_ok_275

.L_16_blocks_overflow_275:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%xmm29,%xmm5,%xmm5
/*
 * Encrypts the counter vectors %zmm0/%zmm3/%zmm4/%xmm5 with the 13 round
 * keys at 0..192(%rdi): one whitening XOR, eleven vaesenc, one vaesenclast.
 * The AES rounds are interleaved with carry-less multiplies of the vectors
 * at 1280..1472(%rsp) by the vectors at 512..704(%rsp).
 * NOTE(review): the stack vectors are presumably the 16 previously
 * processed blocks and their matching hash-key powers - confirm.
 */
.L_16_blocks_ok_275:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter block used here, replicated to all lanes of %zmm2. */
	vextracti32x4	$0,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with 0x96 is a three-way XOR: merge the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the last vector is masked by k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%xmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in the running accumulators %zmm24, %zmm25 and %zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term in %zmm10 into halves for the high and low sums. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	/* XOR the four 128-bit lanes of %zmm14 and of %zmm7 down to one lane each. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-step reduction with the POLY2 constant; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%xmm30,%xmm5,%xmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%xmm31,%xmm5,%xmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%xmm30,%xmm5,%xmm5
	/* XOR the encrypted counters with the loaded input to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%xmm21,%xmm5,%xmm5
	/* %xmm11 = last output block, saved by the partial-block path below. */
	vextracti32x4	$0,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%xmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm5 that lie outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output with the %zmm29 mask; %xmm7 keeps the last shuffled block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%xmm29,%xmm5,%xmm21
	vextracti32x4	$0,%zmm21,%xmm7
	/* %r8 -= 192, leaving the size of the 13th block only. */
	subq	$16 * (13 - 1),%r8


	/* Fewer than 16 bytes left in the last block: take the partial-block path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_276





	/*
	 * Full last block: write 0 to (%rdx) and multiply all 13 shuffled blocks
	 * by the vectors at 144/208/272/336(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_276
/*
 * Partial last block: the leftover byte count in %r8 is written to (%rdx)
 * and the last output block (%xmm11) is saved at 16(%rsi). Only the first
 * 12 shuffled blocks (%zmm17/%zmm19/%zmm20) are multiplied here, using the
 * vectors at 160/224/288(%rsi). The reduced result is left in %xmm14.
 * NOTE(review): the %rsi vectors are presumably hash-key powers - confirm.
 */
.L_small_initial_partial_block_276:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	160(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	224(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	288(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31

	/* Combine the middle terms, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm31,%zmm30,%zmm30
	vpsrldq	$8,%zmm30,%zmm4
	vpslldq	$8,%zmm30,%zmm5
	vpxorq	%zmm4,%zmm8,%zmm0
	vpxorq	%zmm5,%zmm22,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point for the 13-block handler. %r8 is non-zero only on the
 * partial-block path; in that case the shuffled last block (%xmm7), which
 * was not multiplied above, is XORed into %xmm14.
 */
.L_small_initial_compute_done_276:

	orq	%r8,%r8
	je	.L_after_reduction_276
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_276:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for 14 remaining blocks: three full ZMM vectors plus two
 * blocks held in YMM.
 * k1 is read from byte64_len_to_mask_table at index (%r8 - 192) and later
 * masks the load and store of the data at byte offset 192.
 * The four counter vectors are derived from %zmm2 into %zmm0, %zmm3, %zmm4
 * and %ymm5.
 * NOTE(review): %r8 looks like the remaining byte count and %r15d like the
 * low counter byte (242 = 256 - 14) - confirm against the generator.
 */
.L_last_num_blocks_is_14_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$242,%r15d
	jae	.L_16_blocks_overflow_277
	/* Fast path: plain dword adds of %zmm28 and then %zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%ymm27,%ymm4,%ymm5
	jmp	.L_16_blocks_ok_277

/*
 * Slow path: shuffle %zmm2 with the %zmm29 mask, add ddq_add_1234 and then
 * ddq_add_4444 per vector, and shuffle every result with %zmm29 again.
 * NOTE(review): presumably a byte swap so the add can carry out of the low
 * counter byte - confirm.
 */
.L_16_blocks_overflow_277:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%ymm29,%ymm5,%ymm5
/*
 * Encrypts the counter vectors %zmm0/%zmm3/%zmm4/%ymm5 with the 13 round
 * keys at 0..192(%rdi): one whitening XOR, eleven vaesenc, one vaesenclast.
 * The AES rounds are interleaved with carry-less multiplies of the vectors
 * at 1280..1472(%rsp) by the vectors at 512..704(%rsp).
 * NOTE(review): the stack vectors are presumably the 16 previously
 * processed blocks and their matching hash-key powers - confirm.
 */
.L_16_blocks_ok_277:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter block used here, replicated to all lanes of %zmm2. */
	vextracti32x4	$1,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with 0x96 is a three-way XOR: merge the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the last vector is masked by k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%ymm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in the running accumulators %zmm24, %zmm25 and %zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term in %zmm10 into halves for the high and low sums. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	/* XOR the four 128-bit lanes of %zmm14 and of %zmm7 down to one lane each. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-step reduction with the POLY2 constant; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%ymm30,%ymm5,%ymm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%ymm31,%ymm5,%ymm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%ymm30,%ymm5,%ymm5
	/* XOR the encrypted counters with the loaded input to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%ymm21,%ymm5,%ymm5
	/* %xmm11 = last output block, saved by the partial-block path below. */
	vextracti32x4	$1,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%ymm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm5 that lie outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output with the %zmm29 mask; %xmm7 keeps the last shuffled block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%ymm29,%ymm5,%ymm21
	vextracti32x4	$1,%zmm21,%xmm7
	/* %r8 -= 208, leaving the size of the 14th block only. */
	subq	$16 * (14 - 1),%r8


	/* Fewer than 16 bytes left in the last block: take the partial-block path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_278





	/*
	 * Full last block: write 0 to (%rdx) and multiply all 14 shuffled blocks
	 * by the vectors at 128/192/256/320(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_278
/*
 * Partial last block: the leftover byte count in %r8 is written to (%rdx)
 * and the last output block (%xmm11) is saved at 16(%rsi). Only the first
 * 13 shuffled blocks (%zmm17/%zmm19/%zmm20 and the low lane of %zmm21) are
 * multiplied here, using the vectors at 144/208/272/336(%rsi). The reduced
 * result is left in %xmm14.
 * NOTE(review): the %rsi vectors are presumably hash-key powers - confirm.
 */
.L_small_initial_partial_block_278:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	144(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	208(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	272(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	336(%rsi),%xmm1
	vpclmulqdq	$0x01,%xmm1,%xmm21,%xmm4
	vpclmulqdq	$0x10,%xmm1,%xmm21,%xmm5
	vpclmulqdq	$0x11,%xmm1,%xmm21,%xmm0
	vpclmulqdq	$0x00,%xmm1,%xmm21,%xmm3

	/* Combine all partial products, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point for the 14-block handler. %r8 is non-zero only on the
 * partial-block path; in that case the shuffled last block (%xmm7), which
 * was not multiplied above, is XORed into %xmm14.
 */
.L_small_initial_compute_done_278:

	orq	%r8,%r8
	je	.L_after_reduction_278
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_278:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for 15 remaining blocks: four ZMM vectors, of which the
 * last one carries three blocks.
 * k1 is read from byte64_len_to_mask_table at index (%r8 - 192) and later
 * masks the load and store of the data at byte offset 192.
 * The four counter vectors are derived from %zmm2 into %zmm0, %zmm3, %zmm4
 * and %zmm5.
 * NOTE(review): %r8 looks like the remaining byte count and %r15d like the
 * low counter byte (241 = 256 - 15) - confirm against the generator.
 */
.L_last_num_blocks_is_15_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$241,%r15d
	jae	.L_16_blocks_overflow_279
	/* Fast path: plain dword adds of %zmm28 and then %zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_279

/*
 * Slow path: shuffle %zmm2 with the %zmm29 mask, add ddq_add_1234 and then
 * ddq_add_4444 per vector, and shuffle every result with %zmm29 again.
 * NOTE(review): presumably a byte swap so the add can carry out of the low
 * counter byte - confirm.
 */
.L_16_blocks_overflow_279:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * Encrypts the counter vectors %zmm0/%zmm3/%zmm4/%zmm5 with the 13 round
 * keys at 0..192(%rdi): one whitening XOR, eleven vaesenc, one vaesenclast.
 * The AES rounds are interleaved with carry-less multiplies of the vectors
 * at 1280..1472(%rsp) by the vectors at 512..704(%rsp).
 * NOTE(review): the stack vectors are presumably the 16 previously
 * processed blocks and their matching hash-key powers - confirm.
 */
.L_16_blocks_ok_279:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter block used here, replicated to all lanes of %zmm2. */
	vextracti32x4	$2,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with 0x96 is a three-way XOR: merge the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the last vector is masked by k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in the running accumulators %zmm24, %zmm25 and %zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term in %zmm10 into halves for the high and low sums. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* XOR the four 128-bit lanes of %zmm14 and of %zmm7 down to one lane each. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-step reduction with the POLY2 constant; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the encrypted counters with the loaded input to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* %xmm11 = last output block, saved by the partial-block path below. */
	vextracti32x4	$2,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm5 that lie outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output with the %zmm29 mask; %xmm7 keeps the last shuffled block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$2,%zmm21,%xmm7
	/* %r8 -= 224, leaving the size of the 15th block only. */
	subq	$16 * (15 - 1),%r8


	/* Fewer than 16 bytes left in the last block: take the partial-block path. */
	cmpq	$16,%r8
	jl	.L_small_initial_partial_block_280





	/*
	 * Full last block: write 0 to (%rdx) and multiply all 15 shuffled blocks
	 * by the vectors at 112/176/240(%rsi) plus a fourth vector assembled from
	 * 304(%rsi) and 336(%rsi).
	 * NOTE(review): presumably precomputed hash-key powers - confirm.
	 */
	subq	$16,%r8
	movq	$0,(%rdx)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

	jmp	.L_small_initial_compute_done_280
/*
 * Partial last block: the leftover byte count in %r8 is written to (%rdx)
 * and the last output block (%xmm11) is saved at 16(%rsi). Only the first
 * 14 shuffled blocks (%zmm17/%zmm19/%zmm20 and the low half of %zmm21) are
 * multiplied here, using the vectors at 128/192/256/320(%rsi). The reduced
 * result is left in %xmm14.
 * NOTE(review): the %rsi vectors are presumably hash-key powers - confirm.
 */
.L_small_initial_partial_block_280:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	128(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	192(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	256(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	vmovdqu64	320(%rsi),%ymm1
	vpclmulqdq	$0x01,%ymm1,%ymm21,%ymm4
	vpclmulqdq	$0x10,%ymm1,%ymm21,%ymm5
	vpclmulqdq	$0x11,%ymm1,%ymm21,%ymm0
	vpclmulqdq	$0x00,%ymm1,%ymm21,%ymm3

	/* Combine all partial products, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14

/*
 * Join point for the 15-block handler. %r8 is non-zero only on the
 * partial-block path; in that case the shuffled last block (%xmm7), which
 * was not multiplied above, is XORed into %xmm14.
 */
.L_small_initial_compute_done_280:

	orq	%r8,%r8
	je	.L_after_reduction_280
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_280:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for 16 remaining blocks: four ZMM vectors.
 * k1 is read from byte64_len_to_mask_table at index (%r8 - 192) and later
 * masks the load and store of the data at byte offset 192.
 * The four counter vectors are derived from %zmm2 into %zmm0, %zmm3, %zmm4
 * and %zmm5.
 * NOTE(review): %r8 looks like the remaining byte count and %r15d like the
 * low counter byte (240 = 256 - 16) - confirm against the generator.
 */
.L_last_num_blocks_is_16_250:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	subq	$192,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$240,%r15d
	jae	.L_16_blocks_overflow_281
	/* Fast path: plain dword adds of %zmm28 and then %zmm27. */
	vpaddd	%zmm28,%zmm2,%zmm0
	vpaddd	%zmm27,%zmm0,%zmm3
	vpaddd	%zmm27,%zmm3,%zmm4
	vpaddd	%zmm27,%zmm4,%zmm5
	jmp	.L_16_blocks_ok_281

/*
 * Slow path: shuffle %zmm2 with the %zmm29 mask, add ddq_add_1234 and then
 * ddq_add_4444 per vector, and shuffle every result with %zmm29 again.
 * NOTE(review): presumably a byte swap so the add can carry out of the low
 * counter byte - confirm.
 */
.L_16_blocks_overflow_281:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vmovdqa64	ddq_add_4444(%rip),%zmm5
	vpaddd	%zmm5,%zmm0,%zmm3
	vpaddd	%zmm5,%zmm3,%zmm4
	vpaddd	%zmm5,%zmm4,%zmm5
	vpshufb	%zmm29,%zmm0,%zmm0
	vpshufb	%zmm29,%zmm3,%zmm3
	vpshufb	%zmm29,%zmm4,%zmm4
	vpshufb	%zmm29,%zmm5,%zmm5
/*
 * Encrypts the counter vectors %zmm0/%zmm3/%zmm4/%zmm5 with the 13 round
 * keys at 0..192(%rdi): one whitening XOR, eleven vaesenc, one vaesenclast.
 * The AES rounds are interleaved with carry-less multiplies of the vectors
 * at 1280..1472(%rsp) by the vectors at 512..704(%rsp).
 * Unlike the 13/14/15-block handlers there is no full-block branch here:
 * control always falls through into the partial-block code that follows.
 * NOTE(review): the stack vectors are presumably the 16 previously
 * processed blocks and their matching hash-key powers - confirm.
 */
.L_16_blocks_ok_281:




	vbroadcastf64x2	0(%rdi),%zmm30
	vmovdqa64	1280(%rsp),%zmm8
	vmovdqu64	512(%rsp),%zmm1
	/* Keep the last counter block used here, replicated to all lanes of %zmm2. */
	vextracti32x4	$3,%zmm5,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	576(%rsp),%zmm18
	vmovdqa64	1344(%rsp),%zmm22
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm30,%zmm3,%zmm3
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	640(%rsp),%zmm1
	vmovdqa64	1408(%rsp),%zmm8
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	704(%rsp),%zmm18
	vmovdqa64	1472(%rsp),%zmm22
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	80(%rdi),%zmm31


	/* vpternlogq with 0x96 is a three-way XOR: merge the partial products. */
	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	96(%rdi),%zmm30
	/* Load the input data; the last vector is masked by k1 with zeroing. */
	vmovdqu8	0(%rcx,%r11,1),%zmm17
	vmovdqu8	64(%rcx,%r11,1),%zmm19
	vmovdqu8	128(%rcx,%r11,1),%zmm20
	vmovdqu8	192(%rcx,%r11,1),%zmm21{%k1}{z}
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	128(%rdi),%zmm30
	/* Fold in the running accumulators %zmm24, %zmm25 and %zmm26. */
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpternlogq	$0x96,%zmm12,%zmm24,%zmm14
	vpternlogq	$0x96,%zmm13,%zmm25,%zmm7
	vpternlogq	$0x96,%zmm15,%zmm26,%zmm10
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vbroadcastf64x2	144(%rdi),%zmm31
	/* Split the middle term in %zmm10 into halves for the high and low sums. */
	vpsrldq	$8,%zmm10,%zmm15
	vpslldq	$8,%zmm10,%zmm10

	vmovdqa64	POLY2(%rip),%xmm16
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	160(%rdi),%zmm30
	vpxorq	%zmm15,%zmm14,%zmm14
	vpxorq	%zmm10,%zmm7,%zmm7
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	/* XOR the four 128-bit lanes of %zmm14 and of %zmm7 down to one lane each. */
	vextracti64x4	$1,%zmm14,%ymm12
	vpxorq	%ymm12,%ymm14,%ymm14
	vextracti32x4	$1,%ymm14,%xmm12
	vpxorq	%xmm12,%xmm14,%xmm14
	vextracti64x4	$1,%zmm7,%ymm13
	vpxorq	%ymm13,%ymm7,%ymm7
	vextracti32x4	$1,%ymm7,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm7
	vbroadcastf64x2	176(%rdi),%zmm31
	/* Two-step reduction with the POLY2 constant; the result lands in %xmm14. */
	vpclmulqdq	$0x01,%xmm7,%xmm16,%xmm13
	vpslldq	$8,%xmm13,%xmm13
	vpxorq	%xmm13,%xmm7,%xmm13
	vaesenc	%zmm30,%zmm0,%zmm0
	vaesenc	%zmm30,%zmm3,%zmm3
	vaesenc	%zmm30,%zmm4,%zmm4
	vaesenc	%zmm30,%zmm5,%zmm5
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%zmm31,%zmm0,%zmm0
	vaesenc	%zmm31,%zmm3,%zmm3
	vaesenc	%zmm31,%zmm4,%zmm4
	vaesenc	%zmm31,%zmm5,%zmm5
	vpclmulqdq	$0x00,%xmm13,%xmm16,%xmm12
	vpsrldq	$4,%xmm12,%xmm12
	vpclmulqdq	$0x10,%xmm13,%xmm16,%xmm15
	vpslldq	$4,%xmm15,%xmm15

	vpternlogq	$0x96,%xmm12,%xmm15,%xmm14
	vaesenclast	%zmm30,%zmm0,%zmm0
	vaesenclast	%zmm30,%zmm3,%zmm3
	vaesenclast	%zmm30,%zmm4,%zmm4
	vaesenclast	%zmm30,%zmm5,%zmm5
	/* XOR the encrypted counters with the loaded input to form the output. */
	vpxorq	%zmm17,%zmm0,%zmm0
	vpxorq	%zmm19,%zmm3,%zmm3
	vpxorq	%zmm20,%zmm4,%zmm4
	vpxorq	%zmm21,%zmm5,%zmm5
	/* %xmm11 = last output block, saved by the code that follows. */
	vextracti32x4	$3,%zmm5,%xmm11
	movq	%r9,%r10
	vmovdqu8	%zmm0,0(%r10,%r11,1)
	vmovdqu8	%zmm3,64(%r10,%r11,1)
	vmovdqu8	%zmm4,128(%r10,%r11,1)
	vmovdqu8	%zmm5,192(%r10,%r11,1){%k1}
	/* Zero the bytes of %zmm5 that lie outside the k1 mask. */
	vmovdqu8	%zmm5,%zmm5{%k1}{z}
	/* Shuffle the output with the %zmm29 mask; %xmm7 keeps the last shuffled block. */
	vpshufb	%zmm29,%zmm0,%zmm17
	vpshufb	%zmm29,%zmm3,%zmm19
	vpshufb	%zmm29,%zmm4,%zmm20
	vpshufb	%zmm29,%zmm5,%zmm21
	vextracti32x4	$3,%zmm21,%xmm7
	/* %r8 -= 240, leaving the size of the 16th block only. */
	subq	$16 * (16 - 1),%r8
/*
 * Reached by fall-through from the 16-block handler: the byte count left in
 * %r8 is written to (%rdx) and the last output block (%xmm11) is saved at
 * 16(%rsi). The first 15 shuffled blocks are multiplied by the vectors at
 * 112/176/240(%rsi) plus a fourth vector assembled from 304(%rsi) and
 * 336(%rsi). The reduced result is left in %xmm14.
 * NOTE(review): the %rsi vectors are presumably hash-key powers - confirm.
 */
.L_small_initial_partial_block_282:








	movq	%r8,(%rdx)
	vmovdqu64	%xmm11,16(%rsi)
	vpxorq	%zmm14,%zmm17,%zmm17
	vmovdqu64	112(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm17,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm17,%zmm3
	vpclmulqdq	$0x01,%zmm1,%zmm17,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm17,%zmm5
	vmovdqu64	176(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm19,%zmm8
	vpclmulqdq	$0x00,%zmm1,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm19,%zmm30
	vpclmulqdq	$0x10,%zmm1,%zmm19,%zmm31
	vmovdqu64	240(%rsi),%zmm1
	vpclmulqdq	$0x11,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm0,%zmm17,%zmm8
	vpternlogq	$0x96,%zmm3,%zmm19,%zmm22
	vpclmulqdq	$0x01,%zmm1,%zmm20,%zmm17
	vpclmulqdq	$0x10,%zmm1,%zmm20,%zmm19
	vpternlogq	$0x96,%zmm4,%zmm17,%zmm30
	vpternlogq	$0x96,%zmm5,%zmm19,%zmm31
	/* Lanes 0-1 come from 304(%rsi), lane 2 from 336(%rsi); lane 3 stays zero. */
	vmovdqu64	304(%rsi),%ymm1
	vinserti64x2	$2,336(%rsi),%zmm1,%zmm1
	vpclmulqdq	$0x01,%zmm1,%zmm21,%zmm4
	vpclmulqdq	$0x10,%zmm1,%zmm21,%zmm5
	vpclmulqdq	$0x11,%zmm1,%zmm21,%zmm0
	vpclmulqdq	$0x00,%zmm1,%zmm21,%zmm3

	/* Combine all partial products, then XOR the lanes down to 128 bits. */
	vpxorq	%zmm30,%zmm4,%zmm4
	vpxorq	%zmm31,%zmm5,%zmm5
	vpxorq	%zmm8,%zmm0,%zmm0
	vpxorq	%zmm22,%zmm3,%zmm3

	vpxorq	%zmm5,%zmm4,%zmm4
	vpsrldq	$8,%zmm4,%zmm30
	vpslldq	$8,%zmm4,%zmm31
	vpxorq	%zmm30,%zmm0,%zmm0
	vpxorq	%zmm31,%zmm3,%zmm3
	vextracti64x4	$1,%zmm0,%ymm30
	vpxorq	%ymm30,%ymm0,%ymm0
	vextracti32x4	$1,%ymm0,%xmm30
	vpxorq	%xmm30,%xmm0,%xmm0
	vextracti64x4	$1,%zmm3,%ymm31
	vpxorq	%ymm31,%ymm3,%ymm3
	vextracti32x4	$1,%ymm3,%xmm31
	vpxorq	%xmm31,%xmm3,%xmm3
	/* Two-step reduction with the POLY2 constant. */
	vmovdqa64	POLY2(%rip),%xmm1


	vpclmulqdq	$0x01,%xmm3,%xmm1,%xmm4
	vpslldq	$8,%xmm4,%xmm4
	vpxorq	%xmm4,%xmm3,%xmm4


	vpclmulqdq	$0x00,%xmm4,%xmm1,%xmm5
	vpsrldq	$4,%xmm5,%xmm5
	vpclmulqdq	$0x10,%xmm4,%xmm1,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm0,%xmm5,%xmm14


/*
 * End of the 16-block handler: the shuffled last block (%xmm7) is XORed
 * into %xmm14 unconditionally; there is no %r8 test in this variant.
 */
.L_small_initial_compute_done_282:
	vpxorq	%xmm7,%xmm14,%xmm14
.L_after_reduction_282:
	jmp	.L_last_blocks_done_250
/*
 * Tail handler for zero remaining blocks: no AES work is done. The four
 * vectors at 1280..1472(%rsp) are carry-less multiplied by the four vectors
 * at 512..704(%rsp), the products are folded into the accumulators %zmm24
 * (high), %zmm25 (low) and %zmm26 (middle), and the sum is reduced with
 * POLY2 into %xmm14.
 * NOTE(review): the stack vectors are presumably the previously processed
 * blocks and their matching hash-key powers - confirm.
 */
.L_last_num_blocks_is_0_250:
	vmovdqa64	1280(%rsp),%zmm13
	vmovdqu64	512(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1344(%rsp),%zmm13
	vmovdqu64	576(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11
	/* vpternlogq with 0x96 is a three-way XOR into the accumulator. */
	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26
	vmovdqa64	1408(%rsp),%zmm13
	vmovdqu64	640(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm0
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm3
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm4
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm5
	vmovdqa64	1472(%rsp),%zmm13
	vmovdqu64	704(%rsp),%zmm12
	vpclmulqdq	$0x11,%zmm12,%zmm13,%zmm6
	vpclmulqdq	$0x00,%zmm12,%zmm13,%zmm7
	vpclmulqdq	$0x01,%zmm12,%zmm13,%zmm10
	vpclmulqdq	$0x10,%zmm12,%zmm13,%zmm11

	vpternlogq	$0x96,%zmm10,%zmm4,%zmm26
	vpternlogq	$0x96,%zmm6,%zmm0,%zmm24
	vpternlogq	$0x96,%zmm7,%zmm3,%zmm25
	vpternlogq	$0x96,%zmm11,%zmm5,%zmm26

	/* Split the middle term and XOR its halves into the high and low sums. */
	vpsrldq	$8,%zmm26,%zmm0
	vpslldq	$8,%zmm26,%zmm3
	vpxorq	%zmm0,%zmm24,%zmm24
	vpxorq	%zmm3,%zmm25,%zmm25
	/* XOR the four 128-bit lanes of each sum down to a single lane. */
	vextracti64x4	$1,%zmm24,%ymm0
	vpxorq	%ymm0,%ymm24,%ymm24
	vextracti32x4	$1,%ymm24,%xmm0
	vpxorq	%xmm0,%xmm24,%xmm24
	vextracti64x4	$1,%zmm25,%ymm3
	vpxorq	%ymm3,%ymm25,%ymm25
	vextracti32x4	$1,%ymm25,%xmm3
	vpxorq	%xmm3,%xmm25,%xmm25
	/* Two-step reduction with the POLY2 constant; the result lands in %xmm14. */
	vmovdqa64	POLY2(%rip),%xmm4


	vpclmulqdq	$0x01,%xmm25,%xmm4,%xmm0
	vpslldq	$8,%xmm0,%xmm0
	vpxorq	%xmm0,%xmm25,%xmm0


	vpclmulqdq	$0x00,%xmm0,%xmm4,%xmm3
	vpsrldq	$4,%xmm3,%xmm3
	vpclmulqdq	$0x10,%xmm0,%xmm4,%xmm14
	vpslldq	$4,%xmm14,%xmm14
	vpternlogq	$0x96,%xmm24,%xmm3,%xmm14

/*
 * .L_last_blocks_done_250
 *
 * Common exit for the "last blocks" handlers of this group (reached by
 * jump or by fall-through from the 0-block handler directly above).
 * Permutes the bytes of xmm2 using the shuffle control held in xmm29,
 * then jumps to .L_ghash_done_172.
 * NOTE(review): xmm2 looks like the counter block and xmm29 a byte-swap
 * mask, but neither is set up in this block -- confirm at their
 * definitions.
 */
.L_last_blocks_done_250:
	vpshufb	%xmm29,%xmm2,%xmm2
	jmp	.L_ghash_done_172

/*
 * .L_message_below_32_blocks_172
 *
 * Adjusts the length/offset registers by 256 (r8 -= 256, r11 += 256),
 * copies the low 32 bits of the new r8 into r10d, and, unless r14 is
 * already nonzero, extends the 64-byte-per-entry table on the stack
 * downwards from 512(%rsp) to 256(%rsp).
 *
 * Each new entry is a carry-less product (per 128-bit lane) reduced with
 * the constant at POLY2:
 *     448(%rsp) = 576(%rsp) * B
 *     384(%rsp) = 512(%rsp) * B
 *     320(%rsp) = 448(%rsp) * B
 *     256(%rsp) = 384(%rsp) * B
 * where B is the lowest 128-bit lane of 640(%rsp) broadcast to all lanes.
 *
 * NOTE(review): going by the label names these entries are presumably
 * precomputed hash-key powers, r8 the remaining byte count and r11 the
 * running data offset -- not established by this block; confirm against
 * the function prologue.
 */
.L_message_below_32_blocks_172:


	subq	$256,%r8
	addq	$256,%r11
	movl	%r8d,%r10d
	/* r14 != 0: skip the table extension below. */
	testq	%r14,%r14
	jnz	.L_skip_hkeys_precomputation_283
	vmovdqu64	640(%rsp),%zmm3


	/* Broadcast lane 0 of zmm3 to all four 128-bit lanes (multiplier B). */
	vshufi64x2	$0x00,%zmm3,%zmm3,%zmm3

	vmovdqu64	576(%rsp),%zmm4
	vmovdqu64	512(%rsp),%zmm5

	/* --- Step 1: zmm4 = 576(%rsp) * B.  Four partial products:
	   0x11 high*high -> zmm6, 0x00 low*low -> zmm7, cross terms
	   -> zmm10 and zmm4 (then XORed together). */
	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	/* Distribute the cross terms: upper 8 bytes into the high half
	   (zmm6), lower 8 bytes shifted up into the low half (zmm4). */
	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	/* Reduction constant, 64 bytes loaded from POLY2. */
	vmovdqu64	POLY2(%rip),%zmm10

	/* Reduction phase 1 on the low half. */
	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	/* Reduction phase 2; the final three-way XOR (imm 0x96) merges in
	   the high half. */
	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,448(%rsp)

	/* --- Step 2: zmm5 = 512(%rsp) * B, same multiply/reduce sequence. */
	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	/* zmm10 was used as scratch above, so the constant is reloaded. */
	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,384(%rsp)

	/* --- Step 3: zmm4 (value just stored at 448(%rsp)) * B. */
	vpclmulqdq	$0x11,%zmm3,%zmm4,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm4,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm4,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm4,%zmm4

	vpsrldq	$8,%zmm4,%zmm10
	vpslldq	$8,%zmm4,%zmm4
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm4,%zmm4



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm4,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm4,%zmm4



	vpclmulqdq	$0x00,%zmm4,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm4,%zmm10,%zmm4
	vpslldq	$4,%zmm4,%zmm4

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm4

	vmovdqu64	%zmm4,320(%rsp)

	/* --- Step 4: zmm5 (value just stored at 384(%rsp)) * B. */
	vpclmulqdq	$0x11,%zmm3,%zmm5,%zmm6
	vpclmulqdq	$0x00,%zmm3,%zmm5,%zmm7
	vpclmulqdq	$0x01,%zmm3,%zmm5,%zmm10
	vpclmulqdq	$0x10,%zmm3,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm5,%zmm5

	vpsrldq	$8,%zmm5,%zmm10
	vpslldq	$8,%zmm5,%zmm5
	vpxorq	%zmm10,%zmm6,%zmm6
	vpxorq	%zmm7,%zmm5,%zmm5



	vmovdqu64	POLY2(%rip),%zmm10

	vpclmulqdq	$0x01,%zmm5,%zmm10,%zmm7
	vpslldq	$8,%zmm7,%zmm7
	vpxorq	%zmm7,%zmm5,%zmm5



	vpclmulqdq	$0x00,%zmm5,%zmm10,%zmm7
	vpsrldq	$4,%zmm7,%zmm7
	vpclmulqdq	$0x10,%zmm5,%zmm10,%zmm5
	vpslldq	$4,%zmm5,%zmm5

	vpternlogq	$0x96,%zmm7,%zmm6,%zmm5

	vmovdqu64	%zmm5,256(%rsp)
/*
 * .L_skip_hkeys_precomputation_283
 *
 * Reached either by fall-through after the stack table has been extended
 * or by the jump that skips that work when r14 was already nonzero.
 *
 * On entry r10d holds a copy of r8d.  This block:
 *   - sets r14 = 1, so the r14 test above takes the skip path next time;
 *   - computes ebx = 512 - (r8d rounded down to a multiple of 16);
 *     NOTE(review): the handler that follows uses rbx as a byte offset
 *     from rsp (e.g. 0(%rsp,%rbx,1)), presumably to select the first
 *     table entry needed -- confirm in the individual handlers;
 *   - computes r10d = (r8d + 15) >> 4, i.e. r8d divided by 16 rounded up;
 *   - branches to the handler for that count via a compare tree.
 *     Counts 0..15 go to their own handler; any count above 15 goes to
 *     the 16 handler.  A count of 1 falls through past the end of this
 *     block.
 */
.L_skip_hkeys_precomputation_283:
	movq	$1,%r14
	andl	$~15,%r10d
	movl	$512,%ebx
	subl	%r10d,%ebx
	movl	%r8d,%r10d
	addl	$15,%r10d
	/* The shift sets ZF when the resulting count is zero; the je below
	   consumes that flag directly. */
	shrl	$4,%r10d
	je	.L_last_num_blocks_is_0_284

	/* Count != 0: split at 8 (below -> 1..7 subtree). */
	cmpl	$8,%r10d
	je	.L_last_num_blocks_is_8_284
	jb	.L_last_num_blocks_is_7_1_284


	/* Count > 8: split at 12 (below -> 9..11 subtree). */
	cmpl	$12,%r10d
	je	.L_last_num_blocks_is_12_284
	jb	.L_last_num_blocks_is_11_9_284


	/* Count > 12: 15, above 15 (-> 16), 14, otherwise 13. */
	cmpl	$15,%r10d
	je	.L_last_num_blocks_is_15_284
	ja	.L_last_num_blocks_is_16_284
	cmpl	$14,%r10d
	je	.L_last_num_blocks_is_14_284
	jmp	.L_last_num_blocks_is_13_284

/* Count in 9..11. */
.L_last_num_blocks_is_11_9_284:

	cmpl	$10,%r10d
	je	.L_last_num_blocks_is_10_284
	ja	.L_last_num_blocks_is_11_284
	jmp	.L_last_num_blocks_is_9_284

/* Count in 1..7: split at 4 (below -> 1..3 subtree), then 7 / 6 / 5. */
.L_last_num_blocks_is_7_1_284:
	cmpl	$4,%r10d
	je	.L_last_num_blocks_is_4_284
	jb	.L_last_num_blocks_is_3_1_284

	cmpl	$6,%r10d
	ja	.L_last_num_blocks_is_7_284
	je	.L_last_num_blocks_is_6_284
	jmp	.L_last_num_blocks_is_5_284

/* Count in 1..3: 3 and 2 branch away, 1 falls through. */
.L_last_num_blocks_is_3_1_284:

	cmpl	$2,%r10d
	ja	.L_last_num_blocks_is_3_284
	je	.L_last_num_blocks_is_2_284
.L_last_num_blocks_is_1_284:
	leaq	byte64_len_to_mask_table(%rip),%r10
	movq	%r8,%rax
	kmovq	(%r10,%rax,8),%k1
	cmpl	$255,%r15d
	jae	.L_16_blocks_overflow_285
	vpaddd	%xmm28,%xmm2,%xmm0
	jmp	.L_16_blocks_ok_285

.L_16_blocks_overflow_285:
	vpshufb	%zmm29,%zmm2,%zmm2
	vpaddd	ddq_add_1234(%rip),%zmm2,%zmm0
	vpshufb	%xmm29,%xmm0,%xmm0
.L_16_blocks_ok_285:




	vbroadcastf64x2	0(%rdi),%zmm30
	vpxorq	768(%rsp),%zmm14,%zmm8
	vmovdqu64	0(%rsp,%rbx,1),%zmm1
	vextracti32x4	$0,%zmm0,%xmm2
	vshufi64x2	$0,%zmm2,%zmm2,%zmm2


	vbroadcastf64x2	16(%rdi),%zmm31
	vmovdqu64	64(%rsp,%rbx,1),%zmm18
	vmovdqa64	832(%rsp),%zmm22
	vpxorq	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	32(%rdi),%zmm30


	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm14
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm7
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm10
	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm11
	vmovdqu64	128(%rsp,%rbx,1),%zmm1
	vmovdqa64	896(%rsp),%zmm8
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	48(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vmovdqu64	192(%rsp,%rbx,1),%zmm18
	vmovdqa64	960(%rsp),%zmm22
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	64(%rdi),%zmm30


	vpclmulqdq	$0x10,%zmm1,%zmm8,%zmm20
	vpclmulqdq	$0x01,%zmm1,%zmm8,%zmm21
	vpclmulqdq	$0x11,%zmm1,%zmm8,%zmm17
	vpclmulqdq	$0x00,%zmm1,%zmm8,%zmm19
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	80(%rdi),%zmm31


	vpternlogq	$0x96,%zmm17,%zmm12,%zmm14
	vpternlogq	$0x96,%zmm19,%zmm13,%zmm7
	vpternlogq	$0x96,%zmm21,%zmm16,%zmm11
	vpternlogq	$0x96,%zmm20,%zmm15,%zmm10
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	96(%rdi),%zmm30
	vmovdqu8	0(%rcx,%r11,1),%xmm17{%k1}{z}
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	112(%rdi),%zmm31


	vpclmulqdq	$0x10,%zmm18,%zmm22,%zmm15
	vpclmulqdq	$0x01,%zmm18,%zmm22,%zmm16
	vpclmulqdq	$0x11,%zmm18,%zmm22,%zmm12
	vpclmulqdq	$0x00,%zmm18,%zmm22,%zmm13
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	128(%rdi),%zmm30
	vpternlogq	$0x96,%zmm16,%zmm11,%zmm10
	vpxorq	%zmm12,%zmm14,%zmm24
	vpxorq	%zmm13,%zmm7,%zmm25
	vpxorq	%zmm15,%zmm10,%zmm26
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	144(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	160(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vbroadcastf64x2	176(%rdi),%zmm31
	vaesenc	%xmm30,%xmm0,%xmm0
	vbroadcastf64x2	192(%rdi),%zmm30
	vaesenc	%xmm31,%xmm0,%xmm0
	vaesenclast	%xmm30,%xmm0,%xmm0
	vpxorq	%xmm17,%xmm0,%xmm0
	vextracti32x4	$0,%zmm0,%xmm11
	movq	%r9,%r10
	vmovdqu8	%xmm0,0(%r10,%r11,1){%k1}