/*
* This file is part of the openHiTLS project.
*
* openHiTLS is licensed under the Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/
#include "hitls_build.h"
#if defined(HITLS_CRYPTO_AES) && defined(HITLS_CRYPTO_ECB)
#include "crypt_aes_macro_x86_64.s"
.text
/*
 * Register aliases used by the routines in this file.
 * Several aliases deliberately name the same physical register:
 * KEY == ARG1 (%rdi), KTMP == ARG6 (%r9), ROUNDS == RET (%eax).
 */

/* Integer argument registers in System V AMD64 order; ARG4 is the 32-bit view %ecx. */
.set ARG1,   %rdi
.set ARG2,   %rsi
.set ARG3,   %rdx
.set ARG4,   %ecx
.set ARG5,   %r8
.set ARG6,   %r9

/* Key handling: KEY is the key-structure pointer, KTMP a scratch copy of it, RDK holds data loaded from it. */
.set KEY,    %rdi
.set KTMP,   %r9
.set RDK,    %xmm3

/* %eax carries the round counter while processing and the return value at exit. */
.set ROUNDS, %eax
.set RET,    %eax

/* Data block registers. BLK0-BLK7 serve the 1..8 block paths, BLK8-BLK13 are added for the 14 block path. */
.set BLK0,   %xmm1
.set BLK1,   %xmm4
.set BLK2,   %xmm5
.set BLK3,   %xmm6
.set BLK4,   %xmm10
.set BLK5,   %xmm11
.set BLK6,   %xmm12
.set BLK7,   %xmm13
.set BLK8,   %xmm0
.set BLK9,   %xmm2
.set BLK10,  %xmm7
.set BLK11,  %xmm8
.set BLK12,  %xmm9
.set BLK13,  %xmm14
/**
 * Function description: AES encryption in ECB mode, processing the input in groups of
 *                       14, 8 or 1..7 blocks of 16 bytes with the AES_ENC_*_BLK(S) macros.
 * Function prototype: int32_t CRYPT_AES_ECB_Encrypt(const CRYPT_AES_Key *ctx,
 *                         const uint8_t *in, uint8_t *out, uint32_t len);
 * Input register (System V AMD64 argument order):
 *     rdi: ctx, pointer to the key structure. 16 bytes are loaded from offset 0 and
 *          the 32-bit round count from offset 240.
 *     rsi: in, pointer to the input data.
 *     rdx: out, pointer to the output data.
 *     ecx: len, number of bytes to process. No validation is done here.
 *          NOTE(review): presumably the caller guarantees a multiple of 16 - confirm.
 * Dispatch:
 *     len > 256          -> 14 block loop (224 bytes per iteration)
 *     128 <= len <= 256  -> 8 block loop (128 bytes per iteration)
 *     len < 128          -> one straight-line pass over 1..7 blocks, then return
 *     After each bulk loop drops below its own size the dispatcher is re-entered.
 * Change register: eax, ecx, rsi, rdx, r9, xmm0-xmm14, flags, plus anything the
 *                  AES_ENC_* macros modify (their definitions are not in this file).
 * Output register: eax, always 0.
 * Function/Macro Call: AES_ENC_1_BLK, AES_ENC_2_BLKS ... AES_ENC_8_BLKS, AES_ENC_14_BLKS.
 */
.align 16
.globl CRYPT_AES_ECB_Encrypt
.type CRYPT_AES_ECB_Encrypt, @function
CRYPT_AES_ECB_Encrypt:
.cfi_startproc
/* Dispatcher: choose a path from the number of bytes still to process (ARG4). */
.Lecb_aesenc_start:
    cmpl    $64, ARG4
    jae     .Lecb_enc_above_equal_4_blks
    cmpl    $32, ARG4
    jae     .Lecb_enc_above_equal_2_blks
    testl   ARG4, ARG4
    jz      .Lecb_aesenc_finish             /* nothing left: leave through this function's own exit */
    jmp     .Lecb_enc_proc_1_blk
.Lecb_enc_above_equal_2_blks:
    cmpl    $48, ARG4
    jb      .Lecb_enc_proc_2_blks
    jmp     .Lecb_enc_proc_3_blks
.Lecb_enc_above_equal_4_blks:
    cmpl    $96, ARG4
    jae     .Lecb_enc_above_equal_6_blks
    cmpl    $80, ARG4
    jb      .Lecb_enc_proc_4_blks
    jmp     .Lecb_enc_proc_5_blks
.Lecb_enc_above_equal_6_blks:
    cmpl    $112, ARG4
    jb      .Lecb_enc_proc_6_blks
    cmpl    $128, ARG4
    jb      .Lecb_enc_proc_7_blks
    cmpl    $256, ARG4
    jbe     .Lecb_enc_proc_8_blks
    /* len > 256: fall through into the 14 block loop */

.align 16
.Lecb_enc_proc_14_blks:
.Lecb_aesenc_14_blks_loop:
    movq    KEY, KTMP                       /* work on a copy so KEY stays valid for the next pass */
    vmovdqu (KEY), RDK
    movl    240(KEY), ROUNDS
    /* Load 14 input blocks, each XORed with the 16 bytes at offset 0 of the key structure. */
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    vpxor   112(ARG2), RDK, BLK7
    vpxor   128(ARG2), RDK, BLK8
    vpxor   144(ARG2), RDK, BLK9
    vpxor   160(ARG2), RDK, BLK10
    vpxor   176(ARG2), RDK, BLK11
    vpxor   192(ARG2), RDK, BLK12
    vpxor   208(ARG2), RDK, BLK13
    decl    ROUNDS
    /* NOTE(review): unlike AES_DEC_14_BLKS this macro is passed ARG2 as an extra first operand - confirm against its definition. */
    AES_ENC_14_BLKS ARG2 KTMP ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6 BLK7 BLK8 BLK9 BLK10 BLK11 BLK12 BLK13
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    vmovdqu BLK7, 112(ARG3)
    vmovdqu BLK8, 128(ARG3)
    vmovdqu BLK9, 144(ARG3)
    vmovdqu BLK10, 160(ARG3)
    vmovdqu BLK11, 176(ARG3)
    vmovdqu BLK12, 192(ARG3)
    vmovdqu BLK13, 208(ARG3)
    /* Advance both pointers and the remaining length by 14 * 16 bytes. */
    leaq    224(ARG2), ARG2
    leaq    224(ARG3), ARG3
    subl    $224, ARG4
    cmpl    $224, ARG4
    jae     .Lecb_aesenc_14_blks_loop
    jmp     .Lecb_aesenc_start              /* fewer than 14 blocks left: dispatch again */

.align 16
.Lecb_enc_proc_8_blks:
.Lecb_aesenc_8_blks_loop:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    movq    KEY, KTMP                       /* work on a copy so KEY stays valid for the next pass */
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    vpxor   112(ARG2), RDK, BLK7
    decl    ROUNDS
    AES_ENC_8_BLKS KTMP ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6 BLK7
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    vmovdqu BLK7, 112(ARG3)
    /* Advance both pointers and the remaining length by 8 * 16 bytes. */
    leaq    128(ARG2), ARG2
    leaq    128(ARG3), ARG3
    subl    $128, ARG4
    cmpl    $128, ARG4
    jae     .Lecb_aesenc_8_blks_loop
    jmp     .Lecb_aesenc_start              /* fewer than 8 blocks left: dispatch again */

/* Tail paths: each handles a fixed block count once and then leaves the function. */
.align 16
.Lecb_enc_proc_1_blk:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    decl    ROUNDS
    AES_ENC_1_BLK KEY ROUNDS RDK BLK0
    vmovdqu BLK0, (ARG3)
    jmp     .Lecb_aesenc_finish

.align 16
.Lecb_enc_proc_2_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    decl    ROUNDS
    AES_ENC_2_BLKS KEY ROUNDS RDK BLK0 BLK1
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    jmp     .Lecb_aesenc_finish

.align 16
.Lecb_enc_proc_3_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    decl    ROUNDS
    AES_ENC_3_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    jmp     .Lecb_aesenc_finish

.align 16
.Lecb_enc_proc_4_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    decl    ROUNDS
    AES_ENC_4_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    jmp     .Lecb_aesenc_finish

.align 16
.Lecb_enc_proc_5_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    decl    ROUNDS
    AES_ENC_5_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    jmp     .Lecb_aesenc_finish

.align 16
.Lecb_enc_proc_6_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    decl    ROUNDS
    AES_ENC_6_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    jmp     .Lecb_aesenc_finish

.align 16
.Lecb_enc_proc_7_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    decl    ROUNDS
    AES_ENC_7_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    /* the 7 block path falls through into the exit below */

.align 16
.Lecb_aesenc_finish:
    vpxor   RDK, RDK, RDK                   /* wipe the register that was loaded from the key structure */
    xorl    RET, RET                        /* return 0 */
    ret
.cfi_endproc
.size CRYPT_AES_ECB_Encrypt, .-CRYPT_AES_ECB_Encrypt
/**
 * Function description: AES decryption in ECB mode, processing the input in groups of
 *                       14, 8 or 1..7 blocks of 16 bytes with the AES_DEC_*_BLK(S) macros.
 * Function prototype: int32_t CRYPT_AES_ECB_Decrypt(const CRYPT_AES_Key *ctx,
 *                         const uint8_t *in, uint8_t *out, uint32_t len);
 * Input register (System V AMD64 argument order):
 *     rdi: ctx, pointer to the key structure. 16 bytes are loaded from offset 0 and
 *          the 32-bit round count from offset 240.
 *     rsi: in, pointer to the input data.
 *     rdx: out, pointer to the output data.
 *     ecx: len, number of bytes to process. No validation is done here.
 *          NOTE(review): presumably the caller guarantees a multiple of 16 - confirm.
 * Dispatch:
 *     len > 256          -> 14 block loop (224 bytes per iteration)
 *     128 <= len <= 256  -> 8 block loop (128 bytes per iteration)
 *     len < 128          -> one straight-line pass over 1..7 blocks, then return
 *     After each bulk loop drops below its own size the dispatcher is re-entered.
 * Change register: eax, ecx, rsi, rdx, r9, xmm0-xmm14, flags, plus anything the
 *                  AES_DEC_* macros modify (their definitions are not in this file).
 *                  xmm0-xmm14 are zeroed on exit.
 * Output register: eax, always 0.
 * Function/Macro Call: AES_DEC_1_BLK, AES_DEC_2_BLKS ... AES_DEC_8_BLKS, AES_DEC_14_BLKS.
 */
.align 16
.globl CRYPT_AES_ECB_Decrypt
.type CRYPT_AES_ECB_Decrypt, @function
CRYPT_AES_ECB_Decrypt:
.cfi_startproc
/* Dispatcher: choose a path from the number of bytes still to process (ARG4). */
.Lecb_aesdec_start:
    cmpl    $64, ARG4
    jae     .Lecb_dec_above_equal_4_blks
    cmpl    $32, ARG4
    jae     .Lecb_dec_above_equal_2_blks
    testl   ARG4, ARG4
    jz      .Lecb_aesdec_finish             /* nothing left to process */
    jmp     .Lecb_dec_proc_1_blk
.Lecb_dec_above_equal_2_blks:
    cmpl    $48, ARG4
    jb      .Lecb_dec_proc_2_blks
    jmp     .Lecb_dec_proc_3_blks
.Lecb_dec_above_equal_4_blks:
    cmpl    $96, ARG4
    jae     .Lecb_dec_above_equal_6_blks
    cmpl    $80, ARG4
    jb      .Lecb_dec_proc_4_blks
    jmp     .Lecb_dec_proc_5_blks
.Lecb_dec_above_equal_6_blks:
    cmpl    $112, ARG4
    jb      .Lecb_dec_proc_6_blks
    cmpl    $128, ARG4
    jb      .Lecb_dec_proc_7_blks
    cmpl    $256, ARG4
    jbe     .Lecb_dec_proc_8_blks
    /* len > 256: fall through into the 14 block loop */

.align 16
.Lecb_dec_proc_14_blks:
.Lecb_aesdec_14_blks_loop:
    movq    KEY, KTMP                       /* work on a copy so KEY stays valid for the next pass */
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    /* Load 14 input blocks, each XORed with the 16 bytes at offset 0 of the key structure. */
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    vpxor   112(ARG2), RDK, BLK7
    vpxor   128(ARG2), RDK, BLK8
    vpxor   144(ARG2), RDK, BLK9
    vpxor   160(ARG2), RDK, BLK10
    vpxor   176(ARG2), RDK, BLK11
    vpxor   192(ARG2), RDK, BLK12
    vpxor   208(ARG2), RDK, BLK13
    decl    ROUNDS
    AES_DEC_14_BLKS KTMP ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6 BLK7 BLK8 BLK9 BLK10 BLK11 BLK12 BLK13
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    vmovdqu BLK7, 112(ARG3)
    vmovdqu BLK8, 128(ARG3)
    vmovdqu BLK9, 144(ARG3)
    vmovdqu BLK10, 160(ARG3)
    vmovdqu BLK11, 176(ARG3)
    vmovdqu BLK12, 192(ARG3)
    vmovdqu BLK13, 208(ARG3)
    /* Advance both pointers and the remaining length by 14 * 16 bytes. */
    leaq    224(ARG2), ARG2
    leaq    224(ARG3), ARG3
    subl    $224, ARG4
    cmpl    $224, ARG4
    jae     .Lecb_aesdec_14_blks_loop
    jmp     .Lecb_aesdec_start              /* fewer than 14 blocks left: dispatch again */

.align 16
.Lecb_dec_proc_8_blks:
.Lecb_aesdec_8_blks_loop:
    movq    KEY, KTMP                       /* work on a copy so KEY stays valid for the next pass */
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    vpxor   112(ARG2), RDK, BLK7
    decl    ROUNDS
    AES_DEC_8_BLKS KTMP ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6 BLK7
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    vmovdqu BLK7, 112(ARG3)
    /* Advance both pointers and the remaining length by 8 * 16 bytes. */
    leaq    128(ARG2), ARG2
    leaq    128(ARG3), ARG3
    subl    $128, ARG4
    cmpl    $128, ARG4
    jae     .Lecb_aesdec_8_blks_loop
    jmp     .Lecb_aesdec_start              /* fewer than 8 blocks left: dispatch again */

/* Tail paths: each handles a fixed block count once and then leaves the function. */
.align 16
.Lecb_dec_proc_1_blk:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    decl    ROUNDS
    AES_DEC_1_BLK KEY ROUNDS RDK BLK0
    vmovdqu BLK0, (ARG3)
    jmp     .Lecb_aesdec_finish

.align 16
.Lecb_dec_proc_2_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    decl    ROUNDS
    AES_DEC_2_BLKS KEY ROUNDS RDK BLK0 BLK1
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    jmp     .Lecb_aesdec_finish

.align 16
.Lecb_dec_proc_3_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    decl    ROUNDS
    AES_DEC_3_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    jmp     .Lecb_aesdec_finish

.align 16
.Lecb_dec_proc_4_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    decl    ROUNDS
    AES_DEC_4_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    jmp     .Lecb_aesdec_finish

.align 16
.Lecb_dec_proc_5_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    decl    ROUNDS
    AES_DEC_5_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    jmp     .Lecb_aesdec_finish

.align 16
.Lecb_dec_proc_6_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    decl    ROUNDS
    AES_DEC_6_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    jmp     .Lecb_aesdec_finish

.align 16
.Lecb_dec_proc_7_blks:
    movl    240(KEY), ROUNDS
    vmovdqu (KEY), RDK
    vpxor   (ARG2), RDK, BLK0
    vpxor   16(ARG2), RDK, BLK1
    vpxor   32(ARG2), RDK, BLK2
    vpxor   48(ARG2), RDK, BLK3
    vpxor   64(ARG2), RDK, BLK4
    vpxor   80(ARG2), RDK, BLK5
    vpxor   96(ARG2), RDK, BLK6
    decl    ROUNDS
    AES_DEC_7_BLKS KEY ROUNDS RDK BLK0 BLK1 BLK2 BLK3 BLK4 BLK5 BLK6
    vmovdqu BLK0, (ARG3)
    vmovdqu BLK1, 16(ARG3)
    vmovdqu BLK2, 32(ARG3)
    vmovdqu BLK3, 48(ARG3)
    vmovdqu BLK4, 64(ARG3)
    vmovdqu BLK5, 80(ARG3)
    vmovdqu BLK6, 96(ARG3)
    /* the 7 block path falls through into the exit below */

.align 16
.Lecb_aesdec_finish:
    /* Zero every block register and RDK before returning (presumably so that no output data or key bytes stay behind). */
    vpxor   BLK0, BLK0, BLK0
    vpxor   BLK1, BLK1, BLK1
    vpxor   BLK2, BLK2, BLK2
    vpxor   BLK3, BLK3, BLK3
    vpxor   BLK4, BLK4, BLK4
    vpxor   BLK5, BLK5, BLK5
    vpxor   BLK6, BLK6, BLK6
    vpxor   BLK7, BLK7, BLK7
    vpxor   BLK8, BLK8, BLK8
    vpxor   BLK9, BLK9, BLK9
    vpxor   BLK10, BLK10, BLK10
    vpxor   BLK11, BLK11, BLK11
    vpxor   BLK12, BLK12, BLK12
    vpxor   BLK13, BLK13, BLK13
    vpxor   RDK, RDK, RDK
    xorl    RET, RET                        /* return 0 */
    ret
.cfi_endproc
.size CRYPT_AES_ECB_Decrypt, .-CRYPT_AES_ECB_Decrypt
#endif