about summary refs log tree commit diff stats
path: root/contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s
diff options
context:
space:
mode:
author danlark <danlark@yandex-team.ru> 2022-02-10 16:46:08 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:08 +0300
commit 3426a9bc7f169ae9da54cef557ad2a33f6e8eee0 (patch)
tree 26154e1e9990f1bb4525d3e3fb5b6dac2c2c1da2 /contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s
parent cb68f224c46a8ee52ac3fdd2a32534b8bb8dc134 (diff)
download ydb-3426a9bc7f169ae9da54cef557ad2a33f6e8eee0.tar.gz
Restoring authorship annotation for <danlark@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s')
-rw-r--r-- contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s 2574
1 files changed, 1287 insertions, 1287 deletions
diff --git a/contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s b/contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s
index b283b82359..ff368c4278 100644
--- a/contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s
+++ b/contrib/libs/openssl/asm/ppc64le/crypto/chacha/chacha-ppc.s
@@ -1,237 +1,237 @@
-.machine "any"
-.text
-
-.globl ChaCha20_ctr32_int
-.type ChaCha20_ctr32_int,@function
+.machine "any"
+.text
+
+.globl ChaCha20_ctr32_int
+.type ChaCha20_ctr32_int,@function
.section ".opd","aw"
.align 3
ChaCha20_ctr32_int:
.quad .ChaCha20_ctr32_int,.TOC.@tocbase,0
.previous
-.align 5
+.align 5
.ChaCha20_ctr32_int:
-__ChaCha20_ctr32_int:
- cmpldi 5,0
- .long 0x4DC20020
-
- stdu 1,-256(1)
- mflr 0
-
- std 14,112(1)
- std 15,120(1)
- std 16,128(1)
- std 17,136(1)
- std 18,144(1)
- std 19,152(1)
- std 20,160(1)
- std 21,168(1)
- std 22,176(1)
- std 23,184(1)
- std 24,192(1)
- std 25,200(1)
- std 26,208(1)
- std 27,216(1)
- std 28,224(1)
- std 29,232(1)
- std 30,240(1)
- std 31,248(1)
- std 0,272(1)
-
- lwz 11,0(7)
- lwz 12,4(7)
- lwz 14,8(7)
- lwz 15,12(7)
-
- bl __ChaCha20_1x
-
- ld 0,272(1)
- ld 14,112(1)
- ld 15,120(1)
- ld 16,128(1)
- ld 17,136(1)
- ld 18,144(1)
- ld 19,152(1)
- ld 20,160(1)
- ld 21,168(1)
- ld 22,176(1)
- ld 23,184(1)
- ld 24,192(1)
- ld 25,200(1)
- ld 26,208(1)
- ld 27,216(1)
- ld 28,224(1)
- ld 29,232(1)
- ld 30,240(1)
- ld 31,248(1)
- mtlr 0
- addi 1,1,256
- blr
-.long 0
-.byte 0,12,4,1,0x80,18,5,0
-.long 0
+__ChaCha20_ctr32_int:
+ cmpldi 5,0
+ .long 0x4DC20020
+
+ stdu 1,-256(1)
+ mflr 0
+
+ std 14,112(1)
+ std 15,120(1)
+ std 16,128(1)
+ std 17,136(1)
+ std 18,144(1)
+ std 19,152(1)
+ std 20,160(1)
+ std 21,168(1)
+ std 22,176(1)
+ std 23,184(1)
+ std 24,192(1)
+ std 25,200(1)
+ std 26,208(1)
+ std 27,216(1)
+ std 28,224(1)
+ std 29,232(1)
+ std 30,240(1)
+ std 31,248(1)
+ std 0,272(1)
+
+ lwz 11,0(7)
+ lwz 12,4(7)
+ lwz 14,8(7)
+ lwz 15,12(7)
+
+ bl __ChaCha20_1x
+
+ ld 0,272(1)
+ ld 14,112(1)
+ ld 15,120(1)
+ ld 16,128(1)
+ ld 17,136(1)
+ ld 18,144(1)
+ ld 19,152(1)
+ ld 20,160(1)
+ ld 21,168(1)
+ ld 22,176(1)
+ ld 23,184(1)
+ ld 24,192(1)
+ ld 25,200(1)
+ ld 26,208(1)
+ ld 27,216(1)
+ ld 28,224(1)
+ ld 29,232(1)
+ ld 30,240(1)
+ ld 31,248(1)
+ mtlr 0
+ addi 1,1,256
+ blr
+.long 0
+.byte 0,12,4,1,0x80,18,5,0
+.long 0
.size .ChaCha20_ctr32_int,.-.ChaCha20_ctr32_int
.size ChaCha20_ctr32_int,.-.ChaCha20_ctr32_int
-
-.align 5
-__ChaCha20_1x:
-.Loop_outer:
- lis 16,0x6170
- lis 17,0x3320
- lis 18,0x7962
- lis 19,0x6b20
- ori 16,16,0x7865
- ori 17,17,0x646e
- ori 18,18,0x2d32
- ori 19,19,0x6574
-
- li 0,10
- lwz 20,0(6)
- lwz 21,4(6)
- lwz 22,8(6)
- lwz 23,12(6)
- lwz 24,16(6)
- mr 28,11
- lwz 25,20(6)
- mr 29,12
- lwz 26,24(6)
- mr 30,14
- lwz 27,28(6)
- mr 31,15
-
- mr 7,20
- mr 8,21
- mr 9,22
- mr 10,23
-
- mtctr 0
-.Loop:
- add 16,16,20
- add 17,17,21
- add 18,18,22
- add 19,19,23
- xor 28,28,16
- xor 29,29,17
- xor 30,30,18
- xor 31,31,19
- rotlwi 28,28,16
- rotlwi 29,29,16
- rotlwi 30,30,16
- rotlwi 31,31,16
- add 24,24,28
- add 25,25,29
- add 26,26,30
- add 27,27,31
- xor 20,20,24
- xor 21,21,25
- xor 22,22,26
- xor 23,23,27
- rotlwi 20,20,12
- rotlwi 21,21,12
- rotlwi 22,22,12
- rotlwi 23,23,12
- add 16,16,20
- add 17,17,21
- add 18,18,22
- add 19,19,23
- xor 28,28,16
- xor 29,29,17
- xor 30,30,18
- xor 31,31,19
- rotlwi 28,28,8
- rotlwi 29,29,8
- rotlwi 30,30,8
- rotlwi 31,31,8
- add 24,24,28
- add 25,25,29
- add 26,26,30
- add 27,27,31
- xor 20,20,24
- xor 21,21,25
- xor 22,22,26
- xor 23,23,27
- rotlwi 20,20,7
- rotlwi 21,21,7
- rotlwi 22,22,7
- rotlwi 23,23,7
- add 16,16,21
- add 17,17,22
- add 18,18,23
- add 19,19,20
- xor 31,31,16
- xor 28,28,17
- xor 29,29,18
- xor 30,30,19
- rotlwi 31,31,16
- rotlwi 28,28,16
- rotlwi 29,29,16
- rotlwi 30,30,16
- add 26,26,31
- add 27,27,28
- add 24,24,29
- add 25,25,30
- xor 21,21,26
- xor 22,22,27
- xor 23,23,24
- xor 20,20,25
- rotlwi 21,21,12
- rotlwi 22,22,12
- rotlwi 23,23,12
- rotlwi 20,20,12
- add 16,16,21
- add 17,17,22
- add 18,18,23
- add 19,19,20
- xor 31,31,16
- xor 28,28,17
- xor 29,29,18
- xor 30,30,19
- rotlwi 31,31,8
- rotlwi 28,28,8
- rotlwi 29,29,8
- rotlwi 30,30,8
- add 26,26,31
- add 27,27,28
- add 24,24,29
- add 25,25,30
- xor 21,21,26
- xor 22,22,27
- xor 23,23,24
- xor 20,20,25
- rotlwi 21,21,7
- rotlwi 22,22,7
- rotlwi 23,23,7
- rotlwi 20,20,7
- bdnz .Loop
-
- subic 5,5,64
- addi 16,16,0x7865
- addi 17,17,0x646e
- addi 18,18,0x2d32
- addi 19,19,0x6574
- addis 16,16,0x6170
- addis 17,17,0x3320
- addis 18,18,0x7962
- addis 19,19,0x6b20
-
- subfe. 0,0,0
- add 20,20,7
- lwz 7,16(6)
- add 21,21,8
- lwz 8,20(6)
- add 22,22,9
- lwz 9,24(6)
- add 23,23,10
- lwz 10,28(6)
- add 24,24,7
- add 25,25,8
- add 26,26,9
- add 27,27,10
-
- add 28,28,11
- add 29,29,12
- add 30,30,14
- add 31,31,15
- addi 11,11,1
+
+.align 5
+__ChaCha20_1x:
+.Loop_outer:
+ lis 16,0x6170
+ lis 17,0x3320
+ lis 18,0x7962
+ lis 19,0x6b20
+ ori 16,16,0x7865
+ ori 17,17,0x646e
+ ori 18,18,0x2d32
+ ori 19,19,0x6574
+
+ li 0,10
+ lwz 20,0(6)
+ lwz 21,4(6)
+ lwz 22,8(6)
+ lwz 23,12(6)
+ lwz 24,16(6)
+ mr 28,11
+ lwz 25,20(6)
+ mr 29,12
+ lwz 26,24(6)
+ mr 30,14
+ lwz 27,28(6)
+ mr 31,15
+
+ mr 7,20
+ mr 8,21
+ mr 9,22
+ mr 10,23
+
+ mtctr 0
+.Loop:
+ add 16,16,20
+ add 17,17,21
+ add 18,18,22
+ add 19,19,23
+ xor 28,28,16
+ xor 29,29,17
+ xor 30,30,18
+ xor 31,31,19
+ rotlwi 28,28,16
+ rotlwi 29,29,16
+ rotlwi 30,30,16
+ rotlwi 31,31,16
+ add 24,24,28
+ add 25,25,29
+ add 26,26,30
+ add 27,27,31
+ xor 20,20,24
+ xor 21,21,25
+ xor 22,22,26
+ xor 23,23,27
+ rotlwi 20,20,12
+ rotlwi 21,21,12
+ rotlwi 22,22,12
+ rotlwi 23,23,12
+ add 16,16,20
+ add 17,17,21
+ add 18,18,22
+ add 19,19,23
+ xor 28,28,16
+ xor 29,29,17
+ xor 30,30,18
+ xor 31,31,19
+ rotlwi 28,28,8
+ rotlwi 29,29,8
+ rotlwi 30,30,8
+ rotlwi 31,31,8
+ add 24,24,28
+ add 25,25,29
+ add 26,26,30
+ add 27,27,31
+ xor 20,20,24
+ xor 21,21,25
+ xor 22,22,26
+ xor 23,23,27
+ rotlwi 20,20,7
+ rotlwi 21,21,7
+ rotlwi 22,22,7
+ rotlwi 23,23,7
+ add 16,16,21
+ add 17,17,22
+ add 18,18,23
+ add 19,19,20
+ xor 31,31,16
+ xor 28,28,17
+ xor 29,29,18
+ xor 30,30,19
+ rotlwi 31,31,16
+ rotlwi 28,28,16
+ rotlwi 29,29,16
+ rotlwi 30,30,16
+ add 26,26,31
+ add 27,27,28
+ add 24,24,29
+ add 25,25,30
+ xor 21,21,26
+ xor 22,22,27
+ xor 23,23,24
+ xor 20,20,25
+ rotlwi 21,21,12
+ rotlwi 22,22,12
+ rotlwi 23,23,12
+ rotlwi 20,20,12
+ add 16,16,21
+ add 17,17,22
+ add 18,18,23
+ add 19,19,20
+ xor 31,31,16
+ xor 28,28,17
+ xor 29,29,18
+ xor 30,30,19
+ rotlwi 31,31,8
+ rotlwi 28,28,8
+ rotlwi 29,29,8
+ rotlwi 30,30,8
+ add 26,26,31
+ add 27,27,28
+ add 24,24,29
+ add 25,25,30
+ xor 21,21,26
+ xor 22,22,27
+ xor 23,23,24
+ xor 20,20,25
+ rotlwi 21,21,7
+ rotlwi 22,22,7
+ rotlwi 23,23,7
+ rotlwi 20,20,7
+ bdnz .Loop
+
+ subic 5,5,64
+ addi 16,16,0x7865
+ addi 17,17,0x646e
+ addi 18,18,0x2d32
+ addi 19,19,0x6574
+ addis 16,16,0x6170
+ addis 17,17,0x3320
+ addis 18,18,0x7962
+ addis 19,19,0x6b20
+
+ subfe. 0,0,0
+ add 20,20,7
+ lwz 7,16(6)
+ add 21,21,8
+ lwz 8,20(6)
+ add 22,22,9
+ lwz 9,24(6)
+ add 23,23,10
+ lwz 10,28(6)
+ add 24,24,7
+ add 25,25,8
+ add 26,26,9
+ add 27,27,10
+
+ add 28,28,11
+ add 29,29,12
+ add 30,30,14
+ add 31,31,15
+ addi 11,11,1
mr 7,16
rotlwi 16,16,8
rlwimi 16,7,24,0,7
@@ -296,502 +296,502 @@ __ChaCha20_1x:
rotlwi 31,31,8
rlwimi 31,10,24,0,7
rlwimi 31,10,24,16,23
- bne .Ltail
-
- lwz 7,0(4)
- lwz 8,4(4)
- cmpldi 5,0
- lwz 9,8(4)
- lwz 10,12(4)
- xor 16,16,7
- lwz 7,16(4)
- xor 17,17,8
- lwz 8,20(4)
- xor 18,18,9
- lwz 9,24(4)
- xor 19,19,10
- lwz 10,28(4)
- xor 20,20,7
- lwz 7,32(4)
- xor 21,21,8
- lwz 8,36(4)
- xor 22,22,9
- lwz 9,40(4)
- xor 23,23,10
- lwz 10,44(4)
- xor 24,24,7
- lwz 7,48(4)
- xor 25,25,8
- lwz 8,52(4)
- xor 26,26,9
- lwz 9,56(4)
- xor 27,27,10
- lwz 10,60(4)
- xor 28,28,7
- stw 16,0(3)
- xor 29,29,8
- stw 17,4(3)
- xor 30,30,9
- stw 18,8(3)
- xor 31,31,10
- stw 19,12(3)
- stw 20,16(3)
- stw 21,20(3)
- stw 22,24(3)
- stw 23,28(3)
- stw 24,32(3)
- stw 25,36(3)
- stw 26,40(3)
- stw 27,44(3)
- stw 28,48(3)
- stw 29,52(3)
- stw 30,56(3)
- addi 4,4,64
- stw 31,60(3)
- addi 3,3,64
-
- bne .Loop_outer
-
- blr
-
-.align 4
-.Ltail:
- addi 5,5,64
- subi 4,4,1
- subi 3,3,1
- addi 7,1,48-1
- mtctr 5
-
- stw 16,48(1)
- stw 17,52(1)
- stw 18,56(1)
- stw 19,60(1)
- stw 20,64(1)
- stw 21,68(1)
- stw 22,72(1)
- stw 23,76(1)
- stw 24,80(1)
- stw 25,84(1)
- stw 26,88(1)
- stw 27,92(1)
- stw 28,96(1)
- stw 29,100(1)
- stw 30,104(1)
- stw 31,108(1)
-
-.Loop_tail:
- lbzu 11,1(4)
- lbzu 16,1(7)
- xor 12,11,16
- stbu 12,1(3)
- bdnz .Loop_tail
-
- stw 1,48(1)
- stw 1,52(1)
- stw 1,56(1)
- stw 1,60(1)
- stw 1,64(1)
- stw 1,68(1)
- stw 1,72(1)
- stw 1,76(1)
- stw 1,80(1)
- stw 1,84(1)
- stw 1,88(1)
- stw 1,92(1)
- stw 1,96(1)
- stw 1,100(1)
- stw 1,104(1)
- stw 1,108(1)
-
- blr
-.long 0
-.byte 0,12,0x14,0,0,0,0,0
-
-.globl ChaCha20_ctr32_vmx
-.type ChaCha20_ctr32_vmx,@function
+ bne .Ltail
+
+ lwz 7,0(4)
+ lwz 8,4(4)
+ cmpldi 5,0
+ lwz 9,8(4)
+ lwz 10,12(4)
+ xor 16,16,7
+ lwz 7,16(4)
+ xor 17,17,8
+ lwz 8,20(4)
+ xor 18,18,9
+ lwz 9,24(4)
+ xor 19,19,10
+ lwz 10,28(4)
+ xor 20,20,7
+ lwz 7,32(4)
+ xor 21,21,8
+ lwz 8,36(4)
+ xor 22,22,9
+ lwz 9,40(4)
+ xor 23,23,10
+ lwz 10,44(4)
+ xor 24,24,7
+ lwz 7,48(4)
+ xor 25,25,8
+ lwz 8,52(4)
+ xor 26,26,9
+ lwz 9,56(4)
+ xor 27,27,10
+ lwz 10,60(4)
+ xor 28,28,7
+ stw 16,0(3)
+ xor 29,29,8
+ stw 17,4(3)
+ xor 30,30,9
+ stw 18,8(3)
+ xor 31,31,10
+ stw 19,12(3)
+ stw 20,16(3)
+ stw 21,20(3)
+ stw 22,24(3)
+ stw 23,28(3)
+ stw 24,32(3)
+ stw 25,36(3)
+ stw 26,40(3)
+ stw 27,44(3)
+ stw 28,48(3)
+ stw 29,52(3)
+ stw 30,56(3)
+ addi 4,4,64
+ stw 31,60(3)
+ addi 3,3,64
+
+ bne .Loop_outer
+
+ blr
+
+.align 4
+.Ltail:
+ addi 5,5,64
+ subi 4,4,1
+ subi 3,3,1
+ addi 7,1,48-1
+ mtctr 5
+
+ stw 16,48(1)
+ stw 17,52(1)
+ stw 18,56(1)
+ stw 19,60(1)
+ stw 20,64(1)
+ stw 21,68(1)
+ stw 22,72(1)
+ stw 23,76(1)
+ stw 24,80(1)
+ stw 25,84(1)
+ stw 26,88(1)
+ stw 27,92(1)
+ stw 28,96(1)
+ stw 29,100(1)
+ stw 30,104(1)
+ stw 31,108(1)
+
+.Loop_tail:
+ lbzu 11,1(4)
+ lbzu 16,1(7)
+ xor 12,11,16
+ stbu 12,1(3)
+ bdnz .Loop_tail
+
+ stw 1,48(1)
+ stw 1,52(1)
+ stw 1,56(1)
+ stw 1,60(1)
+ stw 1,64(1)
+ stw 1,68(1)
+ stw 1,72(1)
+ stw 1,76(1)
+ stw 1,80(1)
+ stw 1,84(1)
+ stw 1,88(1)
+ stw 1,92(1)
+ stw 1,96(1)
+ stw 1,100(1)
+ stw 1,104(1)
+ stw 1,108(1)
+
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+
+.globl ChaCha20_ctr32_vmx
+.type ChaCha20_ctr32_vmx,@function
.section ".opd","aw"
.align 3
ChaCha20_ctr32_vmx:
.quad .ChaCha20_ctr32_vmx,.TOC.@tocbase,0
.previous
-.align 5
+.align 5
.ChaCha20_ctr32_vmx:
- cmpldi 5,256
- blt __ChaCha20_ctr32_int
-
- stdu 1,-416(1)
- mflr 0
- li 10,127
- li 11,143
+ cmpldi 5,256
+ blt __ChaCha20_ctr32_int
+
+ stdu 1,-416(1)
+ mflr 0
+ li 10,127
+ li 11,143
mfspr 12,256
- stvx 23,10,1
- addi 10,10,32
- stvx 24,11,1
- addi 11,11,32
- stvx 25,10,1
- addi 10,10,32
- stvx 26,11,1
- addi 11,11,32
- stvx 27,10,1
- addi 10,10,32
- stvx 28,11,1
- addi 11,11,32
- stvx 29,10,1
- addi 10,10,32
- stvx 30,11,1
- stvx 31,10,1
- stw 12,268(1)
- std 14,272(1)
- std 15,280(1)
- std 16,288(1)
- std 17,296(1)
- std 18,304(1)
- std 19,312(1)
- std 20,320(1)
- std 21,328(1)
- std 22,336(1)
- std 23,344(1)
- std 24,352(1)
- std 25,360(1)
- std 26,368(1)
- std 27,376(1)
- std 28,384(1)
- std 29,392(1)
- std 30,400(1)
- std 31,408(1)
- li 12,-4096+511
- std 0, 432(1)
+ stvx 23,10,1
+ addi 10,10,32
+ stvx 24,11,1
+ addi 11,11,32
+ stvx 25,10,1
+ addi 10,10,32
+ stvx 26,11,1
+ addi 11,11,32
+ stvx 27,10,1
+ addi 10,10,32
+ stvx 28,11,1
+ addi 11,11,32
+ stvx 29,10,1
+ addi 10,10,32
+ stvx 30,11,1
+ stvx 31,10,1
+ stw 12,268(1)
+ std 14,272(1)
+ std 15,280(1)
+ std 16,288(1)
+ std 17,296(1)
+ std 18,304(1)
+ std 19,312(1)
+ std 20,320(1)
+ std 21,328(1)
+ std 22,336(1)
+ std 23,344(1)
+ std 24,352(1)
+ std 25,360(1)
+ std 26,368(1)
+ std 27,376(1)
+ std 28,384(1)
+ std 29,392(1)
+ std 30,400(1)
+ std 31,408(1)
+ li 12,-4096+511
+ std 0, 432(1)
mtspr 256,12
-
- bl .Lconsts
- li 16,16
- li 17,32
- li 18,48
- li 19,64
- li 20,31
- li 21,15
-
- lvx 13,0,6
+
+ bl .Lconsts
+ li 16,16
+ li 17,32
+ li 18,48
+ li 19,64
+ li 20,31
+ li 21,15
+
+ lvx 13,0,6
lvsl 29,0,6
- lvx 14,16,6
- lvx 27,20,6
-
- lvx 15,0,7
+ lvx 14,16,6
+ lvx 27,20,6
+
+ lvx 15,0,7
lvsl 30,0,7
- lvx 28,21,7
-
- lvx 12,0,12
- lvx 17,16,12
- lvx 18,17,12
- lvx 19,18,12
- lvx 23,19,12
-
+ lvx 28,21,7
+
+ lvx 12,0,12
+ lvx 17,16,12
+ lvx 18,17,12
+ lvx 19,18,12
+ lvx 23,19,12
+
vperm 13,13,14,29
vperm 14,14,27,29
vperm 15,15,28,30
-
- lwz 11,0(7)
- lwz 12,4(7)
- vadduwm 15,15,17
- lwz 14,8(7)
- vadduwm 16,15,17
- lwz 15,12(7)
- vadduwm 17,16,17
-
- vxor 29,29,29
- vspltisw 26,-1
+
+ lwz 11,0(7)
+ lwz 12,4(7)
+ vadduwm 15,15,17
+ lwz 14,8(7)
+ vadduwm 16,15,17
+ lwz 15,12(7)
+ vadduwm 17,16,17
+
+ vxor 29,29,29
+ vspltisw 26,-1
lvsl 24,0,4
lvsr 25,0,3
vperm 26,29,26,25
-
+
lvsl 29,0,16
vspltisb 30,3
vxor 29,29,30
vxor 25,25,30
vperm 24,24,24,29
-
- li 0,10
- b .Loop_outer_vmx
-
-.align 4
-.Loop_outer_vmx:
- lis 16,0x6170
- lis 17,0x3320
- vor 0,12,12
- lis 18,0x7962
- lis 19,0x6b20
- vor 4,12,12
- ori 16,16,0x7865
- ori 17,17,0x646e
- vor 8,12,12
- ori 18,18,0x2d32
- ori 19,19,0x6574
- vor 1,13,13
-
- lwz 20,0(6)
- vor 5,13,13
- lwz 21,4(6)
- vor 9,13,13
- lwz 22,8(6)
- vor 2,14,14
- lwz 23,12(6)
- vor 6,14,14
- lwz 24,16(6)
- vor 10,14,14
- mr 28,11
- lwz 25,20(6)
- vor 3,15,15
- mr 29,12
- lwz 26,24(6)
- vor 7,16,16
- mr 30,14
- lwz 27,28(6)
- vor 11,17,17
- mr 31,15
-
- mr 7,20
- mr 8,21
- mr 9,22
- mr 10,23
-
- vspltisw 27,12
- vspltisw 28,7
-
- mtctr 0
- nop
-.Loop_vmx:
- vadduwm 0,0,1
- vadduwm 4,4,5
- vadduwm 8,8,9
- add 16,16,20
- add 17,17,21
- add 18,18,22
- vxor 3,3,0
- vxor 7,7,4
- vxor 11,11,8
- add 19,19,23
- xor 28,28,16
- xor 29,29,17
- vperm 3,3,3,19
- vperm 7,7,7,19
- vperm 11,11,11,19
- xor 30,30,18
- xor 31,31,19
- rotlwi 28,28,16
- vadduwm 2,2,3
- vadduwm 6,6,7
- vadduwm 10,10,11
- rotlwi 29,29,16
- rotlwi 30,30,16
- rotlwi 31,31,16
- vxor 1,1,2
- vxor 5,5,6
- vxor 9,9,10
- add 24,24,28
- add 25,25,29
- add 26,26,30
- vrlw 1,1,27
- vrlw 5,5,27
- vrlw 9,9,27
- add 27,27,31
- xor 20,20,24
- xor 21,21,25
- vadduwm 0,0,1
- vadduwm 4,4,5
- vadduwm 8,8,9
- xor 22,22,26
- xor 23,23,27
- rotlwi 20,20,12
- vxor 3,3,0
- vxor 7,7,4
- vxor 11,11,8
- rotlwi 21,21,12
- rotlwi 22,22,12
- rotlwi 23,23,12
- vperm 3,3,3,23
- vperm 7,7,7,23
- vperm 11,11,11,23
- add 16,16,20
- add 17,17,21
- add 18,18,22
- vadduwm 2,2,3
- vadduwm 6,6,7
- vadduwm 10,10,11
- add 19,19,23
- xor 28,28,16
- xor 29,29,17
- vxor 1,1,2
- vxor 5,5,6
- vxor 9,9,10
- xor 30,30,18
- xor 31,31,19
- rotlwi 28,28,8
- vrlw 1,1,28
- vrlw 5,5,28
- vrlw 9,9,28
- rotlwi 29,29,8
- rotlwi 30,30,8
- rotlwi 31,31,8
+
+ li 0,10
+ b .Loop_outer_vmx
+
+.align 4
+.Loop_outer_vmx:
+ lis 16,0x6170
+ lis 17,0x3320
+ vor 0,12,12
+ lis 18,0x7962
+ lis 19,0x6b20
+ vor 4,12,12
+ ori 16,16,0x7865
+ ori 17,17,0x646e
+ vor 8,12,12
+ ori 18,18,0x2d32
+ ori 19,19,0x6574
+ vor 1,13,13
+
+ lwz 20,0(6)
+ vor 5,13,13
+ lwz 21,4(6)
+ vor 9,13,13
+ lwz 22,8(6)
+ vor 2,14,14
+ lwz 23,12(6)
+ vor 6,14,14
+ lwz 24,16(6)
+ vor 10,14,14
+ mr 28,11
+ lwz 25,20(6)
+ vor 3,15,15
+ mr 29,12
+ lwz 26,24(6)
+ vor 7,16,16
+ mr 30,14
+ lwz 27,28(6)
+ vor 11,17,17
+ mr 31,15
+
+ mr 7,20
+ mr 8,21
+ mr 9,22
+ mr 10,23
+
+ vspltisw 27,12
+ vspltisw 28,7
+
+ mtctr 0
+ nop
+.Loop_vmx:
+ vadduwm 0,0,1
+ vadduwm 4,4,5
+ vadduwm 8,8,9
+ add 16,16,20
+ add 17,17,21
+ add 18,18,22
+ vxor 3,3,0
+ vxor 7,7,4
+ vxor 11,11,8
+ add 19,19,23
+ xor 28,28,16
+ xor 29,29,17
+ vperm 3,3,3,19
+ vperm 7,7,7,19
+ vperm 11,11,11,19
+ xor 30,30,18
+ xor 31,31,19
+ rotlwi 28,28,16
+ vadduwm 2,2,3
+ vadduwm 6,6,7
+ vadduwm 10,10,11
+ rotlwi 29,29,16
+ rotlwi 30,30,16
+ rotlwi 31,31,16
+ vxor 1,1,2
+ vxor 5,5,6
+ vxor 9,9,10
+ add 24,24,28
+ add 25,25,29
+ add 26,26,30
+ vrlw 1,1,27
+ vrlw 5,5,27
+ vrlw 9,9,27
+ add 27,27,31
+ xor 20,20,24
+ xor 21,21,25
+ vadduwm 0,0,1
+ vadduwm 4,4,5
+ vadduwm 8,8,9
+ xor 22,22,26
+ xor 23,23,27
+ rotlwi 20,20,12
+ vxor 3,3,0
+ vxor 7,7,4
+ vxor 11,11,8
+ rotlwi 21,21,12
+ rotlwi 22,22,12
+ rotlwi 23,23,12
+ vperm 3,3,3,23
+ vperm 7,7,7,23
+ vperm 11,11,11,23
+ add 16,16,20
+ add 17,17,21
+ add 18,18,22
+ vadduwm 2,2,3
+ vadduwm 6,6,7
+ vadduwm 10,10,11
+ add 19,19,23
+ xor 28,28,16
+ xor 29,29,17
+ vxor 1,1,2
+ vxor 5,5,6
+ vxor 9,9,10
+ xor 30,30,18
+ xor 31,31,19
+ rotlwi 28,28,8
+ vrlw 1,1,28
+ vrlw 5,5,28
+ vrlw 9,9,28
+ rotlwi 29,29,8
+ rotlwi 30,30,8
+ rotlwi 31,31,8
vsldoi 2,2,2, 16-8
vsldoi 6,6,6, 16-8
vsldoi 10,10,10, 16-8
- add 24,24,28
- add 25,25,29
- add 26,26,30
+ add 24,24,28
+ add 25,25,29
+ add 26,26,30
vsldoi 1,1,1, 16-12
vsldoi 5,5,5, 16-12
vsldoi 9,9,9, 16-12
- add 27,27,31
- xor 20,20,24
- xor 21,21,25
+ add 27,27,31
+ xor 20,20,24
+ xor 21,21,25
vsldoi 3,3,3, 16-4
vsldoi 7,7,7, 16-4
vsldoi 11,11,11, 16-4
- xor 22,22,26
- xor 23,23,27
- rotlwi 20,20,7
- rotlwi 21,21,7
- rotlwi 22,22,7
- rotlwi 23,23,7
- vadduwm 0,0,1
- vadduwm 4,4,5
- vadduwm 8,8,9
- add 16,16,21
- add 17,17,22
- add 18,18,23
- vxor 3,3,0
- vxor 7,7,4
- vxor 11,11,8
- add 19,19,20
- xor 31,31,16
- xor 28,28,17
- vperm 3,3,3,19
- vperm 7,7,7,19
- vperm 11,11,11,19
- xor 29,29,18
- xor 30,30,19
- rotlwi 31,31,16
- vadduwm 2,2,3
- vadduwm 6,6,7
- vadduwm 10,10,11
- rotlwi 28,28,16
- rotlwi 29,29,16
- rotlwi 30,30,16
- vxor 1,1,2
- vxor 5,5,6
- vxor 9,9,10
- add 26,26,31
- add 27,27,28
- add 24,24,29
- vrlw 1,1,27
- vrlw 5,5,27
- vrlw 9,9,27
- add 25,25,30
- xor 21,21,26
- xor 22,22,27
- vadduwm 0,0,1
- vadduwm 4,4,5
- vadduwm 8,8,9
- xor 23,23,24
- xor 20,20,25
- rotlwi 21,21,12
- vxor 3,3,0
- vxor 7,7,4
- vxor 11,11,8
- rotlwi 22,22,12
- rotlwi 23,23,12
- rotlwi 20,20,12
- vperm 3,3,3,23
- vperm 7,7,7,23
- vperm 11,11,11,23
- add 16,16,21
- add 17,17,22
- add 18,18,23
- vadduwm 2,2,3
- vadduwm 6,6,7
- vadduwm 10,10,11
- add 19,19,20
- xor 31,31,16
- xor 28,28,17
- vxor 1,1,2
- vxor 5,5,6
- vxor 9,9,10
- xor 29,29,18
- xor 30,30,19
- rotlwi 31,31,8
- vrlw 1,1,28
- vrlw 5,5,28
- vrlw 9,9,28
- rotlwi 28,28,8
- rotlwi 29,29,8
- rotlwi 30,30,8
+ xor 22,22,26
+ xor 23,23,27
+ rotlwi 20,20,7
+ rotlwi 21,21,7
+ rotlwi 22,22,7
+ rotlwi 23,23,7
+ vadduwm 0,0,1
+ vadduwm 4,4,5
+ vadduwm 8,8,9
+ add 16,16,21
+ add 17,17,22
+ add 18,18,23
+ vxor 3,3,0
+ vxor 7,7,4
+ vxor 11,11,8
+ add 19,19,20
+ xor 31,31,16
+ xor 28,28,17
+ vperm 3,3,3,19
+ vperm 7,7,7,19
+ vperm 11,11,11,19
+ xor 29,29,18
+ xor 30,30,19
+ rotlwi 31,31,16
+ vadduwm 2,2,3
+ vadduwm 6,6,7
+ vadduwm 10,10,11
+ rotlwi 28,28,16
+ rotlwi 29,29,16
+ rotlwi 30,30,16
+ vxor 1,1,2
+ vxor 5,5,6
+ vxor 9,9,10
+ add 26,26,31
+ add 27,27,28
+ add 24,24,29
+ vrlw 1,1,27
+ vrlw 5,5,27
+ vrlw 9,9,27
+ add 25,25,30
+ xor 21,21,26
+ xor 22,22,27
+ vadduwm 0,0,1
+ vadduwm 4,4,5
+ vadduwm 8,8,9
+ xor 23,23,24
+ xor 20,20,25
+ rotlwi 21,21,12
+ vxor 3,3,0
+ vxor 7,7,4
+ vxor 11,11,8
+ rotlwi 22,22,12
+ rotlwi 23,23,12
+ rotlwi 20,20,12
+ vperm 3,3,3,23
+ vperm 7,7,7,23
+ vperm 11,11,11,23
+ add 16,16,21
+ add 17,17,22
+ add 18,18,23
+ vadduwm 2,2,3
+ vadduwm 6,6,7
+ vadduwm 10,10,11
+ add 19,19,20
+ xor 31,31,16
+ xor 28,28,17
+ vxor 1,1,2
+ vxor 5,5,6
+ vxor 9,9,10
+ xor 29,29,18
+ xor 30,30,19
+ rotlwi 31,31,8
+ vrlw 1,1,28
+ vrlw 5,5,28
+ vrlw 9,9,28
+ rotlwi 28,28,8
+ rotlwi 29,29,8
+ rotlwi 30,30,8
vsldoi 2,2,2, 16-8
vsldoi 6,6,6, 16-8
vsldoi 10,10,10, 16-8
- add 26,26,31
- add 27,27,28
- add 24,24,29
+ add 26,26,31
+ add 27,27,28
+ add 24,24,29
vsldoi 1,1,1, 16-4
vsldoi 5,5,5, 16-4
vsldoi 9,9,9, 16-4
- add 25,25,30
- xor 21,21,26
- xor 22,22,27
+ add 25,25,30
+ xor 21,21,26
+ xor 22,22,27
vsldoi 3,3,3, 16-12
vsldoi 7,7,7, 16-12
vsldoi 11,11,11, 16-12
- xor 23,23,24
- xor 20,20,25
- rotlwi 21,21,7
- rotlwi 22,22,7
- rotlwi 23,23,7
- rotlwi 20,20,7
- bdnz .Loop_vmx
-
- subi 5,5,256
- addi 16,16,0x7865
- addi 17,17,0x646e
- addi 18,18,0x2d32
- addi 19,19,0x6574
- addis 16,16,0x6170
- addis 17,17,0x3320
- addis 18,18,0x7962
- addis 19,19,0x6b20
- add 20,20,7
- lwz 7,16(6)
- add 21,21,8
- lwz 8,20(6)
- add 22,22,9
- lwz 9,24(6)
- add 23,23,10
- lwz 10,28(6)
- add 24,24,7
- add 25,25,8
- add 26,26,9
- add 27,27,10
- add 28,28,11
- add 29,29,12
- add 30,30,14
- add 31,31,15
-
- vadduwm 0,0,12
- vadduwm 4,4,12
- vadduwm 8,8,12
- vadduwm 1,1,13
- vadduwm 5,5,13
- vadduwm 9,9,13
- vadduwm 2,2,14
- vadduwm 6,6,14
- vadduwm 10,10,14
- vadduwm 3,3,15
- vadduwm 7,7,16
- vadduwm 11,11,17
-
- addi 11,11,4
- vadduwm 15,15,18
- vadduwm 16,16,18
- vadduwm 17,17,18
-
+ xor 23,23,24
+ xor 20,20,25
+ rotlwi 21,21,7
+ rotlwi 22,22,7
+ rotlwi 23,23,7
+ rotlwi 20,20,7
+ bdnz .Loop_vmx
+
+ subi 5,5,256
+ addi 16,16,0x7865
+ addi 17,17,0x646e
+ addi 18,18,0x2d32
+ addi 19,19,0x6574
+ addis 16,16,0x6170
+ addis 17,17,0x3320
+ addis 18,18,0x7962
+ addis 19,19,0x6b20
+ add 20,20,7
+ lwz 7,16(6)
+ add 21,21,8
+ lwz 8,20(6)
+ add 22,22,9
+ lwz 9,24(6)
+ add 23,23,10
+ lwz 10,28(6)
+ add 24,24,7
+ add 25,25,8
+ add 26,26,9
+ add 27,27,10
+ add 28,28,11
+ add 29,29,12
+ add 30,30,14
+ add 31,31,15
+
+ vadduwm 0,0,12
+ vadduwm 4,4,12
+ vadduwm 8,8,12
+ vadduwm 1,1,13
+ vadduwm 5,5,13
+ vadduwm 9,9,13
+ vadduwm 2,2,14
+ vadduwm 6,6,14
+ vadduwm 10,10,14
+ vadduwm 3,3,15
+ vadduwm 7,7,16
+ vadduwm 11,11,17
+
+ addi 11,11,4
+ vadduwm 15,15,18
+ vadduwm 16,16,18
+ vadduwm 17,17,18
+
mr 7,16
rotlwi 16,16,8
rlwimi 16,7,24,0,7
@@ -856,654 +856,654 @@ ChaCha20_ctr32_vmx:
rotlwi 31,31,8
rlwimi 31,10,24,0,7
rlwimi 31,10,24,16,23
- lwz 7,0(4)
- lwz 8,4(4)
- lwz 9,8(4)
- lwz 10,12(4)
- xor 16,16,7
- lwz 7,16(4)
- xor 17,17,8
- lwz 8,20(4)
- xor 18,18,9
- lwz 9,24(4)
- xor 19,19,10
- lwz 10,28(4)
- xor 20,20,7
- lwz 7,32(4)
- xor 21,21,8
- lwz 8,36(4)
- xor 22,22,9
- lwz 9,40(4)
- xor 23,23,10
- lwz 10,44(4)
- xor 24,24,7
- lwz 7,48(4)
- xor 25,25,8
- lwz 8,52(4)
- xor 26,26,9
- lwz 9,56(4)
- xor 27,27,10
- lwz 10,60(4)
- xor 28,28,7
- stw 16,0(3)
- xor 29,29,8
- stw 17,4(3)
- xor 30,30,9
- stw 18,8(3)
- xor 31,31,10
- stw 19,12(3)
- addi 4,4,64
- stw 20,16(3)
- li 7,16
- stw 21,20(3)
- li 8,32
- stw 22,24(3)
- li 9,48
- stw 23,28(3)
- li 10,64
- stw 24,32(3)
- stw 25,36(3)
- stw 26,40(3)
- stw 27,44(3)
- stw 28,48(3)
- stw 29,52(3)
- stw 30,56(3)
- stw 31,60(3)
- addi 3,3,64
-
- lvx 27,0,4
- lvx 28,7,4
- lvx 29,8,4
- lvx 30,9,4
- lvx 31,10,4
- addi 4,4,64
-
+ lwz 7,0(4)
+ lwz 8,4(4)
+ lwz 9,8(4)
+ lwz 10,12(4)
+ xor 16,16,7
+ lwz 7,16(4)
+ xor 17,17,8
+ lwz 8,20(4)
+ xor 18,18,9
+ lwz 9,24(4)
+ xor 19,19,10
+ lwz 10,28(4)
+ xor 20,20,7
+ lwz 7,32(4)
+ xor 21,21,8
+ lwz 8,36(4)
+ xor 22,22,9
+ lwz 9,40(4)
+ xor 23,23,10
+ lwz 10,44(4)
+ xor 24,24,7
+ lwz 7,48(4)
+ xor 25,25,8
+ lwz 8,52(4)
+ xor 26,26,9
+ lwz 9,56(4)
+ xor 27,27,10
+ lwz 10,60(4)
+ xor 28,28,7
+ stw 16,0(3)
+ xor 29,29,8
+ stw 17,4(3)
+ xor 30,30,9
+ stw 18,8(3)
+ xor 31,31,10
+ stw 19,12(3)
+ addi 4,4,64
+ stw 20,16(3)
+ li 7,16
+ stw 21,20(3)
+ li 8,32
+ stw 22,24(3)
+ li 9,48
+ stw 23,28(3)
+ li 10,64
+ stw 24,32(3)
+ stw 25,36(3)
+ stw 26,40(3)
+ stw 27,44(3)
+ stw 28,48(3)
+ stw 29,52(3)
+ stw 30,56(3)
+ stw 31,60(3)
+ addi 3,3,64
+
+ lvx 27,0,4
+ lvx 28,7,4
+ lvx 29,8,4
+ lvx 30,9,4
+ lvx 31,10,4
+ addi 4,4,64
+
vperm 27,27,28,24
vperm 28,28,29,24
vperm 29,29,30,24
vperm 30,30,31,24
- vxor 0,0,27
- vxor 1,1,28
- lvx 28,7,4
- vxor 2,2,29
- lvx 29,8,4
- vxor 3,3,30
- lvx 30,9,4
- lvx 27,10,4
- addi 4,4,64
- li 10,63
- vperm 0,0,0,25
- vperm 1,1,1,25
- vperm 2,2,2,25
- vperm 3,3,3,25
-
+ vxor 0,0,27
+ vxor 1,1,28
+ lvx 28,7,4
+ vxor 2,2,29
+ lvx 29,8,4
+ vxor 3,3,30
+ lvx 30,9,4
+ lvx 27,10,4
+ addi 4,4,64
+ li 10,63
+ vperm 0,0,0,25
+ vperm 1,1,1,25
+ vperm 2,2,2,25
+ vperm 3,3,3,25
+
vperm 31,31,28,24
vperm 28,28,29,24
vperm 29,29,30,24
vperm 30,30,27,24
- vxor 4,4,31
- vxor 5,5,28
- lvx 28,7,4
- vxor 6,6,29
- lvx 29,8,4
- vxor 7,7,30
- lvx 30,9,4
- lvx 31,10,4
- addi 4,4,64
- vperm 4,4,4,25
- vperm 5,5,5,25
- vperm 6,6,6,25
- vperm 7,7,7,25
-
+ vxor 4,4,31
+ vxor 5,5,28
+ lvx 28,7,4
+ vxor 6,6,29
+ lvx 29,8,4
+ vxor 7,7,30
+ lvx 30,9,4
+ lvx 31,10,4
+ addi 4,4,64
+ vperm 4,4,4,25
+ vperm 5,5,5,25
+ vperm 6,6,6,25
+ vperm 7,7,7,25
+
vperm 27,27,28,24
vperm 28,28,29,24
vperm 29,29,30,24
vperm 30,30,31,24
- vxor 8,8,27
- vxor 9,9,28
- vxor 10,10,29
- vxor 11,11,30
- vperm 8,8,8,25
- vperm 9,9,9,25
- vperm 10,10,10,25
- vperm 11,11,11,25
-
- andi. 17,3,15
- mr 16,3
-
- vsel 27,0,1,26
- vsel 28,1,2,26
- vsel 29,2,3,26
- vsel 30,3,4,26
- vsel 1,4,5,26
- vsel 2,5,6,26
- vsel 3,6,7,26
- vsel 4,7,8,26
- vsel 5,8,9,26
- vsel 6,9,10,26
- vsel 7,10,11,26
-
-
- stvx 27,7,3
- stvx 28,8,3
- stvx 29,9,3
- addi 3,3,64
- stvx 30,0,3
- stvx 1,7,3
- stvx 2,8,3
- stvx 3,9,3
- addi 3,3,64
- stvx 4,0,3
- stvx 5,7,3
- stvx 6,8,3
- stvx 7,9,3
- addi 3,3,64
-
- beq .Laligned_vmx
-
- sub 18,3,17
- li 19,0
-.Lunaligned_tail_vmx:
- stvebx 11,19,18
- addi 19,19,1
- cmpw 19,17
- bne .Lunaligned_tail_vmx
-
- sub 18,16,17
-.Lunaligned_head_vmx:
- stvebx 0,17,18
- cmpwi 17,15
- addi 17,17,1
- bne .Lunaligned_head_vmx
-
- cmpldi 5,255
- bgt .Loop_outer_vmx
-
- b .Ldone_vmx
-
-.align 4
-.Laligned_vmx:
- stvx 0,0,16
-
- cmpldi 5,255
- bgt .Loop_outer_vmx
- nop
-
-.Ldone_vmx:
- cmpldi 5,0
- bnel __ChaCha20_1x
-
- lwz 12,268(1)
- li 10,127
- li 11,143
+ vxor 8,8,27
+ vxor 9,9,28
+ vxor 10,10,29
+ vxor 11,11,30
+ vperm 8,8,8,25
+ vperm 9,9,9,25
+ vperm 10,10,10,25
+ vperm 11,11,11,25
+
+ andi. 17,3,15
+ mr 16,3
+
+ vsel 27,0,1,26
+ vsel 28,1,2,26
+ vsel 29,2,3,26
+ vsel 30,3,4,26
+ vsel 1,4,5,26
+ vsel 2,5,6,26
+ vsel 3,6,7,26
+ vsel 4,7,8,26
+ vsel 5,8,9,26
+ vsel 6,9,10,26
+ vsel 7,10,11,26
+
+
+ stvx 27,7,3
+ stvx 28,8,3
+ stvx 29,9,3
+ addi 3,3,64
+ stvx 30,0,3
+ stvx 1,7,3
+ stvx 2,8,3
+ stvx 3,9,3
+ addi 3,3,64
+ stvx 4,0,3
+ stvx 5,7,3
+ stvx 6,8,3
+ stvx 7,9,3
+ addi 3,3,64
+
+ beq .Laligned_vmx
+
+ sub 18,3,17
+ li 19,0
+.Lunaligned_tail_vmx:
+ stvebx 11,19,18
+ addi 19,19,1
+ cmpw 19,17
+ bne .Lunaligned_tail_vmx
+
+ sub 18,16,17
+.Lunaligned_head_vmx:
+ stvebx 0,17,18
+ cmpwi 17,15
+ addi 17,17,1
+ bne .Lunaligned_head_vmx
+
+ cmpldi 5,255
+ bgt .Loop_outer_vmx
+
+ b .Ldone_vmx
+
+.align 4
+.Laligned_vmx:
+ stvx 0,0,16
+
+ cmpldi 5,255
+ bgt .Loop_outer_vmx
+ nop
+
+.Ldone_vmx:
+ cmpldi 5,0
+ bnel __ChaCha20_1x
+
+ lwz 12,268(1)
+ li 10,127
+ li 11,143
mtspr 256,12
- lvx 23,10,1
- addi 10,10,32
- lvx 24,11,1
- addi 11,11,32
- lvx 25,10,1
- addi 10,10,32
- lvx 26,11,1
- addi 11,11,32
- lvx 27,10,1
- addi 10,10,32
- lvx 28,11,1
- addi 11,11,32
- lvx 29,10,1
- addi 10,10,32
- lvx 30,11,1
- lvx 31,10,1
- ld 0, 432(1)
- ld 14,272(1)
- ld 15,280(1)
- ld 16,288(1)
- ld 17,296(1)
- ld 18,304(1)
- ld 19,312(1)
- ld 20,320(1)
- ld 21,328(1)
- ld 22,336(1)
- ld 23,344(1)
- ld 24,352(1)
- ld 25,360(1)
- ld 26,368(1)
- ld 27,376(1)
- ld 28,384(1)
- ld 29,392(1)
- ld 30,400(1)
- ld 31,408(1)
- mtlr 0
- addi 1,1,416
- blr
-.long 0
-.byte 0,12,0x04,1,0x80,18,5,0
-.long 0
+ lvx 23,10,1
+ addi 10,10,32
+ lvx 24,11,1
+ addi 11,11,32
+ lvx 25,10,1
+ addi 10,10,32
+ lvx 26,11,1
+ addi 11,11,32
+ lvx 27,10,1
+ addi 10,10,32
+ lvx 28,11,1
+ addi 11,11,32
+ lvx 29,10,1
+ addi 10,10,32
+ lvx 30,11,1
+ lvx 31,10,1
+ ld 0, 432(1)
+ ld 14,272(1)
+ ld 15,280(1)
+ ld 16,288(1)
+ ld 17,296(1)
+ ld 18,304(1)
+ ld 19,312(1)
+ ld 20,320(1)
+ ld 21,328(1)
+ ld 22,336(1)
+ ld 23,344(1)
+ ld 24,352(1)
+ ld 25,360(1)
+ ld 26,368(1)
+ ld 27,376(1)
+ ld 28,384(1)
+ ld 29,392(1)
+ ld 30,400(1)
+ ld 31,408(1)
+ mtlr 0
+ addi 1,1,416
+ blr
+.long 0
+.byte 0,12,0x04,1,0x80,18,5,0
+.long 0
.size .ChaCha20_ctr32_vmx,.-.ChaCha20_ctr32_vmx
.size ChaCha20_ctr32_vmx,.-.ChaCha20_ctr32_vmx
-
-.globl ChaCha20_ctr32_vsx
-.type ChaCha20_ctr32_vsx,@function
+
+.globl ChaCha20_ctr32_vsx
+.type ChaCha20_ctr32_vsx,@function
.section ".opd","aw"
.align 3
ChaCha20_ctr32_vsx:
.quad .ChaCha20_ctr32_vsx,.TOC.@tocbase,0
.previous
-.align 5
+.align 5
.ChaCha20_ctr32_vsx:
- stdu 1,-224(1)
- mflr 0
- li 10,127
- li 11,143
+ stdu 1,-224(1)
+ mflr 0
+ li 10,127
+ li 11,143
mfspr 12,256
- stvx 26,10,1
- addi 10,10,32
- stvx 27,11,1
- addi 11,11,32
- stvx 28,10,1
- addi 10,10,32
- stvx 29,11,1
- addi 11,11,32
- stvx 30,10,1
- stvx 31,11,1
- stw 12,220(1)
- li 12,-4096+63
- std 0, 240(1)
+ stvx 26,10,1
+ addi 10,10,32
+ stvx 27,11,1
+ addi 11,11,32
+ stvx 28,10,1
+ addi 10,10,32
+ stvx 29,11,1
+ addi 11,11,32
+ stvx 30,10,1
+ stvx 31,11,1
+ stw 12,220(1)
+ li 12,-4096+63
+ std 0, 240(1)
mtspr 256,12
-
- bl .Lconsts
- .long 0x7E006619
- addi 12,12,0x50
- li 8,16
- li 9,32
- li 10,48
- li 11,64
-
- .long 0x7E203619
- .long 0x7E483619
- .long 0x7E603E19
-
- vxor 27,27,27
- .long 0x7F8B6619
- vspltw 26,19,0
- vsldoi 19,19,27,4
- vsldoi 19,27,19,12
- vadduwm 26,26,28
-
+
+ bl .Lconsts
+ .long 0x7E006619
+ addi 12,12,0x50
+ li 8,16
+ li 9,32
+ li 10,48
+ li 11,64
+
+ .long 0x7E203619
+ .long 0x7E483619
+ .long 0x7E603E19
+
+ vxor 27,27,27
+ .long 0x7F8B6619
+ vspltw 26,19,0
+ vsldoi 19,19,27,4
+ vsldoi 19,27,19,12
+ vadduwm 26,26,28
+
lvsl 31,0,8
vspltisb 27,3
vxor 31,31,27
-
- li 0,10
- mtctr 0
- b .Loop_outer_vsx
-
-.align 5
-.Loop_outer_vsx:
- lvx 0,0,12
- lvx 1,8,12
- lvx 2,9,12
- lvx 3,10,12
-
- vspltw 4,17,0
- vspltw 5,17,1
- vspltw 6,17,2
- vspltw 7,17,3
-
- vspltw 8,18,0
- vspltw 9,18,1
- vspltw 10,18,2
- vspltw 11,18,3
-
- vor 12,26,26
- vspltw 13,19,1
- vspltw 14,19,2
- vspltw 15,19,3
-
- vspltisw 27,-16
- vspltisw 28,12
- vspltisw 29,8
- vspltisw 30,7
-
-.Loop_vsx:
- vadduwm 0,0,4
- vadduwm 1,1,5
- vadduwm 2,2,6
- vadduwm 3,3,7
- vxor 12,12,0
- vxor 13,13,1
- vxor 14,14,2
- vxor 15,15,3
- vrlw 12,12,27
- vrlw 13,13,27
- vrlw 14,14,27
- vrlw 15,15,27
- vadduwm 8,8,12
- vadduwm 9,9,13
- vadduwm 10,10,14
- vadduwm 11,11,15
- vxor 4,4,8
- vxor 5,5,9
- vxor 6,6,10
- vxor 7,7,11
- vrlw 4,4,28
- vrlw 5,5,28
- vrlw 6,6,28
- vrlw 7,7,28
- vadduwm 0,0,4
- vadduwm 1,1,5
- vadduwm 2,2,6
- vadduwm 3,3,7
- vxor 12,12,0
- vxor 13,13,1
- vxor 14,14,2
- vxor 15,15,3
- vrlw 12,12,29
- vrlw 13,13,29
- vrlw 14,14,29
- vrlw 15,15,29
- vadduwm 8,8,12
- vadduwm 9,9,13
- vadduwm 10,10,14
- vadduwm 11,11,15
- vxor 4,4,8
- vxor 5,5,9
- vxor 6,6,10
- vxor 7,7,11
- vrlw 4,4,30
- vrlw 5,5,30
- vrlw 6,6,30
- vrlw 7,7,30
- vadduwm 0,0,5
- vadduwm 1,1,6
- vadduwm 2,2,7
- vadduwm 3,3,4
- vxor 15,15,0
- vxor 12,12,1
- vxor 13,13,2
- vxor 14,14,3
- vrlw 15,15,27
- vrlw 12,12,27
- vrlw 13,13,27
- vrlw 14,14,27
- vadduwm 10,10,15
- vadduwm 11,11,12
- vadduwm 8,8,13
- vadduwm 9,9,14
- vxor 5,5,10
- vxor 6,6,11
- vxor 7,7,8
- vxor 4,4,9
- vrlw 5,5,28
- vrlw 6,6,28
- vrlw 7,7,28
- vrlw 4,4,28
- vadduwm 0,0,5
- vadduwm 1,1,6
- vadduwm 2,2,7
- vadduwm 3,3,4
- vxor 15,15,0
- vxor 12,12,1
- vxor 13,13,2
- vxor 14,14,3
- vrlw 15,15,29
- vrlw 12,12,29
- vrlw 13,13,29
- vrlw 14,14,29
- vadduwm 10,10,15
- vadduwm 11,11,12
- vadduwm 8,8,13
- vadduwm 9,9,14
- vxor 5,5,10
- vxor 6,6,11
- vxor 7,7,8
- vxor 4,4,9
- vrlw 5,5,30
- vrlw 6,6,30
- vrlw 7,7,30
- vrlw 4,4,30
- bdnz .Loop_vsx
-
- vadduwm 12,12,26
-
- .long 0x13600F8C
- .long 0x13821F8C
- .long 0x10000E8C
- .long 0x10421E8C
- .long 0x13A42F8C
- .long 0x13C63F8C
- .long 0xF0201057
- .long 0xF0601357
- .long 0xF01BE057
- .long 0xF05BE357
-
- .long 0x10842E8C
- .long 0x10C63E8C
- .long 0x13684F8C
- .long 0x138A5F8C
- .long 0xF0A43057
- .long 0xF0E43357
- .long 0xF09DF057
- .long 0xF0DDF357
-
- .long 0x11084E8C
- .long 0x114A5E8C
- .long 0x13AC6F8C
- .long 0x13CE7F8C
- .long 0xF1285057
- .long 0xF1685357
- .long 0xF11BE057
- .long 0xF15BE357
-
- .long 0x118C6E8C
- .long 0x11CE7E8C
- vspltisw 27,4
- vadduwm 26,26,27
- .long 0xF1AC7057
- .long 0xF1EC7357
- .long 0xF19DF057
- .long 0xF1DDF357
-
- vadduwm 0,0,16
- vadduwm 4,4,17
- vadduwm 8,8,18
- vadduwm 12,12,19
-
+
+ li 0,10
+ mtctr 0
+ b .Loop_outer_vsx
+
+.align 5
+.Loop_outer_vsx:
+ lvx 0,0,12
+ lvx 1,8,12
+ lvx 2,9,12
+ lvx 3,10,12
+
+ vspltw 4,17,0
+ vspltw 5,17,1
+ vspltw 6,17,2
+ vspltw 7,17,3
+
+ vspltw 8,18,0
+ vspltw 9,18,1
+ vspltw 10,18,2
+ vspltw 11,18,3
+
+ vor 12,26,26
+ vspltw 13,19,1
+ vspltw 14,19,2
+ vspltw 15,19,3
+
+ vspltisw 27,-16
+ vspltisw 28,12
+ vspltisw 29,8
+ vspltisw 30,7
+
+.Loop_vsx:
+ vadduwm 0,0,4
+ vadduwm 1,1,5
+ vadduwm 2,2,6
+ vadduwm 3,3,7
+ vxor 12,12,0
+ vxor 13,13,1
+ vxor 14,14,2
+ vxor 15,15,3
+ vrlw 12,12,27
+ vrlw 13,13,27
+ vrlw 14,14,27
+ vrlw 15,15,27
+ vadduwm 8,8,12
+ vadduwm 9,9,13
+ vadduwm 10,10,14
+ vadduwm 11,11,15
+ vxor 4,4,8
+ vxor 5,5,9
+ vxor 6,6,10
+ vxor 7,7,11
+ vrlw 4,4,28
+ vrlw 5,5,28
+ vrlw 6,6,28
+ vrlw 7,7,28
+ vadduwm 0,0,4
+ vadduwm 1,1,5
+ vadduwm 2,2,6
+ vadduwm 3,3,7
+ vxor 12,12,0
+ vxor 13,13,1
+ vxor 14,14,2
+ vxor 15,15,3
+ vrlw 12,12,29
+ vrlw 13,13,29
+ vrlw 14,14,29
+ vrlw 15,15,29
+ vadduwm 8,8,12
+ vadduwm 9,9,13
+ vadduwm 10,10,14
+ vadduwm 11,11,15
+ vxor 4,4,8
+ vxor 5,5,9
+ vxor 6,6,10
+ vxor 7,7,11
+ vrlw 4,4,30
+ vrlw 5,5,30
+ vrlw 6,6,30
+ vrlw 7,7,30
+ vadduwm 0,0,5
+ vadduwm 1,1,6
+ vadduwm 2,2,7
+ vadduwm 3,3,4
+ vxor 15,15,0
+ vxor 12,12,1
+ vxor 13,13,2
+ vxor 14,14,3
+ vrlw 15,15,27
+ vrlw 12,12,27
+ vrlw 13,13,27
+ vrlw 14,14,27
+ vadduwm 10,10,15
+ vadduwm 11,11,12
+ vadduwm 8,8,13
+ vadduwm 9,9,14
+ vxor 5,5,10
+ vxor 6,6,11
+ vxor 7,7,8
+ vxor 4,4,9
+ vrlw 5,5,28
+ vrlw 6,6,28
+ vrlw 7,7,28
+ vrlw 4,4,28
+ vadduwm 0,0,5
+ vadduwm 1,1,6
+ vadduwm 2,2,7
+ vadduwm 3,3,4
+ vxor 15,15,0
+ vxor 12,12,1
+ vxor 13,13,2
+ vxor 14,14,3
+ vrlw 15,15,29
+ vrlw 12,12,29
+ vrlw 13,13,29
+ vrlw 14,14,29
+ vadduwm 10,10,15
+ vadduwm 11,11,12
+ vadduwm 8,8,13
+ vadduwm 9,9,14
+ vxor 5,5,10
+ vxor 6,6,11
+ vxor 7,7,8
+ vxor 4,4,9
+ vrlw 5,5,30
+ vrlw 6,6,30
+ vrlw 7,7,30
+ vrlw 4,4,30
+ bdnz .Loop_vsx
+
+ vadduwm 12,12,26
+
+ .long 0x13600F8C
+ .long 0x13821F8C
+ .long 0x10000E8C
+ .long 0x10421E8C
+ .long 0x13A42F8C
+ .long 0x13C63F8C
+ .long 0xF0201057
+ .long 0xF0601357
+ .long 0xF01BE057
+ .long 0xF05BE357
+
+ .long 0x10842E8C
+ .long 0x10C63E8C
+ .long 0x13684F8C
+ .long 0x138A5F8C
+ .long 0xF0A43057
+ .long 0xF0E43357
+ .long 0xF09DF057
+ .long 0xF0DDF357
+
+ .long 0x11084E8C
+ .long 0x114A5E8C
+ .long 0x13AC6F8C
+ .long 0x13CE7F8C
+ .long 0xF1285057
+ .long 0xF1685357
+ .long 0xF11BE057
+ .long 0xF15BE357
+
+ .long 0x118C6E8C
+ .long 0x11CE7E8C
+ vspltisw 27,4
+ vadduwm 26,26,27
+ .long 0xF1AC7057
+ .long 0xF1EC7357
+ .long 0xF19DF057
+ .long 0xF1DDF357
+
+ vadduwm 0,0,16
+ vadduwm 4,4,17
+ vadduwm 8,8,18
+ vadduwm 12,12,19
+
vperm 0,0,0,31
vperm 4,4,4,31
vperm 8,8,8,31
vperm 12,12,12,31
-
- cmpldi 5,0x40
- blt .Ltail_vsx
-
- .long 0x7F602619
- .long 0x7F882619
- .long 0x7FA92619
- .long 0x7FCA2619
-
- vxor 27,27,0
- vxor 28,28,4
- vxor 29,29,8
- vxor 30,30,12
-
- .long 0x7F601F19
- .long 0x7F881F19
- addi 4,4,0x40
- .long 0x7FA91F19
- subi 5,5,0x40
- .long 0x7FCA1F19
- addi 3,3,0x40
- beq .Ldone_vsx
-
- vadduwm 0,1,16
- vadduwm 4,5,17
- vadduwm 8,9,18
- vadduwm 12,13,19
-
+
+ cmpldi 5,0x40
+ blt .Ltail_vsx
+
+ .long 0x7F602619
+ .long 0x7F882619
+ .long 0x7FA92619
+ .long 0x7FCA2619
+
+ vxor 27,27,0
+ vxor 28,28,4
+ vxor 29,29,8
+ vxor 30,30,12
+
+ .long 0x7F601F19
+ .long 0x7F881F19
+ addi 4,4,0x40
+ .long 0x7FA91F19
+ subi 5,5,0x40
+ .long 0x7FCA1F19
+ addi 3,3,0x40
+ beq .Ldone_vsx
+
+ vadduwm 0,1,16
+ vadduwm 4,5,17
+ vadduwm 8,9,18
+ vadduwm 12,13,19
+
vperm 0,0,0,31
vperm 4,4,4,31
vperm 8,8,8,31
vperm 12,12,12,31
-
- cmpldi 5,0x40
- blt .Ltail_vsx
-
- .long 0x7F602619
- .long 0x7F882619
- .long 0x7FA92619
- .long 0x7FCA2619
-
- vxor 27,27,0
- vxor 28,28,4
- vxor 29,29,8
- vxor 30,30,12
-
- .long 0x7F601F19
- .long 0x7F881F19
- addi 4,4,0x40
- .long 0x7FA91F19
- subi 5,5,0x40
- .long 0x7FCA1F19
- addi 3,3,0x40
- beq .Ldone_vsx
-
- vadduwm 0,2,16
- vadduwm 4,6,17
- vadduwm 8,10,18
- vadduwm 12,14,19
-
+
+ cmpldi 5,0x40
+ blt .Ltail_vsx
+
+ .long 0x7F602619
+ .long 0x7F882619
+ .long 0x7FA92619
+ .long 0x7FCA2619
+
+ vxor 27,27,0
+ vxor 28,28,4
+ vxor 29,29,8
+ vxor 30,30,12
+
+ .long 0x7F601F19
+ .long 0x7F881F19
+ addi 4,4,0x40
+ .long 0x7FA91F19
+ subi 5,5,0x40
+ .long 0x7FCA1F19
+ addi 3,3,0x40
+ beq .Ldone_vsx
+
+ vadduwm 0,2,16
+ vadduwm 4,6,17
+ vadduwm 8,10,18
+ vadduwm 12,14,19
+
vperm 0,0,0,31
vperm 4,4,4,31
vperm 8,8,8,31
vperm 12,12,12,31
-
- cmpldi 5,0x40
- blt .Ltail_vsx
-
- .long 0x7F602619
- .long 0x7F882619
- .long 0x7FA92619
- .long 0x7FCA2619
-
- vxor 27,27,0
- vxor 28,28,4
- vxor 29,29,8
- vxor 30,30,12
-
- .long 0x7F601F19
- .long 0x7F881F19
- addi 4,4,0x40
- .long 0x7FA91F19
- subi 5,5,0x40
- .long 0x7FCA1F19
- addi 3,3,0x40
- beq .Ldone_vsx
-
- vadduwm 0,3,16
- vadduwm 4,7,17
- vadduwm 8,11,18
- vadduwm 12,15,19
-
+
+ cmpldi 5,0x40
+ blt .Ltail_vsx
+
+ .long 0x7F602619
+ .long 0x7F882619
+ .long 0x7FA92619
+ .long 0x7FCA2619
+
+ vxor 27,27,0
+ vxor 28,28,4
+ vxor 29,29,8
+ vxor 30,30,12
+
+ .long 0x7F601F19
+ .long 0x7F881F19
+ addi 4,4,0x40
+ .long 0x7FA91F19
+ subi 5,5,0x40
+ .long 0x7FCA1F19
+ addi 3,3,0x40
+ beq .Ldone_vsx
+
+ vadduwm 0,3,16
+ vadduwm 4,7,17
+ vadduwm 8,11,18
+ vadduwm 12,15,19
+
vperm 0,0,0,31
vperm 4,4,4,31
vperm 8,8,8,31
vperm 12,12,12,31
-
- cmpldi 5,0x40
- blt .Ltail_vsx
-
- .long 0x7F602619
- .long 0x7F882619
- .long 0x7FA92619
- .long 0x7FCA2619
-
- vxor 27,27,0
- vxor 28,28,4
- vxor 29,29,8
- vxor 30,30,12
-
- .long 0x7F601F19
- .long 0x7F881F19
- addi 4,4,0x40
- .long 0x7FA91F19
- subi 5,5,0x40
- .long 0x7FCA1F19
- addi 3,3,0x40
- mtctr 0
- bne .Loop_outer_vsx
-
-.Ldone_vsx:
- lwz 12,220(1)
- li 10,127
- li 11,143
- ld 0, 240(1)
+
+ cmpldi 5,0x40
+ blt .Ltail_vsx
+
+ .long 0x7F602619
+ .long 0x7F882619
+ .long 0x7FA92619
+ .long 0x7FCA2619
+
+ vxor 27,27,0
+ vxor 28,28,4
+ vxor 29,29,8
+ vxor 30,30,12
+
+ .long 0x7F601F19
+ .long 0x7F881F19
+ addi 4,4,0x40
+ .long 0x7FA91F19
+ subi 5,5,0x40
+ .long 0x7FCA1F19
+ addi 3,3,0x40
+ mtctr 0
+ bne .Loop_outer_vsx
+
+.Ldone_vsx:
+ lwz 12,220(1)
+ li 10,127
+ li 11,143
+ ld 0, 240(1)
mtspr 256,12
- lvx 26,10,1
- addi 10,10,32
- lvx 27,11,1
- addi 11,11,32
- lvx 28,10,1
- addi 10,10,32
- lvx 29,11,1
- addi 11,11,32
- lvx 30,10,1
- lvx 31,11,1
- mtlr 0
- addi 1,1,224
- blr
-
-.align 4
-.Ltail_vsx:
- addi 11,1,48
- mtctr 5
- .long 0x7C005F19
- .long 0x7C885F19
- .long 0x7D095F19
- .long 0x7D8A5F19
- subi 12,11,1
- subi 4,4,1
- subi 3,3,1
-
-.Loop_tail_vsx:
- lbzu 6,1(12)
- lbzu 7,1(4)
- xor 6,6,7
- stbu 6,1(3)
- bdnz .Loop_tail_vsx
-
- .long 0x7E005F19
- .long 0x7E085F19
- .long 0x7E095F19
- .long 0x7E0A5F19
-
- b .Ldone_vsx
-.long 0
-.byte 0,12,0x04,1,0x80,0,5,0
-.long 0
+ lvx 26,10,1
+ addi 10,10,32
+ lvx 27,11,1
+ addi 11,11,32
+ lvx 28,10,1
+ addi 10,10,32
+ lvx 29,11,1
+ addi 11,11,32
+ lvx 30,10,1
+ lvx 31,11,1
+ mtlr 0
+ addi 1,1,224
+ blr
+
+.align 4
+.Ltail_vsx:
+ addi 11,1,48
+ mtctr 5
+ .long 0x7C005F19
+ .long 0x7C885F19
+ .long 0x7D095F19
+ .long 0x7D8A5F19
+ subi 12,11,1
+ subi 4,4,1
+ subi 3,3,1
+
+.Loop_tail_vsx:
+ lbzu 6,1(12)
+ lbzu 7,1(4)
+ xor 6,6,7
+ stbu 6,1(3)
+ bdnz .Loop_tail_vsx
+
+ .long 0x7E005F19
+ .long 0x7E085F19
+ .long 0x7E095F19
+ .long 0x7E0A5F19
+
+ b .Ldone_vsx
+.long 0
+.byte 0,12,0x04,1,0x80,0,5,0
+.long 0
.size .ChaCha20_ctr32_vsx,.-.ChaCha20_ctr32_vsx
.size ChaCha20_ctr32_vsx,.-.ChaCha20_ctr32_vsx
-.align 5
-.Lconsts:
- mflr 0
- bcl 20,31,$+4
- mflr 12
- addi 12,12,56
- mtlr 0
- blr
-.long 0
-.byte 0,12,0x14,0,0,0,0,0
-.space 28
-.Lsigma:
-.long 0x61707865,0x3320646e,0x79622d32,0x6b206574
-.long 1,0,0,0
-.long 4,0,0,0
+.align 5
+.Lconsts:
+ mflr 0
+ bcl 20,31,$+4
+ mflr 12
+ addi 12,12,56
+ mtlr 0
+ blr
+.long 0
+.byte 0,12,0x14,0,0,0,0,0
+.space 28
+.Lsigma:
+.long 0x61707865,0x3320646e,0x79622d32,0x6b206574
+.long 1,0,0,0
+.long 4,0,0,0
.long 0x02030001,0x06070405,0x0a0b0809,0x0e0f0c0d
.long 0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c
-.long 0x61707865,0x61707865,0x61707865,0x61707865
-.long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
-.long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
-.long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
-.long 0,1,2,3
-.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 2
-.align 2
+.long 0x61707865,0x61707865,0x61707865,0x61707865
+.long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
+.long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
+.long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
+.long 0,1,2,3
+.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2
+.align 2