aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s
diff options
context:
space:
mode:
authordanlark <danlark@yandex-team.ru>2022-02-10 16:46:10 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:10 +0300
commitbaa58daefa91fde4b4769facdbd2903763b9c6a8 (patch)
tree1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s
parent3426a9bc7f169ae9da54cef557ad2a33f6e8eee0 (diff)
downloadydb-baa58daefa91fde4b4769facdbd2903763b9c6a8.tar.gz
Restoring authorship annotation for <danlark@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s')
-rw-r--r--contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s3560
1 files changed, 1780 insertions, 1780 deletions
diff --git a/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s b/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s
index 885d609e73..95bfd7bf60 100644
--- a/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s
+++ b/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s
@@ -1,1796 +1,1796 @@
-.machine "any"
-.text
-
-.globl bn_mul_mont_int
-.type bn_mul_mont_int,@function
+.machine "any"
+.text
+
+.globl bn_mul_mont_int
+.type bn_mul_mont_int,@function
.section ".opd","aw"
.align 3
bn_mul_mont_int:
.quad .bn_mul_mont_int,.TOC.@tocbase,0
.previous
-.align 5
+.align 5
.bn_mul_mont_int:
- mr 9,3
- li 3,0
- slwi 8,8,3
- li 12,-4096
- addi 3,8,352
- subf 3,3,1
- and 3,3,12
- subf 3,1,3
- mr 12,1
- srwi 8,8,3
- stdux 1,1,3
-
- std 20,-96(12)
- std 21,-88(12)
- std 22,-80(12)
- std 23,-72(12)
- std 24,-64(12)
- std 25,-56(12)
- std 26,-48(12)
- std 27,-40(12)
- std 28,-32(12)
- std 29,-24(12)
- std 30,-16(12)
- std 31,-8(12)
-
- ld 7,0(7)
- addi 8,8,-2
-
- ld 23,0(5)
- ld 10,0(4)
- addi 22,1,64
- mulld 25,10,23
- mulhdu 26,10,23
-
- ld 10,8(4)
- ld 11,0(6)
-
- mulld 24,25,7
-
- mulld 29,10,23
- mulhdu 30,10,23
-
- mulld 27,11,24
- mulhdu 28,11,24
- ld 11,8(6)
- addc 27,27,25
- addze 28,28
-
- mulld 31,11,24
- mulhdu 0,11,24
-
- mtctr 8
- li 21,16
-.align 4
-.L1st:
- ldx 10,4,21
- addc 25,29,26
- ldx 11,6,21
- addze 26,30
- mulld 29,10,23
- addc 27,31,28
- mulhdu 30,10,23
- addze 28,0
- mulld 31,11,24
- addc 27,27,25
- mulhdu 0,11,24
- addze 28,28
- std 27,0(22)
-
- addi 21,21,8
- addi 22,22,8
- bdnz .L1st
-
- addc 25,29,26
- addze 26,30
-
- addc 27,31,28
- addze 28,0
- addc 27,27,25
- addze 28,28
- std 27,0(22)
-
- li 3,0
- addc 28,28,26
- addze 3,3
- std 28,8(22)
-
- li 20,8
-.align 4
-.Louter:
- ldx 23,5,20
- ld 10,0(4)
- addi 22,1,64
- ld 12,64(1)
- mulld 25,10,23
- mulhdu 26,10,23
- ld 10,8(4)
- ld 11,0(6)
- addc 25,25,12
- mulld 29,10,23
- addze 26,26
- mulld 24,25,7
- mulhdu 30,10,23
- mulld 27,11,24
- mulhdu 28,11,24
- ld 11,8(6)
- addc 27,27,25
- mulld 31,11,24
- addze 28,28
- mulhdu 0,11,24
-
- mtctr 8
- li 21,16
-.align 4
-.Linner:
- ldx 10,4,21
- addc 25,29,26
- ld 12,8(22)
- addze 26,30
- ldx 11,6,21
- addc 27,31,28
- mulld 29,10,23
- addze 28,0
- mulhdu 30,10,23
- addc 25,25,12
- mulld 31,11,24
- addze 26,26
- mulhdu 0,11,24
- addc 27,27,25
- addi 21,21,8
- addze 28,28
- std 27,0(22)
- addi 22,22,8
- bdnz .Linner
-
- ld 12,8(22)
- addc 25,29,26
- addze 26,30
- addc 25,25,12
- addze 26,26
-
- addc 27,31,28
- addze 28,0
- addc 27,27,25
- addze 28,28
- std 27,0(22)
-
- addic 3,3,-1
- li 3,0
- adde 28,28,26
- addze 3,3
- std 28,8(22)
-
- slwi 12,8,3
- cmpld 20,12
- addi 20,20,8
- ble .Louter
-
- addi 8,8,2
- subfc 21,21,21
- addi 22,1,64
- mtctr 8
-
-.align 4
-.Lsub: ldx 12,22,21
- ldx 11,6,21
- subfe 10,11,12
- stdx 10,9,21
- addi 21,21,8
- bdnz .Lsub
-
- li 21,0
- mtctr 8
- subfe 3,21,3
-
-.align 4
-.Lcopy:
- ldx 12,22,21
- ldx 10,9,21
- and 12,12,3
- andc 10,10,3
- stdx 21,22,21
- or 10,10,12
- stdx 10,9,21
- addi 21,21,8
- bdnz .Lcopy
-
- ld 12,0(1)
- li 3,1
- ld 20,-96(12)
- ld 21,-88(12)
- ld 22,-80(12)
- ld 23,-72(12)
- ld 24,-64(12)
- ld 25,-56(12)
- ld 26,-48(12)
- ld 27,-40(12)
- ld 28,-32(12)
- ld 29,-24(12)
- ld 30,-16(12)
- ld 31,-8(12)
- mr 1,12
- blr
-.long 0
-.byte 0,12,4,0,0x80,12,6,0
-.long 0
+ mr 9,3
+ li 3,0
+ slwi 8,8,3
+ li 12,-4096
+ addi 3,8,352
+ subf 3,3,1
+ and 3,3,12
+ subf 3,1,3
+ mr 12,1
+ srwi 8,8,3
+ stdux 1,1,3
+
+ std 20,-96(12)
+ std 21,-88(12)
+ std 22,-80(12)
+ std 23,-72(12)
+ std 24,-64(12)
+ std 25,-56(12)
+ std 26,-48(12)
+ std 27,-40(12)
+ std 28,-32(12)
+ std 29,-24(12)
+ std 30,-16(12)
+ std 31,-8(12)
+
+ ld 7,0(7)
+ addi 8,8,-2
+
+ ld 23,0(5)
+ ld 10,0(4)
+ addi 22,1,64
+ mulld 25,10,23
+ mulhdu 26,10,23
+
+ ld 10,8(4)
+ ld 11,0(6)
+
+ mulld 24,25,7
+
+ mulld 29,10,23
+ mulhdu 30,10,23
+
+ mulld 27,11,24
+ mulhdu 28,11,24
+ ld 11,8(6)
+ addc 27,27,25
+ addze 28,28
+
+ mulld 31,11,24
+ mulhdu 0,11,24
+
+ mtctr 8
+ li 21,16
+.align 4
+.L1st:
+ ldx 10,4,21
+ addc 25,29,26
+ ldx 11,6,21
+ addze 26,30
+ mulld 29,10,23
+ addc 27,31,28
+ mulhdu 30,10,23
+ addze 28,0
+ mulld 31,11,24
+ addc 27,27,25
+ mulhdu 0,11,24
+ addze 28,28
+ std 27,0(22)
+
+ addi 21,21,8
+ addi 22,22,8
+ bdnz .L1st
+
+ addc 25,29,26
+ addze 26,30
+
+ addc 27,31,28
+ addze 28,0
+ addc 27,27,25
+ addze 28,28
+ std 27,0(22)
+
+ li 3,0
+ addc 28,28,26
+ addze 3,3
+ std 28,8(22)
+
+ li 20,8
+.align 4
+.Louter:
+ ldx 23,5,20
+ ld 10,0(4)
+ addi 22,1,64
+ ld 12,64(1)
+ mulld 25,10,23
+ mulhdu 26,10,23
+ ld 10,8(4)
+ ld 11,0(6)
+ addc 25,25,12
+ mulld 29,10,23
+ addze 26,26
+ mulld 24,25,7
+ mulhdu 30,10,23
+ mulld 27,11,24
+ mulhdu 28,11,24
+ ld 11,8(6)
+ addc 27,27,25
+ mulld 31,11,24
+ addze 28,28
+ mulhdu 0,11,24
+
+ mtctr 8
+ li 21,16
+.align 4
+.Linner:
+ ldx 10,4,21
+ addc 25,29,26
+ ld 12,8(22)
+ addze 26,30
+ ldx 11,6,21
+ addc 27,31,28
+ mulld 29,10,23
+ addze 28,0
+ mulhdu 30,10,23
+ addc 25,25,12
+ mulld 31,11,24
+ addze 26,26
+ mulhdu 0,11,24
+ addc 27,27,25
+ addi 21,21,8
+ addze 28,28
+ std 27,0(22)
+ addi 22,22,8
+ bdnz .Linner
+
+ ld 12,8(22)
+ addc 25,29,26
+ addze 26,30
+ addc 25,25,12
+ addze 26,26
+
+ addc 27,31,28
+ addze 28,0
+ addc 27,27,25
+ addze 28,28
+ std 27,0(22)
+
+ addic 3,3,-1
+ li 3,0
+ adde 28,28,26
+ addze 3,3
+ std 28,8(22)
+
+ slwi 12,8,3
+ cmpld 20,12
+ addi 20,20,8
+ ble .Louter
+
+ addi 8,8,2
+ subfc 21,21,21
+ addi 22,1,64
+ mtctr 8
+
+.align 4
+.Lsub: ldx 12,22,21
+ ldx 11,6,21
+ subfe 10,11,12
+ stdx 10,9,21
+ addi 21,21,8
+ bdnz .Lsub
+
+ li 21,0
+ mtctr 8
+ subfe 3,21,3
+
+.align 4
+.Lcopy:
+ ldx 12,22,21
+ ldx 10,9,21
+ and 12,12,3
+ andc 10,10,3
+ stdx 21,22,21
+ or 10,10,12
+ stdx 10,9,21
+ addi 21,21,8
+ bdnz .Lcopy
+
+ ld 12,0(1)
+ li 3,1
+ ld 20,-96(12)
+ ld 21,-88(12)
+ ld 22,-80(12)
+ ld 23,-72(12)
+ ld 24,-64(12)
+ ld 25,-56(12)
+ ld 26,-48(12)
+ ld 27,-40(12)
+ ld 28,-32(12)
+ ld 29,-24(12)
+ ld 30,-16(12)
+ ld 31,-8(12)
+ mr 1,12
+ blr
+.long 0
+.byte 0,12,4,0,0x80,12,6,0
+.long 0
.size .bn_mul_mont_int,.-.bn_mul_mont_int
.size bn_mul_mont_int,.-.bn_mul_mont_int
-.globl bn_mul4x_mont_int
-.type bn_mul4x_mont_int,@function
+.globl bn_mul4x_mont_int
+.type bn_mul4x_mont_int,@function
.section ".opd","aw"
.align 3
bn_mul4x_mont_int:
.quad .bn_mul4x_mont_int,.TOC.@tocbase,0
.previous
-.align 5
+.align 5
.bn_mul4x_mont_int:
- andi. 0,8,7
- bne .Lmul4x_do
- cmpld 4,5
- bne .Lmul4x_do
- b .Lsqr8x_do
-.Lmul4x_do:
- slwi 8,8,3
- mr 9,1
- li 10,-32*8
- sub 10,10,8
- stdux 1,1,10
-
- std 14,-8*18(9)
- std 15,-8*17(9)
- std 16,-8*16(9)
- std 17,-8*15(9)
- std 18,-8*14(9)
- std 19,-8*13(9)
- std 20,-8*12(9)
- std 21,-8*11(9)
- std 22,-8*10(9)
- std 23,-8*9(9)
- std 24,-8*8(9)
- std 25,-8*7(9)
- std 26,-8*6(9)
- std 27,-8*5(9)
- std 28,-8*4(9)
- std 29,-8*3(9)
- std 30,-8*2(9)
- std 31,-8*1(9)
-
- subi 4,4,8
- subi 6,6,8
- subi 3,3,8
- ld 7,0(7)
-
- add 14,5,8
- add 30,4,8
- subi 14,14,8*4
-
- ld 27,8*0(5)
- li 22,0
- ld 9,8*1(4)
- li 23,0
- ld 10,8*2(4)
- li 24,0
- ld 11,8*3(4)
- li 25,0
- ldu 12,8*4(4)
- ld 18,8*1(6)
- ld 19,8*2(6)
- ld 20,8*3(6)
- ldu 21,8*4(6)
-
- std 3,8*6(1)
- std 14,8*7(1)
- li 3,0
- addic 29,1,8*7
- li 31,0
- li 0,0
- b .Loop_mul4x_1st_reduction
-
-.align 5
-.Loop_mul4x_1st_reduction:
- mulld 14,9,27
- addze 3,3
- mulld 15,10,27
- addi 31,31,8
- mulld 16,11,27
- andi. 31,31,8*4-1
- mulld 17,12,27
- addc 22,22,14
- mulhdu 14,9,27
- adde 23,23,15
- mulhdu 15,10,27
- adde 24,24,16
- mulld 28,22,7
- adde 25,25,17
- mulhdu 16,11,27
- addze 26,0
- mulhdu 17,12,27
- ldx 27,5,31
- addc 23,23,14
-
- stdu 28,8(29)
- adde 24,24,15
- mulld 15,19,28
- adde 25,25,16
- mulld 16,20,28
- adde 26,26,17
- mulld 17,21,28
-
-
-
-
-
-
-
-
-
-
- addic 22,22,-1
- mulhdu 14,18,28
- adde 22,23,15
- mulhdu 15,19,28
- adde 23,24,16
- mulhdu 16,20,28
- adde 24,25,17
- mulhdu 17,21,28
- adde 25,26,3
- addze 3,0
- addc 22,22,14
- adde 23,23,15
- adde 24,24,16
- adde 25,25,17
-
- bne .Loop_mul4x_1st_reduction
-
- cmpld 30,4
- beq .Lmul4x4_post_condition
-
- ld 9,8*1(4)
- ld 10,8*2(4)
- ld 11,8*3(4)
- ldu 12,8*4(4)
- ld 28,8*8(1)
- ld 18,8*1(6)
- ld 19,8*2(6)
- ld 20,8*3(6)
- ldu 21,8*4(6)
- b .Loop_mul4x_1st_tail
-
-.align 5
-.Loop_mul4x_1st_tail:
- mulld 14,9,27
- addze 3,3
- mulld 15,10,27
- addi 31,31,8
- mulld 16,11,27
- andi. 31,31,8*4-1
- mulld 17,12,27
- addc 22,22,14
- mulhdu 14,9,27
- adde 23,23,15
- mulhdu 15,10,27
- adde 24,24,16
- mulhdu 16,11,27
- adde 25,25,17
- mulhdu 17,12,27
- addze 26,0
- ldx 27,5,31
- addc 23,23,14
- mulld 14,18,28
- adde 24,24,15
- mulld 15,19,28
- adde 25,25,16
- mulld 16,20,28
- adde 26,26,17
- mulld 17,21,28
- addc 22,22,14
- mulhdu 14,18,28
- adde 23,23,15
- mulhdu 15,19,28
- adde 24,24,16
- mulhdu 16,20,28
- adde 25,25,17
- adde 26,26,3
- mulhdu 17,21,28
- addze 3,0
- addi 28,1,8*8
- ldx 28,28,31
- stdu 22,8(29)
- addc 22,23,14
- adde 23,24,15
- adde 24,25,16
- adde 25,26,17
-
- bne .Loop_mul4x_1st_tail
-
- sub 15,30,8
- cmpld 30,4
- beq .Lmul4x_proceed
-
- ld 9,8*1(4)
- ld 10,8*2(4)
- ld 11,8*3(4)
- ldu 12,8*4(4)
- ld 18,8*1(6)
- ld 19,8*2(6)
- ld 20,8*3(6)
- ldu 21,8*4(6)
- b .Loop_mul4x_1st_tail
-
-.align 5
-.Lmul4x_proceed:
- ldu 27,8*4(5)
- addze 3,3
- ld 9,8*1(15)
- ld 10,8*2(15)
- ld 11,8*3(15)
- ld 12,8*4(15)
- addi 4,15,8*4
- sub 6,6,8
-
- std 22,8*1(29)
- std 23,8*2(29)
- std 24,8*3(29)
- std 25,8*4(29)
- std 3,8*5(29)
- ld 22,8*12(1)
- ld 23,8*13(1)
- ld 24,8*14(1)
- ld 25,8*15(1)
-
- ld 18,8*1(6)
- ld 19,8*2(6)
- ld 20,8*3(6)
- ldu 21,8*4(6)
- addic 29,1,8*7
- li 3,0
- b .Loop_mul4x_reduction
-
-.align 5
-.Loop_mul4x_reduction:
- mulld 14,9,27
- addze 3,3
- mulld 15,10,27
- addi 31,31,8
- mulld 16,11,27
- andi. 31,31,8*4-1
- mulld 17,12,27
- addc 22,22,14
- mulhdu 14,9,27
- adde 23,23,15
- mulhdu 15,10,27
- adde 24,24,16
- mulld 28,22,7
- adde 25,25,17
- mulhdu 16,11,27
- addze 26,0
- mulhdu 17,12,27
- ldx 27,5,31
- addc 23,23,14
-
- stdu 28,8(29)
- adde 24,24,15
- mulld 15,19,28
- adde 25,25,16
- mulld 16,20,28
- adde 26,26,17
- mulld 17,21,28
-
- addic 22,22,-1
- mulhdu 14,18,28
- adde 22,23,15
- mulhdu 15,19,28
- adde 23,24,16
- mulhdu 16,20,28
- adde 24,25,17
- mulhdu 17,21,28
- adde 25,26,3
- addze 3,0
- addc 22,22,14
- adde 23,23,15
- adde 24,24,16
- adde 25,25,17
-
- bne .Loop_mul4x_reduction
-
- ld 14,8*5(29)
- addze 3,3
- ld 15,8*6(29)
- ld 16,8*7(29)
- ld 17,8*8(29)
- ld 9,8*1(4)
- ld 10,8*2(4)
- ld 11,8*3(4)
- ldu 12,8*4(4)
- addc 22,22,14
- adde 23,23,15
- adde 24,24,16
- adde 25,25,17
-
-
- ld 28,8*8(1)
- ld 18,8*1(6)
- ld 19,8*2(6)
- ld 20,8*3(6)
- ldu 21,8*4(6)
- b .Loop_mul4x_tail
-
-.align 5
-.Loop_mul4x_tail:
- mulld 14,9,27
- addze 3,3
- mulld 15,10,27
- addi 31,31,8
- mulld 16,11,27
- andi. 31,31,8*4-1
- mulld 17,12,27
- addc 22,22,14
- mulhdu 14,9,27
- adde 23,23,15
- mulhdu 15,10,27
- adde 24,24,16
- mulhdu 16,11,27
- adde 25,25,17
- mulhdu 17,12,27
- addze 26,0
- ldx 27,5,31
- addc 23,23,14
- mulld 14,18,28
- adde 24,24,15
- mulld 15,19,28
- adde 25,25,16
- mulld 16,20,28
- adde 26,26,17
- mulld 17,21,28
- addc 22,22,14
- mulhdu 14,18,28
- adde 23,23,15
- mulhdu 15,19,28
- adde 24,24,16
- mulhdu 16,20,28
- adde 25,25,17
- mulhdu 17,21,28
- adde 26,26,3
- addi 28,1,8*8
- ldx 28,28,31
- addze 3,0
- stdu 22,8(29)
- addc 22,23,14
- adde 23,24,15
- adde 24,25,16
- adde 25,26,17
-
- bne .Loop_mul4x_tail
-
- ld 14,8*5(29)
- sub 15,6,8
- addze 3,3
- cmpld 30,4
- beq .Loop_mul4x_break
-
- ld 15,8*6(29)
- ld 16,8*7(29)
- ld 17,8*8(29)
- ld 9,8*1(4)
- ld 10,8*2(4)
- ld 11,8*3(4)
- ldu 12,8*4(4)
- addc 22,22,14
- adde 23,23,15
- adde 24,24,16
- adde 25,25,17
-
-
- ld 18,8*1(6)
- ld 19,8*2(6)
- ld 20,8*3(6)
- ldu 21,8*4(6)
- b .Loop_mul4x_tail
-
-.align 5
-.Loop_mul4x_break:
- ld 16,8*6(1)
- ld 17,8*7(1)
- addc 9,22,14
- ld 22,8*12(1)
- addze 10,23
- ld 23,8*13(1)
- addze 11,24
- ld 24,8*14(1)
- addze 12,25
- ld 25,8*15(1)
- addze 3,3
- std 9,8*1(29)
- sub 4,30,8
- std 10,8*2(29)
- std 11,8*3(29)
- std 12,8*4(29)
- std 3,8*5(29)
-
- ld 18,8*1(15)
- ld 19,8*2(15)
- ld 20,8*3(15)
- ld 21,8*4(15)
- addi 6,15,8*4
- cmpld 5,17
- beq .Lmul4x_post
-
- ldu 27,8*4(5)
- ld 9,8*1(4)
- ld 10,8*2(4)
- ld 11,8*3(4)
- ldu 12,8*4(4)
- li 3,0
- addic 29,1,8*7
- b .Loop_mul4x_reduction
-
-.align 5
-.Lmul4x_post:
-
-
-
-
- srwi 31,8,5
- mr 5,16
- subi 31,31,1
- mr 30,16
- subfc 14,18,22
- addi 29,1,8*15
- subfe 15,19,23
-
- mtctr 31
-.Lmul4x_sub:
- ld 18,8*1(6)
- ld 22,8*1(29)
- subfe 16,20,24
- ld 19,8*2(6)
- ld 23,8*2(29)
- subfe 17,21,25
- ld 20,8*3(6)
- ld 24,8*3(29)
- ldu 21,8*4(6)
- ldu 25,8*4(29)
- std 14,8*1(5)
- std 15,8*2(5)
- subfe 14,18,22
- std 16,8*3(5)
- stdu 17,8*4(5)
- subfe 15,19,23
- bdnz .Lmul4x_sub
-
- ld 9,8*1(30)
- std 14,8*1(5)
- ld 14,8*12(1)
- subfe 16,20,24
- ld 10,8*2(30)
- std 15,8*2(5)
- ld 15,8*13(1)
- subfe 17,21,25
- subfe 3,0,3
- addi 29,1,8*12
- ld 11,8*3(30)
- std 16,8*3(5)
- ld 16,8*14(1)
- ld 12,8*4(30)
- std 17,8*4(5)
- ld 17,8*15(1)
-
- mtctr 31
-.Lmul4x_cond_copy:
- and 14,14,3
- andc 9,9,3
- std 0,8*0(29)
- and 15,15,3
- andc 10,10,3
- std 0,8*1(29)
- and 16,16,3
- andc 11,11,3
- std 0,8*2(29)
- and 17,17,3
- andc 12,12,3
- std 0,8*3(29)
- or 22,14,9
- ld 9,8*5(30)
- ld 14,8*4(29)
- or 23,15,10
- ld 10,8*6(30)
- ld 15,8*5(29)
- or 24,16,11
- ld 11,8*7(30)
- ld 16,8*6(29)
- or 25,17,12
- ld 12,8*8(30)
- ld 17,8*7(29)
- addi 29,29,8*4
- std 22,8*1(30)
- std 23,8*2(30)
- std 24,8*3(30)
- stdu 25,8*4(30)
- bdnz .Lmul4x_cond_copy
-
- ld 5,0(1)
- and 14,14,3
- andc 9,9,3
- std 0,8*0(29)
- and 15,15,3
- andc 10,10,3
- std 0,8*1(29)
- and 16,16,3
- andc 11,11,3
- std 0,8*2(29)
- and 17,17,3
- andc 12,12,3
- std 0,8*3(29)
- or 22,14,9
- or 23,15,10
- std 0,8*4(29)
- or 24,16,11
- or 25,17,12
- std 22,8*1(30)
- std 23,8*2(30)
- std 24,8*3(30)
- std 25,8*4(30)
-
- b .Lmul4x_done
-
-.align 4
-.Lmul4x4_post_condition:
- ld 4,8*6(1)
- ld 5,0(1)
- addze 3,3
-
- subfc 9,18,22
- subfe 10,19,23
- subfe 11,20,24
- subfe 12,21,25
- subfe 3,0,3
-
- and 18,18,3
- and 19,19,3
- addc 9,9,18
- and 20,20,3
- adde 10,10,19
- and 21,21,3
- adde 11,11,20
- adde 12,12,21
-
- std 9,8*1(4)
- std 10,8*2(4)
- std 11,8*3(4)
- std 12,8*4(4)
-
-.Lmul4x_done:
- std 0,8*8(1)
- std 0,8*9(1)
- std 0,8*10(1)
- std 0,8*11(1)
- li 3,1
- ld 14,-8*18(5)
- ld 15,-8*17(5)
- ld 16,-8*16(5)
- ld 17,-8*15(5)
- ld 18,-8*14(5)
- ld 19,-8*13(5)
- ld 20,-8*12(5)
- ld 21,-8*11(5)
- ld 22,-8*10(5)
- ld 23,-8*9(5)
- ld 24,-8*8(5)
- ld 25,-8*7(5)
- ld 26,-8*6(5)
- ld 27,-8*5(5)
- ld 28,-8*4(5)
- ld 29,-8*3(5)
- ld 30,-8*2(5)
- ld 31,-8*1(5)
- mr 1,5
- blr
-.long 0
-.byte 0,12,4,0x20,0x80,18,6,0
-.long 0
+ andi. 0,8,7
+ bne .Lmul4x_do
+ cmpld 4,5
+ bne .Lmul4x_do
+ b .Lsqr8x_do
+.Lmul4x_do:
+ slwi 8,8,3
+ mr 9,1
+ li 10,-32*8
+ sub 10,10,8
+ stdux 1,1,10
+
+ std 14,-8*18(9)
+ std 15,-8*17(9)
+ std 16,-8*16(9)
+ std 17,-8*15(9)
+ std 18,-8*14(9)
+ std 19,-8*13(9)
+ std 20,-8*12(9)
+ std 21,-8*11(9)
+ std 22,-8*10(9)
+ std 23,-8*9(9)
+ std 24,-8*8(9)
+ std 25,-8*7(9)
+ std 26,-8*6(9)
+ std 27,-8*5(9)
+ std 28,-8*4(9)
+ std 29,-8*3(9)
+ std 30,-8*2(9)
+ std 31,-8*1(9)
+
+ subi 4,4,8
+ subi 6,6,8
+ subi 3,3,8
+ ld 7,0(7)
+
+ add 14,5,8
+ add 30,4,8
+ subi 14,14,8*4
+
+ ld 27,8*0(5)
+ li 22,0
+ ld 9,8*1(4)
+ li 23,0
+ ld 10,8*2(4)
+ li 24,0
+ ld 11,8*3(4)
+ li 25,0
+ ldu 12,8*4(4)
+ ld 18,8*1(6)
+ ld 19,8*2(6)
+ ld 20,8*3(6)
+ ldu 21,8*4(6)
+
+ std 3,8*6(1)
+ std 14,8*7(1)
+ li 3,0
+ addic 29,1,8*7
+ li 31,0
+ li 0,0
+ b .Loop_mul4x_1st_reduction
+
+.align 5
+.Loop_mul4x_1st_reduction:
+ mulld 14,9,27
+ addze 3,3
+ mulld 15,10,27
+ addi 31,31,8
+ mulld 16,11,27
+ andi. 31,31,8*4-1
+ mulld 17,12,27
+ addc 22,22,14
+ mulhdu 14,9,27
+ adde 23,23,15
+ mulhdu 15,10,27
+ adde 24,24,16
+ mulld 28,22,7
+ adde 25,25,17
+ mulhdu 16,11,27
+ addze 26,0
+ mulhdu 17,12,27
+ ldx 27,5,31
+ addc 23,23,14
+
+ stdu 28,8(29)
+ adde 24,24,15
+ mulld 15,19,28
+ adde 25,25,16
+ mulld 16,20,28
+ adde 26,26,17
+ mulld 17,21,28
+
+
+
+
+
+
+
+
+
+
+ addic 22,22,-1
+ mulhdu 14,18,28
+ adde 22,23,15
+ mulhdu 15,19,28
+ adde 23,24,16
+ mulhdu 16,20,28
+ adde 24,25,17
+ mulhdu 17,21,28
+ adde 25,26,3
+ addze 3,0
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+ bne .Loop_mul4x_1st_reduction
+
+ cmpld 30,4
+ beq .Lmul4x4_post_condition
+
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ ld 11,8*3(4)
+ ldu 12,8*4(4)
+ ld 28,8*8(1)
+ ld 18,8*1(6)
+ ld 19,8*2(6)
+ ld 20,8*3(6)
+ ldu 21,8*4(6)
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Loop_mul4x_1st_tail:
+ mulld 14,9,27
+ addze 3,3
+ mulld 15,10,27
+ addi 31,31,8
+ mulld 16,11,27
+ andi. 31,31,8*4-1
+ mulld 17,12,27
+ addc 22,22,14
+ mulhdu 14,9,27
+ adde 23,23,15
+ mulhdu 15,10,27
+ adde 24,24,16
+ mulhdu 16,11,27
+ adde 25,25,17
+ mulhdu 17,12,27
+ addze 26,0
+ ldx 27,5,31
+ addc 23,23,14
+ mulld 14,18,28
+ adde 24,24,15
+ mulld 15,19,28
+ adde 25,25,16
+ mulld 16,20,28
+ adde 26,26,17
+ mulld 17,21,28
+ addc 22,22,14
+ mulhdu 14,18,28
+ adde 23,23,15
+ mulhdu 15,19,28
+ adde 24,24,16
+ mulhdu 16,20,28
+ adde 25,25,17
+ adde 26,26,3
+ mulhdu 17,21,28
+ addze 3,0
+ addi 28,1,8*8
+ ldx 28,28,31
+ stdu 22,8(29)
+ addc 22,23,14
+ adde 23,24,15
+ adde 24,25,16
+ adde 25,26,17
+
+ bne .Loop_mul4x_1st_tail
+
+ sub 15,30,8
+ cmpld 30,4
+ beq .Lmul4x_proceed
+
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ ld 11,8*3(4)
+ ldu 12,8*4(4)
+ ld 18,8*1(6)
+ ld 19,8*2(6)
+ ld 20,8*3(6)
+ ldu 21,8*4(6)
+ b .Loop_mul4x_1st_tail
+
+.align 5
+.Lmul4x_proceed:
+ ldu 27,8*4(5)
+ addze 3,3
+ ld 9,8*1(15)
+ ld 10,8*2(15)
+ ld 11,8*3(15)
+ ld 12,8*4(15)
+ addi 4,15,8*4
+ sub 6,6,8
+
+ std 22,8*1(29)
+ std 23,8*2(29)
+ std 24,8*3(29)
+ std 25,8*4(29)
+ std 3,8*5(29)
+ ld 22,8*12(1)
+ ld 23,8*13(1)
+ ld 24,8*14(1)
+ ld 25,8*15(1)
+
+ ld 18,8*1(6)
+ ld 19,8*2(6)
+ ld 20,8*3(6)
+ ldu 21,8*4(6)
+ addic 29,1,8*7
+ li 3,0
+ b .Loop_mul4x_reduction
+
+.align 5
+.Loop_mul4x_reduction:
+ mulld 14,9,27
+ addze 3,3
+ mulld 15,10,27
+ addi 31,31,8
+ mulld 16,11,27
+ andi. 31,31,8*4-1
+ mulld 17,12,27
+ addc 22,22,14
+ mulhdu 14,9,27
+ adde 23,23,15
+ mulhdu 15,10,27
+ adde 24,24,16
+ mulld 28,22,7
+ adde 25,25,17
+ mulhdu 16,11,27
+ addze 26,0
+ mulhdu 17,12,27
+ ldx 27,5,31
+ addc 23,23,14
+
+ stdu 28,8(29)
+ adde 24,24,15
+ mulld 15,19,28
+ adde 25,25,16
+ mulld 16,20,28
+ adde 26,26,17
+ mulld 17,21,28
+
+ addic 22,22,-1
+ mulhdu 14,18,28
+ adde 22,23,15
+ mulhdu 15,19,28
+ adde 23,24,16
+ mulhdu 16,20,28
+ adde 24,25,17
+ mulhdu 17,21,28
+ adde 25,26,3
+ addze 3,0
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+ bne .Loop_mul4x_reduction
+
+ ld 14,8*5(29)
+ addze 3,3
+ ld 15,8*6(29)
+ ld 16,8*7(29)
+ ld 17,8*8(29)
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ ld 11,8*3(4)
+ ldu 12,8*4(4)
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+
+ ld 28,8*8(1)
+ ld 18,8*1(6)
+ ld 19,8*2(6)
+ ld 20,8*3(6)
+ ldu 21,8*4(6)
+ b .Loop_mul4x_tail
+
+.align 5
+.Loop_mul4x_tail:
+ mulld 14,9,27
+ addze 3,3
+ mulld 15,10,27
+ addi 31,31,8
+ mulld 16,11,27
+ andi. 31,31,8*4-1
+ mulld 17,12,27
+ addc 22,22,14
+ mulhdu 14,9,27
+ adde 23,23,15
+ mulhdu 15,10,27
+ adde 24,24,16
+ mulhdu 16,11,27
+ adde 25,25,17
+ mulhdu 17,12,27
+ addze 26,0
+ ldx 27,5,31
+ addc 23,23,14
+ mulld 14,18,28
+ adde 24,24,15
+ mulld 15,19,28
+ adde 25,25,16
+ mulld 16,20,28
+ adde 26,26,17
+ mulld 17,21,28
+ addc 22,22,14
+ mulhdu 14,18,28
+ adde 23,23,15
+ mulhdu 15,19,28
+ adde 24,24,16
+ mulhdu 16,20,28
+ adde 25,25,17
+ mulhdu 17,21,28
+ adde 26,26,3
+ addi 28,1,8*8
+ ldx 28,28,31
+ addze 3,0
+ stdu 22,8(29)
+ addc 22,23,14
+ adde 23,24,15
+ adde 24,25,16
+ adde 25,26,17
+
+ bne .Loop_mul4x_tail
+
+ ld 14,8*5(29)
+ sub 15,6,8
+ addze 3,3
+ cmpld 30,4
+ beq .Loop_mul4x_break
+
+ ld 15,8*6(29)
+ ld 16,8*7(29)
+ ld 17,8*8(29)
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ ld 11,8*3(4)
+ ldu 12,8*4(4)
+ addc 22,22,14
+ adde 23,23,15
+ adde 24,24,16
+ adde 25,25,17
+
+
+ ld 18,8*1(6)
+ ld 19,8*2(6)
+ ld 20,8*3(6)
+ ldu 21,8*4(6)
+ b .Loop_mul4x_tail
+
+.align 5
+.Loop_mul4x_break:
+ ld 16,8*6(1)
+ ld 17,8*7(1)
+ addc 9,22,14
+ ld 22,8*12(1)
+ addze 10,23
+ ld 23,8*13(1)
+ addze 11,24
+ ld 24,8*14(1)
+ addze 12,25
+ ld 25,8*15(1)
+ addze 3,3
+ std 9,8*1(29)
+ sub 4,30,8
+ std 10,8*2(29)
+ std 11,8*3(29)
+ std 12,8*4(29)
+ std 3,8*5(29)
+
+ ld 18,8*1(15)
+ ld 19,8*2(15)
+ ld 20,8*3(15)
+ ld 21,8*4(15)
+ addi 6,15,8*4
+ cmpld 5,17
+ beq .Lmul4x_post
+
+ ldu 27,8*4(5)
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ ld 11,8*3(4)
+ ldu 12,8*4(4)
+ li 3,0
+ addic 29,1,8*7
+ b .Loop_mul4x_reduction
+
+.align 5
+.Lmul4x_post:
+
+
+
+
+ srwi 31,8,5
+ mr 5,16
+ subi 31,31,1
+ mr 30,16
+ subfc 14,18,22
+ addi 29,1,8*15
+ subfe 15,19,23
+
+ mtctr 31
+.Lmul4x_sub:
+ ld 18,8*1(6)
+ ld 22,8*1(29)
+ subfe 16,20,24
+ ld 19,8*2(6)
+ ld 23,8*2(29)
+ subfe 17,21,25
+ ld 20,8*3(6)
+ ld 24,8*3(29)
+ ldu 21,8*4(6)
+ ldu 25,8*4(29)
+ std 14,8*1(5)
+ std 15,8*2(5)
+ subfe 14,18,22
+ std 16,8*3(5)
+ stdu 17,8*4(5)
+ subfe 15,19,23
+ bdnz .Lmul4x_sub
+
+ ld 9,8*1(30)
+ std 14,8*1(5)
+ ld 14,8*12(1)
+ subfe 16,20,24
+ ld 10,8*2(30)
+ std 15,8*2(5)
+ ld 15,8*13(1)
+ subfe 17,21,25
+ subfe 3,0,3
+ addi 29,1,8*12
+ ld 11,8*3(30)
+ std 16,8*3(5)
+ ld 16,8*14(1)
+ ld 12,8*4(30)
+ std 17,8*4(5)
+ ld 17,8*15(1)
+
+ mtctr 31
+.Lmul4x_cond_copy:
+ and 14,14,3
+ andc 9,9,3
+ std 0,8*0(29)
+ and 15,15,3
+ andc 10,10,3
+ std 0,8*1(29)
+ and 16,16,3
+ andc 11,11,3
+ std 0,8*2(29)
+ and 17,17,3
+ andc 12,12,3
+ std 0,8*3(29)
+ or 22,14,9
+ ld 9,8*5(30)
+ ld 14,8*4(29)
+ or 23,15,10
+ ld 10,8*6(30)
+ ld 15,8*5(29)
+ or 24,16,11
+ ld 11,8*7(30)
+ ld 16,8*6(29)
+ or 25,17,12
+ ld 12,8*8(30)
+ ld 17,8*7(29)
+ addi 29,29,8*4
+ std 22,8*1(30)
+ std 23,8*2(30)
+ std 24,8*3(30)
+ stdu 25,8*4(30)
+ bdnz .Lmul4x_cond_copy
+
+ ld 5,0(1)
+ and 14,14,3
+ andc 9,9,3
+ std 0,8*0(29)
+ and 15,15,3
+ andc 10,10,3
+ std 0,8*1(29)
+ and 16,16,3
+ andc 11,11,3
+ std 0,8*2(29)
+ and 17,17,3
+ andc 12,12,3
+ std 0,8*3(29)
+ or 22,14,9
+ or 23,15,10
+ std 0,8*4(29)
+ or 24,16,11
+ or 25,17,12
+ std 22,8*1(30)
+ std 23,8*2(30)
+ std 24,8*3(30)
+ std 25,8*4(30)
+
+ b .Lmul4x_done
+
+.align 4
+.Lmul4x4_post_condition:
+ ld 4,8*6(1)
+ ld 5,0(1)
+ addze 3,3
+
+ subfc 9,18,22
+ subfe 10,19,23
+ subfe 11,20,24
+ subfe 12,21,25
+ subfe 3,0,3
+
+ and 18,18,3
+ and 19,19,3
+ addc 9,9,18
+ and 20,20,3
+ adde 10,10,19
+ and 21,21,3
+ adde 11,11,20
+ adde 12,12,21
+
+ std 9,8*1(4)
+ std 10,8*2(4)
+ std 11,8*3(4)
+ std 12,8*4(4)
+
+.Lmul4x_done:
+ std 0,8*8(1)
+ std 0,8*9(1)
+ std 0,8*10(1)
+ std 0,8*11(1)
+ li 3,1
+ ld 14,-8*18(5)
+ ld 15,-8*17(5)
+ ld 16,-8*16(5)
+ ld 17,-8*15(5)
+ ld 18,-8*14(5)
+ ld 19,-8*13(5)
+ ld 20,-8*12(5)
+ ld 21,-8*11(5)
+ ld 22,-8*10(5)
+ ld 23,-8*9(5)
+ ld 24,-8*8(5)
+ ld 25,-8*7(5)
+ ld 26,-8*6(5)
+ ld 27,-8*5(5)
+ ld 28,-8*4(5)
+ ld 29,-8*3(5)
+ ld 30,-8*2(5)
+ ld 31,-8*1(5)
+ mr 1,5
+ blr
+.long 0
+.byte 0,12,4,0x20,0x80,18,6,0
+.long 0
.size .bn_mul4x_mont_int,.-.bn_mul4x_mont_int
.size bn_mul4x_mont_int,.-.bn_mul4x_mont_int
-.align 5
-__bn_sqr8x_mont:
-.Lsqr8x_do:
- mr 9,1
- slwi 10,8,4
- li 11,-32*8
- sub 10,11,10
- slwi 8,8,3
- stdux 1,1,10
-
- std 14,-8*18(9)
- std 15,-8*17(9)
- std 16,-8*16(9)
- std 17,-8*15(9)
- std 18,-8*14(9)
- std 19,-8*13(9)
- std 20,-8*12(9)
- std 21,-8*11(9)
- std 22,-8*10(9)
- std 23,-8*9(9)
- std 24,-8*8(9)
- std 25,-8*7(9)
- std 26,-8*6(9)
- std 27,-8*5(9)
- std 28,-8*4(9)
- std 29,-8*3(9)
- std 30,-8*2(9)
- std 31,-8*1(9)
-
- subi 4,4,8
- subi 18,6,8
- subi 3,3,8
- ld 7,0(7)
- li 0,0
-
- add 6,4,8
- ld 9,8*1(4)
-
- ld 10,8*2(4)
- li 23,0
- ld 11,8*3(4)
- li 24,0
- ld 12,8*4(4)
- li 25,0
- ld 14,8*5(4)
- li 26,0
- ld 15,8*6(4)
- li 27,0
- ld 16,8*7(4)
- li 28,0
- ldu 17,8*8(4)
- li 29,0
-
- addi 5,1,8*11
- subic. 30,8,8*8
- b .Lsqr8x_zero_start
-
-.align 5
-.Lsqr8x_zero:
- subic. 30,30,8*8
- std 0,8*1(5)
- std 0,8*2(5)
- std 0,8*3(5)
- std 0,8*4(5)
- std 0,8*5(5)
- std 0,8*6(5)
- std 0,8*7(5)
- std 0,8*8(5)
-.Lsqr8x_zero_start:
- std 0,8*9(5)
- std 0,8*10(5)
- std 0,8*11(5)
- std 0,8*12(5)
- std 0,8*13(5)
- std 0,8*14(5)
- std 0,8*15(5)
- stdu 0,8*16(5)
- bne .Lsqr8x_zero
-
- std 3,8*6(1)
- std 18,8*7(1)
- std 7,8*8(1)
- std 5,8*9(1)
- std 0,8*10(1)
- addi 5,1,8*11
-
-
-.align 5
-.Lsqr8x_outer_loop:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- mulld 18,10,9
- mulld 19,11,9
- mulld 20,12,9
- mulld 21,14,9
- addc 23,23,18
- mulld 18,15,9
- adde 24,24,19
- mulld 19,16,9
- adde 25,25,20
- mulld 20,17,9
- adde 26,26,21
- mulhdu 21,10,9
- adde 27,27,18
- mulhdu 18,11,9
- adde 28,28,19
- mulhdu 19,12,9
- adde 29,29,20
- mulhdu 20,14,9
- std 22,8*1(5)
- addze 22,0
- std 23,8*2(5)
- addc 24,24,21
- mulhdu 21,15,9
- adde 25,25,18
- mulhdu 18,16,9
- adde 26,26,19
- mulhdu 19,17,9
- adde 27,27,20
- mulld 20,11,10
- adde 28,28,21
- mulld 21,12,10
- adde 29,29,18
- mulld 18,14,10
- adde 22,22,19
-
- mulld 19,15,10
- addc 25,25,20
- mulld 20,16,10
- adde 26,26,21
- mulld 21,17,10
- adde 27,27,18
- mulhdu 18,11,10
- adde 28,28,19
- mulhdu 19,12,10
- adde 29,29,20
- mulhdu 20,14,10
- adde 22,22,21
- mulhdu 21,15,10
- std 24,8*3(5)
- addze 23,0
- std 25,8*4(5)
- addc 26,26,18
- mulhdu 18,16,10
- adde 27,27,19
- mulhdu 19,17,10
- adde 28,28,20
- mulld 20,12,11
- adde 29,29,21
- mulld 21,14,11
- adde 22,22,18
- mulld 18,15,11
- adde 23,23,19
-
- mulld 19,16,11
- addc 27,27,20
- mulld 20,17,11
- adde 28,28,21
- mulhdu 21,12,11
- adde 29,29,18
- mulhdu 18,14,11
- adde 22,22,19
- mulhdu 19,15,11
- adde 23,23,20
- mulhdu 20,16,11
- std 26,8*5(5)
- addze 24,0
- std 27,8*6(5)
- addc 28,28,21
- mulhdu 21,17,11
- adde 29,29,18
- mulld 18,14,12
- adde 22,22,19
- mulld 19,15,12
- adde 23,23,20
- mulld 20,16,12
- adde 24,24,21
-
- mulld 21,17,12
- addc 29,29,18
- mulhdu 18,14,12
- adde 22,22,19
- mulhdu 19,15,12
- adde 23,23,20
- mulhdu 20,16,12
- adde 24,24,21
- mulhdu 21,17,12
- std 28,8*7(5)
- addze 25,0
- stdu 29,8*8(5)
- addc 22,22,18
- mulld 18,15,14
- adde 23,23,19
- mulld 19,16,14
- adde 24,24,20
- mulld 20,17,14
- adde 25,25,21
-
- mulhdu 21,15,14
- addc 23,23,18
- mulhdu 18,16,14
- adde 24,24,19
- mulhdu 19,17,14
- adde 25,25,20
- mulld 20,16,15
- addze 26,0
- addc 24,24,21
- mulld 21,17,15
- adde 25,25,18
- mulhdu 18,16,15
- adde 26,26,19
-
- mulhdu 19,17,15
- addc 25,25,20
- mulld 20,17,16
- adde 26,26,21
- mulhdu 21,17,16
- addze 27,0
- addc 26,26,18
- cmpld 6,4
- adde 27,27,19
-
- addc 27,27,20
- sub 18,6,8
- addze 28,0
- add 28,28,21
-
- beq .Lsqr8x_outer_break
-
- mr 7,9
- ld 9,8*1(5)
- ld 10,8*2(5)
- ld 11,8*3(5)
- ld 12,8*4(5)
- ld 14,8*5(5)
- ld 15,8*6(5)
- ld 16,8*7(5)
- ld 17,8*8(5)
- addc 22,22,9
- ld 9,8*1(4)
- adde 23,23,10
- ld 10,8*2(4)
- adde 24,24,11
- ld 11,8*3(4)
- adde 25,25,12
- ld 12,8*4(4)
- adde 26,26,14
- ld 14,8*5(4)
- adde 27,27,15
- ld 15,8*6(4)
- adde 28,28,16
- ld 16,8*7(4)
- subi 3,4,8*7
- addze 29,17
- ldu 17,8*8(4)
-
- li 30,0
- b .Lsqr8x_mul
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.align 5
-.Lsqr8x_mul:
- mulld 18,9,7
- addze 31,0
- mulld 19,10,7
- addi 30,30,8
- mulld 20,11,7
- andi. 30,30,8*8-1
- mulld 21,12,7
- addc 22,22,18
- mulld 18,14,7
- adde 23,23,19
- mulld 19,15,7
- adde 24,24,20
- mulld 20,16,7
- adde 25,25,21
- mulld 21,17,7
- adde 26,26,18
- mulhdu 18,9,7
- adde 27,27,19
- mulhdu 19,10,7
- adde 28,28,20
- mulhdu 20,11,7
- adde 29,29,21
- mulhdu 21,12,7
- addze 31,31
- stdu 22,8(5)
- addc 22,23,18
- mulhdu 18,14,7
- adde 23,24,19
- mulhdu 19,15,7
- adde 24,25,20
- mulhdu 20,16,7
- adde 25,26,21
- mulhdu 21,17,7
- ldx 7,3,30
- adde 26,27,18
- adde 27,28,19
- adde 28,29,20
- adde 29,31,21
-
- bne .Lsqr8x_mul
-
-
- cmpld 4,6
- beq .Lsqr8x_break
-
- ld 9,8*1(5)
- ld 10,8*2(5)
- ld 11,8*3(5)
- ld 12,8*4(5)
- ld 14,8*5(5)
- ld 15,8*6(5)
- ld 16,8*7(5)
- ld 17,8*8(5)
- addc 22,22,9
- ld 9,8*1(4)
- adde 23,23,10
- ld 10,8*2(4)
- adde 24,24,11
- ld 11,8*3(4)
- adde 25,25,12
- ld 12,8*4(4)
- adde 26,26,14
- ld 14,8*5(4)
- adde 27,27,15
- ld 15,8*6(4)
- adde 28,28,16
- ld 16,8*7(4)
- adde 29,29,17
- ldu 17,8*8(4)
-
- b .Lsqr8x_mul
-
-.align 5
-.Lsqr8x_break:
- ld 9,8*8(3)
- addi 4,3,8*15
- ld 10,8*9(3)
- sub. 18,6,4
- ld 11,8*10(3)
- sub 19,5,18
- ld 12,8*11(3)
- ld 14,8*12(3)
- ld 15,8*13(3)
- ld 16,8*14(3)
- ld 17,8*15(3)
- beq .Lsqr8x_outer_loop
-
- std 22,8*1(5)
- ld 22,8*1(19)
- std 23,8*2(5)
- ld 23,8*2(19)
- std 24,8*3(5)
- ld 24,8*3(19)
- std 25,8*4(5)
- ld 25,8*4(19)
- std 26,8*5(5)
- ld 26,8*5(19)
- std 27,8*6(5)
- ld 27,8*6(19)
- std 28,8*7(5)
- ld 28,8*7(19)
- std 29,8*8(5)
- ld 29,8*8(19)
- mr 5,19
- b .Lsqr8x_outer_loop
-
-.align 5
-.Lsqr8x_outer_break:
-
-
- ld 10,8*1(18)
- ld 12,8*2(18)
- ld 15,8*3(18)
- ld 17,8*4(18)
- addi 4,18,8*4
-
- ld 19,8*13(1)
- ld 20,8*14(1)
- ld 21,8*15(1)
- ld 18,8*16(1)
-
- std 22,8*1(5)
- srwi 30,8,5
- std 23,8*2(5)
- subi 30,30,1
- std 24,8*3(5)
- std 25,8*4(5)
- std 26,8*5(5)
- std 27,8*6(5)
- std 28,8*7(5)
-
- addi 5,1,8*11
- mulld 22,10,10
- mulhdu 10,10,10
- add 23,19,19
- srdi 19,19,64-1
- mulld 11,12,12
- mulhdu 12,12,12
- addc 23,23,10
- add 24,20,20
- srdi 20,20,64-1
- add 25,21,21
- srdi 21,21,64-1
- or 24,24,19
-
- mtctr 30
-.Lsqr4x_shift_n_add:
- mulld 14,15,15
- mulhdu 15,15,15
- ld 19,8*6(5)
- ld 10,8*1(4)
- adde 24,24,11
- add 26,18,18
- srdi 18,18,64-1
- or 25,25,20
- ld 20,8*7(5)
- adde 25,25,12
- ld 12,8*2(4)
- add 27,19,19
- srdi 19,19,64-1
- or 26,26,21
- ld 21,8*8(5)
- mulld 16,17,17
- mulhdu 17,17,17
- adde 26,26,14
- add 28,20,20
- srdi 20,20,64-1
- or 27,27,18
- ld 18,8*9(5)
- adde 27,27,15
- ld 15,8*3(4)
- add 29,21,21
- srdi 21,21,64-1
- or 28,28,19
- ld 19,8*10(5)
- mulld 9,10,10
- mulhdu 10,10,10
- adde 28,28,16
- std 22,8*1(5)
- add 22,18,18
- srdi 18,18,64-1
- or 29,29,20
- ld 20,8*11(5)
- adde 29,29,17
- ldu 17,8*4(4)
- std 23,8*2(5)
- add 23,19,19
- srdi 19,19,64-1
- or 22,22,21
- ld 21,8*12(5)
- mulld 11,12,12
- mulhdu 12,12,12
- adde 22,22,9
- std 24,8*3(5)
- add 24,20,20
- srdi 20,20,64-1
- or 23,23,18
- ld 18,8*13(5)
- adde 23,23,10
- std 25,8*4(5)
- std 26,8*5(5)
- std 27,8*6(5)
- std 28,8*7(5)
- stdu 29,8*8(5)
- add 25,21,21
- srdi 21,21,64-1
- or 24,24,19
- bdnz .Lsqr4x_shift_n_add
- ld 4,8*7(1)
- ld 7,8*8(1)
-
- mulld 14,15,15
- mulhdu 15,15,15
- std 22,8*1(5)
- ld 22,8*12(1)
- ld 19,8*6(5)
- adde 24,24,11
- add 26,18,18
- srdi 18,18,64-1
- or 25,25,20
- ld 20,8*7(5)
- adde 25,25,12
- add 27,19,19
- srdi 19,19,64-1
- or 26,26,21
- mulld 16,17,17
- mulhdu 17,17,17
- adde 26,26,14
- add 28,20,20
- srdi 20,20,64-1
- or 27,27,18
- std 23,8*2(5)
- ld 23,8*13(1)
- adde 27,27,15
- or 28,28,19
- ld 9,8*1(4)
- ld 10,8*2(4)
- adde 28,28,16
- ld 11,8*3(4)
- ld 12,8*4(4)
- adde 29,17,20
- ld 14,8*5(4)
- ld 15,8*6(4)
-
-
-
- mulld 31,7,22
- li 30,8
- ld 16,8*7(4)
- add 6,4,8
- ldu 17,8*8(4)
- std 24,8*3(5)
- ld 24,8*14(1)
- std 25,8*4(5)
- ld 25,8*15(1)
- std 26,8*5(5)
- ld 26,8*16(1)
- std 27,8*6(5)
- ld 27,8*17(1)
- std 28,8*7(5)
- ld 28,8*18(1)
- std 29,8*8(5)
- ld 29,8*19(1)
- addi 5,1,8*11
- mtctr 30
- b .Lsqr8x_reduction
-
-.align 5
-.Lsqr8x_reduction:
-
- mulld 19,10,31
- mulld 20,11,31
- stdu 31,8(5)
- mulld 21,12,31
-
- addic 22,22,-1
- mulld 18,14,31
- adde 22,23,19
- mulld 19,15,31
- adde 23,24,20
- mulld 20,16,31
- adde 24,25,21
- mulld 21,17,31
- adde 25,26,18
- mulhdu 18,9,31
- adde 26,27,19
- mulhdu 19,10,31
- adde 27,28,20
- mulhdu 20,11,31
- adde 28,29,21
- mulhdu 21,12,31
- addze 29,0
- addc 22,22,18
- mulhdu 18,14,31
- adde 23,23,19
- mulhdu 19,15,31
- adde 24,24,20
- mulhdu 20,16,31
- adde 25,25,21
- mulhdu 21,17,31
- mulld 31,7,22
- adde 26,26,18
- adde 27,27,19
- adde 28,28,20
- adde 29,29,21
- bdnz .Lsqr8x_reduction
-
- ld 18,8*1(5)
- ld 19,8*2(5)
- ld 20,8*3(5)
- ld 21,8*4(5)
- subi 3,5,8*7
- cmpld 6,4
- addc 22,22,18
- ld 18,8*5(5)
- adde 23,23,19
- ld 19,8*6(5)
- adde 24,24,20
- ld 20,8*7(5)
- adde 25,25,21
- ld 21,8*8(5)
- adde 26,26,18
- adde 27,27,19
- adde 28,28,20
- adde 29,29,21
-
- beq .Lsqr8x8_post_condition
-
- ld 7,8*0(3)
- ld 9,8*1(4)
- ld 10,8*2(4)
- ld 11,8*3(4)
- ld 12,8*4(4)
- ld 14,8*5(4)
- ld 15,8*6(4)
- ld 16,8*7(4)
- ldu 17,8*8(4)
- li 30,0
-
-.align 5
-.Lsqr8x_tail:
- mulld 18,9,7
- addze 31,0
- mulld 19,10,7
- addi 30,30,8
- mulld 20,11,7
- andi. 30,30,8*8-1
- mulld 21,12,7
- addc 22,22,18
- mulld 18,14,7
- adde 23,23,19
- mulld 19,15,7
- adde 24,24,20
- mulld 20,16,7
- adde 25,25,21
- mulld 21,17,7
- adde 26,26,18
- mulhdu 18,9,7
- adde 27,27,19
- mulhdu 19,10,7
- adde 28,28,20
- mulhdu 20,11,7
- adde 29,29,21
- mulhdu 21,12,7
- addze 31,31
- stdu 22,8(5)
- addc 22,23,18
- mulhdu 18,14,7
- adde 23,24,19
- mulhdu 19,15,7
- adde 24,25,20
- mulhdu 20,16,7
- adde 25,26,21
- mulhdu 21,17,7
- ldx 7,3,30
- adde 26,27,18
- adde 27,28,19
- adde 28,29,20
- adde 29,31,21
-
- bne .Lsqr8x_tail
-
-
- ld 9,8*1(5)
- ld 31,8*10(1)
- cmpld 6,4
- ld 10,8*2(5)
- sub 20,6,8
- ld 11,8*3(5)
- ld 12,8*4(5)
- ld 14,8*5(5)
- ld 15,8*6(5)
- ld 16,8*7(5)
- ld 17,8*8(5)
- beq .Lsqr8x_tail_break
-
- addc 22,22,9
- ld 9,8*1(4)
- adde 23,23,10
- ld 10,8*2(4)
- adde 24,24,11
- ld 11,8*3(4)
- adde 25,25,12
- ld 12,8*4(4)
- adde 26,26,14
- ld 14,8*5(4)
- adde 27,27,15
- ld 15,8*6(4)
- adde 28,28,16
- ld 16,8*7(4)
- adde 29,29,17
- ldu 17,8*8(4)
-
- b .Lsqr8x_tail
-
-.align 5
-.Lsqr8x_tail_break:
- ld 7,8*8(1)
- ld 21,8*9(1)
- addi 30,5,8*8
-
- addic 31,31,-1
- adde 18,22,9
- ld 22,8*8(3)
- ld 9,8*1(20)
- adde 19,23,10
- ld 23,8*9(3)
- ld 10,8*2(20)
- adde 24,24,11
- ld 11,8*3(20)
- adde 25,25,12
- ld 12,8*4(20)
- adde 26,26,14
- ld 14,8*5(20)
- adde 27,27,15
- ld 15,8*6(20)
- adde 28,28,16
- ld 16,8*7(20)
- adde 29,29,17
- ld 17,8*8(20)
- addi 4,20,8*8
- addze 20,0
- mulld 31,7,22
- std 18,8*1(5)
- cmpld 30,21
- std 19,8*2(5)
- li 30,8
- std 24,8*3(5)
- ld 24,8*10(3)
- std 25,8*4(5)
- ld 25,8*11(3)
- std 26,8*5(5)
- ld 26,8*12(3)
- std 27,8*6(5)
- ld 27,8*13(3)
- std 28,8*7(5)
- ld 28,8*14(3)
- std 29,8*8(5)
- ld 29,8*15(3)
- std 20,8*10(1)
- addi 5,3,8*7
- mtctr 30
- bne .Lsqr8x_reduction
-
-
-
-
-
-
- ld 3,8*6(1)
- srwi 30,8,6
- mr 7,5
- addi 5,5,8*8
- subi 30,30,1
- subfc 18,9,22
- subfe 19,10,23
- mr 31,20
- mr 6,3
-
- mtctr 30
- b .Lsqr8x_sub
-
-.align 5
-.Lsqr8x_sub:
- ld 9,8*1(4)
- ld 22,8*1(5)
- ld 10,8*2(4)
- ld 23,8*2(5)
- subfe 20,11,24
- ld 11,8*3(4)
- ld 24,8*3(5)
- subfe 21,12,25
- ld 12,8*4(4)
- ld 25,8*4(5)
- std 18,8*1(3)
- subfe 18,14,26
- ld 14,8*5(4)
- ld 26,8*5(5)
- std 19,8*2(3)
- subfe 19,15,27
- ld 15,8*6(4)
- ld 27,8*6(5)
- std 20,8*3(3)
- subfe 20,16,28
- ld 16,8*7(4)
- ld 28,8*7(5)
- std 21,8*4(3)
- subfe 21,17,29
- ldu 17,8*8(4)
- ldu 29,8*8(5)
- std 18,8*5(3)
- subfe 18,9,22
- std 19,8*6(3)
- subfe 19,10,23
- std 20,8*7(3)
- stdu 21,8*8(3)
- bdnz .Lsqr8x_sub
-
- srwi 30,8,5
- ld 9,8*1(6)
- ld 22,8*1(7)
- subi 30,30,1
- ld 10,8*2(6)
- ld 23,8*2(7)
- subfe 20,11,24
- ld 11,8*3(6)
- ld 24,8*3(7)
- subfe 21,12,25
- ld 12,8*4(6)
- ldu 25,8*4(7)
- std 18,8*1(3)
- subfe 18,14,26
- std 19,8*2(3)
- subfe 19,15,27
- std 20,8*3(3)
- subfe 20,16,28
- std 21,8*4(3)
- subfe 21,17,29
- std 18,8*5(3)
- subfe 31,0,31
- std 19,8*6(3)
- std 20,8*7(3)
- std 21,8*8(3)
-
- addi 5,1,8*11
- mtctr 30
-
-.Lsqr4x_cond_copy:
- andc 9,9,31
- std 0,-8*3(7)
- and 22,22,31
- std 0,-8*2(7)
- andc 10,10,31
- std 0,-8*1(7)
- and 23,23,31
- std 0,-8*0(7)
- andc 11,11,31
- std 0,8*1(5)
- and 24,24,31
- std 0,8*2(5)
- andc 12,12,31
- std 0,8*3(5)
- and 25,25,31
- stdu 0,8*4(5)
- or 18,9,22
- ld 9,8*5(6)
- ld 22,8*1(7)
- or 19,10,23
- ld 10,8*6(6)
- ld 23,8*2(7)
- or 20,11,24
- ld 11,8*7(6)
- ld 24,8*3(7)
- or 21,12,25
- ld 12,8*8(6)
- ldu 25,8*4(7)
- std 18,8*1(6)
- std 19,8*2(6)
- std 20,8*3(6)
- stdu 21,8*4(6)
- bdnz .Lsqr4x_cond_copy
-
- ld 4,0(1)
- andc 9,9,31
- and 22,22,31
- andc 10,10,31
- and 23,23,31
- andc 11,11,31
- and 24,24,31
- andc 12,12,31
- and 25,25,31
- or 18,9,22
- or 19,10,23
- or 20,11,24
- or 21,12,25
- std 18,8*1(6)
- std 19,8*2(6)
- std 20,8*3(6)
- std 21,8*4(6)
-
- b .Lsqr8x_done
-
-.align 5
-.Lsqr8x8_post_condition:
- ld 3,8*6(1)
- ld 4,0(1)
- addze 31,0
-
-
- subfc 22,9,22
- subfe 23,10,23
- std 0,8*12(1)
- std 0,8*13(1)
- subfe 24,11,24
- std 0,8*14(1)
- std 0,8*15(1)
- subfe 25,12,25
- std 0,8*16(1)
- std 0,8*17(1)
- subfe 26,14,26
- std 0,8*18(1)
- std 0,8*19(1)
- subfe 27,15,27
- std 0,8*20(1)
- std 0,8*21(1)
- subfe 28,16,28
- std 0,8*22(1)
- std 0,8*23(1)
- subfe 29,17,29
- std 0,8*24(1)
- std 0,8*25(1)
- subfe 31,0,31
- std 0,8*26(1)
- std 0,8*27(1)
-
- and 9,9,31
- and 10,10,31
- addc 22,22,9
- and 11,11,31
- adde 23,23,10
- and 12,12,31
- adde 24,24,11
- and 14,14,31
- adde 25,25,12
- and 15,15,31
- adde 26,26,14
- and 16,16,31
- adde 27,27,15
- and 17,17,31
- adde 28,28,16
- adde 29,29,17
- std 22,8*1(3)
- std 23,8*2(3)
- std 24,8*3(3)
- std 25,8*4(3)
- std 26,8*5(3)
- std 27,8*6(3)
- std 28,8*7(3)
- std 29,8*8(3)
-
-.Lsqr8x_done:
- std 0,8*8(1)
- std 0,8*10(1)
-
- ld 14,-8*18(4)
- li 3,1
- ld 15,-8*17(4)
- ld 16,-8*16(4)
- ld 17,-8*15(4)
- ld 18,-8*14(4)
- ld 19,-8*13(4)
- ld 20,-8*12(4)
- ld 21,-8*11(4)
- ld 22,-8*10(4)
- ld 23,-8*9(4)
- ld 24,-8*8(4)
- ld 25,-8*7(4)
- ld 26,-8*6(4)
- ld 27,-8*5(4)
- ld 28,-8*4(4)
- ld 29,-8*3(4)
- ld 30,-8*2(4)
- ld 31,-8*1(4)
- mr 1,4
- blr
-.long 0
-.byte 0,12,4,0x20,0x80,18,6,0
-.long 0
-.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
-.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 2
+.align 5
+__bn_sqr8x_mont:
+.Lsqr8x_do:
+ mr 9,1
+ slwi 10,8,4
+ li 11,-32*8
+ sub 10,11,10
+ slwi 8,8,3
+ stdux 1,1,10
+
+ std 14,-8*18(9)
+ std 15,-8*17(9)
+ std 16,-8*16(9)
+ std 17,-8*15(9)
+ std 18,-8*14(9)
+ std 19,-8*13(9)
+ std 20,-8*12(9)
+ std 21,-8*11(9)
+ std 22,-8*10(9)
+ std 23,-8*9(9)
+ std 24,-8*8(9)
+ std 25,-8*7(9)
+ std 26,-8*6(9)
+ std 27,-8*5(9)
+ std 28,-8*4(9)
+ std 29,-8*3(9)
+ std 30,-8*2(9)
+ std 31,-8*1(9)
+
+ subi 4,4,8
+ subi 18,6,8
+ subi 3,3,8
+ ld 7,0(7)
+ li 0,0
+
+ add 6,4,8
+ ld 9,8*1(4)
+
+ ld 10,8*2(4)
+ li 23,0
+ ld 11,8*3(4)
+ li 24,0
+ ld 12,8*4(4)
+ li 25,0
+ ld 14,8*5(4)
+ li 26,0
+ ld 15,8*6(4)
+ li 27,0
+ ld 16,8*7(4)
+ li 28,0
+ ldu 17,8*8(4)
+ li 29,0
+
+ addi 5,1,8*11
+ subic. 30,8,8*8
+ b .Lsqr8x_zero_start
+
+.align 5
+.Lsqr8x_zero:
+ subic. 30,30,8*8
+ std 0,8*1(5)
+ std 0,8*2(5)
+ std 0,8*3(5)
+ std 0,8*4(5)
+ std 0,8*5(5)
+ std 0,8*6(5)
+ std 0,8*7(5)
+ std 0,8*8(5)
+.Lsqr8x_zero_start:
+ std 0,8*9(5)
+ std 0,8*10(5)
+ std 0,8*11(5)
+ std 0,8*12(5)
+ std 0,8*13(5)
+ std 0,8*14(5)
+ std 0,8*15(5)
+ stdu 0,8*16(5)
+ bne .Lsqr8x_zero
+
+ std 3,8*6(1)
+ std 18,8*7(1)
+ std 7,8*8(1)
+ std 5,8*9(1)
+ std 0,8*10(1)
+ addi 5,1,8*11
+
+
+.align 5
+.Lsqr8x_outer_loop:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mulld 18,10,9
+ mulld 19,11,9
+ mulld 20,12,9
+ mulld 21,14,9
+ addc 23,23,18
+ mulld 18,15,9
+ adde 24,24,19
+ mulld 19,16,9
+ adde 25,25,20
+ mulld 20,17,9
+ adde 26,26,21
+ mulhdu 21,10,9
+ adde 27,27,18
+ mulhdu 18,11,9
+ adde 28,28,19
+ mulhdu 19,12,9
+ adde 29,29,20
+ mulhdu 20,14,9
+ std 22,8*1(5)
+ addze 22,0
+ std 23,8*2(5)
+ addc 24,24,21
+ mulhdu 21,15,9
+ adde 25,25,18
+ mulhdu 18,16,9
+ adde 26,26,19
+ mulhdu 19,17,9
+ adde 27,27,20
+ mulld 20,11,10
+ adde 28,28,21
+ mulld 21,12,10
+ adde 29,29,18
+ mulld 18,14,10
+ adde 22,22,19
+
+ mulld 19,15,10
+ addc 25,25,20
+ mulld 20,16,10
+ adde 26,26,21
+ mulld 21,17,10
+ adde 27,27,18
+ mulhdu 18,11,10
+ adde 28,28,19
+ mulhdu 19,12,10
+ adde 29,29,20
+ mulhdu 20,14,10
+ adde 22,22,21
+ mulhdu 21,15,10
+ std 24,8*3(5)
+ addze 23,0
+ std 25,8*4(5)
+ addc 26,26,18
+ mulhdu 18,16,10
+ adde 27,27,19
+ mulhdu 19,17,10
+ adde 28,28,20
+ mulld 20,12,11
+ adde 29,29,21
+ mulld 21,14,11
+ adde 22,22,18
+ mulld 18,15,11
+ adde 23,23,19
+
+ mulld 19,16,11
+ addc 27,27,20
+ mulld 20,17,11
+ adde 28,28,21
+ mulhdu 21,12,11
+ adde 29,29,18
+ mulhdu 18,14,11
+ adde 22,22,19
+ mulhdu 19,15,11
+ adde 23,23,20
+ mulhdu 20,16,11
+ std 26,8*5(5)
+ addze 24,0
+ std 27,8*6(5)
+ addc 28,28,21
+ mulhdu 21,17,11
+ adde 29,29,18
+ mulld 18,14,12
+ adde 22,22,19
+ mulld 19,15,12
+ adde 23,23,20
+ mulld 20,16,12
+ adde 24,24,21
+
+ mulld 21,17,12
+ addc 29,29,18
+ mulhdu 18,14,12
+ adde 22,22,19
+ mulhdu 19,15,12
+ adde 23,23,20
+ mulhdu 20,16,12
+ adde 24,24,21
+ mulhdu 21,17,12
+ std 28,8*7(5)
+ addze 25,0
+ stdu 29,8*8(5)
+ addc 22,22,18
+ mulld 18,15,14
+ adde 23,23,19
+ mulld 19,16,14
+ adde 24,24,20
+ mulld 20,17,14
+ adde 25,25,21
+
+ mulhdu 21,15,14
+ addc 23,23,18
+ mulhdu 18,16,14
+ adde 24,24,19
+ mulhdu 19,17,14
+ adde 25,25,20
+ mulld 20,16,15
+ addze 26,0
+ addc 24,24,21
+ mulld 21,17,15
+ adde 25,25,18
+ mulhdu 18,16,15
+ adde 26,26,19
+
+ mulhdu 19,17,15
+ addc 25,25,20
+ mulld 20,17,16
+ adde 26,26,21
+ mulhdu 21,17,16
+ addze 27,0
+ addc 26,26,18
+ cmpld 6,4
+ adde 27,27,19
+
+ addc 27,27,20
+ sub 18,6,8
+ addze 28,0
+ add 28,28,21
+
+ beq .Lsqr8x_outer_break
+
+ mr 7,9
+ ld 9,8*1(5)
+ ld 10,8*2(5)
+ ld 11,8*3(5)
+ ld 12,8*4(5)
+ ld 14,8*5(5)
+ ld 15,8*6(5)
+ ld 16,8*7(5)
+ ld 17,8*8(5)
+ addc 22,22,9
+ ld 9,8*1(4)
+ adde 23,23,10
+ ld 10,8*2(4)
+ adde 24,24,11
+ ld 11,8*3(4)
+ adde 25,25,12
+ ld 12,8*4(4)
+ adde 26,26,14
+ ld 14,8*5(4)
+ adde 27,27,15
+ ld 15,8*6(4)
+ adde 28,28,16
+ ld 16,8*7(4)
+ subi 3,4,8*7
+ addze 29,17
+ ldu 17,8*8(4)
+
+ li 30,0
+ b .Lsqr8x_mul
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.align 5
+.Lsqr8x_mul:
+ mulld 18,9,7
+ addze 31,0
+ mulld 19,10,7
+ addi 30,30,8
+ mulld 20,11,7
+ andi. 30,30,8*8-1
+ mulld 21,12,7
+ addc 22,22,18
+ mulld 18,14,7
+ adde 23,23,19
+ mulld 19,15,7
+ adde 24,24,20
+ mulld 20,16,7
+ adde 25,25,21
+ mulld 21,17,7
+ adde 26,26,18
+ mulhdu 18,9,7
+ adde 27,27,19
+ mulhdu 19,10,7
+ adde 28,28,20
+ mulhdu 20,11,7
+ adde 29,29,21
+ mulhdu 21,12,7
+ addze 31,31
+ stdu 22,8(5)
+ addc 22,23,18
+ mulhdu 18,14,7
+ adde 23,24,19
+ mulhdu 19,15,7
+ adde 24,25,20
+ mulhdu 20,16,7
+ adde 25,26,21
+ mulhdu 21,17,7
+ ldx 7,3,30
+ adde 26,27,18
+ adde 27,28,19
+ adde 28,29,20
+ adde 29,31,21
+
+ bne .Lsqr8x_mul
+
+
+ cmpld 4,6
+ beq .Lsqr8x_break
+
+ ld 9,8*1(5)
+ ld 10,8*2(5)
+ ld 11,8*3(5)
+ ld 12,8*4(5)
+ ld 14,8*5(5)
+ ld 15,8*6(5)
+ ld 16,8*7(5)
+ ld 17,8*8(5)
+ addc 22,22,9
+ ld 9,8*1(4)
+ adde 23,23,10
+ ld 10,8*2(4)
+ adde 24,24,11
+ ld 11,8*3(4)
+ adde 25,25,12
+ ld 12,8*4(4)
+ adde 26,26,14
+ ld 14,8*5(4)
+ adde 27,27,15
+ ld 15,8*6(4)
+ adde 28,28,16
+ ld 16,8*7(4)
+ adde 29,29,17
+ ldu 17,8*8(4)
+
+ b .Lsqr8x_mul
+
+.align 5
+.Lsqr8x_break:
+ ld 9,8*8(3)
+ addi 4,3,8*15
+ ld 10,8*9(3)
+ sub. 18,6,4
+ ld 11,8*10(3)
+ sub 19,5,18
+ ld 12,8*11(3)
+ ld 14,8*12(3)
+ ld 15,8*13(3)
+ ld 16,8*14(3)
+ ld 17,8*15(3)
+ beq .Lsqr8x_outer_loop
+
+ std 22,8*1(5)
+ ld 22,8*1(19)
+ std 23,8*2(5)
+ ld 23,8*2(19)
+ std 24,8*3(5)
+ ld 24,8*3(19)
+ std 25,8*4(5)
+ ld 25,8*4(19)
+ std 26,8*5(5)
+ ld 26,8*5(19)
+ std 27,8*6(5)
+ ld 27,8*6(19)
+ std 28,8*7(5)
+ ld 28,8*7(19)
+ std 29,8*8(5)
+ ld 29,8*8(19)
+ mr 5,19
+ b .Lsqr8x_outer_loop
+
+.align 5
+.Lsqr8x_outer_break:
+
+
+ ld 10,8*1(18)
+ ld 12,8*2(18)
+ ld 15,8*3(18)
+ ld 17,8*4(18)
+ addi 4,18,8*4
+
+ ld 19,8*13(1)
+ ld 20,8*14(1)
+ ld 21,8*15(1)
+ ld 18,8*16(1)
+
+ std 22,8*1(5)
+ srwi 30,8,5
+ std 23,8*2(5)
+ subi 30,30,1
+ std 24,8*3(5)
+ std 25,8*4(5)
+ std 26,8*5(5)
+ std 27,8*6(5)
+ std 28,8*7(5)
+
+ addi 5,1,8*11
+ mulld 22,10,10
+ mulhdu 10,10,10
+ add 23,19,19
+ srdi 19,19,64-1
+ mulld 11,12,12
+ mulhdu 12,12,12
+ addc 23,23,10
+ add 24,20,20
+ srdi 20,20,64-1
+ add 25,21,21
+ srdi 21,21,64-1
+ or 24,24,19
+
+ mtctr 30
+.Lsqr4x_shift_n_add:
+ mulld 14,15,15
+ mulhdu 15,15,15
+ ld 19,8*6(5)
+ ld 10,8*1(4)
+ adde 24,24,11
+ add 26,18,18
+ srdi 18,18,64-1
+ or 25,25,20
+ ld 20,8*7(5)
+ adde 25,25,12
+ ld 12,8*2(4)
+ add 27,19,19
+ srdi 19,19,64-1
+ or 26,26,21
+ ld 21,8*8(5)
+ mulld 16,17,17
+ mulhdu 17,17,17
+ adde 26,26,14
+ add 28,20,20
+ srdi 20,20,64-1
+ or 27,27,18
+ ld 18,8*9(5)
+ adde 27,27,15
+ ld 15,8*3(4)
+ add 29,21,21
+ srdi 21,21,64-1
+ or 28,28,19
+ ld 19,8*10(5)
+ mulld 9,10,10
+ mulhdu 10,10,10
+ adde 28,28,16
+ std 22,8*1(5)
+ add 22,18,18
+ srdi 18,18,64-1
+ or 29,29,20
+ ld 20,8*11(5)
+ adde 29,29,17
+ ldu 17,8*4(4)
+ std 23,8*2(5)
+ add 23,19,19
+ srdi 19,19,64-1
+ or 22,22,21
+ ld 21,8*12(5)
+ mulld 11,12,12
+ mulhdu 12,12,12
+ adde 22,22,9
+ std 24,8*3(5)
+ add 24,20,20
+ srdi 20,20,64-1
+ or 23,23,18
+ ld 18,8*13(5)
+ adde 23,23,10
+ std 25,8*4(5)
+ std 26,8*5(5)
+ std 27,8*6(5)
+ std 28,8*7(5)
+ stdu 29,8*8(5)
+ add 25,21,21
+ srdi 21,21,64-1
+ or 24,24,19
+ bdnz .Lsqr4x_shift_n_add
+ ld 4,8*7(1)
+ ld 7,8*8(1)
+
+ mulld 14,15,15
+ mulhdu 15,15,15
+ std 22,8*1(5)
+ ld 22,8*12(1)
+ ld 19,8*6(5)
+ adde 24,24,11
+ add 26,18,18
+ srdi 18,18,64-1
+ or 25,25,20
+ ld 20,8*7(5)
+ adde 25,25,12
+ add 27,19,19
+ srdi 19,19,64-1
+ or 26,26,21
+ mulld 16,17,17
+ mulhdu 17,17,17
+ adde 26,26,14
+ add 28,20,20
+ srdi 20,20,64-1
+ or 27,27,18
+ std 23,8*2(5)
+ ld 23,8*13(1)
+ adde 27,27,15
+ or 28,28,19
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ adde 28,28,16
+ ld 11,8*3(4)
+ ld 12,8*4(4)
+ adde 29,17,20
+ ld 14,8*5(4)
+ ld 15,8*6(4)
+
+
+
+ mulld 31,7,22
+ li 30,8
+ ld 16,8*7(4)
+ add 6,4,8
+ ldu 17,8*8(4)
+ std 24,8*3(5)
+ ld 24,8*14(1)
+ std 25,8*4(5)
+ ld 25,8*15(1)
+ std 26,8*5(5)
+ ld 26,8*16(1)
+ std 27,8*6(5)
+ ld 27,8*17(1)
+ std 28,8*7(5)
+ ld 28,8*18(1)
+ std 29,8*8(5)
+ ld 29,8*19(1)
+ addi 5,1,8*11
+ mtctr 30
+ b .Lsqr8x_reduction
+
+.align 5
+.Lsqr8x_reduction:
+
+ mulld 19,10,31
+ mulld 20,11,31
+ stdu 31,8(5)
+ mulld 21,12,31
+
+ addic 22,22,-1
+ mulld 18,14,31
+ adde 22,23,19
+ mulld 19,15,31
+ adde 23,24,20
+ mulld 20,16,31
+ adde 24,25,21
+ mulld 21,17,31
+ adde 25,26,18
+ mulhdu 18,9,31
+ adde 26,27,19
+ mulhdu 19,10,31
+ adde 27,28,20
+ mulhdu 20,11,31
+ adde 28,29,21
+ mulhdu 21,12,31
+ addze 29,0
+ addc 22,22,18
+ mulhdu 18,14,31
+ adde 23,23,19
+ mulhdu 19,15,31
+ adde 24,24,20
+ mulhdu 20,16,31
+ adde 25,25,21
+ mulhdu 21,17,31
+ mulld 31,7,22
+ adde 26,26,18
+ adde 27,27,19
+ adde 28,28,20
+ adde 29,29,21
+ bdnz .Lsqr8x_reduction
+
+ ld 18,8*1(5)
+ ld 19,8*2(5)
+ ld 20,8*3(5)
+ ld 21,8*4(5)
+ subi 3,5,8*7
+ cmpld 6,4
+ addc 22,22,18
+ ld 18,8*5(5)
+ adde 23,23,19
+ ld 19,8*6(5)
+ adde 24,24,20
+ ld 20,8*7(5)
+ adde 25,25,21
+ ld 21,8*8(5)
+ adde 26,26,18
+ adde 27,27,19
+ adde 28,28,20
+ adde 29,29,21
+
+ beq .Lsqr8x8_post_condition
+
+ ld 7,8*0(3)
+ ld 9,8*1(4)
+ ld 10,8*2(4)
+ ld 11,8*3(4)
+ ld 12,8*4(4)
+ ld 14,8*5(4)
+ ld 15,8*6(4)
+ ld 16,8*7(4)
+ ldu 17,8*8(4)
+ li 30,0
+
+.align 5
+.Lsqr8x_tail:
+ mulld 18,9,7
+ addze 31,0
+ mulld 19,10,7
+ addi 30,30,8
+ mulld 20,11,7
+ andi. 30,30,8*8-1
+ mulld 21,12,7
+ addc 22,22,18
+ mulld 18,14,7
+ adde 23,23,19
+ mulld 19,15,7
+ adde 24,24,20
+ mulld 20,16,7
+ adde 25,25,21
+ mulld 21,17,7
+ adde 26,26,18
+ mulhdu 18,9,7
+ adde 27,27,19
+ mulhdu 19,10,7
+ adde 28,28,20
+ mulhdu 20,11,7
+ adde 29,29,21
+ mulhdu 21,12,7
+ addze 31,31
+ stdu 22,8(5)
+ addc 22,23,18
+ mulhdu 18,14,7
+ adde 23,24,19
+ mulhdu 19,15,7
+ adde 24,25,20
+ mulhdu 20,16,7
+ adde 25,26,21
+ mulhdu 21,17,7
+ ldx 7,3,30
+ adde 26,27,18
+ adde 27,28,19
+ adde 28,29,20
+ adde 29,31,21
+
+ bne .Lsqr8x_tail
+
+
+ ld 9,8*1(5)
+ ld 31,8*10(1)
+ cmpld 6,4
+ ld 10,8*2(5)
+ sub 20,6,8
+ ld 11,8*3(5)
+ ld 12,8*4(5)
+ ld 14,8*5(5)
+ ld 15,8*6(5)
+ ld 16,8*7(5)
+ ld 17,8*8(5)
+ beq .Lsqr8x_tail_break
+
+ addc 22,22,9
+ ld 9,8*1(4)
+ adde 23,23,10
+ ld 10,8*2(4)
+ adde 24,24,11
+ ld 11,8*3(4)
+ adde 25,25,12
+ ld 12,8*4(4)
+ adde 26,26,14
+ ld 14,8*5(4)
+ adde 27,27,15
+ ld 15,8*6(4)
+ adde 28,28,16
+ ld 16,8*7(4)
+ adde 29,29,17
+ ldu 17,8*8(4)
+
+ b .Lsqr8x_tail
+
+.align 5
+.Lsqr8x_tail_break:
+ ld 7,8*8(1)
+ ld 21,8*9(1)
+ addi 30,5,8*8
+
+ addic 31,31,-1
+ adde 18,22,9
+ ld 22,8*8(3)
+ ld 9,8*1(20)
+ adde 19,23,10
+ ld 23,8*9(3)
+ ld 10,8*2(20)
+ adde 24,24,11
+ ld 11,8*3(20)
+ adde 25,25,12
+ ld 12,8*4(20)
+ adde 26,26,14
+ ld 14,8*5(20)
+ adde 27,27,15
+ ld 15,8*6(20)
+ adde 28,28,16
+ ld 16,8*7(20)
+ adde 29,29,17
+ ld 17,8*8(20)
+ addi 4,20,8*8
+ addze 20,0
+ mulld 31,7,22
+ std 18,8*1(5)
+ cmpld 30,21
+ std 19,8*2(5)
+ li 30,8
+ std 24,8*3(5)
+ ld 24,8*10(3)
+ std 25,8*4(5)
+ ld 25,8*11(3)
+ std 26,8*5(5)
+ ld 26,8*12(3)
+ std 27,8*6(5)
+ ld 27,8*13(3)
+ std 28,8*7(5)
+ ld 28,8*14(3)
+ std 29,8*8(5)
+ ld 29,8*15(3)
+ std 20,8*10(1)
+ addi 5,3,8*7
+ mtctr 30
+ bne .Lsqr8x_reduction
+
+
+
+
+
+
+ ld 3,8*6(1)
+ srwi 30,8,6
+ mr 7,5
+ addi 5,5,8*8
+ subi 30,30,1
+ subfc 18,9,22
+ subfe 19,10,23
+ mr 31,20
+ mr 6,3
+
+ mtctr 30
+ b .Lsqr8x_sub
+
+.align 5
+.Lsqr8x_sub:
+ ld 9,8*1(4)
+ ld 22,8*1(5)
+ ld 10,8*2(4)
+ ld 23,8*2(5)
+ subfe 20,11,24
+ ld 11,8*3(4)
+ ld 24,8*3(5)
+ subfe 21,12,25
+ ld 12,8*4(4)
+ ld 25,8*4(5)
+ std 18,8*1(3)
+ subfe 18,14,26
+ ld 14,8*5(4)
+ ld 26,8*5(5)
+ std 19,8*2(3)
+ subfe 19,15,27
+ ld 15,8*6(4)
+ ld 27,8*6(5)
+ std 20,8*3(3)
+ subfe 20,16,28
+ ld 16,8*7(4)
+ ld 28,8*7(5)
+ std 21,8*4(3)
+ subfe 21,17,29
+ ldu 17,8*8(4)
+ ldu 29,8*8(5)
+ std 18,8*5(3)
+ subfe 18,9,22
+ std 19,8*6(3)
+ subfe 19,10,23
+ std 20,8*7(3)
+ stdu 21,8*8(3)
+ bdnz .Lsqr8x_sub
+
+ srwi 30,8,5
+ ld 9,8*1(6)
+ ld 22,8*1(7)
+ subi 30,30,1
+ ld 10,8*2(6)
+ ld 23,8*2(7)
+ subfe 20,11,24
+ ld 11,8*3(6)
+ ld 24,8*3(7)
+ subfe 21,12,25
+ ld 12,8*4(6)
+ ldu 25,8*4(7)
+ std 18,8*1(3)
+ subfe 18,14,26
+ std 19,8*2(3)
+ subfe 19,15,27
+ std 20,8*3(3)
+ subfe 20,16,28
+ std 21,8*4(3)
+ subfe 21,17,29
+ std 18,8*5(3)
+ subfe 31,0,31
+ std 19,8*6(3)
+ std 20,8*7(3)
+ std 21,8*8(3)
+
+ addi 5,1,8*11
+ mtctr 30
+
+.Lsqr4x_cond_copy:
+ andc 9,9,31
+ std 0,-8*3(7)
+ and 22,22,31
+ std 0,-8*2(7)
+ andc 10,10,31
+ std 0,-8*1(7)
+ and 23,23,31
+ std 0,-8*0(7)
+ andc 11,11,31
+ std 0,8*1(5)
+ and 24,24,31
+ std 0,8*2(5)
+ andc 12,12,31
+ std 0,8*3(5)
+ and 25,25,31
+ stdu 0,8*4(5)
+ or 18,9,22
+ ld 9,8*5(6)
+ ld 22,8*1(7)
+ or 19,10,23
+ ld 10,8*6(6)
+ ld 23,8*2(7)
+ or 20,11,24
+ ld 11,8*7(6)
+ ld 24,8*3(7)
+ or 21,12,25
+ ld 12,8*8(6)
+ ldu 25,8*4(7)
+ std 18,8*1(6)
+ std 19,8*2(6)
+ std 20,8*3(6)
+ stdu 21,8*4(6)
+ bdnz .Lsqr4x_cond_copy
+
+ ld 4,0(1)
+ andc 9,9,31
+ and 22,22,31
+ andc 10,10,31
+ and 23,23,31
+ andc 11,11,31
+ and 24,24,31
+ andc 12,12,31
+ and 25,25,31
+ or 18,9,22
+ or 19,10,23
+ or 20,11,24
+ or 21,12,25
+ std 18,8*1(6)
+ std 19,8*2(6)
+ std 20,8*3(6)
+ std 21,8*4(6)
+
+ b .Lsqr8x_done
+
+.align 5
+.Lsqr8x8_post_condition:
+ ld 3,8*6(1)
+ ld 4,0(1)
+ addze 31,0
+
+
+ subfc 22,9,22
+ subfe 23,10,23
+ std 0,8*12(1)
+ std 0,8*13(1)
+ subfe 24,11,24
+ std 0,8*14(1)
+ std 0,8*15(1)
+ subfe 25,12,25
+ std 0,8*16(1)
+ std 0,8*17(1)
+ subfe 26,14,26
+ std 0,8*18(1)
+ std 0,8*19(1)
+ subfe 27,15,27
+ std 0,8*20(1)
+ std 0,8*21(1)
+ subfe 28,16,28
+ std 0,8*22(1)
+ std 0,8*23(1)
+ subfe 29,17,29
+ std 0,8*24(1)
+ std 0,8*25(1)
+ subfe 31,0,31
+ std 0,8*26(1)
+ std 0,8*27(1)
+
+ and 9,9,31
+ and 10,10,31
+ addc 22,22,9
+ and 11,11,31
+ adde 23,23,10
+ and 12,12,31
+ adde 24,24,11
+ and 14,14,31
+ adde 25,25,12
+ and 15,15,31
+ adde 26,26,14
+ and 16,16,31
+ adde 27,27,15
+ and 17,17,31
+ adde 28,28,16
+ adde 29,29,17
+ std 22,8*1(3)
+ std 23,8*2(3)
+ std 24,8*3(3)
+ std 25,8*4(3)
+ std 26,8*5(3)
+ std 27,8*6(3)
+ std 28,8*7(3)
+ std 29,8*8(3)
+
+.Lsqr8x_done:
+ std 0,8*8(1)
+ std 0,8*10(1)
+
+ ld 14,-8*18(4)
+ li 3,1
+ ld 15,-8*17(4)
+ ld 16,-8*16(4)
+ ld 17,-8*15(4)
+ ld 18,-8*14(4)
+ ld 19,-8*13(4)
+ ld 20,-8*12(4)
+ ld 21,-8*11(4)
+ ld 22,-8*10(4)
+ ld 23,-8*9(4)
+ ld 24,-8*8(4)
+ ld 25,-8*7(4)
+ ld 26,-8*6(4)
+ ld 27,-8*5(4)
+ ld 28,-8*4(4)
+ ld 29,-8*3(4)
+ ld 30,-8*2(4)
+ ld 31,-8*1(4)
+ mr 1,4
+ blr
+.long 0
+.byte 0,12,4,0x20,0x80,18,6,0
+.long 0
+.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
+.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align 2