diff options
author | danlark <danlark@yandex-team.ru> | 2022-02-10 16:46:08 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:08 +0300 |
commit | 3426a9bc7f169ae9da54cef557ad2a33f6e8eee0 (patch) | |
tree | 26154e1e9990f1bb4525d3e3fb5b6dac2c2c1da2 /contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s | |
parent | cb68f224c46a8ee52ac3fdd2a32534b8bb8dc134 (diff) | |
download | ydb-3426a9bc7f169ae9da54cef557ad2a33f6e8eee0.tar.gz |
Restoring authorship annotation for <danlark@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s')
-rw-r--r-- | contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s | 3560 |
1 files changed, 1780 insertions, 1780 deletions
diff --git a/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s b/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s index 95bfd7bf60..885d609e73 100644 --- a/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s +++ b/contrib/libs/openssl/asm/ppc64le/crypto/bn/ppc-mont.s @@ -1,1796 +1,1796 @@ -.machine "any" -.text - -.globl bn_mul_mont_int -.type bn_mul_mont_int,@function +.machine "any" +.text + +.globl bn_mul_mont_int +.type bn_mul_mont_int,@function .section ".opd","aw" .align 3 bn_mul_mont_int: .quad .bn_mul_mont_int,.TOC.@tocbase,0 .previous -.align 5 +.align 5 .bn_mul_mont_int: - mr 9,3 - li 3,0 - slwi 8,8,3 - li 12,-4096 - addi 3,8,352 - subf 3,3,1 - and 3,3,12 - subf 3,1,3 - mr 12,1 - srwi 8,8,3 - stdux 1,1,3 - - std 20,-96(12) - std 21,-88(12) - std 22,-80(12) - std 23,-72(12) - std 24,-64(12) - std 25,-56(12) - std 26,-48(12) - std 27,-40(12) - std 28,-32(12) - std 29,-24(12) - std 30,-16(12) - std 31,-8(12) - - ld 7,0(7) - addi 8,8,-2 - - ld 23,0(5) - ld 10,0(4) - addi 22,1,64 - mulld 25,10,23 - mulhdu 26,10,23 - - ld 10,8(4) - ld 11,0(6) - - mulld 24,25,7 - - mulld 29,10,23 - mulhdu 30,10,23 - - mulld 27,11,24 - mulhdu 28,11,24 - ld 11,8(6) - addc 27,27,25 - addze 28,28 - - mulld 31,11,24 - mulhdu 0,11,24 - - mtctr 8 - li 21,16 -.align 4 -.L1st: - ldx 10,4,21 - addc 25,29,26 - ldx 11,6,21 - addze 26,30 - mulld 29,10,23 - addc 27,31,28 - mulhdu 30,10,23 - addze 28,0 - mulld 31,11,24 - addc 27,27,25 - mulhdu 0,11,24 - addze 28,28 - std 27,0(22) - - addi 21,21,8 - addi 22,22,8 - bdnz .L1st - - addc 25,29,26 - addze 26,30 - - addc 27,31,28 - addze 28,0 - addc 27,27,25 - addze 28,28 - std 27,0(22) - - li 3,0 - addc 28,28,26 - addze 3,3 - std 28,8(22) - - li 20,8 -.align 4 -.Louter: - ldx 23,5,20 - ld 10,0(4) - addi 22,1,64 - ld 12,64(1) - mulld 25,10,23 - mulhdu 26,10,23 - ld 10,8(4) - ld 11,0(6) - addc 25,25,12 - mulld 29,10,23 - addze 26,26 - mulld 24,25,7 - mulhdu 30,10,23 - mulld 27,11,24 - mulhdu 28,11,24 - ld 11,8(6) - addc 27,27,25 - mulld 31,11,24 - addze 28,28 - mulhdu 0,11,24 - - mtctr 8 - li 21,16 -.align 4 -.Linner: - ldx 10,4,21 - addc 25,29,26 - ld 12,8(22) - addze 26,30 - ldx 11,6,21 - addc 27,31,28 - mulld 29,10,23 - addze 28,0 - mulhdu 30,10,23 - addc 25,25,12 - mulld 31,11,24 - addze 26,26 - mulhdu 0,11,24 - addc 27,27,25 - addi 21,21,8 - addze 28,28 - std 27,0(22) - addi 22,22,8 - bdnz .Linner - - ld 12,8(22) - addc 25,29,26 - addze 26,30 - addc 25,25,12 - addze 26,26 - - addc 27,31,28 - addze 28,0 - addc 27,27,25 - addze 28,28 - std 27,0(22) - - addic 3,3,-1 - li 3,0 - adde 28,28,26 - addze 3,3 - std 28,8(22) - - slwi 12,8,3 - cmpld 20,12 - addi 20,20,8 - ble .Louter - - addi 8,8,2 - subfc 21,21,21 - addi 22,1,64 - mtctr 8 - -.align 4 -.Lsub: ldx 12,22,21 - ldx 11,6,21 - subfe 10,11,12 - stdx 10,9,21 - addi 21,21,8 - bdnz .Lsub - - li 21,0 - mtctr 8 - subfe 3,21,3 - -.align 4 -.Lcopy: - ldx 12,22,21 - ldx 10,9,21 - and 12,12,3 - andc 10,10,3 - stdx 21,22,21 - or 10,10,12 - stdx 10,9,21 - addi 21,21,8 - bdnz .Lcopy - - ld 12,0(1) - li 3,1 - ld 20,-96(12) - ld 21,-88(12) - ld 22,-80(12) - ld 23,-72(12) - ld 24,-64(12) - ld 25,-56(12) - ld 26,-48(12) - ld 27,-40(12) - ld 28,-32(12) - ld 29,-24(12) - ld 30,-16(12) - ld 31,-8(12) - mr 1,12 - blr -.long 0 -.byte 0,12,4,0,0x80,12,6,0 -.long 0 + mr 9,3 + li 3,0 + slwi 8,8,3 + li 12,-4096 + addi 3,8,352 + subf 3,3,1 + and 3,3,12 + subf 3,1,3 + mr 12,1 + srwi 8,8,3 + stdux 1,1,3 + + std 20,-96(12) + std 21,-88(12) + std 22,-80(12) + std 23,-72(12) + std 24,-64(12) + std 25,-56(12) + std 26,-48(12) + std 27,-40(12) + std 28,-32(12) + std 29,-24(12) + std 30,-16(12) + std 31,-8(12) + + ld 7,0(7) + addi 8,8,-2 + + ld 23,0(5) + ld 10,0(4) + addi 22,1,64 + mulld 25,10,23 + mulhdu 26,10,23 + + ld 10,8(4) + ld 11,0(6) + + mulld 24,25,7 + + mulld 29,10,23 + mulhdu 30,10,23 + + mulld 27,11,24 + mulhdu 28,11,24 + ld 11,8(6) + addc 27,27,25 + addze 28,28 + + mulld 31,11,24 + mulhdu 0,11,24 + + mtctr 8 + li 21,16 +.align 4 +.L1st: + ldx 10,4,21 + addc 25,29,26 + ldx 11,6,21 + addze 26,30 + mulld 29,10,23 + addc 27,31,28 + mulhdu 30,10,23 + addze 28,0 + mulld 31,11,24 + addc 27,27,25 + mulhdu 0,11,24 + addze 28,28 + std 27,0(22) + + addi 21,21,8 + addi 22,22,8 + bdnz .L1st + + addc 25,29,26 + addze 26,30 + + addc 27,31,28 + addze 28,0 + addc 27,27,25 + addze 28,28 + std 27,0(22) + + li 3,0 + addc 28,28,26 + addze 3,3 + std 28,8(22) + + li 20,8 +.align 4 +.Louter: + ldx 23,5,20 + ld 10,0(4) + addi 22,1,64 + ld 12,64(1) + mulld 25,10,23 + mulhdu 26,10,23 + ld 10,8(4) + ld 11,0(6) + addc 25,25,12 + mulld 29,10,23 + addze 26,26 + mulld 24,25,7 + mulhdu 30,10,23 + mulld 27,11,24 + mulhdu 28,11,24 + ld 11,8(6) + addc 27,27,25 + mulld 31,11,24 + addze 28,28 + mulhdu 0,11,24 + + mtctr 8 + li 21,16 +.align 4 +.Linner: + ldx 10,4,21 + addc 25,29,26 + ld 12,8(22) + addze 26,30 + ldx 11,6,21 + addc 27,31,28 + mulld 29,10,23 + addze 28,0 + mulhdu 30,10,23 + addc 25,25,12 + mulld 31,11,24 + addze 26,26 + mulhdu 0,11,24 + addc 27,27,25 + addi 21,21,8 + addze 28,28 + std 27,0(22) + addi 22,22,8 + bdnz .Linner + + ld 12,8(22) + addc 25,29,26 + addze 26,30 + addc 25,25,12 + addze 26,26 + + addc 27,31,28 + addze 28,0 + addc 27,27,25 + addze 28,28 + std 27,0(22) + + addic 3,3,-1 + li 3,0 + adde 28,28,26 + addze 3,3 + std 28,8(22) + + slwi 12,8,3 + cmpld 20,12 + addi 20,20,8 + ble .Louter + + addi 8,8,2 + subfc 21,21,21 + addi 22,1,64 + mtctr 8 + +.align 4 +.Lsub: ldx 12,22,21 + ldx 11,6,21 + subfe 10,11,12 + stdx 10,9,21 + addi 21,21,8 + bdnz .Lsub + + li 21,0 + mtctr 8 + subfe 3,21,3 + +.align 4 +.Lcopy: + ldx 12,22,21 + ldx 10,9,21 + and 12,12,3 + andc 10,10,3 + stdx 21,22,21 + or 10,10,12 + stdx 10,9,21 + addi 21,21,8 + bdnz .Lcopy + + ld 12,0(1) + li 3,1 + ld 20,-96(12) + ld 21,-88(12) + ld 22,-80(12) + ld 23,-72(12) + ld 24,-64(12) + ld 25,-56(12) + ld 26,-48(12) + ld 27,-40(12) + ld 28,-32(12) + ld 29,-24(12) + ld 30,-16(12) + ld 31,-8(12) + mr 1,12 + blr +.long 0 +.byte 0,12,4,0,0x80,12,6,0 +.long 0 .size .bn_mul_mont_int,.-.bn_mul_mont_int .size bn_mul_mont_int,.-.bn_mul_mont_int -.globl bn_mul4x_mont_int -.type bn_mul4x_mont_int,@function +.globl bn_mul4x_mont_int +.type bn_mul4x_mont_int,@function .section ".opd","aw" .align 3 bn_mul4x_mont_int: .quad .bn_mul4x_mont_int,.TOC.@tocbase,0 .previous -.align 5 +.align 5 .bn_mul4x_mont_int: - andi. 0,8,7 - bne .Lmul4x_do - cmpld 4,5 - bne .Lmul4x_do - b .Lsqr8x_do -.Lmul4x_do: - slwi 8,8,3 - mr 9,1 - li 10,-32*8 - sub 10,10,8 - stdux 1,1,10 - - std 14,-8*18(9) - std 15,-8*17(9) - std 16,-8*16(9) - std 17,-8*15(9) - std 18,-8*14(9) - std 19,-8*13(9) - std 20,-8*12(9) - std 21,-8*11(9) - std 22,-8*10(9) - std 23,-8*9(9) - std 24,-8*8(9) - std 25,-8*7(9) - std 26,-8*6(9) - std 27,-8*5(9) - std 28,-8*4(9) - std 29,-8*3(9) - std 30,-8*2(9) - std 31,-8*1(9) - - subi 4,4,8 - subi 6,6,8 - subi 3,3,8 - ld 7,0(7) - - add 14,5,8 - add 30,4,8 - subi 14,14,8*4 - - ld 27,8*0(5) - li 22,0 - ld 9,8*1(4) - li 23,0 - ld 10,8*2(4) - li 24,0 - ld 11,8*3(4) - li 25,0 - ldu 12,8*4(4) - ld 18,8*1(6) - ld 19,8*2(6) - ld 20,8*3(6) - ldu 21,8*4(6) - - std 3,8*6(1) - std 14,8*7(1) - li 3,0 - addic 29,1,8*7 - li 31,0 - li 0,0 - b .Loop_mul4x_1st_reduction - -.align 5 -.Loop_mul4x_1st_reduction: - mulld 14,9,27 - addze 3,3 - mulld 15,10,27 - addi 31,31,8 - mulld 16,11,27 - andi. 31,31,8*4-1 - mulld 17,12,27 - addc 22,22,14 - mulhdu 14,9,27 - adde 23,23,15 - mulhdu 15,10,27 - adde 24,24,16 - mulld 28,22,7 - adde 25,25,17 - mulhdu 16,11,27 - addze 26,0 - mulhdu 17,12,27 - ldx 27,5,31 - addc 23,23,14 - - stdu 28,8(29) - adde 24,24,15 - mulld 15,19,28 - adde 25,25,16 - mulld 16,20,28 - adde 26,26,17 - mulld 17,21,28 - - - - - - - - - - - addic 22,22,-1 - mulhdu 14,18,28 - adde 22,23,15 - mulhdu 15,19,28 - adde 23,24,16 - mulhdu 16,20,28 - adde 24,25,17 - mulhdu 17,21,28 - adde 25,26,3 - addze 3,0 - addc 22,22,14 - adde 23,23,15 - adde 24,24,16 - adde 25,25,17 - - bne .Loop_mul4x_1st_reduction - - cmpld 30,4 - beq .Lmul4x4_post_condition - - ld 9,8*1(4) - ld 10,8*2(4) - ld 11,8*3(4) - ldu 12,8*4(4) - ld 28,8*8(1) - ld 18,8*1(6) - ld 19,8*2(6) - ld 20,8*3(6) - ldu 21,8*4(6) - b .Loop_mul4x_1st_tail - -.align 5 -.Loop_mul4x_1st_tail: - mulld 14,9,27 - addze 3,3 - mulld 15,10,27 - addi 31,31,8 - mulld 16,11,27 - andi. 31,31,8*4-1 - mulld 17,12,27 - addc 22,22,14 - mulhdu 14,9,27 - adde 23,23,15 - mulhdu 15,10,27 - adde 24,24,16 - mulhdu 16,11,27 - adde 25,25,17 - mulhdu 17,12,27 - addze 26,0 - ldx 27,5,31 - addc 23,23,14 - mulld 14,18,28 - adde 24,24,15 - mulld 15,19,28 - adde 25,25,16 - mulld 16,20,28 - adde 26,26,17 - mulld 17,21,28 - addc 22,22,14 - mulhdu 14,18,28 - adde 23,23,15 - mulhdu 15,19,28 - adde 24,24,16 - mulhdu 16,20,28 - adde 25,25,17 - adde 26,26,3 - mulhdu 17,21,28 - addze 3,0 - addi 28,1,8*8 - ldx 28,28,31 - stdu 22,8(29) - addc 22,23,14 - adde 23,24,15 - adde 24,25,16 - adde 25,26,17 - - bne .Loop_mul4x_1st_tail - - sub 15,30,8 - cmpld 30,4 - beq .Lmul4x_proceed - - ld 9,8*1(4) - ld 10,8*2(4) - ld 11,8*3(4) - ldu 12,8*4(4) - ld 18,8*1(6) - ld 19,8*2(6) - ld 20,8*3(6) - ldu 21,8*4(6) - b .Loop_mul4x_1st_tail - -.align 5 -.Lmul4x_proceed: - ldu 27,8*4(5) - addze 3,3 - ld 9,8*1(15) - ld 10,8*2(15) - ld 11,8*3(15) - ld 12,8*4(15) - addi 4,15,8*4 - sub 6,6,8 - - std 22,8*1(29) - std 23,8*2(29) - std 24,8*3(29) - std 25,8*4(29) - std 3,8*5(29) - ld 22,8*12(1) - ld 23,8*13(1) - ld 24,8*14(1) - ld 25,8*15(1) - - ld 18,8*1(6) - ld 19,8*2(6) - ld 20,8*3(6) - ldu 21,8*4(6) - addic 29,1,8*7 - li 3,0 - b .Loop_mul4x_reduction - -.align 5 -.Loop_mul4x_reduction: - mulld 14,9,27 - addze 3,3 - mulld 15,10,27 - addi 31,31,8 - mulld 16,11,27 - andi. 31,31,8*4-1 - mulld 17,12,27 - addc 22,22,14 - mulhdu 14,9,27 - adde 23,23,15 - mulhdu 15,10,27 - adde 24,24,16 - mulld 28,22,7 - adde 25,25,17 - mulhdu 16,11,27 - addze 26,0 - mulhdu 17,12,27 - ldx 27,5,31 - addc 23,23,14 - - stdu 28,8(29) - adde 24,24,15 - mulld 15,19,28 - adde 25,25,16 - mulld 16,20,28 - adde 26,26,17 - mulld 17,21,28 - - addic 22,22,-1 - mulhdu 14,18,28 - adde 22,23,15 - mulhdu 15,19,28 - adde 23,24,16 - mulhdu 16,20,28 - adde 24,25,17 - mulhdu 17,21,28 - adde 25,26,3 - addze 3,0 - addc 22,22,14 - adde 23,23,15 - adde 24,24,16 - adde 25,25,17 - - bne .Loop_mul4x_reduction - - ld 14,8*5(29) - addze 3,3 - ld 15,8*6(29) - ld 16,8*7(29) - ld 17,8*8(29) - ld 9,8*1(4) - ld 10,8*2(4) - ld 11,8*3(4) - ldu 12,8*4(4) - addc 22,22,14 - adde 23,23,15 - adde 24,24,16 - adde 25,25,17 - - - ld 28,8*8(1) - ld 18,8*1(6) - ld 19,8*2(6) - ld 20,8*3(6) - ldu 21,8*4(6) - b .Loop_mul4x_tail - -.align 5 -.Loop_mul4x_tail: - mulld 14,9,27 - addze 3,3 - mulld 15,10,27 - addi 31,31,8 - mulld 16,11,27 - andi. 31,31,8*4-1 - mulld 17,12,27 - addc 22,22,14 - mulhdu 14,9,27 - adde 23,23,15 - mulhdu 15,10,27 - adde 24,24,16 - mulhdu 16,11,27 - adde 25,25,17 - mulhdu 17,12,27 - addze 26,0 - ldx 27,5,31 - addc 23,23,14 - mulld 14,18,28 - adde 24,24,15 - mulld 15,19,28 - adde 25,25,16 - mulld 16,20,28 - adde 26,26,17 - mulld 17,21,28 - addc 22,22,14 - mulhdu 14,18,28 - adde 23,23,15 - mulhdu 15,19,28 - adde 24,24,16 - mulhdu 16,20,28 - adde 25,25,17 - mulhdu 17,21,28 - adde 26,26,3 - addi 28,1,8*8 - ldx 28,28,31 - addze 3,0 - stdu 22,8(29) - addc 22,23,14 - adde 23,24,15 - adde 24,25,16 - adde 25,26,17 - - bne .Loop_mul4x_tail - - ld 14,8*5(29) - sub 15,6,8 - addze 3,3 - cmpld 30,4 - beq .Loop_mul4x_break - - ld 15,8*6(29) - ld 16,8*7(29) - ld 17,8*8(29) - ld 9,8*1(4) - ld 10,8*2(4) - ld 11,8*3(4) - ldu 12,8*4(4) - addc 22,22,14 - adde 23,23,15 - adde 24,24,16 - adde 25,25,17 - - - ld 18,8*1(6) - ld 19,8*2(6) - ld 20,8*3(6) - ldu 21,8*4(6) - b .Loop_mul4x_tail - -.align 5 -.Loop_mul4x_break: - ld 16,8*6(1) - ld 17,8*7(1) - addc 9,22,14 - ld 22,8*12(1) - addze 10,23 - ld 23,8*13(1) - addze 11,24 - ld 24,8*14(1) - addze 12,25 - ld 25,8*15(1) - addze 3,3 - std 9,8*1(29) - sub 4,30,8 - std 10,8*2(29) - std 11,8*3(29) - std 12,8*4(29) - std 3,8*5(29) - - ld 18,8*1(15) - ld 19,8*2(15) - ld 20,8*3(15) - ld 21,8*4(15) - addi 6,15,8*4 - cmpld 5,17 - beq .Lmul4x_post - - ldu 27,8*4(5) - ld 9,8*1(4) - ld 10,8*2(4) - ld 11,8*3(4) - ldu 12,8*4(4) - li 3,0 - addic 29,1,8*7 - b .Loop_mul4x_reduction - -.align 5 -.Lmul4x_post: - - - - - srwi 31,8,5 - mr 5,16 - subi 31,31,1 - mr 30,16 - subfc 14,18,22 - addi 29,1,8*15 - subfe 15,19,23 - - mtctr 31 -.Lmul4x_sub: - ld 18,8*1(6) - ld 22,8*1(29) - subfe 16,20,24 - ld 19,8*2(6) - ld 23,8*2(29) - subfe 17,21,25 - ld 20,8*3(6) - ld 24,8*3(29) - ldu 21,8*4(6) - ldu 25,8*4(29) - std 14,8*1(5) - std 15,8*2(5) - subfe 14,18,22 - std 16,8*3(5) - stdu 17,8*4(5) - subfe 15,19,23 - bdnz .Lmul4x_sub - - ld 9,8*1(30) - std 14,8*1(5) - ld 14,8*12(1) - subfe 16,20,24 - ld 10,8*2(30) - std 15,8*2(5) - ld 15,8*13(1) - subfe 17,21,25 - subfe 3,0,3 - addi 29,1,8*12 - ld 11,8*3(30) - std 16,8*3(5) - ld 16,8*14(1) - ld 12,8*4(30) - std 17,8*4(5) - ld 17,8*15(1) - - mtctr 31 -.Lmul4x_cond_copy: - and 14,14,3 - andc 9,9,3 - std 0,8*0(29) - and 15,15,3 - andc 10,10,3 - std 0,8*1(29) - and 16,16,3 - andc 11,11,3 - std 0,8*2(29) - and 17,17,3 - andc 12,12,3 - std 0,8*3(29) - or 22,14,9 - ld 9,8*5(30) - ld 14,8*4(29) - or 23,15,10 - ld 10,8*6(30) - ld 15,8*5(29) - or 24,16,11 - ld 11,8*7(30) - ld 16,8*6(29) - or 25,17,12 - ld 12,8*8(30) - ld 17,8*7(29) - addi 29,29,8*4 - std 22,8*1(30) - std 23,8*2(30) - std 24,8*3(30) - stdu 25,8*4(30) - bdnz .Lmul4x_cond_copy - - ld 5,0(1) - and 14,14,3 - andc 9,9,3 - std 0,8*0(29) - and 15,15,3 - andc 10,10,3 - std 0,8*1(29) - and 16,16,3 - andc 11,11,3 - std 0,8*2(29) - and 17,17,3 - andc 12,12,3 - std 0,8*3(29) - or 22,14,9 - or 23,15,10 - std 0,8*4(29) - or 24,16,11 - or 25,17,12 - std 22,8*1(30) - std 23,8*2(30) - std 24,8*3(30) - std 25,8*4(30) - - b .Lmul4x_done - -.align 4 -.Lmul4x4_post_condition: - ld 4,8*6(1) - ld 5,0(1) - addze 3,3 - - subfc 9,18,22 - subfe 10,19,23 - subfe 11,20,24 - subfe 12,21,25 - subfe 3,0,3 - - and 18,18,3 - and 19,19,3 - addc 9,9,18 - and 20,20,3 - adde 10,10,19 - and 21,21,3 - adde 11,11,20 - adde 12,12,21 - - std 9,8*1(4) - std 10,8*2(4) - std 11,8*3(4) - std 12,8*4(4) - -.Lmul4x_done: - std 0,8*8(1) - std 0,8*9(1) - std 0,8*10(1) - std 0,8*11(1) - li 3,1 - ld 14,-8*18(5) - ld 15,-8*17(5) - ld 16,-8*16(5) - ld 17,-8*15(5) - ld 18,-8*14(5) - ld 19,-8*13(5) - ld 20,-8*12(5) - ld 21,-8*11(5) - ld 22,-8*10(5) - ld 23,-8*9(5) - ld 24,-8*8(5) - ld 25,-8*7(5) - ld 26,-8*6(5) - ld 27,-8*5(5) - ld 28,-8*4(5) - ld 29,-8*3(5) - ld 30,-8*2(5) - ld 31,-8*1(5) - mr 1,5 - blr -.long 0 -.byte 0,12,4,0x20,0x80,18,6,0 -.long 0 + andi. 0,8,7 + bne .Lmul4x_do + cmpld 4,5 + bne .Lmul4x_do + b .Lsqr8x_do +.Lmul4x_do: + slwi 8,8,3 + mr 9,1 + li 10,-32*8 + sub 10,10,8 + stdux 1,1,10 + + std 14,-8*18(9) + std 15,-8*17(9) + std 16,-8*16(9) + std 17,-8*15(9) + std 18,-8*14(9) + std 19,-8*13(9) + std 20,-8*12(9) + std 21,-8*11(9) + std 22,-8*10(9) + std 23,-8*9(9) + std 24,-8*8(9) + std 25,-8*7(9) + std 26,-8*6(9) + std 27,-8*5(9) + std 28,-8*4(9) + std 29,-8*3(9) + std 30,-8*2(9) + std 31,-8*1(9) + + subi 4,4,8 + subi 6,6,8 + subi 3,3,8 + ld 7,0(7) + + add 14,5,8 + add 30,4,8 + subi 14,14,8*4 + + ld 27,8*0(5) + li 22,0 + ld 9,8*1(4) + li 23,0 + ld 10,8*2(4) + li 24,0 + ld 11,8*3(4) + li 25,0 + ldu 12,8*4(4) + ld 18,8*1(6) + ld 19,8*2(6) + ld 20,8*3(6) + ldu 21,8*4(6) + + std 3,8*6(1) + std 14,8*7(1) + li 3,0 + addic 29,1,8*7 + li 31,0 + li 0,0 + b .Loop_mul4x_1st_reduction + +.align 5 +.Loop_mul4x_1st_reduction: + mulld 14,9,27 + addze 3,3 + mulld 15,10,27 + addi 31,31,8 + mulld 16,11,27 + andi. 31,31,8*4-1 + mulld 17,12,27 + addc 22,22,14 + mulhdu 14,9,27 + adde 23,23,15 + mulhdu 15,10,27 + adde 24,24,16 + mulld 28,22,7 + adde 25,25,17 + mulhdu 16,11,27 + addze 26,0 + mulhdu 17,12,27 + ldx 27,5,31 + addc 23,23,14 + + stdu 28,8(29) + adde 24,24,15 + mulld 15,19,28 + adde 25,25,16 + mulld 16,20,28 + adde 26,26,17 + mulld 17,21,28 + + + + + + + + + + + addic 22,22,-1 + mulhdu 14,18,28 + adde 22,23,15 + mulhdu 15,19,28 + adde 23,24,16 + mulhdu 16,20,28 + adde 24,25,17 + mulhdu 17,21,28 + adde 25,26,3 + addze 3,0 + addc 22,22,14 + adde 23,23,15 + adde 24,24,16 + adde 25,25,17 + + bne .Loop_mul4x_1st_reduction + + cmpld 30,4 + beq .Lmul4x4_post_condition + + ld 9,8*1(4) + ld 10,8*2(4) + ld 11,8*3(4) + ldu 12,8*4(4) + ld 28,8*8(1) + ld 18,8*1(6) + ld 19,8*2(6) + ld 20,8*3(6) + ldu 21,8*4(6) + b .Loop_mul4x_1st_tail + +.align 5 +.Loop_mul4x_1st_tail: + mulld 14,9,27 + addze 3,3 + mulld 15,10,27 + addi 31,31,8 + mulld 16,11,27 + andi. 31,31,8*4-1 + mulld 17,12,27 + addc 22,22,14 + mulhdu 14,9,27 + adde 23,23,15 + mulhdu 15,10,27 + adde 24,24,16 + mulhdu 16,11,27 + adde 25,25,17 + mulhdu 17,12,27 + addze 26,0 + ldx 27,5,31 + addc 23,23,14 + mulld 14,18,28 + adde 24,24,15 + mulld 15,19,28 + adde 25,25,16 + mulld 16,20,28 + adde 26,26,17 + mulld 17,21,28 + addc 22,22,14 + mulhdu 14,18,28 + adde 23,23,15 + mulhdu 15,19,28 + adde 24,24,16 + mulhdu 16,20,28 + adde 25,25,17 + adde 26,26,3 + mulhdu 17,21,28 + addze 3,0 + addi 28,1,8*8 + ldx 28,28,31 + stdu 22,8(29) + addc 22,23,14 + adde 23,24,15 + adde 24,25,16 + adde 25,26,17 + + bne .Loop_mul4x_1st_tail + + sub 15,30,8 + cmpld 30,4 + beq .Lmul4x_proceed + + ld 9,8*1(4) + ld 10,8*2(4) + ld 11,8*3(4) + ldu 12,8*4(4) + ld 18,8*1(6) + ld 19,8*2(6) + ld 20,8*3(6) + ldu 21,8*4(6) + b .Loop_mul4x_1st_tail + +.align 5 +.Lmul4x_proceed: + ldu 27,8*4(5) + addze 3,3 + ld 9,8*1(15) + ld 10,8*2(15) + ld 11,8*3(15) + ld 12,8*4(15) + addi 4,15,8*4 + sub 6,6,8 + + std 22,8*1(29) + std 23,8*2(29) + std 24,8*3(29) + std 25,8*4(29) + std 3,8*5(29) + ld 22,8*12(1) + ld 23,8*13(1) + ld 24,8*14(1) + ld 25,8*15(1) + + ld 18,8*1(6) + ld 19,8*2(6) + ld 20,8*3(6) + ldu 21,8*4(6) + addic 29,1,8*7 + li 3,0 + b .Loop_mul4x_reduction + +.align 5 +.Loop_mul4x_reduction: + mulld 14,9,27 + addze 3,3 + mulld 15,10,27 + addi 31,31,8 + mulld 16,11,27 + andi. 31,31,8*4-1 + mulld 17,12,27 + addc 22,22,14 + mulhdu 14,9,27 + adde 23,23,15 + mulhdu 15,10,27 + adde 24,24,16 + mulld 28,22,7 + adde 25,25,17 + mulhdu 16,11,27 + addze 26,0 + mulhdu 17,12,27 + ldx 27,5,31 + addc 23,23,14 + + stdu 28,8(29) + adde 24,24,15 + mulld 15,19,28 + adde 25,25,16 + mulld 16,20,28 + adde 26,26,17 + mulld 17,21,28 + + addic 22,22,-1 + mulhdu 14,18,28 + adde 22,23,15 + mulhdu 15,19,28 + adde 23,24,16 + mulhdu 16,20,28 + adde 24,25,17 + mulhdu 17,21,28 + adde 25,26,3 + addze 3,0 + addc 22,22,14 + adde 23,23,15 + adde 24,24,16 + adde 25,25,17 + + bne .Loop_mul4x_reduction + + ld 14,8*5(29) + addze 3,3 + ld 15,8*6(29) + ld 16,8*7(29) + ld 17,8*8(29) + ld 9,8*1(4) + ld 10,8*2(4) + ld 11,8*3(4) + ldu 12,8*4(4) + addc 22,22,14 + adde 23,23,15 + adde 24,24,16 + adde 25,25,17 + + + ld 28,8*8(1) + ld 18,8*1(6) + ld 19,8*2(6) + ld 20,8*3(6) + ldu 21,8*4(6) + b .Loop_mul4x_tail + +.align 5 +.Loop_mul4x_tail: + mulld 14,9,27 + addze 3,3 + mulld 15,10,27 + addi 31,31,8 + mulld 16,11,27 + andi. 31,31,8*4-1 + mulld 17,12,27 + addc 22,22,14 + mulhdu 14,9,27 + adde 23,23,15 + mulhdu 15,10,27 + adde 24,24,16 + mulhdu 16,11,27 + adde 25,25,17 + mulhdu 17,12,27 + addze 26,0 + ldx 27,5,31 + addc 23,23,14 + mulld 14,18,28 + adde 24,24,15 + mulld 15,19,28 + adde 25,25,16 + mulld 16,20,28 + adde 26,26,17 + mulld 17,21,28 + addc 22,22,14 + mulhdu 14,18,28 + adde 23,23,15 + mulhdu 15,19,28 + adde 24,24,16 + mulhdu 16,20,28 + adde 25,25,17 + mulhdu 17,21,28 + adde 26,26,3 + addi 28,1,8*8 + ldx 28,28,31 + addze 3,0 + stdu 22,8(29) + addc 22,23,14 + adde 23,24,15 + adde 24,25,16 + adde 25,26,17 + + bne .Loop_mul4x_tail + + ld 14,8*5(29) + sub 15,6,8 + addze 3,3 + cmpld 30,4 + beq .Loop_mul4x_break + + ld 15,8*6(29) + ld 16,8*7(29) + ld 17,8*8(29) + ld 9,8*1(4) + ld 10,8*2(4) + ld 11,8*3(4) + ldu 12,8*4(4) + addc 22,22,14 + adde 23,23,15 + adde 24,24,16 + adde 25,25,17 + + + ld 18,8*1(6) + ld 19,8*2(6) + ld 20,8*3(6) + ldu 21,8*4(6) + b .Loop_mul4x_tail + +.align 5 +.Loop_mul4x_break: + ld 16,8*6(1) + ld 17,8*7(1) + addc 9,22,14 + ld 22,8*12(1) + addze 10,23 + ld 23,8*13(1) + addze 11,24 + ld 24,8*14(1) + addze 12,25 + ld 25,8*15(1) + addze 3,3 + std 9,8*1(29) + sub 4,30,8 + std 10,8*2(29) + std 11,8*3(29) + std 12,8*4(29) + std 3,8*5(29) + + ld 18,8*1(15) + ld 19,8*2(15) + ld 20,8*3(15) + ld 21,8*4(15) + addi 6,15,8*4 + cmpld 5,17 + beq .Lmul4x_post + + ldu 27,8*4(5) + ld 9,8*1(4) + ld 10,8*2(4) + ld 11,8*3(4) + ldu 12,8*4(4) + li 3,0 + addic 29,1,8*7 + b .Loop_mul4x_reduction + +.align 5 +.Lmul4x_post: + + + + + srwi 31,8,5 + mr 5,16 + subi 31,31,1 + mr 30,16 + subfc 14,18,22 + addi 29,1,8*15 + subfe 15,19,23 + + mtctr 31 +.Lmul4x_sub: + ld 18,8*1(6) + ld 22,8*1(29) + subfe 16,20,24 + ld 19,8*2(6) + ld 23,8*2(29) + subfe 17,21,25 + ld 20,8*3(6) + ld 24,8*3(29) + ldu 21,8*4(6) + ldu 25,8*4(29) + std 14,8*1(5) + std 15,8*2(5) + subfe 14,18,22 + std 16,8*3(5) + stdu 17,8*4(5) + subfe 15,19,23 + bdnz .Lmul4x_sub + + ld 9,8*1(30) + std 14,8*1(5) + ld 14,8*12(1) + subfe 16,20,24 + ld 10,8*2(30) + std 15,8*2(5) + ld 15,8*13(1) + subfe 17,21,25 + subfe 3,0,3 + addi 29,1,8*12 + ld 11,8*3(30) + std 16,8*3(5) + ld 16,8*14(1) + ld 12,8*4(30) + std 17,8*4(5) + ld 17,8*15(1) + + mtctr 31 +.Lmul4x_cond_copy: + and 14,14,3 + andc 9,9,3 + std 0,8*0(29) + and 15,15,3 + andc 10,10,3 + std 0,8*1(29) + and 16,16,3 + andc 11,11,3 + std 0,8*2(29) + and 17,17,3 + andc 12,12,3 + std 0,8*3(29) + or 22,14,9 + ld 9,8*5(30) + ld 14,8*4(29) + or 23,15,10 + ld 10,8*6(30) + ld 15,8*5(29) + or 24,16,11 + ld 11,8*7(30) + ld 16,8*6(29) + or 25,17,12 + ld 12,8*8(30) + ld 17,8*7(29) + addi 29,29,8*4 + std 22,8*1(30) + std 23,8*2(30) + std 24,8*3(30) + stdu 25,8*4(30) + bdnz .Lmul4x_cond_copy + + ld 5,0(1) + and 14,14,3 + andc 9,9,3 + std 0,8*0(29) + and 15,15,3 + andc 10,10,3 + std 0,8*1(29) + and 16,16,3 + andc 11,11,3 + std 0,8*2(29) + and 17,17,3 + andc 12,12,3 + std 0,8*3(29) + or 22,14,9 + or 23,15,10 + std 0,8*4(29) + or 24,16,11 + or 25,17,12 + std 22,8*1(30) + std 23,8*2(30) + std 24,8*3(30) + std 25,8*4(30) + + b .Lmul4x_done + +.align 4 +.Lmul4x4_post_condition: + ld 4,8*6(1) + ld 5,0(1) + addze 3,3 + + subfc 9,18,22 + subfe 10,19,23 + subfe 11,20,24 + subfe 12,21,25 + subfe 3,0,3 + + and 18,18,3 + and 19,19,3 + addc 9,9,18 + and 20,20,3 + adde 10,10,19 + and 21,21,3 + adde 11,11,20 + adde 12,12,21 + + std 9,8*1(4) + std 10,8*2(4) + std 11,8*3(4) + std 12,8*4(4) + +.Lmul4x_done: + std 0,8*8(1) + std 0,8*9(1) + std 0,8*10(1) + std 0,8*11(1) + li 3,1 + ld 14,-8*18(5) + ld 15,-8*17(5) + ld 16,-8*16(5) + ld 17,-8*15(5) + ld 18,-8*14(5) + ld 19,-8*13(5) + ld 20,-8*12(5) + ld 21,-8*11(5) + ld 22,-8*10(5) + ld 23,-8*9(5) + ld 24,-8*8(5) + ld 25,-8*7(5) + ld 26,-8*6(5) + ld 27,-8*5(5) + ld 28,-8*4(5) + ld 29,-8*3(5) + ld 30,-8*2(5) + ld 31,-8*1(5) + mr 1,5 + blr +.long 0 +.byte 0,12,4,0x20,0x80,18,6,0 +.long 0 .size .bn_mul4x_mont_int,.-.bn_mul4x_mont_int .size bn_mul4x_mont_int,.-.bn_mul4x_mont_int -.align 5 -__bn_sqr8x_mont: -.Lsqr8x_do: - mr 9,1 - slwi 10,8,4 - li 11,-32*8 - sub 10,11,10 - slwi 8,8,3 - stdux 1,1,10 - - std 14,-8*18(9) - std 15,-8*17(9) - std 16,-8*16(9) - std 17,-8*15(9) - std 18,-8*14(9) - std 19,-8*13(9) - std 20,-8*12(9) - std 21,-8*11(9) - std 22,-8*10(9) - std 23,-8*9(9) - std 24,-8*8(9) - std 25,-8*7(9) - std 26,-8*6(9) - std 27,-8*5(9) - std 28,-8*4(9) - std 29,-8*3(9) - std 30,-8*2(9) - std 31,-8*1(9) - - subi 4,4,8 - subi 18,6,8 - subi 3,3,8 - ld 7,0(7) - li 0,0 - - add 6,4,8 - ld 9,8*1(4) - - ld 10,8*2(4) - li 23,0 - ld 11,8*3(4) - li 24,0 - ld 12,8*4(4) - li 25,0 - ld 14,8*5(4) - li 26,0 - ld 15,8*6(4) - li 27,0 - ld 16,8*7(4) - li 28,0 - ldu 17,8*8(4) - li 29,0 - - addi 5,1,8*11 - subic. 30,8,8*8 - b .Lsqr8x_zero_start - -.align 5 -.Lsqr8x_zero: - subic. 30,30,8*8 - std 0,8*1(5) - std 0,8*2(5) - std 0,8*3(5) - std 0,8*4(5) - std 0,8*5(5) - std 0,8*6(5) - std 0,8*7(5) - std 0,8*8(5) -.Lsqr8x_zero_start: - std 0,8*9(5) - std 0,8*10(5) - std 0,8*11(5) - std 0,8*12(5) - std 0,8*13(5) - std 0,8*14(5) - std 0,8*15(5) - stdu 0,8*16(5) - bne .Lsqr8x_zero - - std 3,8*6(1) - std 18,8*7(1) - std 7,8*8(1) - std 5,8*9(1) - std 0,8*10(1) - addi 5,1,8*11 - - -.align 5 -.Lsqr8x_outer_loop: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - mulld 18,10,9 - mulld 19,11,9 - mulld 20,12,9 - mulld 21,14,9 - addc 23,23,18 - mulld 18,15,9 - adde 24,24,19 - mulld 19,16,9 - adde 25,25,20 - mulld 20,17,9 - adde 26,26,21 - mulhdu 21,10,9 - adde 27,27,18 - mulhdu 18,11,9 - adde 28,28,19 - mulhdu 19,12,9 - adde 29,29,20 - mulhdu 20,14,9 - std 22,8*1(5) - addze 22,0 - std 23,8*2(5) - addc 24,24,21 - mulhdu 21,15,9 - adde 25,25,18 - mulhdu 18,16,9 - adde 26,26,19 - mulhdu 19,17,9 - adde 27,27,20 - mulld 20,11,10 - adde 28,28,21 - mulld 21,12,10 - adde 29,29,18 - mulld 18,14,10 - adde 22,22,19 - - mulld 19,15,10 - addc 25,25,20 - mulld 20,16,10 - adde 26,26,21 - mulld 21,17,10 - adde 27,27,18 - mulhdu 18,11,10 - adde 28,28,19 - mulhdu 19,12,10 - adde 29,29,20 - mulhdu 20,14,10 - adde 22,22,21 - mulhdu 21,15,10 - std 24,8*3(5) - addze 23,0 - std 25,8*4(5) - addc 26,26,18 - mulhdu 18,16,10 - adde 27,27,19 - mulhdu 19,17,10 - adde 28,28,20 - mulld 20,12,11 - adde 29,29,21 - mulld 21,14,11 - adde 22,22,18 - mulld 18,15,11 - adde 23,23,19 - - mulld 19,16,11 - addc 27,27,20 - mulld 20,17,11 - adde 28,28,21 - mulhdu 21,12,11 - adde 29,29,18 - mulhdu 18,14,11 - adde 22,22,19 - mulhdu 19,15,11 - adde 23,23,20 - mulhdu 20,16,11 - std 26,8*5(5) - addze 24,0 - std 27,8*6(5) - addc 28,28,21 - mulhdu 21,17,11 - adde 29,29,18 - mulld 18,14,12 - adde 22,22,19 - mulld 19,15,12 - adde 23,23,20 - mulld 20,16,12 - adde 24,24,21 - - mulld 21,17,12 - addc 29,29,18 - mulhdu 18,14,12 - adde 22,22,19 - mulhdu 19,15,12 - adde 23,23,20 - mulhdu 20,16,12 - adde 24,24,21 - mulhdu 21,17,12 - std 28,8*7(5) - addze 25,0 - stdu 29,8*8(5) - addc 22,22,18 - mulld 18,15,14 - adde 23,23,19 - mulld 19,16,14 - adde 24,24,20 - mulld 20,17,14 - adde 25,25,21 - - mulhdu 21,15,14 - addc 23,23,18 - mulhdu 18,16,14 - adde 24,24,19 - mulhdu 19,17,14 - adde 25,25,20 - mulld 20,16,15 - addze 26,0 - addc 24,24,21 - mulld 21,17,15 - adde 25,25,18 - mulhdu 18,16,15 - adde 26,26,19 - - mulhdu 19,17,15 - addc 25,25,20 - mulld 20,17,16 - adde 26,26,21 - mulhdu 21,17,16 - addze 27,0 - addc 26,26,18 - cmpld 6,4 - adde 27,27,19 - - addc 27,27,20 - sub 18,6,8 - addze 28,0 - add 28,28,21 - - beq .Lsqr8x_outer_break - - mr 7,9 - ld 9,8*1(5) - ld 10,8*2(5) - ld 11,8*3(5) - ld 12,8*4(5) - ld 14,8*5(5) - ld 15,8*6(5) - ld 16,8*7(5) - ld 17,8*8(5) - addc 22,22,9 - ld 9,8*1(4) - adde 23,23,10 - ld 10,8*2(4) - adde 24,24,11 - ld 11,8*3(4) - adde 25,25,12 - ld 12,8*4(4) - adde 26,26,14 - ld 14,8*5(4) - adde 27,27,15 - ld 15,8*6(4) - adde 28,28,16 - ld 16,8*7(4) - subi 3,4,8*7 - addze 29,17 - ldu 17,8*8(4) - - li 30,0 - b .Lsqr8x_mul - - - - - - - - - - - - - - - - - - - - - - - -.align 5 -.Lsqr8x_mul: - mulld 18,9,7 - addze 31,0 - mulld 19,10,7 - addi 30,30,8 - mulld 20,11,7 - andi. 30,30,8*8-1 - mulld 21,12,7 - addc 22,22,18 - mulld 18,14,7 - adde 23,23,19 - mulld 19,15,7 - adde 24,24,20 - mulld 20,16,7 - adde 25,25,21 - mulld 21,17,7 - adde 26,26,18 - mulhdu 18,9,7 - adde 27,27,19 - mulhdu 19,10,7 - adde 28,28,20 - mulhdu 20,11,7 - adde 29,29,21 - mulhdu 21,12,7 - addze 31,31 - stdu 22,8(5) - addc 22,23,18 - mulhdu 18,14,7 - adde 23,24,19 - mulhdu 19,15,7 - adde 24,25,20 - mulhdu 20,16,7 - adde 25,26,21 - mulhdu 21,17,7 - ldx 7,3,30 - adde 26,27,18 - adde 27,28,19 - adde 28,29,20 - adde 29,31,21 - - bne .Lsqr8x_mul - - - cmpld 4,6 - beq .Lsqr8x_break - - ld 9,8*1(5) - ld 10,8*2(5) - ld 11,8*3(5) - ld 12,8*4(5) - ld 14,8*5(5) - ld 15,8*6(5) - ld 16,8*7(5) - ld 17,8*8(5) - addc 22,22,9 - ld 9,8*1(4) - adde 23,23,10 - ld 10,8*2(4) - adde 24,24,11 - ld 11,8*3(4) - adde 25,25,12 - ld 12,8*4(4) - adde 26,26,14 - ld 14,8*5(4) - adde 27,27,15 - ld 15,8*6(4) - adde 28,28,16 - ld 16,8*7(4) - adde 29,29,17 - ldu 17,8*8(4) - - b .Lsqr8x_mul - -.align 5 -.Lsqr8x_break: - ld 9,8*8(3) - addi 4,3,8*15 - ld 10,8*9(3) - sub. 18,6,4 - ld 11,8*10(3) - sub 19,5,18 - ld 12,8*11(3) - ld 14,8*12(3) - ld 15,8*13(3) - ld 16,8*14(3) - ld 17,8*15(3) - beq .Lsqr8x_outer_loop - - std 22,8*1(5) - ld 22,8*1(19) - std 23,8*2(5) - ld 23,8*2(19) - std 24,8*3(5) - ld 24,8*3(19) - std 25,8*4(5) - ld 25,8*4(19) - std 26,8*5(5) - ld 26,8*5(19) - std 27,8*6(5) - ld 27,8*6(19) - std 28,8*7(5) - ld 28,8*7(19) - std 29,8*8(5) - ld 29,8*8(19) - mr 5,19 - b .Lsqr8x_outer_loop - -.align 5 -.Lsqr8x_outer_break: - - - ld 10,8*1(18) - ld 12,8*2(18) - ld 15,8*3(18) - ld 17,8*4(18) - addi 4,18,8*4 - - ld 19,8*13(1) - ld 20,8*14(1) - ld 21,8*15(1) - ld 18,8*16(1) - - std 22,8*1(5) - srwi 30,8,5 - std 23,8*2(5) - subi 30,30,1 - std 24,8*3(5) - std 25,8*4(5) - std 26,8*5(5) - std 27,8*6(5) - std 28,8*7(5) - - addi 5,1,8*11 - mulld 22,10,10 - mulhdu 10,10,10 - add 23,19,19 - srdi 19,19,64-1 - mulld 11,12,12 - mulhdu 12,12,12 - addc 23,23,10 - add 24,20,20 - srdi 20,20,64-1 - add 25,21,21 - srdi 21,21,64-1 - or 24,24,19 - - mtctr 30 -.Lsqr4x_shift_n_add: - mulld 14,15,15 - mulhdu 15,15,15 - ld 19,8*6(5) - ld 10,8*1(4) - adde 24,24,11 - add 26,18,18 - srdi 18,18,64-1 - or 25,25,20 - ld 20,8*7(5) - adde 25,25,12 - ld 12,8*2(4) - add 27,19,19 - srdi 19,19,64-1 - or 26,26,21 - ld 21,8*8(5) - mulld 16,17,17 - mulhdu 17,17,17 - adde 26,26,14 - add 28,20,20 - srdi 20,20,64-1 - or 27,27,18 - ld 18,8*9(5) - adde 27,27,15 - ld 15,8*3(4) - add 29,21,21 - srdi 21,21,64-1 - or 28,28,19 - ld 19,8*10(5) - mulld 9,10,10 - mulhdu 10,10,10 - adde 28,28,16 - std 22,8*1(5) - add 22,18,18 - srdi 18,18,64-1 - or 29,29,20 - ld 20,8*11(5) - adde 29,29,17 - ldu 17,8*4(4) - std 23,8*2(5) - add 23,19,19 - srdi 19,19,64-1 - or 22,22,21 - ld 21,8*12(5) - mulld 11,12,12 - mulhdu 12,12,12 - adde 22,22,9 - std 24,8*3(5) - add 24,20,20 - srdi 20,20,64-1 - or 23,23,18 - ld 18,8*13(5) - adde 23,23,10 - std 25,8*4(5) - std 26,8*5(5) - std 27,8*6(5) - std 28,8*7(5) - stdu 29,8*8(5) - add 25,21,21 - srdi 21,21,64-1 - or 24,24,19 - bdnz .Lsqr4x_shift_n_add - ld 4,8*7(1) - ld 7,8*8(1) - - mulld 14,15,15 - mulhdu 15,15,15 - std 22,8*1(5) - ld 22,8*12(1) - ld 19,8*6(5) - adde 24,24,11 - add 26,18,18 - srdi 18,18,64-1 - or 25,25,20 - ld 20,8*7(5) - adde 25,25,12 - add 27,19,19 - srdi 19,19,64-1 - or 26,26,21 - mulld 16,17,17 - mulhdu 17,17,17 - adde 26,26,14 - add 28,20,20 - srdi 20,20,64-1 - or 27,27,18 - std 23,8*2(5) - ld 23,8*13(1) - adde 27,27,15 - or 28,28,19 - ld 9,8*1(4) - ld 10,8*2(4) - adde 28,28,16 - ld 11,8*3(4) - ld 12,8*4(4) - adde 29,17,20 - ld 14,8*5(4) - ld 15,8*6(4) - - - - mulld 31,7,22 - li 30,8 - ld 16,8*7(4) - add 6,4,8 - ldu 17,8*8(4) - std 24,8*3(5) - ld 24,8*14(1) - std 25,8*4(5) - ld 25,8*15(1) - std 26,8*5(5) - ld 26,8*16(1) - std 27,8*6(5) - ld 27,8*17(1) - std 28,8*7(5) - ld 28,8*18(1) - std 29,8*8(5) - ld 29,8*19(1) - addi 5,1,8*11 - mtctr 30 - b .Lsqr8x_reduction - -.align 5 -.Lsqr8x_reduction: - - mulld 19,10,31 - mulld 20,11,31 - stdu 31,8(5) - mulld 21,12,31 - - addic 22,22,-1 - mulld 18,14,31 - adde 22,23,19 - mulld 19,15,31 - adde 23,24,20 - mulld 20,16,31 - adde 24,25,21 - mulld 21,17,31 - adde 25,26,18 - mulhdu 18,9,31 - adde 26,27,19 - mulhdu 19,10,31 - adde 27,28,20 - mulhdu 20,11,31 - adde 28,29,21 - mulhdu 21,12,31 - addze 29,0 - addc 22,22,18 - mulhdu 18,14,31 - adde 23,23,19 - mulhdu 19,15,31 - adde 24,24,20 - mulhdu 20,16,31 - adde 25,25,21 - mulhdu 21,17,31 - mulld 31,7,22 - adde 26,26,18 - adde 27,27,19 - adde 28,28,20 - adde 29,29,21 - bdnz .Lsqr8x_reduction - - ld 18,8*1(5) - ld 19,8*2(5) - ld 20,8*3(5) - ld 21,8*4(5) - subi 3,5,8*7 - cmpld 6,4 - addc 22,22,18 - ld 18,8*5(5) - adde 23,23,19 - ld 19,8*6(5) - adde 24,24,20 - ld 20,8*7(5) - adde 25,25,21 - ld 21,8*8(5) - adde 26,26,18 - adde 27,27,19 - adde 28,28,20 - adde 29,29,21 - - beq .Lsqr8x8_post_condition - - ld 7,8*0(3) - ld 9,8*1(4) - ld 10,8*2(4) - ld 11,8*3(4) - ld 12,8*4(4) - ld 14,8*5(4) - ld 15,8*6(4) - ld 16,8*7(4) - ldu 17,8*8(4) - li 30,0 - -.align 5 -.Lsqr8x_tail: - mulld 18,9,7 - addze 31,0 - mulld 19,10,7 - addi 30,30,8 - mulld 20,11,7 - andi. 30,30,8*8-1 - mulld 21,12,7 - addc 22,22,18 - mulld 18,14,7 - adde 23,23,19 - mulld 19,15,7 - adde 24,24,20 - mulld 20,16,7 - adde 25,25,21 - mulld 21,17,7 - adde 26,26,18 - mulhdu 18,9,7 - adde 27,27,19 - mulhdu 19,10,7 - adde 28,28,20 - mulhdu 20,11,7 - adde 29,29,21 - mulhdu 21,12,7 - addze 31,31 - stdu 22,8(5) - addc 22,23,18 - mulhdu 18,14,7 - adde 23,24,19 - mulhdu 19,15,7 - adde 24,25,20 - mulhdu 20,16,7 - adde 25,26,21 - mulhdu 21,17,7 - ldx 7,3,30 - adde 26,27,18 - adde 27,28,19 - adde 28,29,20 - adde 29,31,21 - - bne .Lsqr8x_tail - - - ld 9,8*1(5) - ld 31,8*10(1) - cmpld 6,4 - ld 10,8*2(5) - sub 20,6,8 - ld 11,8*3(5) - ld 12,8*4(5) - ld 14,8*5(5) - ld 15,8*6(5) - ld 16,8*7(5) - ld 17,8*8(5) - beq .Lsqr8x_tail_break - - addc 22,22,9 - ld 9,8*1(4) - adde 23,23,10 - ld 10,8*2(4) - adde 24,24,11 - ld 11,8*3(4) - adde 25,25,12 - ld 12,8*4(4) - adde 26,26,14 - ld 14,8*5(4) - adde 27,27,15 - ld 15,8*6(4) - adde 28,28,16 - ld 16,8*7(4) - adde 29,29,17 - ldu 17,8*8(4) - - b .Lsqr8x_tail - -.align 5 -.Lsqr8x_tail_break: - ld 7,8*8(1) - ld 21,8*9(1) - addi 30,5,8*8 - - addic 31,31,-1 - adde 18,22,9 - ld 22,8*8(3) - ld 9,8*1(20) - adde 19,23,10 - ld 23,8*9(3) - ld 10,8*2(20) - adde 24,24,11 - ld 11,8*3(20) - adde 25,25,12 - ld 12,8*4(20) - adde 26,26,14 - ld 14,8*5(20) - adde 27,27,15 - ld 15,8*6(20) - adde 28,28,16 - ld 16,8*7(20) - adde 29,29,17 - ld 17,8*8(20) - addi 4,20,8*8 - addze 20,0 - mulld 31,7,22 - std 18,8*1(5) - cmpld 30,21 - std 19,8*2(5) - li 30,8 - std 24,8*3(5) - ld 24,8*10(3) - std 25,8*4(5) - ld 25,8*11(3) - std 26,8*5(5) - ld 26,8*12(3) - std 27,8*6(5) - ld 27,8*13(3) - std 28,8*7(5) - ld 28,8*14(3) - std 29,8*8(5) - ld 29,8*15(3) - std 20,8*10(1) - addi 5,3,8*7 - mtctr 30 - bne .Lsqr8x_reduction - - - - - - - ld 3,8*6(1) - srwi 30,8,6 - mr 7,5 - addi 5,5,8*8 - subi 30,30,1 - subfc 18,9,22 - subfe 19,10,23 - mr 31,20 - mr 6,3 - - mtctr 30 - b .Lsqr8x_sub - -.align 5 -.Lsqr8x_sub: - ld 9,8*1(4) - ld 22,8*1(5) - ld 10,8*2(4) - ld 23,8*2(5) - subfe 20,11,24 - ld 11,8*3(4) - ld 24,8*3(5) - subfe 21,12,25 - ld 12,8*4(4) - ld 25,8*4(5) - std 18,8*1(3) - subfe 18,14,26 - ld 14,8*5(4) - ld 26,8*5(5) - std 19,8*2(3) - subfe 19,15,27 - ld 15,8*6(4) - ld 27,8*6(5) - std 20,8*3(3) - subfe 20,16,28 - ld 16,8*7(4) - ld 28,8*7(5) - std 21,8*4(3) - subfe 21,17,29 - ldu 17,8*8(4) - ldu 29,8*8(5) - std 18,8*5(3) - subfe 18,9,22 - std 19,8*6(3) - subfe 19,10,23 - std 20,8*7(3) - stdu 21,8*8(3) - bdnz .Lsqr8x_sub - - srwi 30,8,5 - ld 9,8*1(6) - ld 22,8*1(7) - subi 30,30,1 - ld 10,8*2(6) - ld 23,8*2(7) - subfe 20,11,24 - ld 11,8*3(6) - ld 24,8*3(7) - subfe 21,12,25 - ld 12,8*4(6) - ldu 25,8*4(7) - std 18,8*1(3) - subfe 18,14,26 - std 19,8*2(3) - subfe 19,15,27 - std 20,8*3(3) - subfe 20,16,28 - std 21,8*4(3) - subfe 21,17,29 - std 18,8*5(3) - subfe 31,0,31 - std 19,8*6(3) - std 20,8*7(3) - std 21,8*8(3) - - addi 5,1,8*11 - mtctr 30 - -.Lsqr4x_cond_copy: - andc 9,9,31 - std 0,-8*3(7) - and 22,22,31 - std 0,-8*2(7) - andc 10,10,31 - std 0,-8*1(7) - and 23,23,31 - std 0,-8*0(7) - andc 11,11,31 - std 0,8*1(5) - and 24,24,31 - std 0,8*2(5) - andc 12,12,31 - std 0,8*3(5) - and 25,25,31 - stdu 0,8*4(5) - or 18,9,22 - ld 9,8*5(6) - ld 22,8*1(7) - or 19,10,23 - ld 10,8*6(6) - ld 23,8*2(7) - or 20,11,24 - ld 11,8*7(6) - ld 24,8*3(7) - or 21,12,25 - ld 12,8*8(6) - ldu 25,8*4(7) - std 18,8*1(6) - std 19,8*2(6) - std 20,8*3(6) - stdu 21,8*4(6) - bdnz .Lsqr4x_cond_copy - - ld 4,0(1) - andc 9,9,31 - and 22,22,31 - andc 10,10,31 - and 23,23,31 - andc 11,11,31 - and 24,24,31 - andc 12,12,31 - and 25,25,31 - or 18,9,22 - or 19,10,23 - or 20,11,24 - or 21,12,25 - std 18,8*1(6) - std 19,8*2(6) - std 20,8*3(6) - std 21,8*4(6) - - b .Lsqr8x_done - -.align 5 -.Lsqr8x8_post_condition: - ld 3,8*6(1) - ld 4,0(1) - addze 31,0 - - - subfc 22,9,22 - subfe 23,10,23 - std 0,8*12(1) - std 0,8*13(1) - subfe 24,11,24 - std 0,8*14(1) - std 0,8*15(1) - subfe 25,12,25 - std 0,8*16(1) - std 0,8*17(1) - subfe 26,14,26 - std 0,8*18(1) - std 0,8*19(1) - subfe 27,15,27 - std 0,8*20(1) - std 0,8*21(1) - subfe 28,16,28 - std 0,8*22(1) - std 0,8*23(1) - subfe 29,17,29 - std 0,8*24(1) - std 0,8*25(1) - subfe 31,0,31 - std 0,8*26(1) - std 0,8*27(1) - - and 9,9,31 - and 10,10,31 - addc 22,22,9 - and 11,11,31 - adde 23,23,10 - and 12,12,31 - adde 24,24,11 - and 14,14,31 - adde 25,25,12 - and 15,15,31 - adde 26,26,14 - and 16,16,31 - adde 27,27,15 - and 17,17,31 - adde 28,28,16 - adde 29,29,17 - std 22,8*1(3) - std 23,8*2(3) - std 24,8*3(3) - std 25,8*4(3) - std 26,8*5(3) - std 27,8*6(3) - std 28,8*7(3) - std 29,8*8(3) - -.Lsqr8x_done: - std 0,8*8(1) - std 0,8*10(1) - - ld 14,-8*18(4) - li 3,1 - ld 15,-8*17(4) - ld 16,-8*16(4) - ld 17,-8*15(4) - ld 18,-8*14(4) - ld 19,-8*13(4) - ld 20,-8*12(4) - ld 21,-8*11(4) - ld 22,-8*10(4) - ld 23,-8*9(4) - ld 24,-8*8(4) - ld 25,-8*7(4) - ld 26,-8*6(4) - ld 27,-8*5(4) - ld 28,-8*4(4) - ld 29,-8*3(4) - ld 30,-8*2(4) - ld 31,-8*1(4) - mr 1,4 - blr -.long 0 -.byte 0,12,4,0x20,0x80,18,6,0 -.long 0 -.size __bn_sqr8x_mont,.-__bn_sqr8x_mont -.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 2 +.align 5 +__bn_sqr8x_mont: +.Lsqr8x_do: + mr 9,1 + slwi 10,8,4 + li 11,-32*8 + sub 10,11,10 + slwi 8,8,3 + stdux 1,1,10 + + std 14,-8*18(9) + std 15,-8*17(9) + std 16,-8*16(9) + std 17,-8*15(9) + std 18,-8*14(9) + std 19,-8*13(9) + std 20,-8*12(9) + std 21,-8*11(9) + std 22,-8*10(9) + std 23,-8*9(9) + std 24,-8*8(9) + std 25,-8*7(9) + std 26,-8*6(9) + std 27,-8*5(9) + std 28,-8*4(9) + std 29,-8*3(9) + std 30,-8*2(9) + std 31,-8*1(9) + + subi 4,4,8 + subi 18,6,8 + subi 3,3,8 + ld 7,0(7) + li 0,0 + + add 6,4,8 + ld 9,8*1(4) + + ld 10,8*2(4) + li 23,0 + ld 11,8*3(4) + li 24,0 + ld 12,8*4(4) + li 25,0 + ld 14,8*5(4) + li 26,0 + ld 15,8*6(4) + li 27,0 + ld 16,8*7(4) + li 28,0 + ldu 17,8*8(4) + li 29,0 + + addi 5,1,8*11 + subic. 30,8,8*8 + b .Lsqr8x_zero_start + +.align 5 +.Lsqr8x_zero: + subic. 30,30,8*8 + std 0,8*1(5) + std 0,8*2(5) + std 0,8*3(5) + std 0,8*4(5) + std 0,8*5(5) + std 0,8*6(5) + std 0,8*7(5) + std 0,8*8(5) +.Lsqr8x_zero_start: + std 0,8*9(5) + std 0,8*10(5) + std 0,8*11(5) + std 0,8*12(5) + std 0,8*13(5) + std 0,8*14(5) + std 0,8*15(5) + stdu 0,8*16(5) + bne .Lsqr8x_zero + + std 3,8*6(1) + std 18,8*7(1) + std 7,8*8(1) + std 5,8*9(1) + std 0,8*10(1) + addi 5,1,8*11 + + +.align 5 +.Lsqr8x_outer_loop: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + mulld 18,10,9 + mulld 19,11,9 + mulld 20,12,9 + mulld 21,14,9 + addc 23,23,18 + mulld 18,15,9 + adde 24,24,19 + mulld 19,16,9 + adde 25,25,20 + mulld 20,17,9 + adde 26,26,21 + mulhdu 21,10,9 + adde 27,27,18 + mulhdu 18,11,9 + adde 28,28,19 + mulhdu 19,12,9 + adde 29,29,20 + mulhdu 20,14,9 + std 22,8*1(5) + addze 22,0 + std 23,8*2(5) + addc 24,24,21 + mulhdu 21,15,9 + adde 25,25,18 + mulhdu 18,16,9 + adde 26,26,19 + mulhdu 19,17,9 + adde 27,27,20 + mulld 20,11,10 + adde 28,28,21 + mulld 21,12,10 + adde 29,29,18 + mulld 18,14,10 + adde 22,22,19 + + mulld 19,15,10 + addc 25,25,20 + mulld 20,16,10 + adde 26,26,21 + mulld 21,17,10 + adde 27,27,18 + mulhdu 18,11,10 + adde 28,28,19 + mulhdu 19,12,10 + adde 29,29,20 + mulhdu 20,14,10 + adde 22,22,21 + mulhdu 21,15,10 + std 24,8*3(5) + addze 23,0 + std 25,8*4(5) + addc 26,26,18 + mulhdu 18,16,10 + adde 27,27,19 + mulhdu 19,17,10 + adde 28,28,20 + mulld 20,12,11 + adde 29,29,21 + mulld 21,14,11 + adde 22,22,18 + mulld 18,15,11 + adde 23,23,19 + + mulld 19,16,11 + addc 27,27,20 + mulld 20,17,11 + adde 28,28,21 + mulhdu 21,12,11 + adde 29,29,18 + mulhdu 18,14,11 + adde 22,22,19 + mulhdu 19,15,11 + adde 23,23,20 + mulhdu 20,16,11 + std 26,8*5(5) + addze 24,0 + std 27,8*6(5) + addc 28,28,21 + mulhdu 21,17,11 + adde 29,29,18 + mulld 18,14,12 + adde 22,22,19 + mulld 19,15,12 + adde 23,23,20 + mulld 20,16,12 + adde 24,24,21 + + mulld 21,17,12 + addc 29,29,18 + mulhdu 18,14,12 + adde 22,22,19 + mulhdu 19,15,12 + adde 23,23,20 + mulhdu 20,16,12 + adde 24,24,21 + mulhdu 21,17,12 + std 28,8*7(5) + addze 25,0 + stdu 29,8*8(5) + addc 22,22,18 + mulld 18,15,14 + adde 23,23,19 + mulld 19,16,14 + adde 24,24,20 + mulld 20,17,14 + adde 25,25,21 + + mulhdu 21,15,14 + addc 23,23,18 + mulhdu 18,16,14 + adde 24,24,19 + mulhdu 19,17,14 + adde 25,25,20 + mulld 20,16,15 + addze 26,0 + addc 24,24,21 + mulld 21,17,15 + adde 25,25,18 + mulhdu 18,16,15 + adde 26,26,19 + + mulhdu 19,17,15 + addc 25,25,20 + mulld 20,17,16 + adde 26,26,21 + mulhdu 21,17,16 + addze 27,0 + addc 26,26,18 + cmpld 6,4 + adde 27,27,19 + + addc 27,27,20 + sub 18,6,8 + addze 28,0 + add 28,28,21 + + beq .Lsqr8x_outer_break + + mr 7,9 + ld 9,8*1(5) + ld 10,8*2(5) + ld 11,8*3(5) + ld 12,8*4(5) + ld 14,8*5(5) + ld 15,8*6(5) + ld 16,8*7(5) + ld 17,8*8(5) + addc 22,22,9 + ld 9,8*1(4) + adde 23,23,10 + ld 10,8*2(4) + adde 24,24,11 + ld 11,8*3(4) + adde 25,25,12 + ld 12,8*4(4) + adde 26,26,14 + ld 14,8*5(4) + adde 27,27,15 + ld 15,8*6(4) + adde 28,28,16 + ld 16,8*7(4) + subi 3,4,8*7 + addze 29,17 + ldu 17,8*8(4) + + li 30,0 + b .Lsqr8x_mul + + + + + + + + + + + + + + + + + + + + + + + +.align 5 +.Lsqr8x_mul: + mulld 18,9,7 + addze 31,0 + mulld 19,10,7 + addi 30,30,8 + mulld 20,11,7 + andi. 30,30,8*8-1 + mulld 21,12,7 + addc 22,22,18 + mulld 18,14,7 + adde 23,23,19 + mulld 19,15,7 + adde 24,24,20 + mulld 20,16,7 + adde 25,25,21 + mulld 21,17,7 + adde 26,26,18 + mulhdu 18,9,7 + adde 27,27,19 + mulhdu 19,10,7 + adde 28,28,20 + mulhdu 20,11,7 + adde 29,29,21 + mulhdu 21,12,7 + addze 31,31 + stdu 22,8(5) + addc 22,23,18 + mulhdu 18,14,7 + adde 23,24,19 + mulhdu 19,15,7 + adde 24,25,20 + mulhdu 20,16,7 + adde 25,26,21 + mulhdu 21,17,7 + ldx 7,3,30 + adde 26,27,18 + adde 27,28,19 + adde 28,29,20 + adde 29,31,21 + + bne .Lsqr8x_mul + + + cmpld 4,6 + beq .Lsqr8x_break + + ld 9,8*1(5) + ld 10,8*2(5) + ld 11,8*3(5) + ld 12,8*4(5) + ld 14,8*5(5) + ld 15,8*6(5) + ld 16,8*7(5) + ld 17,8*8(5) + addc 22,22,9 + ld 9,8*1(4) + adde 23,23,10 + ld 10,8*2(4) + adde 24,24,11 + ld 11,8*3(4) + adde 25,25,12 + ld 12,8*4(4) + adde 26,26,14 + ld 14,8*5(4) + adde 27,27,15 + ld 15,8*6(4) + adde 28,28,16 + ld 16,8*7(4) + adde 29,29,17 + ldu 17,8*8(4) + + b .Lsqr8x_mul + +.align 5 +.Lsqr8x_break: + ld 9,8*8(3) + addi 4,3,8*15 + ld 10,8*9(3) + sub. 18,6,4 + ld 11,8*10(3) + sub 19,5,18 + ld 12,8*11(3) + ld 14,8*12(3) + ld 15,8*13(3) + ld 16,8*14(3) + ld 17,8*15(3) + beq .Lsqr8x_outer_loop + + std 22,8*1(5) + ld 22,8*1(19) + std 23,8*2(5) + ld 23,8*2(19) + std 24,8*3(5) + ld 24,8*3(19) + std 25,8*4(5) + ld 25,8*4(19) + std 26,8*5(5) + ld 26,8*5(19) + std 27,8*6(5) + ld 27,8*6(19) + std 28,8*7(5) + ld 28,8*7(19) + std 29,8*8(5) + ld 29,8*8(19) + mr 5,19 + b .Lsqr8x_outer_loop + +.align 5 +.Lsqr8x_outer_break: + + + ld 10,8*1(18) + ld 12,8*2(18) + ld 15,8*3(18) + ld 17,8*4(18) + addi 4,18,8*4 + + ld 19,8*13(1) + ld 20,8*14(1) + ld 21,8*15(1) + ld 18,8*16(1) + + std 22,8*1(5) + srwi 30,8,5 + std 23,8*2(5) + subi 30,30,1 + std 24,8*3(5) + std 25,8*4(5) + std 26,8*5(5) + std 27,8*6(5) + std 28,8*7(5) + + addi 5,1,8*11 + mulld 22,10,10 + mulhdu 10,10,10 + add 23,19,19 + srdi 19,19,64-1 + mulld 11,12,12 + mulhdu 12,12,12 + addc 23,23,10 + add 24,20,20 + srdi 20,20,64-1 + add 25,21,21 + srdi 21,21,64-1 + or 24,24,19 + + mtctr 30 +.Lsqr4x_shift_n_add: + mulld 14,15,15 + mulhdu 15,15,15 + ld 19,8*6(5) + ld 10,8*1(4) + adde 24,24,11 + add 26,18,18 + srdi 18,18,64-1 + or 25,25,20 + ld 20,8*7(5) + adde 25,25,12 + ld 12,8*2(4) + add 27,19,19 + srdi 19,19,64-1 + or 26,26,21 + ld 21,8*8(5) + mulld 16,17,17 + mulhdu 17,17,17 + adde 26,26,14 + add 28,20,20 + srdi 20,20,64-1 + or 27,27,18 + ld 18,8*9(5) + adde 27,27,15 + ld 15,8*3(4) + add 29,21,21 + srdi 21,21,64-1 + or 28,28,19 + ld 19,8*10(5) + mulld 9,10,10 + mulhdu 10,10,10 + adde 28,28,16 + std 22,8*1(5) + add 22,18,18 + srdi 18,18,64-1 + or 29,29,20 + ld 20,8*11(5) + adde 29,29,17 + ldu 17,8*4(4) + std 23,8*2(5) + add 23,19,19 + srdi 19,19,64-1 + or 22,22,21 + ld 21,8*12(5) + mulld 11,12,12 + mulhdu 12,12,12 + adde 22,22,9 + std 24,8*3(5) + add 24,20,20 + srdi 20,20,64-1 + or 23,23,18 + ld 18,8*13(5) + adde 23,23,10 + std 25,8*4(5) + std 26,8*5(5) + std 27,8*6(5) + std 28,8*7(5) + stdu 29,8*8(5) + add 25,21,21 + srdi 21,21,64-1 + or 24,24,19 + bdnz .Lsqr4x_shift_n_add + ld 4,8*7(1) + ld 7,8*8(1) + + mulld 14,15,15 + mulhdu 15,15,15 + std 22,8*1(5) + ld 22,8*12(1) + ld 19,8*6(5) + adde 24,24,11 + add 26,18,18 + srdi 18,18,64-1 + or 25,25,20 + ld 20,8*7(5) + adde 25,25,12 + add 27,19,19 + srdi 19,19,64-1 + or 26,26,21 + mulld 16,17,17 + mulhdu 17,17,17 + adde 26,26,14 + add 28,20,20 + srdi 20,20,64-1 + or 27,27,18 + std 23,8*2(5) + ld 23,8*13(1) + adde 27,27,15 + or 28,28,19 + ld 9,8*1(4) + ld 10,8*2(4) + adde 28,28,16 + ld 11,8*3(4) + ld 12,8*4(4) + adde 29,17,20 + ld 14,8*5(4) + ld 15,8*6(4) + + + + mulld 31,7,22 + li 30,8 + ld 16,8*7(4) + add 6,4,8 + ldu 17,8*8(4) + std 24,8*3(5) + ld 24,8*14(1) + std 25,8*4(5) + ld 25,8*15(1) + std 26,8*5(5) + ld 26,8*16(1) + std 27,8*6(5) + ld 27,8*17(1) + std 28,8*7(5) + ld 28,8*18(1) + std 29,8*8(5) + ld 29,8*19(1) + addi 5,1,8*11 + mtctr 30 + b .Lsqr8x_reduction + +.align 5 +.Lsqr8x_reduction: + + mulld 19,10,31 + mulld 20,11,31 + stdu 31,8(5) + mulld 21,12,31 + + addic 22,22,-1 + mulld 18,14,31 + adde 22,23,19 + mulld 19,15,31 + adde 23,24,20 + mulld 20,16,31 + adde 24,25,21 + mulld 21,17,31 + adde 25,26,18 + mulhdu 18,9,31 + adde 26,27,19 + mulhdu 19,10,31 + adde 27,28,20 + mulhdu 20,11,31 + adde 28,29,21 + mulhdu 21,12,31 + addze 29,0 + addc 22,22,18 + mulhdu 18,14,31 + adde 23,23,19 + mulhdu 19,15,31 + adde 24,24,20 + mulhdu 20,16,31 + adde 25,25,21 + mulhdu 21,17,31 + mulld 31,7,22 + adde 26,26,18 + adde 27,27,19 + adde 28,28,20 + adde 29,29,21 + bdnz .Lsqr8x_reduction + + ld 18,8*1(5) + ld 19,8*2(5) + ld 20,8*3(5) + ld 21,8*4(5) + subi 3,5,8*7 + cmpld 6,4 + addc 22,22,18 + ld 18,8*5(5) + adde 23,23,19 + ld 19,8*6(5) + adde 24,24,20 + ld 20,8*7(5) + adde 25,25,21 + ld 21,8*8(5) + adde 26,26,18 + adde 27,27,19 + adde 28,28,20 + adde 29,29,21 + + beq .Lsqr8x8_post_condition + + ld 7,8*0(3) + ld 9,8*1(4) + ld 10,8*2(4) + ld 11,8*3(4) + ld 12,8*4(4) + ld 14,8*5(4) + ld 15,8*6(4) + ld 16,8*7(4) + ldu 17,8*8(4) + li 30,0 + +.align 5 +.Lsqr8x_tail: + mulld 18,9,7 + addze 31,0 + mulld 19,10,7 + addi 30,30,8 + mulld 20,11,7 + andi. 30,30,8*8-1 + mulld 21,12,7 + addc 22,22,18 + mulld 18,14,7 + adde 23,23,19 + mulld 19,15,7 + adde 24,24,20 + mulld 20,16,7 + adde 25,25,21 + mulld 21,17,7 + adde 26,26,18 + mulhdu 18,9,7 + adde 27,27,19 + mulhdu 19,10,7 + adde 28,28,20 + mulhdu 20,11,7 + adde 29,29,21 + mulhdu 21,12,7 + addze 31,31 + stdu 22,8(5) + addc 22,23,18 + mulhdu 18,14,7 + adde 23,24,19 + mulhdu 19,15,7 + adde 24,25,20 + mulhdu 20,16,7 + adde 25,26,21 + mulhdu 21,17,7 + ldx 7,3,30 + adde 26,27,18 + adde 27,28,19 + adde 28,29,20 + adde 29,31,21 + + bne .Lsqr8x_tail + + + ld 9,8*1(5) + ld 31,8*10(1) + cmpld 6,4 + ld 10,8*2(5) + sub 20,6,8 + ld 11,8*3(5) + ld 12,8*4(5) + ld 14,8*5(5) + ld 15,8*6(5) + ld 16,8*7(5) + ld 17,8*8(5) + beq .Lsqr8x_tail_break + + addc 22,22,9 + ld 9,8*1(4) + adde 23,23,10 + ld 10,8*2(4) + adde 24,24,11 + ld 11,8*3(4) + adde 25,25,12 + ld 12,8*4(4) + adde 26,26,14 + ld 14,8*5(4) + adde 27,27,15 + ld 15,8*6(4) + adde 28,28,16 + ld 16,8*7(4) + adde 29,29,17 + ldu 17,8*8(4) + + b .Lsqr8x_tail + +.align 5 +.Lsqr8x_tail_break: + ld 7,8*8(1) + ld 21,8*9(1) + addi 30,5,8*8 + + addic 31,31,-1 + adde 18,22,9 + ld 22,8*8(3) + ld 9,8*1(20) + adde 19,23,10 + ld 23,8*9(3) + ld 10,8*2(20) + adde 24,24,11 + ld 11,8*3(20) + adde 25,25,12 + ld 12,8*4(20) + adde 26,26,14 + ld 14,8*5(20) + adde 27,27,15 + ld 15,8*6(20) + adde 28,28,16 + ld 16,8*7(20) + adde 29,29,17 + ld 17,8*8(20) + addi 4,20,8*8 + addze 20,0 + mulld 31,7,22 + std 18,8*1(5) + cmpld 30,21 + std 19,8*2(5) + li 30,8 + std 24,8*3(5) + ld 24,8*10(3) + std 25,8*4(5) + ld 25,8*11(3) + std 26,8*5(5) + ld 26,8*12(3) + std 27,8*6(5) + ld 27,8*13(3) + std 28,8*7(5) + ld 28,8*14(3) + std 29,8*8(5) + ld 29,8*15(3) + std 20,8*10(1) + addi 5,3,8*7 + mtctr 30 + bne .Lsqr8x_reduction + + + + + + + ld 3,8*6(1) + srwi 30,8,6 + mr 7,5 + addi 5,5,8*8 + subi 30,30,1 + subfc 18,9,22 + subfe 19,10,23 + mr 31,20 + mr 6,3 + + mtctr 30 + b .Lsqr8x_sub + +.align 5 +.Lsqr8x_sub: + ld 9,8*1(4) + ld 22,8*1(5) + ld 10,8*2(4) + ld 23,8*2(5) + subfe 20,11,24 + ld 11,8*3(4) + ld 24,8*3(5) + subfe 21,12,25 + ld 12,8*4(4) + ld 25,8*4(5) + std 18,8*1(3) + subfe 18,14,26 + ld 14,8*5(4) + ld 26,8*5(5) + std 19,8*2(3) + subfe 19,15,27 + ld 15,8*6(4) + ld 27,8*6(5) + std 20,8*3(3) + subfe 20,16,28 + ld 16,8*7(4) + ld 28,8*7(5) + std 21,8*4(3) + subfe 21,17,29 + ldu 17,8*8(4) + ldu 29,8*8(5) + std 18,8*5(3) + subfe 18,9,22 + std 19,8*6(3) + subfe 19,10,23 + std 20,8*7(3) + stdu 21,8*8(3) + bdnz .Lsqr8x_sub + + srwi 30,8,5 + ld 9,8*1(6) + ld 22,8*1(7) + subi 30,30,1 + ld 10,8*2(6) + ld 23,8*2(7) + subfe 20,11,24 + ld 11,8*3(6) + ld 24,8*3(7) + subfe 21,12,25 + ld 12,8*4(6) + ldu 25,8*4(7) + std 18,8*1(3) + subfe 18,14,26 + std 19,8*2(3) + subfe 19,15,27 + std 20,8*3(3) + subfe 20,16,28 + std 21,8*4(3) + subfe 21,17,29 + std 18,8*5(3) + subfe 31,0,31 + std 19,8*6(3) + std 20,8*7(3) + std 21,8*8(3) + + addi 5,1,8*11 + mtctr 30 + +.Lsqr4x_cond_copy: + andc 9,9,31 + std 0,-8*3(7) + and 22,22,31 + std 0,-8*2(7) + andc 10,10,31 + std 0,-8*1(7) + and 23,23,31 + std 0,-8*0(7) + andc 11,11,31 + std 0,8*1(5) + and 24,24,31 + std 0,8*2(5) + andc 12,12,31 + std 0,8*3(5) + and 25,25,31 + stdu 0,8*4(5) + or 18,9,22 + ld 9,8*5(6) + ld 22,8*1(7) + or 19,10,23 + ld 10,8*6(6) + ld 23,8*2(7) + or 20,11,24 + ld 11,8*7(6) + ld 24,8*3(7) + or 21,12,25 + ld 12,8*8(6) + ldu 25,8*4(7) + std 18,8*1(6) + std 19,8*2(6) + std 20,8*3(6) + stdu 21,8*4(6) + bdnz .Lsqr4x_cond_copy + + ld 4,0(1) + andc 9,9,31 + and 22,22,31 + andc 10,10,31 + and 23,23,31 + andc 11,11,31 + and 24,24,31 + andc 12,12,31 + and 25,25,31 + or 18,9,22 + or 19,10,23 + or 20,11,24 + or 21,12,25 + std 18,8*1(6) + std 19,8*2(6) + std 20,8*3(6) + std 21,8*4(6) + + b .Lsqr8x_done + +.align 5 +.Lsqr8x8_post_condition: + ld 3,8*6(1) + ld 4,0(1) + addze 31,0 + + + subfc 22,9,22 + subfe 23,10,23 + std 0,8*12(1) + std 0,8*13(1) + subfe 24,11,24 + std 0,8*14(1) + std 0,8*15(1) + subfe 25,12,25 + std 0,8*16(1) + std 0,8*17(1) + subfe 26,14,26 + std 0,8*18(1) + std 0,8*19(1) + subfe 27,15,27 + std 0,8*20(1) + std 0,8*21(1) + subfe 28,16,28 + std 0,8*22(1) + std 0,8*23(1) + subfe 29,17,29 + std 0,8*24(1) + std 0,8*25(1) + subfe 31,0,31 + std 0,8*26(1) + std 0,8*27(1) + + and 9,9,31 + and 10,10,31 + addc 22,22,9 + and 11,11,31 + adde 23,23,10 + and 12,12,31 + adde 24,24,11 + and 14,14,31 + adde 25,25,12 + and 15,15,31 + adde 26,26,14 + and 16,16,31 + adde 27,27,15 + and 17,17,31 + adde 28,28,16 + adde 29,29,17 + std 22,8*1(3) + std 23,8*2(3) + std 24,8*3(3) + std 25,8*4(3) + std 26,8*5(3) + std 27,8*6(3) + std 28,8*7(3) + std 29,8*8(3) + +.Lsqr8x_done: + std 0,8*8(1) + std 0,8*10(1) + + ld 14,-8*18(4) + li 3,1 + ld 15,-8*17(4) + ld 16,-8*16(4) + ld 17,-8*15(4) + ld 18,-8*14(4) + ld 19,-8*13(4) + ld 20,-8*12(4) + ld 21,-8*11(4) + ld 22,-8*10(4) + ld 23,-8*9(4) + ld 24,-8*8(4) + ld 25,-8*7(4) + ld 26,-8*6(4) + ld 27,-8*5(4) + ld 28,-8*4(4) + ld 29,-8*3(4) + ld 30,-8*2(4) + ld 31,-8*1(4) + mr 1,4 + blr +.long 0 +.byte 0,12,4,0x20,0x80,18,6,0 +.long 0 +.size __bn_sqr8x_mont,.-__bn_sqr8x_mont +.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 |