2 # SPDX-License-Identifier: GPL-2.0
4 # This code is taken from CRYPTOGAMs[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
10 # [1] https://www.openssl.org/~appro/cryptogams/
13 # All rights reserved.
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
19 # * Redistributions of source code must retain copyright notices,
20 # this list of conditions and the following disclaimer.
22 # * Redistributions in binary form must reproduce the above
23 # copyright notice, this list of conditions and the following
24 # disclaimer in the documentation and/or other materials
25 # provided with the distribution.
27 # * Neither the name of the CRYPTOGAMS nor the names of its
28 # copyright holder and contributors may be used to endorse or
29 # promote products derived from this software without specific
30 # prior written permission.
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 # ====================================================================
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see http://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
56 # This module implements support for AES instructions as per PowerISA
57 # specification version 2.07, first implemented by POWER8 processor.
58 # The module is endian-agnostic in sense that it supports both big-
59 # and little-endian cases. Data alignment in parallelizable modes is
60 # handled with VSX loads and stores, which implies MSR.VSX flag being
61 # set. It should also be noted that ISA specification doesn't prohibit
62 # alignment exceptions for these instructions on page boundaries.
63 # Initially alignment was handled in pure AltiVec/VMX way [when data
64 # is aligned programmatically, which in turn guarantees exception-
65 # free execution], but it turned to hamper performance when vcipher
66 # instructions are interleaved. It's reckoned that eventual
67 # misalignment penalties at page boundaries are in average lower
68 # than additional overhead in pure AltiVec approach.
72 # Add XTS subroutine; 9x improvement on little- and 12x on big-endian
73 # systems were measured.
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with
77 # 128-bit key (less is better).
79 # CBC en-/decrypt CTR XTS
80 # POWER8[le] 3.96/0.72 0.74 1.1
81 # POWER8[be] 3.75/0.65 0.66 1.0
85 if ($flavour =~ /64/) {
93 } elsif ($flavour =~ /32/) {
101 } else { die "nonsense $flavour"; }
103 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
105 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
106 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
108 die "can't locate ppc-xlate.pl";
110 open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
118 #########################################################################
119 {{{ # Key setup procedures #
120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
122 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
131 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
132 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
133 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
138 mflr $ptr #vvvvv "distance between . and rcon
143 .byte 0,12,0x14,0,0,0,0,0
144 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
146 .globl .${prefix}_set_encrypt_key
149 $PUSH r11,$LRSAVE($sp)
153 beq- Lenc_key_abort # if ($inp==0) return -1;
155 beq- Lenc_key_abort # if ($out==0) return -1;
173 addi $inp,$inp,15 # 15 is not typo
174 lvsr $key,0,r9 # borrow $key
178 le?vspltisb $mask,0x0f # borrow $mask
180 le?vxor $key,$key,$mask # adjust for byte swap
183 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
185 vxor $zero,$zero,$zero
188 ?lvsr $outperm,0,$out
191 ?vperm $outmask,$zero,$outmask,$outperm
201 vperm $key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi $tmp,$zero,$in0,12 # >>32
203 vperm $outtail,$in0,$in0,$outperm # rotate
204 vsel $stage,$outhead,$outtail,$outmask
205 vmr $outhead,$outtail
206 vcipherlast $key,$key,$rcon
211 vsldoi $tmp,$zero,$tmp,12 # >>32
213 vsldoi $tmp,$zero,$tmp,12 # >>32
215 vadduwm $rcon,$rcon,$rcon
219 lvx $rcon,0,$ptr # last two round keys
221 vperm $key,$in0,$in0,$mask # rotate-n-splat
222 vsldoi $tmp,$zero,$in0,12 # >>32
223 vperm $outtail,$in0,$in0,$outperm # rotate
224 vsel $stage,$outhead,$outtail,$outmask
225 vmr $outhead,$outtail
226 vcipherlast $key,$key,$rcon
231 vsldoi $tmp,$zero,$tmp,12 # >>32
233 vsldoi $tmp,$zero,$tmp,12 # >>32
235 vadduwm $rcon,$rcon,$rcon
238 vperm $key,$in0,$in0,$mask # rotate-n-splat
239 vsldoi $tmp,$zero,$in0,12 # >>32
240 vperm $outtail,$in0,$in0,$outperm # rotate
241 vsel $stage,$outhead,$outtail,$outmask
242 vmr $outhead,$outtail
243 vcipherlast $key,$key,$rcon
248 vsldoi $tmp,$zero,$tmp,12 # >>32
250 vsldoi $tmp,$zero,$tmp,12 # >>32
253 vperm $outtail,$in0,$in0,$outperm # rotate
254 vsel $stage,$outhead,$outtail,$outmask
255 vmr $outhead,$outtail
258 addi $inp,$out,15 # 15 is not typo
268 vperm $outtail,$in0,$in0,$outperm # rotate
269 vsel $stage,$outhead,$outtail,$outmask
270 vmr $outhead,$outtail
273 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
274 vspltisb $key,8 # borrow $key
276 vsububm $mask,$mask,$key # adjust the mask
279 vperm $key,$in1,$in1,$mask # rotate-n-splat
280 vsldoi $tmp,$zero,$in0,12 # >>32
281 vcipherlast $key,$key,$rcon
284 vsldoi $tmp,$zero,$tmp,12 # >>32
286 vsldoi $tmp,$zero,$tmp,12 # >>32
289 vsldoi $stage,$zero,$in1,8
292 vsldoi $in1,$zero,$in1,12 # >>32
293 vadduwm $rcon,$rcon,$rcon
297 vsldoi $stage,$stage,$in0,8
299 vperm $key,$in1,$in1,$mask # rotate-n-splat
300 vsldoi $tmp,$zero,$in0,12 # >>32
301 vperm $outtail,$stage,$stage,$outperm # rotate
302 vsel $stage,$outhead,$outtail,$outmask
303 vmr $outhead,$outtail
304 vcipherlast $key,$key,$rcon
308 vsldoi $stage,$in0,$in1,8
310 vsldoi $tmp,$zero,$tmp,12 # >>32
311 vperm $outtail,$stage,$stage,$outperm # rotate
312 vsel $stage,$outhead,$outtail,$outmask
313 vmr $outhead,$outtail
315 vsldoi $tmp,$zero,$tmp,12 # >>32
322 vsldoi $in1,$zero,$in1,12 # >>32
323 vadduwm $rcon,$rcon,$rcon
327 vperm $outtail,$in0,$in0,$outperm # rotate
328 vsel $stage,$outhead,$outtail,$outmask
329 vmr $outhead,$outtail
331 addi $inp,$out,15 # 15 is not typo
344 vperm $outtail,$in0,$in0,$outperm # rotate
345 vsel $stage,$outhead,$outtail,$outmask
346 vmr $outhead,$outtail
349 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
353 vperm $key,$in1,$in1,$mask # rotate-n-splat
354 vsldoi $tmp,$zero,$in0,12 # >>32
355 vperm $outtail,$in1,$in1,$outperm # rotate
356 vsel $stage,$outhead,$outtail,$outmask
357 vmr $outhead,$outtail
358 vcipherlast $key,$key,$rcon
363 vsldoi $tmp,$zero,$tmp,12 # >>32
365 vsldoi $tmp,$zero,$tmp,12 # >>32
367 vadduwm $rcon,$rcon,$rcon
369 vperm $outtail,$in0,$in0,$outperm # rotate
370 vsel $stage,$outhead,$outtail,$outmask
371 vmr $outhead,$outtail
373 addi $inp,$out,15 # 15 is not typo
377 vspltw $key,$in0,3 # just splat
378 vsldoi $tmp,$zero,$in1,12 # >>32
382 vsldoi $tmp,$zero,$tmp,12 # >>32
384 vsldoi $tmp,$zero,$tmp,12 # >>32
392 lvx $in1,0,$inp # redundant in aligned case
393 vsel $in1,$outhead,$in1,$outmask
403 .byte 0,12,0x14,1,0,0,3,0
405 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
407 .globl .${prefix}_set_decrypt_key
408 $STU $sp,-$FRAME($sp)
410 $PUSH r10,$FRAME+$LRSAVE($sp)
418 subi $inp,$out,240 # first round key
419 srwi $rounds,$rounds,1
420 add $out,$inp,$cnt # last round key
444 xor r3,r3,r3 # return value
449 .byte 0,12,4,1,0x80,0,3,0
451 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
454 #########################################################################
455 {{{ # Single block en- and decrypt procedures #
458 my $n = $dir eq "de" ? "n" : "";
459 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
462 .globl .${prefix}_${dir}crypt
463 lwz $rounds,240($key)
466 li $idx,15 # 15 is not typo
472 lvsl v2,0,$inp # inpperm
474 ?lvsl v3,0,r11 # outperm
477 vperm v0,v0,v1,v2 # align [and byte swap in LE]
479 ?lvsl v5,0,$key # keyperm
480 srwi $rounds,$rounds,1
483 subi $rounds,$rounds,1
484 ?vperm v1,v1,v2,v5 # align round key
506 v${n}cipherlast v0,v0,v1
510 li $idx,15 # 15 is not typo
511 ?vperm v2,v1,v2,v3 # outmask
513 lvx v1,0,$out # outhead
514 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
524 .byte 0,12,0x14,0,0,0,3,0
526 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
532 #########################################################################
533 {{{ # CBC en- and decrypt procedures #
534 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
535 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
536 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
539 .globl .${prefix}_cbc_encrypt
543 cmpwi $enc,0 # test direction
549 vxor $rndkey0,$rndkey0,$rndkey0
550 le?vspltisb $tmp,0x0f
552 lvx $ivec,0,$ivp # load [unaligned] iv
554 lvx $inptail,$idx,$ivp
555 le?vxor $inpperm,$inpperm,$tmp
556 vperm $ivec,$ivec,$inptail,$inpperm
559 ?lvsl $keyperm,0,$key # prepare for unaligned key
560 lwz $rounds,240($key)
562 lvsr $inpperm,0,r11 # prepare for unaligned load
564 addi $inp,$inp,15 # 15 is not typo
565 le?vxor $inpperm,$inpperm,$tmp
567 ?lvsr $outperm,0,$out # prepare for unaligned store
570 ?vperm $outmask,$rndkey0,$outmask,$outperm
571 le?vxor $outperm,$outperm,$tmp
573 srwi $rounds,$rounds,1
575 subi $rounds,$rounds,1
583 subi $len,$len,16 # len-=16
586 vperm $inout,$inout,$inptail,$inpperm
587 lvx $rndkey1,$idx,$key
589 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
590 vxor $inout,$inout,$rndkey0
591 lvx $rndkey0,$idx,$key
593 vxor $inout,$inout,$ivec
596 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
597 vcipher $inout,$inout,$rndkey1
598 lvx $rndkey1,$idx,$key
600 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
601 vcipher $inout,$inout,$rndkey0
602 lvx $rndkey0,$idx,$key
606 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
607 vcipher $inout,$inout,$rndkey1
608 lvx $rndkey1,$idx,$key
610 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
611 vcipherlast $ivec,$inout,$rndkey0
614 vperm $tmp,$ivec,$ivec,$outperm
615 vsel $inout,$outhead,$tmp,$outmask
626 bge _aesp8_cbc_decrypt8x
631 subi $len,$len,16 # len-=16
634 vperm $tmp,$tmp,$inptail,$inpperm
635 lvx $rndkey1,$idx,$key
637 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
638 vxor $inout,$tmp,$rndkey0
639 lvx $rndkey0,$idx,$key
643 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
644 vncipher $inout,$inout,$rndkey1
645 lvx $rndkey1,$idx,$key
647 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
648 vncipher $inout,$inout,$rndkey0
649 lvx $rndkey0,$idx,$key
653 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
654 vncipher $inout,$inout,$rndkey1
655 lvx $rndkey1,$idx,$key
657 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
658 vncipherlast $inout,$inout,$rndkey0
661 vxor $inout,$inout,$ivec
663 vperm $tmp,$inout,$inout,$outperm
664 vsel $inout,$outhead,$tmp,$outmask
672 lvx $inout,0,$out # redundant in aligned case
673 vsel $inout,$outhead,$inout,$outmask
676 neg $enc,$ivp # write [unaligned] iv
677 li $idx,15 # 15 is not typo
678 vxor $rndkey0,$rndkey0,$rndkey0
680 le?vspltisb $tmp,0x0f
681 ?lvsl $outperm,0,$enc
682 ?vperm $outmask,$rndkey0,$outmask,$outperm
683 le?vxor $outperm,$outperm,$tmp
685 vperm $ivec,$ivec,$ivec,$outperm
686 vsel $inout,$outhead,$ivec,$outmask
687 lvx $inptail,$idx,$ivp
689 vsel $inout,$ivec,$inptail,$outmask
690 stvx $inout,$idx,$ivp
695 .byte 0,12,0x14,0,0,0,6,0
698 #########################################################################
699 {{ # Optimized CBC decrypt procedure #
701 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
702 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
703 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
704 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
705 # v26-v31 last 6 round keys
706 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
710 _aesp8_cbc_decrypt8x:
711 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
712 li r10,`$FRAME+8*16+15`
713 li r11,`$FRAME+8*16+31`
714 stvx v20,r10,$sp # ABI says so
737 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
739 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
741 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
743 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
745 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
747 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
749 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
753 subi $rounds,$rounds,3 # -4 in total
754 subi $len,$len,128 # bias
756 lvx $rndkey0,$x00,$key # load key schedule
760 ?vperm $rndkey0,$rndkey0,v30,$keyperm
761 addi $key_,$sp,$FRAME+15
765 ?vperm v24,v30,v31,$keyperm
768 stvx v24,$x00,$key_ # off-load round[1]
769 ?vperm v25,v31,v30,$keyperm
771 stvx v25,$x10,$key_ # off-load round[2]
772 addi $key_,$key_,0x20
773 bdnz Load_cbc_dec_key
776 ?vperm v24,v30,v31,$keyperm
778 stvx v24,$x00,$key_ # off-load round[3]
779 ?vperm v25,v31,v26,$keyperm
781 stvx v25,$x10,$key_ # off-load round[4]
782 addi $key_,$sp,$FRAME+15 # rewind $key_
783 ?vperm v26,v26,v27,$keyperm
785 ?vperm v27,v27,v28,$keyperm
787 ?vperm v28,v28,v29,$keyperm
789 ?vperm v29,v29,v30,$keyperm
790 lvx $out0,$x70,$key # borrow $out0
791 ?vperm v30,v30,v31,$keyperm
792 lvx v24,$x00,$key_ # pre-load round[1]
793 ?vperm v31,v31,$out0,$keyperm
794 lvx v25,$x10,$key_ # pre-load round[2]
796 #lvx $inptail,0,$inp # "caller" already did this
797 #addi $inp,$inp,15 # 15 is not typo
798 subi $inp,$inp,15 # undo "caller"
801 lvx_u $in0,$x00,$inp # load first 8 "words"
802 le?lvsl $inpperm,0,$idx
803 le?vspltisb $tmp,0x0f
805 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
807 le?vperm $in0,$in0,$in0,$inpperm
809 le?vperm $in1,$in1,$in1,$inpperm
811 le?vperm $in2,$in2,$in2,$inpperm
812 vxor $out0,$in0,$rndkey0
814 le?vperm $in3,$in3,$in3,$inpperm
815 vxor $out1,$in1,$rndkey0
817 le?vperm $in4,$in4,$in4,$inpperm
818 vxor $out2,$in2,$rndkey0
821 le?vperm $in5,$in5,$in5,$inpperm
822 vxor $out3,$in3,$rndkey0
823 le?vperm $in6,$in6,$in6,$inpperm
824 vxor $out4,$in4,$rndkey0
825 le?vperm $in7,$in7,$in7,$inpperm
826 vxor $out5,$in5,$rndkey0
827 vxor $out6,$in6,$rndkey0
828 vxor $out7,$in7,$rndkey0
834 vncipher $out0,$out0,v24
835 vncipher $out1,$out1,v24
836 vncipher $out2,$out2,v24
837 vncipher $out3,$out3,v24
838 vncipher $out4,$out4,v24
839 vncipher $out5,$out5,v24
840 vncipher $out6,$out6,v24
841 vncipher $out7,$out7,v24
842 lvx v24,$x20,$key_ # round[3]
843 addi $key_,$key_,0x20
845 vncipher $out0,$out0,v25
846 vncipher $out1,$out1,v25
847 vncipher $out2,$out2,v25
848 vncipher $out3,$out3,v25
849 vncipher $out4,$out4,v25
850 vncipher $out5,$out5,v25
851 vncipher $out6,$out6,v25
852 vncipher $out7,$out7,v25
853 lvx v25,$x10,$key_ # round[4]
856 subic $len,$len,128 # $len-=128
857 vncipher $out0,$out0,v24
858 vncipher $out1,$out1,v24
859 vncipher $out2,$out2,v24
860 vncipher $out3,$out3,v24
861 vncipher $out4,$out4,v24
862 vncipher $out5,$out5,v24
863 vncipher $out6,$out6,v24
864 vncipher $out7,$out7,v24
866 subfe. r0,r0,r0 # borrow?-1:0
867 vncipher $out0,$out0,v25
868 vncipher $out1,$out1,v25
869 vncipher $out2,$out2,v25
870 vncipher $out3,$out3,v25
871 vncipher $out4,$out4,v25
872 vncipher $out5,$out5,v25
873 vncipher $out6,$out6,v25
874 vncipher $out7,$out7,v25
877 vncipher $out0,$out0,v26
878 vncipher $out1,$out1,v26
879 vncipher $out2,$out2,v26
880 vncipher $out3,$out3,v26
881 vncipher $out4,$out4,v26
882 vncipher $out5,$out5,v26
883 vncipher $out6,$out6,v26
884 vncipher $out7,$out7,v26
886 add $inp,$inp,r0 # $inp is adjusted in such
887 # way that at exit from the
888 # loop inX-in7 are loaded
890 vncipher $out0,$out0,v27
891 vncipher $out1,$out1,v27
892 vncipher $out2,$out2,v27
893 vncipher $out3,$out3,v27
894 vncipher $out4,$out4,v27
895 vncipher $out5,$out5,v27
896 vncipher $out6,$out6,v27
897 vncipher $out7,$out7,v27
899 addi $key_,$sp,$FRAME+15 # rewind $key_
900 vncipher $out0,$out0,v28
901 vncipher $out1,$out1,v28
902 vncipher $out2,$out2,v28
903 vncipher $out3,$out3,v28
904 vncipher $out4,$out4,v28
905 vncipher $out5,$out5,v28
906 vncipher $out6,$out6,v28
907 vncipher $out7,$out7,v28
908 lvx v24,$x00,$key_ # re-pre-load round[1]
910 vncipher $out0,$out0,v29
911 vncipher $out1,$out1,v29
912 vncipher $out2,$out2,v29
913 vncipher $out3,$out3,v29
914 vncipher $out4,$out4,v29
915 vncipher $out5,$out5,v29
916 vncipher $out6,$out6,v29
917 vncipher $out7,$out7,v29
918 lvx v25,$x10,$key_ # re-pre-load round[2]
920 vncipher $out0,$out0,v30
921 vxor $ivec,$ivec,v31 # xor with last round key
922 vncipher $out1,$out1,v30
924 vncipher $out2,$out2,v30
926 vncipher $out3,$out3,v30
928 vncipher $out4,$out4,v30
930 vncipher $out5,$out5,v30
932 vncipher $out6,$out6,v30
934 vncipher $out7,$out7,v30
937 vncipherlast $out0,$out0,$ivec
938 vncipherlast $out1,$out1,$in0
939 lvx_u $in0,$x00,$inp # load next input block
940 vncipherlast $out2,$out2,$in1
942 vncipherlast $out3,$out3,$in2
943 le?vperm $in0,$in0,$in0,$inpperm
945 vncipherlast $out4,$out4,$in3
946 le?vperm $in1,$in1,$in1,$inpperm
948 vncipherlast $out5,$out5,$in4
949 le?vperm $in2,$in2,$in2,$inpperm
951 vncipherlast $out6,$out6,$in5
952 le?vperm $in3,$in3,$in3,$inpperm
954 vncipherlast $out7,$out7,$in6
955 le?vperm $in4,$in4,$in4,$inpperm
958 le?vperm $in5,$in5,$in5,$inpperm
962 le?vperm $out0,$out0,$out0,$inpperm
963 le?vperm $out1,$out1,$out1,$inpperm
964 stvx_u $out0,$x00,$out
965 le?vperm $in6,$in6,$in6,$inpperm
966 vxor $out0,$in0,$rndkey0
967 le?vperm $out2,$out2,$out2,$inpperm
968 stvx_u $out1,$x10,$out
969 le?vperm $in7,$in7,$in7,$inpperm
970 vxor $out1,$in1,$rndkey0
971 le?vperm $out3,$out3,$out3,$inpperm
972 stvx_u $out2,$x20,$out
973 vxor $out2,$in2,$rndkey0
974 le?vperm $out4,$out4,$out4,$inpperm
975 stvx_u $out3,$x30,$out
976 vxor $out3,$in3,$rndkey0
977 le?vperm $out5,$out5,$out5,$inpperm
978 stvx_u $out4,$x40,$out
979 vxor $out4,$in4,$rndkey0
980 le?vperm $out6,$out6,$out6,$inpperm
981 stvx_u $out5,$x50,$out
982 vxor $out5,$in5,$rndkey0
983 le?vperm $out7,$out7,$out7,$inpperm
984 stvx_u $out6,$x60,$out
985 vxor $out6,$in6,$rndkey0
986 stvx_u $out7,$x70,$out
988 vxor $out7,$in7,$rndkey0
991 beq Loop_cbc_dec8x # did $len-=128 borrow?
998 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
999 vncipher $out1,$out1,v24
1000 vncipher $out2,$out2,v24
1001 vncipher $out3,$out3,v24
1002 vncipher $out4,$out4,v24
1003 vncipher $out5,$out5,v24
1004 vncipher $out6,$out6,v24
1005 vncipher $out7,$out7,v24
1006 lvx v24,$x20,$key_ # round[3]
1007 addi $key_,$key_,0x20
1009 vncipher $out1,$out1,v25
1010 vncipher $out2,$out2,v25
1011 vncipher $out3,$out3,v25
1012 vncipher $out4,$out4,v25
1013 vncipher $out5,$out5,v25
1014 vncipher $out6,$out6,v25
1015 vncipher $out7,$out7,v25
1016 lvx v25,$x10,$key_ # round[4]
1017 bdnz Loop_cbc_dec8x_tail
1019 vncipher $out1,$out1,v24
1020 vncipher $out2,$out2,v24
1021 vncipher $out3,$out3,v24
1022 vncipher $out4,$out4,v24
1023 vncipher $out5,$out5,v24
1024 vncipher $out6,$out6,v24
1025 vncipher $out7,$out7,v24
1027 vncipher $out1,$out1,v25
1028 vncipher $out2,$out2,v25
1029 vncipher $out3,$out3,v25
1030 vncipher $out4,$out4,v25
1031 vncipher $out5,$out5,v25
1032 vncipher $out6,$out6,v25
1033 vncipher $out7,$out7,v25
1035 vncipher $out1,$out1,v26
1036 vncipher $out2,$out2,v26
1037 vncipher $out3,$out3,v26
1038 vncipher $out4,$out4,v26
1039 vncipher $out5,$out5,v26
1040 vncipher $out6,$out6,v26
1041 vncipher $out7,$out7,v26
1043 vncipher $out1,$out1,v27
1044 vncipher $out2,$out2,v27
1045 vncipher $out3,$out3,v27
1046 vncipher $out4,$out4,v27
1047 vncipher $out5,$out5,v27
1048 vncipher $out6,$out6,v27
1049 vncipher $out7,$out7,v27
1051 vncipher $out1,$out1,v28
1052 vncipher $out2,$out2,v28
1053 vncipher $out3,$out3,v28
1054 vncipher $out4,$out4,v28
1055 vncipher $out5,$out5,v28
1056 vncipher $out6,$out6,v28
1057 vncipher $out7,$out7,v28
1059 vncipher $out1,$out1,v29
1060 vncipher $out2,$out2,v29
1061 vncipher $out3,$out3,v29
1062 vncipher $out4,$out4,v29
1063 vncipher $out5,$out5,v29
1064 vncipher $out6,$out6,v29
1065 vncipher $out7,$out7,v29
1067 vncipher $out1,$out1,v30
1068 vxor $ivec,$ivec,v31 # last round key
1069 vncipher $out2,$out2,v30
1071 vncipher $out3,$out3,v30
1073 vncipher $out4,$out4,v30
1075 vncipher $out5,$out5,v30
1077 vncipher $out6,$out6,v30
1079 vncipher $out7,$out7,v30
1082 cmplwi $len,32 # switch($len)
1087 blt Lcbc_dec8x_three
1096 vncipherlast $out1,$out1,$ivec
1097 vncipherlast $out2,$out2,$in1
1098 vncipherlast $out3,$out3,$in2
1099 vncipherlast $out4,$out4,$in3
1100 vncipherlast $out5,$out5,$in4
1101 vncipherlast $out6,$out6,$in5
1102 vncipherlast $out7,$out7,$in6
1105 le?vperm $out1,$out1,$out1,$inpperm
1106 le?vperm $out2,$out2,$out2,$inpperm
1107 stvx_u $out1,$x00,$out
1108 le?vperm $out3,$out3,$out3,$inpperm
1109 stvx_u $out2,$x10,$out
1110 le?vperm $out4,$out4,$out4,$inpperm
1111 stvx_u $out3,$x20,$out
1112 le?vperm $out5,$out5,$out5,$inpperm
1113 stvx_u $out4,$x30,$out
1114 le?vperm $out6,$out6,$out6,$inpperm
1115 stvx_u $out5,$x40,$out
1116 le?vperm $out7,$out7,$out7,$inpperm
1117 stvx_u $out6,$x50,$out
1118 stvx_u $out7,$x60,$out
1124 vncipherlast $out2,$out2,$ivec
1125 vncipherlast $out3,$out3,$in2
1126 vncipherlast $out4,$out4,$in3
1127 vncipherlast $out5,$out5,$in4
1128 vncipherlast $out6,$out6,$in5
1129 vncipherlast $out7,$out7,$in6
1132 le?vperm $out2,$out2,$out2,$inpperm
1133 le?vperm $out3,$out3,$out3,$inpperm
1134 stvx_u $out2,$x00,$out
1135 le?vperm $out4,$out4,$out4,$inpperm
1136 stvx_u $out3,$x10,$out
1137 le?vperm $out5,$out5,$out5,$inpperm
1138 stvx_u $out4,$x20,$out
1139 le?vperm $out6,$out6,$out6,$inpperm
1140 stvx_u $out5,$x30,$out
1141 le?vperm $out7,$out7,$out7,$inpperm
1142 stvx_u $out6,$x40,$out
1143 stvx_u $out7,$x50,$out
1149 vncipherlast $out3,$out3,$ivec
1150 vncipherlast $out4,$out4,$in3
1151 vncipherlast $out5,$out5,$in4
1152 vncipherlast $out6,$out6,$in5
1153 vncipherlast $out7,$out7,$in6
1156 le?vperm $out3,$out3,$out3,$inpperm
1157 le?vperm $out4,$out4,$out4,$inpperm
1158 stvx_u $out3,$x00,$out
1159 le?vperm $out5,$out5,$out5,$inpperm
1160 stvx_u $out4,$x10,$out
1161 le?vperm $out6,$out6,$out6,$inpperm
1162 stvx_u $out5,$x20,$out
1163 le?vperm $out7,$out7,$out7,$inpperm
1164 stvx_u $out6,$x30,$out
1165 stvx_u $out7,$x40,$out
1171 vncipherlast $out4,$out4,$ivec
1172 vncipherlast $out5,$out5,$in4
1173 vncipherlast $out6,$out6,$in5
1174 vncipherlast $out7,$out7,$in6
1177 le?vperm $out4,$out4,$out4,$inpperm
1178 le?vperm $out5,$out5,$out5,$inpperm
1179 stvx_u $out4,$x00,$out
1180 le?vperm $out6,$out6,$out6,$inpperm
1181 stvx_u $out5,$x10,$out
1182 le?vperm $out7,$out7,$out7,$inpperm
1183 stvx_u $out6,$x20,$out
1184 stvx_u $out7,$x30,$out
1190 vncipherlast $out5,$out5,$ivec
1191 vncipherlast $out6,$out6,$in5
1192 vncipherlast $out7,$out7,$in6
1195 le?vperm $out5,$out5,$out5,$inpperm
1196 le?vperm $out6,$out6,$out6,$inpperm
1197 stvx_u $out5,$x00,$out
1198 le?vperm $out7,$out7,$out7,$inpperm
1199 stvx_u $out6,$x10,$out
1200 stvx_u $out7,$x20,$out
1206 vncipherlast $out6,$out6,$ivec
1207 vncipherlast $out7,$out7,$in6
1210 le?vperm $out6,$out6,$out6,$inpperm
1211 le?vperm $out7,$out7,$out7,$inpperm
1212 stvx_u $out6,$x00,$out
1213 stvx_u $out7,$x10,$out
1219 vncipherlast $out7,$out7,$ivec
1222 le?vperm $out7,$out7,$out7,$inpperm
1227 le?vperm $ivec,$ivec,$ivec,$inpperm
1228 stvx_u $ivec,0,$ivp # write [unaligned] iv
1232 stvx $inpperm,r10,$sp # wipe copies of round keys
1234 stvx $inpperm,r11,$sp
1236 stvx $inpperm,r10,$sp
1238 stvx $inpperm,r11,$sp
1240 stvx $inpperm,r10,$sp
1242 stvx $inpperm,r11,$sp
1244 stvx $inpperm,r10,$sp
1246 stvx $inpperm,r11,$sp
1250 lvx v20,r10,$sp # ABI says so
1272 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1273 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1274 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1275 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1276 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1277 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1278 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1281 .byte 0,12,0x14,0,0x80,6,6,0
1283 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1287 #########################################################################
1288 {{{ # CTR procedure[s] #
1289 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1290 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1291 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1296 .globl .${prefix}_ctr32_encrypt_blocks
1305 vxor $rndkey0,$rndkey0,$rndkey0
1306 le?vspltisb $tmp,0x0f
1308 lvx $ivec,0,$ivp # load [unaligned] iv
1309 lvsl $inpperm,0,$ivp
1310 lvx $inptail,$idx,$ivp
1312 le?vxor $inpperm,$inpperm,$tmp
1313 vperm $ivec,$ivec,$inptail,$inpperm
1314 vsldoi $one,$rndkey0,$one,1
1317 ?lvsl $keyperm,0,$key # prepare for unaligned key
1318 lwz $rounds,240($key)
1320 lvsr $inpperm,0,r11 # prepare for unaligned load
1322 addi $inp,$inp,15 # 15 is not typo
1323 le?vxor $inpperm,$inpperm,$tmp
1325 srwi $rounds,$rounds,1
1327 subi $rounds,$rounds,1
1330 bge _aesp8_ctr32_encrypt8x
1332 ?lvsr $outperm,0,$out # prepare for unaligned store
1333 vspltisb $outmask,-1
1335 ?vperm $outmask,$rndkey0,$outmask,$outperm
1336 le?vxor $outperm,$outperm,$tmp
1340 lvx $rndkey1,$idx,$key
1342 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1343 vxor $inout,$ivec,$rndkey0
1344 lvx $rndkey0,$idx,$key
1350 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1351 vcipher $inout,$inout,$rndkey1
1352 lvx $rndkey1,$idx,$key
1354 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1355 vcipher $inout,$inout,$rndkey0
1356 lvx $rndkey0,$idx,$key
1360 vadduwm $ivec,$ivec,$one
1364 subic. $len,$len,1 # blocks--
1366 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1367 vcipher $inout,$inout,$rndkey1
1368 lvx $rndkey1,$idx,$key
1369 vperm $dat,$dat,$inptail,$inpperm
1371 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1373 vxor $dat,$dat,$rndkey1 # last round key
1374 vcipherlast $inout,$inout,$dat
1376 lvx $rndkey1,$idx,$key
1378 vperm $inout,$inout,$inout,$outperm
1379 vsel $dat,$outhead,$inout,$outmask
1381 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1383 vxor $inout,$ivec,$rndkey0
1384 lvx $rndkey0,$idx,$key
1391 lvx $inout,0,$out # redundant in aligned case
1392 vsel $inout,$outhead,$inout,$outmask
1398 .byte 0,12,0x14,0,0,0,6,0
1401 #########################################################################
1402 {{ # Optimized CTR procedure #
1404 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1405 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1406 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1407 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1408 # v26-v31 last 6 round keys
1409 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1410 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1414 _aesp8_ctr32_encrypt8x:
1415 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1416 li r10,`$FRAME+8*16+15`
1417 li r11,`$FRAME+8*16+31`
1418 stvx v20,r10,$sp # ABI says so
1441 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1443 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1445 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1447 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1449 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1451 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1453 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1457 subi $rounds,$rounds,3 # -4 in total
1459 lvx $rndkey0,$x00,$key # load key schedule
1463 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1464 addi $key_,$sp,$FRAME+15
1468 ?vperm v24,v30,v31,$keyperm
1471 stvx v24,$x00,$key_ # off-load round[1]
1472 ?vperm v25,v31,v30,$keyperm
1474 stvx v25,$x10,$key_ # off-load round[2]
1475 addi $key_,$key_,0x20
1476 bdnz Load_ctr32_enc_key
1479 ?vperm v24,v30,v31,$keyperm
1481 stvx v24,$x00,$key_ # off-load round[3]
1482 ?vperm v25,v31,v26,$keyperm
1484 stvx v25,$x10,$key_ # off-load round[4]
1485 addi $key_,$sp,$FRAME+15 # rewind $key_
1486 ?vperm v26,v26,v27,$keyperm
1488 ?vperm v27,v27,v28,$keyperm
1490 ?vperm v28,v28,v29,$keyperm
1492 ?vperm v29,v29,v30,$keyperm
1493 lvx $out0,$x70,$key # borrow $out0
1494 ?vperm v30,v30,v31,$keyperm
1495 lvx v24,$x00,$key_ # pre-load round[1]
1496 ?vperm v31,v31,$out0,$keyperm
1497 lvx v25,$x10,$key_ # pre-load round[2]
1499 vadduqm $two,$one,$one
1500 subi $inp,$inp,15 # undo "caller"
1503 vadduqm $out1,$ivec,$one # counter values ...
1504 vadduqm $out2,$ivec,$two
1505 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1507 vadduqm $out3,$out1,$two
1508 vxor $out1,$out1,$rndkey0
1509 le?lvsl $inpperm,0,$idx
1510 vadduqm $out4,$out2,$two
1511 vxor $out2,$out2,$rndkey0
1512 le?vspltisb $tmp,0x0f
1513 vadduqm $out5,$out3,$two
1514 vxor $out3,$out3,$rndkey0
1515 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1516 vadduqm $out6,$out4,$two
1517 vxor $out4,$out4,$rndkey0
1518 vadduqm $out7,$out5,$two
1519 vxor $out5,$out5,$rndkey0
1520 vadduqm $ivec,$out6,$two # next counter value
1521 vxor $out6,$out6,$rndkey0
1522 vxor $out7,$out7,$rndkey0
1528 vcipher $out0,$out0,v24
1529 vcipher $out1,$out1,v24
1530 vcipher $out2,$out2,v24
1531 vcipher $out3,$out3,v24
1532 vcipher $out4,$out4,v24
1533 vcipher $out5,$out5,v24
1534 vcipher $out6,$out6,v24
1535 vcipher $out7,$out7,v24
1536 Loop_ctr32_enc8x_middle:
1537 lvx v24,$x20,$key_ # round[3]
1538 addi $key_,$key_,0x20
1540 vcipher $out0,$out0,v25
1541 vcipher $out1,$out1,v25
1542 vcipher $out2,$out2,v25
1543 vcipher $out3,$out3,v25
1544 vcipher $out4,$out4,v25
1545 vcipher $out5,$out5,v25
1546 vcipher $out6,$out6,v25
1547 vcipher $out7,$out7,v25
1548 lvx v25,$x10,$key_ # round[4]
1549 bdnz Loop_ctr32_enc8x
1551 subic r11,$len,256 # $len-256, borrow $key_
1552 vcipher $out0,$out0,v24
1553 vcipher $out1,$out1,v24
1554 vcipher $out2,$out2,v24
1555 vcipher $out3,$out3,v24
1556 vcipher $out4,$out4,v24
1557 vcipher $out5,$out5,v24
1558 vcipher $out6,$out6,v24
1559 vcipher $out7,$out7,v24
1561 subfe r0,r0,r0 # borrow?-1:0
1562 vcipher $out0,$out0,v25
1563 vcipher $out1,$out1,v25
1564 vcipher $out2,$out2,v25
1565 vcipher $out3,$out3,v25
1566 vcipher $out4,$out4,v25
1567 vcipher $out5,$out5,v25
1568 vcipher $out6,$out6,v25
1569 vcipher $out7,$out7,v25
1572 addi $key_,$sp,$FRAME+15 # rewind $key_
1573 vcipher $out0,$out0,v26
1574 vcipher $out1,$out1,v26
1575 vcipher $out2,$out2,v26
1576 vcipher $out3,$out3,v26
1577 vcipher $out4,$out4,v26
1578 vcipher $out5,$out5,v26
1579 vcipher $out6,$out6,v26
1580 vcipher $out7,$out7,v26
1581 lvx v24,$x00,$key_ # re-pre-load round[1]
1583 subic $len,$len,129 # $len-=129
1584 vcipher $out0,$out0,v27
1585 addi $len,$len,1 # $len-=128 really
1586 vcipher $out1,$out1,v27
1587 vcipher $out2,$out2,v27
1588 vcipher $out3,$out3,v27
1589 vcipher $out4,$out4,v27
1590 vcipher $out5,$out5,v27
1591 vcipher $out6,$out6,v27
1592 vcipher $out7,$out7,v27
1593 lvx v25,$x10,$key_ # re-pre-load round[2]
1595 vcipher $out0,$out0,v28
1596 lvx_u $in0,$x00,$inp # load input
1597 vcipher $out1,$out1,v28
1598 lvx_u $in1,$x10,$inp
1599 vcipher $out2,$out2,v28
1600 lvx_u $in2,$x20,$inp
1601 vcipher $out3,$out3,v28
1602 lvx_u $in3,$x30,$inp
1603 vcipher $out4,$out4,v28
1604 lvx_u $in4,$x40,$inp
1605 vcipher $out5,$out5,v28
1606 lvx_u $in5,$x50,$inp
1607 vcipher $out6,$out6,v28
1608 lvx_u $in6,$x60,$inp
1609 vcipher $out7,$out7,v28
1610 lvx_u $in7,$x70,$inp
1613 vcipher $out0,$out0,v29
1614 le?vperm $in0,$in0,$in0,$inpperm
1615 vcipher $out1,$out1,v29
1616 le?vperm $in1,$in1,$in1,$inpperm
1617 vcipher $out2,$out2,v29
1618 le?vperm $in2,$in2,$in2,$inpperm
1619 vcipher $out3,$out3,v29
1620 le?vperm $in3,$in3,$in3,$inpperm
1621 vcipher $out4,$out4,v29
1622 le?vperm $in4,$in4,$in4,$inpperm
1623 vcipher $out5,$out5,v29
1624 le?vperm $in5,$in5,$in5,$inpperm
1625 vcipher $out6,$out6,v29
1626 le?vperm $in6,$in6,$in6,$inpperm
1627 vcipher $out7,$out7,v29
1628 le?vperm $in7,$in7,$in7,$inpperm
1630 add $inp,$inp,r0 # $inp is adjusted in such
1631 # way that at exit from the
1632 # loop inX-in7 are loaded
1634 subfe. r0,r0,r0 # borrow?-1:0
1635 vcipher $out0,$out0,v30
1636 vxor $in0,$in0,v31 # xor with last round key
1637 vcipher $out1,$out1,v30
1639 vcipher $out2,$out2,v30
1641 vcipher $out3,$out3,v30
1643 vcipher $out4,$out4,v30
1645 vcipher $out5,$out5,v30
1647 vcipher $out6,$out6,v30
1649 vcipher $out7,$out7,v30
1652 bne Lctr32_enc8x_break # did $len-129 borrow?
1654 vcipherlast $in0,$out0,$in0
1655 vcipherlast $in1,$out1,$in1
1656 vadduqm $out1,$ivec,$one # counter values ...
1657 vcipherlast $in2,$out2,$in2
1658 vadduqm $out2,$ivec,$two
1659 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1660 vcipherlast $in3,$out3,$in3
1661 vadduqm $out3,$out1,$two
1662 vxor $out1,$out1,$rndkey0
1663 vcipherlast $in4,$out4,$in4
1664 vadduqm $out4,$out2,$two
1665 vxor $out2,$out2,$rndkey0
1666 vcipherlast $in5,$out5,$in5
1667 vadduqm $out5,$out3,$two
1668 vxor $out3,$out3,$rndkey0
1669 vcipherlast $in6,$out6,$in6
1670 vadduqm $out6,$out4,$two
1671 vxor $out4,$out4,$rndkey0
1672 vcipherlast $in7,$out7,$in7
1673 vadduqm $out7,$out5,$two
1674 vxor $out5,$out5,$rndkey0
1675 le?vperm $in0,$in0,$in0,$inpperm
1676 vadduqm $ivec,$out6,$two # next counter value
1677 vxor $out6,$out6,$rndkey0
1678 le?vperm $in1,$in1,$in1,$inpperm
1679 vxor $out7,$out7,$rndkey0
1682 vcipher $out0,$out0,v24
1683 stvx_u $in0,$x00,$out
1684 le?vperm $in2,$in2,$in2,$inpperm
1685 vcipher $out1,$out1,v24
1686 stvx_u $in1,$x10,$out
1687 le?vperm $in3,$in3,$in3,$inpperm
1688 vcipher $out2,$out2,v24
1689 stvx_u $in2,$x20,$out
1690 le?vperm $in4,$in4,$in4,$inpperm
1691 vcipher $out3,$out3,v24
1692 stvx_u $in3,$x30,$out
1693 le?vperm $in5,$in5,$in5,$inpperm
1694 vcipher $out4,$out4,v24
1695 stvx_u $in4,$x40,$out
1696 le?vperm $in6,$in6,$in6,$inpperm
1697 vcipher $out5,$out5,v24
1698 stvx_u $in5,$x50,$out
1699 le?vperm $in7,$in7,$in7,$inpperm
1700 vcipher $out6,$out6,v24
1701 stvx_u $in6,$x60,$out
1702 vcipher $out7,$out7,v24
1703 stvx_u $in7,$x70,$out
1706 b Loop_ctr32_enc8x_middle
1711 blt Lctr32_enc8x_one
1713 beq Lctr32_enc8x_two
1715 blt Lctr32_enc8x_three
1717 beq Lctr32_enc8x_four
1719 blt Lctr32_enc8x_five
1721 beq Lctr32_enc8x_six
1723 blt Lctr32_enc8x_seven
1726 vcipherlast $out0,$out0,$in0
1727 vcipherlast $out1,$out1,$in1
1728 vcipherlast $out2,$out2,$in2
1729 vcipherlast $out3,$out3,$in3
1730 vcipherlast $out4,$out4,$in4
1731 vcipherlast $out5,$out5,$in5
1732 vcipherlast $out6,$out6,$in6
1733 vcipherlast $out7,$out7,$in7
1735 le?vperm $out0,$out0,$out0,$inpperm
1736 le?vperm $out1,$out1,$out1,$inpperm
1737 stvx_u $out0,$x00,$out
1738 le?vperm $out2,$out2,$out2,$inpperm
1739 stvx_u $out1,$x10,$out
1740 le?vperm $out3,$out3,$out3,$inpperm
1741 stvx_u $out2,$x20,$out
1742 le?vperm $out4,$out4,$out4,$inpperm
1743 stvx_u $out3,$x30,$out
1744 le?vperm $out5,$out5,$out5,$inpperm
1745 stvx_u $out4,$x40,$out
1746 le?vperm $out6,$out6,$out6,$inpperm
1747 stvx_u $out5,$x50,$out
1748 le?vperm $out7,$out7,$out7,$inpperm
1749 stvx_u $out6,$x60,$out
1750 stvx_u $out7,$x70,$out
1756 vcipherlast $out0,$out0,$in1
1757 vcipherlast $out1,$out1,$in2
1758 vcipherlast $out2,$out2,$in3
1759 vcipherlast $out3,$out3,$in4
1760 vcipherlast $out4,$out4,$in5
1761 vcipherlast $out5,$out5,$in6
1762 vcipherlast $out6,$out6,$in7
1764 le?vperm $out0,$out0,$out0,$inpperm
1765 le?vperm $out1,$out1,$out1,$inpperm
1766 stvx_u $out0,$x00,$out
1767 le?vperm $out2,$out2,$out2,$inpperm
1768 stvx_u $out1,$x10,$out
1769 le?vperm $out3,$out3,$out3,$inpperm
1770 stvx_u $out2,$x20,$out
1771 le?vperm $out4,$out4,$out4,$inpperm
1772 stvx_u $out3,$x30,$out
1773 le?vperm $out5,$out5,$out5,$inpperm
1774 stvx_u $out4,$x40,$out
1775 le?vperm $out6,$out6,$out6,$inpperm
1776 stvx_u $out5,$x50,$out
1777 stvx_u $out6,$x60,$out
1783 vcipherlast $out0,$out0,$in2
1784 vcipherlast $out1,$out1,$in3
1785 vcipherlast $out2,$out2,$in4
1786 vcipherlast $out3,$out3,$in5
1787 vcipherlast $out4,$out4,$in6
1788 vcipherlast $out5,$out5,$in7
1790 le?vperm $out0,$out0,$out0,$inpperm
1791 le?vperm $out1,$out1,$out1,$inpperm
1792 stvx_u $out0,$x00,$out
1793 le?vperm $out2,$out2,$out2,$inpperm
1794 stvx_u $out1,$x10,$out
1795 le?vperm $out3,$out3,$out3,$inpperm
1796 stvx_u $out2,$x20,$out
1797 le?vperm $out4,$out4,$out4,$inpperm
1798 stvx_u $out3,$x30,$out
1799 le?vperm $out5,$out5,$out5,$inpperm
1800 stvx_u $out4,$x40,$out
1801 stvx_u $out5,$x50,$out
1807 vcipherlast $out0,$out0,$in3
1808 vcipherlast $out1,$out1,$in4
1809 vcipherlast $out2,$out2,$in5
1810 vcipherlast $out3,$out3,$in6
1811 vcipherlast $out4,$out4,$in7
1813 le?vperm $out0,$out0,$out0,$inpperm
1814 le?vperm $out1,$out1,$out1,$inpperm
1815 stvx_u $out0,$x00,$out
1816 le?vperm $out2,$out2,$out2,$inpperm
1817 stvx_u $out1,$x10,$out
1818 le?vperm $out3,$out3,$out3,$inpperm
1819 stvx_u $out2,$x20,$out
1820 le?vperm $out4,$out4,$out4,$inpperm
1821 stvx_u $out3,$x30,$out
1822 stvx_u $out4,$x40,$out
1828 vcipherlast $out0,$out0,$in4
1829 vcipherlast $out1,$out1,$in5
1830 vcipherlast $out2,$out2,$in6
1831 vcipherlast $out3,$out3,$in7
1833 le?vperm $out0,$out0,$out0,$inpperm
1834 le?vperm $out1,$out1,$out1,$inpperm
1835 stvx_u $out0,$x00,$out
1836 le?vperm $out2,$out2,$out2,$inpperm
1837 stvx_u $out1,$x10,$out
1838 le?vperm $out3,$out3,$out3,$inpperm
1839 stvx_u $out2,$x20,$out
1840 stvx_u $out3,$x30,$out
1846 vcipherlast $out0,$out0,$in5
1847 vcipherlast $out1,$out1,$in6
1848 vcipherlast $out2,$out2,$in7
1850 le?vperm $out0,$out0,$out0,$inpperm
1851 le?vperm $out1,$out1,$out1,$inpperm
1852 stvx_u $out0,$x00,$out
1853 le?vperm $out2,$out2,$out2,$inpperm
1854 stvx_u $out1,$x10,$out
1855 stvx_u $out2,$x20,$out
1861 vcipherlast $out0,$out0,$in6
1862 vcipherlast $out1,$out1,$in7
1864 le?vperm $out0,$out0,$out0,$inpperm
1865 le?vperm $out1,$out1,$out1,$inpperm
1866 stvx_u $out0,$x00,$out
1867 stvx_u $out1,$x10,$out
1873 vcipherlast $out0,$out0,$in7
1875 le?vperm $out0,$out0,$out0,$inpperm
1882 stvx $inpperm,r10,$sp # wipe copies of round keys
1884 stvx $inpperm,r11,$sp
1886 stvx $inpperm,r10,$sp
1888 stvx $inpperm,r11,$sp
1890 stvx $inpperm,r10,$sp
1892 stvx $inpperm,r11,$sp
1894 stvx $inpperm,r10,$sp
1896 stvx $inpperm,r11,$sp
1900 lvx v20,r10,$sp # ABI says so
1922 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1923 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1924 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1925 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1926 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1927 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1928 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1931 .byte 0,12,0x14,0,0x80,6,6,0
1933 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1937 #########################################################################
1938 {{{ # XTS procedures #
1939 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1940 # const AES_KEY *key1, const AES_KEY *key2, #
1941 # [const] unsigned char iv[16]); #
1942 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1943 # input tweak value is assumed to be encrypted already, and last tweak #
1944 # value, one suitable for consecutive call on same chunk of data, is #
1945 # written back to original buffer. In addition, in "tweak chaining" #
1946 # mode only complete input blocks are processed. #
1948 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));	# GPR map: the six ABI arguments in r3-r8, round count in r9, offset/index in r10
1949 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));	# rotating pair of round-key vectors plus the data block being processed
1950 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));	# result vector, unaligned-load tail, and the input/LE/key permute masks
1951 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));	# XTS tweak state; $seven (0x07..07) and $eighty7 (0x8701..01) feed the next-tweak computation
1952 my $taillen = $key2;	# reuses $key2's GPR: the tail length is only needed after the key2 schedule has been consumed
1954 ($inp,$idx) = ($idx,$inp); # reassign: move $inp out of r3 into r10; the r3 argument is copied in later ("mr $inp,r3")
1957 .globl .${prefix}_xts_encrypt
1958 mr $inp,r3 # reassign
1964 mfspr r12,256 # save vrsave
1968 vspltisb $seven,0x07 # 0x070707..07
1969 le?lvsl $leperm,r11,r11
1970 le?vspltisb $tmp,0x0f
1971 le?vxor $leperm,$leperm,$seven
1974 lvx $tweak,0,$ivp # load [unaligned] iv
1975 lvsl $inpperm,0,$ivp
1976 lvx $inptail,$idx,$ivp
1977 le?vxor $inpperm,$inpperm,$tmp
1978 vperm $tweak,$tweak,$inptail,$inpperm
1981 lvsr $inpperm,0,r11 # prepare for unaligned load
1983 addi $inp,$inp,15 # 15 is not typo
1984 le?vxor $inpperm,$inpperm,$tmp
1986 ${UCMP}i $key2,0 # key2==NULL?
1987 beq Lxts_enc_no_key2
1989 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1990 lwz $rounds,240($key2)
1991 srwi $rounds,$rounds,1
1992 subi $rounds,$rounds,1
1995 lvx $rndkey0,0,$key2
1996 lvx $rndkey1,$idx,$key2
1998 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1999 vxor $tweak,$tweak,$rndkey0
2000 lvx $rndkey0,$idx,$key2
2005 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2006 vcipher $tweak,$tweak,$rndkey1
2007 lvx $rndkey1,$idx,$key2
2009 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2010 vcipher $tweak,$tweak,$rndkey0
2011 lvx $rndkey0,$idx,$key2
2015 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2016 vcipher $tweak,$tweak,$rndkey1
2017 lvx $rndkey1,$idx,$key2
2018 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2019 vcipherlast $tweak,$tweak,$rndkey0
2021 li $ivp,0 # don't chain the tweak
2026 and $len,$len,$idx # in "tweak chaining"
2027 # mode only complete
2028 # blocks are processed
2033 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2034 lwz $rounds,240($key1)
2035 srwi $rounds,$rounds,1
2036 subi $rounds,$rounds,1
2039 vslb $eighty7,$seven,$seven # 0x808080..80
2040 vor $eighty7,$eighty7,$seven # 0x878787..87
2041 vspltisb $tmp,1 # 0x010101..01
2042 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2045 bge _aesp8_xts_encrypt6x
2047 andi. $taillen,$len,15
2049 subi $taillen,$taillen,16
2054 lvx $rndkey0,0,$key1
2055 lvx $rndkey1,$idx,$key1
2057 vperm $inout,$inout,$inptail,$inpperm
2058 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2059 vxor $inout,$inout,$tweak
2060 vxor $inout,$inout,$rndkey0
2061 lvx $rndkey0,$idx,$key1
2068 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2069 vcipher $inout,$inout,$rndkey1
2070 lvx $rndkey1,$idx,$key1
2072 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2073 vcipher $inout,$inout,$rndkey0
2074 lvx $rndkey0,$idx,$key1
2078 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2079 vcipher $inout,$inout,$rndkey1
2080 lvx $rndkey1,$idx,$key1
2082 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2083 vxor $rndkey0,$rndkey0,$tweak
2084 vcipherlast $output,$inout,$rndkey0
2086 le?vperm $tmp,$output,$output,$leperm
2088 le?stvx_u $tmp,0,$out
2089 be?stvx_u $output,0,$out
2098 lvx $rndkey0,0,$key1
2099 lvx $rndkey1,$idx,$key1
2107 vsrab $tmp,$tweak,$seven # next tweak value
2108 vaddubm $tweak,$tweak,$tweak
2109 vsldoi $tmp,$tmp,$tmp,15
2110 vand $tmp,$tmp,$eighty7
2111 vxor $tweak,$tweak,$tmp
2113 vperm $inout,$inout,$inptail,$inpperm
2114 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2115 vxor $inout,$inout,$tweak
2116 vxor $output,$output,$rndkey0 # just in case $len<16
2117 vxor $inout,$inout,$rndkey0
2118 lvx $rndkey0,$idx,$key1
2125 vxor $output,$output,$tweak
2126 lvsr $inpperm,0,$len # $inpperm is no longer needed
2127 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2129 vperm $inptail,$inptail,$tmp,$inpperm
2130 vsel $inout,$inout,$output,$inptail
2139 bdnz Loop_xts_enc_steal
2142 b Loop_xts_enc # one more time...
2148 vsrab $tmp,$tweak,$seven # next tweak value
2149 vaddubm $tweak,$tweak,$tweak
2150 vsldoi $tmp,$tmp,$tmp,15
2151 vand $tmp,$tmp,$eighty7
2152 vxor $tweak,$tweak,$tmp
2154 le?vperm $tweak,$tweak,$tweak,$leperm
2155 stvx_u $tweak,0,$ivp
2158 mtspr 256,r12 # restore vrsave
2162 .byte 0,12,0x04,0,0x80,6,6,0
2164 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2166 .globl .${prefix}_xts_decrypt
2167 mr $inp,r3 # reassign
2173 mfspr r12,256 # save vrsave
2182 vspltisb $seven,0x07 # 0x070707..07
2183 le?lvsl $leperm,r11,r11
2184 le?vspltisb $tmp,0x0f
2185 le?vxor $leperm,$leperm,$seven
2188 lvx $tweak,0,$ivp # load [unaligned] iv
2189 lvsl $inpperm,0,$ivp
2190 lvx $inptail,$idx,$ivp
2191 le?vxor $inpperm,$inpperm,$tmp
2192 vperm $tweak,$tweak,$inptail,$inpperm
2195 lvsr $inpperm,0,r11 # prepare for unaligned load
2197 addi $inp,$inp,15 # 15 is not typo
2198 le?vxor $inpperm,$inpperm,$tmp
2200 ${UCMP}i $key2,0 # key2==NULL?
2201 beq Lxts_dec_no_key2
2203 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2204 lwz $rounds,240($key2)
2205 srwi $rounds,$rounds,1
2206 subi $rounds,$rounds,1
2209 lvx $rndkey0,0,$key2
2210 lvx $rndkey1,$idx,$key2
2212 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2213 vxor $tweak,$tweak,$rndkey0
2214 lvx $rndkey0,$idx,$key2
2219 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2220 vcipher $tweak,$tweak,$rndkey1
2221 lvx $rndkey1,$idx,$key2
2223 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2224 vcipher $tweak,$tweak,$rndkey0
2225 lvx $rndkey0,$idx,$key2
2229 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2230 vcipher $tweak,$tweak,$rndkey1
2231 lvx $rndkey1,$idx,$key2
2232 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2233 vcipherlast $tweak,$tweak,$rndkey0
2235 li $ivp,0 # don't chain the tweak
2241 add $len,$len,$idx # in "tweak chaining"
2242 # mode only complete
2243 # blocks are processed
2248 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2249 lwz $rounds,240($key1)
2250 srwi $rounds,$rounds,1
2251 subi $rounds,$rounds,1
2254 vslb $eighty7,$seven,$seven # 0x808080..80
2255 vor $eighty7,$eighty7,$seven # 0x878787..87
2256 vspltisb $tmp,1 # 0x010101..01
2257 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2260 bge _aesp8_xts_decrypt6x
2262 lvx $rndkey0,0,$key1
2263 lvx $rndkey1,$idx,$key1
2265 vperm $inout,$inout,$inptail,$inpperm
2266 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2267 vxor $inout,$inout,$tweak
2268 vxor $inout,$inout,$rndkey0
2269 lvx $rndkey0,$idx,$key1
2279 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2280 vncipher $inout,$inout,$rndkey1
2281 lvx $rndkey1,$idx,$key1
2283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2284 vncipher $inout,$inout,$rndkey0
2285 lvx $rndkey0,$idx,$key1
2289 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2290 vncipher $inout,$inout,$rndkey1
2291 lvx $rndkey1,$idx,$key1
2293 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2294 vxor $rndkey0,$rndkey0,$tweak
2295 vncipherlast $output,$inout,$rndkey0
2297 le?vperm $tmp,$output,$output,$leperm
2299 le?stvx_u $tmp,0,$out
2300 be?stvx_u $output,0,$out
2309 lvx $rndkey0,0,$key1
2310 lvx $rndkey1,$idx,$key1
2313 vsrab $tmp,$tweak,$seven # next tweak value
2314 vaddubm $tweak,$tweak,$tweak
2315 vsldoi $tmp,$tmp,$tmp,15
2316 vand $tmp,$tmp,$eighty7
2317 vxor $tweak,$tweak,$tmp
2319 vperm $inout,$inout,$inptail,$inpperm
2320 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2321 vxor $inout,$inout,$tweak
2322 vxor $inout,$inout,$rndkey0
2323 lvx $rndkey0,$idx,$key1
2331 vsrab $tmp,$tweak,$seven # next tweak value
2332 vaddubm $tweak1,$tweak,$tweak
2333 vsldoi $tmp,$tmp,$tmp,15
2334 vand $tmp,$tmp,$eighty7
2335 vxor $tweak1,$tweak1,$tmp
2340 vxor $inout,$inout,$tweak # :-(
2341 vxor $inout,$inout,$tweak1 # :-)
2344 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2345 vncipher $inout,$inout,$rndkey1
2346 lvx $rndkey1,$idx,$key1
2348 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2349 vncipher $inout,$inout,$rndkey0
2350 lvx $rndkey0,$idx,$key1
2352 bdnz Loop_xts_dec_short
2354 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2355 vncipher $inout,$inout,$rndkey1
2356 lvx $rndkey1,$idx,$key1
2358 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2359 vxor $rndkey0,$rndkey0,$tweak1
2360 vncipherlast $output,$inout,$rndkey0
2362 le?vperm $tmp,$output,$output,$leperm
2364 le?stvx_u $tmp,0,$out
2365 be?stvx_u $output,0,$out
2370 lvx $rndkey0,0,$key1
2371 lvx $rndkey1,$idx,$key1
2373 vperm $inout,$inout,$inptail,$inpperm
2374 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2376 lvsr $inpperm,0,$len # $inpperm is no longer needed
2377 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2379 vperm $inptail,$inptail,$tmp,$inpperm
2380 vsel $inout,$inout,$output,$inptail
2382 vxor $rndkey0,$rndkey0,$tweak
2383 vxor $inout,$inout,$rndkey0
2384 lvx $rndkey0,$idx,$key1
2393 bdnz Loop_xts_dec_steal
2396 b Loop_xts_dec # one more time...
2402 vsrab $tmp,$tweak,$seven # next tweak value
2403 vaddubm $tweak,$tweak,$tweak
2404 vsldoi $tmp,$tmp,$tmp,15
2405 vand $tmp,$tmp,$eighty7
2406 vxor $tweak,$tweak,$tmp
2408 le?vperm $tweak,$tweak,$tweak,$leperm
2409 stvx_u $tweak,0,$ivp
2412 mtspr 256,r12 # restore vrsave
2416 .byte 0,12,0x04,0,0x80,6,6,0
2418 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2420 #########################################################################
2421 {{ # Optimized XTS procedures #
2423 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));	# GPRs holding the constant byte offsets 0x00..0x70 used by indexed vector loads/stores
2424 $x00=0 if ($flavour =~ /osx/);	# osx flavour emits a bare literal 0 instead of "r0" -- NOTE(review): presumably an assembler-syntax requirement, confirm
2425 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));	# six input plaintext/ciphertext blocks
2426 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));	# six in-flight cipher states (note v7, then v12-v16)
2427 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));	# per-block XTS tweak values
2428 my $rndkey0="v23"; # v24-v25: rotating buffer for the first round keys
2429 # v26-v31: the last 6 round keys
2430 my ($keyperm)=($out0); # aliases with "caller", redundant assignment ($keyperm is v7 in the outer scope too)
2435 _aesp8_xts_encrypt6x:
2436 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2438 li r7,`$FRAME+8*16+15`
2439 li r3,`$FRAME+8*16+31`
2440 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2441 stvx v20,r7,$sp # ABI says so
2464 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2466 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2468 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2470 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2472 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2474 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2476 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2480 subi $rounds,$rounds,3 # -4 in total
2482 lvx $rndkey0,$x00,$key1 # load key schedule
2484 addi $key1,$key1,0x20
2486 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2487 addi $key_,$sp,$FRAME+15
2491 ?vperm v24,v30,v31,$keyperm
2493 addi $key1,$key1,0x20
2494 stvx v24,$x00,$key_ # off-load round[1]
2495 ?vperm v25,v31,v30,$keyperm
2497 stvx v25,$x10,$key_ # off-load round[2]
2498 addi $key_,$key_,0x20
2499 bdnz Load_xts_enc_key
2502 ?vperm v24,v30,v31,$keyperm
2504 stvx v24,$x00,$key_ # off-load round[3]
2505 ?vperm v25,v31,v26,$keyperm
2507 stvx v25,$x10,$key_ # off-load round[4]
2508 addi $key_,$sp,$FRAME+15 # rewind $key_
2509 ?vperm v26,v26,v27,$keyperm
2511 ?vperm v27,v27,v28,$keyperm
2513 ?vperm v28,v28,v29,$keyperm
2515 ?vperm v29,v29,v30,$keyperm
2516 lvx $twk5,$x70,$key1 # borrow $twk5
2517 ?vperm v30,v30,v31,$keyperm
2518 lvx v24,$x00,$key_ # pre-load round[1]
2519 ?vperm v31,v31,$twk5,$keyperm
2520 lvx v25,$x10,$key_ # pre-load round[2]
2522 vperm $in0,$inout,$inptail,$inpperm
2523 subi $inp,$inp,31 # undo "caller"
2524 vxor $twk0,$tweak,$rndkey0
2525 vsrab $tmp,$tweak,$seven # next tweak value
2526 vaddubm $tweak,$tweak,$tweak
2527 vsldoi $tmp,$tmp,$tmp,15
2528 vand $tmp,$tmp,$eighty7
2529 vxor $out0,$in0,$twk0
2530 vxor $tweak,$tweak,$tmp
2532 lvx_u $in1,$x10,$inp
2533 vxor $twk1,$tweak,$rndkey0
2534 vsrab $tmp,$tweak,$seven # next tweak value
2535 vaddubm $tweak,$tweak,$tweak
2536 vsldoi $tmp,$tmp,$tmp,15
2537 le?vperm $in1,$in1,$in1,$leperm
2538 vand $tmp,$tmp,$eighty7
2539 vxor $out1,$in1,$twk1
2540 vxor $tweak,$tweak,$tmp
2542 lvx_u $in2,$x20,$inp
2543 andi. $taillen,$len,15
2544 vxor $twk2,$tweak,$rndkey0
2545 vsrab $tmp,$tweak,$seven # next tweak value
2546 vaddubm $tweak,$tweak,$tweak
2547 vsldoi $tmp,$tmp,$tmp,15
2548 le?vperm $in2,$in2,$in2,$leperm
2549 vand $tmp,$tmp,$eighty7
2550 vxor $out2,$in2,$twk2
2551 vxor $tweak,$tweak,$tmp
2553 lvx_u $in3,$x30,$inp
2554 sub $len,$len,$taillen
2555 vxor $twk3,$tweak,$rndkey0
2556 vsrab $tmp,$tweak,$seven # next tweak value
2557 vaddubm $tweak,$tweak,$tweak
2558 vsldoi $tmp,$tmp,$tmp,15
2559 le?vperm $in3,$in3,$in3,$leperm
2560 vand $tmp,$tmp,$eighty7
2561 vxor $out3,$in3,$twk3
2562 vxor $tweak,$tweak,$tmp
2564 lvx_u $in4,$x40,$inp
2566 vxor $twk4,$tweak,$rndkey0
2567 vsrab $tmp,$tweak,$seven # next tweak value
2568 vaddubm $tweak,$tweak,$tweak
2569 vsldoi $tmp,$tmp,$tmp,15
2570 le?vperm $in4,$in4,$in4,$leperm
2571 vand $tmp,$tmp,$eighty7
2572 vxor $out4,$in4,$twk4
2573 vxor $tweak,$tweak,$tmp
2575 lvx_u $in5,$x50,$inp
2577 vxor $twk5,$tweak,$rndkey0
2578 vsrab $tmp,$tweak,$seven # next tweak value
2579 vaddubm $tweak,$tweak,$tweak
2580 vsldoi $tmp,$tmp,$tmp,15
2581 le?vperm $in5,$in5,$in5,$leperm
2582 vand $tmp,$tmp,$eighty7
2583 vxor $out5,$in5,$twk5
2584 vxor $tweak,$tweak,$tmp
2586 vxor v31,v31,$rndkey0
2592 vcipher $out0,$out0,v24
2593 vcipher $out1,$out1,v24
2594 vcipher $out2,$out2,v24
2595 vcipher $out3,$out3,v24
2596 vcipher $out4,$out4,v24
2597 vcipher $out5,$out5,v24
2598 lvx v24,$x20,$key_ # round[3]
2599 addi $key_,$key_,0x20
2601 vcipher $out0,$out0,v25
2602 vcipher $out1,$out1,v25
2603 vcipher $out2,$out2,v25
2604 vcipher $out3,$out3,v25
2605 vcipher $out4,$out4,v25
2606 vcipher $out5,$out5,v25
2607 lvx v25,$x10,$key_ # round[4]
2610 subic $len,$len,96 # $len-=96
2611 vxor $in0,$twk0,v31 # xor with last round key
2612 vcipher $out0,$out0,v24
2613 vcipher $out1,$out1,v24
2614 vsrab $tmp,$tweak,$seven # next tweak value
2615 vxor $twk0,$tweak,$rndkey0
2616 vaddubm $tweak,$tweak,$tweak
2617 vcipher $out2,$out2,v24
2618 vcipher $out3,$out3,v24
2619 vsldoi $tmp,$tmp,$tmp,15
2620 vcipher $out4,$out4,v24
2621 vcipher $out5,$out5,v24
2623 subfe. r0,r0,r0 # borrow?-1:0
2624 vand $tmp,$tmp,$eighty7
2625 vcipher $out0,$out0,v25
2626 vcipher $out1,$out1,v25
2627 vxor $tweak,$tweak,$tmp
2628 vcipher $out2,$out2,v25
2629 vcipher $out3,$out3,v25
2631 vsrab $tmp,$tweak,$seven # next tweak value
2632 vxor $twk1,$tweak,$rndkey0
2633 vcipher $out4,$out4,v25
2634 vcipher $out5,$out5,v25
2637 vaddubm $tweak,$tweak,$tweak
2638 vsldoi $tmp,$tmp,$tmp,15
2639 vcipher $out0,$out0,v26
2640 vcipher $out1,$out1,v26
2641 vand $tmp,$tmp,$eighty7
2642 vcipher $out2,$out2,v26
2643 vcipher $out3,$out3,v26
2644 vxor $tweak,$tweak,$tmp
2645 vcipher $out4,$out4,v26
2646 vcipher $out5,$out5,v26
2648 add $inp,$inp,r0 # $inp is adjusted in such
2649 # way that at exit from the
2650 # loop inX-in5 are loaded
2653 vsrab $tmp,$tweak,$seven # next tweak value
2654 vxor $twk2,$tweak,$rndkey0
2655 vaddubm $tweak,$tweak,$tweak
2656 vcipher $out0,$out0,v27
2657 vcipher $out1,$out1,v27
2658 vsldoi $tmp,$tmp,$tmp,15
2659 vcipher $out2,$out2,v27
2660 vcipher $out3,$out3,v27
2661 vand $tmp,$tmp,$eighty7
2662 vcipher $out4,$out4,v27
2663 vcipher $out5,$out5,v27
2665 addi $key_,$sp,$FRAME+15 # rewind $key_
2666 vxor $tweak,$tweak,$tmp
2667 vcipher $out0,$out0,v28
2668 vcipher $out1,$out1,v28
2670 vsrab $tmp,$tweak,$seven # next tweak value
2671 vxor $twk3,$tweak,$rndkey0
2672 vcipher $out2,$out2,v28
2673 vcipher $out3,$out3,v28
2674 vaddubm $tweak,$tweak,$tweak
2675 vsldoi $tmp,$tmp,$tmp,15
2676 vcipher $out4,$out4,v28
2677 vcipher $out5,$out5,v28
2678 lvx v24,$x00,$key_ # re-pre-load round[1]
2679 vand $tmp,$tmp,$eighty7
2681 vcipher $out0,$out0,v29
2682 vcipher $out1,$out1,v29
2683 vxor $tweak,$tweak,$tmp
2684 vcipher $out2,$out2,v29
2685 vcipher $out3,$out3,v29
2687 vsrab $tmp,$tweak,$seven # next tweak value
2688 vxor $twk4,$tweak,$rndkey0
2689 vcipher $out4,$out4,v29
2690 vcipher $out5,$out5,v29
2691 lvx v25,$x10,$key_ # re-pre-load round[2]
2692 vaddubm $tweak,$tweak,$tweak
2693 vsldoi $tmp,$tmp,$tmp,15
2695 vcipher $out0,$out0,v30
2696 vcipher $out1,$out1,v30
2697 vand $tmp,$tmp,$eighty7
2698 vcipher $out2,$out2,v30
2699 vcipher $out3,$out3,v30
2700 vxor $tweak,$tweak,$tmp
2701 vcipher $out4,$out4,v30
2702 vcipher $out5,$out5,v30
2704 vsrab $tmp,$tweak,$seven # next tweak value
2705 vxor $twk5,$tweak,$rndkey0
2707 vcipherlast $out0,$out0,$in0
2708 lvx_u $in0,$x00,$inp # load next input block
2709 vaddubm $tweak,$tweak,$tweak
2710 vsldoi $tmp,$tmp,$tmp,15
2711 vcipherlast $out1,$out1,$in1
2712 lvx_u $in1,$x10,$inp
2713 vcipherlast $out2,$out2,$in2
2714 le?vperm $in0,$in0,$in0,$leperm
2715 lvx_u $in2,$x20,$inp
2716 vand $tmp,$tmp,$eighty7
2717 vcipherlast $out3,$out3,$in3
2718 le?vperm $in1,$in1,$in1,$leperm
2719 lvx_u $in3,$x30,$inp
2720 vcipherlast $out4,$out4,$in4
2721 le?vperm $in2,$in2,$in2,$leperm
2722 lvx_u $in4,$x40,$inp
2723 vxor $tweak,$tweak,$tmp
2724 vcipherlast $tmp,$out5,$in5 # last block might be needed
2726 le?vperm $in3,$in3,$in3,$leperm
2727 lvx_u $in5,$x50,$inp
2729 le?vperm $in4,$in4,$in4,$leperm
2730 le?vperm $in5,$in5,$in5,$leperm
2732 le?vperm $out0,$out0,$out0,$leperm
2733 le?vperm $out1,$out1,$out1,$leperm
2734 stvx_u $out0,$x00,$out # store output
2735 vxor $out0,$in0,$twk0
2736 le?vperm $out2,$out2,$out2,$leperm
2737 stvx_u $out1,$x10,$out
2738 vxor $out1,$in1,$twk1
2739 le?vperm $out3,$out3,$out3,$leperm
2740 stvx_u $out2,$x20,$out
2741 vxor $out2,$in2,$twk2
2742 le?vperm $out4,$out4,$out4,$leperm
2743 stvx_u $out3,$x30,$out
2744 vxor $out3,$in3,$twk3
2745 le?vperm $out5,$tmp,$tmp,$leperm
2746 stvx_u $out4,$x40,$out
2747 vxor $out4,$in4,$twk4
2748 le?stvx_u $out5,$x50,$out
2749 be?stvx_u $tmp, $x50,$out
2750 vxor $out5,$in5,$twk5
2754 beq Loop_xts_enc6x # did $len-=96 borrow?
2756 addic. $len,$len,0x60
2763 blt Lxts_enc6x_three
2768 vxor $out0,$in1,$twk0
2769 vxor $out1,$in2,$twk1
2770 vxor $out2,$in3,$twk2
2771 vxor $out3,$in4,$twk3
2772 vxor $out4,$in5,$twk4
2776 le?vperm $out0,$out0,$out0,$leperm
2777 vmr $twk0,$twk5 # unused tweak
2778 le?vperm $out1,$out1,$out1,$leperm
2779 stvx_u $out0,$x00,$out # store output
2780 le?vperm $out2,$out2,$out2,$leperm
2781 stvx_u $out1,$x10,$out
2782 le?vperm $out3,$out3,$out3,$leperm
2783 stvx_u $out2,$x20,$out
2784 vxor $tmp,$out4,$twk5 # last block prep for stealing
2785 le?vperm $out4,$out4,$out4,$leperm
2786 stvx_u $out3,$x30,$out
2787 stvx_u $out4,$x40,$out
2789 bne Lxts_enc6x_steal
2794 vxor $out0,$in2,$twk0
2795 vxor $out1,$in3,$twk1
2796 vxor $out2,$in4,$twk2
2797 vxor $out3,$in5,$twk3
2798 vxor $out4,$out4,$out4
2802 le?vperm $out0,$out0,$out0,$leperm
2803 vmr $twk0,$twk4 # unused tweak
2804 le?vperm $out1,$out1,$out1,$leperm
2805 stvx_u $out0,$x00,$out # store output
2806 le?vperm $out2,$out2,$out2,$leperm
2807 stvx_u $out1,$x10,$out
2808 vxor $tmp,$out3,$twk4 # last block prep for stealing
2809 le?vperm $out3,$out3,$out3,$leperm
2810 stvx_u $out2,$x20,$out
2811 stvx_u $out3,$x30,$out
2813 bne Lxts_enc6x_steal
2818 vxor $out0,$in3,$twk0
2819 vxor $out1,$in4,$twk1
2820 vxor $out2,$in5,$twk2
2821 vxor $out3,$out3,$out3
2822 vxor $out4,$out4,$out4
2826 le?vperm $out0,$out0,$out0,$leperm
2827 vmr $twk0,$twk3 # unused tweak
2828 le?vperm $out1,$out1,$out1,$leperm
2829 stvx_u $out0,$x00,$out # store output
2830 vxor $tmp,$out2,$twk3 # last block prep for stealing
2831 le?vperm $out2,$out2,$out2,$leperm
2832 stvx_u $out1,$x10,$out
2833 stvx_u $out2,$x20,$out
2835 bne Lxts_enc6x_steal
2840 vxor $out0,$in4,$twk0
2841 vxor $out1,$in5,$twk1
2842 vxor $out2,$out2,$out2
2843 vxor $out3,$out3,$out3
2844 vxor $out4,$out4,$out4
2848 le?vperm $out0,$out0,$out0,$leperm
2849 vmr $twk0,$twk2 # unused tweak
2850 vxor $tmp,$out1,$twk2 # last block prep for stealing
2851 le?vperm $out1,$out1,$out1,$leperm
2852 stvx_u $out0,$x00,$out # store output
2853 stvx_u $out1,$x10,$out
2855 bne Lxts_enc6x_steal
2860 vxor $out0,$in5,$twk0
2863 vcipher $out0,$out0,v24
2864 lvx v24,$x20,$key_ # round[3]
2865 addi $key_,$key_,0x20
2867 vcipher $out0,$out0,v25
2868 lvx v25,$x10,$key_ # round[4]
2871 add $inp,$inp,$taillen
2873 vcipher $out0,$out0,v24
2876 vcipher $out0,$out0,v25
2878 lvsr $inpperm,0,$taillen
2879 vcipher $out0,$out0,v26
2882 vcipher $out0,$out0,v27
2884 addi $key_,$sp,$FRAME+15 # rewind $key_
2885 vcipher $out0,$out0,v28
2886 lvx v24,$x00,$key_ # re-pre-load round[1]
2888 vcipher $out0,$out0,v29
2889 lvx v25,$x10,$key_ # re-pre-load round[2]
2890 vxor $twk0,$twk0,v31
2892 le?vperm $in0,$in0,$in0,$leperm
2893 vcipher $out0,$out0,v30
2895 vperm $in0,$in0,$in0,$inpperm
2896 vcipherlast $out0,$out0,$twk0
2898 vmr $twk0,$twk1 # unused tweak
2899 vxor $tmp,$out0,$twk1 # last block prep for stealing
2900 le?vperm $out0,$out0,$out0,$leperm
2901 stvx_u $out0,$x00,$out # store output
2903 bne Lxts_enc6x_steal
2911 add $inp,$inp,$taillen
2914 lvsr $inpperm,0,$taillen # $in5 is no more
2915 le?vperm $in0,$in0,$in0,$leperm
2916 vperm $in0,$in0,$in0,$inpperm
2917 vxor $tmp,$tmp,$twk0
2919 vxor $in0,$in0,$twk0
2920 vxor $out0,$out0,$out0
2922 vperm $out0,$out0,$out1,$inpperm
2923 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2928 Loop_xts_enc6x_steal:
2931 bdnz Loop_xts_enc6x_steal
2935 b Loop_xts_enc1x # one more time...
2942 vxor $tweak,$twk0,$rndkey0
2943 le?vperm $tweak,$tweak,$tweak,$leperm
2944 stvx_u $tweak,0,$ivp
2950 stvx $seven,r10,$sp # wipe copies of round keys
2968 lvx v20,r10,$sp # ABI says so
2990 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2991 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2992 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2993 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2994 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2995 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2996 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2999 .byte 0,12,0x04,1,0x80,6,6,0
3004 vcipher $out0,$out0,v24
3005 vcipher $out1,$out1,v24
3006 vcipher $out2,$out2,v24
3007 vcipher $out3,$out3,v24
3008 vcipher $out4,$out4,v24
3009 lvx v24,$x20,$key_ # round[3]
3010 addi $key_,$key_,0x20
3012 vcipher $out0,$out0,v25
3013 vcipher $out1,$out1,v25
3014 vcipher $out2,$out2,v25
3015 vcipher $out3,$out3,v25
3016 vcipher $out4,$out4,v25
3017 lvx v25,$x10,$key_ # round[4]
3018 bdnz _aesp8_xts_enc5x
3020 add $inp,$inp,$taillen
3022 vcipher $out0,$out0,v24
3023 vcipher $out1,$out1,v24
3024 vcipher $out2,$out2,v24
3025 vcipher $out3,$out3,v24
3026 vcipher $out4,$out4,v24
3029 vcipher $out0,$out0,v25
3030 vcipher $out1,$out1,v25
3031 vcipher $out2,$out2,v25
3032 vcipher $out3,$out3,v25
3033 vcipher $out4,$out4,v25
3034 vxor $twk0,$twk0,v31
3036 vcipher $out0,$out0,v26
3037 lvsr $inpperm,r0,$taillen # $in5 is no more
3038 vcipher $out1,$out1,v26
3039 vcipher $out2,$out2,v26
3040 vcipher $out3,$out3,v26
3041 vcipher $out4,$out4,v26
3044 vcipher $out0,$out0,v27
3046 vcipher $out1,$out1,v27
3047 vcipher $out2,$out2,v27
3048 vcipher $out3,$out3,v27
3049 vcipher $out4,$out4,v27
3052 addi $key_,$sp,$FRAME+15 # rewind $key_
3053 vcipher $out0,$out0,v28
3054 vcipher $out1,$out1,v28
3055 vcipher $out2,$out2,v28
3056 vcipher $out3,$out3,v28
3057 vcipher $out4,$out4,v28
3058 lvx v24,$x00,$key_ # re-pre-load round[1]
3061 vcipher $out0,$out0,v29
3062 le?vperm $in0,$in0,$in0,$leperm
3063 vcipher $out1,$out1,v29
3064 vcipher $out2,$out2,v29
3065 vcipher $out3,$out3,v29
3066 vcipher $out4,$out4,v29
3067 lvx v25,$x10,$key_ # re-pre-load round[2]
3070 vcipher $out0,$out0,v30
3071 vperm $in0,$in0,$in0,$inpperm
3072 vcipher $out1,$out1,v30
3073 vcipher $out2,$out2,v30
3074 vcipher $out3,$out3,v30
3075 vcipher $out4,$out4,v30
3077 vcipherlast $out0,$out0,$twk0
3078 vcipherlast $out1,$out1,$in1
3079 vcipherlast $out2,$out2,$in2
3080 vcipherlast $out3,$out3,$in3
3081 vcipherlast $out4,$out4,$in4
3084 .byte 0,12,0x14,0,0,0,0,0
# AES-XTS decrypt, 6 blocks per iteration: prologue.
# Allocates the stack frame, saves the non-volatile vector and GPR
# registers (v20.., r26-r31) per the ABI, then copies the key schedule
# into an on-stack scratch area at $sp+$FRAME+15 so round keys beyond
# the v24-v31 working set can be reloaded cheaply inside the loop.
3087 _aesp8_xts_decrypt6x:
3088 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3090 li r7,`$FRAME+8*16+15`
3091 li r3,`$FRAME+8*16+31`
3092 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3093 stvx v20,r7,$sp # ABI says so
3116 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3118 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3120 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3122 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3124 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3126 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3128 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
# Key-schedule load: the ?vperm instructions are endian-specific (the
# Perl post-processor swaps their operands for little-endian flavours)
# and realign the round keys loaded with lvx.
3132 subi $rounds,$rounds,3 # -4 in total
3134 lvx $rndkey0,$x00,$key1 # load key schedule
3136 addi $key1,$key1,0x20
3138 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3139 addi $key_,$sp,$FRAME+15
# Loop (counted via bdnz below): align successive round-key pairs and
# spill them to the stack scratch area, two 16-byte keys per iteration.
3143 ?vperm v24,v30,v31,$keyperm
3145 addi $key1,$key1,0x20
3146 stvx v24,$x00,$key_ # off-load round[1]
3147 ?vperm v25,v31,v30,$keyperm
3149 stvx v25,$x10,$key_ # off-load round[2]
3150 addi $key_,$key_,0x20
3151 bdnz Load_xts_dec_key
# Remaining rounds stay resident in v24-v31 for the whole routine;
# $twk5 is borrowed as a temporary to finish aligning the last key.
3154 ?vperm v24,v30,v31,$keyperm
3156 stvx v24,$x00,$key_ # off-load round[3]
3157 ?vperm v25,v31,v26,$keyperm
3159 stvx v25,$x10,$key_ # off-load round[4]
3160 addi $key_,$sp,$FRAME+15 # rewind $key_
3161 ?vperm v26,v26,v27,$keyperm
3163 ?vperm v27,v27,v28,$keyperm
3165 ?vperm v28,v28,v29,$keyperm
3167 ?vperm v29,v29,v30,$keyperm
3168 lvx $twk5,$x70,$key1 # borrow $twk5
3169 ?vperm v30,v30,v31,$keyperm
3170 lvx v24,$x00,$key_ # pre-load round[1]
3171 ?vperm v31,v31,$twk5,$keyperm
3172 lvx v25,$x10,$key_ # pre-load round[2]
# Generate the first six XTS tweaks (twk0..twk5) and whiten the first
# six input blocks.  Each tweak update is the standard XTS multiply-by-
# alpha in GF(2^128): vsrab/$seven extracts the carry-out sign bytes,
# vaddubm doubles every byte (shift left by 1), vsldoi rotates the
# carry into place, and vand with $eighty7 (0x87) applies the
# reduction polynomial before xor-ing into $tweak.
# Each twkN is pre-xored with round-key[0] ($rndkey0) so the initial
# AddRoundKey and the tweak xor collapse into the single vxor below.
3174 vperm $in0,$inout,$inptail,$inpperm
3175 subi $inp,$inp,31 # undo "caller"
3176 vxor $twk0,$tweak,$rndkey0
3177 vsrab $tmp,$tweak,$seven # next tweak value
3178 vaddubm $tweak,$tweak,$tweak
3179 vsldoi $tmp,$tmp,$tmp,15
3180 vand $tmp,$tmp,$eighty7
3181 vxor $out0,$in0,$twk0
3182 vxor $tweak,$tweak,$tmp
# Block 1: load, byte-swap on little-endian (le?vperm), whiten.
3184 lvx_u $in1,$x10,$inp
3185 vxor $twk1,$tweak,$rndkey0
3186 vsrab $tmp,$tweak,$seven # next tweak value
3187 vaddubm $tweak,$tweak,$tweak
3188 vsldoi $tmp,$tmp,$tmp,15
3189 le?vperm $in1,$in1,$in1,$leperm
3190 vand $tmp,$tmp,$eighty7
3191 vxor $out1,$in1,$twk1
3192 vxor $tweak,$tweak,$tmp
# Block 2; also computes $taillen = $len mod 16 for the
# ciphertext-stealing path.
3194 lvx_u $in2,$x20,$inp
3195 andi. $taillen,$len,15
3196 vxor $twk2,$tweak,$rndkey0
3197 vsrab $tmp,$tweak,$seven # next tweak value
3198 vaddubm $tweak,$tweak,$tweak
3199 vsldoi $tmp,$tmp,$tmp,15
3200 le?vperm $in2,$in2,$in2,$leperm
3201 vand $tmp,$tmp,$eighty7
3202 vxor $out2,$in2,$twk2
3203 vxor $tweak,$tweak,$tmp
# Block 3; $len is rounded down to whole blocks.
3205 lvx_u $in3,$x30,$inp
3206 sub $len,$len,$taillen
3207 vxor $twk3,$tweak,$rndkey0
3208 vsrab $tmp,$tweak,$seven # next tweak value
3209 vaddubm $tweak,$tweak,$tweak
3210 vsldoi $tmp,$tmp,$tmp,15
3211 le?vperm $in3,$in3,$in3,$leperm
3212 vand $tmp,$tmp,$eighty7
3213 vxor $out3,$in3,$twk3
3214 vxor $tweak,$tweak,$tmp
# Block 4.
3216 lvx_u $in4,$x40,$inp
3218 vxor $twk4,$tweak,$rndkey0
3219 vsrab $tmp,$tweak,$seven # next tweak value
3220 vaddubm $tweak,$tweak,$tweak
3221 vsldoi $tmp,$tmp,$tmp,15
3222 le?vperm $in4,$in4,$in4,$leperm
3223 vand $tmp,$tmp,$eighty7
3224 vxor $out4,$in4,$twk4
3225 vxor $tweak,$tweak,$tmp
# Block 5.
3227 lvx_u $in5,$x50,$inp
3229 vxor $twk5,$tweak,$rndkey0
3230 vsrab $tmp,$tweak,$seven # next tweak value
3231 vaddubm $tweak,$tweak,$tweak
3232 vsldoi $tmp,$tmp,$tmp,15
3233 le?vperm $in5,$in5,$in5,$leperm
3234 vand $tmp,$tmp,$eighty7
3235 vxor $out5,$in5,$twk5
3236 vxor $tweak,$tweak,$tmp
# Fold round-key[0] into the last round key: since every twkN already
# carries rndkey0, xor-ing v31 with rndkey0 makes twkN^v31 come out
# right in the last round (see "xor with last round key" in the loop).
3238 vxor v31,v31,$rndkey0
# Main loop: decrypt six blocks per iteration.  AES inverse rounds
# (vncipher) for all six lanes are interleaved with the computation of
# the NEXT iteration's six tweaks — the vsrab/vaddubm/vsldoi/vand/vxor
# tweak-update sequence is woven between cipher rounds to hide latency.
# Do not reorder these instructions.
# Inner counted loop over the mid rounds held in the stack scratch
# area (v24/v25 are reloaded pairwise; bdnz below).
3244 vncipher $out0,$out0,v24
3245 vncipher $out1,$out1,v24
3246 vncipher $out2,$out2,v24
3247 vncipher $out3,$out3,v24
3248 vncipher $out4,$out4,v24
3249 vncipher $out5,$out5,v24
3250 lvx v24,$x20,$key_ # round[3]
3251 addi $key_,$key_,0x20
3253 vncipher $out0,$out0,v25
3254 vncipher $out1,$out1,v25
3255 vncipher $out2,$out2,v25
3256 vncipher $out3,$out3,v25
3257 vncipher $out4,$out4,v25
3258 vncipher $out5,$out5,v25
3259 lvx v25,$x10,$key_ # round[4]
# Final rounds: subic sets the carry from $len-96; subfe. turns the
# borrow into r0 = -1 or 0, used below to rewind $inp so the trailing
# partial loads stay in bounds on the last iteration.
3262 subic $len,$len,96 # $len-=96
3263 vxor $in0,$twk0,v31 # xor with last round key
3264 vncipher $out0,$out0,v24
3265 vncipher $out1,$out1,v24
3266 vsrab $tmp,$tweak,$seven # next tweak value
3267 vxor $twk0,$tweak,$rndkey0
3268 vaddubm $tweak,$tweak,$tweak
3269 vncipher $out2,$out2,v24
3270 vncipher $out3,$out3,v24
3271 vsldoi $tmp,$tmp,$tmp,15
3272 vncipher $out4,$out4,v24
3273 vncipher $out5,$out5,v24
3275 subfe. r0,r0,r0 # borrow?-1:0
3276 vand $tmp,$tmp,$eighty7
3277 vncipher $out0,$out0,v25
3278 vncipher $out1,$out1,v25
3279 vxor $tweak,$tweak,$tmp
3280 vncipher $out2,$out2,v25
3281 vncipher $out3,$out3,v25
3283 vsrab $tmp,$tweak,$seven # next tweak value
3284 vxor $twk1,$tweak,$rndkey0
3285 vncipher $out4,$out4,v25
3286 vncipher $out5,$out5,v25
3289 vaddubm $tweak,$tweak,$tweak
3290 vsldoi $tmp,$tmp,$tmp,15
3291 vncipher $out0,$out0,v26
3292 vncipher $out1,$out1,v26
3293 vand $tmp,$tmp,$eighty7
3294 vncipher $out2,$out2,v26
3295 vncipher $out3,$out3,v26
3296 vxor $tweak,$tweak,$tmp
3297 vncipher $out4,$out4,v26
3298 vncipher $out5,$out5,v26
3300 add $inp,$inp,r0 # $inp is adjusted in such
3301 # way that at exit from the
3302 # loop inX-in5 are loaded
3305 vsrab $tmp,$tweak,$seven # next tweak value
3306 vxor $twk2,$tweak,$rndkey0
3307 vaddubm $tweak,$tweak,$tweak
3308 vncipher $out0,$out0,v27
3309 vncipher $out1,$out1,v27
3310 vsldoi $tmp,$tmp,$tmp,15
3311 vncipher $out2,$out2,v27
3312 vncipher $out3,$out3,v27
3313 vand $tmp,$tmp,$eighty7
3314 vncipher $out4,$out4,v27
3315 vncipher $out5,$out5,v27
3317 addi $key_,$sp,$FRAME+15 # rewind $key_
3318 vxor $tweak,$tweak,$tmp
3319 vncipher $out0,$out0,v28
3320 vncipher $out1,$out1,v28
3322 vsrab $tmp,$tweak,$seven # next tweak value
3323 vxor $twk3,$tweak,$rndkey0
3324 vncipher $out2,$out2,v28
3325 vncipher $out3,$out3,v28
3326 vaddubm $tweak,$tweak,$tweak
3327 vsldoi $tmp,$tmp,$tmp,15
3328 vncipher $out4,$out4,v28
3329 vncipher $out5,$out5,v28
3330 lvx v24,$x00,$key_ # re-pre-load round[1]
3331 vand $tmp,$tmp,$eighty7
3333 vncipher $out0,$out0,v29
3334 vncipher $out1,$out1,v29
3335 vxor $tweak,$tweak,$tmp
3336 vncipher $out2,$out2,v29
3337 vncipher $out3,$out3,v29
3339 vsrab $tmp,$tweak,$seven # next tweak value
3340 vxor $twk4,$tweak,$rndkey0
3341 vncipher $out4,$out4,v29
3342 vncipher $out5,$out5,v29
3343 lvx v25,$x10,$key_ # re-pre-load round[2]
3344 vaddubm $tweak,$tweak,$tweak
3345 vsldoi $tmp,$tmp,$tmp,15
3347 vncipher $out0,$out0,v30
3348 vncipher $out1,$out1,v30
3349 vand $tmp,$tmp,$eighty7
3350 vncipher $out2,$out2,v30
3351 vncipher $out3,$out3,v30
3352 vxor $tweak,$tweak,$tmp
3353 vncipher $out4,$out4,v30
3354 vncipher $out5,$out5,v30
3356 vsrab $tmp,$tweak,$seven # next tweak value
3357 vxor $twk5,$tweak,$rndkey0
# Last round (vncipherlast) uses twkN^v31 (precomputed into $inN /
# $twkN), then the next iteration's six blocks are loaded and
# pre-whitened while the finished ones are byte-swapped and stored.
3359 vncipherlast $out0,$out0,$in0
3360 lvx_u $in0,$x00,$inp # load next input block
3361 vaddubm $tweak,$tweak,$tweak
3362 vsldoi $tmp,$tmp,$tmp,15
3363 vncipherlast $out1,$out1,$in1
3364 lvx_u $in1,$x10,$inp
3365 vncipherlast $out2,$out2,$in2
3366 le?vperm $in0,$in0,$in0,$leperm
3367 lvx_u $in2,$x20,$inp
3368 vand $tmp,$tmp,$eighty7
3369 vncipherlast $out3,$out3,$in3
3370 le?vperm $in1,$in1,$in1,$leperm
3371 lvx_u $in3,$x30,$inp
3372 vncipherlast $out4,$out4,$in4
3373 le?vperm $in2,$in2,$in2,$leperm
3374 lvx_u $in4,$x40,$inp
3375 vxor $tweak,$tweak,$tmp
3376 vncipherlast $out5,$out5,$in5
3377 le?vperm $in3,$in3,$in3,$leperm
3378 lvx_u $in5,$x50,$inp
3380 le?vperm $in4,$in4,$in4,$leperm
3381 le?vperm $in5,$in5,$in5,$leperm
3383 le?vperm $out0,$out0,$out0,$leperm
3384 le?vperm $out1,$out1,$out1,$leperm
3385 stvx_u $out0,$x00,$out # store output
3386 vxor $out0,$in0,$twk0
3387 le?vperm $out2,$out2,$out2,$leperm
3388 stvx_u $out1,$x10,$out
3389 vxor $out1,$in1,$twk1
3390 le?vperm $out3,$out3,$out3,$leperm
3391 stvx_u $out2,$x20,$out
3392 vxor $out2,$in2,$twk2
3393 le?vperm $out4,$out4,$out4,$leperm
3394 stvx_u $out3,$x30,$out
3395 vxor $out3,$in3,$twk3
3396 le?vperm $out5,$out5,$out5,$leperm
3397 stvx_u $out4,$x40,$out
3398 vxor $out4,$in4,$twk4
3399 stvx_u $out5,$x50,$out
3400 vxor $out5,$in5,$twk5
3404 beq Loop_xts_dec6x # did $len-=96 borrow?
# Short-input tail: fewer than six whole blocks remain.  $len+0x60
# recovers the true residual count; dispatch (branches to the elided
# LxtsdecNx labels) selects the 5/4/3/2-block variants below.  Each
# variant re-whitens the already-decrypted-to-$inN blocks with the
# tweaks shifted down one slot, stashes the first unused tweak in
# $twk0 for a possible ciphertext-stealing step, and falls into the
# 5-lane finisher (_aesp8_xts_dec5x, outside this window) before
# storing.  "bne Lxts_dec6x_steal" fires when $taillen != 0.
3406 addic. $len,$len,0x60
3413 blt Lxts_dec6x_three
# --- five blocks remain ---
3418 vxor $out0,$in1,$twk0
3419 vxor $out1,$in2,$twk1
3420 vxor $out2,$in3,$twk2
3421 vxor $out3,$in4,$twk3
3422 vxor $out4,$in5,$twk4
3426 le?vperm $out0,$out0,$out0,$leperm
3427 vmr $twk0,$twk5 # unused tweak
3428 vxor $twk1,$tweak,$rndkey0
3429 le?vperm $out1,$out1,$out1,$leperm
3430 stvx_u $out0,$x00,$out # store output
3431 vxor $out0,$in0,$twk1
3432 le?vperm $out2,$out2,$out2,$leperm
3433 stvx_u $out1,$x10,$out
3434 le?vperm $out3,$out3,$out3,$leperm
3435 stvx_u $out2,$x20,$out
3436 le?vperm $out4,$out4,$out4,$leperm
3437 stvx_u $out3,$x30,$out
3438 stvx_u $out4,$x40,$out
3440 bne Lxts_dec6x_steal
# --- four blocks remain; unused lanes are zeroed (vxor x,x,x) ---
3445 vxor $out0,$in2,$twk0
3446 vxor $out1,$in3,$twk1
3447 vxor $out2,$in4,$twk2
3448 vxor $out3,$in5,$twk3
3449 vxor $out4,$out4,$out4
3453 le?vperm $out0,$out0,$out0,$leperm
3454 vmr $twk0,$twk4 # unused tweak
3456 le?vperm $out1,$out1,$out1,$leperm
3457 stvx_u $out0,$x00,$out # store output
3458 vxor $out0,$in0,$twk5
3459 le?vperm $out2,$out2,$out2,$leperm
3460 stvx_u $out1,$x10,$out
3461 le?vperm $out3,$out3,$out3,$leperm
3462 stvx_u $out2,$x20,$out
3463 stvx_u $out3,$x30,$out
3465 bne Lxts_dec6x_steal
# --- three blocks remain ---
3470 vxor $out0,$in3,$twk0
3471 vxor $out1,$in4,$twk1
3472 vxor $out2,$in5,$twk2
3473 vxor $out3,$out3,$out3
3474 vxor $out4,$out4,$out4
3478 le?vperm $out0,$out0,$out0,$leperm
3479 vmr $twk0,$twk3 # unused tweak
3481 le?vperm $out1,$out1,$out1,$leperm
3482 stvx_u $out0,$x00,$out # store output
3483 vxor $out0,$in0,$twk4
3484 le?vperm $out2,$out2,$out2,$leperm
3485 stvx_u $out1,$x10,$out
3486 stvx_u $out2,$x20,$out
3488 bne Lxts_dec6x_steal
# --- two blocks remain ---
3493 vxor $out0,$in4,$twk0
3494 vxor $out1,$in5,$twk1
3495 vxor $out2,$out2,$out2
3496 vxor $out3,$out3,$out3
3497 vxor $out4,$out4,$out4
3501 le?vperm $out0,$out0,$out0,$leperm
3502 vmr $twk0,$twk2 # unused tweak
3504 le?vperm $out1,$out1,$out1,$leperm
3505 stvx_u $out0,$x00,$out # store output
3506 vxor $out0,$in0,$twk3
3507 stvx_u $out1,$x10,$out
3509 bne Lxts_dec6x_steal
# Single remaining block: run a full scalar (one-lane) inverse-cipher
# pass.  Mid rounds come from the stack scratch area (counted loop via
# bdnz, label elided); v26-v30 finish the fixed rounds; vncipherlast
# folds in $twk0^v31 (v31 already carries rndkey0, see setup).
3514 vxor $out0,$in5,$twk0
3517 vncipher $out0,$out0,v24
3518 lvx v24,$x20,$key_ # round[3]
3519 addi $key_,$key_,0x20
3521 vncipher $out0,$out0,v25
3522 lvx v25,$x10,$key_ # round[4]
3526 vncipher $out0,$out0,v24
3530 vncipher $out0,$out0,v25
3533 vncipher $out0,$out0,v26
3536 vncipher $out0,$out0,v27
3538 addi $key_,$sp,$FRAME+15 # rewind $key_
3539 vncipher $out0,$out0,v28
3540 lvx v24,$x00,$key_ # re-pre-load round[1]
3542 vncipher $out0,$out0,v29
3543 lvx v25,$x10,$key_ # re-pre-load round[2]
3544 vxor $twk0,$twk0,v31
3546 le?vperm $in0,$in0,$in0,$leperm
3547 vncipher $out0,$out0,v30
3550 vncipherlast $out0,$out0,$twk0
3552 vmr $twk0,$twk1 # unused tweak
3554 le?vperm $out0,$out0,$out0,$leperm
3555 stvx_u $out0,$x00,$out # store output
# Prepare $out0 for a possible ciphertext-stealing step; branch taken
# when $taillen != 0.
3557 vxor $out0,$in0,$twk2
3558 bne Lxts_dec6x_steal
# Ciphertext stealing: the second-to-last block must be decrypted with
# the LAST tweak ($twk1 here), then its tail is swapped with the final
# partial block per the XTS-AES stealing rule.  First, one full
# decrypt pass under $twk1 (mid rounds via the counted bdnz loop):
3567 le?vperm $in0,$in0,$in0,$leperm
3568 vxor $out0,$in0,$twk1
3570 vncipher $out0,$out0,v24
3571 lvx v24,$x20,$key_ # round[3]
3572 addi $key_,$key_,0x20
3574 vncipher $out0,$out0,v25
3575 lvx v25,$x10,$key_ # round[4]
3576 bdnz Lxts_dec6x_steal
3578 add $inp,$inp,$taillen
3579 vncipher $out0,$out0,v24
3582 vncipher $out0,$out0,v25
3585 vncipher $out0,$out0,v26
# lvsr keyed by $taillen builds the permute mask used to splice the
# stolen bytes; $inpperm is reused since $in5 is dead by now.
3587 lvsr $inpperm,0,$taillen # $in5 is no more
3588 vncipher $out0,$out0,v27
3590 addi $key_,$sp,$FRAME+15 # rewind $key_
3591 vncipher $out0,$out0,v28
3592 lvx v24,$x00,$key_ # re-pre-load round[1]
3594 vncipher $out0,$out0,v29
3595 lvx v25,$x10,$key_ # re-pre-load round[2]
3596 vxor $twk1,$twk1,v31
3598 le?vperm $in0,$in0,$in0,$leperm
3599 vncipher $out0,$out0,v30
3601 vperm $in0,$in0,$in0,$inpperm
3602 vncipherlast $tmp,$out0,$twk1
# Store the decrypted block (le?/be? pick the endian-correct copy),
# then build the spliced final block: vsel merges the first $taillen
# plaintext bytes from $in0 with the stolen ciphertext tail in $tmp,
# and the result is re-whitened with $twk0 for the last 1-block pass.
3604 le?vperm $out0,$tmp,$tmp,$leperm
3605 le?stvx_u $out0,0,$out
3606 be?stvx_u $tmp,0,$out
3608 vxor $out0,$out0,$out0
3610 vperm $out0,$out0,$out1,$inpperm
3611 vsel $out0,$in0,$tmp,$out0
3612 vxor $out0,$out0,$twk0
# Byte-copy loop for the partial tail (body elided in this listing),
# then decrypt the spliced block once more via Loop_xts_dec1x.
3616 Loop_xts_dec6x_steal:
3619 bdnz Loop_xts_dec6x_steal
3623 b Loop_xts_dec1x # one more time...
# Epilogue: write the updated tweak back to *$ivp, scrub the on-stack
# round-key copies (overwritten with $seven), restore the non-volatile
# registers saved in the prologue, and pop the frame.  The final .byte
# row is the traceback table required by the PPC ABI.
3630 vxor $tweak,$twk0,$rndkey0
3631 le?vperm $tweak,$tweak,$tweak,$leperm
3632 stvx_u $tweak,0,$ivp
3638 stvx $seven,r10,$sp # wipe copies of round keys
3656 lvx v20,r10,$sp # ABI says so
3678 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3679 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3680 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3681 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3682 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3683 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3684 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3687 .byte 0,12,0x04,1,0x80,6,6,0
3692 vncipher $out0,$out0,v24
3693 vncipher $out1,$out1,v24
3694 vncipher $out2,$out2,v24
3695 vncipher $out3,$out3,v24
3696 vncipher $out4,$out4,v24
3697 lvx v24,$x20,$key_ # round[3]
3698 addi $key_,$key_,0x20
3700 vncipher $out0,$out0,v25
3701 vncipher $out1,$out1,v25
3702 vncipher $out2,$out2,v25
3703 vncipher $out3,$out3,v25
3704 vncipher $out4,$out4,v25
3705 lvx v25,$x10,$key_ # round[4]
3706 bdnz _aesp8_xts_dec5x
3709 vncipher $out0,$out0,v24
3710 vncipher $out1,$out1,v24
3711 vncipher $out2,$out2,v24
3712 vncipher $out3,$out3,v24
3713 vncipher $out4,$out4,v24
3717 vncipher $out0,$out0,v25
3718 vncipher $out1,$out1,v25
3719 vncipher $out2,$out2,v25
3720 vncipher $out3,$out3,v25
3721 vncipher $out4,$out4,v25
3722 vxor $twk0,$twk0,v31
3725 vncipher $out0,$out0,v26
3726 vncipher $out1,$out1,v26
3727 vncipher $out2,$out2,v26
3728 vncipher $out3,$out3,v26
3729 vncipher $out4,$out4,v26
3732 vncipher $out0,$out0,v27
3734 vncipher $out1,$out1,v27
3735 vncipher $out2,$out2,v27
3736 vncipher $out3,$out3,v27
3737 vncipher $out4,$out4,v27
3740 addi $key_,$sp,$FRAME+15 # rewind $key_
3741 vncipher $out0,$out0,v28
3742 vncipher $out1,$out1,v28
3743 vncipher $out2,$out2,v28
3744 vncipher $out3,$out3,v28
3745 vncipher $out4,$out4,v28
3746 lvx v24,$x00,$key_ # re-pre-load round[1]
3749 vncipher $out0,$out0,v29
3750 le?vperm $in0,$in0,$in0,$leperm
3751 vncipher $out1,$out1,v29
3752 vncipher $out2,$out2,v29
3753 vncipher $out3,$out3,v29
3754 vncipher $out4,$out4,v29
3755 lvx v25,$x10,$key_ # re-pre-load round[2]
3758 vncipher $out0,$out0,v30
3759 vncipher $out1,$out1,v30
3760 vncipher $out2,$out2,v30
3761 vncipher $out3,$out3,v30
3762 vncipher $out4,$out4,v30
3764 vncipherlast $out0,$out0,$twk0
3765 vncipherlast $out1,$out1,$in1
3766 vncipherlast $out2,$out2,$in2
3767 vncipherlast $out3,$out3,$in3
3768 vncipherlast $out4,$out4,$in4
3772 .byte 0,12,0x14,0,0,0,0,0
3777 foreach(split("\n",$code)) {
3778 s/\`([^\`]*)\`/eval($1)/geo;
3780 # constants table endian-specific conversion
3781 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3785 # convert to endian-agnostic format
3787 foreach (split(/,\s*/,$2)) {
3788 my $l = /^0/?oct:int;
3789 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3792 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3795 # little-endian conversion
3796 if ($flavour =~ /le$/o) {
3797 SWITCH: for($conv) {
3798 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3799 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3804 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3807 $consts=0 if (m/Lconsts:/o); # end of table
3809 # instructions prefixed with '?' are endian-specific and need
3810 # to be adjusted accordingly...
3811 if ($flavour =~ /le$/o) { # little-endian
3816 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3817 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3818 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3819 } else { # big-endian