1 /* SPDX-License-Identifier: GPL-2.0 */
3 * Original implementation written by Andy Polyakov, @dot-asm.
4 * This is an adaptation of the original code for kernel use.
9 #include <linux/linkage.h>
10 #include <asm/nospec-insn.h>
11 #include <asm/vx-insn.h>
14 #define FRAME (16 * 8 + 4 * 8)
20 .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
24 .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
27 .long 0x61707865,0x61707865,0x61707865,0x61707865 # smashed sigma
28 .long 0x3320646e,0x3320646e,0x3320646e,0x3320646e
29 .long 0x79622d32,0x79622d32,0x79622d32,0x79622d32
30 .long 0x6b206574,0x6b206574,0x6b206574,0x6b206574
38 #############################################################################
39 # void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
40 # const u32 *key, const u32 *counter)
88 VL K0,0,,%r7 # load sigma
89 VL K1,0,,KEY # load key
91 VL K3,0,,COUNTER # load counter
96 VLM XA0,XA3,0x60,%r7,4 # load [smashed] sigma
98 VREPF XB0,K1,0 # smash the key
246 VMRHF XT0,XA0,XA1 # transpose data
250 VPDI XA0,XT0,XT1,0b0000
251 VPDI XA1,XT0,XT1,0b0101
252 VPDI XA2,XT2,XT3,0b0000
253 VPDI XA3,XT2,XT3,0b0101
259 VPDI XB0,XT0,XT1,0b0000
260 VPDI XB1,XT0,XT1,0b0101
261 VPDI XB2,XT2,XT3,0b0000
262 VPDI XB3,XT2,XT3,0b0101
268 VPDI XC0,XT0,XT1,0b0000
269 VPDI XC1,XT0,XT1,0b0101
270 VPDI XC2,XT2,XT3,0b0000
271 VPDI XC3,XT2,XT3,0b0101
277 VPDI XD0,XT0,XT1,0b0000
278 VPDI XD1,XT0,XT1,0b0101
279 VPDI XD2,XT2,XT3,0b0000
280 VPDI XD3,XT2,XT3,0b0101
287 VPERM XA0,XA0,XA0,BEPERM
288 VPERM XB0,XB0,XB0,BEPERM
289 VPERM XC0,XC0,XC0,BEPERM
290 VPERM XD0,XD0,XD0,BEPERM
310 VPERM XA0,XA0,XA0,BEPERM
311 VPERM XB0,XB0,XB0,BEPERM
312 VPERM XC0,XC0,XC0,BEPERM
313 VPERM XD0,XD0,XD0,BEPERM
337 VPERM XA0,XA0,XA0,BEPERM
338 VPERM XB0,XB0,XB0,BEPERM
339 VPERM XC0,XC0,XC0,BEPERM
340 VPERM XD0,XD0,XD0,BEPERM
364 VPERM XA0,XA0,XA0,BEPERM
365 VPERM XB0,XB0,XB0,BEPERM
366 VPERM XC0,XC0,XC0,BEPERM
367 VPERM XD0,XD0,XD0,BEPERM
402 brct LEN,.Loop_tail_4x
406 ENDPROC(chacha20_vx_4x)
422 #############################################################################
423 # void chacha20_vx(u8 *out, const u8 *inp, size_t len,
424 # const u32 *key, const u32 *counter)
482 stg %r0,0(SP) # back-chain
487 VLM K1,K2,0,KEY,0 # load key
488 VL K3,0,,COUNTER # load counter
490 VLM K0,BEPERM,0,%r7,4 # load sigma, increments, ...
507 VAF D1,K3,T1 # K[3]+1
508 VAF D2,K3,T2 # K[3]+2
509 VAF D3,K3,T3 # K[3]+3
510 VAF D4,D2,T2 # K[3]+4
511 VAF D5,D2,T3 # K[3]+5
721 VAF D1,D1,T1 # +K[3]+1
723 VPERM A0,A0,A0,BEPERM
724 VPERM B0,B0,B0,BEPERM
725 VPERM C0,C0,C0,BEPERM
726 VPERM D0,D0,D0,BEPERM
731 VAF D2,D2,T2 # +K[3]+2
732 VAF D3,D3,T3 # +K[3]+3
740 VLM K0,T3,0,%r7,4 # re-load sigma and increments
752 VPERM A0,A1,A1,BEPERM
753 VPERM B0,B1,B1,BEPERM
754 VPERM C0,C1,C1,BEPERM
755 VPERM D0,D1,D1,BEPERM
778 VPERM A0,A2,A2,BEPERM
779 VPERM B0,B2,B2,BEPERM
780 VPERM C0,C2,C2,BEPERM
781 VPERM D0,D2,D2,BEPERM
803 VAF D2,K3,T3 # K[3]+3
805 VPERM A0,A3,A3,BEPERM
806 VPERM B0,B3,B3,BEPERM
807 VPERM C0,C3,C3,BEPERM
808 VPERM D0,D3,D3,BEPERM
813 VAF D3,D2,T1 # K[3]+4
831 VAF D4,D4,D3 # +K[3]+4
832 VAF D3,D3,T1 # K[3]+5
833 VAF K3,D2,T3 # K[3]+=6
835 VPERM A0,A4,A4,BEPERM
836 VPERM B0,B4,B4,BEPERM
837 VPERM C0,C4,C4,BEPERM
838 VPERM D0,D4,D4,BEPERM
860 VAF D5,D5,D3 # +K[3]+5
862 VPERM A0,A5,A5,BEPERM
863 VPERM B0,B5,B5,BEPERM
864 VPERM C0,C5,C5,BEPERM
865 VPERM D0,D5,D5,BEPERM
886 lmg %r6,%r7,FRAME+6*8(SP)
900 brct LEN,.Loop_tail_vx
902 lmg %r6,%r7,FRAME+6*8(SP)