# Symbolic names for the MIPS general-purpose registers.  Each variable
# holds the operand text (a dollar sign followed by the register number)
# that gets interpolated into the assembly templates further down.
($zero, $at, $t0, $t1, $t2) = map { "\$$_" } 0 .. 2, 24, 25;
($a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7) = map { "\$$_" } 4 .. 11;
($s0, $s1, $s2, $s3, $s4, $s5, $s6, $s7, $s8, $s9, $s10, $s11) =
    map { "\$$_" } 12 .. 23;
($gp, $tp, $sp, $fp, $ra) = map { "\$$_" } 3, 28 .. 31;

# Command line is "[flavour] [output]".  The last argument is consumed as
# the output file when it ends in a ".ext"-like suffix; the first remaining
# argument is consumed as the flavour when it contains no dot.
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : "o32";

# Only flavours whose name contains "64" or "n32" are accepted.  Note that
# the "o32" fallback above does not pass this check, so in practice a
# flavour has to be supplied by the caller.
$flavour =~ /64|n32/i or die "MIPS64 only";

# NUBI flavours use $a0 as the return-value register and a wider .mask
# value; every other flavour uses $t0 and the narrower mask.
if ($flavour =~ /nubi/i) {
    $v0              = $a0;
    $SAVED_REGS_MASK = "0x0003f000";
}
else {
    $v0              = $t0;
    $SAVED_REGS_MASK = "0x00030000";
}

# Incoming arguments shared by the init and blocks templates ...
($ctx, $inp, $len, $padbit) = ($a0, $a1, $a2, $a3);
# ... the two 64-bit input words ...
($in0, $in1) = ($a4, $a5);
# ... and the scratch registers.
($tmp0, $tmp1, $tmp2, $tmp3, $tmp4) = ($a6, $a7, $at, $t0, $t1);
# poly1305_init(ctx, key)
#
# The emitted routine zeroes the three 64-bit accumulator words at
# ctx+0/8/16.  When the key pointer is non-NULL it loads the first 16 key
# bytes as two little-endian 64-bit words, clamps them with
# 0x0ffffffc0fffffff and 0x0ffffffc0ffffffc, and stores r0 at ctx+24,
# r1 at ctx+32 and s1 = r1 + (r1 >> 2) at ctx+40.  It always returns 0.
#
# The generated text is meant to be run through the C preprocessor:
#  * MSB/LSB are the byte offsets used by the ldl/ldr unaligned-load pair
#    and depend on endianness, so they are defined here up front;
#  * exactly one load sequence is selected: plain ld on MIPS64R6 (which has
#    no ldl/ldr), the ldl/ldr pair everywhere else;
#  * the 64-bit byte swap is needed on big-endian (MIPSEB) targets only,
#    and only one of its two implementations may be assembled (dsbh/dshd
#    on MIPS64R2, shift-and-mask otherwise).
# Emitting all of these alternatives unconditionally, as this template
# previously did, swaps the key words twice and leaves MSB/LSB undefined.
$code.=<<___;
#ifdef MIPSEB
# define MSB 0
# define LSB 7
#else
# define LSB 0
# define MSB 7
#endif

.text
.set noat
.set noreorder

.align 5
.globl poly1305_init
.ent poly1305_init
poly1305_init:
.frame $sp,0,$ra
.set reorder

sd $zero,0($ctx)
sd $zero,8($ctx)
sd $zero,16($ctx)

beqz $inp,.Lno_key

#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp)
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp)
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF

and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3
# endif
#endif

li $tmp0,1
dsll $tmp0,32
daddiu $tmp0,-63
dsll $tmp0,28
daddiu $tmp0,-1 # 0x0ffffffc0fffffff

and $in0,$tmp0
daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
and $in1,$tmp0

sd $in0,24($ctx)
dsrl $tmp0,$in1,2
sd $in1,32($ctx)
daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
sd $tmp0,40($ctx)

.Lno_key:
li $v0,0 # return 0
jr $ra
.end poly1305_init
___
{
# poly1305_blocks(ctx, inp, len, padbit)
#
# The public entry point converts len to a count of 16-byte blocks and
# returns at once when that count is zero; otherwise it branches to
# poly1305_blocks_internal.  The internal routine loads the accumulator
# (ctx+0/8/16) and the key material (r0, r1, s1 at ctx+24/32/40), and for
# every block adds the two little-endian input words plus padbit to the
# accumulator, multiplies by the key, performs the partial reduction and
# finally stores the accumulator back.
#
# Register roles.  The precomputed multiplier s1 lives in a lexical called
# $rs1 on purpose: naming it $s1 would shadow the global $s1 register alias,
# and the NUBI prologue/epilogue below would then save and restore the same
# register twice while leaving the register used as $h1 unsaved, although
# it is clobbered and covered by the NUBI .mask value.
my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
   ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);

# Preprocessor support needed by the loop below:
#  * MSB/LSB byte offsets for ldl/ldr (defined only if not already set);
#  * dmultu(a,b) / mflo(d,a,b) / mfhi(d,a,b) wrappers that map to the
#    classic HI/LO multiply, or to dmulu/dmuhu on MIPS64R6 (defined only
#    if not already provided).
$code.=<<___;
#ifndef MSB
# ifdef MIPSEB
#  define MSB 0
#  define LSB 7
# else
#  define LSB 0
#  define MSB 7
# endif
#endif

#ifndef dmultu
# if defined(_MIPS_ARCH_MIPS64R6)
#  define dmultu(rs,rt)
#  define mflo(rd,rs,rt) dmulu rd,rs,rt
#  define mfhi(rd,rs,rt) dmuhu rd,rs,rt
# else
#  define dmultu(rs,rt) dmultu rs,rt
#  define mflo(rd,rs,rt) mflo rd
#  define mfhi(rd,rs,rt) mfhi rd
# endif
#endif

.align 5
.globl poly1305_blocks
.ent poly1305_blocks
poly1305_blocks:
.set noreorder
dsrl $len,4 # number of complete blocks
bnez $len,poly1305_blocks_internal
nop
jr $ra
nop
.end poly1305_blocks

.align 5
.ent poly1305_blocks_internal
poly1305_blocks_internal:
.frame $sp,6*8,$ra
.mask $SAVED_REGS_MASK,-8
.set noreorder
dsubu $sp,6*8
sd $s5,40($sp)
sd $s4,32($sp)
___
# NUBI flavours additionally preserve the four lower saved registers.
$code.=<<___ if ($flavour =~ /nubi/i);
sd $s3,24($sp)
sd $s2,16($sp)
sd $s1,8($sp)
sd $s0,0($sp)
___
$code.=<<___;
.set reorder

ld $h0,0($ctx) # load hash value
ld $h1,8($ctx)
ld $h2,16($ctx)

ld $r0,24($ctx) # load key
ld $r1,32($ctx)
ld $rs1,40($ctx)

.Loop:
#if defined(_MIPS_ARCH_MIPS64R6)
ld $in0,0($inp) # load input
ld $in1,8($inp)
#else
ldl $in0,0+MSB($inp) # load input
ldl $in1,8+MSB($inp)
ldr $in0,0+LSB($inp)
ldr $in1,8+LSB($inp)
#endif
daddiu $len,-1
daddiu $inp,16
#ifdef MIPSEB
# if defined(_MIPS_ARCH_MIPS64R2)
dsbh $in0,$in0 # byte swap
dsbh $in1,$in1
dshd $in0,$in0
dshd $in1,$in1
# else
ori $tmp0,$zero,0xFF
dsll $tmp2,$tmp0,32
or $tmp0,$tmp2 # 0x000000FF000000FF

and $tmp1,$in0,$tmp0 # byte swap
and $tmp3,$in1,$tmp0
dsrl $tmp2,$in0,24
dsrl $tmp4,$in1,24
dsll $tmp1,24
dsll $tmp3,24
and $tmp2,$tmp0
and $tmp4,$tmp0
dsll $tmp0,8 # 0x0000FF000000FF00
or $tmp1,$tmp2
or $tmp3,$tmp4
and $tmp2,$in0,$tmp0
and $tmp4,$in1,$tmp0
dsrl $in0,8
dsrl $in1,8
dsll $tmp2,8
dsll $tmp4,8
and $in0,$tmp0
and $in1,$tmp0
or $tmp1,$tmp2
or $tmp3,$tmp4
or $in0,$tmp1
or $in1,$tmp3
dsrl $tmp1,$in0,32
dsrl $tmp3,$in1,32
dsll $in0,32
dsll $in1,32
or $in0,$tmp1
or $in1,$tmp3
# endif
#endif

daddu $h0,$in0 # accumulate input
daddu $h1,$in1
sltu $tmp0,$h0,$in0
sltu $tmp1,$h1,$in1
daddu $h1,$tmp0

dmultu ($r0,$h0) # h0*r0
daddu $h2,$padbit
sltu $tmp0,$h1,$tmp0
mflo ($d0,$r0,$h0)
mfhi ($d1,$r0,$h0)

dmultu ($rs1,$h1) # h1*s1
daddu $tmp0,$tmp1
daddu $h2,$tmp0
mflo ($tmp0,$rs1,$h1)
mfhi ($tmp1,$rs1,$h1)

dmultu ($r1,$h0) # h0*r1
daddu $d0,$tmp0
daddu $d1,$tmp1
mflo ($tmp2,$r1,$h0)
mfhi ($d2,$r1,$h0)
sltu $tmp0,$d0,$tmp0
daddu $d1,$tmp0

dmultu ($r0,$h1) # h1*r0
daddu $d1,$tmp2
sltu $tmp2,$d1,$tmp2
mflo ($tmp0,$r0,$h1)
mfhi ($tmp1,$r0,$h1)
daddu $d2,$tmp2

dmultu ($rs1,$h2) # h2*s1
daddu $d1,$tmp0
daddu $d2,$tmp1
mflo ($tmp2,$rs1,$h2)

dmultu ($r0,$h2) # h2*r0
sltu $tmp0,$d1,$tmp0
daddu $d2,$tmp0
mflo ($tmp3,$r0,$h2)

daddu $d1,$tmp2
daddu $d2,$tmp3
sltu $tmp2,$d1,$tmp2
daddu $d2,$tmp2

li $tmp0,-4 # final reduction
and $tmp0,$d2
dsrl $tmp1,$d2,2
andi $h2,$d2,3
daddu $tmp0,$tmp1
daddu $h0,$d0,$tmp0
sltu $tmp0,$h0,$tmp0
daddu $h1,$d1,$tmp0
sltu $tmp0,$h1,$tmp0
daddu $h2,$h2,$tmp0

bnez $len,.Loop

sd $h0,0($ctx) # store hash value
sd $h1,8($ctx)
sd $h2,16($ctx)

.set noreorder
ld $s5,40($sp) # epilogue
ld $s4,32($sp)
___
# Mirror of the NUBI-only part of the prologue.
$code.=<<___ if ($flavour =~ /nubi/i);
ld $s3,24($sp)
ld $s2,16($sp)
ld $s1,8($sp)
ld $s0,0($sp)
___
# Return; the stack pointer is restored in the branch delay slot.
$code.=<<___;
jr $ra
daddu $sp,6*8
.end poly1305_blocks_internal
___
}
{
# poly1305_emit(ctx, mac, nonce)
#
# The template is assembled from consecutive pieces so that each phase of
# the routine can be described next to it; the concatenated text is exactly
# the same as a single template would produce.
my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);

# Entry: frameless leaf routine; fetch the three accumulator words.
$code.=<<___;
.align 5
.globl poly1305_emit
.ent poly1305_emit
poly1305_emit:
.frame $sp,0,$ra
.set reorder
ld $tmp0,0($ctx)
ld $tmp1,8($ctx)
ld $tmp2,16($ctx)
___

# Compute accumulator + 5, propagating the carries through all three words.
$code.=<<___;
daddiu $in0,$tmp0,5
sltiu $tmp3,$in0,5
daddu $in1,$tmp1,$tmp3
sltu $tmp3,$in1,$tmp3
daddu $tmp2,$tmp2,$tmp3
___

# Turn what is left of the top word after shifting out its low two bits
# into a select mask ($tmp2) and its complement ($tmp3).
$code.=<<___;
dsrl $tmp2,2
dsubu $tmp2,$zero,$tmp2
nor $tmp3,$zero,$tmp2
___

# Branch-free select between the incremented value and the original one.
$code.=<<___;
and $in0,$tmp2
and $tmp0,$tmp3
and $in1,$tmp2
and $tmp1,$tmp3
or $in0,$tmp0
or $in1,$tmp1
___

# Read the nonce as four 32-bit words and pair them up into two 64-bit
# values.
$code.=<<___;
lwu $tmp0,0($nonce)
lwu $tmp1,4($nonce)
lwu $tmp2,8($nonce)
lwu $tmp3,12($nonce)
dsll $tmp1,32
dsll $tmp3,32
or $tmp0,$tmp1
or $tmp2,$tmp3
___

# Add the nonce to the selected value, carrying from the low word into the
# high word.
$code.=<<___;
daddu $in0,$tmp0
daddu $in1,$tmp2
sltu $tmp0,$in0,$tmp0
daddu $in1,$tmp0
___

# Store the 128-bit result one byte at a time, least significant byte
# first; the shifts are interleaved with the stores.
$code.=<<___;
dsrl $tmp0,$in0,8
dsrl $tmp1,$in0,16
dsrl $tmp2,$in0,24
sb $in0,0($mac)
dsrl $tmp3,$in0,32
sb $tmp0,1($mac)
dsrl $tmp0,$in0,40
sb $tmp1,2($mac)
dsrl $tmp1,$in0,48
sb $tmp2,3($mac)
dsrl $tmp2,$in0,56
sb $tmp3,4($mac)
dsrl $tmp3,$in1,8
sb $tmp0,5($mac)
dsrl $tmp0,$in1,16
sb $tmp1,6($mac)
dsrl $tmp1,$in1,24
sb $tmp2,7($mac)
sb $in1,8($mac)
dsrl $tmp2,$in1,32
sb $tmp3,9($mac)
dsrl $tmp3,$in1,40
sb $tmp0,10($mac)
dsrl $tmp0,$in1,48
sb $tmp1,11($mac)
dsrl $tmp1,$in1,56
sb $tmp2,12($mac)
sb $tmp3,13($mac)
sb $tmp0,14($mac)
sb $tmp1,15($mac)
___

# Return, then close the module with its identification string in .rdata.
$code.=<<___;
jr $ra
.end poly1305_emit
.rdata
.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
___
}
# Write the generated assembly to the requested output file, or to the
# inherited STDOUT when no output file was given on the command line.
# The three-argument open keeps characters in the file name from being
# interpreted as part of the open mode, and a failure to open the file is
# reported immediately instead of surfacing later (or not at all).
if ($output) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}
print $code;
# Buffered write errors only show up at close time, so check it.
close STDOUT or die "error closing STDOUT: $!";