#!/usr/bin/perl

# **********************************************************
# Copyright (c) 2014-2015 Google, Inc.  All rights reserved.
# **********************************************************

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# * Neither the name of Google, Inc. nor the names of its contributors may be
#   used to endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL VMWARE, INC. OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.

# Feed this the text from the ARM manual on standard input: the A32
# instruction descriptions by default, or the T32 ones when -t32 is given.

# Script-wide settings, adjusted by the command-line switches handled below.
my $verbose = 0; # Chattiness level; every -v raises it by one
my $line = 0;    # Number of input lines consumed so far
my $pred = 1;    # Process predicated instrs (default), or non-pred (-nopred)?
my $simd = 0;    # Assume SIMD encodings (-simd)?
my $t32 = 0;     # Look for T32 instrs instead of A32 ones (-t32)

# Each recognized switch mapped to the adjustment it performs.
my %switch_action = (
    '-nopred' => sub { $pred = 0; },
    '-v'      => sub { $verbose++; },
    '-simd'   => sub { $simd = 1; },
    '-t32'    => sub { $t32 = 1; },
);

# Consume every command-line word.  Anything that is not a known switch is
# fatal, so nothing is left in @ARGV once this loop finishes.
while (@ARGV) {
    my $action = $switch_action{$ARGV[0]}
        or die "Unknown argument $ARGV[0]\n";
    $action->();
    shift(@ARGV);
}

# Main scan over the manual text.
#
# For every "Encoding A..." header (or "Encoding T..." when -t32 was given)
# the loop walks through three phases, each consuming further input lines:
#   1. skip ahead to the "ARMv..." architecture line (or jump straight to
#      phase 3's name handling if an assembly-syntax line shows up first);
#   2. skip ahead to the assembly-syntax line, whose leading word is taken
#      as the instruction name;
#   3. look for the encoding bit line and hand it to generate_entry().
# Any new "Encoding " header met along the way restarts the process from
# that header via the startover label.

# Collects "|v8" each time an ARMv8 architecture line is seen.  Nothing in
# this script reads it back; it is kept only as a declared lexical.
my $flags = '';

while (<>) {
    $line++;
    chomp;
    s/\r$//; # DOS: chomp only strips "\n", so drop a trailing CR explicitly
    print "xxx $line $_\n" if ($verbose > 1);
  startover:
    if ((!$t32 && (/^Encoding A/ || /^Encoding ..\/A/)) ||
        ($t32  && (/^Encoding T/))) {
        my $name;
        my $asm;
        # Phase 1: find the architecture line.
        while (<>) {
            $line++;
            chomp;
            s/\r$//; # DOS
            if (/^ARMv/) {
                $flags .= "|v8" if (/^ARMv8/);
                last;
            } elsif (/^[A-Z].*<.*<.*>$/) {
                # Sometimes the encoding is after the name in the .text version
                goto at_name;
            }
            goto startover if (/^Encoding /); # some descriptions have Encoding A...
        }
        # Phase 2: find the assembly-syntax line (two leading capitals, or a
        # capital directly followed by '<').
        while (<>) {
            $line++;
            chomp;
            s/\r$//; # DOS
            next if (/^ARMv/);
            next if ($_ !~ /^[A-Z][A-Z]/ && $_ !~ /^[A-Z]</);
            last;
        }
        last if eof();
      at_name:
        if (/^(\w+)/) {
            $name = $1;
            $asm = $_;
        } else {
            print "unexpected asm on line $line: $_\n";
        }
        print "found $name: $asm\n" if ($verbose);
        # Phase 3: find the encoding bit line.  $last remembers the previous
        # line because the bit-number header decides what the next line is.
        my $last = "";
        while (<>) {
            $line++;
            chomp;
            s/\r$//; # DOS
            my $prefix = '';
            if ($t32) {
                if ($last =~ /^15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 15/) {
                    $prefix = "1 1 1 .";
                }
            } elsif (!$pred) {
                if ($last =~ /^31 30 29/ && /^1 1 1 1/) {
                    $prefix = "1 1 1 1";
                }
            } elsif (/^cond/) {
                $prefix = "cond";
            }
            if ($prefix ne '') {
                # We encode the "x x x P U {D,R} W S" specifiers either into
                # our opcodes or we have multiple entries with encoding chains.
                my $enc = $_;
                my $matched = 0;
                # Try the widest run of single-bit specifiers first; a
                # shorter run is padded with zero bits up to eight.
                foreach my $nbits (8, 6, 4, 2) {
                    next unless (/^$prefix\s+((\(?[01PUWSRDQi]\)? ){$nbits})(.*)/);
                    my $opc = $1 . join(' ', ('0') x (8 - $nbits));
                    my $rest = $3;
                    print "matched $name $enc\n" if ($verbose);
                    # Ignore parens: go w/ value inside.
                    $opc =~ s/\(//g;
                    $opc =~ s/\)//g;
                    generate_entry(lc($name), $asm, $enc, $opc, $rest, 0);
                    $matched = 1;
                    last;
                }
                print "no match for $name: $_\n" unless ($matched);
                last;
            }
            goto startover if (/^Encoding /);
            $last = $_;
        }
    }
}

# generate_entry(name, asm, enc, opc, rest, PUW [, eflags])
#
# Prints one decoding-table row to the currently selected output handle, and
# recursively prints the sibling rows needed to cover every variable bit.
#
#   name   - opcode name; suffixes ("s", ".f32", ".8", ...) get appended here
#   asm    - assembly syntax line, echoed in the trailing comment and
#            consulted for /amode/ and /.<dt>/
#   enc    - the complete encoding line as space-separated tokens; the token
#            widths must add up to exactly 32 bits
#   opc    - eight space-separated specifiers for bits 27..20; each is 0, 1
#            or one of the letters S P U W D R i Q
#   rest   - the part of the encoding line after those specifiers; it is
#            turned into the operand list
#   PUW    - true once a P, U or W letter has been expanded; adds a
#            /*PUW=...*/ comment built from specifiers 3, 4 and 6
#   eflags - optional flags-written string inherited from an outer call
#            (defaults to "x"); callers outside this routine can omit it
#
# Each letter in opc is expanded by recursing with that letter set to 0 and
# continuing in this call with it set to 1.  Dies on an unknown token width,
# on an encoding that is not 32 bits wide, or on an unexpected specifier.
# Reads the file-level settings $t32, $simd and $pred.
sub generate_entry
{
    my ($name, $asm, $enc, $opc, $rest, $PUW, $eflags) = @_;
    $eflags = "x" unless (defined($eflags));
    my $other_opc;
    my $other_enc;
    my $other_rest;
    my $other_name;
    my $negative = 0;
    my $hexopc = 0;

    # Ensure we've got all the bits, and fill in opcode for lower bits
    my %bitlen = (
        '(0)' => 1, '(1)' => 1, '(S)' => 1,
        'J1' => 1, 'J2' => 1,
        'Rn' => '4', 'Rd' => 4, 'Rt' => 4, 'Rt2' => 4, 'Rs' => 4, 'Rm' => 4, 'Ra' => 4,
        'RdHi' => 4, 'RdLo' => 4,
        'CRd' => 4, 'CRn' => 4, 'CRm' => 4,
        'Vn' => '4', 'Vd' => 4, 'Vt' => 4, 'Vm' => 4, 'Va' => 4,
        'imm2' => 2, 'imm3' => 3, 'imm4' => 4, 'imm5' => 5, 'imm6' => 6, 'imm8' => 8,
        'imm10' => 10, 'imm11' => 11, 'imm12' => 12, 'imm24' => 24,
        'imm10H' => 10, 'imm10L' => 10, 'imm4H' => 4, 'imm4L' => 4,
        'sat_imm4' => 4, 'sat_imm5' => 5,
        'type' => 2, # shift type
        'type_vld' => 4, # OP_vld1
        'cond' => 4,
        'option' => 4,
        'msb' => 5, 'lsb' => 5,
        'coproc' => 4, 'opc1' => 4, 'opc2' => 3, # OP_cdp
        'opc1_mcr' => 3, # OP_mcr
        'opc1_vmov' => 2, 'opc2_vmov' => 2, # OP_vmov
        'opt' => 2, # OP_dcps
        'register_list_t32' => 13, # for T32
        'register_list' => 16, # for A32
        'register_list_priv' => 15, # for A32 priv ldm
        'mask' => 2, # OP_msr
        'mask_priv' => 4, # OP_msr priv
        'tb' => 1, # OP_pkh
        'widthm1' => 5, # OP_sbfx
        'sh' => 1, # OP_ssat
        'rotate' => 2, # OP_sxtab
        'imod' => 2, 'mode' => 5, # OP_cps
        'M1' => 4, # OP_mrs
        'reg' => 4, # OP_vmrs
        'opcode' => 4, # OP_subs pc
        'sz_crc32' => 2, # OP_crc32
        'sz' => 1, # OP_vabs
        # SIMD
        'size' => 2, 'size=8' => 2, 'size=16' => 2, 'size=32' => 2, 'size=64' => 2,
        'size=s8' => 2, 'size=s16' => 2, 'size=s32' => 2, 'size=s64' => 2,
        'size=u8' => 2, 'size=u16' => 2, 'size=u32' => 2, 'size=u64' => 2,
        'size=i8' => 2, 'size=i16' => 2, 'size=i32' => 2, 'size=i64' => 2,
        'sz=0' => 1, 'sz=1' => 1, 'cmode' => 4,
        'op' => 1, # OP_vacge
        'op_2b' => 2, # OP_vbif, OP_vcvt, OP_vqmov, OP_vrev
        'op_3b' => 3, # OP_vrint
        'sf' => 1, 'sx' => 1, 'RM' => 2, # OP_vcvt
        'align' => 2, 'index_align' => 4,
        'cc' => 2, # OP_vsel
        'len' => 2, # OP_vtbl
        );
    my @encbits = split(' ', $enc);
    my $totlen = 0;
    for (my $i = 0; $i <= $#encbits; $i++) {
        # Some field names have a different width depending on the
        # instruction, so map them to an instruction-specific table key.
        my $token = $encbits[$i];
        $token =~ s/register_list/register_list_t32/ if ($t32);
        $token =~ s/register_list/register_list_priv/
            if ($name eq 'ldm' && $asm =~ /amode/);
        $token =~ s/opc1/opc1_mcr/ if ($name eq 'mcr' || $name eq 'mcr2' ||
                                       $name eq 'mrc' || $name eq 'mrc2');
        $token =~ s/mask/mask_priv/ if ($name eq 'msr' && $enc =~ / R /);
        $token =~ s/\bsz\b/sz_crc32/ if ($name eq 'crc32');
        $token =~ s/\bop\b/op_2b/
            if (($name eq 'v' && $enc =~ /D op V/) ||
                ($name =~ /^vcvt/ && $enc =~ /1 op Q/) ||
                ($name =~ /^vqmov/) ||
                ($name =~ /^vrev/));
        $token =~ s/\bop\b/op_3b/ if ($name =~ /^vrint/ && $enc =~ /1 op Q/);
        $token =~ s/\btype\b/type_vld/ if ($name =~ /^vld/ || $name =~ /^vst/);
        $token =~ s/\b(opc\d)\b/${1}_vmov/ if ($name =~ /^vmov/);
        my $len = 0;
        if (length($token) == 1) {
            # Any single character stands for exactly one bit.
            $len = 1;
        } elsif (defined($bitlen{$token})) {
            $len = $bitlen{$token};
            my $unmod = $encbits[$i];
            if ($unmod eq 'type') {
                $rest =~ s/\btype\b/sh2/;
            } elsif ($unmod !~ /^R/ && $unmod !~ /^CR/ && $unmod !~ /^\(/) {
                # Rename a non-register field to imm<width>_<lowest bit>.
                my $pos = 32 - $totlen - $len;
                my $repl = $len . "_" . $pos;
                $rest =~ s/\b\Q$unmod\E\b/imm$repl/;
            }
        } else {
            die "Unknown length for $name: \"$token\" ($enc)\n";
        }
        $totlen += $len;
        if ($token eq '1' || $token eq '(1)') {
            # $totlen already includes this bit, so this is its position.
            $hexopc |= 1 << (32 - $totlen);
        }
    }
    die "Missing chars (have $totlen) for $name $asm:  $enc\n" unless ($totlen == 32);

    # Handle "x x x P U {D,R} W S" by expanding the chars
    my @bits = split(' ', $opc);
    for (my $i = 0; $i <= $#bits; $i++) {
        if ($bits[$i] eq 'S') {
            $other_opc = $opc;
            $other_opc =~ s/S/0/;
            generate_entry($name, $asm, $enc, $other_opc, $rest, $PUW, $eflags);
            # Resolve S in $opc as the other letters do, so that any later
            # recursive call does not expand S a second time.
            $opc =~ s/S/1/;
            $name .= "s";
            $bits[$i] = '1';
            $eflags = "fWNZCV";
        } elsif ($bits[$i] eq 'P') {
            $PUW = 1;
            $other_opc = $opc;
            $other_opc =~ s/P/0/;
            generate_entry($name, $asm, $enc, $other_opc, $rest, $PUW, $eflags);
            $opc =~ s/P/1/;
            $bits[$i] = '1';
        } elsif ($bits[$i] eq 'U') {
            $PUW = 1;
            $other_opc = $opc;
            $other_opc =~ s/U/0/;
            generate_entry($name, $asm, $enc, $other_opc, $rest, $PUW, $eflags);
            $opc =~ s/U/1/;
            $bits[$i] = '1';
            # NOTE(review): this marks the U=1 row as negative and the mark is
            # not carried into the recursive calls made for later letters --
            # confirm that is the intended polarity and scope.
            $negative = 1;
        } elsif ($bits[$i] eq 'W') {
            $PUW = 1;
            $other_opc = $opc;
            $other_opc =~ s/W/0/;
            generate_entry($name, $asm, $enc, $other_opc, $rest, $PUW, $eflags);
            $bits[$i] = '1';
            $opc =~ s/W/1/;
        } elsif ($bits[$i] eq 'D' || $bits[$i] eq 'R' || $bits[$i] eq 'i') {
            $other_opc = $opc;
            $other_opc =~ s/$bits[$i]/0/;
            generate_entry($name, $asm, $enc, $other_opc, $rest, $PUW, $eflags);
            $opc =~ s/$bits[$i]/1/;
            $bits[$i] = '1';
        } elsif ($bits[$i] eq 'Q') {
            $other_opc = $opc;
            $other_opc =~ s/Q/0/;
            generate_entry($name, $asm, $enc, $other_opc, $rest, $PUW, $eflags);
            $bits[$i] = '1';
            $opc =~ s/Q/1/;
            $rest =~ s/ V/ VQ/g;
        }
        if ($bits[$i] eq '1' || $bits[$i] eq '0') {
            # Specifier $i sits at bit 27 - $i of the instruction word.
            $hexopc |= $bits[$i] << (27 - $i);
        } else {
            die "invalid code $bits[$i]\n";
        }
    }

    # Floating-point precision bit: bit 8 == "sz"
    if ($simd && $enc =~ / sz /) {
        $other_name = $name . ".f32";
        $other_enc = $enc;
        $other_enc =~ s/ sz / sz=0 /;
        generate_entry($other_name, $asm, $other_enc, $opc, $rest, $PUW, $eflags);
        $name .= ".f64";
        $enc =~ s/ sz / sz=1 /;
        $hexopc |= 0x100;
    }
    # For SIMD, Q bit is down low
    if ($simd && $rest =~ / Q /) {
        $other_rest = $rest;
        $other_rest =~ s/Q //;
        $other_rest =~ s/Vn/VAq/;
        $other_rest =~ s/Vd/VBq/;
        $other_rest =~ s/Vm/VCq/;
        generate_entry($name, $asm, $enc, $opc, $other_rest, $PUW, $eflags);
        $rest =~ s/Q //;
        $rest =~ s/Vn/VAdq/;
        $rest =~ s/Vd/VBdq/;
        $rest =~ s/Vm/VCdq/;
        $hexopc |= 0x40;
    }

    # Data type: "<dt>" or "<size>"
    if ($simd && $enc =~ / size /) {
        # We bail on the precise hex encoding: we just try to pre-generate
        # entries that can be manually tweaked
        my @subtypes;
        if ($asm =~ /.<dt>/) {
            if ($enc =~ / U /) {
                @subtypes = ('s8', 's16', 's32', 'u8', 'u16', 'u32');
            } else {
                @subtypes = ('i8', 'i16', 'i32', 'i64');
            }
        } else {
            @subtypes = ('8', '16', '32', '64');
        }
        $rest =~ s/size\s*//;
        foreach my $sub (@subtypes) {
            $other_name = $name . "." . $sub;
            $other_enc = $enc;
            $other_enc =~ s/ size / size=$sub /;
            generate_entry($other_name, $asm, $other_enc, $opc, $rest, $PUW, $eflags);
        }
    }

    if ($t32) {
        # Fold in whichever of the first four encoding tokens are literal.
        my @topbits = split(' ', $enc);
        for (my $i = 0; $i < 4; $i++) {
            if ($topbits[$i] eq '1' || $topbits[$i] eq '0') {
                $hexopc |= $topbits[$i] << (31 - $i);
            }
        }
    } elsif (!$pred) {
        $hexopc |= 0xf0000000;
    }
    my $opname = $name;
    $opname =~ s/\./_/g;
    $opname .= ",";
    $name .= "\",";
    printf "    {OP_%-8s 0x%08x, \"%-8s ", $opname, $hexopc, $name;

    # Clean up extra spaces, parens, digits
    $enc =~ s/\s\s+/ /g;
    $rest =~ s/\s\s+/ /g;
    $rest =~ s/\(//g;
    $rest =~ s/\)//g;
    $rest =~ s/\s\d+\s/ /g;

    # Put Rd or Rt first, as dst
    $rest =~ s/(.*) (R[dt])/$2 $1/;
    # Put shift last, in disasm order
    $rest =~ s/imm5 type (.*)/$1 type imm5/;
    $rest =~ s/Rs type (.*)/$1 type Rs/;
    # Rn is (usually) before Rm
    $rest =~ s/Rm (.*) Rn/Rn $1 Rm/;

    # Names of types
    $rest =~ s/imm(\d+)/i$1/g;
    $rest =~ s/type/sh2/g;

    $rest =~ s/Rm/-Rm/ if ($negative);

    # Get the 2nd empty dest in there for SIMD with Q.
    # XXX: do the same for the others!
    $rest =~ s/(VA\w+) (VB\w+)/$2 xx $1/;

    my @opnds = split(' ', $rest);
    my $opcnt = 0;
    for (my $i = 0; $i <= $#opnds; $i++) {
        # Literal bits, and for SIMD the sz/N/M/F bits, are not operands.
        if ($opnds[$i] ne '0' && $opnds[$i] ne '1' &&
            (!$simd || ($opnds[$i] ne 'sz' && $opnds[$i] ne 'N' &&
                        $opnds[$i] ne 'M' && $opnds[$i] ne 'F'))) {
            if ($opcnt == 0 && $opnds[$i] !~ /Rd$/) {
                # First operand is not Rd: leave both leading slots empty.
                print "xx, xx, ";
                $opcnt += 2;
            }
            # Convert to the new types
            my $toprint = $opnds[$i];
            $toprint =~ s/Rn/RAw/;
            # XXX: convert these based on bit positions up above -- but keep dst
            # vs src info too
            if ($t32) {
                $toprint =~ s/Rd/RCw/;
                $toprint =~ s/Rt/RCw/;
                die "No Rs in T32!\n" if ($toprint =~ /Rs/);
            } else {
                $toprint =~ s/Rd/RBw/;
                $toprint =~ s/Rt/RBw/;
                $toprint =~ s/Rs/RCw/;
            }
            $toprint =~ s/Rm/RDw/;
            print "$toprint, ";
            $opcnt++;
            if ($opcnt == 1) {
                # The slot after the first operand is always left empty.
                print "xx, ";
                $opcnt++;
            }
        }
    }
    # Pad the operand list out to five slots.
    for (my $i = $opcnt; $i < 5; $i++) {
        print "xx, ";
    }
    print(($pred && !$t32) ? "pred" : "no");
    print ", $eflags, END_LIST},";
    if ($PUW) {
        my $PUW_str = $bits[3] . $bits[4] . $bits[6];
        print "/*PUW=$PUW_str*/";
    }
    print "/* ($asm) */ /* <$enc> */\n";
}