| 1 | package App::Scheme79asm; |
| 2 | |
| 3 | use 5.014000; |
| 4 | use strict; |
| 5 | use warnings; |
| 6 | |
| 7 | use Data::Dumper qw/Dumper/; |
| 8 | use Data::SExpression qw/consp scalarp/; |
| 9 | use Scalar::Util qw/looks_like_number/; |
| 10 | |
| 11 | our $VERSION = '0.004'; |
| 12 | |
# Symbolic names accepted in the tag position of an S-expression, mapped
# to the numeric value stored in the type field of a word.  Type tags and
# primitives share this one table (and therefore one namespace).
our %TYPES = (
    # Type tags; synonyms are grouped on one line.
    LIST    => 0,
    SYMBOL  => 1, NUMBER    => 1,
    VAR     => 2, VARIABLE  => 2,
    CLOSURE => 3,
    PROC    => 4, PROCEDURE => 4,
    IF      => 5, COND      => 5, CONDITIONAL => 5,
    CALL    => 6,
    QUOTE   => 7, QUOTED    => 7,

    # Primitives.
    MORE           => 0,
    CAR            => 1,
    CDR            => 2,
    CONS           => 3,
    ATOM           => 4,
    PROGN          => 5,
    'REVERSE-LIST' => 6,
    FUNCALL        => 7,
);
| 38 | |
# Alias the consp and scalarp typeglobs of Data::SExpression into this
# package, so both predicates can be called unqualified here (process
# relies on scalarp).  The same two names are also requested in the
# import list of the `use Data::SExpression` line.
*consp = *Data::SExpression::consp;
*scalarp = *Data::SExpression::scalarp;
| 41 | |
# Lay out one parsed S-expression in memory and return the location of
# the word written for it.
#
# Arguments:
#   $sexp     - array reference [TAG, OPERAND...].  TAG is either a number
#               or a key of %TYPES.  With exactly one operand, the operand
#               is a number (used directly as the address field) or a
#               nested list (laid out first; its location becomes the
#               address field).  With several operands, each operand must
#               itself be a list; they are laid out in consecutive
#               locations and the address field is the first of them.
#   $location - optional location for the resulting word.  When false,
#               the free pointer is advanced by one and the word is
#               stored at the new free pointer.
#
# Negative addresses are wrapped once by adding 2**addr_bits (two's
# complement within the address field).
#
# Dies when $sexp is not an array reference, when the tag or address is
# not an atom, when the address is not numeric, when the tag is neither a
# number nor a known name, or when the type or address does not fit in
# type_bits / addr_bits bits.
sub process {
    my ($self, $sexp, $location) = @_;
    die 'Toplevel is not a list: ', Dumper($sexp), "\n" unless ref $sexp eq 'ARRAY';
    my ($type, @addrs) = @$sexp;
    my $addr;

    die 'Type of toplevel is not atom: '. Dumper($type), "\n" unless scalarp($type);

    if (@addrs > 1) {
        # Reserve one consecutive slot per operand *before* recursing, so
        # that whatever the operands allocate for themselves lands past
        # the reserved run and the operands stay adjacent.
        $addr = $self->{freeptr} + 1;
        $self->{freeptr} += @addrs;
        $self->process($addrs[$_], $addr + $_) for 0 .. $#addrs;
    } else {
        $addr = $addrs[0];
    }

    # A nested list is laid out first; this word then points at it.
    $addr = $self->process($addr) if ref $addr eq 'ARRAY';
    die 'Addr of toplevel is not atom: ', Dumper($addr), "\n" unless scalarp($addr);

    # Remember the values as written by the user for the comment column
    # and for error messages, before they are translated below.
    my ($comment_type, $comment_addr) = ($type, $addr);
    die 'Computed addr is not a number: ', Dumper($addr), "\n" unless looks_like_number $addr;

    if (!looks_like_number $type) {
        die "No such type: $type\n" unless exists $TYPES{$type};
        $type = $TYPES{$type};
    }

    $addr += (1 << $self->{addr_bits}) if $addr < 0;

    # Both fields must fit their bit width from below as well as from
    # above: a value that is still negative here would otherwise be
    # packed into a corrupt word instead of being reported.
    die "Type too small: $type\n" if $type < 0;
    die "Type too large: $type\n" unless $type < (1 << $self->{type_bits});
    die "Addr too small: $comment_addr\n" if $addr < 0;
    die "Addr too large: $addr\n" unless $addr < (1 << $self->{addr_bits});

    my $result = ($type << $self->{addr_bits}) + $addr;
    unless ($location) {
        $self->{freeptr}++;
        $location = $self->{freeptr}
    }
    $self->{memory}[$location] = $result;
    $self->{comment}[$location] = "$comment_type $comment_addr";
    return $location;
}
| 80 | |
# Read S-expressions from $string one after another and hand each of them
# to process, until only whitespace (or nothing) is left.
#
# The reader is configured to upper-case symbols, to return symbols as
# objects and to fold lists into array references.
sub parse {
    my ($self, $string) = @_;
    my $reader = Data::SExpression->new({symbol_case => 'up', use_symbol_class => 1, fold_lists => 1});

    until ($string =~ /^\s*$/) {
        # In list context read returns the parsed expression followed by
        # the text that has not been consumed yet.
        (my $sexp, $string) = $reader->read($string);
        $self->process($sexp);
    }
}
| 92 | |
# Finalise the memory image after everything has been laid out:
#   - the word at the free pointer, together with its comment, is copied
#     to location 5;
#   - the free pointer itself is stored at location 4;
#   - the memory entry at the free pointer is then deleted.
# Returns the deleted memory entry.
sub finish {
    my ($self) = @_;
    my $last = $self->{freeptr};

    for my $bank (qw/memory comment/) {
        $self->{$bank}[5] = $self->{$bank}[$last];
    }
    $self->{memory}[4] = $last;

    return delete $self->{memory}[$last];
}
| 100 | |
# Construct an assembler object from key => value pairs.
#
# Recognised keys (all optional):
#   type_bits - width of the type field of a word (default 3)
#   addr_bits - width of the address field of a word (default 8);
#               address_bits is accepted as an alias, addr_bits wins
#               when both are given
#   freeptr   - last used memory location (default 6)
#   memory    - array reference with the initial memory contents; the
#               default holds seven words, with locations 2 and 3 set to
#               1 << addr_bits
#   comment   - array reference with the initial per-location comments
#               (default: descriptions of the seven predefined locations)
#
# Returns the argument hash blessed into $class.
sub new {
    my ($class, %args) = @_;

    # The documentation spells this key address_bits while the rest of
    # the module reads addr_bits; fold the former into the latter.
    my $address_bits = delete $args{address_bits};

    $args{type_bits} //= 3;
    $args{addr_bits} //= $address_bits // 8;
    $args{freeptr}   //= 6;
    $args{memory}    //= [0, 0, (1<<$args{addr_bits}), (1<<$args{addr_bits}), 0, 0, 0];

    # Defaults must not clobber comments supplied by the caller.
    $args{comment}   //= ['(cdr part of NIL)', '(car part of NIL)', '(cdr part of T)', '(car part of T)', '(free storage pointer)', '', '(result of computation)'];

    return bless \%args, $class;
}
| 110 | |
# Write the memory image in binary form: the number of memory words,
# followed by every word, each as a big-endian unsigned 16-bit value
# (pack template 'n').
#
# Arguments:
#   $fh - filehandle to print to (default STDOUT)
#
# Dies when addr_bits + type_bits is more than 16, because such words
# do not fit in 16 bits.
sub print_binary16 {
    my ($self, $fh) = @_;
    $fh //= \*STDOUT; # uncoverable condition right

    # A word of exactly 16 bits still fits, so only "more than 16" is an
    # error (the message used to claim ">= 16").
    die "addr_bits + type_bits > 16\n" if $self->{addr_bits} + $self->{type_bits} > 16;

    my $length = @{$self->{memory}};
    print {$fh} pack('n*', $length, @{$self->{memory}});
}
| 123 | |
# Write the memory image as Verilog non-blocking assignments to an array
# named mem, one line per memory location:
#
#   mem[<index>] <= <value>; // <comment>
#
# Location 4 is printed as a sized decimal literal, any other non-zero
# word as a sized binary literal, and everything else as a plain 0.
# Indices are right-aligned to the width of the largest index; the
# comment is only printed for locations that have one.
#
# Arguments:
#   $fh - filehandle to print to (default STDOUT)
sub print_verilog {
    my ($self, $fh) = @_;
    $fh //= \*STDOUT; # uncoverable condition right

    my $width      = $self->{type_bits} + $self->{addr_bits};
    my $last_index = $#{$self->{memory}};
    my $label_fmt  = '%' . length($last_index) . 'd';

    for my $i (0 .. $last_index) {
        my $word = $self->{memory}[$i];
        my $note = $self->{comment}[$i];

        my $literal;
        if ($i == 4) {
            $literal = "${width}'d$word";
        }
        elsif ($word) {
            $literal = sprintf("%d'b%0${width}b", $width, $word);
        }
        else {
            $literal = '0';
        }

        # Pad so that comments line up behind the widest binary literal.
        my $padding = ' ' x ($width + 5 - length($literal));
        my $label   = sprintf($label_fmt, $i);

        print {$fh} "mem[$label] <= $literal;";
        print {$fh} "$padding // $note" if defined $note;
        print {$fh} "\n";
    }

}
# Convenience wrapper: parse $string, finish the memory image, then
# write it to $fh with print_binary16.  Returns whatever print_binary16
# returns.
sub parse_and_print_binary16 {
    my $self = shift;
    my ($string, $fh) = @_;

    $self->parse($string);
    $self->finish;
    return $self->print_binary16($fh);
}
| 154 | |
# Convenience wrapper: parse $string, finish the memory image, then
# write it to $fh with print_verilog.  Returns whatever print_verilog
# returns.
sub parse_and_print_verilog {
    my $self = shift;
    my ($string, $fh) = @_;

    $self->parse($string);
    $self->finish;
    return $self->print_verilog($fh);
}
| 161 | |
| 162 | 1; |
| 163 | __END__ |
| 164 | |
| 165 | =encoding utf-8 |
| 166 | |
| 167 | =head1 NAME |
| 168 | |
| 169 | App::Scheme79asm - assemble sexp to Verilog ROM for SIMPLE processor |
| 170 | |
| 171 | =head1 SYNOPSIS |
| 172 | |
  use App::Scheme79asm;
  my $asm = App::Scheme79asm->new(type_bits => 3, addr_bits => 8);
  $asm->parse_and_print_verilog('(number 70)');
| 176 | |
| 177 | =head1 DESCRIPTION |
| 178 | |
| 179 | SIMPLE is a LISP processor defined in the 1979 |
| 180 | B<Design of LISP-Based Processors> paper by Steele and Sussman. |
| 181 | |
| 182 | The SIMPLE processor expects input in a particular tagged-pointer |
| 183 | format. This module takes a string containing a sequence of |
| 184 | S-expressions. Each S-expression is a list of one of three types: |
| 185 | |
| 186 | C<(tag value)>, for example C<(symbol 2)>, represents a value to be |
| 187 | put in memory (for example a number, or a symbol, or a variable |
| 188 | reference). |
| 189 | |
| 190 | C<(tag list)>, where C<list> is of one of these three types, |
| 191 | represents a tagged pointer. In this case, C<list> is (recursively) |
| 192 | laid out in memory as per these rules, and a pointer to that location |
| 193 | (and tagged C<tag>) is put somewhere in memory. |
| 194 | |
| 195 | C<(tag list1 list2)>, where C<list1> and C<list2> are of one of these |
| 196 | three types (not necessarily the same type). In this case, C<list1> |
| 197 | and C<list2> are (recursively) laid out in memory such that C<list1> |
| 198 | is at position X and C<list2> is at position X+1, and a pointer of |
| 199 | type tag and value X is put somewhere in memory. |
| 200 | |
| 201 | After this process the very last pointer placed in memory is moved to |
| 202 | the special location 5 (which is where SIMPLE expects to find the |
| 203 | expression to be evaluated). |
| 204 | |
| 205 | In normal use a single S-expression will be supplied, representing an |
| 206 | entire program. |
| 207 | |
| 208 | The C<tag> is either a number, a type, or a primitive. |
| 209 | The available types are: |
| 210 | |
| 211 | =over |
| 212 | |
| 213 | =item LIST |
| 214 | |
| 215 | =item SYMBOL (syn. NUMBER) |
| 216 | |
| 217 | =item VAR (syn. VARIABLE) |
| 218 | |
| 219 | =item CLOSURE |
| 220 | |
| 221 | =item PROC (syn. PROCEDURE) |
| 222 | |
| 223 | =item IF (syn. COND, CONDITIONAL) |
| 224 | |
| 225 | =item CALL |
| 226 | |
| 227 | =item QUOTE (syn. QUOTED) |
| 228 | |
| 229 | =back |
| 230 | |
| 231 | The available primitives are: |
| 232 | |
| 233 | =over |
| 234 | |
| 235 | =item MORE |
| 236 | |
| 237 | =item CAR |
| 238 | |
| 239 | =item CDR |
| 240 | |
| 241 | =item CONS |
| 242 | |
| 243 | =item ATOM |
| 244 | |
| 245 | =item PROGN |
| 246 | |
| 247 | =item REVERSE-LIST |
| 248 | |
| 249 | =item FUNCALL |
| 250 | |
| 251 | =back |
| 252 | |
| 253 | The following methods are available: |
| 254 | |
| 255 | =over |
| 256 | |
| 257 | =item App::Scheme79asm->B<new>([key => value, key => value, ...]) |
| 258 | |
| 259 | Create a new assembler object. Takes a list of keys and values, here |
| 260 | are the possible keys: |
| 261 | |
| 262 | =over |
| 263 | |
| 264 | =item type_bits |
| 265 | |
=item addr_bits

A word is made of a type and an address, with the type occupying the
most significant C<type_bits> (default 3) bits, and the address
occupying the least significant C<addr_bits> (default 8) bits.
Therefore the word size is C<type_bits + addr_bits> (default 11).
| 272 | |
| 273 | =item freeptr |
| 274 | |
A pointer to the last used location in memory (default 6). The program
will be laid out starting with location C<freeptr + 1>.
| 277 | |
| 278 | =item memory |
| 279 | |
| 280 | The initial contents of the memory. Note that locations 4, 5, 6 will |
| 281 | be overwritten, as will every location larger than the value of |
| 282 | C<freeptr>. |
| 283 | |
| 284 | =item comment |
| 285 | |
| 286 | The initial comments for memory entries. C<< $comment->[$i] >> is the |
| 287 | comment for C<< $memory->[$i] >>. |
| 288 | |
| 289 | =back |
| 290 | |
| 291 | =item $asm->B<parse>(I<$string>) |
| 292 | |
| 293 | Parse a sequence of S-expressions and lay it out in memory. |
| 294 | Can be called multiple times to lay out multiple sequences of |
| 295 | S-expressions one after another. |
| 296 | |
| 297 | =item $asm->B<process>(I<$sexp>) |
| 298 | |
Given an already-parsed sexp (meaning an array reference such as the
ones L<Data::SExpression> returns when C<fold_lists> is enabled), lay it
out in memory.
| 301 | Can be called multiple times to lay out multiple sequences of |
| 302 | S-expressions one after another. |
| 303 | |
| 304 | =item $asm->B<finish> |
| 305 | |
| 306 | Move the last pointer to position 5, and put the free pointer at |
| 307 | position 4. After all sequences of S-expressions have been given to |
| 308 | B<parse>, this method should be called. |
| 309 | |
| 310 | =item $asm->B<print_binary16>([I<$fh>]) |
| 311 | |
| 312 | Print the length of the memory (as a big-endian 16-bit value), |
| 313 | followed by the memory contents as a sequence of big-endian 16-bit |
| 314 | values to the given filehandle (default STDOUT). Dies if |
| 315 | C<addr_bits + type_bits> is more than 16. |
| 316 | |
| 317 | Big-endian 16-bit values can be decoded with C<unpack 'n', $value>. |
| 318 | |
| 319 | =item $asm->B<print_verilog>([I<$fh>]) |
| 320 | |
| 321 | Print a block of Verilog code assigning the memory contents to an |
| 322 | array named C<mem> to the given filehandle (default STDOUT). |
| 323 | |
| 324 | =item $asm->B<parse_and_print_binary16>(I<$string>[, I<$fh>]) |
| 325 | |
| 326 | Convenience method that calls B<parse>($string), B<finish>, and then |
| 327 | B<print_binary16>($fh). |
| 328 | |
| 329 | =item $asm->B<parse_and_print_verilog>(I<$string>[, I<$fh>]) |
| 330 | |
| 331 | Convenience method that calls B<parse>($string), B<finish>, and then |
| 332 | B<print_verilog>($fh). |
| 333 | |
| 334 | =back |
| 335 | |
| 336 | =head1 SEE ALSO |
| 337 | |
| 338 | L<http://repository.readscheme.org/ftp/papers/ai-lab-pubs/AIM-514.pdf> |
| 339 | |
| 340 | =head1 AUTHOR |
| 341 | |
| 342 | Marius Gavrilescu, E<lt>marius@ieval.roE<gt> |
| 343 | |
| 344 | =head1 COPYRIGHT AND LICENSE |
| 345 | |
| 346 | Copyright (C) 2018 by Marius Gavrilescu |
| 347 | |
| 348 | This library is free software; you can redistribute it and/or modify |
| 349 | it under the same terms as Perl itself, either Perl version 5.24.3 or, |
| 350 | at your option, any later version of Perl 5 you may have available. |
| 351 | |
| 352 | |
| 353 | =cut |