This commit is contained in:
Dmitry Boulytchev 2021-09-28 03:02:05 +03:00
parent 11203f3a85
commit fa874b4a4c
11 changed files with 486 additions and 23 deletions

View file

@ -7,6 +7,7 @@ MKDIR ?= mkdir
all: all:
$(MAKE) -C src $(MAKE) -C src
$(MAKE) -C runtime $(MAKE) -C runtime
$(MAKE) -C byterun
$(MAKE) -C stdlib $(MAKE) -C stdlib
STD_FILES=$(shell ls stdlib/*.[oi] stdlib/*.lama runtime/runtime.a runtime/Std.i) STD_FILES=$(shell ls stdlib/*.[oi] stdlib/*.lama runtime/runtime.a runtime/Std.i)

12
byterun/., Normal file
View file

@ -0,0 +1,12 @@
all: gc_runtime.o runtime.o
ar rc runtime.a gc_runtime.o runtime.o
gc_runtime.o: gc_runtime.s
$(CC) -g -fstack-protector-all -m32 -c gc_runtime.s
runtime.o: runtime.c
$(CC) -g -fstack-protector-all -m32 -c runtime.c
clean:
$(RM) *.a *.o *~

8
byterun/Makefile Normal file
View file

@ -0,0 +1,8 @@
all: byterun.o
$(CC) -m32 -g -o byterun byterun.o ../runtime/runtime.a
byterun.o: byterun.c
$(CC) -g -fstack-protector-all -m32 -c byterun.c
clean:
$(RM) *.a *.o *~

260
byterun/byterun.c Normal file
View file

@ -0,0 +1,260 @@
/* Lama SM Bytecode interpreter */
# include <string.h>
# include <stdio.h>
# include <errno.h>
# include <malloc.h>
# include "../runtime/runtime.h"
void *__start_custom_data;
void *__stop_custom_data;
typedef struct {
char *string_ptr;
int *public_ptr;
char *code_ptr;
int stringtab_size;
int global_area_size;
int public_symbols_number;
char buffer[0];
} bytefile;
char* get_string (bytefile *f, int pos) {
return &f->string_ptr[pos];
}
char* get_public_name (bytefile *f, int i) {
return get_string (f, f->public_ptr[i*2]);
}
int get_public_offset (bytefile *f, int i) {
return f->public_ptr[i*2+1];
}
bytefile* read_file (char *fname) {
FILE *f = fopen (fname, "rb");
long size;
bytefile *file;
if (f == 0) {
failure ("%s\n", strerror (errno));
}
if (fseek (f, 0, SEEK_END) == -1) {
failure ("%s\n", strerror (errno));
}
file = (bytefile*) malloc (sizeof(int)*3 + (size = ftell (f)));
if (file == 0) {
failure ("*** FAILURE: unable to allocate memory.\n");
}
rewind (f);
if (size != fread (&file->stringtab_size, 1, size, f)) {
failure ("%s\n", strerror (errno));
}
fclose (f);
file->string_ptr = &file->buffer [file->public_symbols_number * 2 * sizeof(int)];
file->public_ptr = (int*) file->buffer;
file->code_ptr = &file->string_ptr [file->stringtab_size];
return file;
}
void disassemble (FILE *f, bytefile *bf) {
# define INT (ip += sizeof (int), *(int*)(ip - sizeof (int)))
# define BYTE *ip++
# define STRING get_string (bf, INT)
# define FAIL failure ("ERROR: invalid opcode %d-%d\n", h, l)
char *ip = bf->code_ptr;
char *ops [] = {"+", "-", "*", "/", "%", "<", "<=", ">", ">=", "==", "!=", "&&", "!!"};
char *pats[] = {"=str", "#string", "#array", "#sexp", "#ref", "#val", "#fun"};
char *lds [] = {"LD\t", "LDA\t", "ST\t"};
do {
char x = BYTE,
h = (x & 0xF0) >> 4,
l = x & 0x0F;
fprintf (f, "0x%.8x:\t", ip-bf->code_ptr-1);
switch (h) {
case 15:
goto stop;
/* BINOP */
case 0:
fprintf (f, "BINOP\t%s", ops[l-1]);
break;
case 1:
switch (l) {
case 0:
fprintf (f, "CONST\t%d", INT);
break;
case 1:
fprintf (f, "STRING\t%s", STRING);
break;
case 2:
fprintf (f, "SEXP\t%s ", STRING);
fprintf (f, "%d", INT);
break;
case 3:
fprintf (f, "STI");
break;
case 4:
fprintf (f, "STA");
break;
case 5:
fprintf (f, "JMP\t0x%.8x", INT);
break;
case 6:
fprintf (f, "END");
break;
case 7:
fprintf (f, "RET");
break;
case 8:
fprintf (f, "DROP");
break;
case 9:
fprintf (f, "DUP");
break;
case 10:
fprintf (f, "SWAP");
break;
default:
FAIL;
}
break;
case 2:
case 3:
case 4:
fprintf (f, "%s\t", lds[h-2]);
switch (l) {
case 0: fprintf (f, "G(%d)", INT); break;
case 1: fprintf (f, "L(%d)", INT); break;
case 2: fprintf (f, "A(%d)", INT); break;
case 3: fprintf (f, "C(%d)", INT); break;
default: FAIL;
}
break;
case 5:
switch (l) {
case 0:
fprintf (f, "CJMPz\t%0x.8x", INT);
break;
case 1:
fprintf (f, "CJMPnz\t%0x.8x", INT);
break;
case 2:
fprintf (f, "BEGIN\t%d ", INT);
fprintf (f, "%d", INT);
break;
case 3:
fprintf (f, "CBEGIN\t%d ", INT);
fprintf (f, "%d", INT);
break;
case 4:
fprintf (f, "CLOSURE\t0x%.8x", INT);
{int n = INT;
for (int i = 0; i<n; i++) {
switch (BYTE) {
case 0: fprintf (f, "G(%d)", INT); break;
case 1: fprintf (f, "L(%d)", INT); break;
case 2: fprintf (f, "A(%d)", INT); break;
case 3: fprintf (f, "C(%d)", INT); break;
default: FAIL;
}
}
};
break;
case 5:
fprintf (f, "CALLC\t%d", INT);
break;
case 6:
fprintf (f, "CALL\t%0x%.8x ", INT);
fprintf (f, "%d", INT);
break;
case 7:
fprintf (f, "TAG\t%s ", STRING);
fprintf (f, "%d", INT);
break;
case 8:
fprintf (f, "ARRAY\t%d", INT);
break;
case 9:
fprintf (f, "FAIL\t%d", INT);
fprintf (f, "%d", INT);
break;
case 10:
fprintf (f, "LINE\t%d", INT);
break;
default:
FAIL;
}
break;
case 6:
fprintf (f, "PATT\t%s", pats[l]);
break;
default:
FAIL;
}
fprintf (f, "\n");
}
while (1);
stop: {}
}
void dump_file (FILE *f, bytefile *bf) {
int i;
fprintf (f, "String table size : %d\n", bf->stringtab_size);
fprintf (f, "Global area size : %d\n", bf->global_area_size);
fprintf (f, "Number of public symbols: %d\n", bf->public_symbols_number);
fprintf (f, "Public symbols :\n");
for (i=0; i < bf->public_symbols_number; i++)
fprintf (f, " 0x%.8x: %s\n", get_public_offset (bf, i), get_public_name (bf, i));
fprintf (f, "Code:\n");
disassemble (f, bf);
}
int main (int argc, char* argv[]) {
bytefile *f = read_file (argv[1]);
dump_file (stdout, f);
return 0;
}

View file

@ -5,7 +5,7 @@ all: gc_runtime.o runtime.o
gc_runtime.o: gc_runtime.s gc_runtime.o: gc_runtime.s
$(CC) -g -fstack-protector-all -m32 -c gc_runtime.s $(CC) -g -fstack-protector-all -m32 -c gc_runtime.s
runtime.o: runtime.c runtime.o: runtime.c runtime.h
$(CC) -g -fstack-protector-all -m32 -c runtime.c $(CC) -g -fstack-protector-all -m32 -c runtime.c
clean: clean:

View file

@ -1,26 +1,14 @@
/* Runtime library */ /* Runtime library */
#define _GNU_SOURCE 1 # define _GNU_SOURCE 1
# include <stdio.h> # include "runtime.h"
# include <stdio.h>
# include <string.h>
# include <stdarg.h>
# include <stdlib.h>
# include <sys/mman.h>
# include <assert.h>
# include <errno.h>
# include <regex.h>
# include <time.h>
# include <limits.h>
# define __ENABLE_GC__ # define __ENABLE_GC__
# ifndef __ENABLE_GC__ # ifndef __ENABLE_GC__
# define alloc malloc # define alloc malloc
# endif # endif
# define WORD_SIZE (CHAR_BIT * sizeof(int))
/* # define DEBUG_PRINT 1 */ /* # define DEBUG_PRINT 1 */
#ifdef DEBUG_PRINT #ifdef DEBUG_PRINT
@ -144,7 +132,7 @@ static void vfailure (char *s, va_list args) {
exit (255); exit (255);
} }
static void failure (char *s, ...) { void failure (char *s, ...) {
va_list args; va_list args;
va_start (args, s); va_start (args, s);

20
runtime/runtime.h Normal file
View file

@ -0,0 +1,20 @@
# ifndef __LAMA_RUNTIME__
# define __LAMA_RUNTIME__
# include <stdio.h>
# include <stdio.h>
# include <string.h>
# include <stdarg.h>
# include <stdlib.h>
# include <sys/mman.h>
# include <assert.h>
# include <errno.h>
# include <regex.h>
# include <time.h>
# include <limits.h>
# define WORD_SIZE (CHAR_BIT * sizeof(int))
void failure (char *s, ...);
# endif

View file

@ -20,6 +20,7 @@ class options args =
" -dsrc --- dump pretty-printed source code\n" ^ " -dsrc --- dump pretty-printed source code\n" ^
" -ds --- dump stack machine code (the output will be written into .sm file; has no\n" ^ " -ds --- dump stack machine code (the output will be written into .sm file; has no\n" ^
" effect if -i option is specfied)\n" ^ " effect if -i option is specfied)\n" ^
" -b --- compile to a stack machine bytecode\n" ^
" -v --- show version\n" ^ " -v --- show version\n" ^
" -h --- show this help\n" " -h --- show this help\n"
in in
@ -30,7 +31,7 @@ class options args =
val infile = ref (None : string option) val infile = ref (None : string option)
val outfile = ref (None : string option) val outfile = ref (None : string option)
val paths = ref [X86.get_std_path ()] val paths = ref [X86.get_std_path ()]
val mode = ref (`Default : [`Default | `Eval | `SM | `Compile ]) val mode = ref (`Default : [`Default | `Eval | `SM | `Compile | `BC])
val curdir = Unix.getcwd () val curdir = Unix.getcwd ()
val debug = ref false val debug = ref false
(* Workaround until Ostap starts to memoize properly *) (* Workaround until Ostap starts to memoize properly *)
@ -49,6 +50,7 @@ class options args =
| "-o" -> (match self#peek with None -> raise (Commandline_error "File name expected after '-o' specifier") | Some fname -> self#set_outfile fname) | "-o" -> (match self#peek with None -> raise (Commandline_error "File name expected after '-o' specifier") | Some fname -> self#set_outfile fname)
| "-I" -> (match self#peek with None -> raise (Commandline_error "Path expected after '-I' specifier") | Some path -> self#add_include_path path) | "-I" -> (match self#peek with None -> raise (Commandline_error "Path expected after '-I' specifier") | Some path -> self#add_include_path path)
| "-s" -> self#set_mode `SM | "-s" -> self#set_mode `SM
| "-b" -> self#set_mode `BC
| "-i" -> self#set_mode `Eval | "-i" -> self#set_mode `Eval
| "-ds" -> self#set_dump dump_sm | "-ds" -> self#set_dump dump_sm
| "-dsrc" -> self#set_dump dump_source | "-dsrc" -> self#set_dump dump_source
@ -131,8 +133,6 @@ class options args =
method dump_source (ast: Language.Expr.t) = method dump_source (ast: Language.Expr.t) =
if (!dump land dump_source) > 0 if (!dump land dump_source) > 0
then Pprinter.pp Format.std_formatter ast; then Pprinter.pp Format.std_formatter ast;
method dump_SM sm = method dump_SM sm =
if (!dump land dump_sm) > 0 if (!dump land dump_sm) > 0
then self#dump_file "sm" (SM.show_prg sm) then self#dump_file "sm" (SM.show_prg sm)
@ -161,6 +161,8 @@ let main =
(match cmd#get_mode with (match cmd#get_mode with
| `Default | `Compile -> | `Default | `Compile ->
ignore @@ X86.build cmd prog ignore @@ X86.build cmd prog
| `BC ->
SM.ByteCode.compile cmd (SM.compile cmd prog)
| _ -> | _ ->
let rec read acc = let rec read acc =
try try

174
src/SM.ml
View file

@ -2,7 +2,7 @@ open GT
open Language open Language
(* The type for patters *) (* The type for patters *)
@type patt = StrCmp | String | Array | Sexp | Boxed | UnBoxed | Closure with show @type patt = StrCmp | String | Array | Sexp | Boxed | UnBoxed | Closure with show, enum
(* The type for local scopes tree *) (* The type for local scopes tree *)
@type scope = { @type scope = {
@ -54,6 +54,178 @@ with show
(* The type for the stack machine program *) (* The type for the stack machine program *)
@type prg = insn list with show @type prg = insn list with show
module ByteCode =
struct
module M = Map.Make (String)
module S = Set.Make (String)
module StringTab =
struct
type t = {mutable smap : int M.t; buffer: Buffer.t; mutable index : int}
let create () = {smap = M.empty; buffer = Buffer.create 255; index = 0}
let add st s =
try let i = M.find s st.smap in i
with Not_found ->
let i = st.index in
Buffer.add_string st.buffer s;
Buffer.add_char st.buffer (Char.chr 0);
st.smap <- M.add s i st.smap;
st.index <- st.index + String.length s + 1;
i
end
exception Found of int
let opnum =
let optab = ["+"; "-"; "*"; "/"; "%"; "<"; "<="; ">"; ">="; "=="; "!="; "&&"; "!!"] in
fun s ->
try
ignore @@ List.fold_left (fun i op -> if s = op then raise (Found i) else i+1) 1 optab;
failwith (Printf.sprintf "ERROR: undefined binary operator '%s'" s)
with
Found i -> i
(* Below are the the numbers of occurrencies of SM instructions for the stdlib+lama compiler itself
7328 SLABEL
5351 CALL
5321 DROP
4437 LABEL
4331 LD
4213 DUP
3979 EXTERN
3525 CONST
2503 LINE
2281 JMP
1400 ST
1122 CJMP
922 END
922 BEGIN
790 SEXP
770 CLOSURE
519 TAG
493 STRING
354 FAIL
349 CALLC
339 BINOP
289 ARRAY
270 PUBLIC
87 PATT
39 STA
16 FLABEL
*)
let compile cmd insns =
let word_size = 4 in
let code = Buffer.create 256 in
let st = StringTab.create () in
let lmap = Stdlib.ref M.empty in
let pubs = Stdlib.ref S.empty in
let globals = Stdlib.ref M.empty in
let glob_count = Stdlib.ref 0 in
let fixups = Stdlib.ref [] in
let add_lab l = lmap := M.add l (Buffer.length code) !lmap in
let add_public l = pubs := S.add l !pubs in
let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in
let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char .chr x) in
let add_ints = List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x) in
let add_strings = List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int @@ StringTab.add st x) in
let add_designations n =
let b x =
match n with
None -> x
| Some b -> b * 16 + x
in
List.iter (function
| Value.Global s ->
let i =
try M.find s !globals
with Not_found ->
let i = !glob_count in
incr glob_count;
globals := M.add s i !globals;
i
in
add_bytes [b 0]; add_ints [i]
| Value.Local n -> add_bytes [b 1]; add_ints [n]
| Value.Arg n -> add_bytes [b 2]; add_ints [n]
| Value.Access n -> add_bytes [b 3]; add_ints [n]
)
in
let insn_code = function
(* 0x0s *) | BINOP s -> add_bytes [opnum s]
(* 0x10 n:32 *) | CONST n -> add_bytes [1*16 + 0]; add_ints [n]
(* 0x11 s:32 *) | STRING s -> add_bytes [1*16 + 1]; add_strings [s]
(* 0x12 s:32 n:32 *) | SEXP (s, n) -> add_bytes [1*16 + 2]; add_strings [s]; add_ints [n]
(* 0x13 *) | STI -> add_bytes [1*16 + 3]
(* 0x14 *) | STA -> add_bytes [1*16 + 4]
| LABEL s
| FLABEL s
| SLABEL s -> add_lab s
(* 0x15 l:32 *) | JMP s -> add_bytes [1*16 + 5]; add_fixup s; add_ints [0]
(* 0x16 *) | END -> add_bytes [1*16 + 6]
(* 0x17 *) | RET -> add_bytes [1*16 + 7]
(* 0x18 *) | DROP -> add_bytes [1*16 + 8]
(* 0x19 *) | DUP -> add_bytes [1*16 + 9]
(* 0x1a *) | SWAP -> add_bytes [1*16 + 10]
(* 0x2d n:32 *) | LD d -> add_designations (Some 2) [d]
(* 0x3d n:32 *) | LDA d -> add_designations (Some 3) [d]
(* 0x4d n:32 *) | ST d -> add_designations (Some 4) [d]
(* 0x50 l:32 *) | CJMP ("z" , s) -> add_bytes [5*16 + 0]; add_fixup s; add_ints [0]
(* 0x51 l:32 *) | CJMP ("nz", s) -> add_bytes [5*16 + 1]; add_fixup s; add_ints [0]
(* 0x52 n:32 n:32 *) | BEGIN (_, a, l, [], _, _) -> add_bytes [5*16 + 2]; add_ints [a; l] (* with no closure *)
(* 0x53 n:32 n:32 *) | BEGIN (_, a, l, _, _, _) -> add_bytes [5*16 + 3]; add_ints [a; l] (* with a closure *)
(* 0x54 l:32 n:32 d*:32 *) | CLOSURE (s, ds) -> add_bytes [5*16 + 4]; add_fixup s; add_ints [0; List.length ds]; add_designations None ds
(* 0x55 n:32 *) | CALLC (n, tail) -> add_bytes [5*16 + 5]; add_ints [n]
(* 0x56 l:32 n:32 *) | CALL (fn, n, tail) -> add_bytes [5*16 + 6]; add_fixup fn; add_ints [0; n]
(* 0x57 s:32 n:32 *) | TAG (s, n) -> add_bytes [5*16 + 7]; add_strings [s]; add_ints [n]
(* 0x58 n:32 *) | ARRAY n -> add_bytes [5*16 + 8]; add_ints [n]
(* 0x59 n:32 n:32 *) | FAIL ((l, c), _) -> add_bytes [5*16 + 9]; add_ints [l; c]
(* 0x5a n:32 *) | LINE n -> add_bytes [5*16 + 10]; add_ints [n]
(* 0x6p *) | PATT p -> add_bytes [6*16 + enum(patt) p]
| EXTERN s -> ()
| PUBLIC s -> add_public s
in
List.iter insn_code insns;
add_bytes [255];
let code = Buffer.to_bytes code in
List.iter
(fun (ofs, l) ->
Bytes.set_int32_ne code ofs (Int32.of_int @@ try M.find l !lmap with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l))
)
!fixups;
let pubs = List.map
(fun l ->
Int32.of_int @@ StringTab.add st l,
(Int32.of_int @@ try M.find l !lmap with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l))
) @@ S.elements !pubs
in
let st = Buffer.to_bytes st.StringTab.buffer in
let file = Buffer.create 1024 in
Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length st);
Buffer.add_int32_ne file (Int32.of_int @@ !glob_count);
Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs);
List.iter (fun (n, o) -> Buffer.add_int32_ne file n; Buffer.add_int32_ne file o) pubs;
Buffer.add_bytes file st;
Buffer.add_bytes file code;
let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in
Buffer.output_buffer f file;
close_out f
end
let show_prg p = let show_prg p =
let b = Buffer.create 512 in let b = Buffer.create 512 in
List.iter (fun i -> Buffer.add_string b (show(insn) i); Buffer.add_string b "\n") p; List.iter (fun i -> Buffer.add_string b (show(insn) i); Buffer.add_string b "\n") p;

View file

@ -1 +1 @@
let version = "Version 1.10, 9e5c562d6, Fri Aug 13 09:50:07 2021 +0300" let version = "Version 1.10, 11203f3a8, Tue Aug 31 01:47:49 2021 +0300"

View file

@ -2,7 +2,7 @@ SHELL := /bin/bash
FILES=$(wildcard *.lama) FILES=$(wildcard *.lama)
ALL=$(sort $(FILES:.lama=.o)) ALL=$(sort $(FILES:.lama=.o))
LAMAC=../src/lamac LAMAC=../src/lamac -ds
all: $(ALL) all: $(ALL)