From fa874b4a4c4ee4a7ada92a0f9dd3806c46476b5a Mon Sep 17 00:00:00 2001 From: Dmitry Boulytchev Date: Tue, 28 Sep 2021 03:02:05 +0300 Subject: [PATCH] Byterun --- Makefile | 1 + byterun/., | 12 +++ byterun/Makefile | 8 ++ byterun/byterun.c | 260 ++++++++++++++++++++++++++++++++++++++++++++++ runtime/Makefile | 2 +- runtime/runtime.c | 18 +--- runtime/runtime.h | 20 ++++ src/Driver.ml | 10 +- src/SM.ml | 174 ++++++++++++++++++++++++++++++- src/version.ml | 2 +- stdlib/Makefile | 2 +- 11 files changed, 486 insertions(+), 23 deletions(-) create mode 100644 byterun/., create mode 100644 byterun/Makefile create mode 100644 byterun/byterun.c create mode 100644 runtime/runtime.h diff --git a/Makefile b/Makefile index b92b9bbec..8a3b6313c 100644 --- a/Makefile +++ b/Makefile @@ -7,6 +7,7 @@ MKDIR ?= mkdir all: $(MAKE) -C src $(MAKE) -C runtime + $(MAKE) -C byterun $(MAKE) -C stdlib STD_FILES=$(shell ls stdlib/*.[oi] stdlib/*.lama runtime/runtime.a runtime/Std.i) diff --git a/byterun/., b/byterun/., new file mode 100644 index 000000000..27657559f --- /dev/null +++ b/byterun/., @@ -0,0 +1,12 @@ + +all: gc_runtime.o runtime.o + ar rc runtime.a gc_runtime.o runtime.o + +gc_runtime.o: gc_runtime.s + $(CC) -g -fstack-protector-all -m32 -c gc_runtime.s + +runtime.o: runtime.c + $(CC) -g -fstack-protector-all -m32 -c runtime.c + +clean: + $(RM) *.a *.o *~ diff --git a/byterun/Makefile b/byterun/Makefile new file mode 100644 index 000000000..45fc9c562 --- /dev/null +++ b/byterun/Makefile @@ -0,0 +1,8 @@ +all: byterun.o + $(CC) -m32 -g -o byterun byterun.o ../runtime/runtime.a + +byterun.o: byterun.c + $(CC) -g -fstack-protector-all -m32 -c byterun.c + +clean: + $(RM) *.a *.o *~ diff --git a/byterun/byterun.c b/byterun/byterun.c new file mode 100644 index 000000000..4d52b8c3f --- /dev/null +++ b/byterun/byterun.c @@ -0,0 +1,260 @@ +/* Lama SM Bytecode interpreter */ + +# include +# include +# include +# include +# include "../runtime/runtime.h" + +void *__start_custom_data; +void *__stop_custom_data; + +typedef struct { + char *string_ptr; + int *public_ptr; + char *code_ptr; + int stringtab_size; + int global_area_size; + int public_symbols_number; + char buffer[0]; +} bytefile; + +char* get_string (bytefile *f, int pos) { + return &f->string_ptr[pos]; +} + +char* get_public_name (bytefile *f, int i) { + return get_string (f, f->public_ptr[i*2]); +} + +int get_public_offset (bytefile *f, int i) { + return f->public_ptr[i*2+1]; +} + +bytefile* read_file (char *fname) { + FILE *f = fopen (fname, "rb"); + long size; + bytefile *file; + + if (f == 0) { + failure ("%s\n", strerror (errno)); + } + + if (fseek (f, 0, SEEK_END) == -1) { + failure ("%s\n", strerror (errno)); + } + + file = (bytefile*) malloc (sizeof(int)*3 + (size = ftell (f))); + + if (file == 0) { + failure ("*** FAILURE: unable to allocate memory.\n"); + } + + rewind (f); + + if (size != fread (&file->stringtab_size, 1, size, f)) { + failure ("%s\n", strerror (errno)); + } + + fclose (f); + + file->string_ptr = &file->buffer [file->public_symbols_number * 2 * sizeof(int)]; + file->public_ptr = (int*) file->buffer; + file->code_ptr = &file->string_ptr [file->stringtab_size]; + + return file; +} + +void disassemble (FILE *f, bytefile *bf) { + +# define INT (ip += sizeof (int), *(int*)(ip - sizeof (int))) +# define BYTE *ip++ +# define STRING get_string (bf, INT) +# define FAIL failure ("ERROR: invalid opcode %d-%d\n", h, l) + + char *ip = bf->code_ptr; + char *ops [] = {"+", "-", "*", "/", "%", "<", "<=", ">", ">=", "==", "!=", "&&", "!!"}; + char *pats[] = {"=str", "#string", "#array", "#sexp", "#ref", "#val", "#fun"}; + char *lds [] = {"LD\t", "LDA\t", "ST\t"}; + do { + char x = BYTE, + h = (x & 0xF0) >> 4, + l = x & 0x0F; + + fprintf (f, "0x%.8x:\t", ip-bf->code_ptr-1); + + switch (h) { + case 15: + goto stop; + + /* BINOP */ + case 0: + fprintf (f, "BINOP\t%s", ops[l-1]); + break; + + case 1: + switch (l) { + case 0: + fprintf (f, "CONST\t%d", INT); + break; + + case 1: + fprintf (f, "STRING\t%s", STRING); + break; + + case 2: + fprintf (f, "SEXP\t%s ", STRING); + fprintf (f, "%d", INT); + break; + + case 3: + fprintf (f, "STI"); + break; + + case 4: + fprintf (f, "STA"); + break; + + case 5: + fprintf (f, "JMP\t0x%.8x", INT); + break; + + case 6: + fprintf (f, "END"); + break; + + case 7: + fprintf (f, "RET"); + break; + + case 8: + fprintf (f, "DROP"); + break; + + case 9: + fprintf (f, "DUP"); + break; + + case 10: + fprintf (f, "SWAP"); + break; + + default: + FAIL; + } + break; + + case 2: + case 3: + case 4: + fprintf (f, "%s\t", lds[h-2]); + switch (l) { + case 0: fprintf (f, "G(%d)", INT); break; + case 1: fprintf (f, "L(%d)", INT); break; + case 2: fprintf (f, "A(%d)", INT); break; + case 3: fprintf (f, "C(%d)", INT); break; + default: FAIL; + } + break; + + case 5: + switch (l) { + case 0: + fprintf (f, "CJMPz\t%0x.8x", INT); + break; + + case 1: + fprintf (f, "CJMPnz\t%0x.8x", INT); + break; + + case 2: + fprintf (f, "BEGIN\t%d ", INT); + fprintf (f, "%d", INT); + break; + + case 3: + fprintf (f, "CBEGIN\t%d ", INT); + fprintf (f, "%d", INT); + break; + + case 4: + fprintf (f, "CLOSURE\t0x%.8x", INT); + {int n = INT; + for (int i = 0; istringtab_size); + fprintf (f, "Global area size : %d\n", bf->global_area_size); + fprintf (f, "Number of public symbols: %d\n", bf->public_symbols_number); + fprintf (f, "Public symbols :\n"); + + for (i=0; i < bf->public_symbols_number; i++) + fprintf (f, " 0x%.8x: %s\n", get_public_offset (bf, i), get_public_name (bf, i)); + + fprintf (f, "Code:\n"); + disassemble (f, bf); +} + +int main (int argc, char* argv[]) { + bytefile *f = read_file (argv[1]); + dump_file (stdout, f); + return 0; +} diff --git a/runtime/Makefile b/runtime/Makefile index 27657559f..a82daa3f0 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -5,7 +5,7 @@ all: gc_runtime.o runtime.o gc_runtime.o: gc_runtime.s $(CC) -g -fstack-protector-all -m32 -c gc_runtime.s -runtime.o: runtime.c +runtime.o: runtime.c runtime.h $(CC) -g -fstack-protector-all -m32 -c runtime.c clean: diff --git a/runtime/runtime.c b/runtime/runtime.c index 4a80f6e34..6f8c35c1e 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -1,26 +1,14 @@ /* Runtime library */ -#define _GNU_SOURCE 1 +# define _GNU_SOURCE 1 -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include +# include "runtime.h" # define __ENABLE_GC__ # ifndef __ENABLE_GC__ # define alloc malloc # endif -# define WORD_SIZE (CHAR_BIT * sizeof(int)) - /* # define DEBUG_PRINT 1 */ #ifdef DEBUG_PRINT @@ -144,7 +132,7 @@ static void vfailure (char *s, va_list args) { exit (255); } -static void failure (char *s, ...) { +void failure (char *s, ...) { va_list args; va_start (args, s); diff --git a/runtime/runtime.h b/runtime/runtime.h new file mode 100644 index 000000000..16609672c --- /dev/null +++ b/runtime/runtime.h @@ -0,0 +1,20 @@ +# ifndef __LAMA_RUNTIME__ +# define __LAMA_RUNTIME__ + +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# define WORD_SIZE (CHAR_BIT * sizeof(int)) + +void failure (char *s, ...); + +# endif diff --git a/src/Driver.ml b/src/Driver.ml index 6213d941e..ebd5e5bf6 100644 --- a/src/Driver.ml +++ b/src/Driver.ml @@ -20,6 +20,7 @@ class options args = " -dsrc --- dump pretty-printed source code\n" ^ " -ds --- dump stack machine code (the output will be written into .sm file; has no\n" ^ " effect if -i option is specfied)\n" ^ + " -b --- compile to a stack machine bytecode\n" ^ " -v --- show version\n" ^ " -h --- show this help\n" in @@ -30,7 +31,7 @@ class options args = val infile = ref (None : string option) val outfile = ref (None : string option) val paths = ref [X86.get_std_path ()] - val mode = ref (`Default : [`Default | `Eval | `SM | `Compile ]) + val mode = ref (`Default : [`Default | `Eval | `SM | `Compile | `BC]) val curdir = Unix.getcwd () val debug = ref false (* Workaround until Ostap starts to memoize properly *) @@ -49,6 +50,7 @@ class options args = | "-o" -> (match self#peek with None -> raise (Commandline_error "File name expected after '-o' specifier") | Some fname -> self#set_outfile fname) | "-I" -> (match self#peek with None -> raise (Commandline_error "Path expected after '-I' specifier") | Some path -> self#add_include_path path) | "-s" -> self#set_mode `SM + | "-b" -> self#set_mode `BC | "-i" -> self#set_mode `Eval | "-ds" -> self#set_dump dump_sm | "-dsrc" -> self#set_dump dump_source @@ -131,8 +133,6 @@ class options args = method dump_source (ast: Language.Expr.t) = if (!dump land dump_source) > 0 then Pprinter.pp Format.std_formatter ast; - - method dump_SM sm = if (!dump land dump_sm) > 0 then self#dump_file "sm" (SM.show_prg sm) @@ -160,7 +160,9 @@ let main = cmd#dump_source (snd prog); (match cmd#get_mode with | `Default | `Compile -> - ignore @@ X86.build cmd prog + ignore @@ X86.build cmd prog + | `BC -> + SM.ByteCode.compile cmd (SM.compile cmd prog) | _ -> let rec read acc = try diff --git a/src/SM.ml b/src/SM.ml index 80f3b723c..1dea381ff 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -2,7 +2,7 @@ open GT open Language (* The type for patters *) -@type patt = StrCmp | String | Array | Sexp | Boxed | UnBoxed | Closure with show +@type patt = StrCmp | String | Array | Sexp | Boxed | UnBoxed | Closure with show, enum (* The type for local scopes tree *) @type scope = { @@ -54,6 +54,178 @@ with show (* The type for the stack machine program *) @type prg = insn list with show +module ByteCode = + struct + + module M = Map.Make (String) + module S = Set.Make (String) + + module StringTab = + struct + + type t = {mutable smap : int M.t; buffer: Buffer.t; mutable index : int} + + let create () = {smap = M.empty; buffer = Buffer.create 255; index = 0} + + let add st s = + try let i = M.find s st.smap in i + with Not_found -> + let i = st.index in + Buffer.add_string st.buffer s; + Buffer.add_char st.buffer (Char.chr 0); + st.smap <- M.add s i st.smap; + st.index <- st.index + String.length s + 1; + i + + end + + exception Found of int + + let opnum = + let optab = ["+"; "-"; "*"; "/"; "%"; "<"; "<="; ">"; ">="; "=="; "!="; "&&"; "!!"] in + fun s -> + try + ignore @@ List.fold_left (fun i op -> if s = op then raise (Found i) else i+1) 1 optab; + failwith (Printf.sprintf "ERROR: undefined binary operator '%s'" s) + with + Found i -> i + +(* Below are the the numbers of occurrencies of SM instructions for the stdlib+lama compiler itself + + 7328 SLABEL + 5351 CALL + 5321 DROP + 4437 LABEL + 4331 LD + 4213 DUP + 3979 EXTERN + 3525 CONST + 2503 LINE + 2281 JMP + 1400 ST + 1122 CJMP + 922 END + 922 BEGIN + 790 SEXP + 770 CLOSURE + 519 TAG + 493 STRING + 354 FAIL + 349 CALLC + 339 BINOP + 289 ARRAY + 270 PUBLIC + 87 PATT + 39 STA + 16 FLABEL + *) + + let compile cmd insns = + let word_size = 4 in + let code = Buffer.create 256 in + let st = StringTab.create () in + let lmap = Stdlib.ref M.empty in + let pubs = Stdlib.ref S.empty in + let globals = Stdlib.ref M.empty in + let glob_count = Stdlib.ref 0 in + let fixups = Stdlib.ref [] in + let add_lab l = lmap := M.add l (Buffer.length code) !lmap in + let add_public l = pubs := S.add l !pubs in + let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in + let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char .chr x) in + let add_ints = List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x) in + let add_strings = List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int @@ StringTab.add st x) in + let add_designations n = + let b x = + match n with + None -> x + | Some b -> b * 16 + x + in + List.iter (function + | Value.Global s -> + let i = + try M.find s !globals + with Not_found -> + let i = !glob_count in + incr glob_count; + globals := M.add s i !globals; + i + in + add_bytes [b 0]; add_ints [i] + | Value.Local n -> add_bytes [b 1]; add_ints [n] + | Value.Arg n -> add_bytes [b 2]; add_ints [n] + | Value.Access n -> add_bytes [b 3]; add_ints [n] + ) + in + let insn_code = function + (* 0x0s *) | BINOP s -> add_bytes [opnum s] + (* 0x10 n:32 *) | CONST n -> add_bytes [1*16 + 0]; add_ints [n] + (* 0x11 s:32 *) | STRING s -> add_bytes [1*16 + 1]; add_strings [s] + (* 0x12 s:32 n:32 *) | SEXP (s, n) -> add_bytes [1*16 + 2]; add_strings [s]; add_ints [n] + (* 0x13 *) | STI -> add_bytes [1*16 + 3] + (* 0x14 *) | STA -> add_bytes [1*16 + 4] + + | LABEL s + | FLABEL s + | SLABEL s -> add_lab s + + (* 0x15 l:32 *) | JMP s -> add_bytes [1*16 + 5]; add_fixup s; add_ints [0] + (* 0x16 *) | END -> add_bytes [1*16 + 6] + (* 0x17 *) | RET -> add_bytes [1*16 + 7] + (* 0x18 *) | DROP -> add_bytes [1*16 + 8] + (* 0x19 *) | DUP -> add_bytes [1*16 + 9] + (* 0x1a *) | SWAP -> add_bytes [1*16 + 10] + + + (* 0x2d n:32 *) | LD d -> add_designations (Some 2) [d] + (* 0x3d n:32 *) | LDA d -> add_designations (Some 3) [d] + (* 0x4d n:32 *) | ST d -> add_designations (Some 4) [d] + + (* 0x50 l:32 *) | CJMP ("z" , s) -> add_bytes [5*16 + 0]; add_fixup s; add_ints [0] + (* 0x51 l:32 *) | CJMP ("nz", s) -> add_bytes [5*16 + 1]; add_fixup s; add_ints [0] + + (* 0x52 n:32 n:32 *) | BEGIN (_, a, l, [], _, _) -> add_bytes [5*16 + 2]; add_ints [a; l] (* with no closure *) + (* 0x53 n:32 n:32 *) | BEGIN (_, a, l, _, _, _) -> add_bytes [5*16 + 3]; add_ints [a; l] (* with a closure *) + (* 0x54 l:32 n:32 d*:32 *) | CLOSURE (s, ds) -> add_bytes [5*16 + 4]; add_fixup s; add_ints [0; List.length ds]; add_designations None ds + (* 0x55 n:32 *) | CALLC (n, tail) -> add_bytes [5*16 + 5]; add_ints [n] + (* 0x56 l:32 n:32 *) | CALL (fn, n, tail) -> add_bytes [5*16 + 6]; add_fixup fn; add_ints [0; n] + (* 0x57 s:32 n:32 *) | TAG (s, n) -> add_bytes [5*16 + 7]; add_strings [s]; add_ints [n] + (* 0x58 n:32 *) | ARRAY n -> add_bytes [5*16 + 8]; add_ints [n] + (* 0x59 n:32 n:32 *) | FAIL ((l, c), _) -> add_bytes [5*16 + 9]; add_ints [l; c] + (* 0x5a n:32 *) | LINE n -> add_bytes [5*16 + 10]; add_ints [n] + (* 0x6p *) | PATT p -> add_bytes [6*16 + enum(patt) p] + + | EXTERN s -> () + | PUBLIC s -> add_public s + in + List.iter insn_code insns; + add_bytes [255]; + let code = Buffer.to_bytes code in + List.iter + (fun (ofs, l) -> + Bytes.set_int32_ne code ofs (Int32.of_int @@ try M.find l !lmap with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l)) + ) + !fixups; + let pubs = List.map + (fun l -> + Int32.of_int @@ StringTab.add st l, + (Int32.of_int @@ try M.find l !lmap with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l)) + ) @@ S.elements !pubs + in + let st = Buffer.to_bytes st.StringTab.buffer in + let file = Buffer.create 1024 in + Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length st); + Buffer.add_int32_ne file (Int32.of_int @@ !glob_count); + Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs); + List.iter (fun (n, o) -> Buffer.add_int32_ne file n; Buffer.add_int32_ne file o) pubs; + Buffer.add_bytes file st; + Buffer.add_bytes file code; + let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in + Buffer.output_buffer f file; + close_out f + + end + let show_prg p = let b = Buffer.create 512 in List.iter (fun i -> Buffer.add_string b (show(insn) i); Buffer.add_string b "\n") p; diff --git a/src/version.ml b/src/version.ml index ff511dc75..112274efc 100644 --- a/src/version.ml +++ b/src/version.ml @@ -1 +1 @@ -let version = "Version 1.10, 9e5c562d6, Fri Aug 13 09:50:07 2021 +0300" +let version = "Version 1.10, 11203f3a8, Tue Aug 31 01:47:49 2021 +0300" diff --git a/stdlib/Makefile b/stdlib/Makefile index 3224cff78..e2f124938 100644 --- a/stdlib/Makefile +++ b/stdlib/Makefile @@ -2,7 +2,7 @@ SHELL := /bin/bash FILES=$(wildcard *.lama) ALL=$(sort $(FILES:.lama=.o)) -LAMAC=../src/lamac +LAMAC=../src/lamac -ds all: $(ALL)