From de018e76aaa8f7fd7a2d116f23415f7a74c2ae6b Mon Sep 17 00:00:00 2001 From: Dmitry Boulytchev Date: Wed, 7 Mar 2018 10:18:30 +0300 Subject: [PATCH] Added X86 codegeneration interface and tests --- Makefile | 2 + regression/Makefile | 1 + regression/deep-expressions/Makefile | 1 + regression/expressions/Makefile | 1 + regression/test000.log | 0 runtime/Makefile | 6 + runtime/runtime.c | 22 ++++ src/Driver.ml | 41 ++++--- src/Language.ml | 77 ++++++++++-- src/Makefile | 4 +- src/SM.ml | 31 ++++- src/X86.ml | 168 +++++++++++++++++++++++++++ 12 files changed, 320 insertions(+), 34 deletions(-) delete mode 100644 regression/test000.log create mode 100644 runtime/Makefile create mode 100644 runtime/runtime.c create mode 100644 src/X86.ml diff --git a/Makefile b/Makefile index 19a6c8037..466ce8eaa 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ SHELL := /bin/bash all: pushd src && make && popd + pushd runtime && make && popd install: ; @@ -12,5 +13,6 @@ regression: clean: pushd src && make clean && popd + pushd runtime && make clean && popd pushd regression && make clean && popd diff --git a/regression/Makefile b/regression/Makefile index a3f16f0ae..021bea2d6 100644 --- a/regression/Makefile +++ b/regression/Makefile @@ -7,6 +7,7 @@ RC=../src/rc.opt check: $(TESTS) $(TESTS): %: %.expr + $(RC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log cat $@.input | $(RC) -i $< > $@.log && diff $@.log orig/$@.log cat $@.input | $(RC) -s $< > $@.log && diff $@.log orig/$@.log diff --git a/regression/deep-expressions/Makefile b/regression/deep-expressions/Makefile index 939e151d8..eb36d1702 100644 --- a/regression/deep-expressions/Makefile +++ b/regression/deep-expressions/Makefile @@ -7,6 +7,7 @@ RC = ../../src/rc.opt check: $(TESTS) $(TESTS): %: %.expr + RC_RUNTIME=../../runtime $(RC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log cat $@.input | $(RC) -i $< > $@.log && diff $@.log orig/$@.log cat $@.input | $(RC) -s $< > $@.log && diff $@.log 
orig/$@.log diff --git a/regression/expressions/Makefile b/regression/expressions/Makefile index 939e151d8..eb36d1702 100644 --- a/regression/expressions/Makefile +++ b/regression/expressions/Makefile @@ -7,6 +7,7 @@ RC = ../../src/rc.opt check: $(TESTS) $(TESTS): %: %.expr + RC_RUNTIME=../../runtime $(RC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log cat $@.input | $(RC) -i $< > $@.log && diff $@.log orig/$@.log cat $@.input | $(RC) -s $< > $@.log && diff $@.log orig/$@.log diff --git a/regression/test000.log b/regression/test000.log deleted file mode 100644 index e69de29bb..000000000 diff --git a/runtime/Makefile b/runtime/Makefile new file mode 100644 index 000000000..c4414a5c8 --- /dev/null +++ b/runtime/Makefile @@ -0,0 +1,6 @@ +all: + gcc -m32 -c runtime.c + +clean: + rm -f runtime.o *~ + diff --git a/runtime/runtime.c b/runtime/runtime.c new file mode 100644 index 000000000..c24283f99 --- /dev/null +++ b/runtime/runtime.c @@ -0,0 +1,22 @@ +/* Runtime library */ + +# include + +/* Lread is an implementation of the "read" construct */ +extern int Lread () { + int result; + + printf ("> "); + fflush (stdout); + scanf ("%d", &result); + + return result; +} + +/* Lwrite is an implementation of the "write" construct */ +extern int Lwrite (int n) { + printf ("%d\n", n); + fflush (stdout); + + return 0; +} diff --git a/src/Driver.ml b/src/Driver.ml index 643762517..f500a3774 100644 --- a/src/Driver.ml +++ b/src/Driver.ml @@ -14,28 +14,35 @@ let parse infile = ] s end ) - (ostap (!(Language.Stmt.parse) -EOF)) + (ostap (!(Language.parse) -EOF)) let main = try let interpret = Sys.argv.(1) = "-i" in - let infile = Sys.argv.(2) in + let stack = Sys.argv.(1) = "-s" in + let to_compile = not (interpret || stack) in + let infile = Sys.argv.(if not to_compile then 2 else 1) in match parse infile with | `Ok prog -> - let rec read acc = - try - let r = read_int () in - Printf.printf "> "; - read (acc @ [r]) - with End_of_file -> acc - in - let input = read [] 
in - let output = - if interpret - then Language.eval prog input - else SM.run (SM.compile prog) input - in - List.iter (fun i -> Printf.printf "%d\n" i) output + if to_compile + then + let basename = Filename.chop_suffix infile ".expr" in + ignore @@ X86.build prog basename + else + let rec read acc = + try + let r = read_int () in + Printf.printf "> "; + read (acc @ [r]) + with End_of_file -> acc + in + let input = read [] in + let output = + if interpret + then Language.eval prog input + else SM.run (SM.compile prog) input + in + List.iter (fun i -> Printf.printf "%d\n" i) output | `Fail er -> Printf.eprintf "Syntax error: %s\n" er with Invalid_argument _ -> - Printf.printf "Usage: rc [-i] \n" + Printf.printf "Usage: rc [-i | -s] \n" diff --git a/src/Language.ml b/src/Language.ml index ed24bdc1c..0fb34aba0 100644 --- a/src/Language.ml +++ b/src/Language.ml @@ -43,19 +43,62 @@ module Expr = Takes a state and an expression, and returns the value of the expression in the given state. - *) - let eval _ = failwith "Not implemented yet" + *) + let to_func op = + let bti = function true -> 1 | _ -> 0 in + let itb b = b <> 0 in + let (|>) f g = fun x y -> f (g x y) in + match op with + | "+" -> (+) + | "-" -> (-) + | "*" -> ( * ) + | "/" -> (/) + | "%" -> (mod) + | "<" -> bti |> (< ) + | "<=" -> bti |> (<=) + | ">" -> bti |> (> ) + | ">=" -> bti |> (>=) + | "==" -> bti |> (= ) + | "!=" -> bti |> (<>) + | "&&" -> fun x y -> bti (itb x && itb y) + | "!!" -> fun x y -> bti (itb x || itb y) + | _ -> failwith (Printf.sprintf "Unknown binary operator %s" op) + + let rec eval st expr = + match expr with + | Const n -> n + | Var x -> st x + | Binop (op, x, y) -> to_func op (eval st x) (eval st y) (* Expression parser. 
You can use the following terminals: IDENT --- a non-empty identifier a-zA-Z[a-zA-Z0-9_]* as a string DECIMAL --- a decimal constant [0-9]+ as a string - + *) - ostap ( - parse: empty {failwith "Not implemented yet"} + ostap ( + parse: + !(Ostap.Util.expr + (fun x -> x) + (Array.map (fun (a, s) -> a, + List.map (fun s -> ostap(- $(s)), (fun x y -> Binop (s, x, y))) s + ) + [| + `Lefta, ["!!"]; + `Lefta, ["&&"]; + `Nona , ["=="; "!="; "<="; "<"; ">="; ">"]; + `Lefta, ["+" ; "-"]; + `Lefta, ["*" ; "/"; "%"]; + |] + ) + primary); + + primary: + n:DECIMAL {Const n} + | x:IDENT {Var x} + | -"(" parse -")" ) - + end (* Simple statements: syntax and sematics *) @@ -74,15 +117,26 @@ module Stmt = (* Statement evaluator - val eval : config -> t -> config + val eval : config -> t -> config Takes a configuration and a statement, and returns another configuration *) - let eval _ = failwith "Not implemented yet" - + let rec eval ((st, i, o) as conf) stmt = + match stmt with + | Read x -> (match i with z::i' -> (Expr.update x z st, i', o) | _ -> failwith "Unexpected end of input") + | Write e -> (st, i, o @ [Expr.eval st e]) + | Assign (x, e) -> (Expr.update x (Expr.eval st e) st, i, o) + | Seq (s1, s2) -> eval (eval conf s1) s2 + (* Statement parser *) ostap ( - parse: empty {failwith "Not implemented yet"} + parse: + s:stmt ";" ss:parse {Seq (s, ss)} + | stmt; + stmt: + "read" "(" x:IDENT ")" {Read x} + | "write" "(" e:!(Expr.parse) ")" {Write e} + | x:IDENT ":=" e:!(Expr.parse) {Assign (x, e)} ) end @@ -100,3 +154,6 @@ type t = Stmt.t *) let eval p i = let _, _, o = Stmt.eval (Expr.empty, i, []) p in o + +(* Top-level parser *) +let parse = Stmt.parse diff --git a/src/Makefile b/src/Makefile index 764b8e4fc..8be318c26 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,8 +2,8 @@ TOPFILE = rc OCAMLC = ocamlc OCAMLOPT = ocamlopt OCAMLDEP = ocamldep -SOURCES = Language.ml SM.ml Driver.ml -LIBS = GT.cma unix.cma re.cma emacs/re_emacs.cma str/re_str.cma +SOURCES = Language.ml SM.ml 
X86.ml Driver.ml +LIBS = GT.cma unix.cma re.cma re_emacs.cma re_str.cma CAMLP5 = -pp "camlp5o -I `ocamlfind -query GT.syntax` -I `ocamlfind -query ostap.syntax` pa_ostap.cmo pa_gt.cmo -L `ocamlfind -query GT.syntax`" PXFLAGS = $(CAMLP5) BFLAGS = -rectypes -I `ocamlfind -query GT` -I `ocamlfind -query re` -I `ocamlfind -query ostap` diff --git a/src/SM.ml b/src/SM.ml index 5bc14e6a0..98b00a9ba 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -24,15 +24,26 @@ type config = int list * Stmt.config Takes a configuration and a program, and returns a configuration as a result *) -let eval _ = failwith "Not yet implemented" +let rec eval ((stack, ((st, i, o) as c)) as conf) = function +| [] -> conf +| insn :: prg' -> + eval + (match insn with + | BINOP op -> let y::x::stack' = stack in (Expr.to_func op x y :: stack', c) + | READ -> let z::i' = i in (z::stack, (st, i', o)) + | WRITE -> let z::stack' = stack in (stack', (st, i, o @ [z])) + | CONST i -> (i::stack, c) + | LD x -> (st x :: stack, c) + | ST x -> let z::stack' = stack in (stack', (Expr.update x z st, i, o)) + ) prg' (* Top-level evaluation val run : prg -> int list -> int list - Takes a program, an input stream, and returns an output stream this program calculates + Takes an input stream, a program, and returns an output stream this program calculates *) -let run p i = let (_, (_, _, o)) = eval ([], (Language.Expr.empty, i, [])) p in o +let run p i = let (_, (_, _, o)) = eval ([], (Expr.empty, i, [])) p in o (* Stack machine compiler @@ -40,5 +51,15 @@ let run p i = let (_, (_, _, o)) = eval ([], (Language.Expr.empty, i, [])) p in Takes a program in the source language and returns an equivalent program for the stack machine - *) -let compile _ = failwith "Not yet implemented" +*) +let rec compile = + let rec expr = function + | Expr.Var x -> [LD x] + | Expr.Const n -> [CONST n] + | Expr.Binop (op, x, y) -> expr x @ expr y @ [BINOP op] + in + function + | Stmt.Seq (s1, s2) -> compile s1 @ compile s2 + | Stmt.Read x -> 
[READ; ST x] + | Stmt.Write e -> expr e @ [WRITE] + | Stmt.Assign (x, e) -> expr e @ [ST x] diff --git a/src/X86.ml b/src/X86.ml new file mode 100644 index 000000000..3dfc50a73 --- /dev/null +++ b/src/X86.ml @@ -0,0 +1,168 @@ +(* X86 code generation interface *) + +(* The registers: *) +let regs = [|"%ebx"; "%ecx"; "%esi"; "%edi"; "%eax"; "%edx"; "%ebp"; "%esp"|] + +(* We can not freely operate with all registers; only 3 by now *) +let num_of_regs = Array.length regs - 5 + +(* We need to know the word size to calculate offsets correctly *) +let word_size = 4 + +(* We need to distinguish the following operand types: *) +type opnd = +| R of int (* hard register *) +| S of int (* a position on the hardware stack *) +| M of string (* a named memory location *) +| L of int (* an immediate operand *) + +(* For convenience we define the following synonyms for the registers: *) +let ebx = R 0 +let ecx = R 1 +let esi = R 2 +let edi = R 3 +let eax = R 4 +let edx = R 5 +let ebp = R 6 +let esp = R 7 + +(* Now x86 instructions (we do not need all of them): *) +type instr = +(* copies a value from the first to the second operand *) | Mov of opnd * opnd +(* makes a binary operation; note, the first operand *) | Binop of string * opnd * opnd +(* designates x86 operator, not the source language one *) +(* x86 integer division, see instruction set reference *) | IDiv of opnd +(* see instruction set reference *) | Cltd +(* sets a value from flags; the first operand is the *) | Set of string * string +(* suffix, which determines the value being set; *) +(* the second --- (sub)register name *) +(* pushes the operand on the hardware stack *) | Push of opnd +(* pops from the hardware stack to the operand *) | Pop of opnd +(* call a function by a name *) | Call of string +(* returns from a function *) | Ret + +(* Instruction printer *) +let show instr = + let binop = function + | "+" -> "addl" + | "-" -> "subl" + | "*" -> "imull" + | "&&" -> "andl" + | "!!"
-> "orl" + | "^" -> "xorl" + | "cmp" -> "cmpl" + | _ -> failwith "unknown binary operator" + in + let opnd = function + | R i -> regs.(i) + | S i -> Printf.sprintf "-%d(%%ebp)" ((i+1) * word_size) + | M x -> x + | L i -> Printf.sprintf "$%d" i + in + match instr with + | Cltd -> "\tcltd" + | Set (suf, s) -> Printf.sprintf "\tset%s\t%s" suf s + | IDiv s1 -> Printf.sprintf "\tidivl\t%s" (opnd s1) + | Binop (op, s1, s2) -> Printf.sprintf "\t%s\t%s,\t%s" (binop op) (opnd s1) (opnd s2) + | Mov (s1, s2) -> Printf.sprintf "\tmovl\t%s,\t%s" (opnd s1) (opnd s2) + | Push s -> Printf.sprintf "\tpushl\t%s" (opnd s) + | Pop s -> Printf.sprintf "\tpopl\t%s" (opnd s) + | Ret -> "\tret" + | Call p -> Printf.sprintf "\tcall\t%s" p + +(* Opening stack machine to use instructions without fully qualified names *) +open SM + +(* Symbolic stack machine evaluator + + compile : env -> prg -> env * instr list + + Takes an environment and a stack machine program, and returns a pair --- the updated environment and the list + of x86 instructions +*) +let compile env code = failwith "Not yet implemented" + +(* A set of strings *) +module S = Set.Make (String) + +(* Environment implementation *) +class env = + object (self) + val stack_slots = 0 (* maximal number of stack positions *) + val globals = S.empty (* a set of global variables *) + val stack = [] (* symbolic stack *) + + (* gets a name for a global variable *) + method loc x = "global_" ^ x + + (* allocates a fresh position on a symbolic stack; the second component computed by allocate' is the number of hardware stack words needed once the new position is in use (S n lives in word n+1, see show) *) + method allocate = + let x, n = + let rec allocate' = function + | [] -> ebx , 0 + | (S n)::_ -> S (n+1) , n+2 + | (R n)::_ when n < num_of_regs -> R (n+1) , stack_slots + | _ -> S 0 , 1 + in + allocate' stack + in + x, {< stack_slots = max n stack_slots; stack = x::stack >} + + (* pushes an operand to the symbolic stack *) + method push y = {< stack = y::stack >} + + (* pops one operand from the symbolic stack *) + method pop = let x::stack' = stack in x, {< stack = stack' >} + + (* pops two 
operands from the symbolic stack *) + method pop2 = let x::y::stack' = stack in x, y, {< stack = stack' >} + + (* registers a global variable in the environment; the stored name is the one returned by loc *) + method global x = {< globals = S.add (self#loc x) globals >} + + (* gets the number of allocated stack slots *) + method allocated = stack_slots + + (* gets all global variables *) + method globals = S.elements globals + end + +(* compiles a unit: generates x86 machine code for the stack program and surrounds it + with function prologue/epilogue +*) +let compile_unit env scode = + let env, code = compile env scode in + env, + ([Push ebp; Mov (esp, ebp); Binop ("-", L (word_size*env#allocated), esp)] @ + code @ + [Mov (ebp, esp); Pop ebp; Binop ("^", eax, eax); Ret] + ) + +(* Generates an assembler text for a program: first compiles the program into + the stack code, then generates x86 assembler code, then prints the assembler file +*) +let genasm prog = + let env, code = compile_unit (new env) (SM.compile prog) in + let asm = Buffer.create 1024 in + Buffer.add_string asm "\t.data\n"; + List.iter + (fun s -> + Buffer.add_string asm (Printf.sprintf "%s:\t.int\t0\n" s) + ) + env#globals; + Buffer.add_string asm "\t.text\n"; + Buffer.add_string asm "\t.globl\tmain\n"; + Buffer.add_string asm "main:\n"; + List.iter + (fun i -> Buffer.add_string asm (Printf.sprintf "%s\n" @@ show i)) + code; + Buffer.contents asm + +(* Builds a program: generates the assembler file and compiles it with the gcc toolchain; runtime.o is taken from the directory named by RC_RUNTIME, or from ../runtime when that variable is not set *) +let build stmt name = + let outf = open_out (Printf.sprintf "%s.s" name) in + Printf.fprintf outf "%s" (genasm stmt); + close_out outf; + let inc = try Sys.getenv "RC_RUNTIME" with Not_found -> "../runtime" in + Sys.command (Printf.sprintf "gcc -m32 -o %s %s/runtime.o %s.s" name inc name) +