2018-03-05 00:54:50 +03:00
|
|
|
(* X86 codegeneration interface *)
|
|
|
|
|
|
|
|
|
|
(* The registers: *)
|
2018-03-07 10:13:34 +03:00
|
|
|
let regs =
  [| "%ebx"; "%ecx"; "%esi"; "%edi"; "%eax"; "%edx"; "%ebp"; "%esp" |]

(* Bound on the register indices handed out for the symbolic stack:
   the table above has 8 entries, so this evaluates to 3.
   NOTE(review): the allocator tests [n < num_of_regs] on the register
   currently on top and then hands out [R (n+1)], so indices 0..3
   (%ebx..%edi) are reachable --- confirm that four registers is intended. *)
let num_of_regs = Array.length regs - 5

(* Size of a machine word in bytes; used to turn slot numbers into offsets *)
let word_size = 4
|
|
|
|
|
|
|
|
|
|
(* We need to distinguish the following operand types: *)
|
|
|
|
|
type opnd =
  | R of int     (* hard register: an index into [regs] *)
  | S of int     (* a position on the hardware stack, addressed relative to %ebp *)
  | M of string  (* a named memory location *)
  | L of int     (* an immediate operand *)
|
2018-03-05 00:54:50 +03:00
|
|
|
|
|
|
|
|
(* For convenience we define the following synonyms for the registers: *)
|
|
|
|
|
(* Each synonym is the [R] operand whose index selects the register of the
   same name in the [regs] table. *)
let ebx = R 0
let ecx = R 1
let esi = R 2
let edi = R 3
let eax = R 4
let edx = R 5
let ebp = R 6
let esp = R 7
|
|
|
|
|
|
|
|
|
|
(* Now the x86 instructions (we do not need all of them): *)
|
|
|
|
|
type instr =
  (* copies a value from the first operand to the second one *)
  | Mov   of opnd * opnd
  (* performs a binary operation; the string designates the x86 operator,
     not the source language one *)
  | Binop of string * opnd * opnd
  (* x86 integer division, see the instruction set reference *)
  | IDiv  of opnd
  (* see the instruction set reference *)
  | Cltd
  (* sets a value from flags; the first string is the suffix which determines
     the value being set, the second one is the (sub)register name *)
  | Set   of string * string
  (* pushes the operand on the hardware stack *)
  | Push  of opnd
  (* pops from the hardware stack to the operand *)
  | Pop   of opnd
  (* calls a function by its name *)
  | Call  of string
  (* returns from a function *)
  | Ret
  (* a label in the code *)
  | Label of string
  (* a conditional jump: condition suffix and target label *)
  | CJmp  of string * string
  (* a non-conditional jump *)
  | Jmp   of string
  (* an assembler directive, emitted verbatim *)
  | Meta  of string
|
|
|
|
|
|
2018-03-05 00:54:50 +03:00
|
|
|
(* Instruction printer *)
|
|
|
|
|
(* Renders a single instruction as a line of AT&T-syntax assembler text.
   Raises [Failure "unknown binary operator"] when a [Binop] carries a tag
   which is not present in the mnemonic table below. *)
let show instr =
  (* operator tag -> x86 mnemonic *)
  let binop op =
    let mnemonics =
      [ "+"  , "addl";
        "-"  , "subl";
        "*"  , "imull";
        "&&" , "andl";
        "!!" , "orl";
        "^"  , "xorl";
        "cmp", "cmpl" ]
    in
    match List.assoc_opt op mnemonics with
    | Some mnemonic -> mnemonic
    | None          -> failwith "unknown binary operator"
  in
  (* textual form of an operand *)
  let opnd = function
    | R i -> regs.(i)
    | S i ->
      if i < 0
      (* negative positions are addressed upwards from %ebp, starting at 8 *)
      then Printf.sprintf "%d(%%ebp)" (8+(-i-1) * word_size)
      (* non-negative positions are addressed downwards from %ebp *)
      else Printf.sprintf "-%d(%%ebp)" ((i+1) * word_size)
    | M x -> x
    | L i -> Printf.sprintf "$%d" i
  in
  match instr with
  | Mov (src, dst)     -> Printf.sprintf "\tmovl\t%s,\t%s" (opnd src) (opnd dst)
  | Binop (op, s1, s2) -> Printf.sprintf "\t%s\t%s,\t%s" (binop op) (opnd s1) (opnd s2)
  | IDiv s             -> "\tidivl\t" ^ opnd s
  | Cltd               -> "\tcltd"
  | Set (suf, reg)     -> "\tset" ^ suf ^ "\t" ^ reg
  | Push s             -> "\tpushl\t" ^ opnd s
  | Pop s              -> "\tpopl\t" ^ opnd s
  | Call p             -> "\tcall\t" ^ p
  | Ret                -> "\tret"
  | Label l            -> l ^ ":\n"
  | CJmp (cond, l)     -> "\tj" ^ cond ^ "\t" ^ l
  | Jmp l              -> "\tjmp\t" ^ l
  | Meta s             -> s ^ "\n"
|
2018-03-05 00:54:50 +03:00
|
|
|
|
|
|
|
|
(* Opening stack machine to use instructions without fully qualified names *)
|
|
|
|
|
open SM
|
|
|
|
|
|
|
|
|
|
(* Symbolic stack machine evaluator

     compile : env -> prg -> env * instr list

   Takes an environment and a stack machine program, and returns a pair --- the updated
   environment and the list of x86 instructions
*)
|
|
|
|
|
(* Translates a stack machine program into x86 instructions.

   [env]  is the symbolic-stack environment (see the methods invoked below);
   [code] is the list of stack machine instructions.

   Returns the environment after the whole program has been processed together
   with the generated instruction list.  Raises [Failure "unknown operator"]
   only if [suffix] is applied to a string which is not a comparison operator. *)
let compile env code =
  (* condition suffix for a source-level comparison operator *)
  let suffix = function
    | "<"  -> "l"
    | "<=" -> "le"
    | "==" -> "e"
    | "!=" -> "ne"
    | ">=" -> "ge"
    | ">"  -> "g"
    | _    -> failwith "unknown operator"
  in
  (* true for operands which live in a stack slot *)
  let on_stack = function S _ -> true | _ -> false in
  let rec compile' env scode =
    match scode with
    | [] -> env, []
    | instr :: scode' ->
      let env', code' =
        match instr with
        | READ ->
          let s, env' = env#allocate in
          (env', [Call "Lread"; Mov (eax, s)])

        | WRITE ->
          let s, env' = env#pop in
          (env', [Push s; Call "Lwrite"; Pop eax])

        | CONST n ->
          let s, env' = env#allocate in
          (env', [Mov (L n, s)])

        | LD x ->
          let s, env' = (env#global x)#allocate in
          env',
          (match s with
           (* the target is in memory: go through %eax *)
           | S _ | M _ -> [Mov (env'#loc x, eax); Mov (eax, s)]
           | _         -> [Mov (env'#loc x, s)]
          )

        | ST x ->
          let s, env' = (env#global x)#pop in
          env',
          (match s with
           (* the source is in memory: go through %eax *)
           | S _ | M _ -> [Mov (s, eax); Mov (eax, env'#loc x)]
           | _         -> [Mov (s, env'#loc x)]
          )

        | BINOP op ->
          (* [x] is the top of the symbolic stack, [y] the operand below it;
             the result is always left in [y] *)
          let x, y, env' = env#pop2 in
          env'#push y,
          (match op with
           | "/" | "%" ->
             (* after idivl the quotient is in %eax, the remainder in %edx *)
             [Mov (y, eax);
              Cltd;
              IDiv x;
              Mov ((match op with "/" -> eax | _ -> edx), y)
             ]

           | "<" | "<=" | "==" | "!=" | ">=" | ">" ->
             (match x with
              | M _ | S _ ->
                (* [x] is in memory: load it into %edx before comparing *)
                [Binop ("^", eax, eax);
                 Mov (x, edx);
                 Binop ("cmp", edx, y);
                 Set (suffix op, "%al");
                 Mov (eax, y)
                ]
              | _ ->
                [Binop ("^" , eax, eax);
                 Binop ("cmp", x, y);
                 Set (suffix op, "%al");
                 Mov (eax, y)
                ]
             )

           | "*" ->
             (* imull needs a register destination, so whenever the result
                slot [y] is on the stack the product is computed in %eax,
                regardless of where [x] lives *)
             if on_stack y
             then [Mov (y, eax); Binop (op, x, eax); Mov (eax, y)]
             else [Binop (op, x, y)]

           | "&&" ->
             (* normalise both operands to 0/1 in %al and %dl, then and them *)
             [Mov (x, eax);
              Binop (op, x, eax);
              Mov (L 0, eax);
              Set ("ne", "%al");

              Mov (y, edx);
              Binop (op, y, edx);
              Mov (L 0, edx);
              Set ("ne", "%dl");

              Binop (op, edx, eax);
              Set ("ne", "%al");

              Mov (eax, y)
             ]

           | "!!" ->
             (* the flags produced by orl survive the following movl *)
             [Mov (y, eax);
              Binop (op, x, eax);
              Mov (L 0, eax);
              Set ("ne", "%al");
              Mov (eax, y)
             ]

           | _ ->
             (* two stack operands are combined through %eax *)
             if on_stack x && on_stack y
             then [Mov (x, eax); Binop (op, eax, y)]
             else [Binop (op, x, y)]
          )

        | LABEL s -> env, [Label s]

        | JMP l -> env, [Jmp l]

        | CJMP (s, l) ->
          let x, env = env#pop in
          env, [Binop ("cmp", L 0, x); CJmp (s, l)]

        | BEGIN (f, a, l) ->
          let env = env#enter f a l in
          (* the frame size is a symbol defined by the directive emitted at END *)
          env, [Push ebp; Mov (esp, ebp); Binop ("-", M ("$" ^ env#lsize), esp)]

        | END ->
          env, [Label env#epilogue;
                Mov (ebp, esp);
                Pop ebp;
                Ret;
                Meta (Printf.sprintf "\t.set\t%s,\t%d" env#lsize (env#allocated * word_size))
               ]

        | RET b ->
          if b
          then let x, env = env#pop in env, [Mov (x, eax); Jmp env#epilogue]
          else env, [Jmp env#epilogue]

        | CALL (f, n, p) ->
          (* live registers are saved before the call and restored afterwards
             in the opposite order *)
          let pushr, popr =
            List.split @@ List.map (fun r -> (Push r, Pop r)) env#live_registers
          in
          let env, code =
            if n = 0
            then env, pushr @ [Call f] @ (List.rev popr)
            else
              let rec push_args env acc = function
                | 0 -> env, acc
                | n -> let x, env = env#pop in
                       push_args env ((Push x)::acc) (n-1)
              in
              let env, pushs = push_args env [] n in
              (* drop the [n] pushed arguments from the hardware stack *)
              env, pushr @ pushs @ [Call f; Binop ("+", L (n * word_size), esp)] @ (List.rev popr)
          in
          (* when [p] is false the value left in %eax is stored in a fresh slot *)
          (if p then env, code else let y, env = env#allocate in env, code @ [Mov (eax, y)])
      in
      let env'', code'' = compile' env' scode' in
      env'', code' @ code''
  in
  compile' env code
|
|
|
|
|
|
|
|
|
|
(* A set of strings *)
|
|
|
|
|
(* NOTE: this module shares its name with the [S] constructor of [opnd];
   modules and constructors live in different namespaces, so [S.empty]
   refers to the set while [S 0] builds a stack operand. *)
module S = Set.Make (String)
|
|
|
|
|
|
|
|
|
|
(* Environment implementation *)
|
2018-04-11 00:47:46 +03:00
|
|
|
(* Pairs every element of [l] with its zero-based position in the list *)
let make_assoc l = List.mapi (fun index item -> (item, index)) l
|
|
|
|
|
|
2018-03-05 00:54:50 +03:00
|
|
|
(* Compilation environment: a symbolic stack of operands plus the
   bookkeeping needed for the function which is currently being compiled.
   All "updating" methods are functional and return a modified copy. *)
class env =
  object (self)
    val globals     = S.empty (* a set of global variables         *)
    val stack_slots = 0       (* maximal number of stack positions *)
    val stack       = []      (* symbolic stack                    *)
    val args        = []      (* function arguments                *)
    val locals      = []      (* function local variables          *)
    val fname       = ""      (* function name                     *)

    (* gets the operand for a variable: arguments are mapped to negative
       stack positions, locals to non-negative ones, and any other name to
       the memory location "global_<name>" *)
    method loc x =
      match List.assoc_opt x args with
      | Some i -> S (- i - 1)
      | None ->
        (match List.assoc_opt x locals with
         | Some i -> S i
         | None   -> M ("global_" ^ x))

    (* allocates a fresh position on a symbolic stack; returns the operand
       and the updated environment *)
    method allocate =
      let x, n =
        let rec allocate' = function
          | []                            -> ebx     , 0
          (* [S (n+1)] is the (n+2)-th slot below %ebp, so that many
             slots have to be reserved in the frame *)
          | (S n)::_                      -> S (n+1) , n+2
          | (R n)::_ when n < num_of_regs -> R (n+1) , stack_slots
          | (M _)::s                      -> allocate' s
          (* the first spilled position goes right after the slots taken
             by local variables, which occupy [S 0 .. S (k-1)] *)
          | _                             ->
            let first_free = List.length locals in
            S first_free, first_free + 1
        in
        allocate' stack
      in
      x, {< stack_slots = max n stack_slots; stack = x::stack >}

    (* pushes an operand to the symbolic stack *)
    method push y = {< stack = y::stack >}

    (* pops one operand from the symbolic stack;
       raises [Failure] if the stack is empty *)
    method pop =
      match stack with
      | x :: stack' -> x, {< stack = stack' >}
      | []          -> failwith "pop: empty symbolic stack"

    (* pops two operands from the symbolic stack (the top one comes first);
       raises [Failure] if fewer than two operands are available *)
    method pop2 =
      match stack with
      | x :: y :: stack' -> x, y, {< stack = stack' >}
      | _                -> failwith "pop2: fewer than two operands on the symbolic stack"

    (* registers a global variable in the environment *)
    method global x = {< globals = S.add ("global_" ^ x) globals >}

    (* gets all global variables *)
    method globals = S.elements globals

    (* gets a number of stack positions allocated *)
    method allocated = stack_slots

    (* enters a function: [f] is its name, [a] its arguments, [l] its locals;
       the symbolic stack is reset and one slot is reserved per local *)
    method enter f a l =
      {< stack_slots = List.length l; stack = []; locals = make_assoc l; args = make_assoc a; fname = f >}

    (* returns a label for the epilogue *)
    method epilogue = Printf.sprintf "L%s_epilogue" fname

    (* returns a name for local size meta-symbol *)
    method lsize = Printf.sprintf "L%s_SIZE" fname

    (* returns a list of live registers, i.e. the register operands which
       are currently on the symbolic stack *)
    method live_registers =
      List.filter (function R _ -> true | _ -> false) stack

  end
|
|
|
|
|
|
2018-03-05 01:01:36 +03:00
|
|
|
(* Generates an assembler text for a program: first compiles the program into
   the stack code, then generates x86 assembler code, then prints the assembler file
*)
|
2018-04-11 00:47:46 +03:00
|
|
|
(* Produces the assembler text for the program [(ds, stmt)]: the statement is
   extended with a trailing [return 0], wrapped into a "main" function,
   translated to stack code and then to x86 instructions; the result is the
   data section (one zero-initialised word per global) followed by the code. *)
let genasm (ds, stmt) =
  let stmt = Language.Stmt.Seq (stmt, Language.Stmt.Return (Some (Language.Expr.Const 0))) in
  let sm_program = LABEL "main" :: BEGIN ("main", [], []) :: SM.compile (ds, stmt) in
  let env, code = compile (new env) sm_program in
  let data_section =
    Meta "\t.data" :: List.map (fun g -> Meta (g ^ ":\t.int\t0")) env#globals
  in
  let text_header = [Meta "\t.text"; Meta "\t.globl\tmain"] in
  let asm = Buffer.create 1024 in
  (* every instruction goes on its own line *)
  let emit i =
    Buffer.add_string asm (show i);
    Buffer.add_char asm '\n'
  in
  List.iter emit (data_section @ text_header @ code);
  Buffer.contents asm
|
|
|
|
|
|
|
|
|
|
(* Builds a program: generates the assembler file and compiles it with the gcc toolchain *)
|
2018-04-11 00:47:46 +03:00
|
|
|
(* Writes the assembler text of [prog] to "<name>.s" and runs gcc on it,
   linking against "runtime.o" taken from the directory named by the
   RC_RUNTIME environment variable ("../runtime" when it is not set).
   Returns the exit code reported by [Sys.command]. *)
let build prog name =
  (* generate the text first, so that a failing code generator neither
     creates the output file nor leaves a channel open *)
  let asm = genasm prog in
  let outf = open_out (Printf.sprintf "%s.s" name) in
  (try output_string outf asm
   with e -> close_out_noerr outf; raise e);
  close_out outf;
  let inc =
    match Sys.getenv_opt "RC_RUNTIME" with
    | Some dir -> dir
    | None     -> "../runtime"
  in
  Sys.command (Printf.sprintf "gcc -m32 -o %s %s/runtime.o %s.s" name inc name)
|
|
|
|
|
|