diff --git a/src/X86.ml b/src/X86.ml new file mode 100644 index 000000000..ab4b842c5 --- /dev/null +++ b/src/X86.ml @@ -0,0 +1,269 @@ +(* X86 codegeneration interface *) + +(* The registers: *) +let regs = [|"%ebx"; "%ecx"; "%esi"; "%edi"; "%eax"; "%edx"; "%ebp"; "%esp"|] + +(* We can not freely operate with all register; only 3 by now *) +let num_of_regs = Array.length regs - 5 + +(* We need to know the word size to calculate offsets correctly *) +let word_size = 4 + +(* We need to distinguish the following operand types: *) +type opnd = +| R of int (* hard register *) +| S of int (* a position on the hardware stack *) +| M of string (* named memory location *) +| L of int (* immediate operand *) + +(* For convenience we define the following synonyms for the registers: *) +let ebx = R 0 +let ecx = R 1 +let esi = R 2 +let edi = R 3 +let eax = R 4 +let edx = R 5 +let ebp = R 6 +let esp = R 7 + +(* Now x86 instruction (we do not need all of them): *) +type instr = +| Cltd +| Set of string * string +| IDiv of opnd +| Binop of string * opnd * opnd +| Mov of opnd * opnd +| Push of opnd +| Pop of opnd +| Ret +| Call of string + +(* Instruction printer *) +let show instr = + let binop = function + | "+" -> "addl" + | "-" -> "subl" + | "*" -> "imull" + | "&&" -> "andl" + | "!!" -> "orl" + | "^" -> "xorl" + | "cmp" -> "cmpl" + | _ -> failwith "unknown binary operator" + in + let opnd = function + | R i -> regs.(i) + | S i -> Printf.sprintf "-%d(%%ebp)" ((i+1) * word_size) + | M x -> x + | L i -> Printf.sprintf "$%d" i + in + match instr with + | Cltd -> "\tcltd" + | Set (suf, s) -> Printf.sprintf "\tset%s\t%s" suf s + | IDiv s1 -> Printf.sprintf "\tidivl\t%s" (opnd s1) + | Binop (op, s1, s2) -> Printf.sprintf "\t%s\t%s,\t%s" (binop op) (opnd s1) (opnd s2) + | Mov (s1, s2) -> Printf.sprintf "\tmovl\t%s,\t%s" (opnd s1) (opnd s2) + | Push s -> Printf.sprintf "\tpushl\t%s" (opnd s) + | Pop s -> Printf.sprintf "\tpopl\t%s" (opnd s) + | Ret -> "\tret" + | Call p -> Printf.sprintf "\tcall\t%s" p + +(* Opening stack machine to use instructions without fully qualified names *) +open SM + +(* Symbolic stack machine evaluator + + compile : env -> prg -> env * instr list + + Take an environment, a stack machine program, and returns a pair --- updated environment and the list + of x86 instructions +*) +let compile env code = + let suffix = function + | "<" -> "l" + | "<=" -> "le" + | "==" -> "e" + | "!=" -> "ne" + | ">=" -> "ge" + | ">" -> "g" + | _ -> failwith "unknown operator" + in + let rec compile' env scode = + let on_stack = function S _ -> true | _ -> false in + match scode with + | [] -> env, [] + | instr :: scode' -> + let env', code' = + match instr with + | READ -> + let s, env' = env#allocate in + (env', [Call "Lread"; Mov (eax, s)]) + | WRITE -> + let s, env' = env#pop in + (env', [Push s; Call "Lwrite"; Pop eax]) + | CONST n -> + let s, env' = env#allocate in + (env', [Mov (L n, s)]) + | LD x -> + let s, env' = (env#global x)#allocate in + env', + (match s with + | S _ | M _ -> [Mov (M (env'#loc x), eax); Mov (eax, s)] + | _ -> [Mov (M (env'#loc x), s)] + ) + | ST x -> + let s, env' = (env#global x)#pop in + env', + (match s with + | S _ | M _ -> [Mov (s, eax); Mov (eax, M (env'#loc x))] + | _ -> [Mov (s, M (env'#loc x))] + ) + | BINOP op -> + let x, y, env' = env#pop2 in + let env' = env'#push y in + env', + (match op with + | "/" | "%" -> + [Mov (y, eax); + Cltd; + IDiv x; + Mov ((match op with "/" -> eax | _ -> edx), y) + ] + | "<" | "<=" | "==" | "!=" | ">=" | ">" -> + (match x with + | M _ | S _ -> + [Binop ("^" , eax, eax); + Mov (x, edx); + Binop ("cmp", edx, y); + Set (suffix op, "%al"); + Mov (eax, y) + ] + | _ -> + [Binop ("^" , eax, eax); + Binop ("cmp", x, y); + Set (suffix op, "%al"); + Mov (eax, y) + ] + ) + | "*" -> + if on_stack x && on_stack y + then [Mov (y, eax); Binop (op, x, eax); Mov (eax, y)] + else [Binop (op, x, y)] + | "&&" -> + [Mov (x, eax); + Binop (op, x, eax); + Mov (L 0, eax); + Set ("ne", "%al"); + + Mov (y, edx); + Binop (op, y, edx); + Mov (L 0, edx); + Set ("ne", "%dl"); + + Binop (op, edx, eax); + Set ("ne", "%al"); + + Mov (eax, y) + ] + | "!!" -> + [Mov (y, eax); + Binop (op, x, eax); + Mov (L 0, eax); + Set ("ne", "%al"); + Mov (eax, y) + ] + | _ -> + if on_stack x && on_stack y + then [Mov (x, eax); Binop (op, eax, y)] + else [Binop (op, x, y)] + ) + in + let env'', code'' = compile' env' scode' in + env'', code' @ code'' + in + compile' env code + +(* A set of strings *) +module S = Set.Make (String) + + +(* Environment implementation *) +class env = + object (self) + val stack_slots = 0 (* maximal number of stack positions *) + val globals = S.empty (* a set of global variables *) + val stack = [] (* symbolic stack *) + + (* gets a name for a global variable *) + method loc x = "global_" ^ x + + (* allocates a fresh position on a symbolic stack *) + method allocate = + let x, n = + let rec allocate' = function + | [] -> ebx , 0 + | (S n)::_ -> S (n+1) , n+1 + | (R n)::_ when n < num_of_regs -> R (n+1) , stack_slots + | _ -> S 0 , 1 + in + allocate' stack + in + x, {< stack_slots = max n stack_slots; stack = x::stack >} + + (* pushes an operand to the symbolic stack *) + method push y = {< stack = y::stack >} + + (* pops one operand from the symbolic stack *) + method pop = let x::stack' = stack in x, {< stack = stack' >} + + (* pops two operands from the symbolic stack *) + method pop2 = let x::y::stack' = stack in x, y, {< stack = stack' >} + + (* registers a global variable in the environment *) + method global x = {< globals = S.add ("global_" ^ x) globals >} + + (* gets the number of allocated stack slots *) + method allocated = stack_slots + + (* gets all global variables *) + method globals = S.elements globals + end + +(* compiles a unit: generates machine code for the stack program and surrounds it + with function prologue/epilogue +*) +let compile_unit env scode = + let env, code = compile env scode in + env, + ([Push ebp; Mov (esp, ebp); Binop ("-", L (word_size*env#allocated), esp)] @ + code @ + [Mov (ebp, esp); Pop ebp; Binop ("^", eax, eax); Ret] + ) + +(* Generates an assembler text for a program: first compile the program into + the stack code, then generate x86 assember code, then print the assembler file +*) +let genasm prog = + let env, code = compile_unit (new env) (SM.compile prog) in + let asm = Buffer.create 1024 in + Buffer.add_string asm "\t.data\n"; + List.iter + (fun s -> + Buffer.add_string asm (Printf.sprintf "%s:\t.int\t0\n" s) + ) + env#globals; + Buffer.add_string asm "\t.text\n"; + Buffer.add_string asm "\t.globl\tmain\n"; + Buffer.add_string asm "main:\n"; + List.iter + (fun i -> Buffer.add_string asm (Printf.sprintf "%s\n" @@ show i)) + code; + Buffer.contents asm + +(* Builds a program: generates the assembler file and compiles it with the gcc toolchain *) +let build stmt name = + let outf = open_out (Printf.sprintf "%s.s" name) in + Printf.fprintf outf "%s" (genasm stmt); + close_out outf; + let inc = try Sys.getenv "RC_RUNTIME" with _ -> "../runtime" in + Sys.command (Printf.sprintf "gcc -m32 -o %s %s/runtime.o %s.s" name inc name) +