Module InliningCFG


RTL function inlining

Require Import Coqlib.
Require Import Wfsimpl.
Require Import Errors.
Require Import Maps.
Require Import AST.
Require Import Integers.
Require Import Values.
Require Import Op.
Require Import Cminor.
Require Import CFG.

Ltac xomega := unfold Plt, Ple in *; zify; omega.

Environment of inlinable functions


We maintain a mapping from function names to their definitions. In this mapping, we only include functions that are eligible for inlining, as determined by the external heuristic should_inline.

Definition funenv : Type := PTree.t function.

Definition size_fenv (fenv: funenv) := PTree_Properties.cardinal fenv.

Parameter should_inline: ident -> function -> bool.

Definition add_fundef (fenv: funenv) (idf: ident * fundef) : funenv :=
  match idf with
  | (id, External ef) =>
      PTree.remove id fenv
  | (id, Internal f) =>
      if should_inline id f
      then PTree.set id f fenv
      else PTree.remove id fenv
  end.

Definition remove_vardef (fenv: funenv) (idv: ident * globvar unit) : funenv :=
  PTree.remove (fst idv) fenv.

Definition funenv_program (p: program) : funenv :=
  List.fold_left remove_vardef p.(prog_vars)
    (List.fold_left add_fundef p.(prog_funct) (PTree.empty function)).

Resources used by a function.

Maximum PC (node number) in the CFG of a function. All nodes of the CFG of f are between 1 and max_pc_function f (inclusive).

Definition max_pc_function (f: function) :=
  PTree.fold (fun m pc i => Pmax m pc) f.(fn_code) 1%positive.

Fixpoint max_id_expr e : positive :=
  match e with
    | Evar id => id
    | Econst _ => 1%positive
    | Eunop unop e => max_id_expr e
    | Ebinop binop e1 e2 => Pmax (max_id_expr e1) (max_id_expr e2)
    | Eload chunk addr => max_id_expr addr
    | Econdition cond e1 e2 => Pmax (max_id_expr cond) (Pmax (max_id_expr e1) (max_id_expr e2))
  end.

Definition max_id_exprs es : positive :=
  List.fold_left (fun m0 e => Pmax m0 (max_id_expr e)) es 1%positive.

Definition max_id_instr (i: instruction) :=
  match i with
  | Iskip s => 1%positive
  | Iassign id a s => Pmax id (max_id_expr a)
  | Istore chunk addr res s =>
    Pmax (max_id_expr addr) (max_id_expr res)
  | Icall optres sig fn args s =>
    let m := Pmax (max_id_expr fn) (max_id_exprs args) in
    match optres with
      | None => m
      | Some res => Pmax m res
    end
  | Itailcall sig fn args =>
    Pmax (max_id_expr fn) (max_id_exprs args)
  | Ibuiltin optres ef args s =>
    let m := max_id_exprs args in
    match optres with
      | None => m
      | Some res => Pmax m res
    end
  | Iifthenelse cond ifso ifnot =>
    max_id_expr cond
  | Iswitch a cases default =>
    max_id_expr a
  | Ireturn optres =>
    match optres with
      | None => 1%positive
      | Some res => max_id_expr res
    end
  end.

Definition max_def_instr (i: instruction) :=
  match i with
  | Iassign id a s => id
  | Istore chunk addr res s => match res with
                                   | Evar id => id
                                   | _ => 1%positive
                               end
  | Icall optres sig fn args s =>
    match optres with
      | None => 1%positive
      | Some res => res
    end
  | Ibuiltin optres ef args s =>
    match optres with
      | None => 1%positive
      | Some res => res
    end
  | _ => 1%positive
  end.

Maximum ident used in a function. All results of an instruction of f, as well as all parameters of f, are between 1 and max_def_function (inclusive).
Definition max_id_function (f : function) :=
  let max_params := List.fold_left (fun m r => Pmax m r) f.(fn_params) 1%positive in
  let max_vars := List.fold_left (fun m r => Pmax m r) f.(fn_vars) 1%positive in
  Pmax
    (PTree.fold (fun m pc i => Pmax m (max_id_instr i)) f.(fn_code) 1%positive)
    (Pmax max_params max_vars).

State monad

To construct incrementally the CFG of a function after inlining, we use a state monad similar to that used in module RTLgen. It records the current state of the CFG, plus counters to generate fresh pseudo-registers and fresh CFG nodes. It also records the stack size needed for the inlined function.

Record state : Type := mkstate {
  st_nextid: positive; (* last used ident *)
  st_nextnode: positive; (* last used CFG node *)
  st_code: code; (* current CFG *)
  st_stksize: Z (* current stack size *)
}.

Monotone evolution of the state.

Inductive sincr (s1 s2: state) : Prop :=
  Sincr (NEXTID: Ple s1.(st_nextid) s2.(st_nextid))
        (NEXTNODE: Ple s1.(st_nextnode) s2.(st_nextnode))
        (STKSIZE: s1.(st_stksize) <= s2.(st_stksize)).

Remark sincr_refl: forall s, sincr s s.
Proof.
  intros; constructor; xomega.
Qed.

Lemma sincr_trans: forall s1 s2 s3, sincr s1 s2 -> sincr s2 s3 -> sincr s1 s3.
Proof.
  intros. inv H; inv H0. constructor; xomega.
Qed.

Dependently-typed state monad, ensuring that the final state is greater or equal (in the sense of predicate sincr above) than the initial state.

Inductive res {A: Type} {s: state}: Type := R (x: A) (s': state) (I: sincr s s').

Definition mon (A: Type) : Type := forall (s: state), @res A s.

Operations on this monad.

Definition ret {A: Type} (x: A): mon A :=
  fun s => R x s (sincr_refl s).

Definition bind {A B: Type} (x: mon A) (f: A -> mon B): mon B :=
  fun s1 => match x s1 with R vx s2 I1 =>
              match f vx s2 with R vy s3 I2 =>
                R vy s3 (sincr_trans s1 s2 s3 I1 I2)
              end
            end.

Notation "'do' X <- A ; B" := (bind A (fun X => B))
   (at level 200, X ident, A at level 100, B at level 200).

Definition initstate :=
  mkstate 1%positive 1%positive (PTree.empty instruction) 0.

Program Definition set_instr (pc: node) (i: instruction): mon unit :=
  fun s =>
    R tt
      (mkstate s.(st_nextid) s.(st_nextnode) (PTree.set pc i s.(st_code)) s.(st_stksize))
      _.
Next Obligation.
  intros; constructor; simpl; xomega.
Qed.

Program Definition add_instr (i: instruction): mon node :=
  fun s =>
    let pc := Psucc s.(st_nextnode) in
    R pc
      (mkstate s.(st_nextid) pc (PTree.set pc i s.(st_code)) s.(st_stksize))
      _.
Next Obligation.
  intros; constructor; simpl; xomega.
Qed.

Program Definition reserve_nodes (numnodes: positive): mon positive :=
  fun s =>
    R s.(st_nextnode)
      (mkstate s.(st_nextid) (Pplus s.(st_nextnode) numnodes) s.(st_code) s.(st_stksize))
      _.
Next Obligation.
  intros; constructor; simpl; xomega.
Qed.

Program Definition reserve_ids (numids: positive): mon positive :=
  fun s =>
    let n := Pplus s.(st_nextid) numids in
    R s.(st_nextid)
      (mkstate n s.(st_nextnode) s.(st_code) s.(st_stksize))
      _.
Next Obligation.
  intros; constructor; simpl; xomega.
Qed.

Program Definition request_stack (sz: Z): mon unit :=
  fun s =>
    R tt
      (mkstate s.(st_nextid) s.(st_nextnode) s.(st_code) (Zmax s.(st_stksize) sz))
      _.
Next Obligation.
  intros; constructor; simpl; xomega.
Qed.

Fixpoint mlist_iter2 {A B: Type} (f: A -> B -> mon unit) (l: list (A*B)): mon unit :=
  match l with
  | nil => ret tt
  | (x,y) :: l' => do z <- f x y; mlist_iter2 f l'
  end.

Inlining contexts


A context describes how to insert the CFG for a source function into the CFG for the function after inlining:

Notation pmap := (positive * PTree.t positive)%type.

Record context: Type := mkcontext {
  dpc: positive; (* offset for PCs *)
  did: positive; (* offset for (local) variables *)
  dstk: Z; (* offset for stack references *)
  mid: positive; (* max offset for (local) vars *)
  mstk: Z; (* original stack block size *)
  retinfo: option(node * option ident) (* where to branch on return *)
}.

Renumbering of local variables - not necessary, but improves readability of the compiled code *

Definition locals_mapping (vars : list ident) (pm0 : pmap) : pmap :=
  let (next, m0) := pm0 in
  List.fold_left (fun (next_t0 : pmap) x =>
                    let (next, t0) := next_t0 in
                    (Psucc next, PTree.set x next t0)) vars pm0.

Definition ren_id (m: pmap) (r: ident) :=
  match (snd m) ! r with
      | None => r
      | Some v => v
  end.

Definition ren_ids (m: pmap) (ids: list ident) := List.map (ren_id m) ids.

Fixpoint ren_expr (m: pmap) (e: expr) : expr :=
  match e with
    | Evar id => Evar (ren_id m id)
    | Econst c => e
    | Eunop unop e => Eunop unop (ren_expr m e)
    | Ebinop binop e1 e2 => Ebinop binop (ren_expr m e1) (ren_expr m e2)
    | Eload chunk addr => Eload chunk (ren_expr m addr)
    | Econdition cond ifso ifnot => Econdition (ren_expr m cond) (ren_expr m ifso) (ren_expr m ifnot)
  end.

Definition ren_exprs (m: pmap) (el: list expr) := List.map (ren_expr m) el.

Definition ren_instr (m: pmap) (i: instruction) : instruction :=
  match i with
  | Iskip s => i
  | Iassign id args s => Iassign (ren_id m id) (ren_expr m args) s
  | Istore chunk addr src s => Istore chunk (ren_expr m addr) src s
  | Icall optres sg fn args s => Icall (option_map (ren_id m) optres) sg (ren_expr m fn) (ren_exprs m args) s
  | Itailcall sg fn args => Itailcall sg (ren_expr m fn) (ren_exprs m args)
  | Ibuiltin optres ef args s => Ibuiltin (option_map (ren_id m) optres) ef (ren_exprs m args) s
  | Iifthenelse a ifso ifnot => Iifthenelse (ren_expr m a) ifso ifnot
  | Iswitch a cases default => Iswitch (ren_expr m a) cases default
  | Ireturn oe => Ireturn (option_map (ren_expr m) oe)
  end.

Definition ren_code (f : function) (m : pmap) : PTree.t instruction :=
  PTree.fold (fun t0 p i => PTree.set p (ren_instr m i) t0) f.(fn_code) (PTree.empty _).

Definition ren_function (f : function) (m : pmap) : function :=
  let params := ren_ids m f.(fn_params) in
  let vars := ren_ids m f.(fn_vars) in
  let code := ren_code f m in
  mkfunction f.(fn_sig) params vars f.(fn_stacksize) code f.(fn_entrypoint).

Definition ren_program (p : program) : program :=
  let prog_funct :=
      List.map (fun (id_fd : ident * fundef) =>
                  let (id, fd) := id_fd in
                  match fd with
                    | Internal f =>
                      let locals := f.(fn_params) ++ f.(fn_vars) in
                      let m := locals_mapping locals (1%positive, PTree.empty _) in
                      (id, Internal (ren_function f m))
                    | External _ => (id, fd)
                  end) p.(prog_funct)
  in
  mkprogram prog_funct p.(prog_main) p.(prog_vars).


The following functions "shift" (relocate) PCs, registers, operations, etc.

Definition spc (ctx: context) (pc: node) := Pplus pc ctx.(dpc).

Definition ple (x y: positive) : {Ple x y} + {~ Ple x y}.
Proof.
  intros. unfold Ple. apply Z_le_dec.
Qed.

Definition sid (ctx: context) (r: ident) :=
  let new_id := Pplus r ctx.(did) in
  new_id.

Definition sids (ctx: context) (ids: list ident) := List.map (sid ctx) ids.

Definition sstk (ctx: context) (i: Int.int) :=
  Int.add (Int.repr ctx.(dstk)) i.

Fixpoint sexpr (ctx: context) (e: expr) : expr :=
  match e with
    | Evar id => Evar (sid ctx id)
    | Econst c => match c with
                    | Oaddrsymbol id offset =>
                      Econst (Oaddrsymbol id offset)
                    | Oaddrstack offset =>
                      Econst (Oaddrstack (sstk ctx offset))
                    | _ => e
                  end
    | Eunop unop e => Eunop unop (sexpr ctx e)
    | Ebinop binop e1 e2 => Ebinop binop (sexpr ctx e1) (sexpr ctx e2)
    | Eload chunk addr => Eload chunk (sexpr ctx addr)
    | Econdition cond ifso ifnot => Econdition (sexpr ctx cond) (sexpr ctx ifso) (sexpr ctx ifnot)
  end.

Definition sexprs (ctx: context) (el: list expr) := List.map (sexpr ctx) el.

The initial context, used to copy the CFG of a toplevel function.

Definition initcontext (dpc did nid: positive) (sz: Z) :=
  {| dpc := dpc;
     did := did;
     dstk := 0;
     mid := nid;
     mstk := Zmax sz 0;
     retinfo := None |}.

The context used to inline a call to another function.

Definition min_alignment (sz: Z) :=
  if zle sz 1 then 1
  else if zle sz 2 then 2
  else if zle sz 4 then 4 else 8.

Definition callcontext (ctx: context)
                       (dpc did nid: positive) (sz: Z)
                       (retpc: node) (optretid: option ident) :=
  {| dpc := dpc;
     did := did;
     dstk := align (ctx.(dstk) + ctx.(mstk)) (min_alignment sz);
     mid := nid;
     mstk := Zmax sz 0;
     retinfo := Some (spc ctx retpc, option_map (sid ctx) optretid) |}.

The context used to inline a tail call to another function.

Definition tailcontext (ctx: context)
                       (dpc did nid: positive) (sz: Z) :=
  {| dpc := dpc;
     did := did;
     dstk := align ctx.(dstk) (min_alignment sz);
     mid := nid;
     mstk := Zmax sz 0;
     retinfo := ctx.(retinfo) |}.

Recursive expansion and copying of a CFG


Insert "move" instructions to copy the arguments of an inlined function into its parameters.
Fixpoint add_moves (srcs : list expr) (dsts: list ident) (succ: node): mon node :=
  match srcs, dsts with
  | s1 :: sl, d1 :: dl =>
      do n <- add_instr (Iassign d1 s1 succ);
      add_moves sl dl n
  | _, _ =>
      ret succ
  end.

To prevent infinite inlining of a recursive function, when we inline the body of a function f, this function is removed from the environment of inlinable functions and therefore becomes ineligible for inlining. This decreases the size (number of entries) of the environment and guarantees termination. Inlining is, therefore, presented as a well-founded recursion over the size of the environment.

Section EXPAND_CFG.

Variable fenv: funenv.

The rec parameter is the recursor: rec fenv' P ctx f copies the body of function f, with inline expansion within, as governed by context ctx. It can only be called for function environments fenv' strictly smaller than the current environment fenv.

Variable rec: forall fenv', (size_fenv fenv' < size_fenv fenv)%nat -> context -> function -> mon unit.

Given an expression fn, can we inline the corresponding call?
Inductive inline_decision (fn: expr) : Type :=
  | Cannot_inline
  | Can_inline (id: ident) (offset : int) (f: function)
               (P: fn = Econst (Oaddrsymbol id offset))
               (Q: fenv!id = Some f).

Program Definition can_inline (fn: expr): inline_decision fn :=
  match fn with
    | Econst c =>
      match c with
          | Oaddrsymbol id offset =>
              match fenv!id with Some f => Can_inline _ id offset f _ _ | None => Cannot_inline _ end
          | _ => Cannot_inline _
      end
    | _ => Cannot_inline _
  end.

Inlining of a call to function f. An appropriate context is created, then the CFG of f is recursively copied, then moves are inserted to copy the arguments of the call to the parameters of f.

Definition inline_function (ctx: context) (id: ident) (f: function)
                           (P: PTree.get id fenv = Some f)
                           (args: list expr)
                           (retpc: node) (optretid: option ident) : mon node :=
  let npc := max_pc_function f in
  let nids := max_id_function f in
  do dpc <- reserve_nodes npc;
  do did <- reserve_ids nids;
  let ctx' := callcontext ctx dpc did nids f.(fn_stacksize) retpc optretid in
  do x <- rec (PTree.remove id fenv) (PTree_Properties.cardinal_remove P) ctx' f;
  add_moves (sexprs ctx args) (sids ctx' f.(fn_params)) (spc ctx' f.(fn_entrypoint)).

Inlining of a tail call to function f. Similar to inline_function, but the new context is different.

Definition inline_tail_function (ctx: context) (id: ident) (f: function)
                               (P: PTree.get id fenv = Some f)
                               (args: list expr): mon node :=
  let npc := max_pc_function f in
  let nids := max_id_function f in
  do dpc <- reserve_nodes npc;
  do did <- reserve_ids nids;
  let ctx' := tailcontext ctx dpc did nids f.(fn_stacksize) in
  do x <- rec (PTree.remove id fenv) (PTree_Properties.cardinal_remove P) ctx' f;
  add_moves (sexprs ctx args) (sids ctx' f.(fn_params)) (spc ctx' f.(fn_entrypoint)).

The instruction generated for a Ireturn instruction found in an inlined function body.

Definition inline_return (ctx: context) (oe: option expr) (retinfo: node * option ident) :=
  match retinfo, oe with
  | (retpc, oretid), Some e =>
    match oretid with
        | Some retid => Iassign retid (sexpr ctx e) retpc
        | None => Iskip retpc
    end
  | (retpc, retid), None => Iskip retpc
  end.

Expansion and copying of an instruction. For most instructions, its registers and successor PC are shifted as per the context ctx, then the instruction is inserted in the final CFG at its final position spc ctx pc. Icall instructions are either replaced by a "goto" to the expansion of the called function, or shifted as described above. Itailcall instructions are similar, with one additional case. If the Itailcall occurs in the body of an inlined function, and cannot be inlined itself, it must be turned into an Icall instruction that branches to the return point of the inlined function. Finally, Ireturn instructions within an inlined function are turned into a "move" or "goto" that stores the result, if any, into the destination register, then branches back to the successor of the inlined call.

Definition expand_instr (ctx: context) (pc: node) (i: instruction): mon unit :=
  match i with
  | Iskip s =>
    set_instr (spc ctx pc) (Iskip (spc ctx s))
  | Iassign id args s =>
    set_instr (spc ctx pc)
              (Iassign (sid ctx id) (sexpr ctx args) (spc ctx s))
  | Istore chunk addr src s =>
    set_instr (spc ctx pc)
              (Istore chunk (sexpr ctx addr) (sexpr ctx src) (spc ctx s))
  | Icall optres sg fn args s =>
    match can_inline fn with
      | Cannot_inline =>
        set_instr (spc ctx pc)
                  (Icall (option_map (sid ctx) optres) sg (sexpr ctx fn) (sexprs ctx args) (spc ctx s))
      | Can_inline id offset f P Q =>
        do n <- inline_function ctx id f Q args s optres;
          set_instr (spc ctx pc) (Iskip n)
    end
  | Itailcall sg fn args =>
      match can_inline fn with
      | Cannot_inline =>
          match ctx.(retinfo) with
          | None =>
              set_instr (spc ctx pc)
                        (Itailcall sg (sexpr ctx fn) (sexprs ctx args))
          | Some(rpc, rreg) =>
              set_instr (spc ctx pc)
                        (Icall rreg sg (sexpr ctx fn) (sexprs ctx args) rpc)
          end
      | Can_inline id offset f P Q =>
        do n <- inline_tail_function ctx id f Q args;
          set_instr (spc ctx pc) (Iskip n)
      end
  | Ibuiltin optres ef args s =>
      set_instr (spc ctx pc)
                (Ibuiltin (option_map (sid ctx) optres) ef (sexprs ctx args) (spc ctx s))
  | Iifthenelse a ifso ifnot =>
      set_instr (spc ctx pc)
                (Iifthenelse (sexpr ctx a) (spc ctx ifso) (spc ctx ifnot))
  | Iswitch a cases default =>
    set_instr (spc ctx pc)
              (Iswitch (sexpr ctx a) (List.map (fun i_pc => (fst i_pc, spc ctx (snd i_pc))) cases) (spc ctx default))
  | Ireturn oe =>
      match ctx.(retinfo) with
      | None =>
          set_instr (spc ctx pc) (Ireturn (option_map (sexpr ctx) oe))
      | Some rinfo =>
          set_instr (spc ctx pc) (inline_return ctx oe rinfo)
      end
  end
  .

The expansion of a function f iteratively expands all its instructions, after recording how much stack it needs.

Definition expand_cfg_rec (ctx: context) (f: function): mon unit :=
  do x <- request_stack (ctx.(dstk) + ctx.(mstk));
  mlist_iter2 (expand_instr ctx) (PTree.elements f.(fn_code)).

End EXPAND_CFG.

Here we "tie the knot" of the recursion, taking the fixpoint of expand_cfg_rec.

Definition expand_cfg := Fixm size_fenv expand_cfg_rec.

Start of the recursion: copy and inline function f in the initial context.

Definition expand_function (fenv: funenv) (f: function) : mon context :=
  let npc := max_pc_function f in
  let nids := max_id_function f in
  do dpc <- reserve_nodes npc;
  do did <- reserve_ids nids;
  let ctx := initcontext dpc did nids f.(fn_stacksize) in
  do x <- expand_cfg fenv ctx f;
  ret ctx.

Inlining in functions and whole programs.


Local Open Scope string_scope.

Require Import MSetPositive.
Notation "{}" := PositiveSet.empty.
Notation "{ a }" := (PositiveSet.singleton a).
Notation "ab" := (PositiveSet.union a b) (at level 100).
Fixpoint get_ids_expr e : PositiveSet.t :=
  match e with
    | Evar id => {id}
    | Econst _ => {}
    | Eunop unop e => get_ids_expr e
    | Ebinop binop e1 e2 => get_ids_expr e1get_ids_expr e2
    | Eload chunk addr => get_ids_expr addr
    | Econdition cond e1 e2 => get_ids_expr condget_ids_expr e1get_ids_expr e2
  end.

Definition optlist (ol : option ident) : PositiveSet.t :=
  match ol with
      | None => {}
      | Some p => {p}
  end.

Definition get_ids_exprs es : PositiveSet.t :=
  List.fold_left (fun l0 e => l0get_ids_expr e) es {}.

Definition get_ids_instr (i: instruction) : PositiveSet.t :=
  match i with
  | Iskip s => {}
  | Iassign id a s => (PositiveSet.singleton id) ∪ (get_ids_expr a)
  | Istore chunk addr res s =>
    get_ids_expr addrget_ids_expr res
  | Icall optres sig fn args s =>
    optlist optresget_ids_expr fnget_ids_exprs args
  | Itailcall sig fn args =>
    get_ids_expr fnget_ids_exprs args
  | Ibuiltin optres ef args s =>
    optlist optresget_ids_exprs args
  | Iifthenelse cond ifso ifnot =>
    get_ids_expr cond
  | Iswitch a cases default =>
    get_ids_expr a
  | Ireturn optres =>
    match optres with
        | None => {}
        | Some res => get_ids_expr res
    end
  end.

Definition get_ids_code (code: PTree.t instruction) : PositiveSet.t :=
  PTree.fold (fun s0 p i => s0get_ids_instr i) code {}.

Fixpoint set_from_list (l : list ident) : PositiveSet.t :=
  match l with
    | nil => {}
    | a :: r => PositiveSet.singleton aset_from_list r
  end.

Inlining can increase the size of the function's stack block. We must make sure that the new size does not exceed Int.max_unsigned, otherwise address computations within the stack would overflow and produce incorrect results.
Definition transf_function (fenv: funenv) (f: function) : Errors.res function :=
  let '(R ctx s _) := expand_function fenv f initstate in
  if zle s.(st_stksize) Int.max_unsigned then
    let params := set_from_list (sids ctx f.(fn_params)) in
    let local_variables := PositiveSet.elements (PositiveSet.diff (get_ids_code s.(st_code)) params) in
    OK (mkfunction f.(fn_sig)
                   (sids ctx f.(fn_params))
                   local_variables
                   s.(st_stksize)
                   s.(st_code)
                   (spc ctx f.(fn_entrypoint)))
  else
    Error(msg "Inlining: stack too big").

Definition transf_fundef (fenv: funenv) (fd: fundef) : Errors.res fundef :=
  AST.transf_partial_fundef (transf_function fenv) fd.

Definition transf_program (p: program): Errors.res program :=
  let p := ren_program p in
  let fenv := funenv_program p in
  AST.transform_partial_program (transf_fundef fenv) p.