Module ControlFlowGraphFlattening

Require Import Clight.
Require Import AST.
Require Import Integers.
Require Import Ctypes.
Require Import Maps.
Require Import Cop.
Require Import Coqlib.
Require Import Smallstep.
Require Import Errors.

Require Import Coq.Program.Equality.

Open Scope error_monad_scope.

Control Flow Graph Flattening obfuscation using a ``switch'' dispatch

Section PARAMETRISATION.

The obfuscation is parameterised by a ``numeration'' function f
Variable f: Z -> Int.int.

max_ident_in_expr e acc returns the biggest identifier (positive) in the expression e greater than acc
Fixpoint max_ident_in_expr (e: expr) (acc: ident): ident :=
  match e with
    | Econst_int x x0 => acc
    | Econst_float x x0 => acc
    | Econst_single x x0 => acc
    | Econst_long x x0 => acc
    | Evar x x0 => if plt acc x then x else acc
    | Etempvar x x0 => if plt acc x then x else acc
    | Ederef x x0 => max_ident_in_expr x acc
    | Eaddrof x x0 => max_ident_in_expr x acc
    | Eunop x x0 x1 => max_ident_in_expr x0 acc
    | Ebinop x x0 x1 x2 => max_ident_in_expr x1 (max_ident_in_expr x0 acc)
    | Ecast x x0 => max_ident_in_expr x acc
    | Efield x x0 x1 => if plt acc x0 then max_ident_in_expr x x0 else max_ident_in_expr x acc
    | Esizeof x x0 => acc
    | Ealignof x x0 => acc
  end.

max_ident_in_stmt s acc returns the biggest identifier (positive) in the statement s greater than acc
Fixpoint max_ident_in_stmt (s: statement) (acc: ident): ident :=
  match s with
    | Sskip => acc
    | Sassign x x0 => max_ident_in_expr x0 (max_ident_in_expr x acc)
    | Sset x x0 => if plt acc x then max_ident_in_expr x0 x else max_ident_in_expr x0 acc
    | Scall x x0 x1 =>
      match x with
        | Some id => if plt acc id then fold_right max_ident_in_expr id (x0::x1) else
                       fold_right max_ident_in_expr acc (x0::x1)
        | None => fold_right max_ident_in_expr acc (x0::x1)
      end
    | Sbuiltin x x0 x1 x2 =>
      match x with
        | Some id => if plt acc id then fold_right max_ident_in_expr id x2 else
                       fold_right max_ident_in_expr acc x2
        | None => fold_right max_ident_in_expr acc x2
      end
    | Ssequence x x0 => max_ident_in_stmt x0 (max_ident_in_stmt x acc)
    | Sifthenelse x x0 x1 => max_ident_in_stmt x1 (max_ident_in_stmt x0 (max_ident_in_expr x acc))
    | Sloop x x0 => max_ident_in_stmt x0 (max_ident_in_stmt x acc)
    | Sbreak => acc
    | Scontinue => acc
    | Sreturn x =>
      match x with
        | Some e => max_ident_in_expr e acc
        | None => acc
      end
    | Sswitch x x0 => max_ident_in_lstmt x0 (max_ident_in_expr x acc)
    | Slabel x x0 => max_ident_in_stmt x0 acc
    | Sgoto x => acc
  end
with max_ident_in_lstmt (ls: labeled_statements) (acc: ident): ident :=
       match ls with
         | LSnil => acc
         | LScons n s ls' => max_ident_in_lstmt ls' (max_ident_in_stmt s acc)
       end.

new_ident_for_function f returns an identifier not present in f
Definition new_ident_for_function (f: function): ident :=
  Psucc (max_ident_in_stmt (fn_body f) (fold_right (fun x y => if plt x y then y else x) (1%positive) (var_names (fn_vars f) ++ var_names (fn_params f) ++ var_names (fn_temps f)))).

append function for labeled_statements
Fixpoint app_lstmt (l1 l2: labeled_statements) :=
  match l1 with
    | LSnil => l2
    | LScons o s l1' => LScons o s (app_lstmt l1' l2)
  end.

sizeof_stmt s returns the ``size'' of s (the number of cases that will correspond to s in the transformation)
Fixpoint sizeof_stmt (s: statement) :=
  match s with
    | Sskip => 1
    | Sassign x x0 => 1
    | Sset x x0 => 1
    | Scall x x0 x1 => 1
    | Sbuiltin x x0 x1 x2 => 1
    | Ssequence x x0 => sizeof_stmt x + sizeof_stmt x0
    | Sifthenelse x x0 x1 => 1 + sizeof_stmt x0 + sizeof_stmt x1
    | Sloop x x0 => 1 + sizeof_stmt x + sizeof_stmt x0
    | Sbreak => 1
    | Scontinue => 1
    | Sreturn x => 1
    | Sswitch x x0 => 1
    | Slabel x x0 => 1
    | Sgoto x => 1
  end.

flatten_loop1 id_t s counter k to_continue to_break transforms the statement s into the labeled_statements used in the ``switch dispatch'', it assumes that id_t is the identifier and type corresponding to the pseudo ``program counter'' variable, s is also assumed to have been in the left side of a Sloop
Fixpoint flatten_loop1 (id_t: ident * type) (s: statement) (counter k: Z) (to_continue: Z) (to_break: Z) :=
  match s with
    | Sskip => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sassign x x0 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sset x x0 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Scall x x0 x1 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sbuiltin x x0 x1 x2 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Ssequence x x0 =>
      do s1 <- flatten_loop1 id_t x (counter) (counter + sizeof_stmt x) to_continue to_break;
      do s2 <- flatten_loop1 id_t x0 (counter + sizeof_stmt x) k to_continue to_break;
      OK (app_lstmt s1 s2)
    | Sifthenelse x x0 x1 =>
      do s1 <- flatten_loop1 id_t x0 (counter + 1) k to_continue to_break;
      do s2 <- flatten_loop1 id_t x1 (counter + 1 + sizeof_stmt x0) k to_continue to_break;
      OK (LScons (Some (Int.unsigned (f counter))) (Sifthenelse x (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1)) (snd id_t))) Sbreak) (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1 + sizeof_stmt x0)) (snd id_t))) Sbreak)) (app_lstmt s1 s2))
    | Sloop x x0 =>
      do s1 <- flatten_loop1 id_t x (counter + 1) (counter + 1 + sizeof_stmt x) (counter + 1 + sizeof_stmt x) k;
      do s2 <- flatten_loop2 id_t x0 (counter + 1 + sizeof_stmt x) counter k;
      OK (LScons (Some (Int.unsigned (f counter))) (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1)) (snd id_t))) Sbreak) (app_lstmt s1 s2))
    | Sbreak => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence (Sset (fst id_t) (Econst_int (f to_break) (snd id_t))) Sbreak) LSnil)
    | Scontinue => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence (Sset (fst id_t) (Econst_int (f to_continue) (snd id_t))) Sbreak) LSnil)
    | Sreturn x => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sswitch x x0 => Error (msg "no switch in loop")
    | Slabel x x0 => Error (msg "no label in loop")
    | Sgoto x => Error (msg "no goto in loop")
  end
flatten_loop2 id_t s counter k to_break transforms the statement s into the labeled_statements used in the ``switch dispatch'', it assumes that id_t is the identifier and type corresponding to the pseudo ``program counter'' variable, s is also assumed to have been in the right side of a Sloop. A Scontinue that appears within the right side of a Sloop doesn't have a semantics and is thus rejected
with flatten_loop2 (id_t: ident * type) (s: statement) (counter k: Z) (to_break: Z) :=
  match s with
    | Sskip => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sassign x x0 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sset x x0 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Scall x x0 x1 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sbuiltin x x0 x1 x2 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Ssequence x x0 =>
      do s1 <- flatten_loop2 id_t x (counter) (counter + sizeof_stmt x) to_break;
      do s2 <- flatten_loop2 id_t x0 (counter + sizeof_stmt x) k to_break;
      OK (app_lstmt s1 s2)
    | Sifthenelse x x0 x1 =>
      do s1 <- flatten_loop2 id_t x0 (counter + 1) k to_break;
      do s2 <- flatten_loop2 id_t x1 (counter + 1 + sizeof_stmt x0) k to_break;
      OK (LScons (Some (Int.unsigned (f counter))) (Sifthenelse x (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1)) (snd id_t))) Sbreak) (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1 + sizeof_stmt x0)) (snd id_t))) Sbreak)) (app_lstmt s1 s2))
    | Sloop x x0 =>
      do s1 <- flatten_loop1 id_t x (counter + 1) (counter + 1 + sizeof_stmt x) (counter + 1 + sizeof_stmt x) k;
      do s2 <- flatten_loop2 id_t x0 (counter + 1 + sizeof_stmt x) counter k;
      OK (LScons (Some (Int.unsigned (f counter))) (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1)) (snd id_t))) Sbreak) (app_lstmt s1 s2))
    | Sbreak => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence (Sset (fst id_t) (Econst_int (f to_break) (snd id_t))) Sbreak) LSnil)
    | Scontinue => Error (msg "continue in loop2")
    | Sreturn x => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sswitch x x0 => Error (msg "switch in loop2")
    | Slabel x x0 => Error (msg "label in loop2")
    | Sgoto x => Error (msg "goto in loop2")
  end.

flatten id_t s counter k transforms the statement s into the labeled_statements used in the ``switch dispatch'', it assumes that id_t is the identifier and type corresponding to the pseudo ``program counter'' variable, s is assumed to not be within a Sloop, Scontinue and Sbreak don't have semantics outside of a Sloop and are thus rejected
Fixpoint flatten (id_t: ident * type) (s: statement) (counter k: Z): res (labeled_statements) :=
  match s with
    | Sskip => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sassign x x0 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sset x x0 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Scall x x0 x1 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sbuiltin x x0 x1 x2 => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Ssequence x x0 =>
      do s1 <- flatten id_t x (counter) (counter + sizeof_stmt x);
      do s2 <- flatten id_t x0 (counter + sizeof_stmt x) k;
      OK (app_lstmt s1 s2)
    | Sifthenelse x x0 x1 =>
      do s1 <- flatten id_t x0 (counter + 1) k;
      do s2 <- flatten id_t x1 (counter + 1 + sizeof_stmt x0) k;
      OK (LScons (Some (Int.unsigned (f counter))) (Sifthenelse x (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1)) (snd id_t))) Sbreak) (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1 + sizeof_stmt x0)) (snd id_t))) Sbreak)) (app_lstmt s1 s2))
    | Sloop x x0 =>
      do s1 <- flatten_loop1 id_t x (counter + 1) (counter + 1 + sizeof_stmt x) (counter + 1 + sizeof_stmt x) k;
      do s2 <- flatten_loop2 id_t x0 (counter + 1 + sizeof_stmt x) counter k;
      OK (LScons (Some (Int.unsigned (f counter))) (Ssequence (Sset (fst id_t) (Econst_int (f (counter + 1)) (snd id_t))) Sbreak) (app_lstmt s1 s2))
    | Sbreak => Error (msg "break not in loop or switch")
    | Scontinue => Error (msg "continue not in loop")
    | Sreturn x => OK (LScons (Some (Int.unsigned (f counter))) (Ssequence s (Ssequence (Sset (fst id_t) (Econst_int (f k) (snd id_t))) Sbreak)) LSnil)
    | Sswitch x x0 => Error (msg "no switch")
    | Slabel x x0 => Error (msg "no label")
    | Sgoto x => Error (msg "no goto")
  end.

obf_body id_t s assumes that s is the body of some function and returns the corresponding obfuscated body while assuming that id_t is the identifier and type of the variable that is to be used as pseudo ``program counter'', the size of the body is also checked to be less than 2^32 - 1 so that it can fit inside of the ``switch'' dispatcher
Definition obf_body (id_t: ident * type) (s: statement) :=
  if zlt (sizeof_stmt s) Int.max_unsigned then
    do ls <- flatten id_t s 1 0;
    OK (Ssequence
          (Sset (fst id_t) (Econst_int (f 1) (snd id_t)))
          (Sloop
             (Ssequence
                (Sifthenelse (Ebinop One (Etempvar (fst id_t) (snd id_t)) (Econst_int (f 0) (snd id_t)) (snd id_t))
                             Sskip
                             Sbreak)
                (Sswitch (Etempvar (fst id_t) (snd id_t)) ls)) Sskip))
  else Error (msg "Program too big").

transf_function f returns the obfuscated function corresponding to f, i.e. obfuscated body and new variable declaration corresponding to the pseudo ``program counter''
Definition transf_function (f: function): res function :=
  let id_t := (new_ident_for_function f, Tint I32 Unsigned noattr) in
  do body <- obf_body id_t (fn_body f);
    OK {| fn_return := fn_return f;
          fn_callconv := fn_callconv f;
          fn_params := fn_params f;
          fn_vars := fn_vars f;
          fn_temps := id_t::fn_temps f;
          fn_body := body
       |}.

transf_fundef transforms function definitions, it obfuscates if the function definition is internal and does nothing otherwise
Definition transf_fundef (fd: Clight.fundef): res Clight.fundef :=
  match fd with
    | Clight.Internal f => do tf <- transf_function f; OK (Clight.Internal tf)
    | Clight.External ef targs tres cconv => OK (Clight.External ef targs tres cconv)
  end.

transf_program p applies the transformation on the program p
Definition transf_program (p: Clight.program): res Clight.program :=
  do p' <- AST.transform_partial_program transf_fundef p;
  OK {| Clight.prog_defs := AST.prog_defs p';
        Clight.prog_public := AST.prog_public p';
        Clight.prog_main := AST.prog_main p';
        Clight.prog_types := prog_types p;
        Clight.prog_comp_env := prog_comp_env p;
        Clight.prog_comp_env_eq := prog_comp_env_eq p |}.

End PARAMETRISATION.