(* *********************************************************************)
(* *)
(* The Compcert verified compiler *)
(* *)
(* Laurence Rideau, INRIA Sophia-Antipolis-M\u00e9diterran\u00e9e *)
(* Bernard Paul Serpette, INRIA Sophia-Antipolis-M\u00e9diterran\u00e9e *)
(* Xavier Leroy, INRIA Paris-Rocquencourt *)
(* *)
(* Copyright Institut National de Recherche en Informatique et en *)
(* Automatique. All rights reserved. This file is distributed *)
(* under the terms of the GNU General Public License as published by *)
(* the Free Software Foundation, either version 2 of the License, or *)
(* (at your option) any later version. This file is also distributed *)
(* under the terms of the INRIA Non-Commercial License Agreement. *)
(* *)
(* *********************************************************************)
(** Translation of parallel moves into sequences of individual moves *)
(** The ``parallel move'' problem, also known as ``parallel assignment'',
is the following. We are given a list of (source, destination) pairs
of locations. The goal is to find a sequence of elementary
moves ([loc <- loc] assignments) such that, at the end of the sequence,
location [dst] contains the value of location [src] at the beginning of
the sequence, for each ([src], [dst]) pairs in the given problem.
Moreover, other locations should keep their values, except one register
of each type, which can be used as temporaries in the generated sequences.
The parallel move problem is trivial if the sources and destinations do
not overlap. For instance,
<<
(R1, R2) <- (R3, R4) becomes R1 <- R3; R2 <- R4
>>
However, arbitrary overlap is allowed between sources and destinations.
This requires some care in ordering the individual moves, as in
<<
(R1, R2) <- (R3, R1) becomes R2 <- R1; R1 <- R3
>>
Worse, cycles (permutations) can require the use of temporaries, as in
<<
(R1, R2, R3) <- (R2, R3, R1) becomes T <- R1; R1 <- R2; R2 <- R3; R3 <- T;
>>
An amazing fact is that for any parallel move problem, at most one temporary
(or in our case one integer temporary and one float temporary) is needed.
The development in this section was designed by Laurence Rideau and
Bernard Serpette. It is described in the paper
``Tilting at windmills with Coq: Formal verification of
a compilation algorithm for parallel moves'',
#<A HREF="http://hal.inria.fr/inria-00176007">#
http://hal.inria.fr/inria-00176007
#</A>#
*)
Require Import Relations. Require Import Axioms. Require Import Coqlib. Require Import Recdef. Section PARMOV. (** * Registers, moves, and their semantics *)
(** The development is parameterized by the type of registers,
equipped with a decidable equality. *)
Variable reg: Type. Variable reg_eq : forall (r1 r2: reg), {r1=r2} + {r1<>r2}. (** The [temp] function maps every register [r] to the register that
should be used to save the value of [r] temporarily while performing
a cyclic assignment involving [r]. In the simplest case, there is
only one such temporary register [rtemp] and [temp] is the constant
function [fun r => rtemp]. In more realistic cases, different temporary
registers can be used to hold different values. In the case of Compcert,
we have two temporary registers, one for integer values and the other
for floating-point values, and [temp] returns the appropriate temporary
depending on the type of its argument. *)
Variable temp: reg -> reg. Definition is_temp (r: reg): Prop := exists s, r = temp s. (** A set of moves is a list of pairs of registers, of the form
(source, destination). *)
Definition moves := (list (reg * reg))%type. (* src -> dst *)
Definition srcs (m: moves) := List.map (@fst reg reg) m. Definition dests (m: moves) := List.map (@snd reg reg) m. (** ** Semantics of moves *)
(** The dynamic semantics of moves is given in terms of environments.
An environment is a mapping of registers to their current values. *)
Variable val: Type. Definition env := reg -> val.
Proof. Admitted.
Proof (@extensionality reg val).
(** The main operation over environments is update: it assigns
a value [v] to a register [r] and preserves the values of other
registers. *)
Definition update (r: reg) (v: val) (e: env) : env :=
fun r' => if reg_eq r' r then v else e r'.
(** A list of moves [(src1, dst1), ..., (srcN, dstN)] can be given
three different semantics, as environment transformers.
The first semantics corresponds to a parallel assignment:
[dst1, ..., dstN] are set to the initial values of [src1, ..., srcN]. *)
Fixpoint exec_par (m: moves) (e: env) {struct m}: env :=
match m with
| nil => e
| (s, d) :: m' => update d (e s) (exec_par m' e)
end. (** The second semantics corresponds to a sequence of individual
assignments: first, [dst1] is set to the initial value of [src1];
then, [dst2] is set to the current value of [src2] after the first
assignment; etc. *)
Fixpoint exec_seq (m: moves) (e: env) {struct m}: env :=
match m with
| nil => e
| (s, d) :: m' => exec_seq m' (update d (e s) e)
end. (** The third semantics is also sequential, but executes the moves
in reverse order, starting with [srcN, dstN] and finishing with
[src1, dst1]. *)
Fixpoint exec_seq_rev (m: moves) (e: env) {struct m}: env :=
match m with
| nil => e
| (s, d) :: m' =>
let e' := exec_seq_rev m' e in
update d (e' s) e'
end. (** * Specification of the non-deterministic algorithm *)
(** Rideau and Serpette's parallel move compilation algorithm is first
specified as a non-deterministic set of rewriting rules operating
over triples [(mu, sigma, tau)] of moves. We define these rules
as an inductive predicate [transition] relating triples of moves,
and its reflexive transitive closure [transitions]. *)
Inductive state: Type :=
State (mu: moves) (sigma: moves) (tau: moves) : state. Definition no_read (mu: moves) (d: reg) : Prop :=
~In d (srcs mu). Inductive transition: state -> state -> Prop :=
| tr_nop: forall mu1 r mu2 sigma tau,
transition (State (mu1 ++ (r, r) :: mu2) sigma tau)
(State (mu1 ++ mu2) sigma tau)
| tr_start: forall mu1 s d mu2 tau,
transition (State (mu1 ++ (s, d) :: mu2) nil tau)
(State (mu1 ++ mu2) ((s, d) :: nil) tau)
| tr_push: forall mu1 d r mu2 s sigma tau,
transition (State (mu1 ++ (d, r) :: mu2) ((s, d) :: sigma) tau)
(State (mu1 ++ mu2) ((d, r) :: (s, d) :: sigma) tau)
| tr_loop: forall mu sigma s d tau,
transition (State mu (sigma ++ (s, d) :: nil) tau)
(State mu (sigma ++ (temp s, d) :: nil) ((s, temp s) :: tau))
| tr_pop: forall mu s0 d0 s1 d1 sigma tau,
no_read mu d1 -> d1 <> s0 ->
transition (State mu ((s1, d1) :: sigma ++ (s0, d0) :: nil) tau)
(State mu (sigma ++ (s0, d0) :: nil) ((s1, d1) :: tau))
| tr_last: forall mu s d tau,
no_read mu d ->
transition (State mu ((s, d) :: nil) tau)
(State mu nil ((s, d) :: tau)). Definition transitions: state -> state -> Prop :=
clos_refl_trans state transition. (** ** Well-formedness properties of states *)
(** A state [mu, sigma, tau] is well-formed if the following properties hold:
- [mu] concatenated with [sigma] is a ``mill'', that is, no destination
appears twice (predicate [is_mill] below).
- No temporary register appears in [mu] (predicate [move_no_temp]).
- No temporary register appears in [sigma] except perhaps as the source
of the last move in [sigma] (predicate [temp_last]).
- [sigma] is a ``path'', that is, the source of a move is the destination
of the following move.
*)
Definition is_mill (m: moves) : Prop :=
list_norepet (dests m). Definition is_not_temp (r: reg) : Prop :=
forall d, r <> temp d. Definition move_no_temp (m: moves) : Prop :=
forall s d, In (s, d) m -> is_not_temp s /\ is_not_temp d. Definition temp_last (m: moves) : Prop :=
match List.rev m with
| nil => True
| (s, d) :: m' => is_not_temp d /\ move_no_temp m'
end. Definition is_first_dest (m: moves) (d: reg) : Prop :=
match m with
| nil => True
| (s0, d0) :: _ => d = d0
end. Inductive is_path: moves -> Prop :=
| is_path_nil:
is_path nil
| is_path_cons: forall s d m,
is_first_dest m s ->
is_path m ->
is_path ((s, d) :: m). Inductive state_wf: state -> Prop :=
| state_wf_intro:
forall mu sigma tau,
is_mill (mu ++ sigma) ->
move_no_temp mu ->
temp_last sigma ->
is_path sigma ->
state_wf (State mu sigma tau).
(** ** Transitions preserve semantics *)
(** We define the semantics of states as follows.
For a triple [mu, sigma, tau], we perform the moves [tau] in
reverse sequential order, then the moves [mu ++ sigma] in parallel. *)
Definition statemove (st: state) (e: env) :=
match st with
| State mu sigma tau => exec_par (mu ++ sigma) (exec_seq_rev tau e)
end.
Proof. Admitted.
Proof. induction m. { simpl. intros. auto. } { simpl. intros. destruct a as [s d]. rewrite update_o. { apply IHm. tauto. } { simpl in H. intuition. } } Qed.
Proof. Admitted.
Proof. induction m1. { simpl. intros. auto. } { simpl. intros. destruct a as [s0 d0]. simpl in H. rewrite IHm1. { simpl. apply update_commut. tauto. } { tauto. } } Qed.
Proof. Admitted.
Proof. intros. autorewrite with pmov in H. rewrite exec_par_lift. { simpl. replace (e r) with (exec_par (m1 ++ m2) e r). { apply update_ident. } { apply exec_par_outside. autorewrite with pmov. tauto. } } { tauto. } Qed.
(** [env_equiv] is an equivalence relation between environments
capturing the fact that two environments assign the same values to
non-temporary registers, but can disagree on the values of temporary
registers. *)
Definition env_equiv (e1 e2: env) : Prop :=
forall r, is_not_temp r -> e1 r = e2 r.
Proof. intros. red. intros. eauto. Qed.
Proof. unfold env_equiv. auto. Qed.
Proof. intros. red. intros. subst. eauto. Qed.
Proof. unfold env_equiv. intros. rewrite H. auto. Qed.
Proof. intros. red. intros. rewrite H. eauto. eauto. Qed.
(** * Determinisation of the transition relation *)
(** We now define a deterministic variant [dtransition] of the
transition relation [transition]. [dtransition] is deterministic
in the sense that at most one transition applies to a given state. *)
Inductive dtransition: state -> state -> Prop :=
| dtr_nop: forall r mu tau,
dtransition (State ((r, r) :: mu) nil tau)
(State mu nil tau)
| dtr_start: forall s d mu tau,
s <> d ->
dtransition (State ((s, d) :: mu) nil tau)
(State mu ((s, d) :: nil) tau)
| dtr_push: forall mu1 d r mu2 s sigma tau,
no_read mu1 d ->
dtransition (State (mu1 ++ (d, r) :: mu2) ((s, d) :: sigma) tau)
(State (mu1 ++ mu2) ((d, r) :: (s, d) :: sigma) tau)
| dtr_loop_pop: forall mu s r0 d sigma tau,
no_read mu r0 ->
dtransition (State mu ((s, r0) :: sigma ++ (r0, d) :: nil) tau)
(State mu (sigma ++ (temp r0, d) :: nil) ((s, r0) :: (r0, temp r0) :: tau))
| dtr_pop: forall mu s0 d0 s1 d1 sigma tau,
no_read mu d1 -> d1 <> s0 ->
dtransition (State mu ((s1, d1) :: sigma ++ (s0, d0) :: nil) tau)
(State mu (sigma ++ (s0, d0) :: nil) ((s1, d1) :: tau))
| dtr_last: forall mu s d tau,
no_read mu d ->
dtransition (State mu ((s, d) :: nil) tau)
(State mu nil ((s, d) :: tau)). Definition dtransitions: state -> state -> Prop :=
clos_refl_trans state dtransition.
(** * The compilation function *)
(** We now define a function that takes a well-formed parallel move [mu]
and returns a sequence of elementary moves [tau] that is semantically
equivalent. We start by defining a number of auxiliary functions. *)
Fixpoint split_move (m: moves) (r: reg) {struct m} : option (moves * reg * moves) :=
match m with
| nil => None
| (s, d) :: tl =>
if reg_eq s r
then Some (nil, d, tl)
else match split_move tl r with
| None => None
| Some (before, d', after) => Some ((s, d) :: before, d', after)
end
end. Fixpoint is_last_source (r: reg) (m: moves) {struct m} : bool :=
match m with
| nil => false
| (s, d) :: nil =>
if reg_eq s r then true else false
| (s, d) :: tl =>
is_last_source r tl
end. Fixpoint replace_last_source (r: reg) (m: moves) {struct m} : moves :=
match m with
| nil => nil
| (s, d) :: nil => (r, d) :: nil
| s_d :: tl => s_d :: replace_last_source r tl
end. Definition final_state (st: state) : bool :=
match st with
| State nil nil _ => true
| _ => false
end. Definition parmove_step (st: state) : state :=
match st with
| State nil nil _ => st
| State ((s, d) :: tl) nil l =>
if reg_eq s d
then State tl nil l
else State tl ((s, d) :: nil) l
| State t ((s, d) :: b) l =>
match split_move t d with
| Some (t1, r, t2) =>
State (t1 ++ t2) ((d, r) :: (s, d) :: b) l
| None =>
match b with
| nil => State t nil ((s, d) :: l)
| _ =>
if is_last_source d b
then State t (replace_last_source (temp d) b) ((s, d) :: (d, temp d) :: l)
else State t b ((s, d) :: l)
end
end
end.
Proof. Admitted.
Proof. unfold no_read. induction m. { simpl. intros. tauto. } { simpl. intros. destruct a as [s d]. destruct (reg_eq s r). { subst s. auto. } { specialize (IHm r). destruct (split_move m r) as [[[before d'] after] | ]. { destruct IHm. subst m. simpl. intuition. } { simpl. intuition. } } } Qed.
Proof. Admitted.
Proof. induction m. { simpl. destruct (reg_eq s r). { congruence. } { congruence. } } { simpl. destruct a as [s0 d0]. case_eq (m ++ (s, d) :: nil). { intros. generalize (app_cons_not_nil m nil (s, d)). congruence. } { intros. rewrite <- H. auto. } } Qed.
Proof. Admitted.
Proof. induction m. { simpl. auto. } { simpl. destruct a as [s0 d0]. case_eq (m ++ (s, d) :: nil). { intros. generalize (app_cons_not_nil m nil (s, d)). congruence. } { intros. rewrite <- H. congruence. } } Qed.
(** The compilation function is obtained by iterating the [parmov_step]
function. To show that this iteration always terminates, we introduce
the following measure over states. *)
Open Scope nat_scope. Definition measure (st: state) : nat :=
match st with
| State mu sigma tau => 2 * List.length mu + List.length sigma
end.
(** Here is an alternate formulation of [parmove], where the
parallel move problem is given as two separate lists of sources
and destinations. *)
Definition parmove2 (sl dl: list reg) : moves :=
parmove (List.combine sl dl).
(** * Additional syntactic properties *)
(** We now show an additional property of the sequence of moves
generated by [parmove mu]: any such move [s -> d] is either
already present in [mu], or of the form [temp s -> d] or [s -> temp s]
for some [s -> d] in [mu]. This syntactic property is useful
to show that [parmove] preserves register classes, for instance. *)
Section PROPERTIES. Variable initial_move: moves. Inductive wf_move: reg -> reg -> Prop :=
| wf_move_same: forall s d,
In (s, d) initial_move -> wf_move s d
| wf_move_temp_left: forall s d,
wf_move s d -> wf_move (temp s) d
| wf_move_temp_right: forall s d,
wf_move s d -> wf_move s (temp s). Definition wf_moves (m: moves) : Prop :=
forall s d, In (s, d) m -> wf_move s d.
(** As a second corollary, we show that [parmov] preserves register
classes, in the sense made precise below. *)
Section REGISTER_CLASSES. Variable class: Type. Variable regclass: reg -> class. Hypothesis temp_preserves_class: forall r, regclass (temp r) = regclass r. Definition is_class_compatible (mu: moves) : Prop :=
forall s d, In (s, d) mu -> regclass s = regclass d.
Proof. Admitted.
Proof. intros. assert (forall s d, wf_move mu s d -> regclass s = regclass d). { induction 1. { apply H. auto. } { rewrite temp_preserves_class. auto. } { symmetry. apply temp_preserves_class. } } { red. intros. apply H0. apply parmove_wf_moves. auto. } Qed.
End REGISTER_CLASSES. (** * Extension to partially overlapping registers *)
(** We now extend the previous results to the case where distinct
registers can partially overlap, so that assigning to one register
changes the value of the other. We asuume given a disjointness relation
[disjoint] between registers. *)
Variable disjoint: reg -> reg -> Prop. Hypothesis disjoint_sym:
forall r1 r2, disjoint r1 r2 -> disjoint r2 r1. Hypothesis disjoint_not_equal:
forall r1 r2, disjoint r1 r2 -> r1 <> r2. (** Two registers partially overlap if they are different and not disjoint.
For the Coq development, it is easier to define the complement:
two registers do not partially overlap if they are identical or disjoint. *)
Definition no_overlap (r1 r2: reg) : Prop :=
r1 = r2 \/ disjoint r1 r2.
(** We axiomatize the effect of assigning a value to a register over an
execution environment. The target register is set to the given value
(property [weak_update_s]), and registers disjoint from the target
keep their previous values (property [weak_update_d]). The values of
other registers are undefined after the assignment. *)
Variable weak_update: reg -> val -> env -> env. Hypothesis weak_update_s:
forall r v e, weak_update r v e r = v. Hypothesis weak_update_d:
forall r1 v e r2, disjoint r1 r2 -> weak_update r1 v e r2 = e r2. Fixpoint weak_exec_seq (m: moves) (e: env) {struct m}: env :=
match m with
| nil => e
| (s, d) :: m' => weak_exec_seq m' (weak_update d (e s) e)
end. Definition disjoint_list (r: reg) (l: list reg) : Prop :=
forall r', In r' l -> disjoint r r'. Inductive pairwise_disjoint: list reg -> Prop :=
| pairwise_disjoint_nil:
pairwise_disjoint nil
| pairwise_disjoint_cons: forall r l,
disjoint_list r l ->
pairwise_disjoint l ->
pairwise_disjoint (r :: l). Definition disjoint_temps (r: reg) : Prop :=
forall t, is_temp t -> disjoint r t. Section OVERLAP. (** We consider a parallel move problem [mu] that satisfies the following
conditions, which are stronger, overlap-aware variants of the
[move_no_temp mu] and [is_mill mu] conditions used previously. *)
Variable mu: moves. (** Sources and destinations are disjoint from all temporary registers. *)
Hypothesis mu_no_temporaries_src:
forall s, In s (srcs mu) -> disjoint_temps s. Hypothesis mu_no_temporaries_dst:
forall d, In d (dests mu) -> disjoint_temps d. (** Destinations are pairwise disjoint. *)
Hypothesis mu_dest_pairwise_disjoint:
pairwise_disjoint (dests mu). (** Sources and destinations do not partially overlap. *)
Hypothesis mu_src_dst_no_overlap:
forall s d, In s (srcs mu) -> In d (dests mu) -> no_overlap s d. (** Distinct temporaries do not partially overlap. *)
Hypothesis temps_no_overlap:
forall t1 t2, is_temp t1 -> is_temp t2 -> no_overlap t1 t2.
Proof. red. intros. split. { apply disjoint_temps_not_temp. apply mu_no_temporaries_src. auto. unfold srcs. change s with (fst (s,d)). apply List.in_map. auto. } { apply disjoint_temps_not_temp. apply mu_no_temporaries_dst. auto. unfold dests. change d with (snd (s,d)). apply List.in_map. auto. } Qed.
(** We define the ``adherence'' of the problem [mu] as the set of
registers that partially overlap with one of the registers
possibly assigned by the parallel move: destinations and temporaries.
Again, we define the complement of the ``adherence'' set, which is
more convenient for Coq reasoning. *)
Definition no_adherence (r: reg) : Prop :=
forall x, In x (dests mu) \/ is_temp x -> no_overlap x r.
Proof. induction m. simpl. intros. red. left. easy. simpl. intuition. red. inv H. eauto. red. inv H. eauto. red. inv H. eauto. red. inv H. intuition. Qed.
(** The relation [env_match] holds between two environments [e1] and [e2]
if they assign the same values to all registers not in the adherence set. *)
Definition env_match (e1 e2: env) : Prop :=
forall r, no_adherence r -> e1 r = e2 r.