diff --git a/TP2/cfg.py b/TP2/cfg.py new file mode 100644 index 0000000000000000000000000000000000000000..08fa8d04abcddee3e330c6267f0f3a7dc31f2b06 --- /dev/null +++ b/TP2/cfg.py @@ -0,0 +1,144 @@ +"""Control flow graphs + +- Label and its subclasses describe the label on the edges of the CfGs +- Nodes describes nodes, with an id and its successors +- Graph describes a graph +- Cfg describes a cfg, i.e. a graph with initial, final, and error nodes +- function cfg creates a cfg for the given program +""" +from syntax import * +from dataclasses import dataclass + +@dataclass +class Label: + """generic class for edges' labels""" + pass + +@dataclass +class LSkip(Label): + """label skip (does nothing)""" + def __str__(self): + return "skip" + +@dataclass +class LSet(Label): + """assignment label""" + var: str + expr: ArithExpr + def __str__(self): + return f"{self.var} := {self.expr}" + +@dataclass +class LTest(Label): + """test label""" + cond: BoolExpr + def __str__(self): + return f"?{self.cond}" + +@dataclass +class LErr(Label): + """error label""" + err: Expr + def __str__(self): + return f"Err({self.err})" + +class Node: + """node of the graph, with its id and its successors""" + def __init__(self,nid): + self.nid = nid + self.succs = [] + def __str__(self): + return f"Node {self.nid}" + +class Graph: + nb_id: int + nodes: list[Node] + + """a graph, i.e. a list of nodes""" + def __init__(self): + self.nb_nid = 0 + self.nodes = [] + + def new_node(self) -> Node: + """creates a new node, with a fresh id and no succs""" + node = Node(self.nb_nid) + self.nb_nid += 1 + self.nodes += [node] + return node + + def new_edge(self,n1: Node,n2: Node,l: Label) -> None: + """creates a new edge. + If the nodes are not already in the graph, they are added. + In that case, no check is made that a node with the same id + is already present or not. + """ + if n1 not in self.nodes: + self.nodes += [n1] + if n2 not in self.nodes: + self.nodes += [n2] + n1.succs += [(n2,l)] + +def cfg_aux(g:Graph,e:Node,i:Node,instr:Instr) -> Node: + """auxiliary function to build a CfG + takes as argument a graph, the error node, the node starting + the current instruction, and the instruction itself, returns + the final node of current instruction + """ + match instr: + case ISkip(): + f = g.new_node() + g.new_edge(i,f,LSkip()) + return f + case ISet(var,expr): + f = g.new_node() + g.new_edge(i,f,LSet(var,expr)) + g.new_edge(i,e,LErr(expr)) + return f + case ISeq(first,second): + m = cfg_aux(g,e,i,first) + return cfg_aux(g,e,m,second) + case ICond(cond,true_branch,false_branch): + t = g.new_node() + f = g.new_node() + g.new_edge(i,t,LTest(cond)) + g.new_edge(i,f,LTest(BENeg(cond))) + g.new_edge(i,e,LErr(cond)) + tf = cfg_aux(g,e,t,true_branch) + ff = cfg_aux(g,e,f,false_branch) + f = g.new_node() + g.new_edge(tf,f,LSkip()) + g.new_edge(ff,f,LSkip()) + return f + case ILoop(cond,body): + ib = g.new_node() + f = g.new_node() + g.new_edge(i,ib,LTest(cond)) + g.new_edge(i,f,LTest(BENeg(cond))) + g.new_edge(i,e,LErr(cond)) + fb = cfg_aux(g,e,ib,body) + g.new_edge(fb,i,LSkip()) + return f + case _: assert False + +class Cfg: + """describes a CfG""" + g: Graph + err_node: Node + init_node: Node + final_node: Node + def __init__(self,instr): + self.g = Graph() + self.err_node = self.g.new_node() + self.init_node = self.g.new_node() + self.final_node = cfg_aux(self.g,self.err_node,self.init_node,instr) + + def __str__(self): + res = "" + for node in self.g.nodes: + res+=f"{node}:\n" + for (dest,lab) in node.succs: + res+=f" {lab} --> {dest}\n" + res+=f"init node is {self.init_node}\n" + res+=f"final node is {self.final_node}\n" + res+=f"err node is {self.err_node}\n" + return res diff --git a/TP2/constant_propagation.py b/TP2/constant_propagation.py new file mode 100644 index 0000000000000000000000000000000000000000..a530c34071106acc53ced1ee96915ab2cb9deb93 --- /dev/null +++ b/TP2/constant_propagation.py @@ -0,0 +1,107 @@ +""" +implements a constant propagation analysis +""" + +from dataclasses import dataclass +from cfg import Label, LSkip, LSet, LTest, LErr +from iteration import Transfer +from syntax import * + +@dataclass +class abstract_value[T]: + """None indicates that we don't know the precise value""" + value: T | None + def __str__(self): + return f"{self.value}" + +type abstract_env = dict[str, abstract_value[int]] +"""mapping from variables to abstract values. + +As with concrete environment, a variable not in the dictionary will +lead to an error if we try to obtain its value +""" +type state = abstract_env | None +"""abstract state is either an abstract env or bottom +""" + +def eval_aexp(env: abstract_env, e: ArithExpr) -> abstract_value[int] | None: + """evaluate an arithmetic expression in an abstract environment + returns None in case of error + """ + match e: + case AECst(value): return abstract_value(value) + case AEVar(var): + if var in env: return env[var] + else: return None + case AEUop(uop,expr): + res = eval_aexp(env,expr) + if res is None: return None + if res.value is None: return abstract_value(None) + if uop == Uop.OPP: return abstract_value(-res.value) + return None + case AEBop(bop,left_expr,right_expr): + v1 = eval_aexp(env,left_expr) + v2 = eval_aexp(env,right_expr) + if v1 is None or v2 is None: return None + if v1.value is None or v2.value is None: + return abstract_value(None) + match bop: + case Bop.ADD: return abstract_value(v1.value+v2.value) + case Bop.MUL: return abstract_value(v1.value*v2.value) + case Bop.DIV: + if v2.value == 0: return None + return abstract_value(v1.value//v2.value) + case _: assert False + +def eval_bexp(env: abstract_env, e: BoolExpr) -> abstract_value[bool] | None: + """abstract evaluation of a boolean expression""" + match e: + case BEPlain(aexpr): + res = eval_aexp(env, aexpr) + if res is None: return None + if res.value is None: return abstract_value(None) + return abstract_value(res.value!=0) + case BEEq(left_expr,right_expr): + v1 = eval_aexp(env, left_expr) + v2 = eval_aexp(env, right_expr) + if v1 is None or v2 is None: return None + if v1.value is None or v2.value is None: return abstract_value(None) + return abstract_value(v1.value == v2.value) + case BELeq(left_expr,right_expr): + v1 = eval_aexp(env, left_expr) + v2 = eval_aexp(env, right_expr) + if v1 is None or v2 is None: return None + if v1.value is None or v2.value is None: return abstract_value(None) + return abstract_value(v1.value <= v2.value) + case BENeg(expr): + v = eval_bexp(env,expr) + if v is None: return None + if v.value is None: return v + return abstract_value(not v.value) + case _: assert False + +class Constant_propagation(Transfer[state]): + variables: frozenset[str] + def __init__(self,instr): + self.variables = variables_of_instr(instr) + def bottom(self) -> state: + ... + def init_state(self) -> state: + ... + def join(self,s1:state,s2:state) -> state: + ... + + def included(self,s1: state,s2: state) -> bool: + ... + + def tr_skip(self,s: state) -> state: + ... + + def tr_set(self,s: state,v: str,e: ArithExpr) -> state: + ... + + def tr_test(self,s: state,c: BoolExpr) -> state: + ... + + def tr_err(self,s: state,e: Expr) -> state: + ... diff --git a/TP2/example_constant_propagation.py b/TP2/example_constant_propagation.py new file mode 100644 index 0000000000000000000000000000000000000000..0bf11f1a7a796124d13f8a3ea0e579835985a6d2 --- /dev/null +++ b/TP2/example_constant_propagation.py @@ -0,0 +1,29 @@ +from syntax import * +from cfg import Cfg +from iteration import fixpoint_iteration +from constant_propagation import Constant_propagation + +x = AEVar("x") +y = AEVar("y") +c = AEVar("c") +one = AECst(1) +zero = AECst(0) + +propagate_compute=ISeq(ISet("x",one),ISet("x", AEBop(Bop.ADD,x,one))) +same_branch_if=ICond(BELeq(c,zero), ISet("y", one), ISet("y",one)) +equal_branch=ICond(BEEq(c,one),ISkip(),ISet("c",one)) + +prog = ISeq(ISeq(propagate_compute,same_branch_if),equal_branch) + +cfg = Cfg(prog) + +cst_propagation = Constant_propagation(prog) + +res = fixpoint_iteration(cst_propagation,cfg) + +for (node, state) in res.items(): + print(f"{node}:") + if state is None: print(" None") + else: + for (var, value) in state.items(): + print(f" {var} -> {value}") diff --git a/TP2/example_iteration.py b/TP2/example_iteration.py new file mode 100644 index 0000000000000000000000000000000000000000..3a9a71bb66f797cb0a6ac81b285ed1c2a8da2f75 --- /dev/null +++ b/TP2/example_iteration.py @@ -0,0 +1,52 @@ +from syntax import * +from cfg import Cfg +from iteration import * +from opsem import eval_bexp, eval_aexp + +one = AECst(1) +zero = AECst(0) +x = AEVar("x") +dead_if=ICond(BEPlain(one),ISet("x",one),ISet("y",zero)) +infinite_loop=ILoop(BEPlain(one),ISet("x",AEBop(Bop.ADD,x,one))) +prog_with_dead_code=ISeq(dead_if,infinite_loop) + +class NoInfo(Transfer[frozenset[bool]]): + """Almost trivial static analysis + + We only have two elements in our lattice, the empty set and the singleton {True}. + The latter is propagated from initial state to any potentially reachable state. + In the end, the only nodes still at bottom are the one which are guarded by + always-false tests. + """ + type S = frozenset[bool] + def bottom(self) -> S: return frozenset() + def init_state(self) -> S: return frozenset([True]) + def join(self, s1:S, s2:S) -> S: return s1 | s2 + def included(self, s1:S, s2:S) -> bool: return s1.issubset(s2) + def tr_skip(self,s:S): return s + def tr_set(self, s:S, v:str, e: ArithExpr) -> S: return s + def tr_test(self,s:S, c:BoolExpr) -> S: + if eval_bexp({},c) is False: + return frozenset() + else: + return s + def tr_err(self, s:S, e:Expr) -> S: + if isinstance(e,ArithExpr): + if eval_aexp({},e) is not None: + return frozenset() + else: + return s + else: + if isinstance(e,BoolExpr): + if eval_bexp({},e) is not None: + return frozenset() + else: + return s + else: assert False + +cfg = Cfg(prog_with_dead_code) +print(f"Cfg is {cfg}") +res = fixpoint_iteration(NoInfo(),cfg) +unreachable = [node for node, state in res.items() if state == frozenset()] +for node in unreachable: + print(f"{node} is unreachable") diff --git a/TP2/example_opsem.py b/TP2/example_opsem.py new file mode 100644 index 0000000000000000000000000000000000000000..bcc5dca4e8bbb042e3de024382c4cab6ac304b65 --- /dev/null +++ b/TP2/example_opsem.py @@ -0,0 +1,21 @@ +from syntax import * +from opsem import eval_instr + +x = AEVar("x") +y = AEVar("y") +zero = AECst(0) +one = AECst(1) +xneg = BELeq(x, zero) +bound_check = ICond(xneg,ISet("x",one),ISkip()) +init_y = ISet("y",one) +body = ISeq(ISet("y",AEBop(Bop.MUL,y,x)),ISet("x",AEBop(Bop.ADD,x,AEUop(Uop.OPP,one)))) +loop = ILoop(BENeg(BELeq(x,zero)),body) +factorial = ISeq(ISeq(bound_check,init_y),loop) + +print(f"prog is\n{factorial}") + +env = { "x" : 4 } +envf = eval_instr(env,factorial) + +print(f"init is \n{env}") +print(f"result is\n{envf}") diff --git a/TP2/iteration.py b/TP2/iteration.py new file mode 100644 index 0000000000000000000000000000000000000000..f91169661c566371dc324842b44f562fe95b4d76 --- /dev/null +++ b/TP2/iteration.py @@ -0,0 +1,46 @@ +"""Fixpoint iterator over a CfG + +- Transfer is a base class that must be instantiated for +each static analysis. +- fixpoint_iteration computes a fixpoint over a cfg for the given +transfer instance +""" +from cfg import * +from typing import Protocol + +class Transfer[S](Protocol): + """lists the functions that must be implemented for a static analysis over Lattice S""" + def bottom(self) -> S: + """the empty state, associated to unvisited nodes""" + ... + def init_state(self) -> S: + """the state associated to the initial node""" + ... + def join(self,s1:S,s2:S) -> S: + """join of two states when merging branches""" + ... + def included(self,s1: S,s2: S) -> bool: + """return True when the first state is included in the second""" + ... + def tr_skip(self,s: S) -> S: + """transfer state across a Skip transition + + Almost always transfer the state unchanged + """ + return s + def tr_set(self,s: S,v: str,e: ArithExpr) -> S: + """transfer state across an assignment""" + ... + def tr_test(self,s: S,c: BoolExpr) -> S: + """transfer state across a test transition""" + ... + def tr_err(self,s: S,e: Expr) -> S: + """transfer state across a transition to error state""" + ... + +def fixpoint_iteration[T](transfer: Transfer[T], cfg: Cfg) -> dict[Node,T]: + """computes the fixpoint for the given transfer functions over the given CfG + + output maps each node to the computed state + """ + raise NotImplementedError diff --git a/TP2/opsem.py b/TP2/opsem.py new file mode 100644 index 0000000000000000000000000000000000000000..1313e985e2c11ca43cc8fcaa926eec02eba135f1 --- /dev/null +++ b/TP2/opsem.py @@ -0,0 +1,50 @@ +""" +Operational semantics for the Why language + +Provides evaluation functions for the three syntactic classes, +based on a environment mapping variables to integer values. +These functions return None when an error occur somewhere (Big step semantics) +""" + +from syntax import * + +def eval_aexp(env: dict[str,int], exp: ArithExpr) -> int | None: + """evaluates an arithmetic expression""" + match exp: + case AECst(value): + raise NotImplementedError + case AEVar(var): + raise NotImplementedError + case AEUop(uop,expr): + raise NotImplementedError + case AEBop(bop,left_expr,right_expr): + raise NotImplementedError + case _: assert False + +def eval_bexp(env: dict[str,int], exp:BoolExpr) -> bool | None: + """evaluates a boolean expression""" + match exp: + case BEPlain(aexpr): + raise NotImplementedError + case BEEq(left_expr,right_expr): + raise NotImplementedError + case BELeq(left_expr,right_expr): + raise NotImplementedError + case BENeg(expr): + raise NotImplementedError + case _: assert False + +def eval_instr(env: dict[str,int], instr: Instr) -> dict[str,int] | None: + """evaluates an instruction""" + match instr: + case ISkip(): + raise NotImplementedError + case ISet(var,expr): + raise NotImplementedError + case ISeq(first,second): + raise NotImplementedError + case ICond(cond,true_branch,false_branch): + raise NotImplementedError + case ILoop(cond,body): + raise NotImplementedError + case _: assert False diff --git a/TP2/syntax.py b/TP2/syntax.py new file mode 100644 index 0000000000000000000000000000000000000000..c0938bda1f9a7554cc4db3a13763d0d68732c5bc --- /dev/null +++ b/TP2/syntax.py @@ -0,0 +1,185 @@ +"""Nodes of the abstract syntax tree of the program: + +- Uop/Bop represent unary/binary arithmetic operator +- ArithExpr and its children describe arithmetic expressions +- BoolExpr and its children describe boolean expressions +- Instr and its children describe instruction + +""" +from enum import Enum +from dataclasses import dataclass + +class Uop(Enum): + """unary operators. Currently only unary minus""" + OPP = 1 + def __str__(self): + if self is Uop.OPP: + return "-" + return "" + +class Bop(Enum): + """binary operators: addition, multiplication, and division""" + ADD = 1 + MUL = 2 + DIV = 3 + def __str__(self): + match self: + case Bop.ADD: return "+" + case Bop.MUL: return "*" + case Bop.DIV: return "/" + +class Expr: + """generic class for expressions, either arithmetic or boolean""" + pass + +@dataclass +class ArithExpr(Expr): + """generic class for arithmetic expressions""" + pass + +@dataclass +class AECst(ArithExpr): + """integer constants""" + value: int + def __str__(self): + return f"{self.value}" + +@dataclass +class AEVar(ArithExpr): + """variables""" + var: str + def __str__(self): + return self.var + +@dataclass +class AEUop(ArithExpr): + """unary operation""" + uop: Uop + expr: ArithExpr + def __str__(self): + return f"{self.uop}{self.expr}" + +@dataclass +class AEBop(ArithExpr): + """binary operation""" + bop: Bop + left_expr: ArithExpr + right_expr: ArithExpr + def __str__(self): + #TODO: only put parentheses if necessary + return f"({self.left_expr}) {self.bop} ({self.right_expr})" + +@dataclass +class BoolExpr(Expr): + """generic class for boolean expressions""" + pass + +@dataclass +class BEPlain(BoolExpr): + """arithmetic expression (true if not 0)""" + aexpr: ArithExpr + def __str__(self): + return f"{self.aexpr}" + +@dataclass +class BEEq(BoolExpr): + """equality between two arithmetic expression""" + left_expr: ArithExpr + right_expr:ArithExpr + def __str__(self): + return f"{self.left_expr} = {self.right_expr}" + +@dataclass +class BELeq(BoolExpr): + """less or equal""" + left_expr: ArithExpr + right_expr: ArithExpr + def __str__(self): + return f"{self.left_expr} <= {self.right_expr}" + +@dataclass +class BENeg(BoolExpr): + """Negation""" + expr: BoolExpr + def __str__(self): + return f"!{self.expr}" + +class Instr: + """generic class for instruction""" + pass + +@dataclass +class ISkip(Instr): + """Skip (do nothing)""" + def __str__(self): + return "skip" + +@dataclass +class ISet(Instr): + """assignment to a variable""" + var: str + expr: ArithExpr + def __str__(self): + return f"{self.var} := {self.expr}" + +@dataclass +class ISeq(Instr): + """sequence of two instructions""" + first: Instr + second: Instr + def __str__(self): + return f"{self.first};\n{self.second}" + +@dataclass +class ICond(Instr): + """if then else""" + cond: BoolExpr + true_branch: Instr + false_branch: Instr + def __str__(self): + return f"if {self.cond} then\n{self.true_branch}\nelse\n{self.false_branch}\nfi" + +@dataclass +class ILoop(Instr): + """while loop""" + cond: BoolExpr + body: Instr + def __str__(self): + return f"while {self.cond} do\n{self.body}\ndone" + +def variables_of_aexpr(e: ArithExpr) -> frozenset[str]: + """return the set of variables appearing in the expression""" + match e: + case AECst(_): return frozenset() + case AEVar(var): return frozenset(var) + case AEUop(expr=e): return variables_of_aexpr(e) + case AEBop(left_expr=le,right_expr=re): + return variables_of_aexpr(le) | variables_of_aexpr(re) + case _: assert False + +def variables_of_bexpr(e: BoolExpr) -> frozenset[str]: + """return the set of variable appearing in the expression""" + match e: + case BEPlain(expr): return variables_of_aexpr(expr) + case BEEq(left_expr=le,right_expr=re): + return variables_of_aexpr(le) | variables_of_aexpr(re) + case BELeq(left_expr=le,right_expr=re): + return variables_of_aexpr(le) | variables_of_aexpr(re) + case BENeg(expr): + return variables_of_bexpr(expr) + case _: assert False + +def variables_of_instr(instr: Instr) -> frozenset[str]: + """return the set of variables appearing in a program""" + match instr: + case ISkip(): return frozenset() + case ISet(var=v,expr=e): + return frozenset([v]) | variables_of_aexpr(e) + case ISeq(first=i1,second=i2): + return variables_of_instr(i1) | variables_of_instr(i2) + case ICond(cond=c,true_branch=tb,false_branch=fb): + vbranches = variables_of_instr(tb) | variables_of_instr(fb) + return variables_of_bexpr(c) | vbranches + case ILoop(cond=c,body=b): + return variables_of_bexpr(c) | variables_of_instr(b) + case _ : assert False