From 77cb68bf44e1895afc00435067bbf69067c4b47d Mon Sep 17 00:00:00 2001
From: Nicolas Descamps <nicolas.descamps@theodo.com>
Date: Fri, 21 Mar 2025 23:41:39 +0100
Subject: [PATCH] add files for tp2

---
 TP2/cfg.py                          | 144 ++++++++++++++++++++++
 TP2/constant_propagation.py         | 107 ++++++++++++++++
 TP2/example_constant_propagation.py |  29 +++++
 TP2/example_iteration.py            |  52 ++++++++
 TP2/example_opsem.py                |  21 ++++
 TP2/iteration.py                    |  46 +++++++
 TP2/opsem.py                        |  50 ++++++++
 TP2/syntax.py                       | 185 ++++++++++++++++++++++++++++
 8 files changed, 634 insertions(+)
 create mode 100644 TP2/cfg.py
 create mode 100644 TP2/constant_propagation.py
 create mode 100644 TP2/example_constant_propagation.py
 create mode 100644 TP2/example_iteration.py
 create mode 100644 TP2/example_opsem.py
 create mode 100644 TP2/iteration.py
 create mode 100644 TP2/opsem.py
 create mode 100644 TP2/syntax.py

diff --git a/TP2/cfg.py b/TP2/cfg.py
new file mode 100644
index 0000000..08fa8d0
--- /dev/null
+++ b/TP2/cfg.py
@@ -0,0 +1,144 @@
+"""Control flow graphs
+
+- Label and its subclasses describe the label on the edges of the CfGs
+- Nodes describes nodes, with an id and its successors
+- Graph describes a graph
+- Cfg describes a cfg, i.e. a graph with initial, final, and error nodes
+- function cfg creates a cfg for the given program
+"""
+from syntax import *
+from dataclasses import dataclass
+
+@dataclass
+class Label:
+    """generic class for edges' labels"""
+    pass
+
+@dataclass
+class LSkip(Label):
+    """label skip (does nothing)"""
+    def __str__(self):
+        return "skip"
+
+@dataclass
+class LSet(Label):
+    """assignment label"""
+    var: str
+    expr: ArithExpr
+    def __str__(self):
+        return f"{self.var} := {self.expr}"
+
+@dataclass
+class LTest(Label):
+    """test label"""
+    cond: BoolExpr
+    def __str__(self):
+        return f"?{self.cond}"
+
+@dataclass
+class LErr(Label):
+    """error label"""
+    err: Expr
+    def __str__(self):
+        return f"Err({self.err})"
+
+class Node:
+    """node of the graph, with its id and its successors"""
+    def __init__(self,nid):
+        self.nid = nid
+        self.succs = []
+    def __str__(self):
+        return f"Node {self.nid}"
+
+class Graph:
+    nb_id: int
+    nodes: list[Node]
+
+    """a graph, i.e. a list of nodes"""
+    def __init__(self):
+        self.nb_nid = 0
+        self.nodes = []
+
+    def new_node(self) -> Node:
+        """creates a new node, with a fresh id and no succs"""
+        node = Node(self.nb_nid)
+        self.nb_nid += 1
+        self.nodes += [node]
+        return node
+
+    def new_edge(self,n1: Node,n2: Node,l: Label) -> None:
+        """creates a new edge.
+        If the nodes are not already in the graph, they are added.
+        In that case, no check is made that a node with the same id
+        is already present or not.
+        """
+        if n1 not in self.nodes:
+            self.nodes += [n1]
+        if n2 not in self.nodes:
+            self.nodes += [n2]
+        n1.succs += [(n2,l)]
+
+def cfg_aux(g:Graph,e:Node,i:Node,instr:Instr) -> Node:
+    """auxiliary function to build a CfG
+    takes as argument a graph, the error node, the node starting
+    the current instruction, and the instruction itself, returns
+    the final node of current instruction
+    """
+    match instr:
+        case ISkip():
+           f = g.new_node()
+           g.new_edge(i,f,LSkip())
+           return f
+        case ISet(var,expr):
+            f = g.new_node()
+            g.new_edge(i,f,LSet(var,expr))
+            g.new_edge(i,e,LErr(expr))
+            return f
+        case ISeq(first,second):
+            m = cfg_aux(g,e,i,first)
+            return cfg_aux(g,e,m,second)
+        case ICond(cond,true_branch,false_branch):
+            t = g.new_node()
+            f = g.new_node()
+            g.new_edge(i,t,LTest(cond))
+            g.new_edge(i,f,LTest(BENeg(cond)))
+            g.new_edge(i,e,LErr(cond))
+            tf = cfg_aux(g,e,t,true_branch)
+            ff = cfg_aux(g,e,f,false_branch)
+            f = g.new_node()
+            g.new_edge(tf,f,LSkip())
+            g.new_edge(ff,f,LSkip())
+            return f
+        case ILoop(cond,body):
+            ib = g.new_node()
+            f = g.new_node()
+            g.new_edge(i,ib,LTest(cond))
+            g.new_edge(i,f,LTest(BENeg(cond)))
+            g.new_edge(i,e,LErr(cond))
+            fb = cfg_aux(g,e,ib,body)
+            g.new_edge(fb,i,LSkip())
+            return f
+        case _: assert False
+
+class Cfg:
+    """describes a CfG"""
+    g: Graph
+    err_node: Node
+    init_node: Node
+    final_node: Node
+    def __init__(self,instr):
+        self.g = Graph()
+        self.err_node = self.g.new_node()
+        self.init_node = self.g.new_node()
+        self.final_node = cfg_aux(self.g,self.err_node,self.init_node,instr)
+
+    def __str__(self):
+        res = ""
+        for node in self.g.nodes:
+            res+=f"{node}:\n"
+            for (dest,lab) in node.succs:
+                res+=f"  {lab} --> {dest}\n"
+        res+=f"init node is {self.init_node}\n"
+        res+=f"final node is {self.final_node}\n"
+        res+=f"err node is {self.err_node}\n"
+        return res
diff --git a/TP2/constant_propagation.py b/TP2/constant_propagation.py
new file mode 100644
index 0000000..a530c34
--- /dev/null
+++ b/TP2/constant_propagation.py
@@ -0,0 +1,107 @@
+"""
+implements a constant propagation analysis
+"""
+
+from dataclasses import dataclass
+from cfg import Label, LSkip, LSet, LTest, LErr
+from iteration import Transfer
+from syntax import *
+
+@dataclass
+class abstract_value[T]:
+    """None indicates that we don't know the precise value"""
+    value: T | None
+    def __str__(self):
+        return f"{self.value}"
+
+type abstract_env = dict[str, abstract_value[int]]
+"""mapping from variables to abstract values.
+
+As with concrete environment, a variable not in the dictionary will
+lead to an error if we try to obtain its value
+"""
+type state = abstract_env | None
+"""abstract state is either an abstract env or bottom
+"""
+
+def eval_aexp(env: abstract_env, e: ArithExpr) -> abstract_value[int] | None:
+    """evaluate an arithmetic expression in an abstract environment
+    returns None in case of error
+    """
+    match e:
+        case AECst(value): return abstract_value(value)
+        case AEVar(var):
+            if var in env: return env[var]
+            else: return None
+        case AEUop(uop,expr):
+            res = eval_aexp(env,expr)
+            if res is None: return None
+            if res.value is None: return abstract_value(None)
+            if uop == Uop.OPP: return abstract_value(-res.value)
+            return None
+        case AEBop(bop,left_expr,right_expr):
+            v1 = eval_aexp(env,left_expr)
+            v2 = eval_aexp(env,right_expr)
+            if v1 is None or v2 is None: return None
+            if v1.value is None or v2.value is None:
+                return abstract_value(None)
+            match bop:
+                case Bop.ADD: return abstract_value(v1.value+v2.value)
+                case Bop.MUL: return abstract_value(v1.value*v2.value)
+                case Bop.DIV:
+                    if v2.value == 0: return None
+                    return abstract_value(v1.value//v2.value)
+        case _: assert False
+
+def eval_bexp(env: abstract_env, e: BoolExpr) -> abstract_value[bool] | None:
+    """abstract evaluation of a boolean expression"""
+    match e:
+        case BEPlain(aexpr):
+            res = eval_aexp(env, aexpr)
+            if res is None: return None
+            if res.value is None: return abstract_value(None)
+            return abstract_value(res.value!=0)
+        case BEEq(left_expr,right_expr):
+            v1 = eval_aexp(env, left_expr)
+            v2 = eval_aexp(env, right_expr)
+            if v1 is None or v2 is None: return None
+            if v1.value is None or v2.value is None: return abstract_value(None)
+            return abstract_value(v1.value == v2.value)
+        case BELeq(left_expr,right_expr):
+            v1 = eval_aexp(env, left_expr)
+            v2 = eval_aexp(env, right_expr)
+            if v1 is None or v2 is None: return None
+            if v1.value is None or v2.value is None: return abstract_value(None)
+            return abstract_value(v1.value <= v2.value)
+        case BENeg(expr):
+            v = eval_bexp(env,expr)
+            if v is None: return None
+            if v.value is None: return v
+            return abstract_value(not v.value)
+        case _: assert False
+
+class Constant_propagation(Transfer[state]):
+    variables: frozenset[str]
+    def __init__(self,instr):
+        self.variables = variables_of_instr(instr)
+    def bottom(self) -> state:
+        ...
+    def init_state(self) -> state:
+        ...
+    def join(self,s1:state,s2:state) -> state:
+        ...
+
+    def included(self,s1: state,s2: state) -> bool:
+        ...
+
+    def tr_skip(self,s: state) -> state:
+        ...
+
+    def tr_set(self,s: state,v: str,e: ArithExpr) -> state:
+        ...
+
+    def tr_test(self,s: state,c: BoolExpr) -> state:
+        ...
+
+    def tr_err(self,s: state,e: Expr) -> state:
+        ...
diff --git a/TP2/example_constant_propagation.py b/TP2/example_constant_propagation.py
new file mode 100644
index 0000000..0bf11f1
--- /dev/null
+++ b/TP2/example_constant_propagation.py
@@ -0,0 +1,29 @@
+from syntax import *
+from cfg import Cfg
+from iteration import fixpoint_iteration
+from constant_propagation import Constant_propagation
+
+x = AEVar("x")
+y = AEVar("y")
+c = AEVar("c")
+one = AECst(1)
+zero = AECst(0)
+
+propagate_compute=ISeq(ISet("x",one),ISet("x", AEBop(Bop.ADD,x,one)))
+same_branch_if=ICond(BELeq(c,zero), ISet("y", one), ISet("y",one))
+equal_branch=ICond(BEEq(c,one),ISkip(),ISet("c",one))
+
+prog = ISeq(ISeq(propagate_compute,same_branch_if),equal_branch)
+
+cfg = Cfg(prog)
+
+cst_propagation = Constant_propagation(prog)
+
+res = fixpoint_iteration(cst_propagation,cfg)
+
+for (node, state) in res.items():
+    print(f"{node}:")
+    if state is None: print("  None")
+    else:
+        for (var, value) in state.items():
+            print(f"  {var} -> {value}")
diff --git a/TP2/example_iteration.py b/TP2/example_iteration.py
new file mode 100644
index 0000000..3a9a71b
--- /dev/null
+++ b/TP2/example_iteration.py
@@ -0,0 +1,52 @@
+from syntax import *
+from cfg import Cfg
+from iteration import *
+from opsem import eval_bexp, eval_aexp
+
+one = AECst(1)
+zero = AECst(0)
+x = AEVar("x")
+dead_if=ICond(BEPlain(one),ISet("x",one),ISet("y",zero))
+infinite_loop=ILoop(BEPlain(one),ISet("x",AEBop(Bop.ADD,x,one)))
+prog_with_dead_code=ISeq(dead_if,infinite_loop)
+
+class NoInfo(Transfer[frozenset[bool]]):
+    """Almost trivial static analysis
+ 
+    We only have two elements in our lattice, the empty set and the singleton {True}.
+    The latter is propagated from initial state to any potentially reachable state.
+    In the end, the only nodes still at bottom are the one which are guarded by
+    always-false tests.
+    """
+    type S = frozenset[bool]
+    def bottom(self) -> S: return frozenset()
+    def init_state(self) -> S: return frozenset([True])
+    def join(self, s1:S, s2:S) -> S: return s1 | s2
+    def included(self, s1:S, s2:S) -> bool: return s1.issubset(s2)
+    def tr_skip(self,s:S): return s
+    def tr_set(self, s:S, v:str, e: ArithExpr) -> S: return s
+    def tr_test(self,s:S, c:BoolExpr) -> S:
+        if eval_bexp({},c) is False:
+            return frozenset()
+        else:
+            return s
+    def tr_err(self, s:S, e:Expr) -> S:
+        if isinstance(e,ArithExpr):
+            if eval_aexp({},e) is not None:
+                return frozenset()
+            else:
+                return s
+        else:
+            if isinstance(e,BoolExpr):
+                if eval_bexp({},e) is not None:
+                    return frozenset()
+                else:
+                    return s
+            else: assert False
+
+cfg = Cfg(prog_with_dead_code)
+print(f"Cfg is {cfg}")
+res = fixpoint_iteration(NoInfo(),cfg)
+unreachable = [node for node, state in res.items() if state == frozenset()]
+for node in unreachable:
+    print(f"{node} is unreachable")
diff --git a/TP2/example_opsem.py b/TP2/example_opsem.py
new file mode 100644
index 0000000..bcc5dca
--- /dev/null
+++ b/TP2/example_opsem.py
@@ -0,0 +1,21 @@
+from syntax import *
+from opsem import eval_instr
+
+x = AEVar("x")
+y = AEVar("y")
+zero = AECst(0)
+one = AECst(1)
+xneg = BELeq(x, zero)
+bound_check = ICond(xneg,ISet("x",one),ISkip())
+init_y = ISet("y",one)
+body = ISeq(ISet("y",AEBop(Bop.MUL,y,x)),ISet("x",AEBop(Bop.ADD,x,AEUop(Uop.OPP,one))))
+loop = ILoop(BENeg(BELeq(x,zero)),body)
+factorial = ISeq(ISeq(bound_check,init_y),loop)
+
+print(f"prog is\n{factorial}")
+
+env = { "x" : 4 }
+envf = eval_instr(env,factorial)
+
+print(f"init is \n{env}")
+print(f"result is\n{envf}")
diff --git a/TP2/iteration.py b/TP2/iteration.py
new file mode 100644
index 0000000..f911696
--- /dev/null
+++ b/TP2/iteration.py
@@ -0,0 +1,46 @@
+"""Fixpoint iterator over a CfG
+
+- Transfer is a base class that must be instantiated for
+each static analysis.
+- fixpoint_iteration computes a fixpoint over a cfg for the given
+transfer instance
+"""
+from cfg import *
+from typing import Protocol
+
+class Transfer[S](Protocol):
+    """lists the functions that must be implemented for a static analysis over Lattice S"""
+    def bottom(self) -> S:
+        """the empty state, associated to unvisited nodes"""
+        ...
+    def init_state(self) -> S:
+        """the state associated to the initial node"""
+        ...
+    def join(self,s1:S,s2:S) -> S:
+        """join of two states when merging branches"""
+        ...
+    def included(self,s1: S,s2: S) -> bool:
+        """return True when the first state is included in the second"""
+        ...
+    def tr_skip(self,s: S) -> S:
+        """transfer state across a Skip transition
+
+        Almost always transfer the state unchanged
+        """
+        return s
+    def tr_set(self,s: S,v: str,e: ArithExpr) -> S:
+        """transfer state across an assignment"""
+        ...
+    def tr_test(self,s: S,c: BoolExpr) -> S:
+        """transfer state across a test transition"""
+        ...
+    def tr_err(self,s: S,e: Expr) -> S:
+        """transfer state across a transition to error state"""
+        ...
+
+def fixpoint_iteration[T](transfer: Transfer[T], cfg: Cfg) -> dict[Node,T]:
+    """computes the fixpoint for the given transfer functions over the given CfG
+ 
+   output maps each node to the computed state
+    """
+    raise NotImplementedError
diff --git a/TP2/opsem.py b/TP2/opsem.py
new file mode 100644
index 0000000..1313e98
--- /dev/null
+++ b/TP2/opsem.py
@@ -0,0 +1,50 @@
+"""
+Operational semantics for the Why language
+
+Provides evaluation functions for the three syntactic classes,
+based on a environment mapping variables to integer values.
+These functions return None when an error occur somewhere (Big step semantics)
+"""
+
+from syntax import *
+
+def eval_aexp(env: dict[str,int], exp: ArithExpr) -> int | None:
+    """evaluates an arithmetic expression"""
+    match exp:
+        case AECst(value):
+            raise NotImplementedError
+        case AEVar(var):
+            raise NotImplementedError
+        case AEUop(uop,expr):
+            raise NotImplementedError
+        case AEBop(bop,left_expr,right_expr):
+            raise NotImplementedError
+        case _: assert False
+
+def eval_bexp(env: dict[str,int], exp:BoolExpr) -> bool | None:
+    """evaluates a boolean expression"""
+    match exp:
+        case BEPlain(aexpr):
+            raise NotImplementedError
+        case BEEq(left_expr,right_expr):
+            raise NotImplementedError
+        case BELeq(left_expr,right_expr):
+            raise NotImplementedError
+        case BENeg(expr):
+            raise NotImplementedError
+        case _: assert False
+
+def eval_instr(env: dict[str,int], instr: Instr) -> dict[str,int] | None:
+    """evaluates an instruction"""
+    match instr:
+        case ISkip():
+            raise NotImplementedError
+        case ISet(var,expr):
+            raise NotImplementedError
+        case ISeq(first,second):
+            raise NotImplementedError
+        case ICond(cond,true_branch,false_branch):
+            raise NotImplementedError
+        case ILoop(cond,body):
+            raise NotImplementedError
+        case _: assert False
diff --git a/TP2/syntax.py b/TP2/syntax.py
new file mode 100644
index 0000000..c0938bd
--- /dev/null
+++ b/TP2/syntax.py
@@ -0,0 +1,185 @@
+"""Nodes of the abstract syntax tree of the program:
+
+- Uop/Bop represent unary/binary arithmetic operator
+- ArithExpr and its children describe arithmetic expressions
+- BoolExpr and its children describe boolean expressions
+- Instr and its children describe instruction
+
+"""
+from enum import Enum
+from dataclasses import dataclass
+
+class Uop(Enum):
+    """unary operators. Currently only unary minus"""
+    OPP = 1
+    def __str__(self):
+        if self is Uop.OPP:
+            return "-"
+        return ""
+
+class Bop(Enum):
+    """binary operators: addition, multiplication, and division"""
+    ADD = 1
+    MUL = 2
+    DIV = 3
+    def __str__(self):
+        match self:
+            case Bop.ADD: return "+"
+            case Bop.MUL: return "*"
+            case Bop.DIV: return "/"
+
+class Expr:
+    """generic class for expressions, either arithmetic or boolean"""
+    pass
+
+@dataclass
+class ArithExpr(Expr):
+    """generic class for arithmetic expressions"""
+    pass
+
+@dataclass
+class AECst(ArithExpr):
+    """integer constants"""
+    value: int
+    def __str__(self):
+        return f"{self.value}"
+
+@dataclass
+class AEVar(ArithExpr):
+    """variables"""
+    var: str
+    def __str__(self):
+        return self.var
+
+@dataclass
+class AEUop(ArithExpr):
+    """unary operation"""
+    uop: Uop
+    expr: ArithExpr
+    def __str__(self):
+        return f"{self.uop}{self.expr}"
+
+@dataclass
+class AEBop(ArithExpr):
+    """binary operation"""
+    bop: Bop
+    left_expr: ArithExpr
+    right_expr: ArithExpr
+    def __str__(self):
+        #TODO: only put parentheses if necessary
+        return f"({self.left_expr}) {self.bop} ({self.right_expr})"
+
+@dataclass
+class BoolExpr(Expr):
+    """generic class for boolean expressions"""
+    pass
+
+@dataclass
+class BEPlain(BoolExpr):
+    """arithmetic expression (true if not 0)"""
+    aexpr: ArithExpr
+    def __str__(self):
+        return f"{self.aexpr}"
+
+@dataclass
+class BEEq(BoolExpr):
+    """equality between two arithmetic expression"""
+    left_expr: ArithExpr
+    right_expr:ArithExpr
+    def __str__(self):
+        return f"{self.left_expr} = {self.right_expr}"
+
+@dataclass
+class BELeq(BoolExpr):
+    """less or equal"""
+    left_expr: ArithExpr
+    right_expr: ArithExpr
+    def __str__(self):
+        return f"{self.left_expr} <= {self.right_expr}"
+
+@dataclass
+class BENeg(BoolExpr):
+    """Negation"""
+    expr: BoolExpr
+    def __str__(self):
+        return f"!{self.expr}"
+
+class Instr:
+    """generic class for instruction"""
+    pass
+
+@dataclass
+class ISkip(Instr):
+    """Skip (do nothing)"""
+    def __str__(self):
+        return "skip"
+
+@dataclass
+class ISet(Instr):
+    """assignment to a variable"""
+    var: str
+    expr: ArithExpr
+    def __str__(self):
+        return f"{self.var} := {self.expr}"
+
+@dataclass
+class ISeq(Instr):
+    """sequence of two instructions"""
+    first: Instr
+    second: Instr
+    def __str__(self):
+        return f"{self.first};\n{self.second}"
+
+@dataclass
+class ICond(Instr):
+    """if then else"""
+    cond: BoolExpr
+    true_branch: Instr
+    false_branch: Instr
+    def __str__(self):
+        return f"if {self.cond} then\n{self.true_branch}\nelse\n{self.false_branch}\nfi"
+
+@dataclass
+class ILoop(Instr):
+    """while loop"""
+    cond: BoolExpr
+    body: Instr
+    def __str__(self):
+        return f"while {self.cond} do\n{self.body}\ndone"
+
+def variables_of_aexpr(e: ArithExpr) -> frozenset[str]:
+    """return the set of variables appearing in the expression"""
+    match e:
+        case AECst(_): return frozenset()
+        case AEVar(var): return frozenset(var)
+        case AEUop(expr=e): return variables_of_aexpr(e)
+        case AEBop(left_expr=le,right_expr=re):
+            return variables_of_aexpr(le) | variables_of_aexpr(re)
+        case _: assert False
+
+def variables_of_bexpr(e: BoolExpr) -> frozenset[str]:
+    """return the set of variable appearing in the expression"""
+    match e:
+        case BEPlain(expr): return variables_of_aexpr(expr)
+        case BEEq(left_expr=le,right_expr=re):
+            return variables_of_aexpr(le) | variables_of_aexpr(re)
+        case BELeq(left_expr=le,right_expr=re):
+            return variables_of_aexpr(le) | variables_of_aexpr(re)
+        case BENeg(expr):
+            return variables_of_bexpr(expr)
+        case _: assert False
+
+def variables_of_instr(instr: Instr) -> frozenset[str]:
+    """return the set of variables appearing in a program"""
+    match instr:
+        case ISkip(): return frozenset()
+        case ISet(var=v,expr=e):
+            return frozenset([v]) | variables_of_aexpr(e)
+        case ISeq(first=i1,second=i2):
+            return variables_of_instr(i1) | variables_of_instr(i2)
+        case ICond(cond=c,true_branch=tb,false_branch=fb):
+            vbranches = variables_of_instr(tb) | variables_of_instr(fb)
+            return variables_of_bexpr(c) | vbranches
+        case ILoop(cond=c,body=b):
+            return variables_of_bexpr(c) | variables_of_instr(b)
+        case _ : assert False
-- 
GitLab