注:这跟genetic programming毫无关系。
最近,我有一个任务:我要写一个Partial Evaluator。
更具体的,这是个Simply Typed Lambda Calculus加上Reference/ADT的Partial Evaluator(PE)。Lambda Calculus上的PE很多人都做过,但是加上Reference就不好办了。我找了很多Scheme/ML的PE的paper,但是在里面,很多都对Effect闭口不提。就算提Effect,也是‘遇到Effect不做PE,跳过就好’。
没办法,我只好自己设计。凭借着我对Partial Evaluation跟Staging的理解,我弄了一个这样的设计:
0:我们先写一个Definitional Interpreter。
1:我们reify the Store。
2:我们利用MetaOCaml式的LetList写一个ANF converter。
3:我们把Definitional Interpreter的Value lift上Partially Static Domain,然后跟ANF converter‘合并’- 这样,Partially Evaluated Code会生成ANF代码,于是就没有code duplication跟capture avoidance substitution的问题。
别急,我们来一步步来看这是啥意思。
0:我们先写一个Definitional Interpreter。
type ('a, 'b) sum = Left of 'a | Right of 'b
type var = Var of int
type term =
| Let of (var * term * term)
| FromVar of var
| Abs of (var * term)
| App of (term * term)
| Unit
| Int of int
| Add of (term * term)
| Mult of (term * term)
| IfZero of (term * term * term)
| MkProd of (term * term)
| Zro of term
| Fst of term
| MkRef of term
| SetRef of (term * term)
| GetRef of term
| TLeft of term
| TRight of term
| Match of term * term * term
type 'a env = int -> 'a
let emptyStore _ = raise Not_found
let extend e v x = function i when i == v -> x | i -> e i
let genCounter () =
let cnt = ref 0 in
let gen () =
let ret = !cnt in
cnt := ret + 1 ;
ret
in
gen
let freshVar = genCounter ()
type value =
| VFun of (value -> value)
| VUnit
| VInt of int
| VProd of value * value
| VRef of value ref
| VSum of (value, value) sum
(* The standard metacircular evaluator. *)
let rec evalAux (e : value env) : term -> value =
let recurse t = evalAux e t in
let app x y = match x with VFun f -> f y in
function
| Let (Var var, v, body) ->
let rv = recurse v in
evalAux (extend e var rv) body
| FromVar (Var v) -> e v
| Abs (Var v, b) -> VFun (fun p -> evalAux (extend e v p) b)
| App (f, x) -> app (recurse f) (recurse x)
| Unit -> VUnit
| Int f -> VInt f
| Add (x, y) -> (
let rx = recurse x in
let ry = recurse y in
match (rx, ry) with VInt x, VInt y -> VInt (x + y) )
| Mult (x, y) -> (
let rx = recurse x in
let ry = recurse y in
match (rx, ry) with VInt x, VInt y -> VInt (x * y) )
| IfZero (i, z, nz) -> (
match recurse i with VInt 0 -> recurse z | VInt _ -> recurse nz )
| MkProd (x, y) ->
let rx = recurse x in
let ry = recurse y in
VProd (rx, ry)
| Zro x -> ( match recurse x with VProd (x, _) -> x )
| Fst x -> ( match recurse x with VProd (_, y) -> y )
| MkRef x -> VRef (ref (recurse x))
| SetRef (r, v) -> (
let vr = recurse r in
let vv = recurse v in
match vr with VRef r ->
r := vv ;
VUnit )
| GetRef r -> ( match recurse r with VRef r -> !r )
| TLeft x -> VSum (Left (recurse x))
| TRight x -> VSum (Right (recurse x))
| Match (s, lcase, rcase) -> (
let ps = recurse s in
let pl = recurse lcase in
let pr = recurse rcase in
match ps with VSum (Left x) -> app pl x | VSum (Right x) -> app pr x )
let eval = evalAux emptyStore
这就是个标准,中规中矩的Definitional Interpreter。
1:我们reify the Store。
let freshStoreId = genCounter ()
type storeId = StoreId of int
type rValue =
| RFun of (rValue -> rValue)
| RUnit
| RInt of int
| RProd of rValue * rValue
| RRef of storeId
| RSum of (rValue, rValue) sum
(* The evaluator, but with the store reified -
it is now represented and manipulated explicitly. *)
let rec rEvalAux (curStore : rValue env ref) (e : rValue env) : term -> rValue
=
let recurse t = rEvalAux curStore e t in
let app x y = match x with RFun f -> f y in
function
| Let (Var var, v, body) ->
let rv = recurse v in
rEvalAux curStore (extend e var rv) body
| FromVar (Var v) -> e v
| Abs (Var v, b) -> RFun (fun p -> rEvalAux curStore (extend e v p) b)
| App (f, x) -> app (recurse f) (recurse x)
| Unit -> RUnit
| Int f -> RInt f
| Add (x, y) -> (
let rx = recurse x in
let ry = recurse y in
match (rx, ry) with RInt x, RInt y -> RInt (x + y) )
| Mult (x, y) -> (
let rx = recurse x in
let ry = recurse y in
match (rx, ry) with RInt x, RInt y -> RInt (x * y) )
| IfZero (i, z, nz) -> (
match recurse i with RInt 0 -> recurse z | RInt _ -> recurse nz )
| MkProd (x, y) ->
let rx = recurse x in
let ry = recurse y in
RProd (rx, ry)
| Zro x -> ( match recurse x with RProd (x, _) -> x )
| Fst x -> ( match recurse x with RProd (_, y) -> y )
| MkRef x ->
let rx = recurse x in
let id = freshStoreId () in
curStore := extend !curStore id rx ;
RRef (StoreId id)
| SetRef (r, v) ->
let rr = recurse r in
let rv = recurse v in
(match rr with RRef (StoreId s) -> curStore := extend !curStore s rv) ;
RUnit
| GetRef r -> ( match recurse r with RRef (StoreId s) -> !curStore s )
| TLeft x -> RSum (Left (recurse x))
| TRight x -> RSum (Right (recurse x))
| Match (s, lcase, rcase) -> (
let rs = recurse s in
let rl = recurse lcase in
let rr = recurse rcase in
match rs with RSum (Left x) -> app rl x | RSum (Right x) -> app rr x )
let rEval = rEvalAux (ref emptyStore) emptyStore
我们不用OCaml原生的reference,而是自己建一个数据结构来实现reference。reference的Value则从metalevel的reference,成为这个数据结构的一个索引。这就是reification的意思:我们把一个抽象的概念(heap)变成具体的代码(我们的reference的实现)。
2:我们利用MetaOCaml式的LetList写一个ANF converter。
(* letList bind complex expression to a simple variable,
so one can construct some complex expression, and use it
as a variable by storing a binding in the letlist. *)
type letList = (term -> term) ref
let withLetList f =
let l = ref (fun x -> x) in
let res = f l in
!l res
let pushVar l v x =
let lv = !l in
l := fun t -> lv (Let (v, x, t))
let push l x =
let v = Var (freshVar ()) in
pushVar l v x ; FromVar v
(* Using the letList to do anf conversion by 'running' the program in compile time. *)
let rec anfAux (l : letList) : term -> term =
let recurse t = anfAux l t in
function
| Let (Var var, v, body) ->
pushVar l (Var var) (recurse v) ;
recurse body
| FromVar (Var v) -> FromVar (Var v)
| Abs (Var v, b) -> push l (Abs (Var v, withLetList (fun l -> anfAux l b)))
| App (f, x) -> push l (App (recurse f, recurse x))
| Unit -> Unit
| Int f -> Int f
| Add (x, y) -> push l (Add (recurse x, recurse y))
| Mult (x, y) -> push l (Mult (recurse x, recurse y))
| IfZero (i, z, nz) -> push l (IfZero (recurse i, recurse z, recurse nz))
| MkProd (x, y) -> push l (MkProd (recurse x, recurse y))
| Zro x -> push l (Zro (recurse x))
| Fst x -> push l (Fst (recurse x))
| MkRef x -> push l (MkRef (recurse x))
| SetRef (r, v) -> push l (SetRef (recurse r, recurse v))
| GetRef r -> push l (GetRef (recurse r))
| TLeft x -> push l (TLeft (recurse x))
| TRight x -> push l (TRight (recurse x))
| Match (s, lcase, rcase) ->
push l (Match (recurse s, recurse lcase, recurse rcase))
let anf x = withLetList (fun l -> anfAux l x)
ANF的意思是说代码中没有compound expression:1 + 2 + 3这样的代码不可能出现,而是需要写成let a = 1 + 2 in let b = a + 3 in b这样形式的。这样,我们的所有表达式都会有一个binding。(注:ANF容许let a = 1 + 2 in a + 3,但我们不容许,因为这样最后表达式没binding,不满足我们等下的需求)
3:我们把Definitional Interpreter的Value lift上Partially Static Domain,然后。。。
(* The partially-static value is just like value with store reified, but might be empty,
and always come with a term that is semantically equivalent to the original expression.
The term must not be a compound expression as it duplicate computation and effect. *)
type sValue =
| SFun of (letList -> pValue -> pValue)
| SUnit
| SInt of int
| SProd of pValue * pValue
| SRef of storeId
| SSum of (pValue, pValue) sum
and pValue = {pStatic: sValue option; dynVal: term}
let static s d = {pStatic= Some s; dynVal= d}
let staticInt i = static (SInt i) (Int i)
let dynamic d = {pStatic= None; dynVal= d}
partially static data是partial evaluator界的一个常规操作,具体就是一个value可以是‘普通’的value(static),或者可以是代码(dynamic)。为了简化,我们强制一定有dynamic(dynVal)。这也同时保证一个value不会被多次转成code。同时,我们限制dynamic为atomic term - 也就是说,dynVal的term只能有场数大小。那复合term怎么样?存进LetList里。
跟ANF converter‘合并’- 这样,Partially Evaluated Code会生成ANF代码,于是就没有code duplication跟capture avoidance substitution的问题。
(* rEval on the static part(if exist), anf on the dynamic part.
Will try to recurse aggressively to optimize even with value/control unknown.
Must clear curStore when unknown code is executed, as the store is contaminated. *)
let rec peAux (curStore : pValue env ref) (e : pValue env) (l : letList) :
term -> pValue =
let recurse t = peAux curStore e l t in
let app x y =
match x.pStatic with
| Some (SFun f) -> f l y
| _ ->
curStore := emptyStore ;
dynamic (push l (App (x.dynVal, y.dynVal)))
in
function
| Let (Var var, v, body) ->
let pv = recurse v in
pushVar l (Var var) pv.dynVal ;
peAux curStore (extend e var pv) l body
| FromVar (Var v) -> e v
| Abs (Var v, b) ->
static
(SFun (fun l p -> peAux curStore (extend e v p) l b))
(push l
(Abs
( Var v
, withLetList (fun l ->
(peAux (ref emptyStore)
(extend e v (dynamic (FromVar (Var v))))
l b)
.dynVal ) )))
| App (f, x) -> app (recurse f) (recurse x)
| Unit -> static SUnit Unit
| Int f -> staticInt f
| Add (x, y) -> (
let px = recurse x in
let py = recurse y in
match (px.pStatic, py.pStatic) with
| Some (SInt x), Some (SInt y) -> staticInt (x + y)
| _ -> dynamic (push l (Add (px.dynVal, py.dynVal))) )
| Mult (x, y) -> (
let px = recurse x in
let py = recurse y in
match (px.pStatic, py.pStatic) with
| Some (SInt x), Some (SInt y) -> staticInt (x * y)
| _ -> dynamic (push l (Mult (px.dynVal, py.dynVal))) )
| IfZero (i, z, nz) -> (
let pi = recurse i in
match pi.pStatic with
| Some (SInt 0) -> recurse z
| Some (SInt _) -> recurse nz
| _ ->
let res =
dynamic
(push l
(IfZero
( pi.dynVal
, (peAux (ref !curStore) e l z).dynVal
, (peAux (ref !curStore) e l nz).dynVal )))
in
curStore := emptyStore ;
res )
| MkProd (x, y) ->
let px = recurse x in
let py = recurse y in
static (SProd (px, py)) (push l (MkProd (px.dynVal, py.dynVal)))
| Zro x -> (
let px = recurse x in
match px.pStatic with
| Some (SProd (x, _)) -> x
| _ -> dynamic (push l (Zro px.dynVal)) )
| Fst x -> (
let px = recurse x in
match px.pStatic with
| Some (SProd (_, y)) -> y
| _ -> dynamic (push l (Fst px.dynVal)) )
| MkRef x ->
let px = recurse x in
let id = freshStoreId () in
curStore := extend !curStore id px ;
static (SRef (StoreId id)) (push l (MkRef px.dynVal))
| SetRef (r, v) ->
let pr = recurse r in
let pv = recurse v in
let _ = push l (SetRef (pr.dynVal, pv.dynVal)) in
( match pr.pStatic with
| Some (SRef (StoreId s)) -> curStore := extend !curStore s pv
| _ -> curStore := emptyStore ) ;
static SUnit Unit
| GetRef r -> (
let pr = recurse r in
try
match pr.pStatic with
| Some (SRef (StoreId s)) -> !curStore s
| _ -> raise Not_found
with _ -> dynamic (push l (GetRef pr.dynVal)) )
| TLeft x ->
let px = recurse x in
static (SSum (Left px)) (push l (TLeft px.dynVal))
| TRight x ->
let px = recurse x in
static (SSum (Right px)) (push l (TRight px.dynVal))
| Match (s, lcase, rcase) -> (
let ps = recurse s in
let pl = recurse lcase in
let pr = recurse rcase in
match ps.pStatic with
| Some (SSum (Left x)) -> app pl x
| Some (SSum (Right x)) -> app pr x
| _ ->
curStore := emptyStore ;
dynamic (push l (Match (ps.dynVal, pl.dynVal, pr.dynVal))) )
let pe x = withLetList (fun l -> (peAux (ref emptyStore) emptyStore l x).dynVal)
剩下的代码。当Definitional Interpreter可以跑的时候,用之simplify,否则就用ANF生成dynamic term。在执行未知代码的时候,我们会利用store被reify的特性,直接复制/清空 store。
你可以看到,在这个代码中,我们做了三件事:reify the store,然后跟ANF合并,最后,在一定情况special case,清空store。
但是,有一个问题:我们这个‘更改’,‘合并’,都是在语言外进行的,所以我们的代码中,也许不会有一个reified evaluator,但是一定会有一个简单的interpreter,会有一个ANF convertor,然后还需要特定的去写一个partial evaluator。
为什么我们不能在一个语言中,写一个evaluator,然后写‘对reference如此这般改一下’,得到一个reified evaluator,然后写一个ANF Convertor,然后写‘合并ANF跟rEval’,然后再写‘对lambda, if else,setref,match special case一下,清空store’?这几个步骤是毫无二义性的,也不需要任何搜索,应该可以做进语言啊。
如果在什么语言里面,的确可以这样写,请告诉我。MLPolyR好像能做一半,但是merge做不了。
再多说点,在静态分析(Abstract Garbage Collection,Abstracting Abstract Machine)里,往往会从一个Abstract Machine出发,一步步修改该Abstract Machine。Program Transformation(Defunctionalization At Work,The Algebra of Programming)里面,也会同样先出现一个简单的算法,然后一步步,通过各自手段(CPS Conversion,Defunctionalization,Fusion)得出最后的算法。这些估计更难做,但大体idea还是一样的。