如何确定一个给定的模式是否“好”,即它是否是穷尽和非重叠的,适用于 ML 风格的编程语言?
假设你有以下类型的模式:
match lst with
x :: y :: [] -> ...
[] -> ...
或者:match lst with
x :: xs -> ...
x :: [] -> ...
[] -> ...
一个好的类型检查器会警告第一个不是穷尽的,而第二个是重叠的。通常情况下,类型检查器将如何对任意数据类型做出这些决策?如何确定一个给定的模式是否“好”,即它是否是穷尽和非重叠的,适用于 ML 风格的编程语言?
假设你有以下类型的模式:
match lst with
x :: y :: [] -> ...
[] -> ...
或者:match lst with
x :: xs -> ...
x :: [] -> ...
[] -> ...
一个好的类型检查器会警告第一个不是穷尽的,而第二个是重叠的。通常情况下,类型检查器将如何对任意数据类型做出这些决策?{[], x :: y :: zs}
的例子开始。我们从这里开始。 xs covering {[], x :: y :: zs}
[] covering {[]}
x :: ys covering {x :: y :: zs}
x :: [] covering {}
x :: y :: zs covering {x :: y :: zs}
xs covering {[], ys}
[] covering {[], []}
x :: xs covering {y :: ys}
::
情况一样)。[]
、[[]]
、[[];[]]
。-rectypes
,那么这组列表将具有单个类型:('a list) as 'a.
type reclist = ('a list) as 'a
reclist
类型匹配的模式:type p = Nil | Any | Cons of p * p
要将OCaml模式转换为这种形式,首先要使用(::)重写它。然后将[]替换为Nil,_替换为Any,(::)替换为Cons。所以模式[] :: _
可以翻译为Cons (Nil, Any)
。
下面是一个将模式与reclist匹配的函数:
let rec pmatch (p: p) (l: reclist) =
match p, l with
| Any, _ -> true
| Nil, [] -> true
| Cons (p', q'), h :: t -> pmatch p' h && pmatch q' t
| _ -> false
以下是使用示例。请注意,使用了-rectypes
:
$ ocaml312 -rectypes
Objective Caml version 3.12.0
# #use "pat.ml";;
type p = Nil | Any | Cons of p * p
type reclist = 'a list as 'a
val pmatch : p -> reclist -> bool = <fun>
# pmatch (Cons(Any, Nil)) [];;
- : bool = false
# pmatch (Cons(Any, Nil)) [[]];;
- : bool = true
# pmatch (Cons(Any, Nil)) [[]; []];;
- : bool = false
# pmatch (Cons (Any, Nil)) [ [[]; []] ];;
- : bool = true
#
模式Cons(Any,Nil)
应匹配任何长度为1的列表,并且它似乎确实有效。
因此,编写一个函数intersect
似乎非常简单,该函数接受两个模式并返回匹配两个模式的交集的模式。由于这些模式可能根本不相交,当没有交集时它返回None
,否则返回Some p
。
let rec inter_exc pa pb =
match pa, pb with
| Nil, Nil -> Nil
| Cons (a, b), Cons (c, d) -> Cons (inter_exc a c, inter_exc b d)
| Any, b -> b
| a, Any -> a
| _ -> raise Not_found
let intersect pa pb =
try Some (inter_exc pa pb) with Not_found -> None
let intersectn ps =
(* Intersect a list of patterns.
*)
match ps with
| [] -> None
| head :: tail ->
List.fold_left
(fun a b -> match a with None -> None | Some x -> intersect x b)
(Some head) tail
[_, []]
与模式[[], _]
相交。
前者与_ :: [] :: []
相同,因此是Cons(Any,Cons(Nil,Nil))
。
后者与[] :: _ :: []
相同,因此是Cons(Nil,(Cons(Any,Nil))
。# intersect (Cons (Any, Cons (Nil, Nil))) (Cons (Nil, Cons (Any, Nil)));;
- : p option = Some (Cons (Nil, Cons (Nil, Nil)))
[[], []]
。None
,它们就重叠了。exhaust
的函数,用于测试给定的模式列表是否全面:let twoparts l =
(* All ways of partitioning l into two sets.
*)
List.fold_left
(fun accum x ->
let absent = List.map (fun (a, b) -> (a, x :: b)) accum
in
List.fold_left (fun accum (a, b) -> (x :: a, b) :: accum)
absent accum)
[([], [])] l
let unique l =
(* Eliminate duplicates from the list. Makes things
* faster.
*)
let rec u sl=
match sl with
| [] -> []
| [_] -> sl
| h1 :: ((h2 :: _) as tail) ->
if h1 = h2 then u tail else h1 :: u tail
in
u (List.sort compare l)
let mkpairs ps =
List.fold_right
(fun p a -> match p with Cons (x, y) -> (x, y) :: a | _ -> a) ps []
let rec submatches pairs =
(* For each matchable subset of fsts, return a list of the
* associated snds. A matchable subset has a non-empty
* intersection, and the intersection is not covered by the rest of
* the patterns. I.e., there is at least one thing that matches the
* intersection without matching any of the other patterns.
*)
let noncovint (prs, rest) =
let prs_firsts = List.map fst prs in
let rest_firsts = unique (List.map fst rest) in
match intersectn prs_firsts with
| None -> false
| Some i -> not (cover i rest_firsts)
in let pairparts = List.filter noncovint (twoparts pairs)
in
unique (List.map (fun (a, b) -> List.map snd a) pairparts)
and cover_pairs basepr pairs =
cover (fst basepr) (unique (List.map fst pairs)) &&
List.for_all (cover (snd basepr)) (submatches pairs)
and cover_cons basepr ps =
let pairs = mkpairs ps
in let revpair (a, b) = (b, a)
in
pairs <> [] &&
cover_pairs basepr pairs &&
cover_pairs (revpair basepr) (List.map revpair pairs)
and cover basep ps =
List.mem Any ps ||
match basep with
| Nil -> List.mem Nil ps
| Any -> List.mem Nil ps && cover_cons (Any, Any) ps
| Cons (a, b) -> cover_cons (a, b) ps
let exhaust ps =
cover Any ps
# exhaust [Nil];;
- : bool = false
# exhaust [Any];;
- : bool = true
# exhaust [Nil; Cons (Nil, Any); Cons (Any, Nil)];;
- : bool = false
# exhaust [Nil; Cons (Any, Any)];;
- : bool = true
# exhaust [Nil; Cons (Any, Nil); Cons (Any, (Cons (Any, Any)))];;
- : bool = true
我对这段代码进行了30,000个随机生成的模式的测试,因此我相信它是正确的。希望这些谦虚的观察结果能够对你有所帮助。
我相信模式子语言足够简单,易于分析。这就是要求模式“线性”(每个变量只能出现一次)等的原因。在这些限制下,每个模式都是从一种嵌套元组空间到一组受限元组的投影。我认为在这个模型中检查穷举和重叠并不太困难。