summaryrefslogtreecommitdiff
path: root/lib/parsing
diff options
context:
space:
mode:
Diffstat (limited to 'lib/parsing')
-rw-r--r--lib/parsing/headers.ml74
-rw-r--r--lib/parsing/parser.ml81
-rw-r--r--lib/parsing/utils.ml69
3 files changed, 224 insertions, 0 deletions
diff --git a/lib/parsing/headers.ml b/lib/parsing/headers.ml
new file mode 100644
index 0000000..8d63381
--- /dev/null
+++ b/lib/parsing/headers.ml
@@ -0,0 +1,74 @@
+(* Useful types for parsing *)
+
+open Types
+
+exception Parse_Error of string
+
+type terminal = tok
+
+type nonterminal = S | E | E' | F | F' | G | G' | H | H' | I
+
+type symbol = Nt of nonterminal | Tm of terminal
+
+type production = nonterminal * (symbol list)
+
+(* possible actions to take when LL parsing *)
+type action =
+ | PREDICT of production
+ | MATCH of tok
+ | ACCEPT
+ | REJECT
+
+(* takes in top stack symbol and the next input token and selects action to take *)
+type table = symbol -> tok -> action
+
+(* mutable abstract syntax tree *)
+type ast =
+ | Br of symbol * (ast ref) list
+ | Lf of symbol
+
+(* parsing state *)
+type state = {
+ stack : symbol list;
+ ast_stack : ast ref list; (* list of ast refs corresponding to symbols on the stack *)
+ input : tok list;
+ finished : bool;
+ accepted : bool
+}
+
+(* convert ast to proposition *)
+let rec prop_of_ast (tree : ast) : proposition =
+ match tree with
+ | Lf (Tm TRUE) -> True
+ | Lf (Tm FALSE) -> False
+ | Lf (Tm (LIT s)) -> Lit s
+ | Br (Nt S, e_ref :: _) -> prop_of_ast !e_ref
+ | Br (Nt I, _ :: e_ref :: _ :: _) -> prop_of_ast !e_ref (* I ::= LEFT_PAREN E RIGHT_PAREN *)
+ | Br (Nt I, _ :: i_ref :: _) -> let inner_prop = prop_of_ast !i_ref in Not(inner_prop) (* I ::= NOT I *)
+ | Br (Nt I, [some_ref]) -> (* I ::= TRUE | FALSE | LIT s *)
+ begin
+ match !some_ref with
+ | Lf (Tm TRUE) -> True
+ | Lf (Tm FALSE) -> False
+ | Lf (Tm (LIT s)) -> Lit s
+ | _ -> failwith "some bug"
+ end
+ | Br (Nt a, b_ref :: a'_ref :: _) -> (* A ::= B A' *)
+ begin
+ match !a'_ref with
+ | Lf _ -> prop_of_ast !b_ref (* A' ::= eps *)
+ | Br (_, _ :: tl) -> (* A' ::= CONNECTOR B A' *)
+ let left = prop_of_ast !b_ref in
+ let right = prop_of_ast (Br(Nt a, tl)) in
+ begin
+ match a with
+ | E -> Iff(left, right)
+ | F -> Implies(left, right)
+ | G -> Or(left, right)
+ | H -> And(left, right)
+ | _ -> failwith "a bug!"
+ end
+ | _ -> failwith "another bug"
+ end
+ | _ -> failwith "found a bug!"
+ \ No newline at end of file
diff --git a/lib/parsing/parser.ml b/lib/parsing/parser.ml
new file mode 100644
index 0000000..e56411b
--- /dev/null
+++ b/lib/parsing/parser.ml
@@ -0,0 +1,81 @@
+open Types
+open Headers
+open Utils
+
+let my_productions : production list = (* currently not in use *)
+ [
+ (S, [Nt E; Tm (LIT "$")]); (* S ::= E $ *)
+ (E, [Nt F; Nt E']);
+ (E', [Tm IFF; Nt F; Nt E']);
+ (E', []);
+ (F, [Nt G; Nt F']);
+ (F', [Tm IMPLIES; Nt G; Nt F']);
+ (F', []);
+ (G, [Nt H; Nt G']);
+ (G', [Tm OR; Nt H; Nt G']);
+ (G', []);
+ (H, [Nt I; Nt H']);
+ (H', [Tm AND; Nt I; Nt H']);
+ (H', []);
+ (I, [Tm NOT; Nt I]);
+ (I, [Tm LEFT_PAREN; Nt E; Tm RIGHT_PAREN]);
+ (I, [Tm TRUE]);
+ (I, [Tm FALSE]);
+ (I, [Tm (LIT "any string")])
+ ]
+
+
+let my_action_table (sym : symbol) (tk : tok) : action =
+ let predict_if_in_first tk prod =
+ match tk with
+ | TRUE -> PREDICT prod
+ | FALSE -> PREDICT prod
+ | LIT _ -> PREDICT prod
+ | NOT -> PREDICT prod
+ | LEFT_PAREN -> PREDICT prod
+ | _ -> REJECT
+ in
+
+ let predict_if_in_follow tk follow prod =
+ if List.mem tk follow then
+ PREDICT prod
+ else
+ REJECT
+ in
+
+ match sym, tk with
+ | Tm (LIT "$"), LIT "$" -> ACCEPT
+ | Tm t, tk -> if t = tk then MATCH t else REJECT
+ | Nt S, tk -> predict_if_in_first tk (S, [Nt E; Tm (LIT "$")])
+ | Nt E, tk -> predict_if_in_first tk (E, [Nt F; Nt E'])
+ | Nt E', IFF -> PREDICT (E', [Tm IFF; Nt F; Nt E'])
+ | Nt E', tk -> predict_if_in_follow tk [LIT "$"; RIGHT_PAREN] (E', [])
+ | Nt F, tk -> predict_if_in_first tk (F, [Nt G; Nt F'])
+ | Nt F', IMPLIES -> PREDICT (F', [Tm IMPLIES; Nt G; Nt F'])
+ | Nt F', tk -> predict_if_in_follow tk [IFF; LIT "$"; RIGHT_PAREN] (F', [])
+ | Nt G, tk -> predict_if_in_first tk (G, [Nt H; Nt G'])
+ | Nt G', OR -> PREDICT (G', [Tm OR; Nt H; Nt G'])
+ | Nt G', tk -> predict_if_in_follow tk [IMPLIES; IFF; LIT "$"; RIGHT_PAREN] (G', [])
+ | Nt H, tk -> predict_if_in_first tk (H, [Nt I; Nt H'])
+ | Nt H', AND -> PREDICT (H', [Tm AND; Nt I; Nt H'])
+ | Nt H', tk -> predict_if_in_follow tk [OR; IMPLIES; IFF; LIT "$"; RIGHT_PAREN] (H', [])
+ | Nt I, NOT -> PREDICT (I, [Tm NOT; Nt I]);
+ | Nt I, LEFT_PAREN -> PREDICT (I, [Tm LEFT_PAREN; Nt E; Tm RIGHT_PAREN]);
+ | Nt I, TRUE -> PREDICT (I, [Tm TRUE]);
+ | Nt I, FALSE -> PREDICT (I, [Tm FALSE]);
+ | Nt I, LIT s when s <> "$" -> PREDICT (I, [Tm (LIT s)])
+ | _ -> REJECT
+
+
+let parse (input : tok list) =
+ let root = ref (Lf (Nt S)) in
+ let initial_parse_state = {
+ stack = [Nt S];
+ ast_stack = [root]; (* list of ast refs corresponding to symbols on the stack *)
+ input = input @ [LIT "$"];
+ finished = false;
+ accepted = false
+ } in
+ driver initial_parse_state my_action_table;
+ (* print_endline (string_of_ast (!root)); *)
+ prop_of_ast !root
diff --git a/lib/parsing/utils.ml b/lib/parsing/utils.ml
new file mode 100644
index 0000000..61211f0
--- /dev/null
+++ b/lib/parsing/utils.ml
@@ -0,0 +1,69 @@
+open Headers
+open To_string
+
+(* to string functions for debugging *)
+let string_of_nt = function
+ | S -> "S"
+ | E -> "E"
+ | E' -> "E'"
+ | F -> "F"
+ | F' -> "F'"
+ | G -> "G"
+ | G' -> "G'"
+ | H -> "H"
+ | H' -> "H'"
+ | I -> "I"
+
+let string_of_symbol = function
+ | Tm x -> string_of_tok x
+ | Nt x -> string_of_nt x
+
+let rec string_of_symbols = function
+ | [] -> ""
+ | sym :: xs -> "[" ^ string_of_symbol sym ^ "] " ^ string_of_symbols xs
+
+let print_state st =
+ print_endline ("stack: " ^ (string_of_symbols st.stack));
+ print_endline (" input: " ^ (string_of_tokens st.input))
+
+let rec string_of_ast = function
+ | Br (sym, ars) -> "Br(" ^ string_of_symbol sym ^ "| " ^ (String.concat ", " (List.map (fun ar -> string_of_ast !ar) ars)) ^ ")"
+ | Lf sym -> "(" ^ string_of_symbol sym ^ ")"
+
+(* step through parse states *)
+let step (parse_state : state) (action_table : table) =
+ (*print_state parse_state; currently printing the trace *)
+ match parse_state.stack, parse_state.ast_stack, parse_state.input with
+ | x::xs, t::ts, i::is -> (
+ match action_table x i with
+ | ACCEPT -> {stack = xs; ast_stack = ts; input = is; finished = true; accepted = true}
+ | REJECT -> {stack = xs; ast_stack = ts; input = is; finished = true; accepted = false}
+ | MATCH _ -> {stack = xs; ast_stack = ts; input = is; finished = false; accepted = false}
+ | PREDICT (_, l) ->
+ let update_ast_ref sym = (* for each symbol in the production, create an ast ref *)
+ let new_lf = ref (Lf sym) in
+ (match !t with
+ | Lf t_sym -> t := Br (t_sym, [new_lf])
+ | Br (t_sym, l) -> t := Br(t_sym, l @ [new_lf]));
+ new_lf
+ in
+ {
+ stack = l @ xs; (* add production symbols to symbol stack *)
+ ast_stack = List.map update_ast_ref l @ ts; (* update t's contents and add new refs to ast_stack *)
+ input = i :: is;
+ finished = false;
+ accepted = false
+ }
+ )
+ | xs, ts, is -> {stack = xs; ast_stack = ts; input = is; finished = true; accepted = false}
+
+(* driver function to perform multistepping *)
+let rec driver (parse_state : state) (action_table : table) : unit =
+ if parse_state.finished then
+ if parse_state.accepted then
+ ()
+ else
+ raise (Parse_Error "Input rejected")
+ else
+ driver (step parse_state action_table) action_table
+ \ No newline at end of file