diff options
Diffstat (limited to 'lib/parsing')
-rw-r--r-- | lib/parsing/headers.ml | 74 | ||||
-rw-r--r-- | lib/parsing/parser.ml | 81 | ||||
-rw-r--r-- | lib/parsing/utils.ml | 69 |
3 files changed, 224 insertions, 0 deletions
diff --git a/lib/parsing/headers.ml b/lib/parsing/headers.ml new file mode 100644 index 0000000..8d63381 --- /dev/null +++ b/lib/parsing/headers.ml @@ -0,0 +1,74 @@ +(* Useful types for parsing *) + +open Types + +exception Parse_Error of string + +type terminal = tok + +type nonterminal = S | E | E' | F | F' | G | G' | H | H' | I + +type symbol = Nt of nonterminal | Tm of terminal + +type production = nonterminal * (symbol list) + +(* possible actions to take when LL parsing *) +type action = + | PREDICT of production + | MATCH of tok + | ACCEPT + | REJECT + +(* takes in top stack symbol and the next input token and selects action to take *) +type table = symbol -> tok -> action + +(* mutable abstract syntax tree *) +type ast = + | Br of symbol * (ast ref) list + | Lf of symbol + +(* parsing state *) +type state = { + stack : symbol list; + ast_stack : ast ref list; (* list of ast refs corresponding to symbols on the stack *) + input : tok list; + finished : bool; + accepted : bool +} + +(* convert ast to proposition *) +let rec prop_of_ast (tree : ast) : proposition = + match tree with + | Lf (Tm TRUE) -> True + | Lf (Tm FALSE) -> False + | Lf (Tm (LIT s)) -> Lit s + | Br (Nt S, e_ref :: _) -> prop_of_ast !e_ref + | Br (Nt I, _ :: e_ref :: _ :: _) -> prop_of_ast !e_ref (* I ::= LEFT_PAREN E RIGHT_PAREN *) + | Br (Nt I, _ :: i_ref :: _) -> let inner_prop = prop_of_ast !i_ref in Not(inner_prop) (* I ::= NOT I *) + | Br (Nt I, [some_ref]) -> (* I ::= TRUE | FALSE | LIT s *) + begin + match !some_ref with + | Lf (Tm TRUE) -> True + | Lf (Tm FALSE) -> False + | Lf (Tm (LIT s)) -> Lit s + | _ -> failwith "some bug" + end + | Br (Nt a, b_ref :: a'_ref :: _) -> (* A ::= B A' *) + begin + match !a'_ref with + | Lf _ -> prop_of_ast !b_ref (* A' ::= eps *) + | Br (_, _ :: tl) -> (* A' ::= CONNECTOR B A' *) + let left = prop_of_ast !b_ref in + let right = prop_of_ast (Br(Nt a, tl)) in + begin + match a with + | E -> Iff(left, right) + | F -> Implies(left, right) + | G -> Or(left, right) + | H -> And(left, right) + | _ -> failwith "a bug!" + end + | _ -> failwith "another bug" + end + | _ -> failwith "found a bug!" +
\ No newline at end of file diff --git a/lib/parsing/parser.ml b/lib/parsing/parser.ml new file mode 100644 index 0000000..e56411b --- /dev/null +++ b/lib/parsing/parser.ml @@ -0,0 +1,81 @@ +open Types +open Headers +open Utils + +let my_productions : production list = (* currently not in use *) + [ + (S, [Nt E; Tm (LIT "$")]); (* S ::= E $ *) + (E, [Nt F; Nt E']); + (E', [Tm IFF; Nt F; Nt E']); + (E', []); + (F, [Nt G; Nt F']); + (F', [Tm IMPLIES; Nt G; Nt F']); + (F', []); + (G, [Nt H; Nt G']); + (G', [Tm OR; Nt H; Nt G']); + (G', []); + (H, [Nt I; Nt H']); + (H', [Tm AND; Nt I; Nt H']); + (H', []); + (I, [Tm NOT; Nt I]); + (I, [Tm LEFT_PAREN; Nt E; Tm RIGHT_PAREN]); + (I, [Tm TRUE]); + (I, [Tm FALSE]); + (I, [Tm (LIT "any string")]) + ] + + +let my_action_table (sym : symbol) (tk : tok) : action = + let predict_if_in_first tk prod = + match tk with + | TRUE -> PREDICT prod + | FALSE -> PREDICT prod + | LIT _ -> PREDICT prod + | NOT -> PREDICT prod + | LEFT_PAREN -> PREDICT prod + | _ -> REJECT + in + + let predict_if_in_follow tk follow prod = + if List.mem tk follow then + PREDICT prod + else + REJECT + in + + match sym, tk with + | Tm (LIT "$"), LIT "$" -> ACCEPT + | Tm t, tk -> if t = tk then MATCH t else REJECT + | Nt S, tk -> predict_if_in_first tk (S, [Nt E; Tm (LIT "$")]) + | Nt E, tk -> predict_if_in_first tk (E, [Nt F; Nt E']) + | Nt E', IFF -> PREDICT (E', [Tm IFF; Nt F; Nt E']) + | Nt E', tk -> predict_if_in_follow tk [LIT "$"; RIGHT_PAREN] (E', []) + | Nt F, tk -> predict_if_in_first tk (F, [Nt G; Nt F']) + | Nt F', IMPLIES -> PREDICT (F', [Tm IMPLIES; Nt G; Nt F']) + | Nt F', tk -> predict_if_in_follow tk [IFF; LIT "$"; RIGHT_PAREN] (F', []) + | Nt G, tk -> predict_if_in_first tk (G, [Nt H; Nt G']) + | Nt G', OR -> PREDICT (G', [Tm OR; Nt H; Nt G']) + | Nt G', tk -> predict_if_in_follow tk [IMPLIES; IFF; LIT "$"; RIGHT_PAREN] (G', []) + | Nt H, tk -> predict_if_in_first tk (H, [Nt I; Nt H']) + | Nt H', AND -> PREDICT (H', [Tm AND; Nt I; Nt H']) + | Nt H', tk -> predict_if_in_follow tk [OR; IMPLIES; IFF; LIT "$"; RIGHT_PAREN] (H', []) + | Nt I, NOT -> PREDICT (I, [Tm NOT; Nt I]); + | Nt I, LEFT_PAREN -> PREDICT (I, [Tm LEFT_PAREN; Nt E; Tm RIGHT_PAREN]); + | Nt I, TRUE -> PREDICT (I, [Tm TRUE]); + | Nt I, FALSE -> PREDICT (I, [Tm FALSE]); + | Nt I, LIT s when s <> "$" -> PREDICT (I, [Tm (LIT s)]) + | _ -> REJECT + + +let parse (input : tok list) = + let root = ref (Lf (Nt S)) in + let initial_parse_state = { + stack = [Nt S]; + ast_stack = [root]; (* list of ast refs corresponding to symbols on the stack *) + input = input @ [LIT "$"]; + finished = false; + accepted = false + } in + driver initial_parse_state my_action_table; + (* print_endline (string_of_ast (!root)); *) + prop_of_ast !root diff --git a/lib/parsing/utils.ml b/lib/parsing/utils.ml new file mode 100644 index 0000000..61211f0 --- /dev/null +++ b/lib/parsing/utils.ml @@ -0,0 +1,69 @@ +open Headers +open To_string + +(* to string functions for debugging *) +let string_of_nt = function + | S -> "S" + | E -> "E" + | E' -> "E'" + | F -> "F" + | F' -> "F'" + | G -> "G" + | G' -> "G'" + | H -> "H" + | H' -> "H'" + | I -> "I" + +let string_of_symbol = function + | Tm x -> string_of_tok x + | Nt x -> string_of_nt x + +let rec string_of_symbols = function + | [] -> "" + | sym :: xs -> "[" ^ string_of_symbol sym ^ "] " ^ string_of_symbols xs + +let print_state st = + print_endline ("stack: " ^ (string_of_symbols st.stack)); + print_endline (" input: " ^ (string_of_tokens st.input)) + +let rec string_of_ast = function + | Br (sym, ars) -> "Br(" ^ string_of_symbol sym ^ "| " ^ (String.concat ", " (List.map (fun ar -> string_of_ast !ar) ars)) ^ ")" + | Lf sym -> "(" ^ string_of_symbol sym ^ ")" + +(* step through parse states *) +let step (parse_state : state) (action_table : table) = + (*print_state parse_state; currently printing the trace *) + match parse_state.stack, parse_state.ast_stack, parse_state.input with + | x::xs, t::ts, i::is -> ( + match action_table x i with + | ACCEPT -> {stack = xs; ast_stack = ts; input = is; finished = true; accepted = true} + | REJECT -> {stack = xs; ast_stack = ts; input = is; finished = true; accepted = false} + | MATCH _ -> {stack = xs; ast_stack = ts; input = is; finished = false; accepted = false} + | PREDICT (_, l) -> + let update_ast_ref sym = (* for each symbol in the production, create an ast ref *) + let new_lf = ref (Lf sym) in + (match !t with + | Lf t_sym -> t := Br (t_sym, [new_lf]) + | Br (t_sym, l) -> t := Br(t_sym, l @ [new_lf])); + new_lf + in + { + stack = l @ xs; (* add production symbols to symbol stack *) + ast_stack = List.map update_ast_ref l @ ts; (* update t's contents and add new refs to ast_stack *) + input = i :: is; + finished = false; + accepted = false + } + ) + | xs, ts, is -> {stack = xs; ast_stack = ts; input = is; finished = true; accepted = false} + +(* driver function to perform multistepping *) +let rec driver (parse_state : state) (action_table : table) : unit = + if parse_state.finished then + if parse_state.accepted then + () + else + raise (Parse_Error "Input rejected") + else + driver (step parse_state action_table) action_table +
\ No newline at end of file |