diff options
Diffstat (limited to 'lib/lexing/lexer.ml')
-rw-r--r-- | lib/lexing/lexer.ml | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/lib/lexing/lexer.ml b/lib/lexing/lexer.ml new file mode 100644 index 0000000..af24e49 --- /dev/null +++ b/lib/lexing/lexer.ml @@ -0,0 +1,97 @@ +(* Lexer for transforming an input string into a list of tokens *) + +open Types +open Headers +open Utils + +(* + Ordered lexer rules: + + true => TRUE [1,2,3,4] + false => FALSE [5,6,7,8,9] + [a-zA-Z]+ as s => LIT s [10] + ¬ => NOT [11] + ∧ => AND [12] + ∨ => OR [13] + → => IMPLIES [14] + ↔ => IFF [15] + ( => LEFT_PAREN [16] + ) => RIGHT_PAREN [17] + [ \t\n] => SKIP [18] +*) + +(* Define a TDFA *) +let my_tdfa : tdfa = + let my_alphabet = + let lowercase = List.init 26 (fun i -> Char.chr (Char.code 'a' + i)) in + let uppercase = List.init 26 (fun i -> Char.chr (Char.code 'A' + i)) in + ['!'; '&'; '|'; '>'; '='; '('; ')'; ' '; '\t'; '\n'] @ lowercase @ uppercase + in + + let my_delta (i, c) = + if + not (List.mem c my_alphabet) + then + raise (Not_In_Alphabet c) + else + let append_lit = function + | LIT s -> LIT (s ^ Char.escaped c) + | _ -> LIT (Char.escaped c) + in + + let is_ltr c = + let ascii = Char.code c in + (65 <= ascii && ascii <= 90) || (97 <= ascii && ascii <= 122) + in + + match i, c with + | 0, '!' -> 11, (fun _ -> NOT) + | 0, '&' -> 12, (fun _ -> AND) + | 0, '|' -> 13, (fun _ -> OR) + | 0, '>' -> 14, (fun _ -> IMPLIES) + | 0, '=' -> 15, (fun _ -> IFF) + | 0, '(' -> 16, (fun _ -> LEFT_PAREN) + | 0, ')' -> 17, (fun _ -> RIGHT_PAREN) + | 0, ' ' -> 18, (fun _ -> SKIP) + | 0, '\t' -> 18, (fun _ -> SKIP) + | 0, '\n' -> 18, (fun _ -> SKIP) + | 0, 't' -> 1, append_lit + | 0, 'f' -> 5, append_lit + | 0, c when is_ltr c -> 10, append_lit + | 1, 'r' -> 2, append_lit + | 1, c when is_ltr c -> 10, append_lit + | 2, 'u' -> 3, append_lit + | 2, c when is_ltr c -> 10, append_lit + | 3, 'e' -> 4, (fun _ -> TRUE) + | 3, c when is_ltr c -> 10, append_lit + | 4, c when is_ltr c -> 10, (fun _ -> (LIT ("true" ^ Char.escaped c))) + | 5, 'a' -> 6, append_lit + | 5, c when is_ltr c -> 10, append_lit + | 6, 'l' -> 7, append_lit + | 6, c when is_ltr c -> 10, append_lit + | 7, 's' -> 8, append_lit + | 7, c when is_ltr c -> 10, append_lit + | 8, 'e' -> 9, (fun _ -> FALSE) + | 8, c when is_ltr c -> 10, append_lit + | 9, c when is_ltr c -> 10, (fun _ -> (LIT ("false" ^ Char.escaped c))) + | 10, c when is_ltr c -> 10, append_lit + | _ -> raise (No_Transition (i, c)) + in + + { + alphabet = my_alphabet; + start_state = 0; + register_init = SKIP; + delta = my_delta + } + + +let lex (s : string) : tok list = + let to_parse = s |> String.to_seq |> List.of_seq in + let initial_tdfa_state = { + state_id = my_tdfa.start_state; + register = my_tdfa.register_init; + tokens = []; + to_parse = to_parse + } in + driver my_tdfa initial_tdfa_state |