1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
(* Lexer for transforming an input string into a list of tokens *)
open Types
open Headers
open Utils
(*
Ordered lexer rules:
true => TRUE [1,2,3,4]
false => FALSE [5,6,7,8,9]
[a-zA-Z]+ as s => LIT s [10]
¬ => NOT [11]
∧ => AND [12]
∨ => OR [13]
→ => IMPLIES [14]
↔ => IFF [15]
( => LEFT_PAREN [16]
) => RIGHT_PAREN [17]
[ \t\n] => SKIP [18]
*)
(* Define a TDFA *)
let my_tdfa : tdfa =
let my_alphabet =
let lowercase = List.init 26 (fun i -> Char.chr (Char.code 'a' + i)) in
let uppercase = List.init 26 (fun i -> Char.chr (Char.code 'A' + i)) in
['!'; '&'; '|'; '>'; '='; '('; ')'; ' '; '\t'; '\n'] @ lowercase @ uppercase
in
let my_delta (i, c) =
if
not (List.mem c my_alphabet)
then
raise (Not_In_Alphabet c)
else
let append_lit = function
| LIT s -> LIT (s ^ Char.escaped c)
| _ -> LIT (Char.escaped c)
in
let is_ltr c =
let ascii = Char.code c in
(65 <= ascii && ascii <= 90) || (97 <= ascii && ascii <= 122)
in
match i, c with
| 0, '!' -> 11, (fun _ -> NOT)
| 0, '&' -> 12, (fun _ -> AND)
| 0, '|' -> 13, (fun _ -> OR)
| 0, '>' -> 14, (fun _ -> IMPLIES)
| 0, '=' -> 15, (fun _ -> IFF)
| 0, '(' -> 16, (fun _ -> LEFT_PAREN)
| 0, ')' -> 17, (fun _ -> RIGHT_PAREN)
| 0, ' ' -> 18, (fun _ -> SKIP)
| 0, '\t' -> 18, (fun _ -> SKIP)
| 0, '\n' -> 18, (fun _ -> SKIP)
| 0, 't' -> 1, append_lit
| 0, 'f' -> 5, append_lit
| 0, c when is_ltr c -> 10, append_lit
| 1, 'r' -> 2, append_lit
| 1, c when is_ltr c -> 10, append_lit
| 2, 'u' -> 3, append_lit
| 2, c when is_ltr c -> 10, append_lit
| 3, 'e' -> 4, (fun _ -> TRUE)
| 3, c when is_ltr c -> 10, append_lit
| 4, c when is_ltr c -> 10, (fun _ -> (LIT ("true" ^ Char.escaped c)))
| 5, 'a' -> 6, append_lit
| 5, c when is_ltr c -> 10, append_lit
| 6, 'l' -> 7, append_lit
| 6, c when is_ltr c -> 10, append_lit
| 7, 's' -> 8, append_lit
| 7, c when is_ltr c -> 10, append_lit
| 8, 'e' -> 9, (fun _ -> FALSE)
| 8, c when is_ltr c -> 10, append_lit
| 9, c when is_ltr c -> 10, (fun _ -> (LIT ("false" ^ Char.escaped c)))
| 10, c when is_ltr c -> 10, append_lit
| _ -> raise (No_Transition (i, c))
in
{
alphabet = my_alphabet;
start_state = 0;
register_init = SKIP;
delta = my_delta
}
let lex (s : string) : tok list =
let to_parse = s |> String.to_seq |> List.of_seq in
let initial_tdfa_state = {
state_id = my_tdfa.start_state;
register = my_tdfa.register_init;
tokens = [];
to_parse = to_parse
} in
driver my_tdfa initial_tdfa_state
|