|
|
1-- running a deterministic finite automaton
2-- for scanning an input sequence and computing the token sequence
3
4module DFA
5 ( I,
6 Alphabet,
7 Q,
8 SetOfQ,
9 Delta,
10 Input,
11 Token,
12 Tokens,
13 DFA,
14
15 run,
16 accept,
17 showTokens
18 )
19where
20
21import Data.Maybe
22
23-- automaton types
24
25type I = Char -- input symbols are Char's
26
27type Alphabet = [Char] -- input alphabet is a subset of the Char type
28
29type Q = Int -- states are represented as Int's
30
31type SetOfQ = [Q] -- state sets are represented as lists
32
33type Delta = Q -> I -> Maybe Q
34 -- transition table as function with Maybe result
35 -- to represent undefined as Nothing
36
37type Input = [I] -- sequence of input symbols
38
39type Token = [I] -- one token, one output symbol
40
41type Tokens = ([(Q, Token)], Input)
42 -- sequence of pairs of final state number and accepted token
43 -- and rest of input. if input can't be parsed
44
45type DFA = (SetOfQ, Alphabet, Q, SetOfQ, Delta)
46 -- the complete automaton, consisting of start state,
47 -- set of final states and transition function
48
49-- local data types
50
51type DFAState = (Q, Token, Input)
52 -- state of running automaton
53 -- consisting of current state q,
54 -- the already parsed char sequence
55 -- and the remaining input
56
57-- run an automaton on an input string
58
59run :: DFA -> Input -> Tokens
60
61run (_allStates, _allSymbols, start, finalStates, delta) input
62 | null input
63 &&
64 isFinalState start
65 = ([(start, "")], "")
66 | otherwise
67 = loop input
68
69 where
70 -- the main loop
71
72 loop :: Input -> Tokens
73 loop inp
74 | null s
75 = ([], inp)
76 | null inp'
77 = ([(q,s)], "")
78 | otherwise
79 = let
80 (ts, rest) = loop inp'
81 in
82 ((q, s): ts, rest)
83 where
84 init = (start, "", inp)
85 (q, s, inp') = symbol init init
86
87 isFinalState :: Q -> Bool
88 isFinalState q = q `elem` finalStates
89
90 -- scan one symbol
91
92 symbol :: DFAState -> DFAState -> DFAState
93
94 symbol lastFinalState currState@(q, s, i)
95 | isFinalState q && longestMatch -- success: token recognized
96 = currState
97
98 | isFinalState q && not longestMatch -- token may still be longer
99 = symbol currState nextState
100
101 | not (isFinalState q) && longestMatch -- failure: restore last possible token
102 = lastFinalState
103
104 | not (isFinalState q) && not longestMatch -- token not yet complete
105 = symbol lastFinalState nextState
106
107 where
108 longestMatch = null i -- EOF or delta undefined
109 ||
110 isNothing (delta q nextChar)
111
112 nextChar = head i
113 -- compute next state
114 -- and read next input char
115 nextState = (fromJust (delta q nextChar), s ++ [nextChar], tail i)
116
117
118-- word test
119
120accept :: DFA -> Input -> Bool
121accept a
122 = oneSymbol . run a
123 where
124 oneSymbol ([_], "") = True
125 oneSymbol _ = False
126
127-- format result
128
129showTokens :: Tokens -> String
130showTokens (ts, rest)
131 = concatMap showToken ts
132 ++
133 showRest rest
134 where
135 showToken (q, s)
136 = showState q ++ show s ++ "\n"
137 showState q'
138 = take 4 (show q' ++ repeat ' ') ++ ": "
139 showRest ""
140 = ""
141 showRest r
142 = "input not accepted: " ++ show r ++ "\n"
143
144
|
|
1module DFAexample1 ( dfa1 )
2where
3
4import DFA
5
6dfa1 :: DFA
7dfa1
8 = (states, alphabet, q0, f, delta)
9 where
10 states = [1..13]
11 alphabet = "\n\t" ++ [' '..'~']
12 q0 = 1
13 f = [2,3,4,5,6,7,8,9,11,12,13]
14
15 delta 1 c
16 | c == 'i' = Just 2
17 | c `elem` ['a'..'h']
18 ||
19 c `elem` ['j'..'z'] = Just 4
20 | c == '.' = Just 5
21 | c `elem` ['0'..'9'] = Just 7
22 | c == '-' = Just 9
23 | c `elem` " \t\n" = Just 12
24 | otherwise = Just 13
25
26 delta 2 c
27 | c == 'f' = Just 3
28 | c `elem` ['a'..'e']
29 ||
30 c `elem` ['g'..'z']
31 ||
32 c `elem` ['0'..'9'] = Just 4
33
34 delta 3 c
35 | c `elem` ['a'..'z']
36 ||
37 c `elem` ['0'..'9'] = Just 4
38
39 delta 4 c
40 | c `elem` ['a'..'z']
41 ||
42 c `elem` ['0'..'9'] = Just 4
43
44 delta 5 c
45 | c `elem` ['0'..'9'] = Just 6
46
47 delta 6 c
48 | c `elem` ['0'..'9'] = Just 6
49
50 delta 7 c
51 | c `elem` ['0'..'9'] = Just 7
52 | c == '.' = Just 8
53
54 delta 8 c
55 | c `elem` ['0'..'9'] = Just 8
56
57 delta 9 c
58 | c == '-' = Just 10
59
60 delta 10 c
61 | c `elem` ['a'..'z'] = Just 10
62 | c == '\n' = Just 11
63
64 delta 12 c
65 | c `elem` " \t\n" = Just 12
66
67 delta _ _ = Nothing
|
|
|
|
1module DFAexample2 ( dfa2 )
2where
3
4import DFA
5
6dfa2 :: DFA
7dfa2
8 = (states, alphabet, q0, f, delta)
9 where
10 states = [1..4]
11 alphabet = "ab"
12 q0 = 1
13 f = [2,4]
14 delta 1 'a' = Just 2
15 delta 1 'b' = Just 4
16 delta 2 'a' = Just 3
17 delta 3 'a' = Just 3
18 delta 3 'b' = Just 4
19 delta _ _ = Nothing
|
|
| Letzte Änderung: 14.02.2012 | © Prof. Dr. Uwe Schmidt |