// This module adapts streams of characters sent to one parser into a
// simplified normal form piped to another. Internally, a stream is
// represented as a function that accepts the next character and returns a new
// stream.
7 // stream("a")("b")("c") -> stream
9 // The input ends with an empty character.
11 // stream("") -> stream
// Functions that return streams and produce a syntax node accept a
// callback that, like a stream, is required to return the initial stream state
// of whatever parser handles the rest of the input.
16 // parseAbc(function (result) {
17 // console.log(result);
18 // return expectEof();
19 // })("a")("b")("c")("")
22 var Parser
= require("./parser");
23 var makeTrie
= require("./trie");
24 var makeParserFromTrie
= require("./trie-parser");
25 var array_
= Array
.prototype;
27 // The `normalize` function accepts a stream and returns a stream. The
28 // character sequence sent to the returned stream will be converted to a
29 // normal form, where each character is lower-case and various clusters of
30 // characters will be converted to a "normal" phonetic form so the subsequent
31 // parser only has to deal with one input for each phonetic output.
33 // normalize(parseWord(callback))("Q")("u")("x")
// In this example, the callback would receive "cwcs", the normal form of
// "Qux".
module.exports = normalize;

/**
 * Wraps a stream so that the characters sent to it arrive in normal form.
 *
 * Each incoming character is lower-cased first and then fed through
 * `simplify` before it reaches the wrapped stream.
 *
 * @param {Function} callback - the stream (parser state) that should receive
 * the normalized characters.
 * @returns {Function} a stream that accepts the raw, un-normalized characters.
 */
function normalize(callback) {
    return toLowerCase(simplify(callback));
}
// This is a parser adapter that always returns the same state, but internally
// tracks the state of the wrapped parser. Each time the adapter receives a
// character, it converts it to lower case and uses that character to advance
// the state of the wrapped parser.
/**
 * Adapts a stream so that every character is lower-cased before it reaches
 * the wrapped parser.
 *
 * The returned stream always hands back itself. The wrapped parser's current
 * state is kept in a private variable and advanced once per character.
 *
 * @param {Function} callback - initial state of the wrapped parser: a
 * function that accepts a character and returns the next state.
 * @returns {Function} the `passthrough` stream.
 */
function toLowerCase(callback) {
    // Track the wrapped parser in a local instead of reassigning the
    // parameter, so the argument the caller passed is never clobbered.
    var state = callback;
    return function passthrough(character) {
        state = state(character.toLowerCase());
        // The adapter itself is stateless from the outside: same stream back.
        return passthrough;
    };
}
// The keys of this table are characters and clusters of characters that must
// be simplified to the corresponding values before pumping them into an
// adapted parser. The adapted parser therefore only needs to handle the
// normal phonetic form of the cluster.
75 // This generates a data structure that can be walked by a parser, where each
76 // node corresponds to having parsed a certain prefix and follows to each
77 // common suffix. If the parser is standing at a particular node of the trie
78 // and receives a character that does not match any of the subsequent subtrees,
79 // it "produces" the corresponding value at that node.
// Built once, when the module is loaded, from the simplification table.
var trie = makeTrie(table);
82 var simplify
= makeParserFromTrie(
84 function makeProducer(string
) {
// producing a string involves advancing the state one character at a time
87 return function (callback
) {
88 return Array
.prototype.reduce
.call(string
, function (callback
, character
) {
89 return callback(character
);
93 function callback(callback
) {
94 // after a match has been emitted, loop back for another
95 return simplify(callback
);
97 function fallback(callback
) {
// if we reach a character that is not accounted for in the table, pass
// it through without alteration, then start scanning for matches again
101 return function (character
) {
102 return simplify(callback(character
));