1 // TODO remove this since it canvases over the origin of certain clusters
// This module adapts streams of characters sent to one parser into a
// simplified normal form piped to another. Internally, a stream is
// represented as a function that accepts the next character and returns a new
// stream.
//
//     stream("a")("b")("c") -> stream
10 // The input ends with an empty character.
12 // stream("") -> stream
// Functions that return streams and produce a syntax node accept a
// callback that, like a stream, is required to return the initial stream state.
17 // parseAbc(function (result) {
18 // console.log(result);
19 // return expectEof();
20 // })("a")("b")("c")("")
23 var Parser
= require("./parser");
24 var makeTrie
= require("./trie");
25 var makeParserFromTrie
= require("./trie-parser");
26 var array_
= Array
.prototype;
// The `normalize` function accepts a stream and returns a stream. The
// character sequence sent to the returned stream will be converted to a
// normal form, where each character is lower-case and various clusters of
// characters will be converted to a "normal" phonetic form so the subsequent
// parser only has to deal with one input for each phonetic output.
//
//     normalize(parseWord(callback))("Q")("u")("x")
//
// In this example, the callback would receive "cwcs", the normal form of
// "Qux".
39 module
.exports
= normalize
;
// Wraps the stream `callback` in two adapters: characters sent to the returned
// stream are first lower-cased by `toLowerCase`, and the lower-cased
// characters are then fed through `simplify` before they reach `callback`.
//
// callback: the stream (parser state) that should receive the normal form.
// Returns the stream that accepts the raw, un-normalized characters.
function normalize(callback) {
    return toLowerCase(simplify(callback));
}
// This is a parser adapter that always returns the same state, but internally
// tracks the state of the wrapped parser. Each time the adapter receives a
// character, it converts it to lower case and uses that character to advance
// the state of the wrapped parser.
//
// callback: the initial state of the wrapped parser, a function that accepts
// one character and returns the next state.
// Returns the adapter stream, which returns itself for every character.
function toLowerCase(callback) {
    return function passthrough(character) {
        // Replace the captured state with its successor; the adapter itself
        // never changes, so callers can keep feeding the same function.
        callback = callback(character.toLowerCase());
        return passthrough;
    };
}
55 // the keys of this table are characters and clusters of characters that must
56 // be simplified to the corresponding values before pumping them into an
57 // adapted parser. The adapted parser therefore only needs to handle the
58 // normal phonetic form of the cluster.
// This generates a data structure that can be walked by a parser, where each
// node corresponds to having parsed a certain prefix and follows to each
// common suffix. If the parser is standing at a particular node of the trie
// and receives a character that does not match any of the subsequent subtrees,
// it "produces" the corresponding value at that node.
var trie = makeTrie(table);
82 var simplify
= makeParserFromTrie(
84 function makeProducer(string
) {
// producing string involves advancing the state by individual characters
87 return function (callback
) {
88 return Array
.prototype.reduce
.call(string
, function (callback
, character
) {
89 return callback(character
);
93 function callback(callback
) {
94 // after a match has been emitted, loop back for another
95 return simplify(callback
);
97 function fallback(callback
) {
// if we reach a character that is not accounted for in the table, pass
// it through without alteration, then start scanning for matches again
101 return function (character
) {
102 return simplify(callback(character
));