New mode selector for editor
[tengwarjs.git] / normalize.js
1
2 // This module adapts streams of characters sent to one parser into a
3 // simplified normal form piped to another. Internally, a stream is
4 // represented as a function that accepts the next character and returns a new
5 // stream.
6 //
7 // stream("a")("b")("c") -> stream
8 //
9 // The input ends with an empty character.
10 //
11 // stream("") -> stream
12 //
13 // Functions that return streams and produce a syntax node accept a
14 // callback that, like a stream, is required to return the initial stream state.
15 //
16 // parseAbc(function (result) {
17 // console.log(result);
18 // return expectEof();
19 // })("a")("b")("c")("")
20 //
21
22 var Parser = require("./parser");
23 var makeTrie = require("./trie");
24 var makeParserFromTrie = require("./trie-parser");
25 var array_ = Array.prototype;
26
27 // The `normalize` function accepts a stream and returns a stream. The
28 // character sequence sent to the returned stream will be converted to a
29 // normal form, where each character is lower-case and various clusters of
30 // characters will be converted to a "normal" phonetic form so the subsequent
31 // parser only has to deal with one input for each phonetic output.
32 //
33 // normalize(parseWord(callback))("Q")("u")("x")
34 //
35 // In this example, the callback would receive "cwcs", the normal form of
36 // "Qux".
37 //
38 module.exports = normalize;
/**
 * Wraps a stream so that the characters it receives are lower-cased and
 * then simplified before they reach `callback`.
 *
 * @param {Function} callback - the downstream stream to feed.
 * @returns {Function} a stream accepting the raw characters.
 */
function normalize(callback) {
    // Characters flow through `toLowerCase` first, then through `simplify`,
    // and finally into `callback`.
    var simplified = simplify(callback);
    return toLowerCase(simplified);
}
42
43 // This is a parser adapter that always returns the same state, but internally
44 // tracks the state of the wrapped parser. Each time the adapter receives a
45 // character, it converts it to lower case and uses that character to advance
46 // the state.
function toLowerCase(callback) {
    // `state` is the wrapped stream's current state; it is replaced by
    // whatever the wrapped stream returns for each character.
    var state = callback;

    function passthrough(character) {
        var lowered = character.toLowerCase();
        state = state(lowered);
        // The adapter itself never changes: callers always get the same
        // function back.
        return passthrough;
    }

    return passthrough;
}
53
54 // the keys of this table are characters and clusters of characters that must
55 // be simplified to the corresponding values before pumping them into an
56 // adapted parser. The adapted parser therefore only needs to handle the
57 // normal phonetic form of the cluster.
var table = {
    // consonants and consonant clusters
    k: "c",
    x: "cs",
    q: "cw",
    qu: "cw",
    p: "p",
    ph: "f",
    b: "b",
    bh: "v",
    // diaeresis is dropped; circumflex vowels become acute vowels
    "ë": "e",
    "â": "á",
    "ê": "é",
    "î": "í",
    "ô": "ó",
    "û": "ú"
};
74
75 // This generates a data structure that can be walked by a parser, where each
76 // node corresponds to having parsed a certain prefix and follows to each
77 // common suffix. If the parser is standing at a particular node of the trie
78 // and receives a character that does not match any of the subsequent subtrees,
79 // it "produces" the corresponding value at that node.
var trie = makeTrie(table);

// `simplify` accepts a downstream stream and returns a stream; the three
// functions below are handed to `makeParserFromTrie` along with the trie.
var simplify = makeParserFromTrie(
    trie,
    function makeProducer(string) {
        // Producing `string` means feeding it to the downstream stream one
        // character at a time, in order, and returning the resulting state.
        return function (callback) {
            var state = callback;
            for (var index = 0; index < string.length; index++) {
                state = state(string[index]);
            }
            return state;
        };
    },
    function callback(next) {
        // After a match has been emitted, loop back and scan for another.
        return simplify(next);
    },
    function fallback(next) {
        // A character that is not accounted for in the table is passed
        // through without alteration, then scanning for matches resumes.
        return function (character) {
            var advanced = next(character);
            return simplify(advanced);
        };
    }
);
106