Created regression tests.
authorKris Kowal <kris.kowal@cixar.com>
Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
committerKris Kowal <kris.kowal@cixar.com>
Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
index.html
samples.html
tengwar.js
tests.js [new file with mode: 0644]

index d5d9adc..d31397e 100644 (file)
@@ -1,6 +1,7 @@
 <html>
     <head>
         <meta http-equiv="content-type" content="text/html; charset=utf-8">
+        <title>Tengwar Transcriber</title>
         <link rel="stylesheet" type="text/css" href="style.css">
         <link rel="stylesheet" type="text/css" href="index.css">
     </head>
index bb03c76..77ed529 100644 (file)
 
         <p>Hobbits'': <span class="tengwar">Hobbits''</span>. Illustrates the use of a straight apostrophe to use alternate S-hooks.</p>
 
+        <p>XX ksks tsts IQS: <span class="tengwar">XX ksks tsts IQS</span>. Some problematic tehta and S-hook combinations. T-S-hook must only be final. K-S-hook can be medial.  ICWS cluster must not place S-hook on C.</p>
+
         <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.6.4/jquery.min.js"></script>
         <script src="tengwar.js" charset="utf-8"></script>
         <script>$(".tengwar").tengwar()</script>
index 19e16ff..d20b211 100644 (file)
@@ -150,7 +150,6 @@ var mode = {
             "ando": "+",
             "numen": "+",
             "lambe": "_",
-            "calma": "|",
             "quesse": "|",
             "short-carrier": "}",
         },
@@ -467,65 +466,90 @@ var mode = {
         "û": "ú"
     },
     "transcriptions": {
+
+        // consonants
         "t": "tinco",
         "nt": "tinco:tilde-above",
         "tt": "tinco:tilde-below",
+
         "p": "parma",
         "mp": "parma:tilde-above",
         "pp": "parma:tilde-below",
+
         "c": "quesse",
         "nc": "quesse:tilde-above",
+
         "d": "ando",
         "nd": "ando:tilde-above",
         "dd": "ando:tilde-below",
+
         "b": "umbar",
         "mb": "umbar:tilde-above",
         "bb": "umbar:tilde-below",
+
         "g": "ungwe",
         "ng": "ungwe:tilde-above",
         "gg": "ungwe:tilde-below",
+
         "th": "thule",
         "nth": "thule:tilde-above",
+
         "f": "formen",
         "ph": "formen",
         "mf": "formen:tilde-above",
         "mph": "formen:tilde-above",
+
+        "sh": "harma",
+
         "h": "hyarmen",
         "ch": "hwesta",
+        "hw": "hwesta-sindarinwa",
+        "wh": "hwesta-sindarinwa",
+
         "gh": "unque",
-        "d": "ando",
-        "nd": "ando:tilde-above",
+        "ngh": "unque:tilde-above",
+
         "dh": "anto",
         "ndh": "anto:tilde-above",
+
         "v": "ampa",
         "bh": "ampa",
         "mv": "ampa:tilde-above",
         "mbh": "ampa:tilde-above",
-        "ngh": "unque:tilde-above",
+
+        "j": "anca",
+        "nj": "anca:tilde-above",
+
         "n": "numen",
         "nn": "numen:tilde-above",
+
         "m": "malta",
         "mm": "malta:tilde-above",
+
         "ng": "nwalme",
+        "ñ": "nwalme",
+        "nwal": "nwalme:w;lambe:a",
+
         "r": "romen",
-        //"rd": "arda",
+        "rr": "romen:tilde-below",
+        "rh": "arda",
+
         "l": "lambe",
         "ll": "lambe:tilde-below",
-        "bb": "umbar:tilde-below",
+        "lh": "alda",
+
         "s": "silme",
         "ss": "silme:tilde-below",
-        "sh": "harma",
-        "j": "anca",
+
         "z": "esse",
-        //"ld": "alda",
-        "lh": "alda", // probably not
-        "rh": "arda",
+
         "á": "wilya:a",
         "é": "long-carrier:e",
         "í": "long-carrier:i",
         "ó": "long-carrier:o",
         "ú": "long-carrier:u",
         "w": "vala",
+
         "ai": "anna:a",
         "oi": "anna:o",
         "ui": "anna:u",
@@ -533,33 +557,37 @@ var mode = {
         "eu": "vala:e",
         "iu": "vala:i",
         "ae": "yanta:a",
-        "hw": "hwesta-sindarinwa",
-        "wh": "hwesta-sindarinwa",
-        "ñ": "nwalme",
-        "nwal": "nwalme:w;lambe:a",
+
     },
     "vowelTranscriptions": {
+
         "a": "short-carrier:a",
         "e": "short-carrier:e",
         "i": "short-carrier:i",
         "o": "short-carrier:o",
         "u": "short-carrier:u",
+
         "á": "wilya:a",
         "é": "long-carrier:e",
         "í": "long-carrier:i",
         "ó": "short-carrier:ó",
         "ú": "short-carrier:ú",
+
         "w": "vala",
         "y": "short-carrier:í"
+
     },
+
     "words": {
         "iant": "yanta;tinco:tilde-above:a",
         "iaur": "yanta;vala:a;ore",
+        "baranduiniant": "umbar;romen:a;ando:tilde-above:a;anna:u;yanta;anto:tilde-above:a",
         "ioreth": "yanta;romen:o;thule:e",
         "noldo": "nwalme;lambe:o;ando;short-carrier:o",
         "noldor": "nwalme;lambe:o;ando;ore:o",
         "is": "short-carrier:i:s"
     },
+
     "punctuation": {
         "-": "comma",
         ",": "comma",
@@ -573,6 +601,7 @@ var mode = {
         ">": "flourish-left",
         "<": "flourish-right"
     },
+
     "annotations": {
         "tinco": {"tengwa": "t"},
         "parma": {"tengwa": "p"},
@@ -708,7 +737,7 @@ var vowelTranscriptionsRe = new RegExp("^(" +
 var substitutionsRe = new RegExp("(" +
     Object.keys(mode.substitutions).join("|") +
 ")", "ig");
-                        
+
 function transcribeWordToEncoding(latin) {
     latin = latin
     .toLowerCase()
@@ -720,7 +749,10 @@ function transcribeWordToEncoding(latin) {
     var parts = [];
     var length;
     var first = true;
+    var maybeFinal;
     while (latin.length) {
+        if (latin[0] != "s")
+            maybeFinal = undefined;
         length = latin.length;
         latin = latin
         .replace(transcriptionsRe, function ($, vowel, tengwa, w, y, s, prime) {
@@ -735,8 +767,14 @@ function transcribeWordToEncoding(latin) {
             if (vowel) {
                 if (!voweled) {
                     // flip if necessary
-                    if (tehtaForTengwa(tengwa, vowel) === null && tehtaForTengwa(tengwa + "-nuquerna", vowel) !== null) {
-                        value = [tengwa + "-nuquerna"].concat(tehtar).concat([vowel]).filter(function (part) {
+                    if (
+                        tehtaForTengwa(tengwa, vowel) === null &&
+                        tehtaForTengwa(tengwa + "-nuquerna", vowel) !== null
+                    ) {
+                        value = [tengwa + "-nuquerna"]
+                        .concat(tehtar)
+                        .concat([vowel])
+                        .filter(function (part) {
                             return part;
                         }).join(":");
                     } else {
@@ -757,7 +795,7 @@ function transcribeWordToEncoding(latin) {
                 y = "";
             }
             // must go last because it has a non-zero width
-            if (s) {
+            if (s && !w) {
                 var length = prime.length;
                 var possibilities = [
                     "s",
@@ -772,13 +810,17 @@ function transcribeWordToEncoding(latin) {
                     length--;
                 }
                 if (possibilities.length) {
-                    value += ":" + possibilities.shift();
-                    s = "";
+                    if (value.split(":").indexOf("quesse") >= 0) {
+                        value = value + ":" + possibilities.shift();
+                        s = "";
+                    } else {
+                        maybeFinal = value + ":" + possibilities.shift();
+                    }
                 }
             }
             parts.push(value);
             first = false;
-            return w + s + y;
+            return w + y + s;
         });
         if (length === latin.length) {
             length = latin.length;
@@ -796,6 +838,11 @@ function transcribeWordToEncoding(latin) {
         }
     }
     if (parts.length) {
+        if (maybeFinal && parts[parts.length - 1] == "silme") {
+            parts.pop();
+            parts.pop();
+            parts.push(maybeFinal);
+        }
         parts.push(parts.pop().replace("romen", "ore"));
     }
     /*
@@ -828,6 +875,7 @@ function transcribeWordToEncoding(latin) {
     return parts.join(";");
 }
 
+exports.transcribeToEncoding = transcribeToEncoding;
 function transcribeToEncoding(latin) {
     latin = latin.replace(/[,:] +/g, ",");
     return latin.split(/\n\n\n+/).map(function (section) {
@@ -949,48 +997,6 @@ function transcribeHtml(latin) {
     return decodeToFontHtml(transcribeToEncoding(latin));
 }
 
-var tests = [
-    "tyelpe",
-    "telperion",
-    "hyarmen",
-    "hwesta sindarinwa",
-    "hobbits",
-    "hobytla",
-    "perian",
-    "periannath",
-    "istar",
-    "istari",
-    "nwalme",
-    "isildur",
-    "helcaraxë",
-    "sul",
-    "amon sûl",
-    "lothlórien",
-    "ardalambion",
-    "baranduiniant",
-    "iant iaur",
-    "glorfindel",
-    "galadriel",
-    "galadhrim",
-    "gwaith iaur arnor",
-    "aldost",
-    "noldo",
-    "noldor",
-    "gwathlo",
-    "ainaldo",
-    "varda",
-    "mae govannen"
-];
-
-exports.displayTests = displayTests;
-function displayTests() {
-    document.body.innerHTML = tests.map(function (test) {
-        var transcribed = transcribeHtml(test);
-        var fontified = decodeToFont(transcribed);
-        return "<p>" + test + ": " + transcribed + " <span class=\"tengwar\">" + fontified + "</span></p>\n";
-    }).join("");
-}
-
 if (typeof jQuery !== "undefined") {
     jQuery.fn.tengwar = function () {
         this.each(function () {
diff --git a/tests.js b/tests.js
new file mode 100644 (file)
index 0000000..5293a4c
--- /dev/null
+++ b/tests.js
@@ -0,0 +1,89 @@
+var T = require("./tengwar");
+
+// Expected transcriptions keyed by Latin input.  In each value, words are
+// separated by " space ", clusters by whitespace, and the parts of a cluster
+// by "/".  The order of parts within a cluster is not significant because
+// the test harness below sorts them before comparing.
+var tests = {
+
+    // sindarin (sorted)
+    "ainur": "a/anna numen u/ore",
+    "aldost": "a/lambe ando o/silme-nuquerna tinco",
+    "amon sûl": "a/malta o/numen space silme ú/lambe",
+    "aragorn": "a/romen a/ungwe o/romen numen",
+    "atto": "a/tinco/tilde-below o/short-carrier",
+    "baranduiniant": "umbar a/romen a/ando/tilde-above u/anna yanta a/anto/tilde-above",
+    "dagor bragolach": "ando a/ungwe o/ore space umbar romen a/ungwe o/lambe a/hwesta",
+    "galadhrim": "ungwe a/lambe a/anto romen i/malta",
+    "galadriel": "ungwe a/lambe a/ando romen i/short-carrier e/lambe",
+    "gandalf": "ungwe a/ando/tilde-above a/lambe formen",
+    "glorfindel": "ungwe lambe o/romen formen i/ando/tilde-above e/lambe",
+    "gwaith iaur arnor": "ungwe/w a/anna thule space yanta a/vala ore space a/romen numen o/ore",
+    "gwathló": "ungwe/w a/thule lambe o/long-carrier",
+    "hwesta sindarinwa": "hwesta-sindarinwa e/silme-nuquerna tinco a/short-carrier space silme i/ando/tilde-above a/romen i/numen vala a/short-carrier",
+    "iant": "yanta a/tinco/tilde-above",
+    "iaur": "yanta a/vala ore",
+    "isildur": "i/silme-nuquerna i/lambe ando u/ore",
+    "lhûn": "alda ú/numen",
+    "lothlórien": "lambe o/thule lambe ó/romen i/short-carrier e/numen",
+    "mae govannen": "malta a/yanta space ungwe o/ampa a/numen/tilde-above e/numen",
+    "mellon": "malta e/lambe/tilde-below o/numen",
+    "mordor": "malta o/romen ando o/ore",
+    "moria": "malta o/romen i/short-carrier a/short-carrier",
+    "noldor": "nwalme o/lambe ando o/ore",
+    "nwalme": "nwalme/w a/lambe malta e/short-carrier",
+    "periannath": "parma e/romen i/short-carrier a/numen/tilde-above a/thule",
+    "rhûn": "arda ú/numen",
+    "tyelpe": "tinco/y e/lambe parma e/short-carrier",
+    "varda": "ampa a/romen ando a/short-carrier",
+    "á": "a/wilya",
+    "ñoldor": "nwalme o/lambe ando o/ore",
+
+    // quenya (sorted)
+    "ardalambion": "a/romen ando a/lambe a/umbar/tilde-above i/short-carrier o/numen",
+    "helcaraxë": "hyarmen e/lambe quesse a/romen a/quesse/s e/short-carrier",
+    "hyarmen": "hyarmen/y a/romen malta e/numen",
+    "istari": "i/silme-nuquerna tinco a/romen i/short-carrier",
+    "sinome maruvan": "silme i/numen o/malta e/short-carrier space malta a/romen u/ampa a/numen",
+    "telperion": "tinco e/lambe parma e/romen i/short-carrier o/numen",
+    "yuldar": "í/short-carrier u/lambe ando a/ore",
+
+    // english
+    "hobbits": "hyarmen o/umbar/tilde-below i/tinco/s",
+    "hobbits'": "hyarmen o/umbar/tilde-below i/tinco/s-inverse",
+    "hobbits''": "hyarmen o/umbar/tilde-below i/tinco/s-extended",
+
+    // old english
+    "írensaga": "i/long-carrier romen e/numen silme a/ungwe a/short-carrier",
+
+    // interesting clusters
+    "xx": "quesse/s quesse/s",
+    "tsts": "tinco silme tinco/s",
+    "iqs": "i/quesse vala silme",
+    "aty": "a/tinco/y",
+    "is": "i/short-carrier/s",
+    "allys": "a/lambe/tilde-below/y/s",
+    "alyssa": "a/lambe/y silme/tilde-below a/short-carrier",
+
+};
+
+exports.tests = tests;
+
+// Parse a transcription into words -> clusters -> alphabetically sorted parts.
+function parseParts(text, wordSeparator, clusterSeparator, partSeparator) {
+    return text.split(wordSeparator).map(function (word) {
+        return word.split(clusterSeparator).map(function (cluster) {
+            return cluster.split(partSeparator).sort();
+        });
+    });
+}
+Object.keys(tests).forEach(function (input) {
+    exports['test ' + input] = function (assert) {
+        var oracle = parseParts(tests[input], " space ", /\s+/, "/");
+        var actual = parseParts(T.transcribeToEncoding(input), /\s+/, ";", ":");
+        assert.deepEqual(actual, oracle, 'transcribe');
+    };
+});
+
+// When run directly rather than required, hand the suite to the "test" module.
+if (require.main === module) require("test").run(exports);