Created regression tests.

author Kris Kowal <kris.kowal@cixar.com>

Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)

committer Kris Kowal <kris.kowal@cixar.com>

Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
author Kris Kowal <kris.kowal@cixar.com>
Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
committer Kris Kowal <kris.kowal@cixar.com>
Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
diff --git a/index.html b/index.html

index d5d9adc..d31397e 100644 (file)
--- a/index.html
+++ b/index.html
@@ -1,6 +1,7 @@
  <html>
      <head>
          <meta http-equiv="content-type" content="text/html; charset=utf-8">
+        <title>Tengwar Transcriber</title>
          <link rel="stylesheet" type="text/css" href="style.css">
          <link rel="stylesheet" type="text/css" href="index.css">
      </head>
diff --git a/samples.html b/samples.html

index bb03c76..77ed529 100644 (file)
--- a/samples.html
+++ b/samples.html
@@ -117,6 +117,8 @@
  
          <p>Hobbits'': <span class="tengwar">Hobbits''</span>. Illustrates the use of a straight apostrophe to use alternate S-hooks.</p>
  
+        <p>XX ksks tsts IQS: <span class="tengwar">XX ksks tsts IQS</span>. Some problematic tehta and S-hook combinations. T-S-hook must only be final. K-S-hook can be medial.  ICWS cluster must not place S-hook on C.</p>
+
          <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.6.4/jquery.min.js"></script>
          <script src="tengwar.js" charset="utf-8"></script>
          <script>$(".tengwar").tengwar()</script>
diff --git a/tengwar.js b/tengwar.js

index 19e16ff..d20b211 100644 (file)
--- a/tengwar.js
+++ b/tengwar.js
@@ -150,7 +150,6 @@ var mode = {
              "ando": "+",
              "numen": "+",
              "lambe": "_",
-            "calma": "|",
              "quesse": "|",
              "short-carrier": "}",
          },
@@ -467,65 +466,90 @@ var mode = {
          "û": "ú"
      },
      "transcriptions": {
+
+        // consonants
          "t": "tinco",
          "nt": "tinco:tilde-above",
          "tt": "tinco:tilde-below",
+
          "p": "parma",
          "mp": "parma:tilde-above",
          "pp": "parma:tilde-below",
+
          "c": "quesse",
          "nc": "quesse:tilde-above",
+
          "d": "ando",
          "nd": "ando:tilde-above",
          "dd": "ando:tilde-below",
+
          "b": "umbar",
          "mb": "umbar:tilde-above",
          "bb": "umbar:tilde-below",
+
          "g": "ungwe",
          "ng": "ungwe:tilde-above",
          "gg": "ungwe:tilde-below",
+
          "th": "thule",
          "nth": "thule:tilde-above",
+
          "f": "formen",
          "ph": "formen",
          "mf": "formen:tilde-above",
          "mph": "formen:tilde-above",
+
+        "sh": "harma",
+
          "h": "hyarmen",
          "ch": "hwesta",
+        "hw": "hwesta-sindarinwa",
+        "wh": "hwesta-sindarinwa",
+
          "gh": "unque",
-        "d": "ando",
-        "nd": "ando:tilde-above",
+        "ngh": "unque:tilde-above",
+
          "dh": "anto",
          "ndh": "anto:tilde-above",
+
          "v": "ampa",
          "bh": "ampa",
          "mv": "ampa:tilde-above",
          "mbh": "ampa:tilde-above",
-        "ngh": "unque:tilde-above",
+
+        "j": "anca",
+        "nj": "anca:tilde-above",
+
          "n": "numen",
          "nn": "numen:tilde-above",
+
          "m": "malta",
          "mm": "malta:tilde-above",
+
          "ng": "nwalme",
+        "ñ": "nwalme",
+        "nwal": "nwalme:w;lambe:a",
+
          "r": "romen",
-        //"rd": "arda",
+        "rr": "romen:tilde-below",
+        "rh": "arda",
+
          "l": "lambe",
          "ll": "lambe:tilde-below",
-        "bb": "umbar:tilde-below",
+        "lh": "alda",
+
          "s": "silme",
          "ss": "silme:tilde-below",
-        "sh": "harma",
-        "j": "anca",
+
          "z": "esse",
-        //"ld": "alda",
-        "lh": "alda", // probably not
-        "rh": "arda",
+
          "á": "wilya:a",
          "é": "long-carrier:e",
          "í": "long-carrier:i",
          "ó": "long-carrier:o",
          "ú": "long-carrier:u",
          "w": "vala",
+
          "ai": "anna:a",
          "oi": "anna:o",
          "ui": "anna:u",
@@ -533,33 +557,37 @@ var mode = {
          "eu": "vala:e",
          "iu": "vala:i",
          "ae": "yanta:a",
-        "hw": "hwesta-sindarinwa",
-        "wh": "hwesta-sindarinwa",
-        "ñ": "nwalme",
-        "nwal": "nwalme:w;lambe:a",
+
      },
      "vowelTranscriptions": {
+
          "a": "short-carrier:a",
          "e": "short-carrier:e",
          "i": "short-carrier:i",
          "o": "short-carrier:o",
          "u": "short-carrier:u",
+
          "á": "wilya:a",
          "é": "long-carrier:e",
          "í": "long-carrier:i",
          "ó": "short-carrier:ó",
          "ú": "short-carrier:ú",
+
          "w": "vala",
          "y": "short-carrier:í"
+
      },
+
      "words": {
          "iant": "yanta;tinco:tilde-above:a",
          "iaur": "yanta;vala:a;ore",
+        "baranduiniant": "umbar;romen:a;ando:tilde-above:a;anna:u;yanta;anto:tilde-above:a",
          "ioreth": "yanta;romen:o;thule:e",
          "noldo": "nwalme;lambe:o;ando;short-carrier:o",
          "noldor": "nwalme;lambe:o;ando;ore:o",
          "is": "short-carrier:i:s"
      },
+
      "punctuation": {
          "-": "comma",
          ",": "comma",
@@ -573,6 +601,7 @@ var mode = {
          ">": "flourish-left",
          "<": "flourish-right"
      },
+
      "annotations": {
          "tinco": {"tengwa": "t"},
          "parma": {"tengwa": "p"},
@@ -708,7 +737,7 @@ var vowelTranscriptionsRe = new RegExp("^(" +
  var substitutionsRe = new RegExp("(" +
      Object.keys(mode.substitutions).join("|") +
  ")", "ig");
-                        
+
  function transcribeWordToEncoding(latin) {
      latin = latin
      .toLowerCase()
@@ -720,7 +749,10 @@ function transcribeWordToEncoding(latin) {
      var parts = [];
      var length;
      var first = true;
+    var maybeFinal;
      while (latin.length) {
+        if (latin[0] != "s")
+            maybeFinal = undefined;
          length = latin.length;
          latin = latin
          .replace(transcriptionsRe, function ($, vowel, tengwa, w, y, s, prime) {
@@ -735,8 +767,14 @@ function transcribeWordToEncoding(latin) {
              if (vowel) {
                  if (!voweled) {
                      // flip if necessary
-                    if (tehtaForTengwa(tengwa, vowel) === null && tehtaForTengwa(tengwa + "-nuquerna", vowel) !== null) {
-                        value = [tengwa + "-nuquerna"].concat(tehtar).concat([vowel]).filter(function (part) {
+                    if (
+                        tehtaForTengwa(tengwa, vowel) === null &&
+                        tehtaForTengwa(tengwa + "-nuquerna", vowel) !== null
+                    ) {
+                        value = [tengwa + "-nuquerna"]
+                        .concat(tehtar)
+                        .concat([vowel])
+                        .filter(function (part) {
                              return part;
                          }).join(":");
                      } else {
@@ -757,7 +795,7 @@ function transcribeWordToEncoding(latin) {
                  y = "";
              }
              // must go last because it has a non-zero width
-            if (s) {
+            if (s && !w) {
                  var length = prime.length;
                  var possibilities = [
                      "s",
@@ -772,13 +810,17 @@ function transcribeWordToEncoding(latin) {
                      length--;
                  }
                  if (possibilities.length) {
-                    value += ":" + possibilities.shift();
-                    s = "";
+                    if (value.split(":").indexOf("quesse") >= 0) {
+                        value = value + ":" + possibilities.shift();
+                        s = "";
+                    } else {
+                        maybeFinal = value + ":" + possibilities.shift();
+                    }
                  }
              }
              parts.push(value);
              first = false;
-            return w + s + y;
+            return w + y + s;
          });
          if (length === latin.length) {
              length = latin.length;
@@ -796,6 +838,11 @@ function transcribeWordToEncoding(latin) {
          }
      }
      if (parts.length) {
+        if (maybeFinal && parts[parts.length - 1] == "silme") {
+            parts.pop();
+            parts.pop();
+            parts.push(maybeFinal);
+        }
          parts.push(parts.pop().replace("romen", "ore"));
      }
      /*
@@ -828,6 +875,7 @@ function transcribeWordToEncoding(latin) {
      return parts.join(";");
  }
  
+exports.transcribeToEncoding = transcribeToEncoding;
  function transcribeToEncoding(latin) {
      latin = latin.replace(/[,:] +/g, ",");
      return latin.split(/\n\n\n+/).map(function (section) {
@@ -949,48 +997,6 @@ function transcribeHtml(latin) {
      return decodeToFontHtml(transcribeToEncoding(latin));
  }
  
-var tests = [
-    "tyelpe",
-    "telperion",
-    "hyarmen",
-    "hwesta sindarinwa",
-    "hobbits",
-    "hobytla",
-    "perian",
-    "periannath",
-    "istar",
-    "istari",
-    "nwalme",
-    "isildur",
-    "helcaraxë",
-    "sul",
-    "amon sûl",
-    "lothlórien",
-    "ardalambion",
-    "baranduiniant",
-    "iant iaur",
-    "glorfindel",
-    "galadriel",
-    "galadhrim",
-    "gwaith iaur arnor",
-    "aldost",
-    "noldo",
-    "noldor",
-    "gwathlo",
-    "ainaldo",
-    "varda",
-    "mae govannen"
-];
-
-exports.displayTests = displayTests;
-function displayTests() {
-    document.body.innerHTML = tests.map(function (test) {
-        var transcribed = transcribeHtml(test);
-        var fontified = decodeToFont(transcribed);
-        return "<p>" + test + ": " + transcribed + " <span class=\"tengwar\">" + fontified + "</span></p>\n";
-    }).join("");
-}
-
  if (typeof jQuery !== "undefined") {
      jQuery.fn.tengwar = function () {
          this.each(function () {
diff --git a/tests.js b/tests.js

new file mode 100644 (file)

index 0000000..5293a4c
--- /dev/null
+++ b/tests.js
@@ -0,0 +1,89 @@
+var T = require("./tengwar");
+
+var tests = {
+    // input text -> expected encoding: clusters separated by whitespace, parts of a cluster by "/", words by " space "
+
+    // sindarin (sorted)
+    "ainur": "a/anna numen u/ore",
+    "aldost": "a/lambe ando o/silme-nuquerna tinco",
+    "amon sûl": "a/malta o/numen space silme ú/lambe",
+    "aragorn": "a/romen a/ungwe o/romen numen",
+    "atto": "a/tinco/tilde-below o/short-carrier",
+    "baranduiniant": "umbar a/romen a/ando/tilde-above u/anna yanta a/anto/tilde-above",
+    "dagor bragolach": "ando a/ungwe o/ore space umbar romen a/ungwe o/lambe a/hwesta",
+    "galadhrim": "ungwe a/lambe a/anto romen i/malta",
+    "galadriel": "ungwe a/lambe a/ando romen i/short-carrier e/lambe",
+    "gandalf": "ungwe a/ando/tilde-above a/lambe formen",
+    "glorfindel": "ungwe lambe o/romen formen i/ando/tilde-above e/lambe",
+    "gwaith iaur arnor": "ungwe/w a/anna thule space yanta a/vala ore space a/romen numen o/ore",
+    "gwathló": "ungwe/w a/thule lambe o/long-carrier",
+    "hwesta sindarinwa": "hwesta-sindarinwa e/silme-nuquerna tinco a/short-carrier space silme i/ando/tilde-above a/romen i/numen vala a/short-carrier",
+    "iant": "yanta a/tinco/tilde-above",
+    "iaur": "yanta a/vala ore",
+    "isildur": "i/silme-nuquerna i/lambe ando u/ore",
+    "lhûn": "alda ú/numen",
+    "lothlórien": "lambe o/thule lambe ó/romen i/short-carrier e/numen",
+    "mae govannen": "malta a/yanta space ungwe o/ampa a/numen/tilde-above e/numen",
+    "mellon": "malta e/lambe/tilde-below o/numen",
+    "mordor": "malta o/romen ando o/ore",
+    "moria": "malta o/romen i/short-carrier a/short-carrier",
+    "noldor": "nwalme o/lambe ando o/ore",
+    "nwalme": "nwalme/w a/lambe malta e/short-carrier",
+    "periannath": "parma e/romen i/short-carrier a/numen/tilde-above a/thule",
+    "rhûn": "arda ú/numen",
+    "tyelpe": "tinco/y e/lambe parma e/short-carrier",
+    "varda": "ampa a/romen ando a/short-carrier",
+    "á": "a/wilya",
+    "ñoldor": "nwalme o/lambe ando o/ore",
+
+    // quenya (sorted)
+    "ardalambion": "a/romen ando a/lambe a/umbar/tilde-above i/short-carrier o/numen",
+    "helcaraxë": "hyarmen e/lambe quesse a/romen a/quesse/s e/short-carrier",
+    "hyarmen": "hyarmen/y a/romen malta e/numen",
+    "istari": "i/silme-nuquerna tinco a/romen i/short-carrier",
+    "sinome maruvan": "silme i/numen o/malta e/short-carrier space malta a/romen u/ampa a/numen",
+    "telperion": "tinco e/lambe parma e/romen i/short-carrier o/numen",
+    "yuldar": "í/short-carrier u/lambe ando a/ore",
+
+    // english
+    "hobbits": "hyarmen o/umbar/tilde-below i/tinco/s",
+    "hobbits'": "hyarmen o/umbar/tilde-below i/tinco/s-inverse",
+    "hobbits''": "hyarmen o/umbar/tilde-below i/tinco/s-extended",
+
+    // old english
+    "írensaga": "i/long-carrier romen e/numen silme a/ungwe a/short-carrier",
+
+    // interesting clusters
+    "xx": "quesse/s quesse/s",
+    "tsts": "tinco silme tinco/s",
+    "iqs": "i/quesse vala silme",
+    "aty": "a/tinco/y",
+    "is": "i/short-carrier/s",
+    "allys": "a/lambe/tilde-below/y/s",
+    "alyssa": "a/lambe/y silme/tilde-below a/short-carrier",
+
+
+
+
+};
+
+exports.tests = tests;
+
+Object.keys(tests).forEach(function (input) { // export one "test <input>" function per table entry
+    exports['test ' + input] = function (assert) {
+        var oracle = tests[input].split(" space ").map(function (phrase) { // expected: words -> clusters -> parts
+            return phrase.split(/\s+/).map(function (cluster) {
+                return cluster.split("/").sort(); // sorted so the order of parts within a cluster is ignored
+            });
+        });
+        var actual = T.transcribeToEncoding(input).split(/\s+/).map(function (phrase) { // actual: same nesting
+            return phrase.split(";").map(function (cluster) {
+                return cluster.split(":").sort(); // same normalization as the oracle
+            });
+        });
+        assert.deepEqual(actual, oracle, 'transcribe');
+    };
+});
+
+if (require.main === module)
+    require("test").run(exports);
author	Kris Kowal <kris.kowal@cixar.com>
	Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
committer	Kris Kowal <kris.kowal@cixar.com>
	Mon, 19 Sep 2011 17:19:38 +0000 (10:19 -0700)
index.html		patch \| blob \| history
samples.html		patch \| blob \| history
tengwar.js		patch \| blob \| history
tests.js	[new file with mode: 0644]	patch \| blob