diff --git a/typo/dictionaries/en_US/en_US.dic_delta b/typo/dictionaries/en_US/en_US.dic_delta new file mode 100644 index 00000000..e69de29b diff --git a/typo/typo.js b/typo/typo.js index 9da598d1..003c641a 100644 --- a/typo/typo.js +++ b/typo/typo.js @@ -1,927 +1,945 @@ -/* globals chrome: false */ -/* globals __dirname: false */ -/* globals require: false */ -/* globals Buffer: false */ -/* globals module: false */ - -/** - * Typo is a JavaScript implementation of a spellchecker using hunspell-style - * dictionaries. - */ - -var Typo; - -(function () { -"use strict"; - -/** - * Typo constructor. - * - * @param {String} [dictionary] The locale code of the dictionary being used. e.g., - * "en_US". This is only used to auto-load dictionaries. - * @param {String} [affData] The data from the dictionary's .aff file. If omitted - * and Typo.js is being used in a Chrome extension, the .aff - * file will be loaded automatically from - * lib/typo/dictionaries/[dictionary]/[dictionary].aff - * In other environments, it will be loaded from - * [settings.dictionaryPath]/dictionaries/[dictionary]/[dictionary].aff - * @param {String} [wordsData] The data from the dictionary's .dic file. If omitted - * and Typo.js is being used in a Chrome extension, the .dic - * file will be loaded automatically from - * lib/typo/dictionaries/[dictionary]/[dictionary].dic - * In other environments, it will be loaded from - * [settings.dictionaryPath]/dictionaries/[dictionary]/[dictionary].dic - * @param {Object} [settings] Constructor settings. Available properties are: - * {String} [dictionaryPath]: path to load dictionary from in non-chrome - * environment. - * {Object} [flags]: flag information. - * {Boolean} [asyncLoad]: If true, affData and wordsData will be loaded - * asynchronously. - * {Function} [loadedCallback]: Called when both affData and wordsData - * have been loaded. Only used if asyncLoad is set to true. The parameter - * is the instantiated Typo object. - * - * @returns {Typo} A Typo object. - */ - -Typo = function (dictionary, affData, wordsData, settings) { - settings = settings || {}; - - this.dictionary = null; - - this.rules = {}; - this.dictionaryTable = {}; - - this.compoundRules = []; - this.compoundRuleCodes = {}; - - this.replacementTable = []; - - this.flags = settings.flags || {}; - - this.memoized = {}; - - this.loaded = false; - - var self = this; - - var path; - - // Loop-control variables. - var i, j, _len, _jlen; - - if (dictionary) { - self.dictionary = dictionary; - - // If the data is preloaded, just setup the Typo object. - if (affData && wordsData) { - setup(); - } - // Loading data for Chrome extentions. - else if (typeof window !== 'undefined' && 'chrome' in window && 'extension' in window.chrome && 'getURL' in window.chrome.extension) { - if (settings.dictionaryPath) { - path = settings.dictionaryPath; +(function (root, factory) { + if(typeof define === "function" && define.amd) { + define(["typo"], factory); + } else if(typeof module === "object" && module.exports) { + module.exports = factory(require("typo")); + } else { + root.typo = factory(root.typo); + } +}(this, function() { + /* globals chrome: false */ + /* globals __dirname: false */ + /* globals require: false */ + /* globals Buffer: false */ + /* globals module: false */ + + /** + * Typo is a JavaScript implementation of a spellchecker using hunspell-style + * dictionaries. + */ + + var Typo; + + (function () { + "use strict"; + + /** + * Typo constructor. + * + * @param {String} [dictionary] The locale code of the dictionary being used. e.g., + * "en_US". This is only used to auto-load dictionaries. + * @param {String} [affData] The data from the dictionary's .aff file. If omitted + * and Typo.js is being used in a Chrome extension, the .aff + * file will be loaded automatically from + * lib/typo/dictionaries/[dictionary]/[dictionary].aff + * In other environments, it will be loaded from + * [settings.dictionaryPath]/dictionaries/[dictionary]/[dictionary].aff + * @param {String} [wordsData] The data from the dictionary's .dic file. If omitted + * and Typo.js is being used in a Chrome extension, the .dic + * file will be loaded automatically from + * lib/typo/dictionaries/[dictionary]/[dictionary].dic + * In other environments, it will be loaded from + * [settings.dictionaryPath]/dictionaries/[dictionary]/[dictionary].dic + * @param {Object} [settings] Constructor settings. Available properties are: + * {String} [dictionaryPath]: path to load dictionary from in non-chrome + * environment. + * {Object} [flags]: flag information. + * {Boolean} [asyncLoad]: If true, affData and wordsData will be loaded + * asynchronously. + * {Function} [loadedCallback]: Called when both affData and wordsData + * have been loaded. Only used if asyncLoad is set to true. The parameter + * is the instantiated Typo object. + * + * @returns {Typo} A Typo object. + */ + + Typo = function (dictionary, affData, wordsData, wordsDelta, settings) { + settings = settings || {}; + + this.dictionary = null; + + this.rules = {}; + this.dictionaryTable = {}; + + this.compoundRules = []; + this.compoundRuleCodes = {}; + + this.replacementTable = []; + + this.flags = settings.flags || {}; + + this.memoized = {}; + + this.loaded = false; + + var self = this; + + var path; + + // Loop-control variables. + var i, j, _len, _jlen; + + if (dictionary) { + self.dictionary = dictionary; + + // If the data is preloaded, just setup the Typo object. + if (affData && wordsData) { + setup(); + } + // Loading data for Chrome extentions. + else if (typeof window !== 'undefined' && 'chrome' in window && 'extension' in window.chrome && 'getURL' in window.chrome.extension) { + if (settings.dictionaryPath) { + path = settings.dictionaryPath; + } + else { + path = "typo/dictionaries"; + } + + if (!affData) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".aff"), setAffData); + if (!wordsData) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".dic"), setWordsData); + if (!wordsDelta) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".dic_delta"), setWordsDelta); } else { - path = "typo/dictionaries"; + if (settings.dictionaryPath) { + path = settings.dictionaryPath; + } + else if (typeof __dirname !== 'undefined') { + path = __dirname + '/dictionaries'; + } + else { + path = './dictionaries'; + } + + if (!affData) readDataFile(path + "/" + dictionary + "/" + dictionary + ".aff", setAffData); + if (!wordsData) readDataFile(path + "/" + dictionary + "/" + dictionary + ".dic", setWordsData); + if (!wordsDelta) readDataFile(path + "/" + dictionary + "/" + dictionary + ".dic_delta", setWordsDelta); } - - if (!affData) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".aff"), setAffData); - if (!wordsData) readDataFile(chrome.extension.getURL(path + "/" + dictionary + "/" + dictionary + ".dic"), setWordsData); } - else { - if (settings.dictionaryPath) { - path = settings.dictionaryPath; - } - else if (typeof __dirname !== 'undefined') { - path = __dirname + '/dictionaries'; + + function readDataFile(url, setFunc) { + var response = self._readFile(url, null, settings.asyncLoad); + + if (settings.asyncLoad) { + response.then(function(data) { + setFunc(data); + }); } else { - path = './dictionaries'; + setFunc(response); } - - if (!affData) readDataFile(path + "/" + dictionary + "/" + dictionary + ".aff", setAffData); - if (!wordsData) readDataFile(path + "/" + dictionary + "/" + dictionary + ".dic", setWordsData); } - } - - function readDataFile(url, setFunc) { - var response = self._readFile(url, null, settings.asyncLoad); - - if (settings.asyncLoad) { - response.then(function(data) { - setFunc(data); - }); - } - else { - setFunc(response); - } - } - function setAffData(data) { - affData = data; + function setAffData(data) { + affData = data; - if (wordsData) { - setup(); + if (wordsData && wordsDelta) { + setup(); + } } - } - function setWordsData(data) { - wordsData = data; + function setWordsData(data) { + wordsData = data; - if (affData) { - setup(); - } - } - - function setup() { - self.rules = self._parseAFF(affData); - - // Save the rule codes that are used in compound rules. - self.compoundRuleCodes = {}; - - for (i = 0, _len = self.compoundRules.length; i < _len; i++) { - var rule = self.compoundRules[i]; - - for (j = 0, _jlen = rule.length; j < _jlen; j++) { - self.compoundRuleCodes[rule[j]] = []; + if (affData && wordsDelta) { + setup(); } } - - // If we add this ONLYINCOMPOUND flag to self.compoundRuleCodes, then _parseDIC - // will do the work of saving the list of words that are compound-only. - if ("ONLYINCOMPOUND" in self.flags) { - self.compoundRuleCodes[self.flags.ONLYINCOMPOUND] = []; - } - - self.dictionaryTable = self._parseDIC(wordsData); - - // Get rid of any codes from the compound rule codes that are never used - // (or that were special regex characters). Not especially necessary... - for (i in self.compoundRuleCodes) { - if (self.compoundRuleCodes[i].length === 0) { - delete self.compoundRuleCodes[i]; + + function setWordsDelta(data) { + wordsDelta = data; + + if (affData && wordsData) { + setup(); } } - - // Build the full regular expressions for each compound rule. - // I have a feeling (but no confirmation yet) that this method of - // testing for compound words is probably slow. - for (i = 0, _len = self.compoundRules.length; i < _len; i++) { - var ruleText = self.compoundRules[i]; - - var expressionText = ""; - - for (j = 0, _jlen = ruleText.length; j < _jlen; j++) { - var character = ruleText[j]; - - if (character in self.compoundRuleCodes) { - expressionText += "(" + self.compoundRuleCodes[character].join("|") + ")"; + + function setup() { + self.rules = self._parseAFF(affData); + + // Save the rule codes that are used in compound rules. + self.compoundRuleCodes = {}; + + for (i = 0, _len = self.compoundRules.length; i < _len; i++) { + var rule = self.compoundRules[i]; + + for (j = 0, _jlen = rule.length; j < _jlen; j++) { + self.compoundRuleCodes[rule[j]] = []; } - else { - expressionText += character; + } + + // If we add this ONLYINCOMPOUND flag to self.compoundRuleCodes, then _parseDIC + // will do the work of saving the list of words that are compound-only. + if ("ONLYINCOMPOUND" in self.flags) { + self.compoundRuleCodes[self.flags.ONLYINCOMPOUND] = []; + } + + self.dictionaryTable = self._parseDIC(wordsData + wordsDelta); + + // Get rid of any codes from the compound rule codes that are never used + // (or that were special regex characters). Not especially necessary... + for (i in self.compoundRuleCodes) { + if (self.compoundRuleCodes[i].length === 0) { + delete self.compoundRuleCodes[i]; } } - - self.compoundRules[i] = new RegExp(expressionText, "i"); - } - - self.loaded = true; - - if (settings.asyncLoad && settings.loadedCallback) { - settings.loadedCallback(self); - } - } - - return this; -}; -Typo.prototype = { - /** - * Loads a Typo instance from a hash of all of the Typo properties. - * - * @param object obj A hash of Typo properties, probably gotten from a JSON.parse(JSON.stringify(typo_instance)). - */ - - load : function (obj) { - for (var i in obj) { - if (obj.hasOwnProperty(i)) { - this[i] = obj[i]; + // Build the full regular expressions for each compound rule. + // I have a feeling (but no confirmation yet) that this method of + // testing for compound words is probably slow. + for (i = 0, _len = self.compoundRules.length; i < _len; i++) { + var ruleText = self.compoundRules[i]; + + var expressionText = ""; + + for (j = 0, _jlen = ruleText.length; j < _jlen; j++) { + var character = ruleText[j]; + + if (character in self.compoundRuleCodes) { + expressionText += "(" + self.compoundRuleCodes[character].join("|") + ")"; + } + else { + expressionText += character; + } + } + + self.compoundRules[i] = new RegExp(expressionText, "i"); + } + + self.loaded = true; + + if (settings.asyncLoad && settings.loadedCallback) { + settings.loadedCallback(self); } } - + return this; - }, - - /** - * Read the contents of a file. - * - * @param {String} path The path (relative) to the file. - * @param {String} [charset="ISO8859-1"] The expected charset of the file - * @param {Boolean} async If true, the file will be read asynchronously. For node.js this does nothing, all - * files are read synchronously. - * @returns {String} The file data if async is false, otherwise a promise object. If running node.js, the data is - * always returned. - */ - - _readFile : function (path, charset, async) { - charset = charset || "utf8"; - - if (typeof XMLHttpRequest !== 'undefined') { - var promise; - var req = new XMLHttpRequest(); - req.open("GET", path, async); - - if (async) { - promise = new Promise(function(resolve, reject) { - req.onload = function() { - if (req.status === 200) { - resolve(req.responseText); - } - else { + }; + + Typo.prototype = { + /** + * Loads a Typo instance from a hash of all of the Typo properties. + * + * @param object obj A hash of Typo properties, probably gotten from a JSON.parse(JSON.stringify(typo_instance)). + */ + + load : function (obj) { + for (var i in obj) { + if (obj.hasOwnProperty(i)) { + this[i] = obj[i]; + } + } + + return this; + }, + + /** + * Read the contents of a file. + * + * @param {String} path The path (relative) to the file. + * @param {String} [charset="ISO8859-1"] The expected charset of the file + * @param {Boolean} async If true, the file will be read asynchronously. For node.js this does nothing, all + * files are read synchronously. + * @returns {String} The file data if async is false, otherwise a promise object. If running node.js, the data is + * always returned. + */ + + _readFile : function (path, charset, async) { + charset = charset || "utf8"; + + if (typeof XMLHttpRequest !== 'undefined') { + var promise; + var req = new XMLHttpRequest(); + req.open("GET", path, async); + + if (async) { + promise = new Promise(function(resolve, reject) { + req.onload = function() { + if (req.status === 200) { + resolve(req.responseText); + } + else { + reject(req.statusText); + } + }; + + req.onerror = function() { reject(req.statusText); } - }; - - req.onerror = function() { - reject(req.statusText); - } - }); - } - - if (req.overrideMimeType) - req.overrideMimeType("text/plain; charset=" + charset); - - req.send(null); - - return async ? promise : req.responseText; - } - else if (typeof require !== 'undefined') { - // Node.js - var fs = require("fs"); - - try { - if (fs.existsSync(path)) { - var stats = fs.statSync(path); - - var fileDescriptor = fs.openSync(path, 'r'); - - var buffer = new Buffer(stats.size); - - fs.readSync(fileDescriptor, buffer, 0, buffer.length, null); - - return buffer.toString(charset, 0, buffer.length); + }); } - else { - console.log("Path " + path + " does not exist."); + + if (req.overrideMimeType) + req.overrideMimeType("text/plain; charset=" + charset); + + req.send(null); + + return async ? promise : req.responseText; + } + else if (typeof require !== 'undefined') { + // Node.js + var fs = require("fs"); + + try { + if (fs.existsSync(path)) { + var stats = fs.statSync(path); + + var fileDescriptor = fs.openSync(path, 'r'); + + var buffer = new Buffer(stats.size); + + fs.readSync(fileDescriptor, buffer, 0, buffer.length, null); + + return buffer.toString(charset, 0, buffer.length); + } + else { + console.log("Path " + path + " does not exist."); + } + } catch (e) { + console.log(e); + return ''; } - } catch (e) { - console.log(e); - return ''; } - } - }, - - /** - * Parse the rules out from a .aff file. - * - * @param {String} data The contents of the affix file. - * @returns object The rules from the file. - */ - - _parseAFF : function (data) { - var rules = {}; - - var line, subline, numEntries, lineParts; - var i, j, _len, _jlen; - - // Remove comment lines - data = this._removeAffixComments(data); - - var lines = data.split("\n"); - - for (i = 0, _len = lines.length; i < _len; i++) { - line = lines[i]; - - var definitionParts = line.split(/\s+/); - - var ruleType = definitionParts[0]; - - if (ruleType == "PFX" || ruleType == "SFX") { - var ruleCode = definitionParts[1]; - var combineable = definitionParts[2]; - numEntries = parseInt(definitionParts[3], 10); - - var entries = []; - - for (j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) { - subline = lines[j]; - - lineParts = subline.split(/\s+/); - var charactersToRemove = lineParts[2]; - - var additionParts = lineParts[3].split("/"); - - var charactersToAdd = additionParts[0]; - if (charactersToAdd === "0") charactersToAdd = ""; - - var continuationClasses = this.parseRuleCodes(additionParts[1]); - - var regexToMatch = lineParts[4]; - - var entry = {}; - entry.add = charactersToAdd; - - if (continuationClasses.length > 0) entry.continuationClasses = continuationClasses; - - if (regexToMatch !== ".") { - if (ruleType === "SFX") { - entry.match = new RegExp(regexToMatch + "$"); + }, + + /** + * Parse the rules out from a .aff file. + * + * @param {String} data The contents of the affix file. + * @returns object The rules from the file. + */ + + _parseAFF : function (data) { + var rules = {}; + + var line, subline, numEntries, lineParts; + var i, j, _len, _jlen; + + // Remove comment lines + data = this._removeAffixComments(data); + + var lines = data.split("\n"); + + for (i = 0, _len = lines.length; i < _len; i++) { + line = lines[i]; + + var definitionParts = line.split(/\s+/); + + var ruleType = definitionParts[0]; + + if (ruleType == "PFX" || ruleType == "SFX") { + var ruleCode = definitionParts[1]; + var combineable = definitionParts[2]; + numEntries = parseInt(definitionParts[3], 10); + + var entries = []; + + for (j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) { + subline = lines[j]; + + lineParts = subline.split(/\s+/); + var charactersToRemove = lineParts[2]; + + var additionParts = lineParts[3].split("/"); + + var charactersToAdd = additionParts[0]; + if (charactersToAdd === "0") charactersToAdd = ""; + + var continuationClasses = this.parseRuleCodes(additionParts[1]); + + var regexToMatch = lineParts[4]; + + var entry = {}; + entry.add = charactersToAdd; + + if (continuationClasses.length > 0) entry.continuationClasses = continuationClasses; + + if (regexToMatch !== ".") { + if (ruleType === "SFX") { + entry.match = new RegExp(regexToMatch + "$"); + } + else { + entry.match = new RegExp("^" + regexToMatch); + } } - else { - entry.match = new RegExp("^" + regexToMatch); + + if (charactersToRemove != "0") { + if (ruleType === "SFX") { + entry.remove = new RegExp(charactersToRemove + "$"); + } + else { + entry.remove = charactersToRemove; + } } + + entries.push(entry); } - - if (charactersToRemove != "0") { - if (ruleType === "SFX") { - entry.remove = new RegExp(charactersToRemove + "$"); - } - else { - entry.remove = charactersToRemove; - } + + rules[ruleCode] = { "type" : ruleType, "combineable" : (combineable == "Y"), "entries" : entries }; + + i += numEntries; + } + else if (ruleType === "COMPOUNDRULE") { + numEntries = parseInt(definitionParts[1], 10); + + for (j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) { + line = lines[j]; + + lineParts = line.split(/\s+/); + this.compoundRules.push(lineParts[1]); } - - entries.push(entry); - } - - rules[ruleCode] = { "type" : ruleType, "combineable" : (combineable == "Y"), "entries" : entries }; - - i += numEntries; - } - else if (ruleType === "COMPOUNDRULE") { - numEntries = parseInt(definitionParts[1], 10); - - for (j = i + 1, _jlen = i + 1 + numEntries; j < _jlen; j++) { - line = lines[j]; - + + i += numEntries; + } + else if (ruleType === "REP") { lineParts = line.split(/\s+/); - this.compoundRules.push(lineParts[1]); + + if (lineParts.length === 3) { + this.replacementTable.push([ lineParts[1], lineParts[2] ]); + } } - - i += numEntries; - } - else if (ruleType === "REP") { - lineParts = line.split(/\s+/); - - if (lineParts.length === 3) { - this.replacementTable.push([ lineParts[1], lineParts[2] ]); + else { + // ONLYINCOMPOUND + // COMPOUNDMIN + // FLAG + // KEEPCASE + // NEEDAFFIX + + this.flags[ruleType] = definitionParts[1]; } } - else { - // ONLYINCOMPOUND - // COMPOUNDMIN - // FLAG - // KEEPCASE - // NEEDAFFIX - - this.flags[ruleType] = definitionParts[1]; - } - } - - return rules; - }, - - /** - * Removes comment lines and then cleans up blank lines and trailing whitespace. - * - * @param {String} data The data from an affix file. - * @return {String} The cleaned-up data. - */ - - _removeAffixComments : function (data) { - // Remove comments - data = data.replace(/#.*$/mg, ""); - - // Trim each line - data = data.replace(/^\s\s*/m, '').replace(/\s\s*$/m, ''); - - // Remove blank lines. - data = data.replace(/\n{2,}/g, "\n"); - - // Trim the entire string - data = data.replace(/^\s\s*/, '').replace(/\s\s*$/, ''); - - return data; - }, - - /** - * Parses the words out from the .dic file. - * - * @param {String} data The data from the dictionary file. - * @returns object The lookup table containing all of the words and - * word forms from the dictionary. - */ - - _parseDIC : function (data) { - data = this._removeDicComments(data); - - var lines = data.split("\n"); - var dictionaryTable = {}; - - function addWord(word, rules) { - // Some dictionaries will list the same word multiple times with different rule sets. - if (!dictionaryTable.hasOwnProperty(word)) { - dictionaryTable[word] = null; - } - - if (rules.length > 0) { - if (dictionaryTable[word] === null) { - dictionaryTable[word] = []; - } - - dictionaryTable[word].push(rules); + + return rules; + }, + + /** + * Removes comment lines and then cleans up blank lines and trailing whitespace. + * + * @param {String} data The data from an affix file. + * @return {String} The cleaned-up data. + */ + + _removeAffixComments : function (data) { + // Remove comments + data = data.replace(/#.*$/mg, ""); + + // Trim each line + data = data.replace(/^\s\s*/m, '').replace(/\s\s*$/m, ''); + + // Remove blank lines. + data = data.replace(/\n{2,}/g, "\n"); + + // Trim the entire string + data = data.replace(/^\s\s*/, '').replace(/\s\s*$/, ''); + + return data; + }, + + /** + * Parses the words out from the .dic file. + * + * @param {String} data The data from the dictionary file. + * @returns object The lookup table containing all of the words and + * word forms from the dictionary. + */ + + _parseDIC : function (data) { + data = this._removeDicComments(data); + + var lines = data.split("\n"); + var dictionaryTable = {}; + + function addWord(word, rules) { + // Some dictionaries will list the same word multiple times with different rule sets. + if (!dictionaryTable.hasOwnProperty(word)) { + dictionaryTable[word] = null; + } + + if (rules.length > 0) { + if (dictionaryTable[word] === null) { + dictionaryTable[word] = []; + } + + dictionaryTable[word].push(rules); + } } - } - - // The first line is the number of words in the dictionary. - for (var i = 1, _len = lines.length; i < _len; i++) { - var line = lines[i]; - - var parts = line.split("/", 2); - - var word = parts[0]; - - // Now for each affix rule, generate that form of the word. - if (parts.length > 1) { - var ruleCodesArray = this.parseRuleCodes(parts[1]); - - // Save the ruleCodes for compound word situations. - if (!("NEEDAFFIX" in this.flags) || ruleCodesArray.indexOf(this.flags.NEEDAFFIX) == -1) { - addWord(word, ruleCodesArray); - } - - for (var j = 0, _jlen = ruleCodesArray.length; j < _jlen; j++) { - var code = ruleCodesArray[j]; - - var rule = this.rules[code]; - - if (rule) { - var newWords = this._applyRule(word, rule); - - for (var ii = 0, _iilen = newWords.length; ii < _iilen; ii++) { - var newWord = newWords[ii]; - - addWord(newWord, []); - - if (rule.combineable) { - for (var k = j + 1; k < _jlen; k++) { - var combineCode = ruleCodesArray[k]; - - var combineRule = this.rules[combineCode]; - - if (combineRule) { - if (combineRule.combineable && (rule.type != combineRule.type)) { - var otherNewWords = this._applyRule(newWord, combineRule); - - for (var iii = 0, _iiilen = otherNewWords.length; iii < _iiilen; iii++) { - var otherNewWord = otherNewWords[iii]; - addWord(otherNewWord, []); + + // The first line is the number of words in the dictionary. + for (var i = 1, _len = lines.length; i < _len; i++) { + var line = lines[i]; + + var parts = line.split("/", 2); + + var word = parts[0]; + + // Now for each affix rule, generate that form of the word. + if (parts.length > 1) { + var ruleCodesArray = this.parseRuleCodes(parts[1]); + + // Save the ruleCodes for compound word situations. + if (!("NEEDAFFIX" in this.flags) || ruleCodesArray.indexOf(this.flags.NEEDAFFIX) == -1) { + addWord(word, ruleCodesArray); + } + + for (var j = 0, _jlen = ruleCodesArray.length; j < _jlen; j++) { + var code = ruleCodesArray[j]; + + var rule = this.rules[code]; + + if (rule) { + var newWords = this._applyRule(word, rule); + + for (var ii = 0, _iilen = newWords.length; ii < _iilen; ii++) { + var newWord = newWords[ii]; + + addWord(newWord, []); + + if (rule.combineable) { + for (var k = j + 1; k < _jlen; k++) { + var combineCode = ruleCodesArray[k]; + + var combineRule = this.rules[combineCode]; + + if (combineRule) { + if (combineRule.combineable && (rule.type != combineRule.type)) { + var otherNewWords = this._applyRule(newWord, combineRule); + + for (var iii = 0, _iiilen = otherNewWords.length; iii < _iiilen; iii++) { + var otherNewWord = otherNewWords[iii]; + addWord(otherNewWord, []); + } } } } } } } + + if (code in this.compoundRuleCodes) { + this.compoundRuleCodes[code].push(word); + } } - - if (code in this.compoundRuleCodes) { - this.compoundRuleCodes[code].push(word); - } + } + else { + addWord(word.trim(), []); } } - else { - addWord(word.trim(), []); + + return dictionaryTable; + }, + + + /** + * Removes comment lines and then cleans up blank lines and trailing whitespace. + * + * @param {String} data The data from a .dic file. + * @return {String} The cleaned-up data. + */ + + _removeDicComments : function (data) { + // I can't find any official documentation on it, but at least the de_DE + // dictionary uses tab-indented lines as comments. + + // Remove comments + data = data.replace(/^\t.*$/mg, ""); + + return data; + }, + + parseRuleCodes : function (textCodes) { + if (!textCodes) { + return []; } - } - - return dictionaryTable; - }, - - - /** - * Removes comment lines and then cleans up blank lines and trailing whitespace. - * - * @param {String} data The data from a .dic file. - * @return {String} The cleaned-up data. - */ - - _removeDicComments : function (data) { - // I can't find any official documentation on it, but at least the de_DE - // dictionary uses tab-indented lines as comments. - - // Remove comments - data = data.replace(/^\t.*$/mg, ""); - - return data; - }, - - parseRuleCodes : function (textCodes) { - if (!textCodes) { - return []; - } - else if (!("FLAG" in this.flags)) { - return textCodes.split(""); - } - else if (this.flags.FLAG === "long") { - var flags = []; - - for (var i = 0, _len = textCodes.length; i < _len; i += 2) { - flags.push(textCodes.substr(i, 2)); - } - - return flags; - } - else if (this.flags.FLAG === "num") { - return textCodes.split(","); - } - }, - - /** - * Applies an affix rule to a word. - * - * @param {String} word The base word. - * @param {Object} rule The affix rule. - * @returns {String[]} The new words generated by the rule. - */ - - _applyRule : function (word, rule) { - var entries = rule.entries; - var newWords = []; - - for (var i = 0, _len = entries.length; i < _len; i++) { - var entry = entries[i]; - - if (!entry.match || word.match(entry.match)) { - var newWord = word; - - if (entry.remove) { - newWord = newWord.replace(entry.remove, ""); - } - - if (rule.type === "SFX") { - newWord = newWord + entry.add; + else if (!("FLAG" in this.flags)) { + return textCodes.split(""); + } + else if (this.flags.FLAG === "long") { + var flags = []; + + for (var i = 0, _len = textCodes.length; i < _len; i += 2) { + flags.push(textCodes.substr(i, 2)); } - else { - newWord = entry.add + newWord; - } - - newWords.push(newWord); - - if ("continuationClasses" in entry) { - for (var j = 0, _jlen = entry.continuationClasses.length; j < _jlen; j++) { - var continuationRule = this.rules[entry.continuationClasses[j]]; - - if (continuationRule) { - newWords = newWords.concat(this._applyRule(newWord, continuationRule)); - } - /* - else { - // This shouldn't happen, but it does, at least in the de_DE dictionary. - // I think the author mistakenly supplied lower-case rule codes instead - // of upper-case. + + return flags; + } + else if (this.flags.FLAG === "num") { + return textCodes.split(","); + } + }, + + /** + * Applies an affix rule to a word. + * + * @param {String} word The base word. + * @param {Object} rule The affix rule. + * @returns {String[]} The new words generated by the rule. + */ + + _applyRule : function (word, rule) { + var entries = rule.entries; + var newWords = []; + + for (var i = 0, _len = entries.length; i < _len; i++) { + var entry = entries[i]; + + if (!entry.match || word.match(entry.match)) { + var newWord = word; + + if (entry.remove) { + newWord = newWord.replace(entry.remove, ""); + } + + if (rule.type === "SFX") { + newWord = newWord + entry.add; + } + else { + newWord = entry.add + newWord; + } + + newWords.push(newWord); + + if ("continuationClasses" in entry) { + for (var j = 0, _jlen = entry.continuationClasses.length; j < _jlen; j++) { + var continuationRule = this.rules[entry.continuationClasses[j]]; + + if (continuationRule) { + newWords = newWords.concat(this._applyRule(newWord, continuationRule)); + } + /* + else { + // This shouldn't happen, but it does, at least in the de_DE dictionary. + // I think the author mistakenly supplied lower-case rule codes instead + // of upper-case. + } + */ } - */ } } } - } - - return newWords; - }, - - /** - * Checks whether a word or a capitalization variant exists in the current dictionary. - * The word is trimmed and several variations of capitalizations are checked. - * If you want to check a word without any changes made to it, call checkExact() - * - * @see http://blog.stevenlevithan.com/archives/faster-trim-javascript re:trimming function - * - * @param {String} aWord The word to check. - * @returns {Boolean} - */ - - check : function (aWord) { - if (!this.loaded) { - throw "Dictionary not loaded."; - } - - // Remove leading and trailing whitespace - var trimmedWord = aWord.replace(/^\s\s*/, '').replace(/\s\s*$/, ''); - - if (this.checkExact(trimmedWord)) { - return true; - } - - // The exact word is not in the dictionary. - if (trimmedWord.toUpperCase() === trimmedWord) { - // The word was supplied in all uppercase. - // Check for a capitalized form of the word. - var capitalizedWord = trimmedWord[0] + trimmedWord.substring(1).toLowerCase(); - - if (this.hasFlag(capitalizedWord, "KEEPCASE")) { - // Capitalization variants are not allowed for this word. - return false; - } - - if (this.checkExact(capitalizedWord)) { - return true; + + return newWords; + }, + + /** + * Checks whether a word or a capitalization variant exists in the current dictionary. + * The word is trimmed and several variations of capitalizations are checked. + * If you want to check a word without any changes made to it, call checkExact() + * + * @see http://blog.stevenlevithan.com/archives/faster-trim-javascript re:trimming function + * + * @param {String} aWord The word to check. + * @returns {Boolean} + */ + + check : function (aWord) { + if (!this.loaded) { + throw "Dictionary not loaded."; } - } - - var lowercaseWord = trimmedWord.toLowerCase(); - - if (lowercaseWord !== trimmedWord) { - if (this.hasFlag(lowercaseWord, "KEEPCASE")) { - // Capitalization variants are not allowed for this word. - return false; - } - - // Check for a lowercase form - if (this.checkExact(lowercaseWord)) { + + // Remove leading and trailing whitespace + var trimmedWord = aWord.replace(/^\s\s*/, '').replace(/\s\s*$/, ''); + + if (this.checkExact(trimmedWord)) { return true; } - } - - return false; - }, - - /** - * Checks whether a word exists in the current dictionary. - * - * @param {String} word The word to check. - * @returns {Boolean} - */ - - checkExact : function (word) { - if (!this.loaded) { - throw "Dictionary not loaded."; - } - var ruleCodes = this.dictionaryTable[word]; - - var i, _len; - - if (typeof ruleCodes === 'undefined') { - // Check if this might be a compound word. - if ("COMPOUNDMIN" in this.flags && word.length >= this.flags.COMPOUNDMIN) { - for (i = 0, _len = this.compoundRules.length; i < _len; i++) { - if (word.match(this.compoundRules[i])) { - return true; - } + // The exact word is not in the dictionary. + if (trimmedWord.toUpperCase() === trimmedWord) { + // The word was supplied in all uppercase. + // Check for a capitalized form of the word. + var capitalizedWord = trimmedWord[0] + trimmedWord.substring(1).toLowerCase(); + + if (this.hasFlag(capitalizedWord, "KEEPCASE")) { + // Capitalization variants are not allowed for this word. + return false; + } + + if (this.checkExact(capitalizedWord)) { + return true; } } - } - else if (ruleCodes === null) { - // a null (but not undefined) value for an entry in the dictionary table - // means that the word is in the dictionary but has no flags. - return true; - } - else if (typeof ruleCodes === 'object') { // this.dictionary['hasOwnProperty'] will be a function. - for (i = 0, _len = ruleCodes.length; i < _len; i++) { - if (!this.hasFlag(word, "ONLYINCOMPOUND", ruleCodes[i])) { + + var lowercaseWord = trimmedWord.toLowerCase(); + + if (lowercaseWord !== trimmedWord) { + if (this.hasFlag(lowercaseWord, "KEEPCASE")) { + // Capitalization variants are not allowed for this word. + return false; + } + + // Check for a lowercase form + if (this.checkExact(lowercaseWord)) { return true; } } - } - return false; - }, - - /** - * Looks up whether a given word is flagged with a given flag. - * - * @param {String} word The word in question. - * @param {String} flag The flag in question. - * @return {Boolean} - */ - - hasFlag : function (word, flag, wordFlags) { - if (!this.loaded) { - throw "Dictionary not loaded."; - } + return false; + }, + + /** + * Checks whether a word exists in the current dictionary. + * + * @param {String} word The word to check. + * @returns {Boolean} + */ - if (flag in this.flags) { - if (typeof wordFlags === 'undefined') { - wordFlags = Array.prototype.concat.apply([], this.dictionaryTable[word]); + checkExact : function (word) { + if (!this.loaded) { + throw "Dictionary not loaded."; } - - if (wordFlags && wordFlags.indexOf(this.flags[flag]) !== -1) { + + var ruleCodes = this.dictionaryTable[word]; + + var i, _len; + + if (typeof ruleCodes === 'undefined') { + // Check if this might be a compound word. + if ("COMPOUNDMIN" in this.flags && word.length >= this.flags.COMPOUNDMIN) { + for (i = 0, _len = this.compoundRules.length; i < _len; i++) { + if (word.match(this.compoundRules[i])) { + return true; + } + } + } + } + else if (ruleCodes === null) { + // a null (but not undefined) value for an entry in the dictionary table + // means that the word is in the dictionary but has no flags. return true; } - } - - return false; - }, - - /** - * Returns a list of suggestions for a misspelled word. - * - * @see http://www.norvig.com/spell-correct.html for the basis of this suggestor. - * This suggestor is primitive, but it works. - * - * @param {String} word The misspelling. - * @param {Number} [limit=5] The maximum number of suggestions to return. - * @returns {String[]} The array of suggestions. - */ - - alphabet : "", - - suggest : function (word, limit) { - if (!this.loaded) { - throw "Dictionary not loaded."; - } + else if (typeof ruleCodes === 'object') { // this.dictionary['hasOwnProperty'] will be a function. + for (i = 0, _len = ruleCodes.length; i < _len; i++) { + if (!this.hasFlag(word, "ONLYINCOMPOUND", ruleCodes[i])) { + return true; + } + } + } - limit = limit || 5; + return false; + }, - if (this.memoized.hasOwnProperty(word)) { - var memoizedLimit = this.memoized[word]['limit']; + /** + * Looks up whether a given word is flagged with a given flag. + * + * @param {String} word The word in question. + * @param {String} flag The flag in question. + * @return {Boolean} + */ - // Only return the cached list if it's big enough or if there weren't enough suggestions - // to fill a smaller limit. - if (limit <= memoizedLimit || this.memoized[word]['suggestions'].length < memoizedLimit) { - return this.memoized[word]['suggestions'].slice(0, limit); + hasFlag : function (word, flag, wordFlags) { + if (!this.loaded) { + throw "Dictionary not loaded."; } - } - - if (this.check(word)) return []; - - // Check the replacement table. - for (var i = 0, _len = this.replacementTable.length; i < _len; i++) { - var replacementEntry = this.replacementTable[i]; - - if (word.indexOf(replacementEntry[0]) !== -1) { - var correctedWord = word.replace(replacementEntry[0], replacementEntry[1]); - - if (this.check(correctedWord)) { - return [ correctedWord ]; + + if (flag in this.flags) { + if (typeof wordFlags === 'undefined') { + wordFlags = Array.prototype.concat.apply([], this.dictionaryTable[word]); + } + + if (wordFlags && wordFlags.indexOf(this.flags[flag]) !== -1) { + return true; } } - } - - var self = this; - self.alphabet = "abcdefghijklmnopqrstuvwxyz"; - - /* - if (!self.alphabet) { - // Use the alphabet as implicitly defined by the words in the dictionary. - var alphaHash = {}; - - for (var i in self.dictionaryTable) { - for (var j = 0, _len = i.length; j < _len; j++) { - alphaHash[i[j]] = true; - } - } - - for (var i in alphaHash) { - self.alphabet += i; - } - - var alphaArray = self.alphabet.split(""); - alphaArray.sort(); - self.alphabet = alphaArray.join(""); - } - */ - - function edits1(words) { - var rv = []; - - var ii, i, j, _iilen, _len, _jlen; - - for (ii = 0, _iilen = words.length; ii < _iilen; ii++) { - var word = words[ii]; - - for (i = 0, _len = word.length + 1; i < _len; i++) { - var s = [ word.substring(0, i), word.substring(i) ]; - - if (s[1]) { - rv.push(s[0] + s[1].substring(1)); + + return false; + }, + + /** + * Returns a list of suggestions for a misspelled word. + * + * @see http://www.norvig.com/spell-correct.html for the basis of this suggestor. + * This suggestor is primitive, but it works. + * + * @param {String} word The misspelling. + * @param {Number} [limit=5] The maximum number of suggestions to return. + * @returns {String[]} The array of suggestions. + */ + + alphabet : "", + + suggest : function (word, limit) { + if (!this.loaded) { + throw "Dictionary not loaded."; + } + + limit = limit || 5; + + if (this.memoized.hasOwnProperty(word)) { + var memoizedLimit = this.memoized[word]['limit']; + + // Only return the cached list if it's big enough or if there weren't enough suggestions + // to fill a smaller limit. + if (limit <= memoizedLimit || this.memoized[word]['suggestions'].length < memoizedLimit) { + return this.memoized[word]['suggestions'].slice(0, limit); + } + } + + if (this.check(word)) return []; + + // Check the replacement table. + for (var i = 0, _len = this.replacementTable.length; i < _len; i++) { + var replacementEntry = this.replacementTable[i]; + + if (word.indexOf(replacementEntry[0]) !== -1) { + var correctedWord = word.replace(replacementEntry[0], replacementEntry[1]); + + if (this.check(correctedWord)) { + return [ correctedWord ]; } - - // Eliminate transpositions of identical letters - if (s[1].length > 1 && s[1][1] !== s[1][0]) { - rv.push(s[0] + s[1][1] + s[1][0] + s[1].substring(2)); + } + } + + var self = this; + self.alphabet = "abcdefghijklmnopqrstuvwxyz"; + + /* + if (!self.alphabet) { + // Use the alphabet as implicitly defined by the words in the dictionary. + var alphaHash = {}; + + for (var i in self.dictionaryTable) { + for (var j = 0, _len = i.length; j < _len; j++) { + alphaHash[i[j]] = true; } + } + + for (var i in alphaHash) { + self.alphabet += i; + } + + var alphaArray = self.alphabet.split(""); + alphaArray.sort(); + self.alphabet = alphaArray.join(""); + } + */ + + function edits1(words) { + var rv = []; + + var ii, i, j, _iilen, _len, _jlen; + + for (ii = 0, _iilen = words.length; ii < _iilen; ii++) { + var word = words[ii]; + + for (i = 0, _len = word.length + 1; i < _len; i++) { + var s = [ word.substring(0, i), word.substring(i) ]; + + if (s[1]) { + rv.push(s[0] + s[1].substring(1)); + } - if (s[1]) { - for (j = 0, _jlen = self.alphabet.length; j < _jlen; j++) { - // Eliminate replacement of a letter by itself - if (self.alphabet[j] != s[1].substring(0,1)){ - rv.push(s[0] + self.alphabet[j] + s[1].substring(1)); + // Eliminate transpositions of identical letters + if (s[1].length > 1 && s[1][1] !== s[1][0]) { + rv.push(s[0] + s[1][1] + s[1][0] + s[1].substring(2)); + } + + if (s[1]) { + for (j = 0, _jlen = self.alphabet.length; j < _jlen; j++) { + // Eliminate replacement of a letter by itself + if (self.alphabet[j] != s[1].substring(0,1)){ + rv.push(s[0] + self.alphabet[j] + s[1].substring(1)); + } } } - } - if (s[1]) { - for (j = 0, _jlen = self.alphabet.length; j < _jlen; j++) { - rv.push(s[0] + self.alphabet[j] + s[1]); + if (s[1]) { + for (j = 0, _jlen = self.alphabet.length; j < _jlen; j++) { + rv.push(s[0] + self.alphabet[j] + s[1]); + } } } } + + return rv; } - - return rv; - } - - function known(words) { - var rv = []; - - for (var i = 0, _len = words.length; i < _len; i++) { - if (self.check(words[i])) { - rv.push(words[i]); + + function known(words) { + var rv = []; + + for (var i = 0, _len = words.length; i < _len; i++) { + if (self.check(words[i])) { + rv.push(words[i]); + } } + + return rv; } - - return rv; - } - - function correct(word) { - // Get the edit-distance-1 and edit-distance-2 forms of this word. - var ed1 = edits1([word]); - var ed2 = edits1(ed1); - - var corrections = known(ed1.concat(ed2)); - - var i, _len; - - // Sort the edits based on how many different ways they were created. - var weighted_corrections = {}; - - for (i = 0, _len = corrections.length; i < _len; i++) { - if (!(corrections[i] in weighted_corrections)) { - weighted_corrections[corrections[i]] = 1; - } - else { - weighted_corrections[corrections[i]] += 1; + + function correct(word) { + // Get the edit-distance-1 and edit-distance-2 forms of this word. + var ed1 = edits1([word]); + var ed2 = edits1(ed1); + + var corrections = known(ed1.concat(ed2)); + + var i, _len; + + // Sort the edits based on how many different ways they were created. + var weighted_corrections = {}; + + for (i = 0, _len = corrections.length; i < _len; i++) { + if (!(corrections[i] in weighted_corrections)) { + weighted_corrections[corrections[i]] = 1; + } + else { + weighted_corrections[corrections[i]] += 1; + } } - } - - var sorted_corrections = []; - - for (i in weighted_corrections) { - if (weighted_corrections.hasOwnProperty(i)) { - sorted_corrections.push([ i, weighted_corrections[i] ]); + + var sorted_corrections = []; + + for (i in weighted_corrections) { + if (weighted_corrections.hasOwnProperty(i)) { + sorted_corrections.push([ i, weighted_corrections[i] ]); + } } - } - - function sorter(a, b) { - if (a[1] < b[1]) { - return -1; + + function sorter(a, b) { + if (a[1] < b[1]) { + return -1; + } + + return 1; } - - return 1; - } - - sorted_corrections.sort(sorter).reverse(); - - var rv = []; - var capitalization_scheme = "lowercase"; - - if (word.toUpperCase() === word) { - capitalization_scheme = "uppercase"; - } - else if (word.substr(0, 1).toUpperCase() + word.substr(1).toLowerCase() === word) { - capitalization_scheme = "capitalized"; - } - - for (i = 0, _len = Math.min(limit, sorted_corrections.length); i < _len; i++) { - if ("uppercase" === capitalization_scheme) { - sorted_corrections[i][0] = sorted_corrections[i][0].toUpperCase(); + sorted_corrections.sort(sorter).reverse(); + + var rv = []; + + var capitalization_scheme = "lowercase"; + + if (word.toUpperCase() === word) { + capitalization_scheme = "uppercase"; } - else if ("capitalized" === capitalization_scheme) { - sorted_corrections[i][0] = sorted_corrections[i][0].substr(0, 1).toUpperCase() + sorted_corrections[i][0].substr(1); + else if (word.substr(0, 1).toUpperCase() + word.substr(1).toLowerCase() === word) { + capitalization_scheme = "capitalized"; } - - if (!self.hasFlag(sorted_corrections[i][0], "NOSUGGEST")) { - rv.push(sorted_corrections[i][0]); + + for (i = 0, _len = Math.min(limit, sorted_corrections.length); i < _len; i++) { + if ("uppercase" === capitalization_scheme) { + sorted_corrections[i][0] = sorted_corrections[i][0].toUpperCase(); + } + else if ("capitalized" === capitalization_scheme) { + sorted_corrections[i][0] = sorted_corrections[i][0].substr(0, 1).toUpperCase() + sorted_corrections[i][0].substr(1); + } + + if (!self.hasFlag(sorted_corrections[i][0], "NOSUGGEST")) { + rv.push(sorted_corrections[i][0]); + } } + + return rv; } - - return rv; + + this.memoized[word] = { + 'suggestions': correct(word), + 'limit': limit + }; + + return this.memoized[word]['suggestions']; } - - this.memoized[word] = { - 'suggestions': correct(word), - 'limit': limit - }; - - return this.memoized[word]['suggestions']; - } -}; -})(); - -// Support for use as a node.js module. -if (typeof module !== 'undefined') { - module.exports = Typo; -} \ No newline at end of file + }; + })(); + + return Typo; + +})); \ No newline at end of file