使用JavaScript函数将西里尔字母转换为拉丁字母

22
我写了这个函数:
function transliterate(word){

    var answer = "";

    A = new Array();
    A["Ё"]="YO";A["Й"]="I";A["Ц"]="TS";A["У"]="U";A["К"]="K";A["Е"]="E";A["Н"]="N";A["Г"]="G";A["Ш"]="SH";A["Щ"]="SCH";A["З"]="Z";A["Х"]="H";A["Ъ"]="'";
    A["ё"]="yo";A["й"]="i";A["ц"]="ts";A["у"]="u";A["к"]="k";A["е"]="e";A["н"]="n";A["г"]="g";A["ш"]="sh";A["щ"]="sch";A["з"]="z";A["х"]="h";A["ъ"]="'";
    A["Ф"]="F";A["Ы"]="I";A["В"]="V";A["А"]="A";A["П"]="P";A["Р"]="R";A["О"]="O";A["Л"]="L";A["Д"]="D";A["Ж"]="ZH";A["Э"]="E";
    A["ф"]="f";A["ы"]="i";A["в"]="v";A["а"]="a";A["п"]="p";A["р"]="r";A["о"]="o";A["л"]="l";A["д"]="d";A["ж"]="zh";A["э"]="e";
    A["Я"]="YA";A["Ч"]="CH";A["С"]="S";A["М"]="M";A["И"]="I";A["Т"]="T";A["Ь"]="'";A["Б"]="B";A["Ю"]="YU";
    A["я"]="ya";A["ч"]="ch";A["с"]="s";A["м"]="m";A["и"]="i";A["т"]="t";A["ь"]="'";A["б"]="b";A["ю"]="yu";

for (i in word){

    if (A[word[i]] === 'undefined'){
        answer += word[i];
        }
    else {
        answer += A[word[i]];
        }

return answer;
}
}

现在应该将西里尔文转写成拉丁文,让拉丁文保持原样。但它只能转写第一个字母,在处理拉丁文时会给出未定义的答案。有人能给我一个想法,我做错了什么吗?

一个 IDE?是指 _集成开发环境_,还是指一个 _想法_? - Daedalus
8个回答

59

几件事情...

  1. 使用 undefined 而不是 'undefined'
  2. 不要在循环中放置返回
  3. 使用 hasOwnProperty 过滤掉原型上的函数和属性
  4. 使用 [] 而不是 new Array()
  5. 使用 {} 而不是 []
  6. 使用小写变量而不是大写。大写字母保留给构造函数

这是代码

function transliterate(word){
    var answer = ""
      , a = {};

   a["Ё"]="YO";a["Й"]="I";a["Ц"]="TS";a["У"]="U";a["К"]="K";a["Е"]="E";a["Н"]="N";a["Г"]="G";a["Ш"]="SH";a["Щ"]="SCH";a["З"]="Z";a["Х"]="H";a["Ъ"]="'";
   a["ё"]="yo";a["й"]="i";a["ц"]="ts";a["у"]="u";a["к"]="k";a["е"]="e";a["н"]="n";a["г"]="g";a["ш"]="sh";a["щ"]="sch";a["з"]="z";a["х"]="h";a["ъ"]="'";
   a["Ф"]="F";a["Ы"]="I";a["В"]="V";a["А"]="A";a["П"]="P";a["Р"]="R";a["О"]="O";a["Л"]="L";a["Д"]="D";a["Ж"]="ZH";a["Э"]="E";
   a["ф"]="f";a["ы"]="i";a["в"]="v";a["а"]="a";a["п"]="p";a["р"]="r";a["о"]="o";a["л"]="l";a["д"]="d";a["ж"]="zh";a["э"]="e";
   a["Я"]="Ya";a["Ч"]="CH";a["С"]="S";a["М"]="M";a["И"]="I";a["Т"]="T";a["Ь"]="'";a["Б"]="B";a["Ю"]="YU";
   a["я"]="ya";a["ч"]="ch";a["с"]="s";a["м"]="m";a["и"]="i";a["т"]="t";a["ь"]="'";a["б"]="b";a["ю"]="yu";

   for (i in word){
     if (word.hasOwnProperty(i)) {
       if (a[word[i]] === undefined){
         answer += word[i];
       } else {
         answer += a[word[i]];
       }
     }
   }
   return answer;
}

更加功能强大的代码如下所示...

var a = {"Ё":"YO","Й":"I","Ц":"TS","У":"U","К":"K","Е":"E","Н":"N","Г":"G","Ш":"SH","Щ":"SCH","З":"Z","Х":"H","Ъ":"'","ё":"yo","й":"i","ц":"ts","у":"u","к":"k","е":"e","н":"n","г":"g","ш":"sh","щ":"sch","з":"z","х":"h","ъ":"'","Ф":"F","Ы":"I","В":"V","А":"A","П":"P","Р":"R","О":"O","Л":"L","Д":"D","Ж":"ZH","Э":"E","ф":"f","ы":"i","в":"v","а":"a","п":"p","р":"r","о":"o","л":"l","д":"d","ж":"zh","э":"e","Я":"Ya","Ч":"CH","С":"S","М":"M","И":"I","Т":"T","Ь":"'","Б":"B","Ю":"YU","я":"ya","ч":"ch","с":"s","м":"m","и":"i","т":"t","ь":"'","б":"b","ю":"yu"};

function transliterate(word){
  return word.split('').map(function (char) { 
    return a[char] || char; 
  }).join("");
}

我发现自己不时地回顾这个问题,我非常确定这是最紧凑和可用的代码,解决这个问题,你会在那里找到。谢谢! - tftd
2
喜欢函数式编程,简洁明了! - Faramarz
你好,你知道是否有一个库可以做到这一点吗?(也需要用于其他字母表) - franck
1
这里有错误:例如a["А"]="a" ?? - Alex Kolarski

27
在我的项目中,我使用以下的转译方法:
var transliterate = function(text) {

    text = text
        .replace(/\u0401/g, 'YO')
        .replace(/\u0419/g, 'I')
        .replace(/\u0426/g, 'TS')
        .replace(/\u0423/g, 'U')
        .replace(/\u041A/g, 'K')
        .replace(/\u0415/g, 'E')
        .replace(/\u041D/g, 'N')
        .replace(/\u0413/g, 'G')
        .replace(/\u0428/g, 'SH')
        .replace(/\u0429/g, 'SCH')
        .replace(/\u0417/g, 'Z')
        .replace(/\u0425/g, 'H')
        .replace(/\u042A/g, '')
        .replace(/\u0451/g, 'yo')
        .replace(/\u0439/g, 'i')
        .replace(/\u0446/g, 'ts')
        .replace(/\u0443/g, 'u')
        .replace(/\u043A/g, 'k')
        .replace(/\u0435/g, 'e')
        .replace(/\u043D/g, 'n')
        .replace(/\u0433/g, 'g')
        .replace(/\u0448/g, 'sh')
        .replace(/\u0449/g, 'sch')
        .replace(/\u0437/g, 'z')
        .replace(/\u0445/g, 'h')
        .replace(/\u044A/g, "'")
        .replace(/\u0424/g, 'F')
        .replace(/\u042B/g, 'I')
        .replace(/\u0412/g, 'V')
        .replace(/\u0410/g, 'a')
        .replace(/\u041F/g, 'P')
        .replace(/\u0420/g, 'R')
        .replace(/\u041E/g, 'O')
        .replace(/\u041B/g, 'L')
        .replace(/\u0414/g, 'D')
        .replace(/\u0416/g, 'ZH')
        .replace(/\u042D/g, 'E')
        .replace(/\u0444/g, 'f')
        .replace(/\u044B/g, 'i')
        .replace(/\u0432/g, 'v')
        .replace(/\u0430/g, 'a')
        .replace(/\u043F/g, 'p')
        .replace(/\u0440/g, 'r')
        .replace(/\u043E/g, 'o')
        .replace(/\u043B/g, 'l')
        .replace(/\u0434/g, 'd')
        .replace(/\u0436/g, 'zh')
        .replace(/\u044D/g, 'e')
        .replace(/\u042F/g, 'Ya')
        .replace(/\u0427/g, 'CH')
        .replace(/\u0421/g, 'S')
        .replace(/\u041C/g, 'M')
        .replace(/\u0418/g, 'I')
        .replace(/\u0422/g, 'T')
        .replace(/\u042C/g, "'")
        .replace(/\u0411/g, 'B')
        .replace(/\u042E/g, 'YU')
        .replace(/\u044F/g, 'ya')
        .replace(/\u0447/g, 'ch')
        .replace(/\u0441/g, 's')
        .replace(/\u043C/g, 'm')
        .replace(/\u0438/g, 'i')
        .replace(/\u0442/g, 't')
        .replace(/\u044C/g, "'")
        .replace(/\u0431/g, 'b')
        .replace(/\u044E/g, 'yu');

    return text;
};

运行此示例进行音译:
transliterate('абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ');

我已经用它们的Unicode模拟替换了所有俄语字母(每个字母以\u开头),以解决Javascript文件中编码问题。为了检查执行速度,我采用了这个问题的最佳答案,并将其与我的例子进行比较。我的方法在几倍速度上更快(在Firebug中为0.16毫秒:-)。 firebug中的速度比较

7
你是在使用IE浏览器开发吗? - Matt Fletcher

7

不要使用数组来完成此任务。不要使用 for in 遍历字符串。不要检查字符串是否等于 "undefined"。不要在 for 循环内使用 return

function transliterate(word) {
    var A = {};
    var result = '';

    A["Ё"]="YO";A["Й"]="I";A["Ц"]="TS";A["У"]="U";A["К"]="K";A["Е"]="E";A["Н"]="N";A["Г"]="G";A["Ш"]="SH";A["Щ"]="SCH";A["З"]="Z";A["Х"]="H";A["Ъ"]="'";
    A["ё"]="yo";A["й"]="i";A["ц"]="ts";A["у"]="u";A["к"]="k";A["е"]="e";A["н"]="n";A["г"]="g";A["ш"]="sh";A["щ"]="sch";A["з"]="z";A["х"]="h";A["ъ"]="'";
    A["Ф"]="F";A["Ы"]="I";A["В"]="V";A["А"]="A";A["П"]="P";A["Р"]="R";A["О"]="O";A["Л"]="L";A["Д"]="D";A["Ж"]="ZH";A["Э"]="E";
    A["ф"]="f";A["ы"]="i";A["в"]="v";A["а"]="a";A["п"]="p";A["р"]="r";A["о"]="o";A["л"]="l";A["д"]="d";A["ж"]="zh";A["э"]="e";
    A["Я"]="YA";A["Ч"]="CH";A["С"]="S";A["М"]="M";A["И"]="I";A["Т"]="T";A["Ь"]="'";A["Б"]="B";A["Ю"]="YU";
    A["я"]="ya";A["ч"]="ch";A["с"]="s";A["м"]="m";A["и"]="i";A["т"]="t";A["ь"]="'";A["б"]="b";A["ю"]="yu";

    for(var i = 0; i < word.length; i++) {
        var c = word.charAt(i);

        result += A[c] || c;
    }

    return result;
}

Here is a jsFiddle demonstration.


1
不要在循环结构中放置 var i。不要在循环体中放置 var c。为什么:可怜的、被误解的 var。否则会+1。 - T.J. Crowder
@T.J.Crowder:我知道它将它们定义为函数作用域,而且我对此非常满意。这只是一种编码约定。不过还是谢谢你。 - user1479055
@ 小火龍:不好的習慣。如果你想要故意誤導維護你程式碼的人,那就繼續使用吧。;-)(請注意,let正在進入JavaScript,這使得使用var更加不穩定。) - T.J. Crowder

4
你的主要问题是 return 放错了位置。它在循环内部,因此在第一次迭代时就返回了。请将其更改为:
function transliterate(word){

    var answer = "";

    A = new Array();
    A["Ё"]="YO";A["Й"]="I";A["Ц"]="TS";A["У"]="U";A["К"]="K";A["Е"]="E";A["Н"]="N";A["Г"]="G";A["Ш"]="SH";A["Щ"]="SCH";A["З"]="Z";A["Х"]="H";A["Ъ"]="'";
    A["ё"]="yo";A["й"]="i";A["ц"]="ts";A["у"]="u";A["к"]="k";A["е"]="e";A["н"]="n";A["г"]="g";A["ш"]="sh";A["щ"]="sch";A["з"]="z";A["х"]="h";A["ъ"]="'";
    A["Ф"]="F";A["Ы"]="I";A["В"]="V";A["А"]="A";A["П"]="P";A["Р"]="R";A["О"]="O";A["Л"]="L";A["Д"]="D";A["Ж"]="ZH";A["Э"]="E";
    A["ф"]="f";A["ы"]="i";A["в"]="v";A["а"]="a";A["п"]="p";A["р"]="r";A["о"]="o";A["л"]="l";A["д"]="d";A["ж"]="zh";A["э"]="e";
    A["Я"]="YA";A["Ч"]="CH";A["С"]="S";A["М"]="M";A["И"]="I";A["Т"]="T";A["Ь"]="'";A["Б"]="B";A["Ю"]="YU";
    A["я"]="ya";A["ч"]="ch";A["с"]="s";A["м"]="m";A["и"]="i";A["т"]="t";A["ь"]="'";A["б"]="b";A["ю"]="yu";

    for (i in word){

        if (A[word[i]] === 'undefined'){
            answer += word[i];
            }
        else {
            answer += A[word[i]];
            }

    }
    return answer; // <=== Was *above* the } on the previous line
}

请注意,我已经修正了缩进。一致的缩进有助于避免这些错误。
注意1:关于您的A对象,没有任何东西使用它是一个Array的事实。您只是将其用作映射。在JavaScript中,所有对象都是映射,因此不要使用“A = new Array();”,而应该使用“A = {};”。
注意2:您的函数中从未声明A和i,因此您会遭受“隐式全局变量的恐惧”。要修复它,请使用“var”声明它们。
注意3:使用for..in循环遍历字符串的字符,或使用[]索引进入字符串,在JavaScript引擎中都不可靠。相反,请使用“for (i = 0; i < word.length; ++i)”然后使用“ch = word.charAt(i);”获取该位置的字符,然后在循环内部的代码中使用“ch”。
注意事项4:您可以使用非常强大的||运算符来缩短代码,例如:
answer += A[ch] || ch;

2
在我的系统中,js中的俄语符号根本不起作用。我不知道为什么会这样。因此,我使用以下代码来解决问题:
它不仅可以进行音译,而且还可以将所有标点符号替换为“_”,并将所有字母转换为小写。

function translit(str){
 var sp = '_'; 
 var text = str.toLowerCase();
 var transl = { 
  '\u0430': 'a', '\u0431': 'b', '\u0432': 'v', '\u0433': 'g', '\u0434': 'd', '\u0435': 'e', '\u0451': 'e', '\u0436': 'zh',
  '\u0437': 'z', '\u0438': 'i', '\u0439': 'j', '\u043a': 'k', '\u043b': 'l', '\u043c': 'm', '\u043d': 'n', '\u043e': 'o',
  '\u043f': 'p', '\u0440': 'r', '\u0441': 's', '\u0442': 't', '\u0443': 'u', '\u0444': 'f', '\u0445': 'h', '\u0446': 'c', 
  '\u0447': 'ch', '\u0448': 'sh', '\u0449': 'shch', '\u044a': '\'', '\u044b': 'y', '\u044c': '', '\u044d': 'e', '\u044e': 'yu',
  '\u044f': 'ya',  
  '\u00AB':'_', '\u00BB':'_', // «»
  ' ': sp, '_': sp, '`': sp, '~': sp, 
  '!': sp, '@': sp, '#': sp, '$': sp,
  '%': sp, '^': sp, '&': sp, '*': sp, '(': sp, ')': sp, '-': sp, '\=': sp,
  '+': sp, '[': sp, ']': sp, '\\': sp, '|': sp, '/': sp, '.': sp, ',': sp,
  '{': sp, '}': sp, '\'': sp, '"': sp, ';': sp, ':': sp, '?': sp, '<': sp,
  '>': sp, '№': sp     
 }
    var result = '';
 var curent_sim = '';
    for(i=0; i < text.length; i++) {
  if(transl[text[i]] != undefined) {   
   if(curent_sim != transl[text[i]] || curent_sim != sp){
    result += transl[text[i]];
    curent_sim = transl[text[i]];    
   }     
  } else {
   result += text[i];
   curent_sim = text[i];
  }  
    }
 result = result.replace(/^_/, '').replace(/_$/, ''); // trim
 return result
}

var result = translit('Привет Мир!');
document.getElementById('alias').value = result;
<html>
  <body>
    <input name="name" type="text" id="alias" />
  </body>
</html>

该代码最初取自此处:http://ajaxs.ru/lesson/js/137-transliteracija_stroki_na_javascript.html ,然后进行了重构。


2

这是我在TypeScript中发现的最简单、最易读和可扩展的方法:

const cyrToLat = (str: string): string => {
  const a: Record<string, string> = {};
  a["а"] = "a";
  a["А"] = "А";
  a["Б"] = "B";
  a["б"] = "b";
  a["В"] = "V";
  a["в"] = "v";
  a["Г"] = "G";
  a["г"] = "g";
  a["Ґ"] = "G";
  a["ґ"] = "g";
  a["Д"] = "D";
  a["д"] = "d";
  a["Е"] = "E";
  a["е"] = "e";
  a["Ё"] = "YO";
  a["ё"] = "yo";
  a["є"] = "ie";
  a["Є"] = "Ye";
  a["Ж"] = "ZH";
  a["ж"] = "zh";
  a["З"] = "Z";
  a["з"] = "z";
  a["И"] = "I";
  a["и"] = "i";
  a["І"] = "I";
  a["і"] = "i";
  a["ї"] = "i";
  a["Ї"] = "Yi";
  a["Й"] = "I";
  a["й"] = "i";
  a["К"] = "K";
  a["к"] = "k";
  a["Л"] = "L";
  a["л"] = "l";
  a["М"] = "M";
  a["м"] = "m";
  a["Н"] = "N";
  a["н"] = "n";
  a["О"] = "O";
  a["о"] = "o";
  a["П"] = "P";
  a["п"] = "p";
  a["Р"] = "R";
  a["р"] = "r";
  a["С"] = "S";
  a["с"] = "s";
  a["Т"] = "T";
  a["т"] = "t";
  a["У"] = "U";
  a["у"] = "u";
  a["Ф"] = "F";
  a["ф"] = "f";
  a["Х"] = "H";
  a["х"] = "h";
  a["Ц"] = "TS";
  a["ц"] = "ts";
  a["Ч"] = "CH";
  a["ч"] = "ch";
  a["Ш"] = "SH";
  a["ш"] = "sh";
  a["Щ"] = "SCH";
  a["щ"] = "sch";
  a["Ъ"] = "'";
  a["ъ"] = "'";
  a["Ы"] = "I";
  a["ы"] = "i";
  a["Ь"] = "'";
  a["ь"] = "'";
  a["Э"] = "E";
  a["э"] = "e";
  a["Ю"] = "YU";
  a["ю"] = "yu";
  a["Я"] = "Ya";
  a["я"] = "ya";
  return str
    .split("")
    .map((char) => a[char] || char)
    .join("");
};

1
如果您需要将“ъ”和“ь”替换为空字符串,则应按以下方式修改@split-your-infinity解决方案:
var a = {"Ё":"YO","Й":"I","Ц":"TS","У":"U","К":"K","Е":"E","Н":"N","Г":"G","Ш":"SH","Щ":"SCH","З":"Z","Х":"H","Ъ":"","ё":"yo","й":"i","ц":"ts","у":"u","к":"k","е":"e","н":"n","г":"g","ш":"sh","щ":"sch","з":"z","х":"h","ъ":"","Ф":"F","Ы":"I","В":"V","А":"a","П":"P","Р":"R","О":"O","Л":"L","Д":"D","Ж":"ZH","Э":"E","ф":"f","ы":"i","в":"v","а":"a","п":"p","р":"r","о":"o","л":"l","д":"d","ж":"zh","э":"e","Я":"Ya","Ч":"CH","С":"S","М":"M","И":"I","Т":"T","Ь":"","Б":"B","Ю":"YU","я":"ya","ч":"ch","с":"s","м":"m","и":"i","т":"t","ь":"","б":"b","ю":"yu"};

function transliterate(word){
  return word.split('').map(function (char) { 
    return a.hasOwnProperty(char) ? a[char] : char; 
  }).join("");
}

如果您不想得到“Uncaught TypeError: word.split is not a function”的错误提示,您必须将“word”参数转换为字符串。例如:transiterate(“”+someTexOrNumberVar)。否则,您必须在函数内部进行转换。 - Sergey Beloglazov

0
通过结合Bart Riemens和T.J. Crowder的建议,我想出了这段代码,看起来很好地完成了任务。
 function transliterate(word){

    var answer = "";
    var a = {}

    a["Ё"]="YO";a["Й"]="I";a["Ц"]="TS";a["У"]="U";a["К"]="K";a["Е"]="E";a["Н"]="N";a["Г"]="G";a["Ш"]="SH";a["Щ"]="SCH";a["З"]="Z";a["Х"]="H";a["Ъ"]="'";
    a["ё"]="yo";a["й"]="i";a["ц"]="ts";a["у"]="u";a["к"]="k";a["е"]="e";a["н"]="n";a["г"]="g";a["ш"]="sh";a["щ"]="sch";a["з"]="z";a["х"]="h";a["ъ"]="'";
    a["Ф"]="F";a["Ы"]="I";a["В"]="V";a["А"]="a";a["П"]="P";a["Р"]="R";a["О"]="O";a["Л"]="L";a["Д"]="D";a["Ж"]="ZH";a["Э"]="E";
    a["ф"]="f";a["ы"]="i";a["в"]="v";a["а"]="a";a["п"]="p";a["р"]="r";a["о"]="o";a["л"]="l";a["д"]="d";a["ж"]="zh";a["э"]="e";
    a["Я"]="Ya";a["Ч"]="CH";a["С"]="S";a["М"]="M";a["И"]="I";a["Т"]="T";a["Ь"]="'";a["Б"]="B";a["Ю"]="YU";
    a["я"]="ya";a["ч"]="ch";a["с"]="s";a["м"]="m";a["и"]="i";a["т"]="t";a["ь"]="'";a["б"]="b";a["ю"]="yu";

    for (i = 0; i < word.length; ++i){

        answer += a[word[i]] === undefined ? word[i] : a[word[i]];
    }   
    return answer;
}

谢谢你!


1
酷!你看到我的最新函数版本了吗! :-) - Split Your Infinity
@Bart Riemens 一开始没有注意到,但它似乎更短更通用。 - kyng

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接