在字符串中查找希伯来语单词相当简单。使用一个能匹配一串连续希伯来文码位(code point)的正则表达式即可:
// Matches a run of one or more consecutive characters in the range U+05D0-U+05FF.
/[\u05D0-\u05FF]+/
由于JS支持函数式编程,我们可以轻松地编写自己的函数来遍历文档树,在每个文本节点上调用一个函数。首先,需要一些脚手架。
// Install a minimal assert() helper unless the page already provides one.
if (!window.assert) {
    // A non-zero debug level turns assertion checking on.
    window.dbgLvl = 1;
    /**
     * Throw when `succeeded` is falsy and the debug level is non-zero.
     *
     * @param {*} succeeded - value that is expected to be truthy
     * @param {*} [msg] - value to throw; defaults to 'assertion failed'
     */
    window.assert = function (succeeded, msg) {
        if (!dbgLvl || succeeded) {
            return;
        }
        throw msg ? msg : 'assertion failed';
    };
}
接下来,我们定义一个方法,将字符串拆分为数组,并在输出中保留分隔符。
/*
 * String.prototype.separate(separator)
 *
 * Works like String.prototype.split, except that the text matched by the
 * separator is kept in the result: ordinary text sits at the even indexes
 * and the matched separators at the odd indexes.
 *
 * separator is either a string holding a regular-expression source or a
 * RegExp. A RegExp is used exactly as given, so it should carry the 'g' flag
 * and wrap the whole pattern in a single capture group (e.g. /(-)/g); with
 * that shape all three implementations below produce the same array.
 *
 * Which implementation gets installed is decided once, by probing how the
 * host's own split() treats capture groups and empty strings.
 */
if ('-'.split(/(-)/).length & 1) {
    // split() reports capture groups and empty strings: '-' -> ['', '-', ''].
    assert('a'.split(/a/).length, 'split includes grouping but not empty strings');
    String.prototype.separate = function (separator) {
        if (typeof separator === 'string') {
            if (separator.charAt(0) !== '('
                || separator.charAt(separator.length - 1) !== ')')
            {
                // Capture the whole pattern so split() keeps every match.
                separator = new RegExp('(' + separator + ')', 'g');
            } else {
                separator = new RegExp(separator, 'g');
            }
        }
        return this.split(separator);
    };
} else {
    if ('a'.split(/a/).length) {
        // split() drops capture groups but keeps empty strings, so the
        // separators are collected with match() and interleaved by hand.
        String.prototype.separate = function (separator) {
            if (typeof separator === 'string') {
                separator = new RegExp(separator, 'g');
            }
            var fence = this.match(separator);
            if (!fence) {
                // String(this) yields a primitive, like the entries split() returns.
                return [String(this)];
            }
            var posts = this.split(separator);
            // Compare, do not assign: writing to posts.length would resize the
            // array and make the check pass unconditionally.
            assert(posts.length == fence.length + 1);
            var result = [], i;
            for (i = 0; i < fence.length; ++i) {
                result.push(posts[i]);
                result.push(fence[i]);
            }
            result.push(posts[i]);
            return result;
        };
    } else {
        // split() drops capture groups AND empty strings; the missing empty
        // strings at the ends are put back before interleaving.
        String.prototype.separate = function (separator) {
            if (typeof separator === 'string') {
                separator = new RegExp(separator, 'g');
            }
            var fence = this.match(separator);
            if (!fence) {
                // String(this) yields a primitive, like the entries split() returns.
                return [String(this)];
            }
            var posts = this.split(separator);
            if (posts.length <= fence.length) {
                if (posts.length < fence.length) {
                    // Text both starts and ends with a separator.
                    posts.unshift('');
                    posts.push('');
                } else {
                    // Exactly one end is a separator; find out which one.
                    if (this.substring(0, fence[0].length) == fence[0]) {
                        posts.unshift('');
                    } else {
                        posts.push('');
                    }
                }
                // NOTE(review): an empty string lost between two adjacent
                // matches is not restored here - confirm whether that case
                // can occur with the patterns in use.
            }
            var result = [], i;
            for (i = 0; i < fence.length; ++i) {
                result.push(posts[i]);
                result.push(fence[i]);
            }
            result.push(posts[i]);
            return result;
        };
    }
}
接下来是一些节点谓词。
// Guarantee that Node.TEXT_NODE can be read, supplying the value 3 when the
// host does not define Node or does not define the constant.
if (window.Node) {
    if (typeof Node.TEXT_NODE == 'undefined') {
        Node.TEXT_NODE = 3;
    }
} else {
    window.Node = {TEXT_NODE: 3};
}
/**
 * Predicate: does `node` report the text-node type?
 *
 * @param {Object} node - object exposing a nodeType property
 * @returns {boolean} true when node.nodeType loosely equals Node.TEXT_NODE
 */
function isTextNode(node) {
    var kind = node.nodeType;
    return kind == Node.TEXT_NODE;
}
/**
 * Predicate: does `node` have any child nodes?
 *
 * @param {Object} node - object that may expose a childNodes list
 * @returns {*} the falsy childNodes value itself when there is no list,
 *     otherwise the list's length (0 counts as "no kids")
 */
function hasKids(node) {
    var kids = node.childNodes;
    if (!kids) {
        return kids;
    }
    return kids.length;
}
/**
 * Predicate that accepts every node.
 *
 * @param {Object} node - ignored
 * @returns {boolean} always true
 */
function allNodes(node) {
    return true;
}
现在介绍一些遍历DOM树的函数。
/**
 * Visit the descendants of `node` depth-first: each child is handled before
 * its own children, and siblings are handled in order.
 *
 * @param {function(Object): *} which - predicate choosing the nodes to act on
 * @param {function(Object): *} action - called with every node `which` accepts
 * @param {Object} node - root of the walk; the root itself is not visited
 * @param {function(Object): *} [descendInto] - predicate deciding whether the
 *     children of a node are walked; defaults to allNodes (walk everything)
 */
var forEachChild = (function () {
    function walk(which, action, node, descendInto) {
        var child = node.firstChild;
        var next;
        while (child) {
            // Read the sibling BEFORE running the action: an action that
            // replaces or removes `child` (as the handlers returned by
            // wrapText do) leaves it detached, and reading nextSibling
            // afterwards would end the walk and skip the remaining siblings.
            next = child.nextSibling;
            if (which(child)) {
                action(child);
            }
            if (hasKids(child) && descendInto(child)) {
                walk(which, action, child, descendInto);
            }
            child = next;
        }
    }
    return function (which, action, node, descendInto) {
        if (!descendInto) {
            descendInto = allNodes;
        }
        walk(which, action, node, descendInto);
    };
})();
/**
 * Run forEachChild over the whole document.
 *
 * @param {function(Object): *} which - predicate choosing the nodes to act on
 * @param {function(Object): *} action - called with every accepted node
 * @param {function(Object): *} [descendInto] - forwarded to forEachChild
 * @returns {*} whatever forEachChild returns
 */
function forEachNode(which, action, descendInto) {
    var root = document;
    return forEachChild(which, action, root, descendInto);
}
/**
 * Run `action` on the nodes of the document that isTextNode accepts.
 *
 * @param {function(Object): *} action - called with each accepted node
 * @param {function(Object): *} [descendInto] - forwarded to forEachNode
 * @returns {*} whatever forEachNode returns
 */
function forEachTextNode(action, descendInto) {
    var onlyText = isTextNode;
    return forEachNode(onlyText, action, descendInto);
}
/**
 * Run `action` on the nodes below document.body that isTextNode accepts.
 *
 * @param {function(Object): *} action - called with each accepted node
 * @param {function(Object): *} [descendInto] - forwarded to forEachChild
 * @returns {*} whatever forEachChild returns
 */
function forEachTextNodeInBody(action, descendInto) {
    var body = document.body;
    return forEachChild(isTextNode, action, body, descendInto);
}
最后一组函数可以把文本节点中与模式匹配的文本替换为您选择的新节点。这组函数(也就是 wrapText
返回的函数)尚未经过完整的跨浏览器兼容性测试,也尚未验证能否正确处理文本方向。
/**
 * Build a handler that rewrites one text node: every piece of its text that
 * matches `pattern` is replaced by the node `replace` returns for it, and the
 * text in between is kept as plain text nodes.
 *
 * @param {RegExp|string} pattern - handed to String.prototype.separate, which
 *     must return the text with plain chunks at even indexes and matches at
 *     odd indexes
 * @param {function(string): Object} replace - receives the matched text and
 *     returns the node to insert in its place
 * @returns {function(Object)} handler taking the text node to rewrite; a node
 *     without any match is left untouched
 */
function wrapText(pattern, replace) {
    return function (node) {
        var chunks = node.nodeValue.separate(pattern);
        if (chunks.length < 2) {
            return;
        }
        // Check the shape before calling replace(), so a malformed split does
        // not trigger any callback side effects.
        assert(chunks.length % 2 == 1, 'even number of chunks in ['+chunks+'] when it should be odd.');
        var fragment = document.createDocumentFragment();
        var i;
        if (chunks[0].length) {
            fragment.appendChild(document.createTextNode(chunks[0]));
        }
        for (i = 1; i < chunks.length; i += 2) {
            fragment.appendChild(replace(chunks[i]));
            // Skip empty chunks (trailing, or between adjacent matches) so no
            // empty text node ends up in the document.
            if (chunks[i + 1]) {
                fragment.appendChild(document.createTextNode(chunks[i + 1]));
            }
        }
        node.parentNode.replaceChild(fragment, node);
    };
}
/**
 * Build a function that wraps a word in a freshly created <a> element.
 *
 * @param {string|function(string): *} [title] - a function computing the
 *     anchor's title from the word; a string is handed to createWordCounter
 *     to obtain such a function; a falsy value uses createWordCounter()
 *     without arguments
 * @returns {function(string): Object} takes a word and returns the <a>
 *     element, whose title is set and whose only child is a text node
 *     holding the word
 */
function createAnchorWrap(title) {
    var makeTitle = title;
    if (typeof title == 'string') {
        makeTitle = createWordCounter(title);
    } else if (!title) {
        makeTitle = createWordCounter();
    }
    return function (word) {
        var anchor = document.createElement('a');
        anchor.title = makeTitle(word);
        var label = document.createTextNode(word);
        anchor.appendChild(label);
        return anchor;
    };
}
/**
 * Build a title generator that numbers the words it is asked about.
 *
 * @param {string} [pre] - prefix put before the number; trailing spaces are
 *     normalised to exactly one. Defaults to 'word '.
 * @returns {function(*): string} each call returns the prefix followed by a
 *     running count (1 for the first call, 2 for the second, ...); the
 *     argument is accepted for callers that pass the word but is not used.
 *     Every generator keeps its own count.
 */
function createWordCounter(pre) {
    var wordCount = 0;
    // String() lets a truthy non-string prefix (e.g. a number) work too.
    var prefix = pre ? String(pre).replace(/ *$/, ' ') : 'word ';
    return function (text) {
        // Advance the counter; without this every title carried the number 0.
        wordCount += 1;
        return prefix + wordCount;
    };
}
最后一件要做的事情是在(例如)加载处理程序或页面底部的脚本中启动该过程。
// Kick off the wrapping: walk the text nodes under document.body and replace
// each run of characters in U+05D0-U+05FF with an <a> element produced by the
// function createAnchorWrap() returns.
forEachTextNodeInBody(wrapText(/([\u05D0-\u05FF]+)/g,
createAnchorWrap()));
如果您想更改标题的前缀,请将
createWordCounter(...)
的结果传递给
createAnchorWrap
。