JavaScript有没有一种不需要匹配标签等复杂操作就能截断HTML文本的方法呢?
谢谢。
JavaScript有没有一种不需要匹配标签等复杂操作就能截断HTML文本的方法呢?
谢谢。
我知道这个问题比较老,但是最近我也遇到了同样的问题。我写了下面这个库,可以安全地截取有效的HTML代码:https://github.com/arendjr/text-clipper
我也遇到了同样的问题,最后写了以下代码来处理它。它可以将HTML截断至指定长度,清理任何在结尾可能被截断的开/闭标签,然后关闭剩余未关闭的标签:
/**
 * Truncates an HTML string to `length` characters of source and repairs the
 * cut so that the markup stays balanced.
 *
 * The limit applies to the raw source, so the characters of the tags count
 * toward it. After cutting, line breaks are removed and the result is
 * trimmed, a tag left unterminated by the cut is dropped, and closing tags are
 * appended for every element that is still open, innermost first.
 *
 * Limitation: tags are recognised with a regular expression, so an attribute
 * value that contains a literal `<` or `>` is not handled.
 *
 * @param {string} text - HTML source to shorten.
 * @param {number} length - Number of source characters to keep before repair.
 * @returns {string} The shortened, re-balanced markup. It can be longer than
 *   `length` because of the closing tags that are appended.
 */
function truncateHTML(text, length) {
  // Elements that never take a closing tag; appending one would be invalid.
  const voidElements = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img',
    'input', 'link', 'meta', 'source', 'track', 'wbr',
  ]);

  let truncated = text.substring(0, length);
  // A cut between the halves of a surrogate pair leaves a lone high surrogate; drop it
  truncated = truncated.replace(/[\uD800-\uDBFF]$/, '');
  // Remove line breaks and surrounding whitespace
  truncated = truncated.replace(/(\r\n|\n|\r)/gm, '').trim();
  // If the text ends inside a start or end tag, trim the partial tag off.
  // A cut end tag needs no special repair: its element is still on the stack
  // below and gets a complete closing tag there.
  truncated = truncated.replace(/<\/?(?:[A-Za-z][^<>]*)?$/, '');

  // Walk the complete tags from left to right, tracking which elements are open
  const openTags = [];
  const tagExpr = /<(\/?)([A-Za-z][A-Za-z0-9:-]*)([^<>]*)>/g;
  let tagMatch = tagExpr.exec(truncated);
  while (tagMatch !== null) {
    const isClosing = tagMatch[1] === '/';
    const tagName = tagMatch[2];
    const key = tagName.toLowerCase();
    if (isClosing) {
      // Close the nearest open element of that name together with anything
      // nested inside it; a stray closing tag with no opener is ignored
      let depth = openTags.length - 1;
      while (depth >= 0 && openTags[depth].toLowerCase() !== key) {
        depth -= 1;
      }
      if (depth >= 0) {
        openTags.length = depth;
      }
    } else if (!voidElements.has(key) && !tagMatch[3].endsWith('/')) {
      // Void and self-closed (`<x/>`) tags never stay open
      openTags.push(tagName);
    }
    tagMatch = tagExpr.exec(truncated);
  }

  // Close whatever is still open, innermost element first
  while (openTags.length > 0) {
    truncated += '</' + openTags.pop() + '>';
  }
  return truncated;
}
/**
 * Reverses a string and mirrors its angle brackets, so that reversed markup
 * still reads as tags (for example `</p>` becomes `<p/>`).
 *
 * The string is reversed by code point, which keeps surrogate pairs intact,
 * and the brackets are swapped in the same pass. No placeholder character is
 * involved, so every other character of the input is passed through as is.
 *
 * @param {string} str - Text or markup to reverse.
 * @returns {string} The reversed string with `<` and `>` exchanged.
 */
function reverseHtml(str) {
  const mirrored = Array.from(str, (ch) => {
    if (ch === '<') {
      return '>';
    }
    if (ch === '>') {
      return '<';
    }
    return ch;
  });
  return mirrored.reverse().join('');
}
JavaScript本身没有提供相关功能。不过,您可以考虑使用jQuery插件来实现。
这适用于多层嵌套:
/**
 * Shortens an HTML fragment to at most `maxLength` characters of text while
 * keeping the element structure, at any nesting depth.
 *
 * The fragment is parsed into a detached <div>. When its text already fits,
 * the parsed markup is returned without a truncation marker. Otherwise text
 * nodes are counted in document order, the text node that crosses the limit
 * is cut and gets `append` added, and every node after it is removed.
 * Only text nodes count toward the limit.
 *
 * NOTE(review): `content` is parsed through `innerHTML`; sanitize untrusted
 * markup before passing it in.
 *
 * @param {string} content - HTML fragment to shorten.
 * @param {number} [maxLength=255] - Maximum number of text characters to keep.
 * @param {string} [append='…'] - Marker added where the text was cut.
 * @returns {string} Serialized markup of the shortened fragment.
 */
let truncate = (content, maxLength = 255, append = '…') => {
  const container = document.createElement('div');
  container.innerHTML = content;

  // Nothing needs cutting, so no marker must be added either
  if (container.textContent.length <= maxLength) {
    return container.innerHTML;
  }

  let limitReached = false;
  let counted = 0;
  const nodeHandler = (node) => {
    // Everything after the cut point is dropped
    if (limitReached) {
      node.remove();
      return;
    }
    if (node.nodeType === Node.TEXT_NODE) {
      counted += node.textContent.length;
      if (counted >= maxLength) {
        limitReached = true;
        const overflow = counted - maxLength;
        if (overflow > 0) {
          node.textContent = node.textContent.slice(0, -overflow);
        }
        node.textContent += append;
      }
      return;
    }
    // Snapshot the children: removing nodes must not disturb the iteration
    Array.from(node.childNodes).forEach((childNode) => nodeHandler(childNode));
  };
  nodeHandler(container);
  return container.innerHTML;
};
/**
 * Cuts the text below `node` down to `limit` characters, in place.
 *
 * A text node keeps at most `limit` characters. Any other node passes the
 * remaining budget through its children in order; elements themselves are
 * never removed, so those past the limit end up with empty text.
 *
 * @param {Node} node - Root of the subtree to shorten; it is modified.
 * @param {number} limit - Number of text characters still allowed.
 * @returns {number} The budget left over after this subtree.
 */
function truncateNode(node, limit) {
  if (node.nodeType !== Node.TEXT_NODE) {
    let remaining = limit;
    for (const child of node.childNodes) {
      remaining = truncateNode(child, remaining);
    }
    return remaining;
  }

  const clipped = node.textContent.substring(0, limit);
  node.textContent = clipped;
  return limit - node.textContent.length;
}
// Usage example for truncateNode: a <span> holding three inline elements
// with nine characters of text in total.
const span = document.createElement('span');
span.innerHTML = '<b>foo</b><i>bar</i><u>baz</u>';
// Keep the first five characters of text; the tree is modified in place.
truncateNode(span, 5);
// The element past the limit stays in the tree but ends up empty.
expect(span.outerHTML).toEqual('<span><b>foo</b><i>ba</i><u></u></span>');
我最近刚完成了一个使用容器的宽度和高度来实现此功能的jQuery函数。你可以试一下看看是否适用于你。我还不确定所有的兼容性问题、错误或限制,但我已经在FF、Chrome和IE7中进行了测试。
以上的解决方案都不完全符合我的使用情况,所以我自己创建了一个小型的原生JavaScript函数。它会留下空元素,但很容易进行修正。
/**
 * Shortens the text of an HTML string to `length` characters while keeping
 * all of its tags, and wraps the result in a <span> whose `title` holds the
 * complete text so that it shows on hover.
 *
 * When tags are present, the ellipsis counts toward `length`: the kept text
 * is `length - 1` characters followed by `…`. Text after the cut is dropped,
 * but its tags are kept, which can leave empty elements behind.
 *
 * @param {string} string - HTML (or plain text) to shorten.
 * @param {number} length - Maximum number of text characters to show.
 * @returns {string} `string` unchanged when its text already fits, otherwise
 *   the shortened markup wrapped in `<span title="…">`.
 */
const truncateWithHTML = (string, length) => {
  // string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>"
  const noHTML = string.replace(/<[^>]*>/g, '');

  // if the string does not need to be truncated
  if (noHTML.length <= length) {
    return string;
  }

  // A double quote in the text would otherwise end the title attribute early
  const titleText = noHTML.replace(/"/g, '&quot;');

  // if the string does not contain tags
  if (noHTML.length === string.length) {
    // add <span title=""> to allow complete string to appear on hover
    return `<span title="${titleText}">${string.substring(0, length).trim()}…</span>`;
  }

  const substrings = string.split(/(<[^>]*>)/g).filter(Boolean);
  // substrings = ["<span class='className'>","My long string that","</span>"," I want shorter","<span>"," but just a little bit","</span>"]
  let count = 0;
  let isCut = false;
  const truncated = [];
  for (const substr of substrings) {
    // tags are always kept so the markup stays balanced
    if (substr.startsWith('<')) {
      truncated.push(substr);
      continue;
    }
    // once the text has been cut, all later text is dropped
    if (isCut) {
      continue;
    }
    // one character of the budget is reserved for the ellipsis
    const room = Math.max(length - count - 1, 0);
    if (substr.length > room) {
      truncated.push(`${substr.substring(0, room)}…`);
      isCut = true;
    } else {
      truncated.push(substr);
      count += substr.length;
    }
  }
  // the ellipsis was already placed at the cut point, so none is added here
  return `<span title="${titleText}">${truncated.join('')}</span>`;
};
示例:
// Example input: three runs of text, the first and last wrapped in <span> elements.
// Declared with const so the example does not create an implicit global.
const string = "<span class='className'>My long string that</span> I want shorter<span> but just a little bit</span>";
truncateWithHTML(string,10); // "<span title='My long string that I want shorter but just a little bit'><span class='className'>My long s…</span><span></span></span>"
truncateWithHTML(string,22); // "<span title='My long string that I want shorter but just a little bit'><span class='className'>My long string that</span> I…<span></span></span>"
npm i --save text-clipper
赢了 - boatcoder