下面是我之前承诺的代码:它可以把DOM body中任意范围的节点包裹进任意的HTML标记中,以便进行提取、移动、替换、复制/粘贴等操作。
更新(2015年12月19日):新增了带偏移量参数的wrapRange()重载方法,它会按可选的偏移量拆分TextNode,从而指定范围在文本节点中的起始或结束位置。现在可以在jsoup DOM模型内进行任意的复制/粘贴/移动操作。
待办事项:(对自己或其他好心人)
RangeWrapper.java模块:
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import java.util.ArrayList;
public class RangeWrapper {
public static Element wrapRange(Node startEl, Node endEl, String html) {
if (startEl == endEl) {
return (Element) startEl.wrap(html).parentNode();
}
int startDepth = NodeWalker.getNodeDepth(startEl);
int endDepth = NodeWalker.getNodeDepth(endEl);
int minDepth = getRangeMinDepth(startEl, endEl);
int n;
while (startDepth > minDepth) {
Element parent = (Element)startEl.parentNode();
if ((n = startEl.siblingIndex()) > 0) {
ArrayList<Node> children = new ArrayList<Node>(parent.childNodes());
Element parent2 = new Element(Tag.valueOf(parent.tagName()), parent.baseUri(), parent.attributes());
parent.after(parent2);
for (int i = n; i < children.size(); i++)
parent2.appendChild(children.get(i));
startEl = parent2;
} else {
startEl = parent;
}
startDepth--;
}
while (endDepth > minDepth) {
Element parent = (Element)endEl.parentNode();
if ((n = endEl.siblingIndex()) < parent.children().size()-1) {
ArrayList<Node> children = new ArrayList<Node>(parent.childNodes());
Element parent2 = new Element(Tag.valueOf(parent.tagName()), parent.baseUri(), parent.attributes());
parent.before(parent2);
for (int i = 0; i <= n; i++)
parent2.appendChild(children.get(i));
endEl = parent2;
} else {
endEl = parent;
}
endDepth--;
}
Element range = (Element) startEl.wrap(html).parentNode();
Node nextToAppend;
do {
nextToAppend = range.nextSibling();
range.appendChild(nextToAppend);
} while (nextToAppend != endEl);
return range;
}
public static Element wrapRange(Node startEl, int stOffset, Node endEl, int endOffset, String html) {
if (stOffset > 0 && startEl instanceof TextNode) {
TextNode tn = (TextNode) startEl;
if (endOffset < tn.getWholeText().length()-1) {
startEl = tn.splitText(stOffset);
}
}
if (endOffset > 0 && endEl instanceof TextNode) {
TextNode tn = (TextNode) endEl;
if (endOffset < tn.getWholeText().length()-1) {
tn.splitText(stOffset);
}
}
return wrapRange(startEl, endEl, html);
}
public static int getRangeMinDepth(final Node startNode, final Node endNode) {
class DepthVisitor implements NodeWalker.NodeWalkVisitor {
private int _minDepth = Integer.MAX_VALUE;
public boolean head(Node node, int depth) {
if (depth < _minDepth)
_minDepth = depth;
return true;
}
public boolean tail(Node node, int depth) {return true;}
int getMinDepth() { return _minDepth; }
};
DepthVisitor visitor = new DepthVisitor();
NodeWalker nw = new NodeWalker(visitor);
nw.walk(startNode, endNode);
return visitor.getMinDepth();
}
}
上述代码用到的NodeWalker.java如下,它改编自jsoup包中的NodeTraversor和NodeVisitor类:
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.NodeVisitor;
/**
 * Depth-first walker over a span of DOM nodes: starts at one node and stops
 * once a given end node has been completed, reporting each node together
 * with its depth below the {@code body} element.
 */
public class NodeWalker {
    private final NodeWalkVisitor visitor;

    /**
     * @param visitor callback that receives the head/tail events of the walk
     */
    public NodeWalker(NodeWalkVisitor visitor) {
        this.visitor = visitor;
    }

    /**
     * Walks depth-first from {@code startNode}, calling
     * {@link NodeWalkVisitor#head} when a node is entered and
     * {@link NodeWalkVisitor#tail} when it is left. The walk ends when the
     * visitor returns {@code false}, when {@code endNode} has been left, or
     * when there are no more nodes to visit.
     *
     * @param startNode node to start at
     * @param endNode   node after whose tail event the walk stops
     */
    public void walk(Node startNode, Node endNode) {
        Node node = startNode;
        int depth = getNodeDepth(startNode);
        while (node != null) {
            if (!visitor.head(node, depth)) {
                break;
            }
            if (node.childNodeSize() > 0) {
                node = node.childNode(0);
                depth++;
            } else {
                // Leaf reached: climb until a node with a following sibling is found.
                while (node.nextSibling() == null && depth > 0) {
                    if (!visitor.tail(node, depth) || node == endNode) {
                        return;
                    }
                    Node parent = node.parentNode();
                    if (parent == null) {
                        // Root of a tree without a body ancestor: nothing left to visit.
                        return;
                    }
                    node = parent;
                    depth--;
                }
                if (!visitor.tail(node, depth) || node == endNode) {
                    break;
                }
                node = node.nextSibling();
            }
        }
    }

    /**
     * Counts the steps from {@code givenNode} up to the nearest {@code body}
     * element, or up to the top of the tree if there is none.
     *
     * @param givenNode node to measure; {@code null} yields 0
     * @return 0 for a {@code body} element itself, otherwise the number of
     *         parent steps taken before reaching {@code body} or running out
     *         of parents
     */
    public static int getNodeDepth(Node givenNode) {
        int depth = 0;
        Node node = givenNode;
        while (node != null && !isBody(node)) {
            depth++;
            node = node.parentNode();
        }
        return depth;
    }

    /** Tells whether {@code node} is an element whose tag name is {@code body}. */
    private static boolean isBody(Node node) {
        return node instanceof Element && "body".equals(((Element) node).tagName());
    }

    /** Callback interface for {@link #walk(Node, Node)}. */
    public interface NodeWalkVisitor {
        /**
         * Called when a node is first reached.
         *
         * @return {@code false} to stop the walk
         */
        boolean head(Node node, int depth);

        /**
         * Called when a node is left.
         *
         * @return {@code false} to stop the walk
         */
        boolean tail(Node node, int depth);
    }
}
Greg