Use of org.loboevolution.info.ElementInfo in project LoboEvolution by LoboEvolution.
From the class HtmlParser, method parseToken.
/**
 * Parses text followed by at most one markup construct (comment, declaration,
 * end tag, processing instruction, or an element together with its children).
 *
 * <p>Any non-blank text read before the next tag is entity-decoded and appended
 * to {@code parent} as a text node. If a tag follows, it is handled according
 * to its first character. On every path that reads a tag, {@code normalLastTag}
 * is updated to the upper-cased name of the tag that was handled last; when no
 * tag follows the text it is reset to {@code null}.
 *
 * @param parent    node that receives the text, comment, processing-instruction
 *                  or element nodes created by this call
 * @param reader    character source handed to the low-level reading helpers
 * @param stopTags  if a start tag in this set is encountered, the method throws
 *                  {@link StopException} carrying the (unattached) element;
 *                  may be {@code null}
 * @param ancestors upper-cased tag names of the currently open elements,
 *                  innermost first; the current element's tag is pushed while
 *                  its children are parsed and popped afterwards
 * @return {@code TOKEN_EOD} when the input ends; {@code TOKEN_TEXT} when only
 *         text was read; {@code TOKEN_COMMENT} for a comment; {@code TOKEN_BAD}
 *         for a DOCTYPE or any other {@code <!...>} declaration;
 *         {@code TOKEN_END_ELEMENT} for an end tag (also propagated when a child
 *         reports an end tag that matches an outer ancestor);
 *         {@code TOKEN_FULL_ELEMENT} for a processing instruction or an element
 *         whose matching end tag was found; {@code TOKEN_BEGIN_ELEMENT} for an
 *         element that has no parsed content (empty element or end tag forbidden)
 * @throws IOException   propagated from the reading helpers
 * @throws StopException if a start tag contained in {@code stopTags} is read
 * @throws SAXException  propagated from the parsing helpers
 */
private int parseToken(final Node parent, final LineNumberReader reader, final Set<HTMLTag> stopTags, final LinkedList<String> ancestors) throws IOException, StopException, SAXException {
    final Document doc = this.document;
    final HTMLDocumentImpl htmlDoc = (HTMLDocumentImpl) doc;
    final StringBuilder textSb = this.readUpToTagBegin(reader);
    if (textSb == null) {
        return TOKEN_EOD;
    }
    if (textSb.length() > 0) {
        final String text = entityDecode(textSb).toString();
        // Whitespace-only runs between tags do not produce text nodes.
        if (text.trim().length() > 0) {
            final Node textNode = doc.createTextNode(text);
            try {
                safeAppendChild(parent, textNode);
            } catch (final DOMException de) {
                // A hierarchy error when appending text directly to the document
                // node is expected and not worth logging; anything else is.
                if ((parent.getNodeType() != NodeType.DOCUMENT_NODE) || (de.getCode() != DOMException.HIERARCHY_REQUEST_ERR)) {
                    logger.log(Level.WARNING, "parseToken(): Unable to append child to " + parent + ".", de);
                }
            }
        }
    }
    if (!this.justReadTagBegin) {
        this.normalLastTag = null;
        return TOKEN_TEXT;
    }
    String tag = this.readTag(parent, reader);
    if (tag == null) {
        return TOKEN_EOD;
    }
    // Locale.ROOT keeps the mapping locale-independent: with a Turkish default
    // locale "div".toUpperCase() would yield "DİV" and miss every tag lookup.
    String normalTag = tag.toUpperCase(java.util.Locale.ROOT);
    try {
        if (tag.startsWith("!")) {
            if ("!--".equals(tag)) {
                final StringBuilder comment = this.passEndOfComment(reader);
                final StringBuilder decText = entityDecode(comment);
                safeAppendChild(parent, doc.createComment(decText.toString()));
                return TOKEN_COMMENT;
            } else if ("!DOCTYPE".equals(normalTag)) {
                // The DOCTYPE keyword is case-insensitive in HTML, so the
                // normalized tag is compared ("<!doctype html>" is common).
                final String doctypeStr = this.parseEndOfTag(reader);
                String qName = null;
                String publicId = null;
                String systemId = null;
                if (doctypeStr.contains("PUBLIC")) {
                    final Matcher doctypeMatcher = doctypePattern.matcher(doctypeStr);
                    if (doctypeMatcher.matches()) {
                        qName = doctypeMatcher.group(1);
                        publicId = doctypeMatcher.group(2);
                        systemId = doctypeMatcher.group(3);
                    }
                } else {
                    qName = doctypeStr.replace(">", "");
                }
                final DocumentTypeImpl doctype = new DocumentTypeImpl(qName, publicId, systemId);
                htmlDoc.setDoctype(doctype);
                needRoot = false;
                return TOKEN_BAD;
            } else {
                // Any other declaration is skipped.
                passEndOfTag(reader);
                return TOKEN_BAD;
            }
        } else if (tag.startsWith("/")) {
            // Strip the slash first so that normalLastTag (set in the finally
            // block) holds the bare element name even if the skip below fails.
            normalTag = normalTag.substring(1);
            this.passEndOfTag(reader);
            return TOKEN_END_ELEMENT;
        } else if (tag.startsWith("?")) {
            tag = tag.substring(1);
            final StringBuilder data = readProcessingInstruction(reader);
            safeAppendChild(parent, doc.createProcessingInstruction(tag, data.toString()));
            return TOKEN_FULL_ELEMENT;
        } else {
            final int localIndex = normalTag.indexOf(':');
            final boolean tagHasPrefix = localIndex > 0;
            final String localName = tagHasPrefix ? normalTag.substring(localIndex + 1) : normalTag;
            Element element = doc.createElement(localName);
            element.setUserData(MODIFYING_KEY, Boolean.TRUE, null);
            try {
                if (!this.justReadTagEnd) {
                    while (this.readAttribute(reader, element)) {
                        // EMPTY LOOP
                    }
                }
                if (stopTags != null && stopTags.contains(HTMLTag.get(normalTag))) {
                    // After MODIFYING_KEY is set.
                    throw new StopException(element);
                }
                // Add element to parent before children are added.
                // This is necessary for incremental rendering.
                safeAppendChild(parent, element);
                if (!this.justReadEmptyElement) {
                    // localName is already upper-case (derived from normalTag).
                    ElementInfo einfo = HTMLEntities.ELEMENT_INFOS.get(HTMLTag.get(localName));
                    int endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.getEndElementType();
                    if (endTagType != ElementInfo.END_ELEMENT_FORBIDDEN) {
                        boolean childrenOk = einfo == null || einfo.isChildElementOk();
                        Set<HTMLTag> newStopSet = einfo == null ? null : einfo.getStopTags();
                        if (newStopSet == null) {
                            // An element with an optional end tag is implicitly
                            // closed by another start tag of the same name.
                            if (endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
                                newStopSet = Collections.singleton(HTMLTag.get(normalTag));
                            }
                        }
                        if (stopTags != null) {
                            if (newStopSet != null) {
                                // Children stop on the inherited tags as well as on this element's own.
                                final Set<HTMLTag> newStopSet2 = new HashSet<>();
                                newStopSet2.addAll(stopTags);
                                newStopSet2.addAll(newStopSet);
                                newStopSet = newStopSet2;
                            } else {
                                // Inherited stop tags are dropped for elements that require an end tag.
                                newStopSet = endTagType == ElementInfo.END_ELEMENT_REQUIRED ? null : stopTags;
                            }
                        }
                        ancestors.addFirst(normalTag);
                        try {
                            for (; ; ) {
                                try {
                                    int token;
                                    if ((einfo != null) && einfo.isNoScriptElement()) {
                                        final UserAgentContext ucontext = this.ucontext;
                                        if ((ucontext == null) || ucontext.isScriptingEnabled()) {
                                            token = this.parseForEndTag(parent, reader, tag, false, shouldDecodeEntities(einfo));
                                        } else {
                                            token = this.parseToken(element, reader, newStopSet, ancestors);
                                        }
                                    } else {
                                        token = childrenOk ? this.parseToken(element, reader, newStopSet, ancestors) : this.parseForEndTag(element, reader, tag, true, shouldDecodeEntities(einfo));
                                    }
                                    if (token == TOKEN_END_ELEMENT) {
                                        // NOTE(review): assumes normalLastTag is non-null whenever
                                        // an end tag is reported - confirm for parseForEndTag.
                                        final String normalLastTag = this.normalLastTag;
                                        if (normalTag.equalsIgnoreCase(normalLastTag)) {
                                            return TOKEN_FULL_ELEMENT;
                                        } else {
                                            final ElementInfo closeTagInfo = HTMLEntities.ELEMENT_INFOS.get(HTMLTag.get(normalLastTag.toUpperCase(java.util.Locale.ROOT)));
                                            if ((closeTagInfo == null) || (closeTagInfo.getEndElementType() != ElementInfo.END_ELEMENT_FORBIDDEN)) {
                                                // TODO: Rather inefficient algorithm, but it's
                                                // probably executed infrequently?
                                                final Iterator<String> i = ancestors.iterator();
                                                if (i.hasNext()) {
                                                    // Skip the current element; only outer ancestors are of interest.
                                                    i.next();
                                                    while (i.hasNext()) {
                                                        final String normalAncestorTag = i.next();
                                                        if (normalLastTag.equals(normalAncestorTag)) {
                                                            // The end tag closes an outer element: unwind to it.
                                                            normalTag = normalLastTag;
                                                            return TOKEN_END_ELEMENT;
                                                        }
                                                    }
                                                }
                                            }
                                            // TODO: Working here
                                        }
                                    } else if (token == TOKEN_EOD) {
                                        return TOKEN_EOD;
                                    }
                                } catch (final StopException se) {
                                    // newElement does not have a parent.
                                    final Element newElement = se.getElement();
                                    tag = newElement.getTagName();
                                    normalTag = tag.toUpperCase(java.util.Locale.ROOT);
                                    // A tag that the caller also stops on is not handled here.
                                    if (stopTags != null && stopTags.contains(HTMLTag.get(normalTag))) {
                                        throw se;
                                    }
                                    einfo = HTMLEntities.ELEMENT_INFOS.get(HTMLTag.get(normalTag));
                                    endTagType = einfo == null ? ElementInfo.END_ELEMENT_REQUIRED : einfo.getEndElementType();
                                    childrenOk = einfo == null || einfo.isChildElementOk();
                                    newStopSet = einfo == null ? null : einfo.getStopTags();
                                    if (newStopSet == null) {
                                        if (endTagType == ElementInfo.END_ELEMENT_OPTIONAL) {
                                            newStopSet = Collections.singleton(HTMLTag.get(normalTag));
                                        }
                                    }
                                    if (stopTags != null && newStopSet != null) {
                                        final Set<HTMLTag> newStopSet2 = new HashSet<>();
                                        newStopSet2.addAll(stopTags);
                                        newStopSet2.addAll(newStopSet);
                                        newStopSet = newStopSet2;
                                    }
                                    // The new element takes the closed element's place among the ancestors.
                                    ancestors.removeFirst();
                                    ancestors.addFirst(normalTag);
                                    // Switch element
                                    element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
                                    // newElement should have been suspended.
                                    element = newElement;
                                    // Add to parent
                                    safeAppendChild(parent, element);
                                    if (this.justReadEmptyElement) {
                                        return TOKEN_BEGIN_ELEMENT;
                                    }
                                }
                            }
                        } finally {
                            ancestors.removeFirst();
                        }
                    }
                }
                return TOKEN_BEGIN_ELEMENT;
            } finally {
                // This can inform elements to continue with notifications.
                // It can also cause Javascript to be loaded / processed.
                // Update: Elements now use Document.addJob() to delay processing
                element.setUserData(MODIFYING_KEY, Boolean.FALSE, null);
            }
        }
    } finally {
        this.normalLastTag = normalTag;
    }
}
Aggregations