Use of gate.DocumentFormat in the gate-core project by GateNLP.
The init method of class DocumentImpl.
/**
 * Initialise this resource, and return it.
 *
 * <p>The document content is built from {@code sourceUrl} when it is set,
 * otherwise from {@code stringContent}; the origin is recorded in the
 * {@code "gate.SourceURL"} feature. When {@code preserveOriginalContent} is
 * true the original content is stored as a document feature. When the
 * document is markup aware, a {@link DocumentFormat} is looked up (from the
 * explicit {@code mimeType} if one was given, otherwise from the source URL)
 * and used to unpack the markup, optionally collecting repositioning
 * information.
 *
 * @return this document
 * @throws ResourceInstantiationException if both the source URL and the
 *         string content are null, if creating the content from the URL
 *         raises an {@link IOException}, if the explicit MIME type cannot be
 *         resolved, or if unpacking the markup fails
 */
@Override
public Resource init() throws ResourceInstantiationException {
  // set up the source URL and create the content
  if (sourceUrl == null) {
    if (stringContent == null) {
      throw new ResourceInstantiationException("The sourceURL and document's content were null.");
    }
    content = new DocumentContentImpl(stringContent);
    getFeatures().put("gate.SourceURL", "created from String");
  } else {
    try {
      content = new DocumentContentImpl(sourceUrl, getEncoding(), sourceUrlStartOffset, sourceUrlEndOffset);
      getFeatures().put("gate.SourceURL", sourceUrl.toExternalForm());
    } catch (IOException e) {
      // same message as before, but chain the cause so the stack trace survives
      throw new ResourceInstantiationException("DocumentImpl.init: " + e, e);
    }
  }

  if (preserveOriginalContent.booleanValue() && content != null) {
    String originalContent = ((DocumentContentImpl) content).getOriginalContent();
    getFeatures().put(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME, originalContent);
  }

  // set up a DocumentFormat if markup unpacking required
  if (getMarkupAware().booleanValue()) {
    DocumentFormat docFormat = null;
    // if a specific MIME type has been given, use it
    if (this.mimeType != null && this.mimeType.length() > 0) {
      MimeType theType = DocumentFormat.getMimeTypeForString(mimeType);
      if (theType == null) {
        throw new ResourceInstantiationException(
            "MIME type \"" + this.mimeType + "\" has no registered DocumentFormat");
      }
      docFormat = DocumentFormat.getDocumentFormat(this, theType);
    } else {
      docFormat = DocumentFormat.getDocumentFormat(this, sourceUrl);
    }

    try {
      // a null format means there is nothing to unpack
      if (docFormat != null) {
        StatusListener sListener =
            (StatusListener) gate.Gate.getListeners().get("gate.event.StatusListener");
        if (sListener != null) {
          docFormat.addStatusListener(sListener);
        }
        try {
          // set the flag if true and if the document format supports collecting
          docFormat.setShouldCollectRepositioning(collectRepositioningInfo);
          if (docFormat.getShouldCollectRepositioning().booleanValue()) {
            // unpack with collecting of repositioning information
            RepositioningInfo info = new RepositioningInfo();
            String origContent =
                (String) getFeatures().get(GateConstants.ORIGINAL_DOCUMENT_CONTENT_FEATURE_NAME);
            RepositioningInfo ampCodingInfo = new RepositioningInfo();
            boolean isXmlFormat = docFormat instanceof XmlDocumentFormat;
            if (origContent != null) {
              // the XML check doubles as the "should correct CR" flag
              collectInformationForAmpCodding(origContent, ampCodingInfo, isXmlFormat);
              if (docFormat.getMimeType().equals(new MimeType("text", "html"))) {
                collectInformationForWS(origContent, ampCodingInfo);
              }
            }
            docFormat.unpackMarkup(this, info, ampCodingInfo);
            if (origContent != null && isXmlFormat) {
              // CRLF correction of RepositioningInfo
              correctRepositioningForCRLFInXML(origContent, info);
            }
            getFeatures().put(GateConstants.DOCUMENT_REPOSITIONING_INFO_FEATURE_NAME, info);
          } else {
            // normal old fashioned unpack
            docFormat.unpackMarkup(this);
          }
        } finally {
          // detach the listener even when unpacking fails, so it is not left
          // registered on the format object
          docFormat.removeStatusListener(sListener);
        }
      }
    } catch (DocumentFormatException e) {
      throw new ResourceInstantiationException(
          "Couldn't unpack markup in document "
              + (sourceUrl != null ? sourceUrl.toExternalForm() : "") + "!", e);
    }
  }
  return this;
}
Aggregations