Use of gate.email.EmailDocumentHandler in project gate-core by GateNLP.
The class EmailDocumentFormat, method unpackMarkup.
/**
 * Unpacks the native e-mail markup of a document into GATE annotations.
 *
 * <p>An {@code EmailDocumentHandler} is created for the document and given
 * this format's {@code markupElementsMap} and {@code element2StringMap}.
 * Status messages emitted by the handler are forwarded through
 * {@code fireStatusChanged} for the duration of the call. Once the handler
 * has annotated the messages, every {@code "body"} annotation found in the
 * original-markups annotation set is handed to {@code annotateParagraphs}
 * together with its start and end offsets.
 *
 * <p>Parsing is attempted whenever the document has content; a
 * {@code null} source URL alone does not prevent it.
 *
 * @param doc the GATE document to parse
 * @throws DocumentFormatException if {@code doc} is {@code null}, if both
 *         its source URL and its content are {@code null}, or if an
 *         {@code IOException} or {@code InvalidOffsetException} is raised
 *         while the annotations are being created
 */
@Override
public void unpackMarkup(gate.Document doc) throws DocumentFormatException {
  // Reject a missing document, or one with neither a source URL nor content.
  final boolean nothingToParse =
      doc == null || (doc.getSourceUrl() == null && doc.getContent() == null);
  if (nothingToParse) {
    throw new DocumentFormatException("GATE document is null or no content found. Nothing to parse!");
  }

  setNewLineProperty(doc);

  // The handler does the actual e-mail parsing and annotation creation.
  final EmailDocumentHandler handler =
      new gate.email.EmailDocumentHandler(doc, this.markupElementsMap, this.element2StringMap);

  // Relay the handler's progress messages to this format's own listeners.
  final StatusListener relay = new StatusListener() {
    @Override
    public void statusChanged(String text) {
      // fireStatusChanged is inherited from DocumentFormat
      fireStatusChanged(text);
    }
  };
  handler.addStatusListener(relay);

  try {
    // Create the message-level annotations on the document.
    handler.annotateMessages();

    // Look up the body annotations so paragraphs can be searched inside them.
    final AnnotationSet bodies =
        doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME).get("body");
    if (bodies == null || bodies.isEmpty()) {
      // No bodies to process; the finally block still detaches the listener.
      return;
    }

    for (Annotation body : bodies) {
      final int bodyStart = body.getStartNode().getOffset().intValue();
      final int bodyEnd = body.getEndNode().getOffset().intValue();
      annotateParagraphs(doc, bodyStart, bodyEnd, GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
    }
  } catch (IOException ioe) {
    throw new DocumentFormatException("Couldn't create a buffered reader ", ioe);
  } catch (InvalidOffsetException ioe) {
    throw new DocumentFormatException(ioe);
  } finally {
    // Always detach the relay, whether annotation succeeded or failed.
    handler.removeStatusListener(relay);
  }
}
Aggregations