Use of gate.Document in the gate-core project by GateNLP.
Class TestAnnotationMerging, method loadDocument.
// tearDown
/**
 * Creates a document from a resource on the test server and gives it a name.
 *
 * @param path location of the resource, appended to the test server name
 * @param name name to set on the created document
 * @return the created document, with its name set to {@code name}
 * @throws Exception propagated from URL construction or document creation
 */
private Document loadDocument(String path, String name) throws Exception {
  URL location = new URL(TestDocument.getTestServerName() + path);
  // The content is always read as UTF-8.
  Document loaded = Factory.newDocument(location, "UTF-8");
  loaded.setName(name);
  return loaded;
}
Use of gate.Document in the gate-core project by GateNLP.
Class TestTikaFormats, method doTest.
/**
 * Loads the Tika test file with the given extension from the test server and
 * checks that its content contains the expected marker sentence.
 *
 * @param ext file extension appended to {@code tests/tika/tika-test.}
 * @throws Exception propagated from URL construction or document creation
 */
private void doTest(String ext) throws Exception {
  URL url = new URL(TestDocument.getTestServerName() + "tests/tika/tika-test." + ext);
  Document doc = Factory.newDocument(url);
  assertNotNull(doc);
  // The marker sentence must appear somewhere in the extracted text.
  String extractedText = doc.getContent().toString();
  assertTrue(extractedText.contains("Testing Tika Format Parsers"));
}
Use of gate.Document in the gate-core project by GateNLP.
Class TestCreole, method testClassIndex.
// testLoading()
/**
* Test resource indexing by class
*/
/**
 * Tests resource indexing by class: looks up the resource data registered for
 * {@code gate.corpora.DocumentImpl}, checks its class and interface names,
 * instantiates the resource class and checks that the instance is a
 * {@code gate.Document}. The register is cleared at the end of the test.
 *
 * @throws Exception propagated from class lookup or instantiation
 */
public void testClassIndex() throws Exception {
  ResourceData docRd = reg.get("gate.corpora.DocumentImpl");
  assertNotNull("couldn't find document res data", docRd);
  assertTrue("doc res data has wrong class name", docRd.getClassName().equals("gate.corpora.DocumentImpl"));
  assertTrue("doc res data has wrong interface name", docRd.getInterfaceName().equals("gate.Document"));
  Class<?> docClass = docRd.getResourceClass();
  assertNotNull("couldn't get doc class", docClass);
  // Class.newInstance() is deprecated since Java 9 because it rethrows checked
  // constructor exceptions without declaring them; going through the no-arg
  // constructor wraps them in InvocationTargetException instead.
  LanguageResource docRes = (LanguageResource) docClass.getDeclaredConstructor().newInstance();
  assertTrue("instance of doc is wrong type", docRes instanceof gate.Document);
  reg.clear();
}
Use of gate.Document in the gate-core project by GateNLP.
Class TestSgml, method testSgmlLoading.
// setUp
/**
 * Loads the SGML test file {@code tests/sgml/Hds.sgm}, unpacks its markup with
 * the document format selected for it, and checks the number of annotations
 * produced in the original-markups annotation set.
 *
 * @throws Exception propagated from document creation or markup unpacking
 */
public void testSgmlLoading() throws Exception {
  // Create the document with the markup-aware parameter set to "false";
  // the markup is unpacked explicitly below.
  FeatureMap params = Factory.newFeatureMap();
  URL sgmlUrl = new URL(TestDocument.getTestServerName() + "tests/sgml/Hds.sgm");
  params.put(Document.DOCUMENT_URL_PARAMETER_NAME, sgmlUrl);
  params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
  Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);

  // Ask for the format that handles this document and make sure it is the SGML one.
  gate.DocumentFormat docFormat = gate.DocumentFormat.getDocumentFormat(doc, doc.getSourceUrl());
  assertTrue("Bad document Format was produced. SgmlDocumentFormat was expected",
      docFormat instanceof gate.corpora.SgmlDocumentFormat);

  // No markup-element mapping is supplied: a null map is handed to the format.
  Map<String, String> markupElementsMap = null;
  docFormat.setMarkupElementsMap(markupElementsMap);
  docFormat.unpackMarkup(doc, "DocumentContent");

  AnnotationSet originalMarkups = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
  assertEquals("For " + doc.getSourceUrl() + " the number of annotations should be:1022",
      1022, originalMarkups.size());

  // Check the node ids of the document's annotations for consistency.
  gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
}
Use of gate.Document in the gate-core project by GateNLP.
Class TestXml, method runCompleteTestWithAFormat.
// testGateDocumentToAndFromXmlWithDifferentKindOfFormats
/**
 * Round-trip test for one input document: loads it from {@code url}, unpacks
 * its markup, exports it as GATE XML to a temporary file, reloads that file
 * and checks that content size and default-annotation-set size survived.
 *
 * @param url location of the key document to load
 * @param urlDescription human-readable description of the format, used only
 *        in assertion messages
 * @throws Exception propagated from document creation, markup unpacking or
 *         temporary-file handling
 */
private void runCompleteTestWithAFormat(URL url, String urlDescription) throws Exception {
  // Load the key document with the markup-aware parameter set to "false";
  // its markup is unpacked explicitly below.
  FeatureMap params = Factory.newFeatureMap();
  params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
  params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
  gate.Document keyDocument = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
  assertTrue("Coudn't create a GATE document instance for " + url.toString() + " Can't continue.", keyDocument != null);

  gate.DocumentFormat keyDocFormat = gate.DocumentFormat.getDocumentFormat(keyDocument, keyDocument.getSourceUrl());
  assertTrue("Fail to recognize " + url.toString() + " as being " + urlDescription + " !", keyDocFormat != null);
  keyDocFormat.unpackMarkup(keyDocument);

  // Check node-id consistency, then that the maximum annotation ID is below
  // the document's annotation ID generator.
  gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
  verifyAnnotationIDGenerator(keyDocument);

  // Reference values the reloaded document is compared against.
  long keyDocumentSize = keyDocument.getContent().size().longValue();
  int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();

  // Export the key document as XML into a temp file, using the working encoding.
  File xmlFile = Files.writeTempFile(keyDocument.toXml(), workingEncoding);
  assertTrue("The temp GATE XML file is null. Can't continue.", xmlFile != null);
  try {
    URL xmlUrl = xmlFile.toURI().toURL();

    // Load the exported GATE XML back into memory.
    gate.Document gateDoc = gate.Factory.newDocument(xmlUrl, workingEncoding);
    assertTrue("Coudn't create a GATE document instance for " + xmlUrl.toString() + " Can't continue.", gateDoc != null);

    gate.DocumentFormat gateDocFormat = DocumentFormat.getDocumentFormat(gateDoc, gateDoc.getSourceUrl());
    assertTrue("Fail to recognize " + xmlUrl.toString() + " as being a GATE XML document !", gateDocFormat != null);
    gateDocFormat.unpackMarkup(gateDoc);
    gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);

    // Measure the RELOADED document. The previous code measured keyDocument
    // again, which made both comparisons below trivially true.
    long gateDocSize = gateDoc.getContent().size().longValue();
    int gateDocAnnotationSetSize = gateDoc.getAnnotations().size();
    assertTrue("Exporting as GATE XML resulted in document content size lost." + " Something went wrong.", keyDocumentSize == gateDocSize);
    assertTrue("Exporting as GATE XML resulted in annotation lost." + " No. of annotations missing = " + Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize), keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);

    // Same annotation-ID-generator check as for the key document.
    verifyAnnotationIDGenerator(gateDoc);
  } finally {
    // Remove the temp file even when one of the assertions above fails.
    xmlFile.delete();
  }
}
Aggregations