Search in sources :

Example 31 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestAnnotationMerging method loadDocument.

// tearDown
/**
 * Creates a document from a location relative to the value returned by
 * {@code TestDocument.getTestServerName()} and assigns it a name.
 *
 * @param path location of the document, appended to the test server name
 * @param name name to set on the created document
 * @return the document created from the resulting URL using the "UTF-8" encoding
 * @throws Exception if the URL is malformed or the document cannot be created
 */
private Document loadDocument(String path, String name) throws Exception {
    String location = TestDocument.getTestServerName() + path;
    URL source = new URL(location);
    Document loaded = Factory.newDocument(source, "UTF-8");
    loaded.setName(name);
    return loaded;
}
Also used : Document(gate.Document) TestDocument(gate.corpora.TestDocument) URL(java.net.URL)

Example 32 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestTikaFormats method doTest.

/**
 * Loads {@code tests/tika/tika-test.<ext>} from the test server and checks that
 * the resulting document content contains the expected marker sentence.
 *
 * @param ext file extension of the Tika test document to load
 * @throws Exception if the URL is malformed or the document cannot be created
 */
private void doTest(String ext) throws Exception {
    URL source = new URL(TestDocument.getTestServerName() + "tests/tika/tika-test." + ext);
    Document parsed = Factory.newDocument(source);
    assertNotNull(parsed);
    String text = parsed.getContent().toString();
    assertTrue(text.contains("Testing Tika Format Parsers"));
}
Also used : Document(gate.Document) URL(java.net.URL)

Example 33 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestCreole method testClassIndex.

// testLoading()
/**
 * Test resource indexing by class: looks up the resource data registered for
 * {@code gate.corpora.DocumentImpl}, checks its class and interface names,
 * instantiates the resource class reflectively and verifies that the instance
 * is a {@code gate.Document}. The register is cleared once all checks pass.
 *
 * @throws Exception if the resource class cannot be obtained or instantiated
 */
public void testClassIndex() throws Exception {
    ResourceData docRd = reg.get("gate.corpora.DocumentImpl");
    assertNotNull("couldn't find document res data", docRd);
    // Constant-first equals: a null name fails the assertion with its message
    // instead of aborting the test with a NullPointerException.
    assertTrue("doc res data has wrong class name", "gate.corpora.DocumentImpl".equals(docRd.getClassName()));
    assertTrue("doc res data has wrong interface name", "gate.Document".equals(docRd.getInterfaceName()));
    Class<?> docClass = docRd.getResourceClass();
    assertNotNull("couldn't get doc class", docClass);
    // Class.newInstance() is deprecated (it rethrows checked constructor
    // exceptions undeclared); go through the no-arg constructor instead.
    LanguageResource docRes = (LanguageResource) docClass.getDeclaredConstructor().newInstance();
    assertTrue("instance of doc is wrong type", docRes instanceof gate.Document);
    reg.clear();
}
Also used : LanguageResource(gate.LanguageResource) Document(gate.Document)

Example 34 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestSgml method testSgmlLoading.

// setUp
/**
 * Loads {@code tests/sgml/Hds.sgm} as a non-markup-aware document, checks that it
 * is recognised as SGML, unpacks its markup and verifies the number of
 * annotations in the original-markups annotation set.
 *
 * @throws Exception if the document cannot be created or unpacked
 */
public void testSgmlLoading() throws Exception {
    assertTrue(true);

    // No element-to-annotation mapping is supplied. A mapping of S/s -> Sentence
    // and W/w -> Word used to be built here but is currently disabled.
    Map<String, String> markupElementsMap = null;

    // Create the document with markup awareness switched off so that the
    // markup can be unpacked explicitly below.
    FeatureMap params = Factory.newFeatureMap();
    URL sgmlUrl = new URL(TestDocument.getTestServerName() + "tests/sgml/Hds.sgm");
    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, sgmlUrl);
    params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
    Document doc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);

    // Look up the document format responsible for this document.
    gate.DocumentFormat docFormat = gate.DocumentFormat.getDocumentFormat(doc, doc.getSourceUrl());
    boolean isSgmlFormat = docFormat instanceof gate.corpora.SgmlDocumentFormat;
    assertTrue("Bad document Format was produced. SgmlDocumentFormat was expected", isSgmlFormat);

    // Hand over the (null) mapping and unpack the markup.
    docFormat.setMarkupElementsMap(markupElementsMap);
    docFormat.unpackMarkup(doc, "DocumentContent");

    AnnotationSet originalMarkups = doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
    String countMessage = "For " + doc.getSourceUrl() + " the number of annotations" + " should be:1022";
    assertEquals(countMessage, 1022, originalMarkups.size());

    // Verify that the document's node ids are consistent.
    gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
}
Also used : FeatureMap(gate.FeatureMap) AnnotationSet(gate.AnnotationSet) Document(gate.Document) URL(java.net.URL)

Example 35 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestXml method runCompleteTestWithAFormat.

// testGateDocumentToAndFromXmlWithDifferentKindOfFormats
/**
 * Round-trips a document through GATE XML and checks that nothing is lost.
 * <p>
 * The document at {@code url} is loaded and unpacked (the "key" document),
 * exported with {@code toXml()} to a temporary file, reloaded from that file,
 * and the content size and default annotation set size of the reloaded
 * document are compared with those of the key document.
 *
 * @param url location of the document to load as the key document
 * @param urlDescription human-readable description of the expected format,
 *        used only in the failure message
 * @throws Exception if a document cannot be created, unpacked or written
 */
private void runCompleteTestWithAFormat(URL url, String urlDescription) throws Exception {
    // Load the key document with markup awareness off and unpack it explicitly.
    FeatureMap params = Factory.newFeatureMap();
    params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
    params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
    Document keyDocument = (Document) Factory.createResource("gate.corpora.DocumentImpl", params);
    assertTrue("Coudn't create a GATE document instance for " + url.toString() + " Can't continue.", keyDocument != null);
    DocumentFormat keyDocFormat = DocumentFormat.getDocumentFormat(keyDocument, keyDocument.getSourceUrl());
    assertTrue("Fail to recognize " + url.toString() + " as being " + urlDescription + " !", keyDocFormat != null);
    keyDocFormat.unpackMarkup(keyDocument);
    // Verify that the key document's node ids are consistent.
    gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
    // Verifies if the maximum annotation ID on the GATE doc is less than the
    // Annotation ID generator of the document.
    verifyAnnotationIDGenerator(keyDocument);
    // Remember the key document's content size and annotation count.
    long keyDocumentSize = keyDocument.getContent().size().longValue();
    int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();

    // Export the key document as XML into a temp file, using the working encoding.
    File xmlFile = Files.writeTempFile(keyDocument.toXml(), workingEncoding);
    assertTrue("The temp GATE XML file is null. Can't continue.", xmlFile != null);
    try {
        // Load the exported GATE XML document back from the temp file.
        URL xmlUrl = xmlFile.toURI().toURL();
        Document gateDoc = Factory.newDocument(xmlUrl, workingEncoding);
        assertTrue("Coudn't create a GATE document instance for " + xmlUrl.toString() + " Can't continue.", gateDoc != null);
        DocumentFormat gateDocFormat = DocumentFormat.getDocumentFormat(gateDoc, gateDoc.getSourceUrl());
        assertTrue("Fail to recognize " + xmlUrl.toString() + " as being a GATE XML document !", gateDocFormat != null);
        gateDocFormat.unpackMarkup(gateDoc);
        // Verify that the reloaded document's node ids are consistent.
        gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);

        // Measure the RELOADED document. Reading these values from keyDocument
        // would compare the key document with itself and could never fail.
        long gateDocSize = gateDoc.getContent().size().longValue();
        int gateDocAnnotationSetSize = gateDoc.getAnnotations().size();
        assertTrue("Exporting as GATE XML resulted in document content size lost." + " Something went wrong.", keyDocumentSize == gateDocSize);
        assertTrue("Exporting as GATE XML resulted in annotation lost." + " No. of annotations missing =  " + Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize), keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);
        // Verifies if the maximum annotation ID on the GATE doc is less than the
        // Annotation ID generator of the document.
        verifyAnnotationIDGenerator(gateDoc);
    } finally {
        // The temp GATE XML file is no longer needed; remove it even when an
        // assertion above failed so failed runs do not leave files behind.
        xmlFile.delete();
    }
}
Also used : FeatureMap(gate.FeatureMap) DocumentFormat(gate.DocumentFormat) Document(gate.Document) File(java.io.File)

Aggregations

Document (gate.Document)47 File (java.io.File)17 FeatureMap (gate.FeatureMap)16 URL (java.net.URL)12 AnnotationSet (gate.AnnotationSet)9 TestDocument (gate.corpora.TestDocument)9 Annotation (gate.Annotation)7 Corpus (gate.Corpus)7 ResourceInstantiationException (gate.creole.ResourceInstantiationException)7 PersistenceException (gate.persist.PersistenceException)6 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 LanguageAnalyser (gate.LanguageAnalyser)4 SerialDataStore (gate.persist.SerialDataStore)4 GateRuntimeException (gate.util.GateRuntimeException)4 ActionEvent (java.awt.event.ActionEvent)4 List (java.util.List)4 AbstractAction (javax.swing.AbstractAction)4