Search in sources :

Example 11 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestPersist method testDSR.

// testDelete()
/**
 * Test the DS register.
 * <p>
 * Creates two serial data stores through {@code Factory.createDataStore} and
 * asserts the size of the register returned by
 * {@code Gate.getDataStoreRegister()} at every step: 0 at the start, 1 after
 * the first store exists and a document has been saved into it, 2 after the
 * second store is created, 1 after the first store is closed, still 1 after
 * that closed store is deleted, and 0 once the second store is deleted.
 *
 * @throws Exception if creating the temporary files, the document or the
 *         data stores fails
 */
public void testDSR() throws Exception {
    DataStoreRegister dsr = Gate.getDataStoreRegister();
    assertTrue("DSR has wrong number elements (not 0): " + dsr.size(), dsr.size() == 0);

    // File.createTempFile really creates the file on disk, so it has to be
    // removed again before the data store is created at that location
    File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();

    // create and open a serial data store
    DataStore sds = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());

    // create a document with some annotations / features on it
    String server = TestDocument.getTestServerName();
    Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
    // valueOf replaces the deprecated boxing constructors
    doc.getFeatures().put("hi there", Integer.valueOf(23232));
    doc.getAnnotations().add(Long.valueOf(5), Long.valueOf(25), "ThingyMaJig", Factory.newFeatureMap());

    // save the document
    Document persDoc = (Document) sds.adopt(doc);
    sds.sync(persDoc);

    // DSR should have one member
    assertTrue("DSR has wrong number elements (expected 1): " + dsr.size(), dsr.size() == 1);

    // create and open another serial data store (same temp-file trick as above)
    storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();
    DataStore sds2 = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());

    // DSR should have two members
    assertTrue("DSR has wrong number elements: " + dsr.size(), dsr.size() == 2);

    // peek at the DSR members: none of them may be null
    Iterator<DataStore> dsrIter = dsr.iterator();
    while (dsrIter.hasNext()) {
        DataStore ds = dsrIter.next();
        assertNotNull("null ds in ds reg", ds);
        if (DEBUG) {
            Out.prln(ds);
        }
    }

    // closing the first store is expected to drop it from the register ...
    sds.close();
    assertTrue("DSR has wrong number elements (expected 1): " + dsr.size(), dsr.size() == 1);
    // ... so deleting it afterwards must leave the count untouched
    sds.delete();
    assertTrue("DSR has wrong number elements (expected 1): " + dsr.size(), dsr.size() == 1);
    // deleting the second (still open) store empties the register
    sds2.delete();
    assertTrue("DSR has wrong number elements (expected 0): " + dsr.size(), dsr.size() == 0);
}
Also used : DataStore(gate.DataStore) TestDocument(gate.corpora.TestDocument) Document(gate.Document) DataStoreRegister(gate.DataStoreRegister) File(java.io.File) URL(java.net.URL)

Example 12 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestPersist method testMultipleLrs.

// testSimple()
/**
 * Test multiple LRs.
 * <p>
 * Builds a corpus of two documents, adopts and syncs it into a freshly
 * created {@code SerialDataStore}, reloads every
 * {@code gate.corpora.SerialCorpusImpl} found in the store and checks that
 * the first reloaded corpus and its first document match the in-memory
 * originals. The data store is deleted at the end, even when an assertion
 * fails.
 *
 * @throws Exception if creating the temporary file, the documents, the
 *         corpus or the data store fails
 */
public void testMultipleLrs() throws Exception {
    // File.createTempFile really creates the file on disk, so it has to be
    // removed again before the data store is created at that location
    File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();

    // create and open a serial data store
    SerialDataStore sds = new SerialDataStore(storageDir.toURI().toURL().toString());
    sds.create();
    sds.open();

    try {
        // create a document with some annotations / features on it
        String server = TestDocument.getTestServerName();
        Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
        doc.getFeatures().put("hi there", Integer.valueOf(23232));
        doc.getAnnotations().add(Long.valueOf(5), Long.valueOf(25), "ThingyMaJig", Factory.newFeatureMap());

        // create another document with some annotations / features on it;
        // the feature and annotation belong on doc2 (they used to be added to
        // doc by mistake, leaving doc2 bare)
        // NOTE(review): assumes tests/html/test1.htm has at least 25 characters
        // of content so that offsets 5..25 are valid - confirm
        Document doc2 = Factory.newDocument(new URL(server + "tests/html/test1.htm"));
        doc2.getFeatures().put("hi there again", Integer.valueOf(23232));
        doc2.getAnnotations().add(Long.valueOf(5), Long.valueOf(25), "dog poo irritates", Factory.newFeatureMap());

        // create a corpus with the documents and persist it
        Corpus corp = Factory.newCorpus("Hamish test corpus");
        corp.add(doc);
        corp.add(doc2);
        LanguageResource persCorpus = sds.adopt(corp);
        sds.sync(persCorpus);

        // read every stored corpus back through the Factory
        List<Resource> lrsFromDisk = new ArrayList<Resource>();
        List<String> lrIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
        for (String lrId : lrIds) {
            FeatureMap features = Factory.newFeatureMap();
            features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
            features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
            Resource lr = Factory.createResource("gate.corpora.SerialCorpusImpl", features);
            lrsFromDisk.add(lr);
        }
        if (DEBUG) {
            System.out.println("LRs on disk" + lrsFromDisk);
        }

        // check that the versions we read back match the originals
        Corpus diskCorp = (Corpus) lrsFromDisk.get(0);
        Document diskDoc = diskCorp.get(0);
        if (DEBUG) {
            Out.prln("Documents in corpus: " + corp.getDocumentNames());
        }
        assertTrue("corp name != mem name", corp.getName().equals(diskCorp.getName()));
        if (DEBUG) {
            Out.prln("Memory features " + corp.getFeatures());
            Out.prln("Disk features " + diskCorp.getFeatures());
        }
        assertTrue("corp feat != mem feat", corp.getFeatures().equals(diskCorp.getFeatures()));
        if (DEBUG) {
            Out.prln("Annotations in doc: " + diskDoc.getAnnotations());
        }
        assertTrue("doc annotations from disk not equal to memory version", TestEqual.annotationSetsEqual(doc.getAnnotations(), diskDoc.getAnnotations()));
        assertTrue("doc from disk not equal to memory version", TestEqual.documentsEqual(doc, diskDoc));

        // walk the whole reloaded corpus so every document gets fetched;
        // only the first document is compared against memory above, the
        // second one is merely iterated
        Iterator<Document> corpusIter = diskCorp.iterator();
        while (corpusIter.hasNext()) {
            Document stored = corpusIter.next();
            if (DEBUG) {
                Out.prln(stored.getName());
            }
        }
    } finally {
        // delete the datastore, also when one of the checks above failed
        sds.delete();
    }
}
Also used : LanguageResource(gate.LanguageResource) Resource(gate.Resource) ArrayList(java.util.ArrayList) TestDocument(gate.corpora.TestDocument) Document(gate.Document) URL(java.net.URL) Corpus(gate.Corpus) FeatureMap(gate.FeatureMap) File(java.io.File)

Example 13 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestPersist method testSaveRestore.

// tearDown
/**
 * Test resource save and restore.
 * <p>
 * Verifies that a document cannot be synced before it has been adopted,
 * that a document already attached to another data store cannot be adopted,
 * and that an adopted and synced document can be listed by type and name
 * and read back (both through {@code Factory.createResource} and through
 * {@code sds.getLr}) as a document equal to the persisted one. The data
 * store is deleted once the final comparisons have run.
 *
 * @throws Exception if creating the temporary directory, the document or
 *         the data store fails
 */
public void testSaveRestore() throws Exception {
    File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    // get rid of the temp file
    storageDir.delete();
    // create an empty dir of same name
    storageDir.mkdir();
    SerialDataStore sds = new SerialDataStore(storageDir.toURI().toURL().toString());
    sds.create();
    sds.open();

    // create a document
    String server = TestDocument.getTestServerName();
    assertNotNull(server);
    Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
    assertNotNull(doc);
    doc.getFeatures().put("hi there", Integer.valueOf(23232));
    doc.getAnnotations().add(Long.valueOf(0), Long.valueOf(20), "thingymajig", Factory.newFeatureMap());

    // check that we can't save a resource without adopting it
    boolean cannotSync = false;
    try {
        sds.sync(doc);
    } catch (PersistenceException e) {
        // expected: syncing an unadopted resource must be rejected
        cannotSync = true;
    }
    assertTrue("doc synced ok before adoption", cannotSync);

    // check that we can't adopt a resource that's stored somewhere else;
    // a dedicated flag is required here: reusing cannotSync (already true
    // from the check above) would make this check pass unconditionally
    boolean cannotAdopt = false;
    doc.setDataStore(new SerialDataStore(new File("z:\\").toURI().toURL().toString()));
    try {
        sds.adopt(doc);
    } catch (PersistenceException e) {
        // expected: the document claims to belong to a different data store
        cannotAdopt = true;
    }
    assertTrue("doc adopted but in other datastore already", cannotAdopt);

    // detach the document again so that it can really be adopted
    doc.setDataStore(null);
    doc.setName("Alicia Tonbridge, a Document");

    // save the document
    Document persDoc = (Document) sds.adopt(doc);
    sds.sync(persDoc);
    Object lrPersistenceId = persDoc.getLRPersistenceId();

    // test the getLrTypes method
    List<String> lrTypes = sds.getLrTypes();
    assertTrue("wrong number of types in SDS", lrTypes.size() == 1);
    assertTrue("wrong type LR in SDS", lrTypes.get(0).equals("gate.corpora.DocumentImpl"));

    // test the getLrNames method (expected value first, actual second)
    Iterator<String> iter = sds.getLrNames("gate.corpora.DocumentImpl").iterator();
    String name = iter.next();
    assertEquals("Alicia Tonbridge, a Document", name);

    // read the document back, once via the Factory and once via the store
    FeatureMap features = Factory.newFeatureMap();
    features.put(DataStore.LR_ID_FEATURE_NAME, lrPersistenceId);
    features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
    Document doc2 = (Document) Factory.createResource("gate.corpora.DocumentImpl", features);
    Document doc3 = (Document) sds.getLr("gate.corpora.DocumentImpl", lrPersistenceId);
    try {
        // TestEqual.message is used as the failure text of each comparison
        boolean value = TestEqual.documentsEqual(doc3, doc2);
        assertTrue(TestEqual.message, value);
        value = TestEqual.documentsEqual(persDoc, doc2);
        assertTrue(TestEqual.message, value);
    } finally {
        // delete the datastore
        sds.delete();
    }
}
Also used : TestDocument(gate.corpora.TestDocument) Document(gate.Document) URL(java.net.URL) FeatureMap(gate.FeatureMap) File(java.io.File)

Example 14 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestAnnotationMerging method testWithfeat.

/**
 * Runs one annotation-merging scenario and passes the resulting annotation
 * count to {@code checkNumbers}.
 * <p>
 * For every document in {@code data} the annotations of type
 * {@code nameAnnType} are collected from each named annotation set. The sets
 * of the first document are then merged, and the merged annotations are
 * counted: when all documents report the same instances for all annotators,
 * only merged annotations carrying a non-null {@code nameAnnFeat} feature
 * are counted; otherwise every merged annotation counts.
 *
 * @param nameAnnSets names of the annotation sets to merge, separated by
 *        {@code ';'}
 * @param nameAnnType annotation type to pull from each set
 * @param nameAnnFeat feature name handed to the merging routine and used
 *        when counting
 * @param data corpus supplying the documents
 * @param isUsingMajority {@code true} to call
 *        {@code AnnotationMerging.mergeAnnotationMajority}, {@code false} to
 *        call {@code AnnotationMerging.mergeAnnotation}
 */
public void testWithfeat(String nameAnnSets, String nameAnnType, String nameAnnFeat, Corpus data, boolean isUsingMajority) {
    // one annotation set name per annotator
    final String[] setNames = nameAnnSets.split(";");
    final int annotatorCount = setNames.length;
    final int docCount = data.size();

    // setsPerDoc[d][a] = annotations of the requested type, document d, annotator a
    final AnnotationSet[][] setsPerDoc = new AnnotationSet[docCount][annotatorCount];
    for (int d = 0; d < docCount; d++) {
        final Document current = data.get(d);
        for (int a = 0; a < annotatorCount; a++) {
            setsPerDoc[d][a] = current.getAnnotations(setNames[a]).get(nameAnnType);
        }
    }

    // stop at the first document whose annotators disagree on the instances
    boolean sameInstances = true;
    for (AnnotationSet[] docSets : setsPerDoc) {
        if (!AnnotationMerging.isSameInstancesForAnnotators(docSets, 1)) {
            sameInstances = false;
            break;
        }
    }

    // merge the first document's sets with the requested strategy
    final HashMap<Annotation, String> merged = new HashMap<Annotation, String>();
    if (!isUsingMajority) {
        AnnotationMerging.mergeAnnotation(setsPerDoc[0], nameAnnFeat, merged, 2, sameInstances);
    } else {
        AnnotationMerging.mergeAnnotationMajority(setsPerDoc[0], nameAnnFeat, merged, sameInstances);
    }

    // count the merged annotations
    int mergedCount;
    if (!sameInstances) {
        mergedCount = merged.size();
    } else {
        mergedCount = 0;
        for (Annotation candidate : merged.keySet()) {
            Object featureValue = candidate.getFeatures().get(nameAnnFeat);
            if (featureValue != null) {
                mergedCount++;
            }
        }
    }
    checkNumbers(mergedCount);
}
Also used : HashMap(java.util.HashMap) AnnotationSet(gate.AnnotationSet) Document(gate.Document) TestDocument(gate.corpora.TestDocument) Annotation(gate.Annotation)

Example 15 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestDiffer method testDiffer.

// tearDown
/**
 * Exercises {@code AnnotationDiffer} on three key/response combinations
 * built from 100 adjacent "Foo" annotations on a test document:
 * <ol>
 * <li>identical key and response lists - strict and lenient precision and
 * recall must all be 1;</li>
 * <li>one extra annotation in the response - strict precision must be about
 * .99 while recall stays 1;</li>
 * <li>that extra annotation moved to the key - strict recall must be about
 * .99 while precision stays 1.</li>
 * </ol>
 *
 * @throws Exception if the test document cannot be created or an annotation
 *         cannot be added
 */
public void testDiffer() throws Exception {
    Document doc = Factory.newDocument(new URL(gate.corpora.TestDocument.getTestServerName() + "tests/ft-bt-03-aug-2001.html"), "windows-1252");
    AnnotationSet annSet = doc.getAnnotations();

    // create 100 annotations, each 10 characters long, all sharing one feature map
    FeatureMap features = Factory.newFeatureMap();
    features.put("type", "BAR");
    for (int i = 0; i < 100; i++) {
        // valueOf replaces the deprecated boxing constructor
        annSet.add(Long.valueOf(i * 10), Long.valueOf((i + 1) * 10), "Foo", features);
    }
    List<Annotation> keySet = new ArrayList<Annotation>(annSet);
    List<Annotation> responseSet = new ArrayList<Annotation>(annSet);

    // check 100% Precision and recall
    AnnotationDiffer differ = new AnnotationDiffer();
    differ.setSignificantFeaturesSet(null);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG) {
        differ.printMissmatches();
    }
    double value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getPrecisionLenient();
    Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallLenient();
    Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

    // check low precision: add one annotation that only the response contains
    Integer id = annSet.add(Long.valueOf(2), Long.valueOf(4), "Foo", features);
    Annotation falsePositive = annSet.get(id);
    responseSet.add(falsePositive);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG) {
        differ.printMissmatches();
    }
    value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of .99!", .99, value, .001);
    // recall should still be 100%
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getRecallLenient();
    Assert.assertEquals("Recall Lenient: " + value + " instead of 1!", 1, value, 0);

    // check low recall: the extra annotation is now only in the key
    responseSet.remove(falsePositive);
    keySet.add(falsePositive);
    differ.calculateDiff(keySet, responseSet);
    differ.sanityCheck();
    if (DEBUG) {
        differ.printMissmatches();
    }
    value = differ.getRecallStrict();
    Assert.assertEquals("Recall Strict: " + value + " instead of .99!", .99, value, .001);
    // precision should still be 100%
    value = differ.getPrecisionStrict();
    Assert.assertEquals("Precision Strict: " + value + " instead of 1!", 1, value, 0);
    value = differ.getPrecisionLenient();
    Assert.assertEquals("Precision Lenient: " + value + " instead of 1!", 1, value, 0);
}
Also used : FeatureMap(gate.FeatureMap) ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) URL(java.net.URL) Annotation(gate.Annotation)

Aggregations

Document (gate.Document)47 File (java.io.File)17 FeatureMap (gate.FeatureMap)16 URL (java.net.URL)12 AnnotationSet (gate.AnnotationSet)9 TestDocument (gate.corpora.TestDocument)9 Annotation (gate.Annotation)7 Corpus (gate.Corpus)7 ResourceInstantiationException (gate.creole.ResourceInstantiationException)7 PersistenceException (gate.persist.PersistenceException)6 DataStore (gate.DataStore)5 LanguageResource (gate.LanguageResource)5 ArrayList (java.util.ArrayList)5 HashSet (java.util.HashSet)5 LanguageAnalyser (gate.LanguageAnalyser)4 SerialDataStore (gate.persist.SerialDataStore)4 GateRuntimeException (gate.util.GateRuntimeException)4 ActionEvent (java.awt.event.ActionEvent)4 List (java.util.List)4 AbstractAction (javax.swing.AbstractAction)4