Search in sources :

Example 16 with Document

use of gate.Document in project gate-core by GateNLP.

the class TestClassificationMeasures method test.

/**
 * Exercises {@code ClassificationMeasures} on two document pairs fetched
 * from the test server and on the measures obtained by combining both.
 *
 * <p>For each pair the confusion matrix is computed between the "ann1" and
 * "ann2" annotation sets for annotation type "sent" and feature "Op", then
 * the observed agreement, Cohen's kappa and Pi kappa are compared against
 * fixed expected values.
 *
 * <p>NOTE: if GATE initialisation or document loading fails, the exception
 * is only printed and the method returns without running any assertion.
 */
public void test() {
    String type = "sent";
    String feature = "Op";
    Document doc1 = null;
    Document doc2 = null;
    Document doc3 = null;
    Document doc4 = null;
    try {
        Gate.init();
        // doc1/doc2 and doc3/doc4 are each loaded from the same file; the two
        // annotators' sets are read from the separate copies further down
        doc1 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/beijing-opera.xml"));
        doc2 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/beijing-opera.xml"));
        doc3 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/in-outlook-09-aug-2001.xml"));
        doc4 = Factory.newDocument(new URL(TestDocument.getTestServerName() + "tests/iaa/in-outlook-09-aug-2001.xml"));
    } catch (Exception e) {
        e.printStackTrace();
    }
    if (doc1 != null && doc2 != null && doc3 != null && doc4 != null) {
        // first document pair
        AnnotationSet as1 = doc1.getAnnotations("ann1");
        AnnotationSet as2 = doc2.getAnnotations("ann2");
        ClassificationMeasures myClassificationMeasures1 = new ClassificationMeasures();
        myClassificationMeasures1.calculateConfusionMatrix(as1, as2, type, feature, true);
        // assertEquals takes (expected, actual): keep the constant first so
        // that failure messages label the two values correctly
        assertEquals(0.7777778f, myClassificationMeasures1.getObservedAgreement());
        assertEquals(0.6086957f, myClassificationMeasures1.getKappaCohen());
        assertEquals(0.59550565f, myClassificationMeasures1.getKappaPi());
        // second document pair
        AnnotationSet as3 = doc3.getAnnotations("ann1");
        AnnotationSet as4 = doc4.getAnnotations("ann2");
        ClassificationMeasures myClassificationMeasures2 = new ClassificationMeasures();
        myClassificationMeasures2.calculateConfusionMatrix(as3, as4, type, feature, true);
        assertEquals(0.96875f, myClassificationMeasures2.getObservedAgreement());
        assertEquals(0.3263158f, myClassificationMeasures2.getKappaCohen());
        assertEquals(0.3227513f, myClassificationMeasures2.getKappaPi());
        // measures built from the list of both previous results
        ArrayList<ClassificationMeasures> tablesList = new ArrayList<ClassificationMeasures>();
        tablesList.add(myClassificationMeasures1);
        tablesList.add(myClassificationMeasures2);
        ClassificationMeasures myNewClassificationMeasures = new ClassificationMeasures(tablesList);
        assertEquals(0.94520545f, myNewClassificationMeasures.getObservedAgreement());
        assertEquals(0.7784521f, myNewClassificationMeasures.getKappaCohen());
        assertEquals(0.7778622f, myNewClassificationMeasures.getKappaPi());
    } else {
        System.out.println("Failed to create docs from URLs.");
    }
}
Also used : ArrayList(java.util.ArrayList) AnnotationSet(gate.AnnotationSet) Document(gate.Document) TestDocument(gate.corpora.TestDocument) URL(java.net.URL)

Example 17 with Document

use of gate.Document in project gate-core by GateNLP.

the class SerialCorpusImpl method unloadDocument.

/**
 * Unloads a document from memory.
 *
 * @param index the index of the document to be unloaded.
 * @param sync should the document be sync'ed (i.e. saved) before
 *          unloading.
 * @throws GateRuntimeException if {@code sync} is requested and adopting
 *           or sync'ing the document fails; in that case the document is
 *           left in memory.
 */
public void unloadDocument(int index, boolean sync) {
    // if a persistent doc is not loaded, there's nothing we need to do
    if ((!isDocumentLoaded(index)) && isPersistentDocument(index)) {
        return;
    }
    // 1. + 2. save the document (adopting it first when needed) before it
    // is released from memory
    if (sync) {
        Document doc = documents.get(index);
        try {
            if (doc.getLRPersistenceId() == null) {
                // no persistence ID yet: adopt the document into the datastore
                // first, then sync it and record the ID it received
                doc = (Document) this.getDataStore().adopt(doc);
                this.getDataStore().sync(doc);
                this.setDocumentPersistentID(index, doc.getLRPersistenceId());
            } else {
                // if it is adopted, just sync it
                this.getDataStore().sync(doc);
            }
        } catch (PersistenceException ex) {
            throw new GateRuntimeException("Error unloading document from corpus " + "because document sync failed: " + ex.getMessage(), ex);
        }
    }
    // 3. remove the document from the memory
    // do this, only if the saving has succeeded
    documents.set(index, null);
}
Also used : GateRuntimeException(gate.util.GateRuntimeException) PersistenceException(gate.persist.PersistenceException) Document(gate.Document)

Example 18 with Document

use of gate.Document in project gate-core by GateNLP.

the class SerialCorpusImpl method setTransientSource.

/**
 * Initialises this corpus from an in-memory corpus: copies its name and
 * features, records name and class name for each of its documents, copies
 * the documents themselves, resets the added/removed/changed trackers and
 * registers this corpus as a creole listener.
 *
 * <p>Does nothing when {@code source} is not a {@link Corpus}, or when this
 * corpus already has both a datastore and a persistent ID.
 *
 * @param source the object to initialise from; ignored unless it is a
 *          {@link Corpus}.
 */
public void setTransientSource(Object source) {
    // only another corpus can act as the source
    if (!(source instanceof Corpus)) {
        return;
    }
    // datastore and persistent ID both present: leave the current state alone
    boolean hasDataStore = this.dataStore != null;
    if (hasDataStore && this.lrPersistentId != null) {
        return;
    }
    Corpus transientCorpus = (Corpus) source;
    // take over name and features of the in-memory corpus
    this.setName(transientCorpus.getName());
    this.setFeatures(transientCorpus.getFeatures());
    docDataList = new ArrayList<DocumentData>();
    // remember name and implementing class of every document; the
    // persistence ID slot is left null
    List<String> names = transientCorpus.getDocumentNames();
    int position = 0;
    for (String docName : names) {
        Document current = transientCorpus.get(position);
        String docClassName = current.getClass().getName();
        docDataList.add(new DocumentData(docName, null, docClassName));
        position++;
    }
    // take over the documents themselves
    documents = new ArrayList<Document>();
    documents.addAll(transientCorpus);
    // start with empty change trackers
    this.addedDocs = new Vector<Document>();
    this.removedDocIDs = new Vector<String>();
    this.changedDocs = new Vector<Document>();
    // listen to the creole register so that events fire when docs are
    // added/removed/etc
    Gate.getCreoleRegister().addCreoleListener(this);
}
Also used : Document(gate.Document) IndexedCorpus(gate.creole.ir.IndexedCorpus) Corpus(gate.Corpus)

Example 19 with Document

use of gate.Document in project gate-core by GateNLP.

the class SerialCorpusImpl method add.

/**
 * Inserts a document at the given position, records its name, persistence
 * ID and class name at the same position in the document data list, and
 * notifies listeners with a {@code DOCUMENT_ADDED} event.
 *
 * @param index position at which the document is inserted.
 * @param o the document to insert; a {@code null} value is silently ignored.
 */
@Override
public void add(int index, Document o) {
    if (o != null) {
        // keep the metadata list and the document list aligned on the same index
        String docName = o.getName();
        Object persistenceId = o.getLRPersistenceId();
        String docClassName = o.getClass().getName();
        docDataList.add(index, new DocumentData(docName, persistenceId, docClassName));
        documents.add(index, o);
        documentAdded(o);
        CorpusEvent addedEvent = new CorpusEvent(SerialCorpusImpl.this, o, index, o.getLRPersistenceId(), CorpusEvent.DOCUMENT_ADDED);
        fireDocumentAdded(addedEvent);
    }
}
Also used : CorpusEvent(gate.event.CorpusEvent) Document(gate.Document)

Example 20 with Document

use of gate.Document in project gate-core by GateNLP.

the class SerialCorpusImpl method setIndexDefinition.

// readObject
/**
 * Stores the index definition as a corpus feature, instantiates the IR
 * engine class named by the definition, wires its index manager to this
 * corpus and resets the added/removed/changed document trackers.
 *
 * <p>If the engine cannot be loaded or instantiated, the stack trace is
 * written to the GATE error writer and the index manager is left as it was;
 * the trackers are reset regardless.
 *
 * @param definition the index definition to apply; {@code null} is ignored.
 */
@Override
public void setIndexDefinition(IndexDefinition definition) {
    if (definition == null) {
        return;
    }
    this.getFeatures().put(GateConstants.CORPUS_INDEX_DEFINITION_FEATURE_KEY, definition);
    String className = definition.getIrEngineClassName();
    try {
        // resolve the engine through GATE's own class loader
        Class<?> aClass = Class.forName(className, true, Gate.getClassLoader());
        // Class.newInstance() is deprecated because it bypasses compile-time
        // exception checking; go through the no-arg constructor instead
        IREngine engine = (IREngine) aClass.getDeclaredConstructor().newInstance();
        this.indexManager = engine.getIndexmanager();
        this.indexManager.setIndexDefinition(definition);
        this.indexManager.setCorpus(this);
    } catch (Exception e) {
        e.printStackTrace(Err.getPrintWriter());
    }
    this.addedDocs = new Vector<Document>();
    this.removedDocIDs = new Vector<String>();
    this.changedDocs = new Vector<Document>();
}
Also used : IREngine(gate.creole.ir.IREngine) Document(gate.Document) GateRuntimeException(gate.util.GateRuntimeException) ResourceInstantiationException(gate.creole.ResourceInstantiationException) IOException(java.io.IOException) IndexException(gate.creole.ir.IndexException) MethodNotImplementedException(gate.util.MethodNotImplementedException) PersistenceException(gate.persist.PersistenceException)

Aggregations

Document (gate.Document): 47; File (java.io.File): 17; FeatureMap (gate.FeatureMap): 16; URL (java.net.URL): 12; AnnotationSet (gate.AnnotationSet): 9; TestDocument (gate.corpora.TestDocument): 9; Annotation (gate.Annotation): 7; Corpus (gate.Corpus): 7; ResourceInstantiationException (gate.creole.ResourceInstantiationException): 7; PersistenceException (gate.persist.PersistenceException): 6; DataStore (gate.DataStore): 5; LanguageResource (gate.LanguageResource): 5; ArrayList (java.util.ArrayList): 5; HashSet (java.util.HashSet): 5; LanguageAnalyser (gate.LanguageAnalyser): 4; SerialDataStore (gate.persist.SerialDataStore): 4; GateRuntimeException (gate.util.GateRuntimeException): 4; ActionEvent (java.awt.event.ActionEvent): 4; List (java.util.List): 4; AbstractAction (javax.swing.AbstractAction): 4