Search in sources :

Example 6 with DataStore

use of gate.DataStore in project gate-core by GateNLP.

the class CorpusBenchmarkTool method evaluateMarkedClean.

// (end of the preceding method, evaluateMarkedStored)
/**
 * Evaluates every regular file found in {@code cleanDir} against its
 * human-annotated counterpart and hands both to {@code evaluateDocuments}.
 *
 * <p>For each clean file a document is created using {@code documentEncoding}.
 * The marked counterpart is then located in one of two ways:
 * <ul>
 *   <li>when {@code isMarkedDS} is false, as a file in {@code markedDir} with
 *       the same base name and an {@code .xml} extension;</li>
 *   <li>when {@code isMarkedDS} is true, by opening {@code markedDir} as a
 *       serial datastore and looking for a stored document whose
 *       {@code gate.SourceURL} feature ends with the clean document's name.</li>
 * </ul>
 * Documents created here are released (asynchronously, on the Swing event
 * thread) once they are no longer needed.
 *
 * @param markedDir directory (or serial datastore location) holding the
 *                  human-annotated documents
 * @param cleanDir  directory holding the unannotated source documents
 * @param errDir    passed through unchanged to {@code evaluateDocuments}
 */
protected void evaluateMarkedClean(File markedDir, File cleanDir, File errDir) {
    // this mode has no stored (persistent) version of the document
    Document persDoc = null;
    File[] cleanDocs = cleanDir.listFiles();
    if (cleanDocs == null) {
        // listFiles() yields null when the path is not a directory or cannot be read
        Out.prln("Cannot list documents in directory: " + cleanDir.getAbsolutePath());
        return;
    }
    for (int i = 0; i < cleanDocs.length; i++) {
        if (!cleanDocs[i].isFile())
            continue;
        // declared per iteration so a failed lookup can never reuse the
        // (already released) documents of a previous iteration
        Document cleanDoc = null;
        Document markedDoc = null;
        // try reading the original document from clean
        FeatureMap params = Factory.newFeatureMap();
        try {
            params.put(Document.DOCUMENT_URL_PARAMETER_NAME, cleanDocs[i].toURI().toURL());
        } catch (java.net.MalformedURLException ex) {
            Out.prln("Cannot create document from file: " + cleanDocs[i].getAbsolutePath());
            continue;
        }
        params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
        FeatureMap hparams = Factory.newFeatureMap();
        // create the document
        try {
            cleanDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanDocs[i].getName());
        } catch (gate.creole.ResourceInstantiationException ex) {
            Out.prln("Cannot create document from file: " + cleanDocs[i].getAbsolutePath());
            continue;
        }
        if (isMoreInfoMode) {
            // link the heading to the matching ".err" report
            String errName = replaceExtension(cleanDocs[i].getName(), ".err");
            Out.prln("<H2>" + "<a href=\"err/" + errName + "\">" + cleanDocs[i].getName() + "</a>" + "</H2>");
        } else
            Out.prln("<H2>" + cleanDocs[i].getName() + "</H2>");
        // try finding the marked document
        if (!isMarkedDS) {
            File markedDocFile = new File(markedDir, replaceExtension(cleanDoc.getName(), ".xml"));
            if (!markedDocFile.exists()) {
                Out.prln("Warning: Cannot find human-annotated document " + markedDocFile + " in " + markedDir);
                // the clean document will not be evaluated: release it before skipping
                scheduleResourceDeletion(cleanDoc);
                continue;
            }
            params = Factory.newFeatureMap();
            try {
                params.put(Document.DOCUMENT_URL_PARAMETER_NAME, markedDocFile.toURI().toURL());
            } catch (java.net.MalformedURLException ex) {
                Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
                scheduleResourceDeletion(cleanDoc);
                continue;
            }
            params.put(Document.DOCUMENT_ENCODING_PARAMETER_NAME, documentEncoding);
            // create the document
            try {
                markedDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", params, hparams, cleanDoc.getName());
            } catch (gate.creole.ResourceInstantiationException ex) {
                Out.prln("Cannot create document from file: " + markedDocFile.getAbsolutePath());
                scheduleResourceDeletion(cleanDoc);
                continue;
            }
        } else {
            try {
                // open marked from a DS
                // NOTE(review): the datastore is re-opened for every clean document and
                // never closed here; confirm whether it can be opened once and closed
                // after the loop without invalidating the documents loaded from it.
                DataStore sds1 = Factory.openDataStore("gate.persist.SerialDataStore", markedDir.toURI().toURL().toExternalForm());
                List<String> lrIDs1 = sds1.getLrIds("gate.corpora.DocumentImpl");
                // search for the marked doc with the same name
                for (int k = 0; k < lrIDs1.size() && markedDoc == null; k++) {
                    // read the stored document
                    FeatureMap features1 = Factory.newFeatureMap();
                    features1.put(DataStore.DATASTORE_FEATURE_NAME, sds1);
                    features1.put(DataStore.LR_ID_FEATURE_NAME, lrIDs1.get(k));
                    Document tempDoc = (Document) Factory.createResource("gate.corpora.DocumentImpl", features1, hparams);
                    // check whether this is our doc; the feature may be absent
                    Object sourceUrl = tempDoc.getFeatures().get("gate.SourceURL");
                    if (sourceUrl != null && sourceUrl.toString().endsWith(cleanDoc.getName())) {
                        markedDoc = tempDoc;
                    } else {
                        // not the document we are looking for: do not keep it loaded
                        scheduleResourceDeletion(tempDoc);
                    }
                }
            } catch (java.net.MalformedURLException ex) {
                Out.prln("Error finding marked directory " + markedDir.getAbsolutePath());
            } catch (gate.persist.PersistenceException ex1) {
                Out.prln("Error opening marked as a datastore (-marked_ds specified)");
            } catch (gate.creole.ResourceInstantiationException ex2) {
                Out.prln("Error opening marked as a datastore (-marked_ds specified)");
            }
        }
        try {
            evaluateDocuments(persDoc, cleanDoc, markedDoc, errDir);
        } catch (gate.creole.ResourceInstantiationException ex) {
            ex.printStackTrace();
            Out.prln("Evaluate failed on document: " + cleanDoc.getName());
        }
        scheduleResourceDeletion(persDoc);
        scheduleResourceDeletion(cleanDoc);
        scheduleResourceDeletion(markedDoc);
    }
    // for loop through clean docs
}

/**
 * Returns {@code fileName} with everything from its last {@code '.'} onwards
 * replaced by {@code newExtension}; when the name contains no dot the
 * extension is simply appended.
 */
private static String replaceExtension(String fileName, String newExtension) {
    int dot = fileName.lastIndexOf('.');
    String base = (dot < 0) ? fileName : fileName.substring(0, dot);
    return base + newExtension;
}

/**
 * Queues {@code Factory.deleteResource(doc)} on the Swing event thread;
 * does nothing when {@code doc} is null.
 */
private static void scheduleResourceDeletion(final Document doc) {
    if (doc == null)
        return;
    javax.swing.SwingUtilities.invokeLater(new Runnable() {

        @Override
        public void run() {
            Factory.deleteResource(doc);
        }
    });
}
Also used : ResourceInstantiationException(gate.creole.ResourceInstantiationException) Document(gate.Document) FeatureMap(gate.FeatureMap) PersistenceException(gate.persist.PersistenceException) SerialDataStore(gate.persist.SerialDataStore) DataStore(gate.DataStore) Document(gate.Document) File(java.io.File)

Example 7 with DataStore

use of gate.DataStore in project gate-core by GateNLP.

the class LRPersistence method createObject.

/**
 * Re-creates the object described by the data held in this persistence
 * holder; the result is meant to be a copy of the object the data was
 * originally extracted from.
 *
 * <p>When no datastore data is present the work is delegated straight to the
 * superclass. Otherwise the init parameters and the datastore are first
 * converted back to their transient form, the datastore and the persistence
 * ID are added to the init parameters, and only then is the superclass
 * asked to create the object.
 */
@SuppressWarnings("unchecked")
@Override
public Object createObject() throws PersistenceException, ResourceInstantiationException {
    if (dsData != null) {
        // persistent doc: restore the parameters and the datastore it lives in
        initParams = PersistenceManager.getTransientRepresentation(initParams, containingControllerName, initParamOverrides);
        Object restoredStore = PersistenceManager.getTransientRepresentation(dsData, containingControllerName, initParamOverrides);
        DataStore store = (DataStore) restoredStore;
        Map<Object, Object> paramMap = (Map<Object, Object>) initParams;
        paramMap.put(DataStore.DATASTORE_FEATURE_NAME, store);
        paramMap.put(DataStore.LR_ID_FEATURE_NAME, persistenceID);
    }
    return super.createObject();
}
Also used : DataStore(gate.DataStore) Map(java.util.Map)

Example 8 with DataStore

use of gate.DataStore in project gate-core by GateNLP.

the class TestPersist method testDSR.

// testDelete()
/**
 * Test the DS register: checks that the number of registered datastores
 * follows the creation, closing and deletion of two serial datastores.
 *
 * @throws Exception if any datastore, document or file operation fails
 */
public void testDSR() throws Exception {
    DataStoreRegister dsr = Gate.getDataStoreRegister();
    assertEquals("DSR has wrong number elements (not 0): " + dsr.size(), 0, dsr.size());
    // create a temporary directory; because File.createTempFile actually
    // writes the file, we need to delete it from disk before calling
    // DataStore.create
    File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();
    // create and open a serial data store
    DataStore sds = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());
    // create a document with some annotations / features on it
    String server = TestDocument.getTestServerName();
    Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
    // valueOf replaces the deprecated boxing constructors
    doc.getFeatures().put("hi there", Integer.valueOf(23232));
    doc.getAnnotations().add(Long.valueOf(5), Long.valueOf(25), "ThingyMaJig", Factory.newFeatureMap());
    // save the document
    Document persDoc = (Document) sds.adopt(doc);
    sds.sync(persDoc);
    // DSR should have one member
    assertEquals("DSR has wrong number elements (expected 1): " + dsr.size(), 1, dsr.size());
    // create and open another serial data store
    storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();
    DataStore sds2 = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());
    // DSR should have two members
    assertEquals("DSR has wrong number elements: " + dsr.size(), 2, dsr.size());
    // peek at the DSR members
    Iterator<DataStore> dsrIter = dsr.iterator();
    while (dsrIter.hasNext()) {
        DataStore ds = dsrIter.next();
        assertNotNull("null ds in ds reg", ds);
        if (DEBUG)
            Out.prln(ds);
    }
    // closing the first datastore leaves only the second one registered
    sds.close();
    assertEquals("DSR has wrong number elements (expected 1): " + dsr.size(), 1, dsr.size());
    // deleting the already-closed datastore must not change the register
    sds.delete();
    assertEquals("DSR has wrong number elements (expected 1): " + dsr.size(), 1, dsr.size());
    sds2.delete();
    assertEquals("DSR has wrong number elements (expected 0): " + dsr.size(), 0, dsr.size());
}
Also used : DataStore(gate.DataStore) TestDocument(gate.corpora.TestDocument) Document(gate.Document) DataStoreRegister(gate.DataStoreRegister) File(java.io.File) URL(java.net.URL)

Example 9 with DataStore

use of gate.DataStore in project gate-core by GateNLP.

the class TestPersist method testSimple.

// testSaveRestore()
/**
 * Simple test: stores a document in a freshly created serial datastore,
 * reads it back through its persistence ID and checks that the copy read
 * back equals the stored one. The datastore is deleted afterwards, even
 * when the test fails part-way through.
 *
 * @throws Exception if any datastore, document or file operation fails
 */
public void testSimple() throws Exception {
    // create a temporary directory; because File.createTempFile actually
    // writes the file, we need to delete it from disk before calling
    // DataStore.create
    File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();
    // create and open a serial data store
    DataStore sds = Factory.createDataStore("gate.persist.SerialDataStore", storageDir.toURI().toURL().toString());
    try {
        // check we can get empty lists from empty data stores
        @SuppressWarnings("unused") List<String> lrTypes = sds.getLrTypes();
        // create a document with some annotations / features on it
        String server = TestDocument.getTestServerName();
        Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
        // valueOf replaces the deprecated boxing constructors
        doc.getFeatures().put("hi there", Integer.valueOf(23232));
        doc.getAnnotations().add(Long.valueOf(5), Long.valueOf(25), "ThingyMaJig", Factory.newFeatureMap());
        // save the document
        Document persDoc = (Document) sds.adopt(doc);
        sds.sync(persDoc);
        // remember the persistence ID for reading back
        // (in the normal case these ids are obtained by DataStore.getLrIds(type))
        Object lrPersistenceId = persDoc.getLRPersistenceId();
        // read the document back
        FeatureMap features = Factory.newFeatureMap();
        features.put(DataStore.LR_ID_FEATURE_NAME, lrPersistenceId);
        features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
        Document doc2 = (Document) Factory.createResource("gate.corpora.DocumentImpl", features);
        // check that the version we read back matches the original
        assertTrue(TestEqual.documentsEqual(persDoc, doc2));
    } finally {
        // delete the datastore, also when an assertion or exception cut the test short
        sds.delete();
    }
}
Also used : FeatureMap(gate.FeatureMap) DataStore(gate.DataStore) TestDocument(gate.corpora.TestDocument) Document(gate.Document) File(java.io.File) URL(java.net.URL)

Example 10 with DataStore

use of gate.DataStore in project gate-core by GateNLP.

the class MainFrame method openSerialDataStore.

// createSerialDataStore()
/**
 * Asks the user to pick a datastore directory and opens it as a serial
 * datastore. Method is used in OpenDSAction.
 *
 * @return the opened datastore, or null if the dialog was not approved or
 *         an error occurred (errors are reported in a message dialog)
 */
protected DataStore openSerialDataStore() {
    // configure the chooser for picking the datastore directory
    fileChooser.setDialogTitle("Select the datastore directory");
    fileChooser.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
    fileChooser.setFileFilter(fileChooser.getAcceptAllFileFilter());
    fileChooser.setResource("gate.persist.SerialDataStore");
    int userChoice = fileChooser.showOpenDialog(MainFrame.this);
    if (userChoice != JFileChooser.APPROVE_OPTION) {
        return null;
    }
    try {
        // the datastore location is the URL of the selected directory
        URL location = fileChooser.getSelectedFile().toURI().toURL();
        return Factory.openDataStore("gate.persist.SerialDataStore", location.toExternalForm());
    } catch (MalformedURLException mue) {
        JOptionPane.showMessageDialog(MainFrame.this, "Invalid location for the datastore\n " + mue.toString(), "GATE", JOptionPane.ERROR_MESSAGE);
    } catch (PersistenceException pe) {
        JOptionPane.showMessageDialog(MainFrame.this, "Datastore opening error!\n " + pe.toString(), "GATE", JOptionPane.ERROR_MESSAGE);
    }
    // reached only after one of the errors above has been reported
    return null;
}
Also used : MalformedURLException(java.net.MalformedURLException) DataStore(gate.DataStore) PersistenceException(gate.persist.PersistenceException) URL(java.net.URL)

Aggregations

DataStore (gate.DataStore): 14; PersistenceException (gate.persist.PersistenceException): 9; File (java.io.File): 6; URL (java.net.URL): 6; Document (gate.Document): 5; FeatureMap (gate.FeatureMap): 5; ResourceInstantiationException (gate.creole.ResourceInstantiationException): 5; GateRuntimeException (gate.util.GateRuntimeException): 5; MalformedURLException (java.net.MalformedURLException): 5; SerialDataStore (gate.persist.SerialDataStore): 3; TestDocument (gate.corpora.TestDocument): 2; GateException (gate.util.GateException): 2; BenchmarkReportException (gate.util.reporting.exceptions.BenchmarkReportException): 2; Point (java.awt.Point): 2; IOException (java.io.IOException): 2; UnsupportedEncodingException (java.io.UnsupportedEncodingException): 2; URISyntaxException (java.net.URISyntaxException): 2; Corpus (gate.Corpus): 1; DataStoreRegister (gate.DataStoreRegister): 1; LanguageResource (gate.LanguageResource): 1