Search in sources :

Example 1 with SerialCorpusImpl

Use of gate.corpora.SerialCorpusImpl in the project gate-core by GateNLP.

The method sync of the class SerialDataStore.

// close()
/**
 * Save: synchronise the in-memory image of the LR with the persistent
 * image.
 *
 * <p>The LR must already have been adopted by this datastore. If it has no
 * persistence ID yet, one is constructed from its name (or, when the LR has
 * no name, from the name held in its resource data) and assigned to it.
 *
 * <p>When the LR is a corpus it must be a {@code SerialCorpusImpl}. Every
 * document that is loaded in memory, or that is not yet persistent, is
 * sync-ed before the corpus itself is written; documents without a
 * persistence ID are adopted first.
 *
 * <p>The LR is serialised into a file named after its persistence ID, inside
 * a subdirectory of the storage directory named after the resource class.
 * The stream is GZIP-compressed unless the current protocol version is
 * "1.0". A {@code RESOURCE_WRITTEN} datastore event is fired on success.
 *
 * @param lr the language resource to write to storage
 * @throws PersistenceException if the LR has not been adopted by this
 *     datastore, no resource data is found for its class, the storage
 *     directory cannot be created, the LR is a corpus that is not a
 *     {@code SerialCorpusImpl}, one of the corpus documents cannot be
 *     saved, or writing the storage file fails
 */
@Override
public void sync(LanguageResource lr) throws PersistenceException {
    // check that this LR is one of ours (i.e. has been adopted)
    if (lr.getDataStore() == null || !lr.getDataStore().equals(this)) {
        throw new PersistenceException("LR " + lr.getName() + " has not been adopted by this DataStore");
    }
    // find the resource data for this LR
    ResourceData lrData = Gate.getCreoleRegister().get(lr.getClass().getName());
    if (lrData == null) {
        // fail with the declared exception type instead of a NullPointerException below
        throw new PersistenceException("No resource data found for LR class " + lr.getClass().getName());
    }
    // create a subdirectory for resources of this type if none exists
    File resourceTypeDirectory = new File(storageDir, lrData.getClassName());
    if (!resourceTypeDirectory.isDirectory()) {
        // mkdir() also fails when the directory was created in the meantime, so
        // re-check before giving up; testing isDirectory() (rather than exists())
        // also rejects a regular file sitting at this path
        if (!resourceTypeDirectory.mkdir() && !resourceTypeDirectory.isDirectory()) {
            throw new PersistenceException("Can't write " + resourceTypeDirectory);
        }
    }
    // create an identifier for this resource
    String lrName = lr.getName();
    Object lrPersistenceId = lr.getLRPersistenceId();
    if (lrName == null) {
        lrName = lrData.getName();
    }
    if (lrPersistenceId == null) {
        lrPersistenceId = constructPersistenceId(lrName);
        lr.setLRPersistenceId(lrPersistenceId);
    }
    // we're saving a corpus. I need to save its documents first
    if (lr instanceof Corpus) {
        // check if the corpus is the one we support. CorpusImpl cannot be saved!
        if (!(lr instanceof SerialCorpusImpl)) {
            throw new PersistenceException("Can't save a corpus which " + "is not of type SerialCorpusImpl!");
        }
        SerialCorpusImpl corpus = (SerialCorpusImpl) lr;
        for (int i = 0; i < corpus.size(); i++) {
            // if the document is not in memory, there's little point in saving it
            if ((!corpus.isDocumentLoaded(i)) && corpus.isPersistentDocument(i)) {
                continue;
            }
            if (DEBUG) {
                Out.prln("Saving document at position " + i);
                Out.prln("Document in memory " + corpus.isDocumentLoaded(i));
                Out.prln("is persistent? " + corpus.isPersistentDocument(i));
                Out.prln("Document name at position" + corpus.getDocumentName(i));
            }
            Document doc = corpus.get(i);
            try {
                // if the document is not already adopted, we need to do that first
                if (doc.getLRPersistenceId() == null) {
                    if (DEBUG) {
                        Out.prln("Document adopted" + doc.getName());
                    }
                    doc = (Document) this.adopt(doc);
                }
                this.sync(doc);
                if (DEBUG) {
                    Out.prln("Document sync-ed");
                }
                // store the persistent ID. Needs to be done even if the document was
                // already adopted, in case the doc was already persistent
                // when added to the corpus
                corpus.setDocumentPersistentID(i, doc.getLRPersistenceId());
                if (DEBUG) {
                    Out.prln("new document ID " + doc.getLRPersistenceId());
                }
            } catch (Exception ex) {
                throw new PersistenceException("Error while saving corpus: " + corpus + " because of an error storing document " + ex.getMessage(), ex);
            }
        }
    }
    // create a File to store the resource in
    File resourceFile = new File(resourceTypeDirectory, (String) lrPersistenceId);
    // dump the LR into the new File; try-with-resources guarantees the file
    // handle is released even when a stream constructor or writeObject fails
    try (OutputStream fileOut = new FileOutputStream(resourceFile);
         // after 1.1 the serialised files are compressed
         OutputStream rawOut = currentProtocolVersion.equals("1.0") ? fileOut : new GZIPOutputStream(fileOut);
         ObjectOutputStream oos = new ObjectOutputStream(new BufferedOutputStream(rawOut))) {
        oos.writeObject(lr);
    } catch (IOException e) {
        throw new PersistenceException("Couldn't write to storage file: " + e.getMessage(), e);
    }
    // let the world know about it
    fireResourceWritten(new DatastoreEvent(this, DatastoreEvent.RESOURCE_WRITTEN, lr, lrPersistenceId));
}
Also used : ResourceData(gate.creole.ResourceData) BufferedOutputStream(java.io.BufferedOutputStream) ObjectOutputStream(java.io.ObjectOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) GZIPOutputStream(java.util.zip.GZIPOutputStream) IOException(java.io.IOException) Document(gate.Document) ObjectOutputStream(java.io.ObjectOutputStream) Corpus(gate.Corpus) URISyntaxException(java.net.URISyntaxException) GateRuntimeException(gate.util.GateRuntimeException) MalformedURLException(java.net.MalformedURLException) IOException(java.io.IOException) GZIPOutputStream(java.util.zip.GZIPOutputStream) SerialCorpusImpl(gate.corpora.SerialCorpusImpl) FileOutputStream(java.io.FileOutputStream) DatastoreEvent(gate.event.DatastoreEvent) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Aggregations

Corpus (gate.Corpus)1 Document (gate.Document)1 SerialCorpusImpl (gate.corpora.SerialCorpusImpl)1 ResourceData (gate.creole.ResourceData)1 DatastoreEvent (gate.event.DatastoreEvent)1 GateRuntimeException (gate.util.GateRuntimeException)1 BufferedOutputStream (java.io.BufferedOutputStream)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 ObjectOutputStream (java.io.ObjectOutputStream)1 OutputStream (java.io.OutputStream)1 MalformedURLException (java.net.MalformedURLException)1 URISyntaxException (java.net.URISyntaxException)1 GZIPOutputStream (java.util.zip.GZIPOutputStream)1