Search in sources :

Example 6 with LanguageResource

use of gate.LanguageResource in project gate-core by GateNLP.

the class LRPersistence method extractDataFromSource.

/**
 * Populates this Persistence with the data that needs to be stored from the
 * original source object.
 *
 * @param source the object to extract data from; must be a
 *        {@link LanguageResource}
 * @throws UnsupportedOperationException if {@code source} is {@code null} or
 *         is not a {@link LanguageResource}
 * @throws PersistenceException if the superclass extraction or the
 *         conversion of the data store to its persistent form fails
 */
@Override
public void extractDataFromSource(Object source) throws PersistenceException {
    // check input; instanceof is false for null, so the offending type must be
    // described without dereferencing source
    if (!(source instanceof LanguageResource)) {
        String actualType = (source == null) ? "null" : source.getClass().getName();
        throw new UnsupportedOperationException(getClass().getName() + " can only be used for " + LanguageResource.class.getName() + " objects!\n" + actualType + " is not a " + LanguageResource.class.getName());
    }
    super.extractDataFromSource(source);
    // LR's will have the features saved by their respective persistence
    // mechanism
    features = null;
    LanguageResource lr = (LanguageResource) source;
    if (lr.getDataStore() == null) {
        // not stored in any data store: nothing to remember about it
        dsData = null;
    } else {
        // remember both the data store and the ID of the LR inside it
        dsData = PersistenceManager.getPersistentRepresentation(lr.getDataStore());
        persistenceID = lr.getLRPersistenceId();
    }
}
Also used : LanguageResource(gate.LanguageResource)

Example 7 with LanguageResource

use of gate.LanguageResource in project gate-core by GateNLP.

the class TestPersist method testMultipleLrs.

// testSimple()
/**
 * Test multiple LRs: stores a corpus holding two documents in a serial data
 * store, reads the corpus back and checks that its name, its features and its
 * first document match the in-memory originals. The data store is deleted
 * when the test finishes, whether it passed or failed.
 */
public void testMultipleLrs() throws Exception {
    // create a temporary directory; because File.createTempFile actually
    // writes the file, we need to delete it from disk before calling
    // DataStore.create
    File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
    storageDir.delete();
    // create and open a serial data store
    SerialDataStore sds = new SerialDataStore(storageDir.toURI().toURL().toString());
    sds.create();
    sds.open();
    try {
        // create a document with some annotations / features on it
        String server = TestDocument.getTestServerName();
        Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
        doc.getFeatures().put("hi there", Integer.valueOf(23232));
        doc.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "ThingyMaJig", Factory.newFeatureMap());
        // create another document
        Document doc2 = Factory.newDocument(new URL(server + "tests/html/test1.htm"));
        // NOTE(review): the next two statements modify doc, not doc2, so doc2
        // is stored without any extra feature or annotation. This looks like
        // a copy/paste slip; confirm before retargeting them to doc2.
        doc.getFeatures().put("hi there again", Integer.valueOf(23232));
        doc.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "dog poo irritates", Factory.newFeatureMap());
        // create a corpus with the documents
        Corpus corp = Factory.newCorpus("Hamish test corpus");
        corp.add(doc);
        corp.add(doc2);
        LanguageResource persCorpus = sds.adopt(corp);
        sds.sync(persCorpus);
        // read the corpora back
        List<Resource> lrsFromDisk = new ArrayList<Resource>();
        for (String lrId : sds.getLrIds("gate.corpora.SerialCorpusImpl")) {
            FeatureMap features = Factory.newFeatureMap();
            features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
            features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
            Resource lr = Factory.createResource("gate.corpora.SerialCorpusImpl", features);
            lrsFromDisk.add(lr);
        }
        if (DEBUG) {
            System.out.println("LRs on disk" + lrsFromDisk);
        }
        // fail with a readable message instead of an IndexOutOfBoundsException
        assertTrue("no corpus was read back from the data store", !lrsFromDisk.isEmpty());
        // check that the versions we read back match the originals
        Corpus diskCorp = (Corpus) lrsFromDisk.get(0);
        Document diskDoc = diskCorp.get(0);
        if (DEBUG) {
            Out.prln("Documents in corpus: " + corp.getDocumentNames());
        }
        assertTrue("corp name != mem name", corp.getName().equals(diskCorp.getName()));
        if (DEBUG) {
            Out.prln("Memory features " + corp.getFeatures());
            Out.prln("Disk features " + diskCorp.getFeatures());
        }
        assertTrue("corp feat != mem feat", corp.getFeatures().equals(diskCorp.getFeatures()));
        if (DEBUG) {
            Out.prln("Annotations in doc: " + diskDoc.getAnnotations());
        }
        assertTrue("doc annotations from disk not equal to memory version", TestEqual.annotationSetsEqual(doc.getAnnotations(), diskDoc.getAnnotations()));
        assertTrue("doc from disk not equal to memory version", TestEqual.documentsEqual(doc, diskDoc));
        // walk over every document of the stored corpus
        for (Document storedDoc : diskCorp) {
            if (DEBUG) {
                Out.prln(storedDoc.getName());
            }
        }
        // assertTrue("doc2 from disk not equal to memory version",
        // doc2.equals(diskDoc2));
    } finally {
        // delete the datastore even when an assertion above failed, so that
        // no temporary directory is left behind
        sds.delete();
    }
}
Also used : LanguageResource(gate.LanguageResource) Resource(gate.Resource) ArrayList(java.util.ArrayList) TestDocument(gate.corpora.TestDocument) Document(gate.Document) URL(java.net.URL) Corpus(gate.Corpus) FeatureMap(gate.FeatureMap) File(java.io.File)

Example 8 with LanguageResource

use of gate.LanguageResource in project gate-core by GateNLP.

the class LuceneDataStoreImpl method sync.

/**
 * Save: synchronise the in-memory image of the LR with the persistent
 * image.
 * <p>
 * An LR without a persistence ID is handed straight to the superclass. An LR
 * that has one is first compared with the stored copy: when both are
 * documents with equal content, default annotations and named annotation
 * sets, the method returns without saving or queueing anything. In all other
 * cases the LR is saved and, if it is a document, queued for indexing.
 *
 * @param lr the language resource to save
 * @throws PersistenceException if loading the stored copy or saving fails
 */
@Override
public void sync(LanguageResource lr) throws PersistenceException {
    if (lr.getLRPersistenceId() == null) {
        super.sync(lr);
    } else {
        // lock the LR ID so we don't write to the file while an
        // indexer task is reading it
        Object idLock = lockObjectForID(lr.getLRPersistenceId());
        synchronized (idLock) {
            // load the stored copy of this LR and check whether anything was
            // modified: only a changed document needs saving and reindexing
            LanguageResource storedCopy = null;
            try {
                storedCopy = getLr(lr.getClass().getName(), lr.getLRPersistenceId());
                // the comparison only applies to documents
                if (storedCopy instanceof Document && lr instanceof Document) {
                    Document storedDoc = (Document) storedCopy;
                    Document liveDoc = (Document) lr;
                    // content and annotations are what matters from the annic
                    // perspective
                    boolean unchanged = storedDoc.getContent().equals(liveDoc.getContent())
                            && storedDoc.getAnnotations().equals(liveDoc.getAnnotations())
                            && storedDoc.getNamedAnnotationSets().equals(liveDoc.getNamedAnnotationSets());
                    if (unchanged) {
                        for (String setName : storedDoc.getNamedAnnotationSets().keySet()) {
                            if (!storedDoc.getAnnotations(setName).equals(liveDoc.getAnnotations(setName))) {
                                unchanged = false;
                                break;
                            }
                        }
                    }
                    if (unchanged) {
                        // nothing to save; the finally clause below still
                        // disposes of the loaded copy
                        return;
                    }
                }
            } catch (SecurityException e) {
                e.printStackTrace();
            } finally {
                // delete the copy of this LR
                if (storedCopy != null) {
                    Factory.deleteResource(storedCopy);
                }
            }
            super.sync(lr);
        }
    }
    if (lr instanceof Document) {
        queueForIndexing(lr.getLRPersistenceId());
    }
}
Also used : LanguageResource(gate.LanguageResource) Document(gate.Document)

Example 9 with LanguageResource

use of gate.LanguageResource in project gate-core by GateNLP.

the class SerialDataStore method getLr.

// constructPersistenceId
/**
 * Reads a language resource back from this data store.
 *
 * @param lrClassName name of the resource class; also the name of the
 *        subdirectory of the storage directory that holds the file
 * @param lrPersistenceId ID of the resource; its string form is the name of
 *        the file to read
 * @return the deserialised resource, with its data store set to this object
 *         and its persistence ID set to {@code lrPersistenceId}
 * @throws PersistenceException if the directory or the file cannot be found,
 *         the file cannot be read, or a class needed to deserialise it is
 *         missing
 * @throws SecurityException declared for callers; not thrown explicitly here
 */
@Override
public LanguageResource getLr(String lrClassName, Object lrPersistenceId) throws PersistenceException, SecurityException {
    // find the subdirectory for resources of this type
    File resourceTypeDirectory = new File(storageDir, lrClassName);
    if ((!resourceTypeDirectory.exists()) || (!resourceTypeDirectory.isDirectory())) {
        throw new PersistenceException("Can't find " + resourceTypeDirectory);
    }
    // create a File representing the resource storage file
    File resourceFile = new File(resourceTypeDirectory, lrPersistenceId.toString());
    if (!resourceFile.exists() || !resourceFile.isFile()) {
        throw new PersistenceException("Can't find file " + resourceFile);
    }
    // try and read the file and deserialise it
    LanguageResource lr;
    // always refers to the outermost stream opened so far, so that closing it
    // in the finally clause releases the whole chain whichever step failed
    InputStream is = null;
    try {
        is = new FileInputStream(resourceFile);
        // after 1.1 the serialised files are compressed
        if (!currentProtocolVersion.equals("1.0")) {
            is = new GZIPInputStream(is);
        }
        is = new BufferedInputStream(is);
        // Use an input stream that is aware of the GATE classloader
        ObjectInputStream ois = new GateAwareObjectInputStream(is);
        is = ois;
        lr = (LanguageResource) ois.readObject();
    } catch (IOException e) {
        throw new PersistenceException("Couldn't read file " + resourceFile + ": " + e);
    } catch (ClassNotFoundException ee) {
        throw new PersistenceException("Couldn't find class " + lrClassName + ": " + ee);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // a failure to close an input stream must neither hide the
                // exception being thrown nor discard an object already read
            }
        }
    }
    // set the dataStore property of the LR (which is transient and therefore
    // not serialised)
    lr.setDataStore(this);
    lr.setLRPersistenceId(lrPersistenceId);
    if (DEBUG) {
        Out.prln("LR read in memory: " + lr);
    }
    return lr;
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) BufferedInputStream(java.io.BufferedInputStream) LanguageResource(gate.LanguageResource) ObjectInputStream(java.io.ObjectInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) IOException(java.io.IOException) File(java.io.File)

Example 10 with LanguageResource

use of gate.LanguageResource in project gate-core by GateNLP.

the class TestCreole method testClassIndex.

// testLoading()
/**
 * Test resource indexing by class: looks up the resource data registered for
 * {@code gate.corpora.DocumentImpl}, checks its class and interface names,
 * instantiates the resource class and checks that the instance is a
 * {@code gate.Document}. The register is cleared at the end.
 */
public void testClassIndex() throws Exception {
    ResourceData docRd = reg.get("gate.corpora.DocumentImpl");
    assertNotNull("couldn't find document res data", docRd);
    assertTrue("doc res data has wrong class name", docRd.getClassName().equals("gate.corpora.DocumentImpl"));
    assertTrue("doc res data has wrong interface name", docRd.getInterfaceName().equals("gate.Document"));
    Class<?> docClass = docRd.getResourceClass();
    assertNotNull("couldn't get doc class", docClass);
    // Class.newInstance() is deprecated and rethrows checked constructor
    // exceptions undeclared; going through the no-arg constructor wraps them
    // in an InvocationTargetException instead
    LanguageResource docRes = (LanguageResource) docClass.getDeclaredConstructor().newInstance();
    assertTrue("instance of doc is wrong type", docRes instanceof gate.Document);
    reg.clear();
}
Also used : LanguageResource(gate.LanguageResource) Document(gate.Document)

Aggregations

LanguageResource (gate.LanguageResource): 13, Document (gate.Document): 5, Corpus (gate.Corpus): 4, FeatureMap (gate.FeatureMap): 4, ProcessingResource (gate.ProcessingResource): 4, Resource (gate.Resource): 4, Controller (gate.Controller): 3, ConditionalSerialAnalyserController (gate.creole.ConditionalSerialAnalyserController): 3, File (java.io.File): 3, CorpusController (gate.CorpusController): 2, VisualResource (gate.VisualResource): 2, AnnotationSchema (gate.creole.AnnotationSchema): 2, PackagedController (gate.creole.PackagedController): 2, GateRuntimeException (gate.util.GateRuntimeException): 2, DefaultMutableTreeNode (javax.swing.tree.DefaultMutableTreeNode): 2, DataStore (gate.DataStore): 1, TestDocument (gate.corpora.TestDocument): 1, AbstractVisualResource (gate.creole.AbstractVisualResource): 1, ConditionalController (gate.creole.ConditionalController): 1, ResourceInstantiationException (gate.creole.ResourceInstantiationException): 1