use of gate.LanguageResource in project gate-core by GateNLP.
the class LRPersistence method extractDataFromSource.
/**
 * Populates this Persistence with the data that needs to be stored from the
 * original source object.
 *
 * @param source the object to extract data from; must be a
 *          {@link LanguageResource}
 * @throws UnsupportedOperationException if {@code source} is {@code null} or
 *           is not a {@link LanguageResource}
 * @throws PersistenceException if the superclass extraction or the creation
 *           of the datastore's persistent representation fails
 */
@Override
public void extractDataFromSource(Object source) throws PersistenceException {
  // check input; a null source is reported by name instead of triggering a
  // NullPointerException while the error message is being assembled
  if (!(source instanceof LanguageResource)) {
    String actualType = (source == null) ? "null" : source.getClass().getName();
    throw new UnsupportedOperationException(getClass().getName() + " can only be used for " + LanguageResource.class.getName() + " objects!\n" + actualType + " is not a " + LanguageResource.class.getName());
  }
  super.extractDataFromSource(source);
  // LR's will have the features saved by their respective persistence
  // mechanism
  features = null;
  LanguageResource lr = (LanguageResource) source;
  if (lr.getDataStore() == null) {
    // the LR does not live in a datastore, so there is no datastore data
    dsData = null;
  } else {
    // remember both the datastore and the LR's ID inside it
    dsData = PersistenceManager.getPersistentRepresentation(lr.getDataStore());
    persistenceID = lr.getLRPersistenceId();
  }
}
use of gate.LanguageResource in project gate-core by GateNLP.
the class TestPersist method testMultipleLrs.
// testSimple()
/**
 * Test multiple LRs: stores a corpus holding two documents in a serial
 * datastore, reads the corpus back and checks that the corpus and its first
 * document match the in-memory originals.
 *
 * @throws Exception if the datastore or any of the resources cannot be
 *           created, stored or read back
 */
public void testMultipleLrs() throws Exception {
  // create a temporary directory; because File.createTempFile actually
  // writes the file, we need to delete it from disk before calling
  // DataStore.create
  File storageDir = File.createTempFile("TestPersist__", "__StorageDir");
  storageDir.delete();
  // create and open a serial data store
  SerialDataStore sds = new SerialDataStore(storageDir.toURI().toURL().toString());
  sds.create();
  sds.open();
  try {
    // create a document with some annotations / features on it
    String server = TestDocument.getTestServerName();
    Document doc = Factory.newDocument(new URL(server + "tests/doc0.html"));
    doc.getFeatures().put("hi there", Integer.valueOf(23232));
    doc.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "ThingyMaJig", Factory.newFeatureMap());
    // create another document with some annotations / features on it
    // (these go on doc2; they used to be added to doc by mistake)
    Document doc2 = Factory.newDocument(new URL(server + "tests/html/test1.htm"));
    doc2.getFeatures().put("hi there again", Integer.valueOf(23232));
    doc2.getAnnotations().add(Long.valueOf(5L), Long.valueOf(25L), "dog poo irritates", Factory.newFeatureMap());
    // create a corpus with the documents
    Corpus corp = Factory.newCorpus("Hamish test corpus");
    corp.add(doc);
    corp.add(doc2);
    LanguageResource persCorpus = sds.adopt(corp);
    sds.sync(persCorpus);
    // read the corpora back
    List<Resource> lrsFromDisk = new ArrayList<Resource>();
    List<String> lrIds = sds.getLrIds("gate.corpora.SerialCorpusImpl");
    for (String lrId : lrIds) {
      FeatureMap features = Factory.newFeatureMap();
      features.put(DataStore.DATASTORE_FEATURE_NAME, sds);
      features.put(DataStore.LR_ID_FEATURE_NAME, lrId);
      Resource lr = Factory.createResource("gate.corpora.SerialCorpusImpl", features);
      lrsFromDisk.add(lr);
    }
    if (DEBUG) {
      System.out.println("LRs on disk" + lrsFromDisk);
    }
    // check that the versions we read back match the originals
    Corpus diskCorp = (Corpus) lrsFromDisk.get(0);
    Document diskDoc = diskCorp.get(0);
    if (DEBUG) {
      Out.prln("Documents in corpus: " + corp.getDocumentNames());
    }
    assertTrue("corp name != mem name", corp.getName().equals(diskCorp.getName()));
    if (DEBUG) {
      Out.prln("Memory features " + corp.getFeatures());
    }
    if (DEBUG) {
      Out.prln("Disk features " + diskCorp.getFeatures());
    }
    assertTrue("corp feat != mem feat", corp.getFeatures().equals(diskCorp.getFeatures()));
    if (DEBUG) {
      Out.prln("Annotations in doc: " + diskDoc.getAnnotations());
    }
    assertTrue("doc annotations from disk not equal to memory version", TestEqual.annotationSetsEqual(doc.getAnnotations(), diskDoc.getAnnotations()));
    assertTrue("doc from disk not equal to memory version", TestEqual.documentsEqual(doc, diskDoc));
    // walk the whole corpus so that every document is fetched from disk
    Iterator<Document> corpusIter = diskCorp.iterator();
    while (corpusIter.hasNext()) {
      if (DEBUG) {
        Out.prln(corpusIter.next().getName());
      } else {
        corpusIter.next();
      }
    }
  } finally {
    // delete the datastore even when an assertion above fails, so that no
    // temporary storage directory is left behind
    sds.delete();
  }
}
use of gate.LanguageResource in project gate-core by GateNLP.
the class LuceneDataStoreImpl method sync.
/**
 * Save: synchronise the in-memory image of the LR with the persistent
 * image.
 * <p>
 * An LR that already has a persistence ID is compared with the copy loaded
 * from this datastore. When both are documents with equal content, equal
 * default annotations and equal named annotation sets, nothing is written
 * and nothing is queued for indexing. In every other case the LR is saved
 * through the superclass and, if it is a document, queued for indexing.
 *
 * @param lr the language resource to save
 * @throws PersistenceException if loading the stored copy or saving the LR
 *           fails
 */
@Override
public void sync(LanguageResource lr) throws PersistenceException {
  if (lr.getLRPersistenceId() == null) {
    // no persistence ID yet, so there is no stored copy to compare against
    super.sync(lr);
  } else {
    // lock the LR ID so we don't write to the file while an
    // indexer task is reading it
    synchronized (lockObjectForID(lr.getLRPersistenceId())) {
      // load the stored copy of this LR and check whether anything was
      // modified; only a modified document needs saving and reindexing
      LanguageResource storedCopy = null;
      try {
        storedCopy = getLr(lr.getClass().getName(), lr.getLRPersistenceId());
        // the comparison is only done when both sides are documents
        if (storedCopy instanceof Document && lr instanceof Document) {
          Document stored = (Document) storedCopy;
          Document current = (Document) lr;
          // content, default annotations and the named sets are compared,
          // as that's what matters from the annic perspective
          boolean unchanged = stored.getContent().equals(current.getContent())
              && stored.getAnnotations().equals(current.getAnnotations())
              && stored.getNamedAnnotationSets().equals(current.getNamedAnnotationSets());
          if (unchanged) {
            for (String setName : stored.getNamedAnnotationSets().keySet()) {
              if (!stored.getAnnotations(setName).equals(current.getAnnotations(setName))) {
                unchanged = false;
                break;
              }
            }
          }
          if (unchanged) {
            // identical to what is stored: nothing to save or index
            return;
          }
        }
      } catch (SecurityException e) {
        e.printStackTrace();
      } finally {
        // delete the copy of this LR
        if (storedCopy != null) {
          Factory.deleteResource(storedCopy);
        }
      }
      super.sync(lr);
    }
  }
  if (lr instanceof Document) {
    queueForIndexing(lr.getLRPersistenceId());
  }
}
use of gate.LanguageResource in project gate-core by GateNLP.
the class SerialDataStore method getLr.
// constructPersistenceId
/**
 * Reads the language resource with the given class name and persistence ID
 * from this datastore's storage directory and deserialises it.
 *
 * @param lrClassName the class name of the resource; it names the
 *          subdirectory of the storage directory that holds the file
 * @param lrPersistenceId the persistence ID of the resource; its string
 *          form is the name of the storage file
 * @return the deserialised resource, with its datastore set to this
 *         datastore and its persistence ID set to {@code lrPersistenceId}
 * @throws PersistenceException if the type directory or the resource file
 *           cannot be found, the file cannot be read, or a class needed for
 *           deserialisation cannot be found
 * @throws SecurityException declared by the overridden method
 */
@Override
public LanguageResource getLr(String lrClassName, Object lrPersistenceId) throws PersistenceException, SecurityException {
  // find the subdirectory for resources of this type
  File resourceTypeDirectory = new File(storageDir, lrClassName);
  if ((!resourceTypeDirectory.exists()) || (!resourceTypeDirectory.isDirectory())) {
    throw new PersistenceException("Can't find " + resourceTypeDirectory);
  }
  // create a File representing the resource storage file
  File resourceFile = new File(resourceTypeDirectory, lrPersistenceId.toString());
  if (!resourceFile.exists() || !resourceFile.isFile()) {
    throw new PersistenceException("Can't find file " + resourceFile);
  }
  // try and read the file and deserialise it
  LanguageResource lr = null;
  // the file stream is a resource of its own so that it is closed even when
  // wrapping it (e.g. reading the GZIP header) fails
  try (InputStream fileIn = new FileInputStream(resourceFile)) {
    InputStream is = fileIn;
    // after 1.1 the serialised files are compressed
    if (!currentProtocolVersion.equals("1.0")) {
      is = new GZIPInputStream(is);
    }
    is = new BufferedInputStream(is);
    // Use an input stream that is aware of the GATE classloader
    try (ObjectInputStream ois = new GateAwareObjectInputStream(is)) {
      lr = (LanguageResource) ois.readObject();
    }
  } catch (IOException e) {
    PersistenceException pe = new PersistenceException("Couldn't read file " + resourceFile + ": " + e);
    // keep the original exception as the cause so its stack trace survives
    pe.initCause(e);
    throw pe;
  } catch (ClassNotFoundException ee) {
    PersistenceException pe = new PersistenceException("Couldn't find class " + lrClassName + ": " + ee);
    pe.initCause(ee);
    throw pe;
  }
  // set the dataStore property of the LR (which is transient and therefore
  // not serialised)
  lr.setDataStore(this);
  lr.setLRPersistenceId(lrPersistenceId);
  if (DEBUG) {
    Out.prln("LR read in memory: " + lr);
  }
  return lr;
}
use of gate.LanguageResource in project gate-core by GateNLP.
the class TestCreole method testClassIndex.
// testLoading()
/**
 * Test resource indexing by class: looks up the resource data registered for
 * {@code gate.corpora.DocumentImpl}, checks its class and interface names,
 * and checks that an instance of the resource class is a
 * {@code gate.Document}. The register is cleared at the end.
 *
 * @throws Exception if the resource class cannot be obtained or instantiated
 */
public void testClassIndex() throws Exception {
  ResourceData docRd = reg.get("gate.corpora.DocumentImpl");
  assertNotNull("couldn't find document res data", docRd);
  assertTrue("doc res data has wrong class name", docRd.getClassName().equals("gate.corpora.DocumentImpl"));
  assertTrue("doc res data has wrong interface name", docRd.getInterfaceName().equals("gate.Document"));
  Class<?> docClass = docRd.getResourceClass();
  assertNotNull("couldn't get doc class", docClass);
  // Class.newInstance() is deprecated and rethrows checked constructor
  // exceptions undeclared; going through the no-arg constructor wraps them
  // in an InvocationTargetException instead
  LanguageResource docRes = (LanguageResource) docClass.getDeclaredConstructor().newInstance();
  assertTrue("instance of doc is wrong type", docRes instanceof gate.Document);
  reg.clear();
}
Aggregations