Use of gate.Document in project gate-core by GateNLP.
In class SerialCorpusImpl, method remove(Object).
@Override
public boolean remove(Object o) {
  if (DEBUG)
    Out.prln("SerialCorpus: remove(Object) called");
  if (!(o instanceof Document))
    return false;
  Document doc = (Document) o;
  // see if we can find it first; if not, just return
  int index = findDocument(doc);
  if (index == -1)
    return false;
  if (index < docDataList.size()) {
    // we found it, so remove it.
    // Note (Andrey Shafirin): this part of the code can throw an
    // exception if the document wasn't loaded
    String docName = docDataList.get(index).getDocumentName();
    Object docPersistentID = getDocumentPersistentID(index);
    docDataList.remove(index);
    documents.remove(index);
    if (DEBUG)
      Out.prln("documents after remove of " + docName + " are " + documents);
    if (docPersistentID != null)
      documentRemoved(docPersistentID.toString());
    fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this, (Document) o,
        index, docPersistentID, CorpusEvent.DOCUMENT_REMOVED));
  }
  return true;
}
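A minimal usage sketch for this overload. It uses a transient corpus created with Factory.newCorpus for brevity; a datastore-backed SerialCorpusImpl exposes the same List-style remove(Object) call. The class and variable names here are illustrative only.

import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.Gate;

public class RemoveByObjectExample {
  public static void main(String[] args) throws Exception {
    Gate.init();
    Corpus corpus = Factory.newCorpus("demo corpus");
    Document doc = Factory.newDocument("Some text.");
    corpus.add(doc);
    // returns true once the document has been dropped from the corpus,
    // false for objects that are not Documents or are not in the corpus
    System.out.println(corpus.remove(doc));   // true
    System.out.println(corpus.remove(doc));   // false, already gone
    Factory.deleteResource(doc);
    Factory.deleteResource(corpus);
  }
}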
Use of gate.Document in project gate-core by GateNLP.
In class SerialCorpusImpl, method get.
@Override
public Document get(int index) {
  if (index >= docDataList.size())
    return null;
  Document res = documents.get(index);
  if (DEBUG)
    Out.prln("SerialCorpusImpl: get(): index " + index + " result: " + res);
  // if the document is null it has not been loaded yet, so fetch it
  // from the datastore
  if (res == null) {
    FeatureMap parameters = Factory.newFeatureMap();
    parameters.put(DataStore.DATASTORE_FEATURE_NAME, this.dataStore);
    try {
      parameters.put(DataStore.LR_ID_FEATURE_NAME,
          docDataList.get(index).getPersistentID());
      Document lr = (Document) Factory.createResource(
          docDataList.get(index).getClassType(), parameters);
      if (DEBUG)
        Out.prln("Loaded document: " + lr.getName());
      // change the result to the newly loaded doc
      res = lr;
      // finally replace the placeholder with the instantiated document
      documents.set(index, lr);
    } catch (ResourceInstantiationException ex) {
      Err.prln("Error reading document inside a serialised corpus.");
      throw new GateRuntimeException(ex);
    }
  }
  return res;
}
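Because get() only materialises a document when its slot is still null, client code can walk a large persistent corpus without holding every document in memory. A rough sketch under that assumption; the helper name is made up, and corpus is any datastore-backed gate.Corpus obtained elsewhere.

import gate.Corpus;
import gate.Document;
import gate.Factory;

public class LazyCorpusWalk {
  /** Visits every document, releasing again any document that get() had to load. */
  static void visitAll(Corpus corpus) {
    for (int i = 0; i < corpus.size(); i++) {
      boolean wasLoaded = corpus.isDocumentLoaded(i);
      Document doc = corpus.get(i);   // may trigger the datastore load shown above
      System.out.println(doc.getName());
      if (!wasLoaded) {
        corpus.unloadDocument(doc);   // let the corpus persist and drop it again
        Factory.deleteResource(doc);
      }
    }
  }
}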
Use of gate.Document in project gate-core by GateNLP.
In class SerialCorpusImpl, method add.
@Override
public boolean add(Document o) {
  if (o == null)
    return false;
  Document doc = o;
  // accept only documents from this corpus's own datastore
  if (doc.getDataStore() != null && !this.dataStore.equals(doc.getDataStore())) {
    Err.prln("Error: Persistent corpus can only accept documents "
        + "from its own datastore!");
    return false;
  }
  // add the document's data to docDataList; since it is appended at the
  // end, its index is the size of docDataList before the addition
  DocumentData docData = new DocumentData(doc.getName(),
      doc.getLRPersistenceId(), doc.getClass().getName());
  boolean result = docDataList.add(docData);
  documents.add(doc);
  documentAdded(doc);
  fireDocumentAdded(new CorpusEvent(SerialCorpusImpl.this, doc,
      docDataList.size() - 1, doc.getLRPersistenceId(),
      CorpusEvent.DOCUMENT_ADDED));
  return result;
}
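The fireDocumentAdded call above (and fireDocumentRemoved in the remove methods) notifies registered CorpusListeners. A small sketch of observing those events; it assumes addCorpusListener is available on the gate.Corpus implementation you are using, and again uses a transient corpus purely for brevity.

import gate.Corpus;
import gate.Document;
import gate.Factory;
import gate.Gate;
import gate.event.CorpusEvent;
import gate.event.CorpusListener;

public class CorpusEventExample {
  public static void main(String[] args) throws Exception {
    Gate.init();
    Corpus corpus = Factory.newCorpus("listened corpus");
    // observe the events fired by add() and remove()
    corpus.addCorpusListener(new CorpusListener() {
      @Override
      public void documentAdded(CorpusEvent e) {
        System.out.println("added: " + e.getDocument().getName());
      }
      @Override
      public void documentRemoved(CorpusEvent e) {
        System.out.println("removed: " + e.getDocument().getName());
      }
    });
    Document doc = Factory.newDocument("Hello corpus events.");
    corpus.add(doc);     // fires DOCUMENT_ADDED
    corpus.remove(doc);  // fires DOCUMENT_REMOVED
    Factory.deleteResource(doc);
    Factory.deleteResource(corpus);
  }
}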
Use of gate.Document in project gate-core by GateNLP.
In class SerialCorpusImpl, method remove(int).
@Override
public Document remove(int index) {
  if (DEBUG)
    Out.prln("Remove index called");
  // try to get the actual document, if it was loaded
  Document res = isDocumentLoaded(index) ? get(index) : null;
  Object docLRID = docDataList.get(index).persistentID;
  if (docLRID != null)
    documentRemoved(docLRID.toString());
  docDataList.remove(index);
  documents.remove(index);
  fireDocumentRemoved(new CorpusEvent(SerialCorpusImpl.this, res, index,
      docLRID, CorpusEvent.DOCUMENT_REMOVED));
  return res;
}
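Note the contrast with remove(Object) above: that overload looks the document up first and returns false when it cannot be found, whereas remove(int) trusts the caller's index (docDataList.get(index) will throw IndexOutOfBoundsException for an invalid one) and returns the removed Document, or null if it had never been loaded from the datastore.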
Use of gate.Document in project gate-core by GateNLP.
In class SerialAnalyserController, method executeImpl.
/**
 * Run the Processing Resources in sequence.
 */
@Override
protected void executeImpl() throws ExecutionException {
  interrupted = false;
  if (corpus == null)
    throw new ExecutionException("(SerialAnalyserController) \"" + getName()
        + "\":\nThe corpus supplied for execution was null!");
  benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
  // reset the prTimeMap that keeps track of the time taken by each PR
  // to process the entire corpus
  super.resetPrTimeMap();
  if (document == null) {
    // iterate through the documents in the corpus
    for (int i = 0; i < corpus.size(); i++) {
      String savedBenchmarkId = getBenchmarkId();
      try {
        if (isInterrupted()) {
          throw new ExecutionInterruptedException("The execution of the "
              + getName() + " application has been abruptly interrupted!");
        }
        boolean docWasLoaded = corpus.isDocumentLoaded(i);
        // record the time before loading the document
        long documentLoadingStartTime = Benchmark.startPoint();
        Document doc = corpus.get(i);
        // include the document name in the benchmark ID for sub-events
        setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(),
            getBenchmarkId()));
        // report the document loading
        benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
        Benchmark.checkPoint(documentLoadingStartTime,
            Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED,
                getBenchmarkId()), this, benchmarkFeatures);
        // set the doc and corpus on every PR
        for (int j = 0; j < prList.size(); j++) {
          ((LanguageAnalyser) prList.get(j)).setDocument(doc);
          ((LanguageAnalyser) prList.get(j)).setCorpus(corpus);
        }
        try {
          if (DEBUG)
            Out.pr("SerialAnalyserController processing doc="
                + doc.getName() + "...");
          super.executeImpl();
          if (DEBUG)
            Out.prln("done.");
        } finally {
          // make sure we unset the doc and corpus even if we got an exception
          for (int j = 0; j < prList.size(); j++) {
            ((LanguageAnalyser) prList.get(j)).setDocument(null);
            ((LanguageAnalyser) prList.get(j)).setCorpus(null);
          }
        }
        if (!docWasLoaded) {
          long documentSavingStartTime = Benchmark.startPoint();
          // trigger saving
          corpus.unloadDocument(doc);
          Benchmark.checkPoint(documentSavingStartTime,
              Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED,
                  getBenchmarkId()), this, benchmarkFeatures);
          // release the document, since it was not loaded when we started
          Factory.deleteResource(doc);
        }
      } finally {
        setBenchmarkId(savedBenchmarkId);
      }
    }
  } else {
    // a single document was supplied: set the doc and corpus on every PR
    for (int j = 0; j < prList.size(); j++) {
      ((LanguageAnalyser) prList.get(j)).setDocument(document);
      ((LanguageAnalyser) prList.get(j)).setCorpus(corpus);
    }
    try {
      if (DEBUG)
        Out.pr("SerialAnalyserController processing doc="
            + document.getName() + "...");
      super.executeImpl();
      if (DEBUG)
        Out.prln("done.");
    } finally {
      // make sure we unset the doc and corpus even if we got an exception
      for (int j = 0; j < prList.size(); j++) {
        ((LanguageAnalyser) prList.get(j)).setDocument(null);
        ((LanguageAnalyser) prList.get(j)).setCorpus(null);
      }
    }
  }
  // remove the features that we added
  benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE);
  benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE);
}
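For context, a rough sketch of how a SerialAnalyserController is typically assembled and run, so that the executeImpl above gets invoked over a corpus. Creating the controller and corpus through Factory, adding PRs and calling execute() are standard gate-core usage; the DefaultTokeniser PR is only illustrative and assumes the ANNIE plugin has been loaded into the CREOLE register, so treat that line as an assumption.

import gate.Corpus;
import gate.Factory;
import gate.Gate;
import gate.LanguageAnalyser;
import gate.creole.SerialAnalyserController;

public class RunPipelineExample {
  public static void main(String[] args) throws Exception {
    Gate.init();
    // assemble the pipeline whose executeImpl is shown above
    SerialAnalyserController pipeline = (SerialAnalyserController)
        Factory.createResource("gate.creole.SerialAnalyserController");
    // illustrative PR: assumes the ANNIE plugin is on the CREOLE register
    LanguageAnalyser tokeniser = (LanguageAnalyser)
        Factory.createResource("gate.creole.tokeniser.DefaultTokeniser");
    pipeline.add(tokeniser);

    Corpus corpus = Factory.newCorpus("demo corpus");
    corpus.add(Factory.newDocument("A small document to process."));
    pipeline.setCorpus(corpus);

    pipeline.execute();   // iterates the corpus, loading and unloading documents as needed

    Factory.deleteResource(pipeline);
  }
}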