Use of gate.LanguageAnalyser in project gate-core by GateNLP: class ConditionalSerialController, method duplicate().
/**
 * Custom duplication method for conditional controllers to handle
 * duplicating the running strategies.
 * <p>
 * The duplicated PRs (obtained from the already-duplicated controller) are
 * paired positionally with this controller's running strategies: an
 * {@code AnalyserRunningStrategy} is cloned with the same run mode and
 * feature name/value, while any other strategy is replaced by an
 * unconditional strategy that preserves only its current shouldRun() state.
 *
 * @param ctx the context of the current duplication operation
 * @return the duplicated controller, with running strategies equivalent to
 *         this controller's
 * @throws ResourceInstantiationException if the superclass duplication fails
 */
@Override
public Resource duplicate(Factory.DuplicationContext ctx) throws ResourceInstantiationException {
  ConditionalController c = (ConditionalController) super.duplicate(ctx);
  Collection<ProcessingResource> newPRs = c.getPRs();
  List<RunningStrategy> newStrategies = new ArrayList<RunningStrategy>(strategiesList.size());
  Iterator<RunningStrategy> oldRSIt = getRunningStrategies().iterator();
  Iterator<ProcessingResource> prIt = newPRs.iterator();
  while (oldRSIt.hasNext()) {
    RunningStrategy oldStrat = oldRSIt.next();
    // relies on the duplicated PR list being in the same order as this
    // controller's strategies list
    ProcessingResource currentPR = prIt.next();
    if (oldStrat instanceof AnalyserRunningStrategy) {
      // cast once instead of three times inside the constructor call
      AnalyserRunningStrategy oldAnalyserStrat = (AnalyserRunningStrategy) oldStrat;
      newStrategies.add(new AnalyserRunningStrategy((LanguageAnalyser) currentPR,
          oldAnalyserStrat.getRunMode(),
          oldAnalyserStrat.getFeatureName(),
          oldAnalyserStrat.getFeatureValue()));
    } else {
      boolean run = true;
      if (oldStrat instanceof UnconditionalRunningStrategy) {
        run = oldStrat.shouldRun();
      }
      // assume an unconditional strategy. Subclasses that know about other
      // types of strategies can fix this up later
      newStrategies.add(new RunningStrategy.UnconditionalRunningStrategy(currentPR, run));
    }
  }
  c.setRunningStrategies(newStrategies);
  return c;
}
Use of gate.LanguageAnalyser in project gate-core by GateNLP: class TestAnnic, method testAnnicIndexing().
/**
 * Testing the annic indexing functionalities.
 * <p>
 * Loads every file from the test annic resources directory into a fresh
 * corpus, runs a sentence splitter over each document (reading from and
 * writing to the "Key" annotation set), then builds an ANNIC index over the
 * corpus using "Token" as the base token type and "Sentence" as the index
 * unit.
 *
 * @throws Exception if resource creation, processing or indexing fails
 */
public void testAnnicIndexing() throws Exception {
  // lets create a corpus
  testCorpus = Factory.newCorpus("TestAnnic");
  // build <gate-home>/src/test/gate/resources/gate.ac.uk/tests/annic
  // without a deeply nested chain of File constructors
  File directory = new File(Gate.getGateHome(), "src");
  for (String part : new String[] { "test", "gate", "resources", "gate.ac.uk", "tests", "annic" }) {
    directory = new File(directory, part);
  }
  File[] files = directory.listFiles();
  // listFiles() returns null when the directory is missing or unreadable;
  // fail with a clear message rather than an opaque NullPointerException
  if (files == null) {
    throw new IllegalStateException("Cannot list test documents in " + directory);
  }
  for (File file : files) {
    if (file.isFile()) {
      Document doc = Factory.newDocument(file.toURI().toURL(), "ISO-8859-1");
      testCorpus.add(doc);
    }
  }
  AnnicIndexing annicPR = new AnnicIndexing();
  LanguageAnalyser splitter = (LanguageAnalyser) Factory.createResource("gate.creole.splitter.SentenceSplitter");
  splitter.setParameterValue("inputASName", "Key");
  splitter.setParameterValue("outputASName", "Key");
  // run the splitter over every document in the corpus
  for (int i = 0; i < testCorpus.size(); i++) {
    splitter.setDocument(testCorpus.get(i));
    splitter.execute();
  }
  // index
  annicPR.setAnnotationSetName("Key");
  annicPR.setBaseTokenAnnotationType("Token");
  annicPR.setCorpus(testCorpus);
  annicPR.setIndexUnitAnnotationType("Sentence");
  annicPR.setIndexOutputDirectoryLocation(indexURL.toURI().toURL());
  annicPR.execute();
  Factory.deleteResource(testCorpus);
}
Use of gate.LanguageAnalyser in project gate-core by GateNLP: class SerialAnalyserController, method executeImpl().
/**
 * Run the Processing Resources in sequence.
 * <p>
 * If a single document has been set, the pipeline runs once over that
 * document; otherwise it iterates over every document in the corpus,
 * loading and (when the document was not already loaded) unloading each
 * one, and recording per-document benchmark checkpoints.
 *
 * @throws ExecutionException if the corpus is null, the run is interrupted,
 *         or a PR in the pipeline fails
 */
@Override
protected void executeImpl() throws ExecutionException {
  interrupted = false;
  if (corpus == null)
    throw new ExecutionException("(SerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!");
  benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
  // reset the prTimeMap that keeps track of the time
  // taken by each PR to process the entire corpus
  super.resetPrTimeMap();
  if (document == null) {
    // iterate through the documents in the corpus
    for (int i = 0; i < corpus.size(); i++) {
      String savedBenchmarkId = getBenchmarkId();
      try {
        if (isInterrupted()) {
          throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!");
        }
        boolean docWasLoaded = corpus.isDocumentLoaded(i);
        // record the time before loading the document
        long documentLoadingStartTime = Benchmark.startPoint();
        Document doc = corpus.get(i);
        // include the document name in the benchmark ID for sub-events
        setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(), getBenchmarkId()));
        // report the document loading
        benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
        Benchmark.checkPoint(documentLoadingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED, getBenchmarkId()), this, benchmarkFeatures);
        runPipelineOnDocument(doc);
        if (!docWasLoaded) {
          long documentSavingStartTime = Benchmark.startPoint();
          // trigger saving
          corpus.unloadDocument(doc);
          Benchmark.checkPoint(documentSavingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED, getBenchmarkId()), this, benchmarkFeatures);
          // close the previously unloaded Doc
          Factory.deleteResource(doc);
        }
      } finally {
        setBenchmarkId(savedBenchmarkId);
      }
    }
  } else {
    // document was not null: single-document mode
    runPipelineOnDocument(document);
  }
  // remove the features that we added
  benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE);
  benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE);
}

/**
 * Points every PR in this controller at the given document (and this
 * controller's corpus), runs the pipeline once via the superclass, and
 * unsets document and corpus on all PRs afterwards — even on failure.
 * Extracted because this sequence was duplicated verbatim in both the
 * corpus-iteration and single-document branches of executeImpl().
 *
 * @param doc the document to process
 * @throws ExecutionException propagated from the PR pipeline
 */
private void runPipelineOnDocument(Document doc) throws ExecutionException {
  // set the doc and corpus
  for (int j = 0; j < prList.size(); j++) {
    ((LanguageAnalyser) prList.get(j)).setDocument(doc);
    ((LanguageAnalyser) prList.get(j)).setCorpus(corpus);
  }
  try {
    if (DEBUG)
      Out.pr("SerialAnalyserController processing doc=" + doc.getName() + "...");
    super.executeImpl();
    if (DEBUG)
      Out.prln("done.");
  } finally {
    // make sure we unset the doc and corpus even if we got an exception
    for (int j = 0; j < prList.size(); j++) {
      ((LanguageAnalyser) prList.get(j)).setDocument(null);
      ((LanguageAnalyser) prList.get(j)).setCorpus(null);
    }
  }
}
Use of gate.LanguageAnalyser in project gate-core by GateNLP: class ConditionalSerialAnalyserController, method executeImpl().
/**
 * Run the Processing Resources in sequence.
 * <p>
 * If a single document has been set, the pipeline runs once over that
 * document; otherwise it iterates over every document in the corpus,
 * loading and (when the document was not already loaded) unloading each
 * one, and recording per-document benchmark checkpoints. The conditional
 * running strategies are applied by the superclass executeImpl().
 *
 * @throws ExecutionException if the corpus is null, the run is interrupted,
 *         or a PR in the pipeline fails
 */
@Override
protected void executeImpl() throws ExecutionException {
  interrupted = false;
  if (corpus == null)
    throw new ExecutionException("(ConditionalSerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!");
  benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
  if (document == null) {
    // iterate through the documents in the corpus
    for (int i = 0; i < corpus.size(); i++) {
      String savedBenchmarkId = getBenchmarkId();
      try {
        if (isInterrupted()) {
          throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!");
        }
        boolean docWasLoaded = corpus.isDocumentLoaded(i);
        // record the time before loading the document
        long documentLoadingStartTime = Benchmark.startPoint();
        Document doc = corpus.get(i);
        // include the document name in the benchmark ID for sub-events
        setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(), getBenchmarkId()));
        // report the document loading
        benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
        Benchmark.checkPoint(documentLoadingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED, getBenchmarkId()), this, benchmarkFeatures);
        // set the doc and corpus
        for (int j = 0; j < prList.size(); j++) {
          ((LanguageAnalyser) prList.get(j)).setDocument(doc);
          ((LanguageAnalyser) prList.get(j)).setCorpus(corpus);
        }
        try {
          if (DEBUG)
            Out.pr("ConditionalSerialAnalyserController processing doc=" + doc.getName() + "...");
          super.executeImpl();
          if (DEBUG)
            Out.prln("done.");
        } finally {
          // make sure we unset the doc and corpus even if we got an exception
          for (int j = 0; j < prList.size(); j++) {
            ((LanguageAnalyser) prList.get(j)).setDocument(null);
            ((LanguageAnalyser) prList.get(j)).setCorpus(null);
          }
        }
        if (!docWasLoaded) {
          long documentSavingStartTime = Benchmark.startPoint();
          // trigger saving
          corpus.unloadDocument(doc);
          Benchmark.checkPoint(documentSavingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED, getBenchmarkId()), this, benchmarkFeatures);
          // close the previously unloaded Doc
          Factory.deleteResource(doc);
        }
      } finally {
        setBenchmarkId(savedBenchmarkId);
      }
    }
  } else {
    // document was not null: single-document mode
    // set the doc and corpus
    for (int j = 0; j < prList.size(); j++) {
      ((LanguageAnalyser) prList.get(j)).setDocument(document);
      ((LanguageAnalyser) prList.get(j)).setCorpus(corpus);
    }
    try {
      if (DEBUG)
        Out.pr("ConditionalSerialAnalyserController processing doc=" + document.getName() + "...");
      super.executeImpl();
      if (DEBUG)
        Out.prln("done.");
    } finally {
      // make sure we unset the doc and corpus even if we got an exception
      for (int j = 0; j < prList.size(); j++) {
        ((LanguageAnalyser) prList.get(j)).setDocument(null);
        ((LanguageAnalyser) prList.get(j)).setCorpus(null);
      }
    }
  }
  // remove the features that we added, matching the cleanup done by
  // SerialAnalyserController.executeImpl() (previously these were left
  // behind in the shared benchmarkFeatures map)
  benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE);
  benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE);
}
Use of gate.LanguageAnalyser in project gate-core by GateNLP: class RealtimeCorpusController, method executeImpl().
/**
 * Run the Processing Resources in sequence over each document in the
 * corpus, enforcing real-time execution limits.
 * <p>
 * Each document is processed by a {@code DocRunner} task submitted to a
 * separate worker thread. This method then waits for the task with an
 * escalating shutdown strategy: (1) after the "graceful" period, interrupt
 * the worker thread and all PRs; (2) at a midpoint check, null out the
 * PRs' document and corpus to induce an exception in the worker; (3) at
 * the hard timeout, forcibly {@code Thread.stop()} the worker (hence the
 * deprecation suppression). A timeout value of -1 disables that stage.
 *
 * @throws ExecutionException if the corpus is null, the worker task fails,
 *         or a timeout occurred and exceptions are not suppressed
 */
@Override
@SuppressWarnings("deprecation")
public void executeImpl() throws ExecutionException {
interrupted = false;
// non-null once any timeout stage has fired; doubles as the log message
String haveTimeout = null;
if (corpus == null)
// NOTE(review): message says "SerialAnalyserController" although this is
// RealtimeCorpusController — runtime string left untouched here
throw new ExecutionException("(SerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!");
// iterate through the documents in the corpus
for (int i = 0; i < corpus.size(); i++) {
if (isInterrupted())
throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!");
boolean docWasLoaded = corpus.isDocumentLoaded(i);
Document doc = corpus.get(i);
// start the execution, in the separate thread
threadDying = false;
Future<?> docRunnerFuture = threadSource.submit(new DocRunner(doc));
// how long have we already waited
long waitSoFar = 0;
// check if we should use graceful stop first
if (actualGraceful != -1 && (actualTimeout == -1 || actualGraceful < actualTimeout)) {
try {
docRunnerFuture.get(actualGraceful, TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
// we waited the graceful period, and the task did not finish
// -> interrupt the job (nicely)
threadDying = true;
waitSoFar += actualGraceful;
haveTimeout = "Execution timeout, attempting to gracefully stop worker thread...";
logger.info(haveTimeout);
// interrupt the working thread - we can't cancel the future as
// that would cause future get() calls to fail immediately with
// a CancellationException
Thread t = currentWorkingThread;
if (t != null) {
t.interrupt();
}
// also interrupt every PR so long-running PRs can notice and bail out
for (int j = 0; j < prList.size(); j++) {
((Executable) prList.get(j)).interrupt();
}
// next check scheduled for
// - half-time between graceful and timeout, or
// - graceful-and-a-half (if no timeout)
long waitTime = (actualTimeout != -1) ? (actualTimeout - actualGraceful) / 2 : (actualGraceful / 2);
try {
docRunnerFuture.get(waitTime, TimeUnit.MILLISECONDS);
} catch (TimeoutException e1) {
// the mid point has been reached: try nullify
threadDying = true;
waitSoFar += waitTime;
haveTimeout = "Execution timeout, attempting to induce exception in order to stop worker thread...";
logger.info(haveTimeout);
// nulling the doc/corpus should make the still-running PR fail fast
for (int j = 0; j < prList.size(); j++) {
((LanguageAnalyser) prList.get(j)).setDocument(null);
((LanguageAnalyser) prList.get(j)).setCorpus(null);
}
} catch (InterruptedException e1) {
// the current thread (not the execution thread!) was interrupted
// throw it forward
Thread.currentThread().interrupt();
} catch (java.util.concurrent.ExecutionException e2) {
throw new ExecutionException(e2);
}
} catch (java.util.concurrent.ExecutionException e) {
throw new ExecutionException(e);
} catch (InterruptedException e) {
// the current thread (not the execution thread!) was interrupted
// throw it forward
Thread.currentThread().interrupt();
}
}
// wait before we call stop()
if (actualTimeout != -1) {
long waitTime = actualTimeout - waitSoFar;
if (waitTime > 0) {
try {
docRunnerFuture.get(waitTime, TimeUnit.MILLISECONDS);
} catch (TimeoutException e) {
// we're out of time: stop the thread
// (Thread.stop() is deprecated and unsafe in general; used here only
// as the documented last resort of the real-time controller)
threadDying = true;
haveTimeout = "Execution timeout, worker thread will be forcibly terminated!";
logger.info(haveTimeout);
// using a volatile variable instead of synchronisation
Thread theThread = currentWorkingThread;
if (theThread != null) {
theThread.stop();
try {
// and wait for it to actually die
docRunnerFuture.get();
} catch (InterruptedException e2) {
// current thread has been interrupted:
Thread.currentThread().interrupt();
} catch (java.util.concurrent.ExecutionException ee) {
if (ee.getCause() instanceof ThreadDeath) {
// we have just caused this
} else {
logger.error("Real Time Controller Malfunction", ee);
haveTimeout = "Real Time Controller Malfunction: " + ee.getMessage();
}
}
}
} catch (InterruptedException e) {
// the current thread (not the execution thread!) was interrupted
// throw it forward
Thread.currentThread().interrupt();
} catch (java.util.concurrent.ExecutionException e) {
throw new ExecutionException(e);
}
} else {
// stop now!
// (the graceful phase already consumed the whole timeout budget)
threadDying = true;
haveTimeout = "Execution timeout, worker thread will be forcibly terminated!";
logger.info(haveTimeout);
// using a volatile variable instead of synchronisation
Thread theThread = currentWorkingThread;
if (theThread != null) {
theThread.stop();
try {
// and wait for it to actually die
docRunnerFuture.get();
} catch (InterruptedException e) {
// current thread has been interrupted:
Thread.currentThread().interrupt();
} catch (java.util.concurrent.ExecutionException ee) {
if (ee.getCause() instanceof ThreadDeath) {
// we have just caused this
} else {
logger.error("Real Time Controller Malfunction", ee);
haveTimeout = "Real Time Controller Malfunction: " + ee.getMessage();
}
}
}
}
}
// capture the name before the document is possibly deleted below
String docName = doc.getName();
// at this point we finished execution (one way or another)
if (!docWasLoaded) {
// trigger saving
getCorpus().unloadDocument(doc);
// close the previously unloaded Doc
Factory.deleteResource(doc);
}
if (!suppressExceptions && haveTimeout != null) {
throw new ExecutionException("Execution timeout occurred");
}
// global progress bar depends on this status message firing at the end
// of processing for each document.
fireStatusChanged("Finished running " + getName() + " on document " + docName);
}
}
Aggregations