Search in sources :

Example 1 with LanguageAnalyser

use of gate.LanguageAnalyser in project gate-core by GateNLP.

the class ConditionalSerialController method duplicate.

/**
 * Duplicates this conditional controller and then rebuilds the running
 * strategies so that each one is attached to the corresponding duplicated PR
 * rather than to the PR of the original controller.
 *
 * @param ctx the duplication context handed on to the superclass
 * @return the duplicated controller with its own running strategies
 * @throws ResourceInstantiationException declared for the superclass duplication
 */
@Override
public Resource duplicate(Factory.DuplicationContext ctx) throws ResourceInstantiationException {
    ConditionalController copy = (ConditionalController) super.duplicate(ctx);
    Collection<ProcessingResource> copiedPRs = copy.getPRs();
    List<RunningStrategy> rebuilt = new ArrayList<RunningStrategy>(strategiesList.size());
    Iterator<RunningStrategy> sourceStrategies = getRunningStrategies().iterator();
    Iterator<ProcessingResource> targetPRs = copiedPRs.iterator();
    // walk the original strategies and the duplicated PRs in lock-step
    while (sourceStrategies.hasNext()) {
        RunningStrategy source = sourceStrategies.next();
        ProcessingResource target = targetPRs.next();
        if (!(source instanceof AnalyserRunningStrategy)) {
            // Anything that is not an analyser strategy is recreated as an
            // unconditional one; it runs by default unless the original was an
            // unconditional strategy that says otherwise. Subclasses that know
            // about other strategy types can fix this up later.
            boolean shouldRun = !(source instanceof UnconditionalRunningStrategy) || source.shouldRun();
            rebuilt.add(new RunningStrategy.UnconditionalRunningStrategy(target, shouldRun));
            continue;
        }
        LanguageAnalyser analyser = (LanguageAnalyser) target;
        AnalyserRunningStrategy analyserSource = (AnalyserRunningStrategy) source;
        rebuilt.add(new AnalyserRunningStrategy(analyser, analyserSource.getRunMode(), analyserSource.getFeatureName(), analyserSource.getFeatureValue()));
    }
    copy.setRunningStrategies(rebuilt);
    return copy;
}
Also used : UnconditionalRunningStrategy(gate.creole.RunningStrategy.UnconditionalRunningStrategy) ProcessingResource(gate.ProcessingResource) ArrayList(java.util.ArrayList) LanguageAnalyser(gate.LanguageAnalyser)

Example 2 with LanguageAnalyser

use of gate.LanguageAnalyser in project gate-core by GateNLP.

the class TestAnnic method testAnnicIndexing.

/**
 * Tests the ANNIC indexing functionality: loads every regular file from the
 * ANNIC test directory under the GATE home into a corpus, runs a sentence
 * splitter over the "Key" annotation set of each document, and then indexes
 * the corpus with {@code AnnicIndexing}.
 *
 * @throws Exception if the test directory cannot be listed, or if resource
 *         creation, sentence splitting or indexing fails
 */
public void testAnnicIndexing() throws Exception {
    // lets create a corpus
    testCorpus = Factory.newCorpus("TestAnnic");
    // <gate home>/src/test/gate/resources/gate.ac.uk/tests/annic
    File directory = new File(Gate.getGateHome(), "src");
    for (String segment : new String[] {"test", "gate", "resources", "gate.ac.uk", "tests", "annic"}) {
        directory = new File(directory, segment);
    }
    File[] files = directory.listFiles();
    if (files == null) {
        // listFiles() yields null (not an empty array) when the path is not a
        // directory or cannot be read; fail with a message that says so
        throw new IllegalStateException("Cannot list ANNIC test documents in " + directory.getAbsolutePath());
    }
    for (File file : files) {
        if (file.isFile()) {
            Document doc = Factory.newDocument(file.toURI().toURL(), "ISO-8859-1");
            testCorpus.add(doc);
        }
    }
    AnnicIndexing annicPR = new AnnicIndexing();
    LanguageAnalyser splitter = (LanguageAnalyser) Factory.createResource("gate.creole.splitter.SentenceSplitter");
    // the splitter reads from and writes to the same annotation set that is indexed below
    splitter.setParameterValue("inputASName", "Key");
    splitter.setParameterValue("outputASName", "Key");
    for (int i = 0; i < testCorpus.size(); i++) {
        splitter.setDocument(testCorpus.get(i));
        splitter.execute();
    }
    // index
    annicPR.setAnnotationSetName("Key");
    annicPR.setBaseTokenAnnotationType("Token");
    annicPR.setCorpus(testCorpus);
    annicPR.setIndexUnitAnnotationType("Sentence");
    annicPR.setIndexOutputDirectoryLocation(indexURL.toURI().toURL());
    annicPR.execute();
    Factory.deleteResource(testCorpus);
}
Also used : LanguageAnalyser(gate.LanguageAnalyser) Document(gate.Document) File(java.io.File)

Example 3 with LanguageAnalyser

use of gate.LanguageAnalyser in project gate-core by GateNLP.

the class SerialAnalyserController method executeImpl.

/**
 * Runs the Processing Resources in sequence, either over the single
 * document that has been set or, when no document is set, over each
 * document of the corpus in turn.
 *
 * @throws ExecutionException if no corpus was supplied, if execution was
 *         interrupted, or if the superclass execution fails
 */
@Override
protected void executeImpl() throws ExecutionException {
    interrupted = false;
    if (corpus == null) {
        throw new ExecutionException("(SerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!");
    }
    benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
    // start from a clean per-PR timing map for this run over the corpus
    super.resetPrTimeMap();
    if (document != null) {
        // a specific document was set: process only that one
        executeOnSingleDocument(document);
    } else {
        // no specific document: walk through the whole corpus
        for (int docIndex = 0; docIndex < corpus.size(); docIndex++) {
            String outerBenchmarkId = getBenchmarkId();
            try {
                if (isInterrupted()) {
                    throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!");
                }
                boolean alreadyLoaded = corpus.isDocumentLoaded(docIndex);
                // time how long fetching the document takes
                long loadStart = Benchmark.startPoint();
                Document current = corpus.get(docIndex);
                // sub-events are reported under an ID that includes the document name
                setBenchmarkId(Benchmark.createBenchmarkId("doc_" + current.getName(), getBenchmarkId()));
                benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, current.getName());
                Benchmark.checkPoint(loadStart, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED, getBenchmarkId()), this, benchmarkFeatures);
                executeOnSingleDocument(current);
                if (!alreadyLoaded) {
                    // the document was loaded only for this run: save it and close it again
                    long saveStart = Benchmark.startPoint();
                    corpus.unloadDocument(current);
                    Benchmark.checkPoint(saveStart, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED, getBenchmarkId()), this, benchmarkFeatures);
                    Factory.deleteResource(current);
                }
            } finally {
                // restore the benchmark ID that was current before this document
                setBenchmarkId(outerBenchmarkId);
            }
        }
    }
    // remove the features that we added
    benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE);
    benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE);
}

/**
 * Points every PR at the given document and the current corpus, runs the
 * superclass execution, and always clears document and corpus on the PRs
 * afterwards, even when execution throws.
 */
private void executeOnSingleDocument(Document target) throws ExecutionException {
    for (int j = 0; j < prList.size(); j++) {
        LanguageAnalyser analyser = (LanguageAnalyser) prList.get(j);
        analyser.setDocument(target);
        analyser.setCorpus(corpus);
    }
    try {
        if (DEBUG) {
            Out.pr("SerialAnalyserController processing doc=" + target.getName() + "...");
        }
        super.executeImpl();
        if (DEBUG) {
            Out.prln("done.");
        }
    } finally {
        for (int j = 0; j < prList.size(); j++) {
            LanguageAnalyser analyser = (LanguageAnalyser) prList.get(j);
            analyser.setDocument(null);
            analyser.setCorpus(null);
        }
    }
}
Also used : LanguageAnalyser(gate.LanguageAnalyser) Document(gate.Document)

Example 4 with LanguageAnalyser

use of gate.LanguageAnalyser in project gate-core by GateNLP.

the class ConditionalSerialAnalyserController method executeImpl.

/**
 * Runs the Processing Resources in sequence, either over the single
 * document that has been set or, when no document is set, over each
 * document of the corpus in turn. The benchmark features added during the
 * run are removed again before a normal return.
 *
 * @throws ExecutionException if no corpus was supplied, if execution was
 *         interrupted, or if the superclass execution fails
 */
@Override
protected void executeImpl() throws ExecutionException {
    interrupted = false;
    if (corpus == null) {
        throw new ExecutionException("(ConditionalSerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!");
    }
    benchmarkFeatures.put(Benchmark.CORPUS_NAME_FEATURE, corpus.getName());
    if (document == null) {
        // iterate through the documents in the corpus
        for (int i = 0; i < corpus.size(); i++) {
            String savedBenchmarkId = getBenchmarkId();
            try {
                if (isInterrupted()) {
                    throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!");
                }
                boolean docWasLoaded = corpus.isDocumentLoaded(i);
                // record the time before loading the document
                long documentLoadingStartTime = Benchmark.startPoint();
                Document doc = corpus.get(i);
                // include the document name in the benchmark ID for sub-events
                setBenchmarkId(Benchmark.createBenchmarkId("doc_" + doc.getName(), getBenchmarkId()));
                // report the document loading
                benchmarkFeatures.put(Benchmark.DOCUMENT_NAME_FEATURE, doc.getName());
                Benchmark.checkPoint(documentLoadingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_LOADED, getBenchmarkId()), this, benchmarkFeatures);
                runAnalysersOnDocument(doc);
                if (!docWasLoaded) {
                    long documentSavingStartTime = Benchmark.startPoint();
                    // trigger saving
                    corpus.unloadDocument(doc);
                    Benchmark.checkPoint(documentSavingStartTime, Benchmark.createBenchmarkId(Benchmark.DOCUMENT_SAVED, getBenchmarkId()), this, benchmarkFeatures);
                    // close the previously unloaded Doc
                    Factory.deleteResource(doc);
                }
            } finally {
                // restore the benchmark ID that was current before this document
                setBenchmarkId(savedBenchmarkId);
            }
        }
    } else {
        // a specific document was set: process only that one
        runAnalysersOnDocument(document);
    }
    // remove the features that we added, so they do not linger in
    // benchmarkFeatures after this run
    benchmarkFeatures.remove(Benchmark.DOCUMENT_NAME_FEATURE);
    benchmarkFeatures.remove(Benchmark.CORPUS_NAME_FEATURE);
}

/**
 * Points every PR at the given document and the current corpus, runs the
 * superclass execution, and always clears document and corpus on the PRs
 * afterwards, even when execution throws.
 */
private void runAnalysersOnDocument(Document doc) throws ExecutionException {
    // set the doc and corpus
    for (int j = 0; j < prList.size(); j++) {
        LanguageAnalyser analyser = (LanguageAnalyser) prList.get(j);
        analyser.setDocument(doc);
        analyser.setCorpus(corpus);
    }
    try {
        if (DEBUG) {
            Out.pr("ConditionalSerialAnalyserController processing doc=" + doc.getName() + "...");
        }
        super.executeImpl();
        if (DEBUG) {
            Out.prln("done.");
        }
    } finally {
        // make sure we unset the doc and corpus even if we got an exception
        for (int j = 0; j < prList.size(); j++) {
            LanguageAnalyser analyser = (LanguageAnalyser) prList.get(j);
            analyser.setDocument(null);
            analyser.setCorpus(null);
        }
    }
}
Also used : LanguageAnalyser(gate.LanguageAnalyser) Document(gate.Document)

Example 5 with LanguageAnalyser

use of gate.LanguageAnalyser in project gate-core by GateNLP.

the class RealtimeCorpusController method executeImpl.

/**
 * Run the Processing Resources in sequence over every document of the
 * corpus, processing each document through a {@code DocRunner} submitted to
 * {@code threadSource} and limiting how long this method waits for it.
 * <p>
 * For each document the method escalates through up to three stages while
 * waiting on the runner's {@code Future}:
 * <ol>
 * <li>if {@code actualGraceful} is not -1 and is either the only limit or
 * smaller than {@code actualTimeout}, wait that long, then interrupt the
 * working thread and every PR;</li>
 * <li>wait a further intermediate period, then set the document and corpus
 * of every PR to null;</li>
 * <li>if {@code actualTimeout} is not -1, wait for the remaining time and
 * then call {@code Thread.stop()} on the working thread.</li>
 * </ol>
 * Afterwards a document that was not loaded beforehand is unloaded and
 * deleted, and a status message is fired.
 * <p>
 * NOTE(review): the deprecation warning is presumably suppressed because of
 * the {@code Thread.stop()} calls below; confirm.
 *
 * @throws ExecutionException if the corpus is null, if this controller was
 *         interrupted, if the runner's future reports an execution failure
 *         during one of the timed waits, or if a timeout occurred while
 *         {@code suppressExceptions} is false
 */
@Override
@SuppressWarnings("deprecation")
public void executeImpl() throws ExecutionException {
    interrupted = false;
    // stays null until a timeout stage fires; then holds the latest status message
    String haveTimeout = null;
    // NOTE(review): the message names SerialAnalyserController although this
    // method is shown as belonging to RealtimeCorpusController; confirm whether intended
    if (corpus == null)
        throw new ExecutionException("(SerialAnalyserController) \"" + getName() + "\":\n" + "The corpus supplied for execution was null!");
    // iterate through the documents in the corpus
    for (int i = 0; i < corpus.size(); i++) {
        if (isInterrupted())
            throw new ExecutionInterruptedException("The execution of the " + getName() + " application has been abruptly interrupted!");
        // remember whether the document was already loaded, so that it is only
        // unloaded and deleted below if this loop caused it to be loaded
        boolean docWasLoaded = corpus.isDocumentLoaded(i);
        Document doc = corpus.get(i);
        // start the execution, in the separate thread
        threadDying = false;
        Future<?> docRunnerFuture = threadSource.submit(new DocRunner(doc));
        // how long have we already waited
        long waitSoFar = 0;
        // check if we should use graceful stop first
        if (actualGraceful != -1 && (actualTimeout == -1 || actualGraceful < actualTimeout)) {
            try {
                docRunnerFuture.get(actualGraceful, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                // we waited the graceful period, and the task did not finish
                // -> interrupt the job (nicely)
                threadDying = true;
                waitSoFar += actualGraceful;
                haveTimeout = "Execution timeout, attempting to gracefully stop worker thread...";
                logger.info(haveTimeout);
                // interrupt the working thread - we can't cancel the future as
                // that would cause future get() calls to fail immediately with
                // a CancellationException
                // read the field once into a local so the null check and the call use the same value
                Thread t = currentWorkingThread;
                if (t != null) {
                    t.interrupt();
                }
                // also ask every PR to interrupt itself
                for (int j = 0; j < prList.size(); j++) {
                    ((Executable) prList.get(j)).interrupt();
                }
                // next check scheduled for
                // - half-time between graceful and timeout, or
                // - graceful-and-a-half (if no timeout)
                long waitTime = (actualTimeout != -1) ? (actualTimeout - actualGraceful) / 2 : (actualGraceful / 2);
                try {
                    docRunnerFuture.get(waitTime, TimeUnit.MILLISECONDS);
                } catch (TimeoutException e1) {
                    // the mid point has been reached: try nullify
                    threadDying = true;
                    waitSoFar += waitTime;
                    haveTimeout = "Execution timeout, attempting to induce exception in order to stop worker thread...";
                    logger.info(haveTimeout);
                    // clear document and corpus on every PR while the worker may still be running
                    for (int j = 0; j < prList.size(); j++) {
                        ((LanguageAnalyser) prList.get(j)).setDocument(null);
                        ((LanguageAnalyser) prList.get(j)).setCorpus(null);
                    }
                } catch (InterruptedException e1) {
                    // the current thread (not the execution thread!) was interrupted
                    // throw it forward
                    Thread.currentThread().interrupt();
                } catch (java.util.concurrent.ExecutionException e2) {
                    throw new ExecutionException(e2);
                }
            } catch (java.util.concurrent.ExecutionException e) {
                throw new ExecutionException(e);
            } catch (InterruptedException e) {
                // the current thread (not the execution thread!) was interrupted
                // throw it forward
                Thread.currentThread().interrupt();
            }
        }
        // wait before we call stop()
        if (actualTimeout != -1) {
            // whatever is left of the hard timeout after the waits above
            long waitTime = actualTimeout - waitSoFar;
            if (waitTime > 0) {
                try {
                    docRunnerFuture.get(waitTime, TimeUnit.MILLISECONDS);
                } catch (TimeoutException e) {
                    // we're out of time: stop the thread
                    threadDying = true;
                    haveTimeout = "Execution timeout, worker thread will be forcibly terminated!";
                    logger.info(haveTimeout);
                    // using a volatile variable instead of synchronisation
                    Thread theThread = currentWorkingThread;
                    if (theThread != null) {
                        theThread.stop();
                        try {
                            // and wait for it to actually die
                            docRunnerFuture.get();
                        } catch (InterruptedException e2) {
                            // current thread has been interrupted:
                            Thread.currentThread().interrupt();
                        } catch (java.util.concurrent.ExecutionException ee) {
                            if (ee.getCause() instanceof ThreadDeath) {
                            // we have just caused this
                            } else {
                                // any other cause is logged and recorded, but not rethrown here
                                logger.error("Real Time Controller Malfunction", ee);
                                haveTimeout = "Real Time Controller Malfunction: " + ee.getMessage();
                            }
                        }
                    }
                } catch (InterruptedException e) {
                    // the current thread (not the execution thread!) was interrupted
                    // throw it forward
                    Thread.currentThread().interrupt();
                } catch (java.util.concurrent.ExecutionException e) {
                    throw new ExecutionException(e);
                }
            } else {
                // the time already waited has used up the whole timeout
                // stop now!
                threadDying = true;
                haveTimeout = "Execution timeout, worker thread will be forcibly terminated!";
                logger.info(haveTimeout);
                // using a volatile variable instead of synchronisation
                Thread theThread = currentWorkingThread;
                if (theThread != null) {
                    theThread.stop();
                    try {
                        // and wait for it to actually die
                        docRunnerFuture.get();
                    } catch (InterruptedException e) {
                        // current thread has been interrupted:
                        Thread.currentThread().interrupt();
                    } catch (java.util.concurrent.ExecutionException ee) {
                        if (ee.getCause() instanceof ThreadDeath) {
                        // we have just caused this
                        } else {
                            // any other cause is logged and recorded, but not rethrown here
                            logger.error("Real Time Controller Malfunction", ee);
                            haveTimeout = "Real Time Controller Malfunction: " + ee.getMessage();
                        }
                    }
                }
            }
        }
        // capture the name before the document may be deleted below
        String docName = doc.getName();
        // at this point we finished execution (one way or another)
        if (!docWasLoaded) {
            // trigger saving
            getCorpus().unloadDocument(doc);
            // close the previously unloaded Doc
            Factory.deleteResource(doc);
        }
        // a recorded timeout (or malfunction) message becomes an exception unless suppressed
        if (!suppressExceptions && haveTimeout != null) {
            throw new ExecutionException("Execution timeout occurred");
        }
        // global progress bar depends on this status message firing at the end
        // of processing for each document.
        fireStatusChanged("Finished running " + getName() + " on document " + docName);
    }
}
Also used : Document(gate.Document) LanguageAnalyser(gate.LanguageAnalyser) TimeoutException(java.util.concurrent.TimeoutException)

Aggregations

LanguageAnalyser (gate.LanguageAnalyser): 5, Document (gate.Document): 4, ProcessingResource (gate.ProcessingResource): 1, UnconditionalRunningStrategy (gate.creole.RunningStrategy.UnconditionalRunningStrategy): 1, File (java.io.File): 1, ArrayList (java.util.ArrayList): 1, TimeoutException (java.util.concurrent.TimeoutException): 1