Search in sources :

Example 1 with Mira

Use of edu.lium.mira.Mira in the webanno project (by webanno).

The generateFinalClassifier method of the AutomationUtil class.

/**
 * Trains the final MIRA classifier for the train feature of the given template and saves the
 * resulting model.
 * <p>
 * Training only takes place when at least one of the following is found: (A) an unprocessed
 * training document whose feature is one of the template's other features, (B) an unprocessed
 * training document for the train feature, (C) a source document in state
 * {@code CURATION_FINISHED}, or (D) an unprocessed tab-separated document for the train
 * feature. Otherwise the result stored in the template is returned and nothing is trained.
 *
 * @param aTemplate
 *            the template; supplies the train feature and the other features.
 * @param aRepository
 *            the repository; used to list the source documents and handed to the helpers.
 * @param aCurationDocumentService
 *            handed to {@code generateTrainDocument}.
 * @param aAnnotationService
 *            handed to {@code getFeatureOtherLayer} and {@code generateTrainDocument}.
 * @param aAutomationService
 *            supplies the MIRA directory, the model file and the training document lists.
 * @param aUserDao
 *            handed to {@code getFeatureOtherLayer} and {@code generateTrainDocument}.
 * @return the value returned by the last training iteration, or the template's stored result
 *         when no training was necessary.
 * @throws UIMAException
 *             propagated from the calls made by this method.
 * @throws ClassNotFoundException
 *             propagated from the calls made by this method.
 * @throws IOException
 *             propagated from the calls made by this method.
 * @throws AnnotationException
 *             propagated from the calls made by this method.
 * @throws AutomationException
 *             if an error occurs.
 */
public static String generateFinalClassifier(MiraTemplate aTemplate, DocumentService aRepository, CurationDocumentService aCurationDocumentService, AnnotationSchemaService aAnnotationService, AutomationService aAutomationService, UserDao aUserDao) throws UIMAException, ClassNotFoundException, IOException, AnnotationException, AutomationException {
    // Fixed training settings passed to MIRA further down.
    final int frequency = 2;
    final double sigma = 1;
    final int iterations = 10;
    final int beamSize = 0;
    final boolean maxPosteriors = false;
    final boolean randomInit = false;

    AnnotationFeature layerFeature = aTemplate.getTrainFeature();
    List<List<String>> predictions = new ArrayList<>();
    File miraDir = aAutomationService.getMiraDir(layerFeature);
    Mira mira = new Mira();

    // All training files of this feature share the "<layer id>-<feature id>" name prefix.
    String filePrefix = layerFeature.getLayer().getId() + "-" + layerFeature.getId();
    File predFile = new File(miraDir, filePrefix + ".train.ft");
    File predictedFile = new File(predFile.getAbsolutePath() + "-pred");

    boolean retrainingNeeded = false;

    // A. a training document of one of the other train layers was changed
    for (AnnotationFeature otherFeature : aTemplate.getOtherFeatures()) {
        for (TrainingDocument candidate : aAutomationService.listTrainingDocuments(aTemplate.getTrainFeature().getProject())) {
            if (candidate.isProcessed() || candidate.getFeature() == null || !candidate.getFeature().equals(otherFeature)) {
                continue;
            }
            retrainingNeeded = true;
            // leaves the inner loop only; the remaining other features are still visited
            break;
        }
    }

    // B. a training document of the main training layer was changed
    for (TrainingDocument candidate : aAutomationService.listTrainingDocuments(layerFeature.getProject())) {
        if (candidate.isProcessed() || candidate.getFeature() == null || !candidate.getFeature().equals(layerFeature)) {
            continue;
        }
        retrainingNeeded = true;
        break;
    }

    // C. a curated document is available
    for (SourceDocument sourceDocument : aRepository.listSourceDocuments(layerFeature.getProject())) {
        if (sourceDocument.getState().equals(SourceDocumentState.CURATION_FINISHED)) {
            retrainingNeeded = true;
            break;
        }
    }

    // D. a tab-separated training document of the main training layer was changed
    for (TrainingDocument candidate : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        if (candidate.isProcessed() || candidate.getFeature() == null || !candidate.getFeature().equals(layerFeature)) {
            continue;
        }
        retrainingNeeded = true;
        break;
    }

    if (!retrainingNeeded) {
        return aTemplate.getResult();
    }

    File baseTrainFile = new File(miraDir, filePrefix + ".train.base");
    File trainFile = new File(miraDir, filePrefix + ".train");
    String trainName = trainFile.getAbsolutePath();
    String finalClassifierModelName = aAutomationService.getMiraModel(layerFeature, false, null).getAbsolutePath();

    // Collect the predictions of the other layers and of the tab-separated documents, then
    // write the base train document.
    getFeatureOtherLayer(aTemplate, aRepository, aAutomationService, aAnnotationService, aUserDao, beamSize, maxPosteriors, predictions, mira, predFile, predictedFile, null);
    getFeaturesTabSep(aTemplate, aAutomationService, beamSize, maxPosteriors, layerFeature, predictions, mira, predFile, predictedFile);
    generateTrainDocument(aTemplate, aRepository, aCurationDocumentService, aAnnotationService, aAutomationService, aUserDao, false);

    // Without additional predictions the base file is the train file; otherwise the
    // predictions are added while building the train file.
    int additionalFeatures = predictions.size();
    String trainTemplate = createTemplate(aTemplate.getTrainFeature(), getMiraTemplateFile(layerFeature, aAutomationService), additionalFeatures);
    if (additionalFeatures == 0) {
        FileUtils.copyFile(baseTrainFile, trainFile);
    } else {
        buildTrainFile(baseTrainFile, trainFile, predictions);
    }

    if (!layerFeature.getLayer().isLockToTokenOffset()) {
        mira.setIobScorer();
    }
    mira.loadTemplates(trainTemplate);
    mira.setClip(sigma);
    mira.maxPosteriors = maxPosteriors;
    mira.beamSize = beamSize;
    int numExamples = mira.count(trainName, frequency);
    mira.initModel(randomInit);

    String trainResult = "";
    for (int round = 0; round < iterations; round++) {
        trainResult = mira.train(trainName, iterations, numExamples, round);
        mira.averageWeights(iterations * numExamples);
    }
    mira.saveModel(finalClassifierModelName);

    // All training documents are processed by now.
    // NOTE(review): only the flag on the listed objects is set here; whether that change is
    // persisted is not visible in this method - confirm.
    for (TrainingDocument trained : aAutomationService.listTrainingDocuments(layerFeature.getProject())) {
        trained.setProcessed(true);
    }
    for (TrainingDocument trained : aAutomationService.listTabSepDocuments(layerFeature.getProject())) {
        trained.setProcessed(true);
    }
    return trainResult;
}
Also used : ArrayList(java.util.ArrayList) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) Mira(edu.lium.mira.Mira) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)

Example 2 with Mira

Use of edu.lium.mira.Mira in the webanno project (by webanno).

The addOtherFeatureToPredictDocument method of the AutomationUtil class.

/**
 * Writes the {@code <document id>.pred} file in the MIRA directory for every source document
 * of the project that owns the template's train feature.
 * <p>
 * For each document the predictions of the other layers and of the tab-separated documents
 * are collected first. If none were collected, the document's {@code .pred.ft} file is copied
 * to the {@code .pred} file; otherwise {@code buildPredictFile} produces the {@code .pred}
 * file from the {@code .pred.ft} file and the collected predictions.
 *
 * @param aTemplate
 *            the template.
 * @param aRepository
 *            the repository; used to list the source documents.
 * @param aAnnotationService
 *            handed to {@code getFeatureOtherLayer}.
 * @param aAutomationService
 *            supplies the MIRA directory and is handed to the helpers.
 * @param aUserDao
 *            handed to {@code getFeatureOtherLayer}.
 * @throws UIMAException
 *             propagated from the calls made by this method.
 * @throws ClassNotFoundException
 *             propagated from the calls made by this method.
 * @throws IOException
 *             propagated from the calls made by this method.
 * @throws AnnotationException
 *             propagated from the calls made by this method.
 * @throws AutomationException
 *             propagated from the calls made by this method.
 */
public static void addOtherFeatureToPredictDocument(MiraTemplate aTemplate, DocumentService aRepository, AnnotationSchemaService aAnnotationService, AutomationService aAutomationService, UserDao aUserDao) throws UIMAException, ClassNotFoundException, IOException, AnnotationException, AutomationException {
    // Settings handed to the helpers; identical for every document.
    final int beamSize = 0;
    final boolean maxPosteriors = false;

    AnnotationFeature layerFeature = aTemplate.getTrainFeature();
    File miraDir = aAutomationService.getMiraDir(layerFeature);

    for (SourceDocument sourceDocument : aRepository.listSourceDocuments(layerFeature.getProject())) {
        List<List<String>> predictions = new ArrayList<>();
        File featureFile = new File(miraDir, sourceDocument.getId() + ".pred.ft");
        File predictedFile = new File(featureFile.getAbsolutePath() + "-pred");
        Mira mira = new Mira();

        getFeatureOtherLayer(aTemplate, aRepository, aAutomationService, aAnnotationService, aUserDao, beamSize, maxPosteriors, predictions, mira, featureFile, predictedFile, sourceDocument);
        getFeaturesTabSep(aTemplate, aAutomationService, beamSize, maxPosteriors, layerFeature, predictions, mira, featureFile, predictedFile);

        File targetFile = new File(miraDir, sourceDocument.getId() + ".pred");
        int additionalFeatures = predictions.size();
        // The returned template name is not needed here.
        createTemplate(aTemplate.getTrainFeature(), getMiraTemplateFile(layerFeature, aAutomationService), additionalFeatures);
        if (additionalFeatures > 0) {
            buildPredictFile(featureFile, targetFile, predictions, aTemplate.getTrainFeature());
        } else {
            FileUtils.copyFile(featureFile, targetFile);
        }
    }
}
Also used : SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) File(java.io.File) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) Mira(edu.lium.mira.Mira)

Example 3 with Mira

Use of edu.lium.mira.Mira in the webanno project (by webanno).

The otherFeatureClassifiers method of the AutomationUtil class.

/**
 * When additional layers are used as training features, the training document should be
 * auto-predicted with the other layers. Example: if the train layer is Named Entity and the
 * POS layer is used as an additional feature, the training document should be predicted using
 * the POS layer documents for POS annotation.
 * <p>
 * This method trains and saves one MIRA model per feature returned by
 * {@code aTemplate.getOtherFeatures()}, reading the training data from
 * {@code <feature id>.train} in the MIRA directory of the template's train feature.
 *
 * @param aTemplate
 *            the template.
 * @param aRepository
 *            the repository (not used by this method).
 * @param aAutomationService
 *            supplies the MIRA directory, the template file and the model file.
 * @throws IOException
 *             propagated from template creation or from the MIRA calls.
 * @throws ClassNotFoundException
 *             propagated from the calls made by this method.
 */
public static void otherFeatureClassifiers(MiraTemplate aTemplate, DocumentService aRepository, AutomationService aAutomationService) throws IOException, ClassNotFoundException {
    // Fixed training settings, identical for every feature.
    int frequency = 2;
    double sigma = 1;
    int iterations = 10;
    int beamSize = 0;
    boolean maxPosteriors = false;
    boolean randomInit = false;
    for (AnnotationFeature feature : aTemplate.getOtherFeatures()) {
        // Use a fresh Mira per feature: with a shared instance, an IOB scorer selected for one
        // feature would stay active for every feature trained after it.
        Mira mira = new Mira();
        String templateName = createTemplate(feature, getMiraTemplateFile(feature, aAutomationService), 0);
        File miraDir = aAutomationService.getMiraDir(aTemplate.getTrainFeature());
        File trainFile = new File(miraDir, feature.getId() + ".train");
        String trainName = trainFile.getAbsolutePath();
        String modelName = aAutomationService.getMiraModel(feature, true, null).getAbsolutePath();
        if (!feature.getLayer().isLockToTokenOffset()) {
            mira.setIobScorer();
        }
        mira.loadTemplates(templateName);
        mira.setClip(sigma);
        mira.maxPosteriors = maxPosteriors;
        mira.beamSize = beamSize;
        int numExamples = mira.count(trainName, frequency);
        mira.initModel(randomInit);
        for (int i = 0; i < iterations; i++) {
            mira.train(trainName, iterations, numExamples, i);
            mira.averageWeights(iterations * numExamples);
        }
        mira.saveModel(modelName);
    }
}
Also used : File(java.io.File) Mira(edu.lium.mira.Mira) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Example 4 with Mira

Use of edu.lium.mira.Mira in the webanno project (by webanno).

The predict method of the AutomationUtil class.

/**
 * Runs the MIRA model of the template's train feature over every source document of the
 * feature's project and hands the predicted tags to {@code automate}.
 * <p>
 * For each document the file {@code <document id>.pred} in the MIRA directory is used as
 * input, MIRA writes its output to {@code <document id>.pred-pred}, and the last
 * space-separated token of every non-blank output line is taken as the predicted tag. The
 * resulting CAS is written through the correction document service and the number of
 * annotation documents in the automation status is decremented.
 *
 * @param aTemplate
 *            the template.
 * @param aRepository
 *            the repository; used to list the source documents and to read the annotation CAS.
 * @param aCorrectionDocumentService
 *            used to write the correction CAS.
 * @param aAutomationService
 *            supplies the MIRA directory, the model file and the automation status.
 * @param aUserDao
 *            supplies the current user.
 * @throws UIMAException
 *             propagated from the calls made by this method.
 * @throws ClassNotFoundException
 *             propagated from the calls made by this method.
 * @throws IOException
 *             if one of the MIRA files cannot be opened, read, written or closed.
 * @throws AnnotationException
 *             propagated from the calls made by this method.
 */
public static void predict(MiraTemplate aTemplate, DocumentService aRepository, CorrectionDocumentService aCorrectionDocumentService, AutomationService aAutomationService, UserDao aUserDao) throws UIMAException, ClassNotFoundException, IOException, AnnotationException {
    AnnotationFeature layerFeature = aTemplate.getTrainFeature();
    File miraDir = aAutomationService.getMiraDir(layerFeature);
    AutomationStatus status = aAutomationService.getAutomationStatus(aTemplate);
    for (SourceDocument document : aRepository.listSourceDocuments(layerFeature.getProject())) {
        File predFile = new File(miraDir, document.getId() + ".pred");
        Mira mira = new Mira();
        int shiftColumns = 0;
        int nbest = 1;
        int beamSize = 0;
        boolean maxPosteriors = false;
        String modelName = aAutomationService.getMiraModel(layerFeature, false, null).getAbsolutePath();
        String testName = predFile.getAbsolutePath();
        File predictedFile = new File(predFile.getAbsolutePath() + "-pred");
        // Both streams are closed before the output file is read back, so the handles are
        // released and everything MIRA wrote is on disk.
        try (PrintStream stream = new PrintStream(predictedFile);
                BufferedReader input = new BufferedReader(new FileReader(testName))) {
            mira.loadModel(modelName);
            mira.setShiftColumns(shiftColumns);
            mira.nbest = nbest;
            mira.beamSize = beamSize;
            mira.maxPosteriors = maxPosteriors;
            mira.test(input, stream);
        }
        LOG.info("Prediction is wrtten to a MIRA File. To be done is writing back to the CAS");
        List<String> annotations = new ArrayList<>();
        try (FileReader predictedReader = new FileReader(predictedFile)) {
            LineIterator it = IOUtils.lineIterator(predictedReader);
            while (it.hasNext()) {
                String line = it.next();
                if (line.trim().equals("")) {
                    continue;
                }
                // The predicted tag is the last space-separated column of the line.
                StringTokenizer st = new StringTokenizer(line, " ");
                String tag = "";
                while (st.hasMoreTokens()) {
                    tag = st.nextToken();
                }
                annotations.add(tag);
            }
        }
        LOG.info(annotations.size() + " Predictions found to be written to the CAS");
        JCas jCas = null;
        User user = aUserDao.getCurrentUser();
        // NOTE(review): the error handling below is kept exactly as found but looks wrong and
        // should be confirmed against the intended behaviour: (1) when the exception is thrown
        // by one of the first two statements of the try block, jCas is still null inside the
        // catch block; (2) the statements after the try/catch repeat the automate/write/
        // decrement sequence, so automate runs twice on the normal path and the status would
        // be decremented twice on the failure path.
        try {
            AnnotationDocument annoDocument = aRepository.getAnnotationDocument(document, user);
            jCas = aRepository.readAnnotationCas(annoDocument);
            automate(jCas, layerFeature, annotations);
        } catch (DataRetrievalFailureException e) {
            automate(jCas, layerFeature, annotations);
            LOG.info("Predictions found are written to the CAS");
            aCorrectionDocumentService.writeCorrectionCas(jCas, document);
            status.setAnnoDocs(status.getAnnoDocs() - 1);
        }
        automate(jCas, layerFeature, annotations);
        LOG.info("Predictions found are written to the CAS");
        aCorrectionDocumentService.writeCorrectionCas(jCas, document);
        status.setAnnoDocs(status.getAnnoDocs() - 1);
    }
}
Also used : PrintStream(java.io.PrintStream) User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) InputStreamReader(java.io.InputStreamReader) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) LineIterator(org.apache.commons.io.LineIterator) AutomationStatus(de.tudarmstadt.ukp.clarin.webanno.automation.model.AutomationStatus) Mira(edu.lium.mira.Mira) StringTokenizer(java.util.StringTokenizer) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) DataRetrievalFailureException(org.springframework.dao.DataRetrievalFailureException) File(java.io.File) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)

Example 5 with Mira

Use of edu.lium.mira.Mira in the webanno project (by webanno).

The tabSepClassifiers method of the AutomationUtil class.

/**
 * Trains classifiers for external tab-separated files (token TAB feature).
 * <p>
 * Nothing is done unless at least one tab-separated document of the train feature's project
 * is unprocessed. In that case a MIRA model is trained and saved for every tab-separated
 * document that has no feature assigned; documents with a feature are target layer train
 * documents and are skipped.
 *
 * @param aTemplate
 *            the template.
 * @param aAutomationService
 *            supplies the document list, the MIRA directory, the template file and the model
 *            file.
 * @throws IOException
 *             propagated from template creation or from the MIRA calls.
 * @throws ClassNotFoundException
 *             propagated from the calls made by this method.
 */
public static void tabSepClassifiers(MiraTemplate aTemplate, AutomationService aAutomationService) throws IOException, ClassNotFoundException {
    // NOTE(review): this one instance is reused for every document trained below; confirm
    // that Mira does not carry state over from one training run to the next.
    Mira mira = new Mira();

    // Fixed training settings, identical for every document.
    final int frequency = 2;
    final double sigma = 1;
    final int iterations = 10;
    final int beamSize = 0;
    final boolean maxPosteriors = false;
    final boolean randomInit = false;

    boolean anyUnprocessed = false;
    for (TrainingDocument candidate : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        if (candidate.isProcessed()) {
            continue;
        }
        anyUnprocessed = true;
        break;
    }
    if (!anyUnprocessed) {
        return;
    }

    for (TrainingDocument trainingDocument : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        // Documents with a feature are target layer train documents; only the others are
        // trained here.
        if (trainingDocument.getFeature() == null) {
            File miraDir = aAutomationService.getMiraDir(aTemplate.getTrainFeature());
            File trainFile = new File(miraDir, trainingDocument.getId() + trainingDocument.getProject().getId() + ".train");
            String templateName = createTemplate(null, getMiraTemplateFile(aTemplate.getTrainFeature(), aAutomationService), 0);
            // Always empty, so no initial model is ever loaded below.
            String initialModelName = "";
            String trainName = trainFile.getAbsolutePath();
            String modelName = aAutomationService.getMiraModel(aTemplate.getTrainFeature(), true, trainingDocument).getAbsolutePath();

            mira.loadTemplates(templateName);
            mira.setClip(sigma);
            mira.maxPosteriors = maxPosteriors;
            mira.beamSize = beamSize;
            int numExamples = mira.count(trainName, frequency);
            mira.initModel(randomInit);
            if (!initialModelName.isEmpty()) {
                mira.loadModel(initialModelName);
            }
            for (int round = 0; round < iterations; round++) {
                mira.train(trainName, iterations, numExamples, round);
                mira.averageWeights(iterations * numExamples);
            }
            mira.saveModel(modelName);
        }
    }
}
Also used : File(java.io.File) Mira(edu.lium.mira.Mira) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)

Aggregations

Mira (edu.lium.mira.Mira): 5, File (java.io.File): 5, AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature): 4, SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument): 3, ArrayList (java.util.ArrayList): 3, TrainingDocument (de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument): 2, List (java.util.List): 2, AutomationStatus (de.tudarmstadt.ukp.clarin.webanno.automation.model.AutomationStatus): 1, AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument): 1, User (de.tudarmstadt.ukp.clarin.webanno.security.model.User): 1, BufferedReader (java.io.BufferedReader): 1, FileReader (java.io.FileReader): 1, InputStreamReader (java.io.InputStreamReader): 1, PrintStream (java.io.PrintStream): 1, StringTokenizer (java.util.StringTokenizer): 1, LineIterator (org.apache.commons.io.LineIterator): 1, JCas (org.apache.uima.jcas.JCas): 1, DataRetrievalFailureException (org.springframework.dao.DataRetrievalFailureException): 1