Search in sources :

Example 6 with TrainingDocument

use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.

the class AutomationUtil method addOtherFeatureFromAnnotation.

/**
 * Reuses already existing "other layer" annotations as a feature, so that this layer does not
 * have to be trained and predicted separately. The feature values of every sentence are gathered
 * into one list, which is appended to {@code aPredictions} as a single entry.
 *
 * @param aFeature
 *            the feature of the "other layer" whose annotations are read.
 * @param aRepository
 *            used to obtain and read the annotation document when predicting.
 * @param aAutomationServic
 *            used to list and read the training documents when training.
 * @param aAnnotationService
 *            used to look up the adapter of the feature's layer.
 * @param aUserDao
 *            used to resolve the current user when predicting.
 * @param aPredictions
 *            receives the collected values as one additional list.
 * @param aSourceDocument
 *            the document to predict on, or {@code null} when training, in which case all
 *            training documents of the feature's project are read instead.
 */
private static void addOtherFeatureFromAnnotation(AnnotationFeature aFeature, DocumentService aRepository, AutomationService aAutomationServic, AnnotationSchemaService aAnnotationService, UserDao aUserDao, List<List<String>> aPredictions, SourceDocument aSourceDocument) throws UIMAException, ClassNotFoundException, IOException {
    AutomationTypeAdapter typeAdapter = (AutomationTypeAdapter) aAnnotationService.getAdapter(aFeature.getLayer());
    List<String> featureValues = new ArrayList<>();
    if (aSourceDocument != null) {
        // Prediction (suggestion pane): read the current user's annotation document
        User currentUser = aUserDao.getCurrentUser();
        AnnotationDocument annotationDocument = aRepository.createOrGetAnnotationDocument(aSourceDocument, currentUser);
        JCas predictionCas = aRepository.readAnnotationCas(annotationDocument);
        collectSentenceAnnotations(predictionCas, aFeature, typeAdapter, featureValues);
    } else {
        // Training: the values of all training documents end up in one combined list
        for (TrainingDocument trainingDocument : aAutomationServic.listTrainingDocuments(aFeature.getProject())) {
            JCas trainingCas = aAutomationServic.readTrainingAnnotationCas(trainingDocument);
            collectSentenceAnnotations(trainingCas, aFeature, typeAdapter, featureValues);
        }
    }
    aPredictions.add(featureValues);
}

/**
 * Appends the values of {@code aFeature} for every sentence of {@code aJCas} to
 * {@code aTarget}, using the multi-token lookup when the feature's layer spans multiple tokens.
 */
private static void collectSentenceAnnotations(JCas aJCas, AnnotationFeature aFeature, AutomationTypeAdapter aAdapter, List<String> aTarget) throws UIMAException, ClassNotFoundException, IOException {
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        if (!aFeature.getLayer().isMultipleTokens()) {
            aTarget.addAll(aAdapter.getAnnotation(sentence, aFeature));
        } else {
            aTarget.addAll(((SpanAdapter) aAdapter).getMultipleAnnotation(sentence, aFeature).values());
        }
    }
}
Also used : User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) SpanAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter) AnnotationDocument(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AutomationTypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)

Example 7 with TrainingDocument

use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.

the class AutomationUtil method tabSepClassifiers.

/**
 * Trains classifiers for external tab-sep files (token TAB feature).
 * <p>
 * Nothing is trained when every tab-sep document of the project is already marked as processed.
 * Otherwise a MIRA model is trained and saved for each tab-sep document that has no feature
 * assigned; documents with a feature are target layer train documents and are skipped.
 *
 * @param aTemplate
 *            the template; its train feature determines the project, the MIRA directory, the
 *            template file and the model file.
 * @param aAutomationService
 *            the service used to list the tab-sep documents and to resolve the MIRA files.
 * @throws IOException
 *             declared for the file and model operations performed during training.
 * @throws ClassNotFoundException
 *             declared for the model operations performed during training.
 */
public static void tabSepClassifiers(MiraTemplate aTemplate, AutomationService aAutomationService) throws IOException, ClassNotFoundException {
    Mira trainer = new Mira();
    // Fixed training parameters
    final int featureFrequency = 2;
    final double clip = 1;
    final int rounds = 10;
    final int beam = 0;
    final boolean usePosteriors = false;

    // Look for at least one document that has not been processed yet
    boolean pendingWork = false;
    for (TrainingDocument candidate : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        if (document_isUnprocessed(candidate)) {
            pendingWork = true;
            break;
        }
    }
    if (!pendingWork) {
        return;
    }

    for (TrainingDocument tabSepDocument : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        // Only documents without a feature are handled here; the others are target layer
        // train documents
        if (tabSepDocument.getFeature() == null) {
            File miraDirectory = aAutomationService.getMiraDir(aTemplate.getTrainFeature());
            File trainingFile = new File(miraDirectory, tabSepDocument.getId() + tabSepDocument.getProject().getId() + ".train");
            String templatePath = createTemplate(null, getMiraTemplateFile(aTemplate.getTrainFeature(), aAutomationService), 0);
            String initialModelPath = "";
            String trainingPath = trainingFile.getAbsolutePath();
            File modelFile = aAutomationService.getMiraModel(aTemplate.getTrainFeature(), true, tabSepDocument);
            String modelPath = modelFile.getAbsolutePath();
            boolean initRandomly = false;

            trainer.loadTemplates(templatePath);
            trainer.setClip(clip);
            trainer.maxPosteriors = usePosteriors;
            trainer.beamSize = beam;
            int exampleCount = trainer.count(trainingPath, featureFrequency);
            trainer.initModel(initRandomly);
            if (!initialModelPath.isEmpty()) {
                trainer.loadModel(initialModelPath);
            }
            int round = 0;
            while (round < rounds) {
                trainer.train(trainingPath, rounds, exampleCount, round);
                trainer.averageWeights(rounds * exampleCount);
                round++;
            }
            trainer.saveModel(modelPath);
        }
    }
}

/**
 * Tells whether the given training document has not been processed yet.
 */
private static boolean document_isUnprocessed(TrainingDocument aDocument) {
    return !aDocument.isProcessed();
}
Also used : File(java.io.File) Mira(edu.lium.mira.Mira) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)

Example 8 with TrainingDocument

use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.

the class AutomationUtil method addOtherFeatureTrainDocument.

/**
 * Generates, for every "other" feature of the template, the training file that is used to
 * predict the training document in order to add extra features - for example to add the POS tag
 * as a feature for an NE classifier.
 * <p>
 * The file is named {@code <feature id>.train} and is placed in the MIRA directory of the
 * template's train feature, which is created if it does not exist. A feature is skipped when its
 * training file already exists and none of its training documents is unprocessed.
 *
 * @param aTemplate
 *            the template providing the train feature and the other features.
 * @param aAnnotationService
 *            used to look up the adapter of each feature's layer.
 * @param aAutomationService
 *            used to resolve the MIRA directory, the automation status and the training
 *            documents.
 * @param aUserDao
 *            not used by this method.
 * @throws IOException
 *             if the MIRA directory cannot be created or a training file cannot be written.
 * @throws UIMAException
 *             declared for reading the training CAS.
 * @throws ClassNotFoundException
 *             declared for reading the training CAS.
 */
public static void addOtherFeatureTrainDocument(MiraTemplate aTemplate, AnnotationSchemaService aAnnotationService, AutomationService aAutomationService, UserDao aUserDao) throws IOException, UIMAException, ClassNotFoundException {
    File miraDir = aAutomationService.getMiraDir(aTemplate.getTrainFeature());
    if (!miraDir.exists()) {
        FileUtils.forceMkdir(miraDir);
    }
    AutomationStatus status = aAutomationService.getAutomationStatus(aTemplate);
    for (AnnotationFeature feature : aTemplate.getOtherFeatures()) {
        File trainFile = new File(miraDir, feature.getId() + ".train");
        // Regenerate only if a document of this feature is unprocessed or the file is missing
        boolean documentChanged = false;
        for (TrainingDocument document : aAutomationService.listTrainingDocuments(feature.getProject())) {
            if (!document.isProcessed() && (document.getFeature() != null && document.getFeature().equals(feature))) {
                documentChanged = true;
                break;
            }
        }
        if (!documentChanged && trainFile.exists()) {
            continue;
        }
        // try-with-resources: the writer is closed even when reading a CAS or building a line
        // throws, so the file handle is never leaked
        try (BufferedWriter trainOut = new BufferedWriter(new FileWriter(trainFile))) {
            AutomationTypeAdapter adapter = (AutomationTypeAdapter) aAnnotationService.getAdapter(feature.getLayer());
            for (TrainingDocument trainingDocument : aAutomationService.listTrainingDocuments(feature.getProject())) {
                if ((trainingDocument.getFeature() != null && trainingDocument.getFeature().equals(feature))) {
                    JCas jCas = aAutomationService.readTrainingAnnotationCas(trainingDocument);
                    // One MIRA line per sentence
                    for (Sentence sentence : select(jCas, Sentence.class)) {
                        trainOut.append(getMiraLine(sentence, feature, adapter).toString()).append("\n");
                    }
                    // NOTE(review): the document is flagged as NOT processed and the train
                    // document counter is decremented here - confirm this is intended.
                    trainingDocument.setProcessed(false);
                    status.setTrainDocs(status.getTrainDocs() - 1);
                }
            }
        }
    }
}
Also used : FileWriter(java.io.FileWriter) JCas(org.apache.uima.jcas.JCas) File(java.io.File) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) AutomationStatus(de.tudarmstadt.ukp.clarin.webanno.automation.model.AutomationStatus) AnnotationFeature(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument) BufferedWriter(java.io.BufferedWriter) AutomationTypeAdapter(de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter)

Example 9 with TrainingDocument

use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.

the class AutomationServiceEventAdapter method onBeforeProjectRemove.

/**
 * Removes the automation data of a project before the project itself is removed: first all of
 * its training documents, then all of its MIRA templates.
 *
 * @param aEvent
 *            the event carrying the project that is about to be removed.
 * @throws Exception
 *             if removing a training document or a template fails.
 */
@EventListener
public void onBeforeProjectRemove(BeforeProjectRemovedEvent aEvent) throws Exception {
    Project projectToRemove = aEvent.getProject();

    for (TrainingDocument trainingDocument : service.listTrainingDocuments(projectToRemove)) {
        service.removeTrainingDocument(trainingDocument);
    }

    for (MiraTemplate miraTemplate : service.listMiraTemplates(projectToRemove)) {
        // Detach the TRAIN and OTHER features from the template before removing it
        miraTemplate.setTrainFeature(null);
        miraTemplate.setOtherFeatures(null);
        service.removeMiraTemplate(miraTemplate);
    }
}
Also used : Project(de.tudarmstadt.ukp.clarin.webanno.model.Project) MiraTemplate(de.tudarmstadt.ukp.clarin.webanno.automation.model.MiraTemplate) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument) EventListener(org.springframework.context.event.EventListener)

Example 10 with TrainingDocument

use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.

the class MiraAutomationServiceImpl method listTabSepDocuments.

/**
 * Lists the training documents of a project whose format is {@code WebAnnoConst.TAB_SEP}.
 * All training documents of the project are queried and then filtered by format in memory.
 *
 * @param aProject
 *            the project whose training documents are queried.
 * @return the tab-sep training documents of the project; an empty list if there are none.
 */
@Override
@Transactional(noRollbackFor = NoResultException.class)
public List<TrainingDocument> listTabSepDocuments(Project aProject) {
    List<TrainingDocument> trainingDocuments = entityManager.createQuery("FROM TrainingDocument where project =:project", TrainingDocument.class).setParameter("project", aProject).getResultList();
    List<TrainingDocument> tabSepDocuments = new ArrayList<>();
    for (TrainingDocument trainingDocument : trainingDocuments) {
        // Constant-first comparison: a document without a format is skipped instead of
        // causing a NullPointerException
        if (WebAnnoConst.TAB_SEP.equals(trainingDocument.getFormat())) {
            tabSepDocuments.add(trainingDocument);
        }
    }
    return tabSepDocuments;
}
Also used : ArrayList(java.util.ArrayList) TrainingDocument(de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument) Transactional(org.springframework.transaction.annotation.Transactional)

Aggregations

TrainingDocument (de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument)11 File (java.io.File)7 ArrayList (java.util.ArrayList)5 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)4 AutomationTypeAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.AutomationTypeAdapter)3 AutomationStatus (de.tudarmstadt.ukp.clarin.webanno.automation.model.AutomationStatus)3 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)3 BufferedWriter (java.io.BufferedWriter)3 FileReader (java.io.FileReader)3 FileWriter (java.io.FileWriter)3 StringTokenizer (java.util.StringTokenizer)3 LineIterator (org.apache.commons.io.LineIterator)3 JCas (org.apache.uima.jcas.JCas)3 MiraTemplate (de.tudarmstadt.ukp.clarin.webanno.automation.model.MiraTemplate)2 Project (de.tudarmstadt.ukp.clarin.webanno.model.Project)2 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)2 User (de.tudarmstadt.ukp.clarin.webanno.security.model.User)2 Mira (edu.lium.mira.Mira)2 SpanAdapter (de.tudarmstadt.ukp.clarin.webanno.api.annotation.adapter.SpanAdapter)1 AnnotationException (de.tudarmstadt.ukp.clarin.webanno.api.annotation.exception.AnnotationException)1