use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.
the class AutomationUtil method addOtherFeatureFromAnnotation.
/**
 * Re-uses annotations that already exist for an "other layer" feature: the annotation values are
 * read from the CAS and appended to the predictions as one feature column, so that this layer
 * does not have to be trained and predicted separately.
 * <p>
 * If {@code aSourceDocument} is {@code null} (training), the values are gathered from every
 * training document listed for the feature's project. Otherwise (prediction for the suggestion
 * pane), they are gathered from the current user's annotation document for
 * {@code aSourceDocument}. In both cases exactly one list is added to {@code aPredictions}.
 */
private static void addOtherFeatureFromAnnotation(AnnotationFeature aFeature, DocumentService aRepository, AutomationService aAutomationServic, AnnotationSchemaService aAnnotationService, UserDao aUserDao, List<List<String>> aPredictions, SourceDocument aSourceDocument) throws UIMAException, ClassNotFoundException, IOException {
    AutomationTypeAdapter adapter = (AutomationTypeAdapter) aAnnotationService.getAdapter(aFeature.getLayer());
    List<String> collected = new ArrayList<>();

    if (aSourceDocument != null) {
        // Prediction: read the annotations of the document shown to the current user.
        User currentUser = aUserDao.getCurrentUser();
        AnnotationDocument annotationDocument = aRepository.createOrGetAnnotationDocument(aSourceDocument, currentUser);
        JCas documentCas = aRepository.readAnnotationCas(annotationDocument);
        collectSentenceAnnotations(documentCas, aFeature, adapter, collected);
    } else {
        // Training: all training documents contribute to one combined list.
        for (TrainingDocument trainingDocument : aAutomationServic.listTrainingDocuments(aFeature.getProject())) {
            JCas trainingCas = aAutomationServic.readTrainingAnnotationCas(trainingDocument);
            collectSentenceAnnotations(trainingCas, aFeature, adapter, collected);
        }
    }

    aPredictions.add(collected);
}

/**
 * Appends the annotation values of every sentence in {@code aJCas} for {@code aFeature} to
 * {@code aTarget}. Layers flagged as multiple-token are read through the {@link SpanAdapter}
 * view of the adapter; all other layers use the adapter directly.
 */
private static void collectSentenceAnnotations(JCas aJCas, AnnotationFeature aFeature, AutomationTypeAdapter aAdapter, List<String> aTarget) throws UIMAException, ClassNotFoundException, IOException {
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        if (aFeature.getLayer().isMultipleTokens()) {
            aTarget.addAll(((SpanAdapter) aAdapter).getMultipleAnnotation(sentence, aFeature).values());
        } else {
            aTarget.addAll(aAdapter.getAnnotation(sentence, aFeature));
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.
the class AutomationUtil method tabSepClassifiers.
/**
 * Trains and saves a MIRA model for external tab-separated training documents
 * (token TAB feature) of the project that owns the template's train feature.
 * <p>
 * The method returns without training when none of the project's tab-sep documents is
 * unprocessed. Documents that have a feature assigned are target-layer training documents
 * and are skipped.
 *
 * @param aTemplate
 *            the template whose train feature selects the project, the MIRA directory, the
 *            template file and the model file.
 * @param aAutomationService
 *            the service used to list the tab-sep documents and to resolve the MIRA files.
 * @throws IOException
 *             propagated from the template/MIRA file operations called here.
 * @throws ClassNotFoundException
 *             propagated from the MIRA operations called here.
 */
public static void tabSepClassifiers(MiraTemplate aTemplate, AutomationService aAutomationService) throws IOException, ClassNotFoundException {
    Mira mira = new Mira();

    // Fixed training settings.
    final int frequency = 2;
    final double sigma = 1;
    final int iterations = 10;
    final int beamSize = 0;
    final boolean maxPosteriors = false;
    final boolean randomInit = false;

    // Only (re)train when at least one tab-sep document has not been processed yet.
    boolean allProcessed = true;
    for (TrainingDocument candidate : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        if (!candidate.isProcessed()) {
            allProcessed = false;
            break;
        }
    }
    if (allProcessed) {
        return;
    }

    for (TrainingDocument trainingDocument : aAutomationService.listTabSepDocuments(aTemplate.getTrainFeature().getProject())) {
        // Documents with a feature are target-layer train documents; only feature-less ones
        // are handled here.
        if (trainingDocument.getFeature() == null) {
            File miraDir = aAutomationService.getMiraDir(aTemplate.getTrainFeature());
            // NOTE(review): if both ids are numeric they are summed before ".train" is
            // appended - confirm this matches the name used when the train file is written.
            File trainFile = new File(miraDir, trainingDocument.getId() + trainingDocument.getProject().getId() + ".train");
            String templateName = createTemplate(null, getMiraTemplateFile(aTemplate.getTrainFeature(), aAutomationService), 0);
            // No initial model is configured, so the model is always trained from scratch.
            String initialModelName = "";
            String trainName = trainFile.getAbsolutePath();
            String modelName = aAutomationService.getMiraModel(aTemplate.getTrainFeature(), true, trainingDocument).getAbsolutePath();

            mira.loadTemplates(templateName);
            mira.setClip(sigma);
            mira.maxPosteriors = maxPosteriors;
            mira.beamSize = beamSize;
            int numExamples = mira.count(trainName, frequency);
            mira.initModel(randomInit);
            if (!initialModelName.isEmpty()) {
                mira.loadModel(initialModelName);
            }

            int iteration = 0;
            while (iteration < iterations) {
                mira.train(trainName, iterations, numExamples, iteration);
                mira.averageWeights(iterations * numExamples);
                iteration++;
            }
            mira.saveModel(modelName);
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.
the class AutomationUtil method addOtherFeatureTrainDocument.
/**
 * Generates the training files used to predict the "other" features of a template, so that
 * they can be added as extra features (for example the POS tag as a feature for an NE
 * classifier).
 * <p>
 * For each feature in {@code aTemplate.getOtherFeatures()} a file named
 * {@code <feature id>.train} is written into the MIRA directory of the template's train
 * feature. A feature is skipped when its train file already exists and none of its training
 * documents is unprocessed.
 *
 * @param aTemplate
 *            the template providing the train feature and the other features.
 * @param aAnnotationService
 *            used to obtain the type adapter of each feature's layer.
 * @param aAutomationService
 *            used to resolve the MIRA directory, the automation status and the training
 *            documents.
 * @param aUserDao
 *            not used by this method.
 * @throws IOException
 *             if the MIRA directory cannot be created or a train file cannot be written.
 * @throws UIMAException
 *             propagated from reading the training CAS or building the MIRA lines.
 * @throws ClassNotFoundException
 *             propagated from reading the training CAS or building the MIRA lines.
 */
public static void addOtherFeatureTrainDocument(MiraTemplate aTemplate, AnnotationSchemaService aAnnotationService, AutomationService aAutomationService, UserDao aUserDao) throws IOException, UIMAException, ClassNotFoundException {
    File miraDir = aAutomationService.getMiraDir(aTemplate.getTrainFeature());
    if (!miraDir.exists()) {
        FileUtils.forceMkdir(miraDir);
    }
    AutomationStatus status = aAutomationService.getAutomationStatus(aTemplate);
    for (AnnotationFeature feature : aTemplate.getOtherFeatures()) {
        File trainFile = new File(miraDir, feature.getId() + ".train");

        // Regenerate only if a training document of this feature is still unprocessed
        // or the train file has never been written.
        boolean documentChanged = false;
        for (TrainingDocument document : aAutomationService.listTrainingDocuments(feature.getProject())) {
            if (!document.isProcessed() && (document.getFeature() != null && document.getFeature().equals(feature))) {
                documentChanged = true;
                break;
            }
        }
        if (!documentChanged && trainFile.exists()) {
            continue;
        }

        // try-with-resources: the writer is closed even when reading a CAS or building a
        // line throws, so the file handle no longer leaks on failure.
        try (BufferedWriter trainOut = new BufferedWriter(new FileWriter(trainFile))) {
            AutomationTypeAdapter adapter = (AutomationTypeAdapter) aAnnotationService.getAdapter(feature.getLayer());
            for (TrainingDocument trainingDocument : aAutomationService.listTrainingDocuments(feature.getProject())) {
                if ((trainingDocument.getFeature() != null && trainingDocument.getFeature().equals(feature))) {
                    JCas jCas = aAutomationService.readTrainingAnnotationCas(trainingDocument);
                    // One MIRA line block per sentence, newline-terminated.
                    for (Sentence sentence : select(jCas, Sentence.class)) {
                        trainOut.append(getMiraLine(sentence, feature, adapter).toString()).append("\n");
                    }
                    trainingDocument.setProcessed(false);
                    status.setTrainDocs(status.getTrainDocs() - 1);
                }
            }
        }
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.
the class AutomationServiceEventAdapter method onBeforeProjectRemove.
/**
 * Cleans up the automation data of a project that is about to be removed: every training
 * document of the project is removed first, then every MIRA template.
 *
 * @param aEvent
 *            the event carrying the project being removed.
 * @throws Exception
 *             propagated from the automation service calls.
 */
@EventListener
public void onBeforeProjectRemove(BeforeProjectRemovedEvent aEvent) throws Exception {
    Project removedProject = aEvent.getProject();

    for (TrainingDocument trainingDocument : service.listTrainingDocuments(removedProject)) {
        service.removeTrainingDocument(trainingDocument);
    }

    for (MiraTemplate miraTemplate : service.listMiraTemplates(removedProject)) {
        // Detach the TRAIN and OTHER features from the template before it is removed.
        miraTemplate.setTrainFeature(null);
        miraTemplate.setOtherFeatures(null);
        service.removeMiraTemplate(miraTemplate);
    }
}
use of de.tudarmstadt.ukp.clarin.webanno.model.TrainingDocument in project webanno by webanno.
the class MiraAutomationServiceImpl method listTabSepDocuments.
/**
 * Lists the training documents of the given project whose format equals
 * {@code WebAnnoConst.TAB_SEP}.
 * <p>
 * All training documents of the project are queried and then filtered in memory. Documents
 * without a format are skipped.
 *
 * @param aProject
 *            the project whose training documents are queried.
 * @return a new list with the tab-sep training documents; empty if there are none.
 */
@Override
@Transactional(noRollbackFor = NoResultException.class)
public List<TrainingDocument> listTabSepDocuments(Project aProject) {
    List<TrainingDocument> trainingDocuments = entityManager.createQuery("FROM TrainingDocument where project =:project", TrainingDocument.class).setParameter("project", aProject).getResultList();
    List<TrainingDocument> tabSepDocuments = new ArrayList<>();
    for (TrainingDocument trainingDocument : trainingDocuments) {
        // Constant-first comparison: a document whose format is null is treated as
        // "not tab-sep" instead of raising a NullPointerException.
        if (WebAnnoConst.TAB_SEP.equals(trainingDocument.getFormat())) {
            tabSepDocuments.add(trainingDocument);
        }
    }
    return tabSepDocuments;
}
Aggregations