use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
the class TestTaskUtils method initJCas.
/**
 * Builds a small JCas fixture consisting of six tokens split into two sequences.
 * <p>
 * Every token is annotated with a {@code TextClassificationTarget} and a
 * {@code TextClassificationOutcome} carrying its tag. The first three tokens are covered by
 * one {@code TextClassificationSequence}, the remaining three by a second one. A
 * {@code JCasId} (4711) and a {@code DocumentMetaData} (title "title", id "4711") are added
 * as well.
 *
 * @param setUnitIdAsPartOfTheInstanceId
 *            if {@code true}, the token text is stored as the suffix of each target
 * @return the populated JCas
 * @throws Exception
 *             if the engine or any annotation cannot be created
 */
private JCas initJCas(boolean setUnitIdAsPartOfTheInstanceId) throws Exception {
    JCas jCas = AnalysisEngineFactory.createEngine(NoOpAnnotator.class).newJCas();

    JCasId casId = new JCasId(jCas);
    casId.setId(4711);
    casId.addToIndexes();

    DocumentMetaData metaData = new DocumentMetaData(jCas);
    metaData.setDocumentTitle("title");
    metaData.setDocumentId("4711");
    metaData.addToIndexes();

    // Each row is { token text, outcome label }.
    String[][] labelledTokens = {
            // sequence 1
            { "a", "DT" }, { "car", "NN" }, { "drives", "VBZ" },
            // sequence 2
            { "the", "DT" }, { "hedgehogs", "NN" }, { "dies", "VBZ" } };

    StringBuilder textBuffer = new StringBuilder();
    int position = 0;
    for (String[] labelledToken : labelledTokens) {
        String word = labelledToken[0];
        int begin = textBuffer.length();
        int finish = begin + word.length();

        TextClassificationTarget target = new TextClassificationTarget(jCas, begin, finish);
        if (setUnitIdAsPartOfTheInstanceId) {
            target.setSuffix(word);
        }
        target.setId(position);
        target.addToIndexes();

        TextClassificationOutcome label = new TextClassificationOutcome(jCas, begin, finish);
        label.setOutcome(labelledToken[1]);
        label.addToIndexes();

        textBuffer.append(word);
        position++;
        // Tokens are separated by a single blank; no blank after the last one.
        boolean moreTokensFollow = position < labelledTokens.length;
        if (moreTokensFollow) {
            textBuffer.append(" ");
        }
    }

    String documentText = textBuffer.toString();
    jCas.setDocumentText(documentText);

    // The first sequence spans the first three tokens plus the two blanks between them.
    int firstSequenceEnd = labelledTokens[0][0].length() + 1 + labelledTokens[1][0].length() + 1
            + labelledTokens[2][0].length();
    new TextClassificationSequence(jCas, 0, firstSequenceEnd).addToIndexes();

    // The second sequence starts after the separating blank and runs to the end of the text.
    new TextClassificationSequence(jCas, firstSequenceEnd + 1, documentText.length())
            .addToIndexes();

    return jCas;
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
the class TestFoldUtil method createNoSequenceCas.
/**
 * Creates the temporary folder {@code tmpFoldNoSeq} and the CAS {@code jcasNoSequence},
 * marks thirteen unit spans on a fixed sentence via {@code setUnit}, attaches document
 * metadata and a JCasId annotation, and finally runs a {@code BinaryCasWriter} (format "6+")
 * targeting the temporary folder over the CAS.
 *
 * @throws Exception
 *             if the folder, the CAS or the writer cannot be created or run
 */
private void createNoSequenceCas() throws Exception {
    tmpFoldNoSeq = new TemporaryFolder();
    tmpFoldNoSeq.create();

    jcasNoSequence = JCasFactory.createJCas();
    jcasNoSequence.setDocumentText("Mr. Hawksley said yesterday he would be willing to go before the city .");

    // { begin, end } offsets of the units, in the order in which they are registered.
    int[][] unitSpans = {
            { 0, 2 }, { 4, 12 }, { 13, 18 }, { 18, 28 }, { 31, 36 }, { 37, 39 }, { 40, 47 },
            { 48, 50 }, { 51, 53 }, { 54, 60 }, { 61, 64 }, { 65, 69 }, { 70, 71 } };
    for (int[] span : unitSpans) {
        setUnit(jcasNoSequence, span[0], span[1]);
    }

    DocumentMetaData metaData = new DocumentMetaData(jcasNoSequence);
    metaData.setDocumentId("id");
    metaData.addToIndexes();

    createJCasIdAnnotation(jcasNoSequence);

    AnalysisEngine binaryCasWriter = AnalysisEngineFactory.createEngine(
            BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION, tmpFoldNoSeq.getRoot(),
            BinaryCasWriter.PARAM_FORMAT, "6+");
    binaryCasWriter.process(jcasNoSequence);
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
the class TestPairReader method getNext.
/**
 * Lets the superclass fill the CAS, then adds a {@code JCasId} with a running number and one
 * {@code TextClassificationOutcome} per value returned by
 * {@code getTextClassificationOutcomes}. Finally the document title and URI are made unique
 * by appending the current {@code fileOffset}.
 */
@Override
public void getNext(JCas jcas) throws IOException, CollectionException {
    super.getNext(jcas);

    JCasId casId = new JCasId(jcas);
    casId.setId(jcasid++);
    casId.addToIndexes();

    for (String label : getTextClassificationOutcomes(jcas)) {
        TextClassificationOutcome annotation = new TextClassificationOutcome(jcas);
        annotation.setOutcome(label);
        annotation.addToIndexes();
    }

    // Several CASes are produced from one file. Without a distinct title and URI per CAS,
    // the serialized CASes would overwrite each other.
    DocumentMetaData metaData = DocumentMetaData.get(jcas);
    String uniqueSuffix = "-" + fileOffset;
    metaData.setDocumentTitle(metaData.getDocumentTitle() + uniqueSuffix);
    metaData.setDocumentUri(metaData.getDocumentUri() + uniqueSuffix);
    fileOffset++;
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
the class CasStorageServiceImpl method realWriteCas.
/**
 * Runs the CAS doctor (if configured) on the given CAS, writes the CAS to
 * {@code <aUserName>.ser} and, if {@code backupInterval > 0}, maintains the timestamped
 * history files {@code <aUserName>.ser.<timestamp>.bak} in the annotation folder.
 * <p>
 * While the new version is being written, the previous version is kept as
 * {@code <aUserName>.ser.old}. If writing fails, the previous version is restored and the
 * original {@link IOException} is rethrown; a failure during the restore is attached to it as
 * a suppressed exception instead of replacing it.
 *
 * @param aProject
 *            the project the document belongs to (used for the CAS doctor and for logging)
 * @param aDocumentName
 *            name of the source document (logging only)
 * @param aDocumentId
 *            id of the source document (logging only)
 * @param aJcas
 *            the CAS to persist
 * @param aUserName
 *            the user owning the annotations; determines the file name and is stored as the
 *            document id in the CAS metadata
 * @param aAnnotationFolder
 *            folder in which the backup and the history files are managed
 * @param aTargetPath
 *            folder into which the serialized CAS is written
 * @throws IOException
 *             if the CAS cannot be written or the backup/history files cannot be managed
 * @throws DataRetrievalFailureException
 *             if the CAS doctor reports problems or fails
 */
private void realWriteCas(Project aProject, String aDocumentName, long aDocumentId, JCas aJcas, String aUserName, File aAnnotationFolder, File aTargetPath) throws IOException {
    log.debug("Writing annotation document [{}]({}) for user [{}] in project [{}]({})", aDocumentName, aDocumentId, aUserName, aProject.getName(), aProject.getId());

    try {
        if (casDoctor != null) {
            casDoctor.analyze(aProject, aJcas.getCas());
        }
    } catch (CasDoctorException e) {
        StringBuilder detailMsg = new StringBuilder();
        detailMsg.append("CAS Doctor found problems for user [").append(aUserName).append("] in source document [").append(aDocumentName).append("] (").append(aDocumentId).append(") in project[").append(aProject.getName()).append("] (").append(aProject.getId()).append(")\n");
        e.getDetails().forEach(m -> detailMsg.append(String.format("- [%s] %s%n", m.level, m.message)));
        // Keep the original exception as cause so that its stack trace is not lost.
        throw new DataRetrievalFailureException(detailMsg.toString(), e);
    } catch (Exception e) {
        throw new DataRetrievalFailureException("Error analyzing CAS of user [" + aUserName + "] in source document [" + aDocumentName + "] (" + aDocumentId + ") in project [" + aProject.getName() + "] (" + aProject.getId() + ")", e);
    }

    synchronized (lock) {
        FileUtils.forceMkdir(aAnnotationFolder);

        File currentVersion = new File(aAnnotationFolder, aUserName + ".ser");
        File oldVersion = new File(aAnnotationFolder, aUserName + ".ser.old");

        // Make a backup of the current version of the file before overwriting. This is done
        // outside of the try block below on purpose: if the backup cannot be made, the current
        // version is still intact and must not be touched by the recovery code.
        if (currentVersion.exists()) {
            renameFile(currentVersion, oldVersion);
        }

        // Save current version
        try {
            // Now write the new version to "<username>.ser" or CURATION_USER.ser
            DocumentMetaData md;
            try {
                md = DocumentMetaData.get(aJcas);
            } catch (IllegalArgumentException e) {
                // The CAS does not carry any document metadata yet
                md = DocumentMetaData.create(aJcas);
            }
            md.setDocumentId(aUserName);

            // NOTE(review): the file is written below aTargetPath while backup and history
            // handling work on aAnnotationFolder - presumably callers pass the same directory
            // for both; confirm.
            CasPersistenceUtils.writeSerializedCas(aJcas, new File(aTargetPath, aUserName + ".ser"));

            try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                log.info("Updated annotations for user [{}] on document [{}]({}) in project [{}]({})", aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
            }

            // If the saving was successful, we delete the old version
            if (oldVersion.exists()) {
                FileUtils.forceDelete(oldVersion);
            }
        } catch (IOException e) {
            // If we could not save the new version, restore the old one.
            try {
                // The new file may never have been created, so only remove it if it is there
                if (currentVersion.exists()) {
                    FileUtils.forceDelete(currentVersion);
                }
                // If this is the first version, there is no old version, so do not restore
                // anything
                if (oldVersion.exists()) {
                    renameFile(oldVersion, currentVersion);
                }
            } catch (IOException restoreFailure) {
                // Do not let a failing restore hide the reason why saving failed
                e.addSuppressed(restoreFailure);
            }
            // Now abort anyway
            throw e;
        }

        // Manage history
        if (backupInterval > 0) {
            // Determine the reference point in time based on the current version
            long now = currentVersion.lastModified();

            // Get all history files for the current user
            File[] history = aAnnotationFolder.listFiles(new FileFilter() {
                private final Matcher matcher = Pattern.compile(Pattern.quote(aUserName) + "\\.ser\\.[0-9]+\\.bak").matcher("");

                @Override
                public boolean accept(File aFile) {
                    // Check if the filename matches the pattern given above.
                    return matcher.reset(aFile.getName()).matches();
                }
            });
            // listFiles() returns null if the folder cannot be read
            if (history == null) {
                throw new IOException("Unable to list history files in [" + aAnnotationFolder + "]");
            }

            // Sort the files (oldest one first)
            Arrays.sort(history, LastModifiedFileComparator.LASTMODIFIED_COMPARATOR);

            // Check if we need to make a new history file
            boolean historyFileCreated = false;
            File historyFile = new File(aAnnotationFolder, aUserName + ".ser." + now + ".bak");
            if (history.length == 0) {
                // If there is no history yet but we should keep history, then we create a
                // history file in any case.
                FileUtils.copyFile(currentVersion, historyFile);
                historyFileCreated = true;
            } else {
                // Check if the newest history file is significantly older than the current one
                File latestHistory = history[history.length - 1];
                if (latestHistory.lastModified() + backupInterval < now) {
                    FileUtils.copyFile(currentVersion, historyFile);
                    historyFileCreated = true;
                }
            }

            // The history is only pruned when a new history file has been added
            if (historyFileCreated) {
                // Prune history based on number of backup.
                // The new version is not in the history, so we keep that in any case. That
                // means we need to keep one less.
                int toKeep = Math.max(backupKeepNumber - 1, 0);
                if ((backupKeepNumber > 0) && (toKeep < history.length)) {
                    // Copy the oldest files to a new array
                    File[] toRemove = new File[history.length - toKeep];
                    System.arraycopy(history, 0, toRemove, 0, toRemove.length);

                    // Restrict the history to what is left
                    File[] newHistory = new File[toKeep];
                    if (toKeep > 0) {
                        System.arraycopy(history, toRemove.length, newHistory, 0, newHistory.length);
                    }
                    history = newHistory;

                    // Remove these old files
                    for (File file : toRemove) {
                        FileUtils.forceDelete(file);
                        try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                            log.info("Removed surplus history file [{}] of user [{}] for document [{}]({}) in project [{}]({})", file.getName(), aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
                        }
                    }
                }

                // Prune history based on time
                if (backupKeepTime > 0) {
                    for (File file : history) {
                        if ((file.lastModified() + backupKeepTime) < now) {
                            FileUtils.forceDelete(file);
                            try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(aProject.getId()))) {
                                log.info("Removed outdated history file [{}] of user [{}] for document [{}]({}) in project [{}]({})", file.getName(), aUserName, aDocumentName, aDocumentId, aProject.getName(), aProject.getId());
                            }
                        }
                    }
                }
            }
        }
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
the class ImportExportServiceImpl method exportCasToFile.
/**
 * Exports the given CAS using the given writer class and returns the resulting file.
 * <p>
 * The writer runs against a fresh temporary directory. If it produces exactly one file, that
 * file is copied next to the temporary directory and returned. If it produces several files
 * (a writer may emit multiple files with different extensions), they are packaged into a ZIP
 * file which is returned instead. The temporary directory is removed in either case.
 * <p>
 * Before the export, the document metadata of the CAS is pointed at the source folder of the
 * document and the tagset names are written into the CAS.
 *
 * @param cas
 *            the CAS to export
 * @param aDocument
 *            the source document the CAS belongs to
 * @param aFileName
 *            the file name to record in the document metadata
 * @param aWriter
 *            the writer class to use
 * @param aStripExtension
 *            passed on to the writer as {@code PARAM_STRIP_EXTENSION}
 * @return the exported file or a ZIP file containing all exported files
 * @throws IOException
 *             if the writer produced no output, or the output cannot be zipped or copied
 * @throws UIMAException
 *             if the writer cannot be created or run
 */
@Override
public File exportCasToFile(CAS cas, SourceDocument aDocument, String aFileName, @SuppressWarnings("rawtypes") Class aWriter, boolean aStripExtension) throws IOException, UIMAException {
    // Update the source file name in case it is changed for some reason. This is necessary
    // for the writers to create the files under the correct names.
    Project project = aDocument.getProject();
    File currentDocumentUri = new File(dir.getAbsolutePath() + "/" + PROJECT_FOLDER + "/" + project.getId() + "/" + DOCUMENT_FOLDER + "/" + aDocument.getId() + "/" + SOURCE_FOLDER);
    String baseUri = currentDocumentUri.toURI().toURL().toExternalForm();
    DocumentMetaData documentMetadata = DocumentMetaData.get(cas.getJCas());
    documentMetadata.setDocumentUri(new File(currentDocumentUri, aFileName).toURI().toURL().toExternalForm());
    documentMetadata.setDocumentBaseUri(baseUri);
    documentMetadata.setCollectionId(baseUri);

    // update with the correct tagset name
    for (AnnotationFeature feature : annotationService.listAnnotationFeature(project)) {
        TagSet tagSet = feature.getTagset();
        if (tagSet != null && !feature.getLayer().getType().equals(WebAnnoConst.CHAIN_TYPE)) {
            updateCasWithTagSet(cas, feature.getLayer().getName(), tagSet.getName());
        }
    }

    // Create the directory atomically instead of creating a temp file, deleting it and
    // re-creating it as a directory, which is racy and silently ignores failures.
    File exportTempDir = java.nio.file.Files.createTempDirectory("webanno").toFile();
    try {
        AnalysisEngineDescription writer;
        if (aWriter.getName().equals("de.tudarmstadt.ukp.clarin.webanno.tsv.WebannoTsv3Writer")) {
            List<AnnotationLayer> layers = annotationService.listAnnotationLayer(project);

            List<String> slotFeatures = new ArrayList<>();
            List<String> slotTargets = new ArrayList<>();
            List<String> linkTypes = new ArrayList<>();
            Set<String> spanLayers = new HashSet<>();
            Set<String> slotLayers = new HashSet<>();
            for (AnnotationLayer layer : layers) {
                // Layers without annotations in the CAS are not used by the TSV writer
                if (!layer.getType().contentEquals(WebAnnoConst.SPAN_TYPE) || !annotationExists(cas, layer.getName())) {
                    continue;
                }
                boolean isSlotLayer = false;
                for (AnnotationFeature f : annotationService.listAnnotationFeature(layer)) {
                    if (MultiValueMode.ARRAY.equals(f.getMultiValueMode()) && LinkMode.WITH_ROLE.equals(f.getLinkMode())) {
                        isSlotLayer = true;
                        slotFeatures.add(layer.getName() + ":" + f.getName());
                        slotTargets.add(f.getType());
                        linkTypes.add(f.getLinkTypeName());
                    }
                }
                if (isSlotLayer) {
                    slotLayers.add(layer.getName());
                } else {
                    spanLayers.add(layer.getName());
                }
            }
            spanLayers.addAll(slotLayers);

            List<String> chainLayers = new ArrayList<>();
            for (AnnotationLayer layer : layers) {
                if (layer.getType().contentEquals(WebAnnoConst.CHAIN_TYPE) && chainAnnotationExists(cas, layer.getName() + "Chain")) {
                    chainLayers.add(layer.getName());
                }
            }

            List<String> relationLayers = new ArrayList<>();
            for (AnnotationLayer layer : layers) {
                // Layers without annotations in the CAS are not used by the TSV writer
                if (layer.getType().contentEquals(WebAnnoConst.RELATION_TYPE) && annotationExists(cas, layer.getName())) {
                    relationLayers.add(layer.getName());
                }
            }

            writer = createEngineDescription(aWriter, JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, exportTempDir, JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension, "spanLayers", spanLayers, "slotFeatures", slotFeatures, "slotTargets", slotTargets, "linkTypes", linkTypes, "chainLayers", chainLayers, "relationLayers", relationLayers);
        } else {
            writer = createEngineDescription(aWriter, JCasFileWriter_ImplBase.PARAM_TARGET_LOCATION, exportTempDir, JCasFileWriter_ImplBase.PARAM_STRIP_EXTENSION, aStripExtension);
        }

        runPipeline(cas, writer);

        // listFiles() returns null if the directory cannot be read; an empty result means the
        // writer did not write anything. Neither case can yield a file to return.
        File[] exportedFiles = exportTempDir.listFiles();
        if (exportedFiles == null || exportedFiles.length == 0) {
            throw new IOException("Writer [" + aWriter.getName() + "] did not produce any output for [" + aFileName + "]");
        }

        File exportFile;
        if (exportedFiles.length > 1) {
            // If the writer produced more than one file, we package it up as a ZIP file
            exportFile = new File(exportTempDir.getAbsolutePath() + ".zip");
            try {
                ZipUtils.zipFolder(exportTempDir, exportFile);
            } catch (Exception e) {
                try (MDC.MDCCloseable closable = MDC.putCloseable(Logging.KEY_PROJECT_ID, String.valueOf(project.getId()))) {
                    log.error("Unable to create zip File", e);
                }
                // Do not hand a missing or incomplete ZIP file back to the caller
                throw new IOException("Unable to create zip File [" + exportFile + "]", e);
            }
        } else {
            exportFile = new File(exportTempDir.getParent(), exportedFiles[0].getName());
            FileUtils.copyFile(exportedFiles[0], exportFile);
        }
        return exportFile;
    } finally {
        FileUtils.forceDelete(exportTempDir);
    }
}
Aggregations