Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
The class WebannoTsv2Reader, method convertToCas.
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException {
    StringBuilder text = new StringBuilder();
    DocumentMetaData documentMetadata = DocumentMetaData.get(aJCas);
    fileName = documentMetadata.getDocumentTitle();
    setAnnotations(aJCas, aIs, aEncoding, text);
    aJCas.setDocumentText(text.toString());
}
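The method assumes that a DocumentMetaData annotation is already present in the JCas when convertToCas is called; otherwise DocumentMetaData.get(aJCas) fails. A minimal sketch (not taken from WebannoTsv2Reader; the file name is a placeholder) of creating it up front with DKPro Core's factory method and uimaFIT's JCasFactory:

// Sketch only - illustrates the precondition of convertToCas(), not the actual reader code.
JCas jcas = JCasFactory.createJCas();                   // org.apache.uima.fit.factory.JCasFactory
DocumentMetaData meta = DocumentMetaData.create(jcas);  // creates the annotation and adds it to the indexes
meta.setDocumentTitle("example.tsv");                   // read back above via getDocumentTitle()
meta.setDocumentId("example.tsv");
// the JCas and an InputStream for the TSV file can now be passed to convertToCas(...)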
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
The class WebannoTsv2Writer, method convertToTsv.
private void convertToTsv(JCas aJCas, OutputStream aOs, String aEncoding)
        throws IOException, ResourceInitializationException, CASRuntimeException, CASException {
    LowLevelCAS llCas = aJCas.getLowLevelCas();
    tokenIds = new HashMap<>();
    setTokenId(aJCas, tokenIds);
    tokenPositions = new TreeMap<>();
    setTokenPosition(aJCas, tokenPositions);
    Map<Integer, Integer> getTokensPerSentence = new TreeMap<>();
    setTokenSentenceAddress(aJCas, getTokensPerSentence);
    // list of annotation types
    Set<Type> allTypes = new LinkedHashSet<>();
    for (Annotation a : select(aJCas, Annotation.class)) {
        if (!(a instanceof Token || a instanceof Sentence || a instanceof DocumentMetaData
                || a instanceof TagsetDescription || a instanceof CoreferenceLink)) {
            allTypes.add(a.getType());
        }
    }
    Set<Type> relationTypes = new LinkedHashSet<>();
    // get all arc types
    for (Type type : allTypes) {
        if (type.getFeatures().size() == 0) {
            continue;
        }
        for (Feature feature : type.getFeatures()) {
            if (feature.getShortName().equals(GOVERNOR)) {
                relationTypes.add(type);
                break;
            }
        }
    }
    allTypes.removeAll(relationTypes);
    // relation annotations
    Map<Type, String> relationTypesMap = new HashMap<>();
    for (Type type : relationTypes) {
        if (type.getName().equals(Dependency.class.getName())) {
            relationTypesMap.put(type, POS.class.getName());
            continue;
        }
        for (AnnotationFS anno : CasUtil.select(aJCas.getCas(), type)) {
            for (Feature feature : type.getFeatures()) {
                if (feature.getShortName().equals(GOVERNOR)) {
                    relationTypesMap.put(type, anno.getFeatureValue(feature).getType().getName());
                }
            }
        }
    }
    // write all span annotation headers first
    Map<Feature, Type> spanFeatures = new LinkedHashMap<>();
    allTypes: for (Type type : allTypes) {
        if (type.getFeatures().size() == 0) {
            continue;
        }
        for (Feature feature : type.getFeatures()) {
            // coreference annotation not supported
            if (feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
                continue allTypes;
            }
        }
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                    || feature.toString().equals("uima.tcas.Annotation:begin")
                    || feature.toString().equals("uima.tcas.Annotation:end")) {
                continue;
            }
            spanFeatures.put(feature, type);
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
    }
    // then write all relation annotation headers
    Set<Feature> relationFeatures = new LinkedHashSet<>();
    for (Type type : relationTypes) {
        IOUtils.write(" # " + type.getName(), aOs, aEncoding);
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                    || feature.toString().equals("uima.tcas.Annotation:begin")
                    || feature.toString().equals("uima.tcas.Annotation:end")
                    || feature.getShortName().equals(GOVERNOR)
                    || feature.getShortName().equals(DEPENDENT)) {
                continue;
            }
            relationFeatures.add(feature);
            IOUtils.write(" | " + feature.getShortName(), aOs, aEncoding);
        }
        // Add the attach type for the relation annotation
        IOUtils.write(" | AttachTo=" + relationTypesMap.get(type), aOs, aEncoding);
    }
    IOUtils.write("\n", aOs, aEncoding);
    Map<Feature, Map<Integer, String>> allAnnos = new HashMap<>();
    allTypes: for (Type type : allTypes) {
        for (Feature feature : type.getFeatures()) {
            // coreference annotation not supported
            if (feature.getShortName().equals(FIRST) || feature.getShortName().equals(NEXT)) {
                continue allTypes;
            }
        }
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                    || feature.toString().equals("uima.tcas.Annotation:begin")
                    || feature.toString().equals("uima.tcas.Annotation:end")) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new TreeMap<>();
            setTokenAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            allAnnos.put(feature, tokenAnnoMap);
        }
    }
    // get tokens where dependents are drawn to
    Map<Feature, Map<Integer, String>> relAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        for (Feature feature : type.getFeatures()) {
            if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                    || feature.toString().equals("uima.tcas.Annotation:begin")
                    || feature.toString().equals("uima.tcas.Annotation:end")
                    || feature.getShortName().equals(GOVERNOR)
                    || feature.getShortName().equals(DEPENDENT)) {
                continue;
            }
            Map<Integer, String> tokenAnnoMap = new HashMap<>();
            setRelationFeatureAnnos(aJCas.getCas(), tokenAnnoMap, type, feature);
            relAnnos.put(feature, tokenAnnoMap);
        }
    }
    // get tokens where dependents are drawn from - the governor
    Map<Type, Map<Integer, String>> governorAnnos = new HashMap<>();
    for (Type type : relationTypes) {
        Map<Integer, String> govAnnoMap = new HashMap<>();
        setRelationGovernorPos(aJCas.getCas(), govAnnoMap, type);
        governorAnnos.put(type, govAnnoMap);
    }
    int sentId = 1;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        IOUtils.write("#id=" + sentId++ + "\n", aOs, aEncoding);
        IOUtils.write("#text=" + sentence.getCoveredText().replace("\n", "") + "\n", aOs, aEncoding);
        for (Token token : selectCovered(Token.class, sentence)) {
            IOUtils.write(tokenIds.get(llCas.ll_getFSRef(token)) + "\t" + token.getCoveredText() + "\t",
                    aOs, aEncoding);
            // all span annotations on this token
            for (Feature feature : spanFeatures.keySet()) {
                String annos = allAnnos.get(feature).get(llCas.ll_getFSRef(token));
                if (annos == null) {
                    if (multipleSpans.contains(spanFeatures.get(feature).getName())) {
                        IOUtils.write("O\t", aOs, aEncoding);
                    } else {
                        IOUtils.write("_\t", aOs, aEncoding);
                    }
                } else {
                    IOUtils.write(annos + "\t", aOs, aEncoding);
                }
            }
            for (Type type : relationTypes) {
                for (Feature feature : type.getFeatures()) {
                    if (feature.toString().equals("uima.cas.AnnotationBase:sofa")
                            || feature.toString().equals("uima.tcas.Annotation:begin")
                            || feature.toString().equals("uima.tcas.Annotation:end")
                            || feature.getShortName().equals(GOVERNOR)
                            || feature.getShortName().equals(DEPENDENT)) {
                        continue;
                    }
                    String annos = relAnnos.get(feature).get(llCas.ll_getFSRef(token));
                    if (annos == null) {
                        IOUtils.write("_\t", aOs, aEncoding);
                    } else {
                        IOUtils.write(annos + "\t", aOs, aEncoding);
                    }
                }
                // the governor positions
                String govPos = governorAnnos.get(type).get(llCas.ll_getFSRef(token));
                if (govPos == null) {
                    IOUtils.write("_\t", aOs, aEncoding);
                } else {
                    IOUtils.write(governorAnnos.get(type).get(llCas.ll_getFSRef(token)) + "\t", aOs, aEncoding);
                }
            }
            IOUtils.write("\n", aOs, aEncoding);
        }
        IOUtils.write("\n", aOs, aEncoding);
    }
}
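Putting the write calls together, the emitted file has roughly the following shape. Everything in angle brackets is a placeholder, TAB stands for a tab character, and the exact number of columns depends on which span and relation types occur in the CAS (illustration only, derived from the calls above):

 # <span type name> | <feature> | <feature> ...  # <relation type name> | <feature> | AttachTo=<attach type>
#id=1
#text=<sentence text with line breaks removed>
<token id> TAB <token text> TAB <span value, or _ / O when absent> TAB <relation value or _> TAB <governor position or _>
...
(an empty line is written after each sentence)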
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
The class CasDiff2, method addCas.
/**
* CASes are added to the diff one after another, building the diff iteratively. A CAS can be
* added multiple times for different types. Make sure a CAS is not added twice with the same
* type!
*
* @param aCasGroupId
* the ID of the CAS group to add.
* @param aCasId
* the ID (index) of the CAS within its group; CASes are added in order.
* @param aCas
* the CAS itself.
* @param aType
* the type on which to calculate the diff.
*/
private void addCas(String aCasGroupId, int aCasId, CAS aCas, String aType) {
    // Remember that we have already seen this CAS.
    List<CAS> casList = cases.get(aCasGroupId);
    if (casList == null) {
        casList = new ArrayList<>();
        cases.put(aCasGroupId, casList);
    }
    // that failed when we had multiple "null" CASes.
    if ((casList.size() - 1) < aCasId) {
        casList.add(aCas);
    }
    assert (casList.size() - 1) == aCasId : "Expected CAS ID [" + (casList.size() - 1)
            + "] but was [" + aCasId + "]";
    // We add these to the internal list above, but then we bail out here.
    if (aCas == null) {
        log.debug("CAS group [" + aCasGroupId + "] does not contain a CAS at index [" + aCasId + "].");
        return;
    }
    if (log.isDebugEnabled()) {
        log.debug("Processing CAS group [" + aCasGroupId + "] CAS [" + aCasId + "].");
        String collectionId = null;
        String documentId = null;
        try {
            DocumentMetaData dmd = DocumentMetaData.get(aCas);
            collectionId = dmd.getCollectionId();
            documentId = dmd.getDocumentId();
            log.debug("User [" + collectionId + "] - Document [" + documentId + "]");
        } catch (IllegalArgumentException e) {
            // We use this information only for debugging - so we can ignore if the information
            // is missing.
        }
    }
    Collection<AnnotationFS> annotations;
    if (begin == -1 && end == -1) {
        annotations = select(aCas, getType(aCas, aType));
    } else {
        annotations = selectCovered(aCas, getType(aCas, aType), begin, end);
    }
    if (annotations.isEmpty()) {
        log.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains no annotations of type ["
                + aType + "]");
        return;
    } else {
        log.debug("CAS group [" + aCasGroupId + "] CAS [" + aCasId + "] contains [" + annotations.size()
                + "] annotations of type [" + aType + "]");
    }
    int posBefore = configSets.keySet().size();
    log.debug("Positions before: [" + posBefore + "]");
    for (AnnotationFS fs : annotations) {
        List<Position> positions = new ArrayList<>();
        // Get/create configuration set at the current position
        positions.add(getAdapter(aType).getPosition(aCasId, fs));
        // Generate secondary positions for multi-link features
        positions.addAll(getAdapter(aType).generateSubPositions(aCasId, fs, linkCompareBehavior));
        for (Position pos : positions) {
            ConfigurationSet configSet = configSets.get(pos);
            if (configSet == null) {
                configSet = new ConfigurationSet(pos);
                configSets.put(pos, configSet);
            }
            assert pos.getClass() == configSet.position.getClass() : "Position type mismatch ["
                    + pos.getClass() + "] vs [" + configSet.position.getClass() + "]";
            // Merge FS into current set
            configSet.addConfiguration(aCasGroupId, fs);
        }
    }
    log.debug("Positions after: [" + configSets.keySet().size() + "] (delta: "
            + (configSets.keySet().size() - posBefore) + ")");
    //
    // // Remember that we have processed the type
    // entryTypes.add(aType);
}
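For orientation, a sketch of how addCas is typically driven (an assumed shape, since the public entry point of CasDiff2 is not part of this snippet): for every type to be diffed, each annotator's CASes are added in order, using the annotator name as the CAS group ID and the running index as the CAS ID.

// Illustrative driver loop - casMap and entryTypes are assumed names, not shown above.
for (String type : entryTypes) {
    for (Map.Entry<String, List<CAS>> entry : casMap.entrySet()) {
        int casId = 0;
        for (CAS cas : entry.getValue()) {
            // a CAS may be null when the annotator has no finished CAS for a document
            addCas(entry.getKey(), casId, cas, type);
            casId++;
        }
    }
}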
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
The class AgreementPage, method getJCases.
/**
* Get the finished CASes used to compute agreement.
*/
private Map<String, List<JCas>> getJCases() {
    // Avoid reloading the CASes when switching features.
    if (cachedCASes != null) {
        return cachedCASes;
    }
    Project project = projectSelectionForm.getModelObject().project;
    List<User> users = projectService.listProjectUsersWithPermissions(project, PermissionLevel.USER);
    List<SourceDocument> sourceDocuments = documentService.listSourceDocuments(project);
    cachedCASes = new LinkedHashMap<>();
    for (User user : users) {
        List<JCas> cases = new ArrayList<>();
        for (SourceDocument document : sourceDocuments) {
            JCas jCas = null;
            // Load the CAS if there is a finished one.
            if (documentService.existsAnnotationDocument(document, user)) {
                AnnotationDocument annotationDocument = documentService.getAnnotationDocument(document, user);
                if (annotationDocument.getState().equals(AnnotationDocumentState.FINISHED)) {
                    try {
                        jCas = documentService.readAnnotationCas(annotationDocument);
                        annotationService.upgradeCas(jCas.getCas(), annotationDocument);
                        // REC: I think there is no need to write the CASes here. We would not
                        // want to interfere with currently active annotator users
                        // Set the CAS name in the DocumentMetaData so that we can pick it
                        // up in the Diff position for the purpose of debugging / transparency.
                        DocumentMetaData documentMetadata = DocumentMetaData.get(jCas);
                        documentMetadata.setDocumentId(annotationDocument.getDocument().getName());
                        documentMetadata.setCollectionId(annotationDocument.getProject().getName());
                    } catch (Exception e) {
                        LOG.error("Unable to load data", e);
                        error("Unable to load data: " + ExceptionUtils.getRootCauseMessage(e));
                    }
                }
            }
            // The next line can enter null values into the list if a user didn't work on this
            // source document yet.
            cases.add(jCas);
        }
        cachedCASes.put(user.getUsername(), cases);
    }
    return cachedCASes;
}
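DocumentMetaData.get(jCas) throws an IllegalArgumentException when no DocumentMetaData annotation is present in the CAS (see the catch block in CasDiff2.addCas above). A defensive variant of the metadata block, as a sketch rather than the actual AgreementPage code, would create the annotation on demand:

DocumentMetaData documentMetadata;
try {
    documentMetadata = DocumentMetaData.get(jCas);
} catch (IllegalArgumentException ex) {
    // no metadata yet - create and index it (sketch; AgreementPage assumes it already exists)
    documentMetadata = DocumentMetaData.create(jCas);
}
documentMetadata.setDocumentId(annotationDocument.getDocument().getName());
documentMetadata.setCollectionId(annotationDocument.getProject().getName());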
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
The class LinewiseTextReader, method getNext.
public void getNext(JCas aJCas) throws IOException, CollectionException {
    DocumentMetaData md = new DocumentMetaData(aJCas);
    md.setDocumentTitle("");
    md.setDocumentId("" + (instanceId++));
    md.setLanguage(language);
    md.addToIndexes();
    String[] split = nextLine.split("\t");
    String documentText = split[1];
    String label = split[0];
    documentText = checkUnescapeHtml(documentText);
    documentText = checkUnescapeJava(documentText);
    aJCas.setDocumentText(documentText);
    TextClassificationOutcome outcome = new TextClassificationOutcome(aJCas);
    outcome.setOutcome(label);
    outcome.addToIndexes();
    checkSetSentence(aJCas);
}
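The reader expects each input line to carry the label and the text separated by a single tab: split[0] becomes the TextClassificationOutcome, split[1] the document text, optionally HTML- and Java-unescaped. A hypothetical input line (illustrative data only, tab shown as an actual tab character):

positive	The plot twist was great &amp; completely unexpected.

If HTML unescaping is enabled, checkUnescapeHtml would turn "&amp;" into "&" before the text is set on the JCas.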