use of org.apache.uima.jcas.cas.FSArray in project webanno by webanno.
the class Conll2009Writer method convert.
/**
 * Writes all sentences of the given JCas to the given writer in CONLL 2009 format: one
 * tab-separated line per token, followed by an empty line after each sentence.
 * <p>
 * The lemma, POS, morphological feature, dependency and semantic predicate columns are only
 * filled when the corresponding {@code write*} switch of this writer is enabled and the
 * annotation is present; otherwise the {@code UNUSED} placeholder is written.
 *
 * @param aJCas
 *            the JCas whose sentences are written.
 * @param aOut
 *            the writer receiving the output.
 * @throws IllegalStateException
 *             if a token is the dependent of more than one basic dependency.
 */
private void convert(JCas aJCas, PrintWriter aOut) {
    Map<Token, Collection<SemPred>> predicatesByToken = indexCovered(aJCas, Token.class, SemPred.class);
    Map<SemArg, Collection<Token>> tokensByArgument = indexCovered(aJCas, SemArg.class, Token.class);

    for (Sentence sentence : select(aJCas, Sentence.class)) {
        List<Token> sentenceTokens = selectCovered(Token.class, sentence);

        // FEATS are only emitted if there is exactly one morphology annotation per token,
        // because they are matched to tokens purely by position.
        List<MorphologicalFeatures> sentenceFeats = selectCovered(MorphologicalFeatures.class, sentence);
        boolean featsAligned = sentenceTokens.size() == sentenceFeats.size();

        List<SemPred> sentencePreds = selectCovered(SemPred.class, sentence);

        // Insertion-ordered so that rows are written in token order.
        Map<Token, Row> rowsByToken = new LinkedHashMap<>();

        // One row per token, with 1-based ids
        for (int idx = 0; idx < sentenceTokens.size(); idx++) {
            Row current = new Row();
            current.id = idx + 1;
            current.token = sentenceTokens.get(idx);
            current.args = new SemArgLink[sentencePreds.size()];
            if (featsAligned) {
                current.feats = sentenceFeats.get(idx);
            }

            // Only the first semantic predicate found for a token is kept.
            Collection<SemPred> candidates = predicatesByToken.get(current.token);
            boolean hasPredicate = candidates != null && !candidates.isEmpty();
            if (hasPredicate) {
                current.pred = candidates.iterator().next();
            }

            rowsByToken.put(current.token, current);
        }

        // Basic dependencies: those without a flavor or with the BASIC flavor
        List<Dependency> basicDependencies = selectCovered(Dependency.class, sentence)
                .stream()
                .filter(candidate -> {
                    String flavor = FSUtil.getFeature(candidate, "flavor", String.class);
                    if (flavor == null) {
                        return true;
                    }
                    return DependencyFlavor.BASIC.equals(flavor);
                })
                .collect(Collectors.toList());

        for (Dependency dependency : basicDependencies) {
            Row dependentRow = rowsByToken.get(dependency.getDependent());
            if (dependentRow.deprel == null) {
                dependentRow.deprel = dependency;
                continue;
            }
            throw new IllegalStateException("Illegal basic dependency structure - token [" + dependentRow.token.getCoveredText() + "] is dependent of more than one dependency.");
        }

        // Semantic arguments: the n-th predicate of the sentence fills the n-th argument slot
        int predicateColumn = 0;
        for (SemPred predicate : sentencePreds) {
            FSArray links = predicate.getArguments();
            for (SemArgLink link : select(links, SemArgLink.class)) {
                for (Token covered : tokensByArgument.get(link.getTarget())) {
                    rowsByToken.get(covered).args[predicateColumn] = link;
                }
            }
            predicateColumn++;
        }

        // Emit the sentence in CONLL 2009 format
        for (Row row : rowsByToken.values()) {
            Token token = row.token;
            String form = token.getCoveredText();

            String lemma = (writeLemma && token.getLemma() != null)
                    ? token.getLemma().getValue()
                    : UNUSED;

            String pos = (writePos && token.getPos() != null)
                    ? token.getPos().getPosValue()
                    : UNUSED;

            String feat = (writeMorph && row.feats != null)
                    ? row.feats.getValue()
                    : UNUSED;

            int headId = UNUSED_INT;
            String deprel = UNUSED;
            if (writeDependency && row.deprel != null) {
                deprel = row.deprel.getDependencyType();
                int governorId = rowsByToken.get(row.deprel.getGovernor()).id;
                // ROOT dependencies may be modeled as a loop; these are written with head 0.
                headId = (governorId == row.id) ? 0 : governorId;
            }
            String head = (headId != UNUSED_INT) ? Integer.toString(headId) : UNUSED;

            String fillpred = UNUSED;
            String pred = UNUSED;
            StringBuilder apreds = new StringBuilder();
            if (writeSemanticPredicate) {
                if (row.pred != null) {
                    fillpred = "Y";
                    pred = row.pred.getCategory();
                }
                for (SemArgLink link : row.args) {
                    if (apreds.length() > 0) {
                        apreds.append('\t');
                    }
                    apreds.append(link == null ? UNUSED : link.getRole());
                }
            }

            // The PLEMMA, PPOS, PFEAT, PHEAD and PDEPREL columns repeat the value of their
            // LEMMA, POS, FEAT, HEAD and DEPREL counterparts.
            aOut.printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
                    row.id, form,
                    lemma, lemma,
                    pos, pos,
                    feat, feat,
                    head, head,
                    deprel, deprel,
                    fillpred, pred, apreds);
        }

        aOut.println();
    }
}
use of org.apache.uima.jcas.cas.FSArray in project tika by apache.
the class CTAKESUtils method getAnnotationProperty.
/**
 * Returns the value of the requested property of the given annotation as a string.
 * <p>
 * For {@code ONTOLOGY_CONCEPT_ARR} the result is the comma-separated list of the CUIs of
 * all {@link UmlsConcept} entries in the annotation's ontology concept array; entries of
 * any other type are skipped. The result is an empty string when the array is
 * {@code null} or contains no {@link UmlsConcept}.
 *
 * @param annotation
 *            {@link IdentifiedAnnotation} object.
 * @param property
 *            {@link CTAKESAnnotationProperty} enum used to identify the
 *            annotation type.
 * @return the annotation value, or {@code null} if the property is not one of the
 *         properties handled by this method.
 */
public static String getAnnotationProperty(IdentifiedAnnotation annotation, CTAKESAnnotationProperty property) {
    String value = null;
    if (property == CTAKESAnnotationProperty.BEGIN) {
        value = Integer.toString(annotation.getBegin());
    } else if (property == CTAKESAnnotationProperty.END) {
        value = Integer.toString(annotation.getEnd());
    } else if (property == CTAKESAnnotationProperty.CONDITIONAL) {
        value = Boolean.toString(annotation.getConditional());
    } else if (property == CTAKESAnnotationProperty.CONFIDENCE) {
        value = Float.toString(annotation.getConfidence());
    } else if (property == CTAKESAnnotationProperty.DISCOVERY_TECNIQUE) {
        value = Integer.toString(annotation.getDiscoveryTechnique());
    } else if (property == CTAKESAnnotationProperty.GENERIC) {
        value = Boolean.toString(annotation.getGeneric());
    } else if (property == CTAKESAnnotationProperty.HISTORY_OF) {
        value = Integer.toString(annotation.getHistoryOf());
    } else if (property == CTAKESAnnotationProperty.ID) {
        value = Integer.toString(annotation.getId());
    } else if (property == CTAKESAnnotationProperty.ONTOLOGY_CONCEPT_ARR) {
        FSArray mentions = annotation.getOntologyConceptArr();
        StringBuilder sb = new StringBuilder();
        if (mentions != null) {
            // Track whether a CUI has already been written instead of relying on the
            // array position: skipped (non-UmlsConcept) entries at the end of the array
            // must not leave a dangling separator behind.
            boolean first = true;
            for (int i = 0; i < mentions.size(); i++) {
                Object mention = mentions.get(i);
                if (mention instanceof UmlsConcept) {
                    if (!first) {
                        sb.append(",");
                    }
                    sb.append(((UmlsConcept) mention).getCui());
                    first = false;
                }
            }
        }
        value = sb.toString();
    } else if (property == CTAKESAnnotationProperty.POLARITY) {
        value = Integer.toString(annotation.getPolarity());
    }
    return value;
}
Aggregations