Use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
From class WebAnnoTsv3WriterTestBase, method makeChainHead.
// Creates a chain head feature structure of the given type, points its "first" feature at
// the given annotation and adds it to the CAS indexes.
private static void makeChainHead(Type aType, AnnotationFS first) {
    CAS cas = first.getCAS();
    FeatureStructure h = cas.createFS(aType);
    FSUtil.setFeature(h, "first", first);
    cas.addFsToIndexes(h);
}
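The helper only creates the chain head; the chain elements themselves have to be created and connected separately. A minimal usage sketch, assuming a tokenized JCas like the one used in the tests below (with t1 and t2 as its first two tokens), a chain layer "webanno.custom.SimpleChain" and an element type "webanno.custom.SimpleLink" carrying a "next" feature; these names are illustrative and not taken from the snippet above:

// Sketch only - chain/element type names and the "next" feature are assumptions.
CAS cas = jcas.getCas();
Type chainType = cas.getTypeSystem().getType("webanno.custom.SimpleChain");
Type elementType = cas.getTypeSystem().getType("webanno.custom.SimpleLink");
// Two chain elements over the first two tokens, connected via the assumed "next" feature
AnnotationFS e1 = cas.createAnnotation(elementType, t1.getBegin(), t1.getEnd());
AnnotationFS e2 = cas.createAnnotation(elementType, t2.getBegin(), t2.getEnd());
FSUtil.setFeature(e1, "next", e2);
cas.addFsToIndexes(e1);
cas.addFsToIndexes(e2);
// The chain head points at the first element of the chain
makeChainHead(chainType, e1);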
Use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
From class WebAnnoTsv3WriterTestBase, method testZeroLengthSlotFeature2.
@Test
public void testZeroLengthSlotFeature2() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();
    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    Token t1 = tokens.get(0);
    Token t2 = tokens.get(1);
    Token t3 = tokens.get(2);
    Type type = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");
    // Multi-token span covering the second and third token
    AnnotationFS s2 = cas.createAnnotation(type, t2.getBegin(), t3.getEnd());
    cas.addFsToIndexes(s2);
    // Zero-length span sitting at the end offset of the third token
    AnnotationFS s3 = cas.createAnnotation(type, t3.getEnd(), t3.getEnd());
    cas.addFsToIndexes(s3);
    // Both spans are filled into slots of a link host anchored on the first token
    FeatureStructure link1 = makeLinkFS(jcas, "p1", s2);
    FeatureStructure link2 = makeLinkFS(jcas, "p2", s3);
    makeLinkHostFS(jcas, t1.getBegin(), t1.getEnd(), link1, link2);
    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
                    asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}
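The tests in this group rely on the helpers makeLinkFS and makeLinkHostFS, which are not part of this excerpt. A plausible sketch of what they do, inferred from the writer parameters above; the feature names "role", "target" and "links" follow WebAnno's usual slot-feature convention and are assumptions here, not copied from WebAnnoTsv3WriterTestBase:

// Sketch only - signatures and feature names are inferred, not taken from the test base class.
private static FeatureStructure makeLinkFS(JCas aJCas, String aRole, AnnotationFS aTarget) {
    // Create a link FS of the configured link type and fill its role and target features
    Type linkType = aJCas.getTypeSystem().getType("webanno.custom.LinkType");
    FeatureStructure link = aJCas.getCas().createFS(linkType);
    link.setStringValue(linkType.getFeatureByBaseName("role"), aRole);
    link.setFeatureValue(linkType.getFeatureByBaseName("target"), aTarget);
    aJCas.getCas().addFsToIndexes(link);
    return link;
}

private static AnnotationFS makeLinkHostFS(JCas aJCas, int aBegin, int aEnd,
        FeatureStructure... aLinks) {
    // Create the host annotation and store the links in its array-valued "links" feature
    Type hostType = aJCas.getTypeSystem().getType("webanno.custom.SimpleLinkHost");
    AnnotationFS host = aJCas.getCas().createAnnotation(hostType, aBegin, aEnd);
    FSUtil.setFeature(host, "links", aLinks);
    aJCas.getCas().addFsToIndexes(host);
    return host;
}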
Use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
From class WebAnnoTsv3WriterTestBase, method testMultiTokenStackedSlotFeature.
@Test
public void testMultiTokenStackedSlotFeature() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();
    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    Token t1 = tokens.get(0);
    Token t2 = tokens.get(1);
    Token t3 = tokens.get(2);
    Type type = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");
    // Two stacked multi-token spans covering exactly the same range (second to third token)
    AnnotationFS s2 = cas.createAnnotation(type, t2.getBegin(), t3.getEnd());
    cas.addFsToIndexes(s2);
    AnnotationFS s3 = cas.createAnnotation(type, t2.getBegin(), t3.getEnd());
    cas.addFsToIndexes(s3);
    FeatureStructure link1 = makeLinkFS(jcas, "p1", s2);
    FeatureStructure link2 = makeLinkFS(jcas, "p2", s3);
    makeLinkHostFS(jcas, t1.getBegin(), t1.getEnd(), link1, link2);
    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
                    asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}
Use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
From class WebAnnoTsv3WriterTestBase, method testSimpleSameRoleSlotFeature.
@Test
public void testSimpleSameRoleSlotFeature() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();
    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    Token t1 = tokens.get(0);
    Token t2 = tokens.get(1);
    Token t3 = tokens.get(2);
    Type type = cas.getTypeSystem().getType("webanno.custom.SimpleSpan");
    // Single-token spans on the second and third token
    AnnotationFS s2 = cas.createAnnotation(type, t2.getBegin(), t2.getEnd());
    cas.addFsToIndexes(s2);
    AnnotationFS s3 = cas.createAnnotation(type, t3.getBegin(), t3.getEnd());
    cas.addFsToIndexes(s3);
    // Both slots use the same role label "p1"
    FeatureStructure link1 = makeLinkFS(jcas, "p1", s2);
    FeatureStructure link2 = makeLinkFS(jcas, "p1", s3);
    makeLinkHostFS(jcas, t1.getBegin(), t1.getEnd(), link1, link2);
    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_SLOT_FEATS, asList("webanno.custom.SimpleLinkHost:links"),
            WebannoTsv3Writer.PARAM_SPAN_LAYERS,
                    asList("webanno.custom.SimpleSpan", "webanno.custom.SimpleLinkHost"),
            WebannoTsv3Writer.PARAM_LINK_TYPES, asList("webanno.custom.LinkType"),
            WebannoTsv3Writer.PARAM_SLOT_TARGETS, asList("webanno.custom.SimpleSpan"));
}
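All of the tests above also start from makeJCasOneSentence, equally not shown in this excerpt. A minimal sketch of such a helper using uimaFIT's JCasFactory and TokenBuilder; the real method in WebAnnoTsv3WriterTestBase additionally has to make the custom webanno.custom.* types available in the type system, which is omitted here:

// Sketch only - the real helper may differ; it merely has to yield a JCas with one
// whitespace-tokenized sentence, as the tests above require.
private JCas makeJCasOneSentence() throws Exception {
    // Note: the custom test types (webanno.custom.SimpleSpan etc.) must also be present in
    // the type system; their setup is not shown in this sketch.
    JCas jcas = JCasFactory.createJCas();
    TokenBuilder<Token, Sentence> tb = TokenBuilder.create(Token.class, Sentence.class);
    tb.buildTokens(jcas, "This is a test .");
    return jcas;
}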
Use of org.apache.uima.cas.FeatureStructure in project webanno by webanno.
From class Tsv3XDeserializer, method readContent.
private void readContent(LineNumberReader aIn, TsvDocument aDoc) throws IOException {
    StringBuilder text = new StringBuilder();
    State prevState = State.INTER_SENTENCE_SPACE;
    State state = State.INTER_SENTENCE_SPACE;
    StringBuilder sentenceText = new StringBuilder();
    TsvSentence prevSentence = null;
    TsvSentence sentence = null;
    TsvToken token = null;
    String line = aIn.readLine();
    while (!State.END.equals(state)) {
        // These variables are only used in TOKEN and SUBTOKEN states.
        String[] fields = null;
        String id = null;
        String[] offsets = null;
        int begin = -1;
        int end = -1;
        // Determine the status of the current line
        // (line may be null at EOF - startsWith/isEmpty must be the null-safe variants)
        if (startsWith(line, PREFIX_TEXT)) {
            state = State.SENTENCE;
        } else if (line == null) {
            state = State.END;
        } else if (isEmpty(line)) {
            state = State.INTER_SENTENCE_SPACE;
        } else {
            fields = splitPreserveAllTokens(line, FIELD_SEPARATOR);
            // Get token metadata
            id = fields[0];
            offsets = split(fields[1], "-");
            begin = Integer.valueOf(offsets[0]);
            end = Integer.valueOf(offsets[1]);
            // TOKEN or SUBTOKEN?
            if (id.contains(".")) {
                state = State.SUBTOKEN;
            } else {
                state = State.TOKEN;
            }
        }
        // Assert that the order of information in the file is correct
        switch (prevState) {
            case INTER_SENTENCE_SPACE:
                if (!State.SENTENCE.equals(state)) {
                    throw new IOException("Line " + aIn.getLineNumber()
                            + ": Expected sentence header but got [" + state + "]");
                }
                break;
            case SENTENCE:
                if (!(State.SENTENCE.equals(state) || State.TOKEN.equals(state))) {
                    throw new IOException("Line " + aIn.getLineNumber()
                            + ": Expected sentence header or token but got [" + state + "]");
                }
                break;
            case TOKEN:
            case SUBTOKEN:
                if (!(State.INTER_SENTENCE_SPACE.equals(state) || State.END.equals(state)
                        || State.TOKEN.equals(state) || State.SUBTOKEN.equals(state))) {
                    throw new IOException("Line " + aIn.getLineNumber()
                            + ": Expected token, sub-token or sentence break but got [" + state + "]");
                }
                break;
        }
        // Do the actual parsing
        switch (state) {
            case END:
            case INTER_SENTENCE_SPACE:
                // End of sentence action
                // The -1 here is to account for the trailing line break
                sentence.getUimaSentence().setEnd(text.length() - 1);
                sentence.getUimaSentence().addToIndexes();
                prevSentence = sentence;
                sentence = null;
                break;
            case TOKEN:
                // End of sentence header action
                if (State.SENTENCE.equals(prevState)) {
                    // If the text buffer already extends beyond the begin offset of the new
                    // sentence, it can only be by the line break appended after the previous
                    // sentence - strip it.
                    if (text.length() > begin) {
                        assert text.length() == begin + 1;
                        assert text.charAt(text.length() - 1) == LINE_BREAK;
                        text.setLength(text.length() - 1);
                    }
                    // If the new sentence begins after the current end of the buffer, pad the
                    // buffer with spaces to fill the gap.
                    if (text.length() < begin) {
                        text.append(repeat(' ', begin - text.length()));
                    }
                    assert text.length() == begin;
                    assert sentence == null;
                    Sentence uimaSentence = new Sentence(aDoc.getJCas());
                    uimaSentence.setBegin(text.length());
                    sentence = aDoc.createSentence(uimaSentence);
                    text.append(sentenceText);
                    sentenceText.setLength(0);
                }
                // Token parsing action
                Token uimaToken = new Token(aDoc.getJCas(), begin, end);
                uimaToken.addToIndexes();
                token = sentence.createToken(uimaToken);
                // Read annotations from the columns
                parseAnnotations(aDoc, sentence, token, fields);
                break;
            case SUBTOKEN:
                // Read annotations from the columns
                TsvSubToken subToken = token.createSubToken(begin, end);
                parseAnnotations(aDoc, sentence, subToken, fields);
                break;
            case SENTENCE:
                // Header parsing action
                String textFragment = substringAfter(line, "=");
                textFragment = unescapeText(aDoc.getFormatHeader(), textFragment);
                sentenceText.append(textFragment);
                sentenceText.append(LINE_BREAK);
                break;
        }
        prevState = state;
        line = aIn.readLine();
    }
    aDoc.getJCas().setDocumentText(text.toString());
    // After all data has been read, we also add the annotations with disambiguation ID to
    // the CAS indexes. This ensures we only add them after their final begin/end offsets
    // have been determined since most of these annotations are actually multi-token
    // annotations.
    CAS cas = aDoc.getJCas().getCas();
    Set<FeatureStructure> fses = new LinkedHashSet<>();
    for (TsvSentence s : aDoc.getSentences()) {
        for (TsvToken t : s.getTokens()) {
            for (Type type : t.getUimaTypes()) {
                fses.addAll(t.getUimaAnnotations(type));
            }
            for (TsvSubToken st : t.getSubTokens()) {
                for (Type type : st.getUimaTypes()) {
                    fses.addAll(st.getUimaAnnotations(type));
                }
            }
        }
    }
    fses.forEach(cas::addFsToIndexes);
}
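For orientation, the state machine above classifies each line by its shape: a line starting with the text header prefix puts the reader into the SENTENCE state, a non-empty data line is a TOKEN (id like "1-2") or a SUBTOKEN (id containing a dot, like "1-2.1"), and an empty line marks the INTER_SENTENCE_SPACE between sentences. A hand-made fragment in the spirit of the WebAnno TSV 3 body illustrates this (fields are tab-separated; the text, offsets and the trailing placeholder column are illustrative only, not taken from an actual test file):

#Text=John likes it.
1-1	0-4	John	_
1-2	5-10	likes	_
1-2.1	5-9	like	_
1-3	11-13	it	_
1-4	13-14	.	_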