use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.
the class WebAnnoCasUtil method isSameSentence.
/**
 * Determine whether two character offsets lie inside one and the same sentence.
 * <p>
 * Equal offsets are always reported as being in the same sentence, even without consulting the
 * sentence annotations. Sentence boundaries are treated as inclusive on both ends.
 *
 * @param aJcas
 *            the JCas whose sentence annotations are scanned.
 * @param aReferenceOffset
 *            the reference offset.
 * @param aCompareOffset
 *            the comparison offset.
 * @return {@code true} if the offsets are equal or both are covered by the same sentence,
 *         {@code false} otherwise.
 */
public static boolean isSameSentence(JCas aJcas, int aReferenceOffset, int aCompareOffset) {
    // Identical offsets need no lookup at all
    if (aReferenceOffset == aCompareOffset) {
        return true;
    }

    final int lower = Math.min(aReferenceOffset, aCompareOffset);
    final int upper = Math.max(aReferenceOffset, aCompareOffset);

    // Walk over the sentences until we reach the one that covers the lower offset
    for (Iterator<Sentence> sentences = JCasUtil.iterator(aJcas, Sentence.class); sentences
            .hasNext();) {
        Sentence sentence = sentences.next();
        int begin = sentence.getBegin();
        int end = sentence.getEnd();

        // The scan relies on sentences being visited in ascending order: once a sentence
        // begins after the lower offset, no later sentence can contain that offset.
        if (begin > lower) {
            return false;
        }

        // This sentence starts at or before the lower offset; if it also covers it, the answer
        // depends solely on whether the upper offset is covered by the same sentence.
        if (lower <= end) {
            return begin <= upper && upper <= end;
        }
    }

    // Ran out of sentences without finding one that covers the lower offset
    return false;
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.
the class WebAnnoCasUtil method getSentenceNumber.
/**
 * Get the (1-based) number of the sentence covering the given position.
 * <p>
 * Sentences are counted in the order in which they are returned from the index. The first
 * sentence whose begin/end range (both inclusive) contains the offset determines the result. If
 * no sentence contains the offset, the number of the last sentence (i.e. the total sentence
 * count) is returned.
 *
 * @param aJcas
 *            the JCas.
 * @param aBeginOffset
 *            the begin offset.
 * @return the sentence number.
 * @throws IndexOutOfBoundsException
 *             if the JCas contains no sentences at all.
 */
public static int getSentenceNumber(JCas aJcas, int aBeginOffset) {
    // Query the sentence index once and reuse the result for both the check and the scan
    Collection<Sentence> sentences = select(aJcas, Sentence.class);
    if (sentences.isEmpty()) {
        throw new IndexOutOfBoundsException("No sentences");
    }

    int sentenceNumber = 0;
    for (Sentence sentence : sentences) {
        // Every visited sentence is counted, including the one that matches
        sentenceNumber++;
        if (sentence.getBegin() <= aBeginOffset && aBeginOffset <= sentence.getEnd()) {
            break;
        }
    }
    return sentenceNumber;
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.
the class RemoteApiController2 method createCompatibleCas.
/**
 * Import an uploaded annotation file into a CAS and verify that it is compatible with the
 * source document it is meant to annotate.
 * <p>
 * The upload is considered compatible if its text equals the text of the initial CAS of the
 * source document (ignoring a single trailing line break on either side) and if it contains the
 * same number of sentences and tokens, with offsets accepted by
 * {@code assertCompatibleOffsets}. On success, the document text of the returned CAS is
 * overwritten with the text of the initial CAS.
 *
 * @param aProjectId
 *            ID of the project containing the source document.
 * @param aDocumentId
 *            ID of the source document.
 * @param aFile
 *            the uploaded annotation file.
 * @param aFormat
 *            the format of the uploaded file; {@code FORMAT_DEFAULT} is used if absent.
 * @return the CAS imported from the uploaded file.
 * @throws RemoteApiException
 *             if the format is not readable or the upload is incompatible with the source
 *             document (presumably also if project or document cannot be resolved - depends on
 *             {@code getProject}/{@code getDocument}).
 * @throws ClassNotFoundException
 *             propagated from the import/CAS services.
 * @throws IOException
 *             if the temporary file cannot be created, written or deleted, or if reading fails.
 * @throws UIMAException
 *             propagated from the import/CAS services.
 */
private JCas createCompatibleCas(long aProjectId, long aDocumentId, MultipartFile aFile,
        Optional<String> aFormat)
    throws RemoteApiException, ClassNotFoundException, IOException, UIMAException {
    Project project = getProject(aProjectId);
    SourceDocument document = getDocument(project, aDocumentId);

    // Check if the format is supported
    String format = aFormat.orElse(FORMAT_DEFAULT);
    Map<String, Class<CollectionReader>> readableFormats = importExportService
            .getReadableFormats();
    if (readableFormats.get(format) == null) {
        throw new UnsupportedFormatException(
                "Format [%s] not supported. Acceptable formats are %s.", format,
                readableFormats.keySet());
    }

    // Convert the uploaded annotation document into a CAS. The upload is spooled to a
    // temporary file which is removed again no matter whether the import succeeds.
    File tmpFile = null;
    JCas annotationCas;
    try {
        tmpFile = File.createTempFile("upload", ".bin");
        aFile.transferTo(tmpFile);
        annotationCas = importExportService.importCasFromFile(tmpFile, project, format);
    } finally {
        if (tmpFile != null) {
            FileUtils.forceDelete(tmpFile);
        }
    }

    // Check if the uploaded file is compatible with the source document. They are compatible
    // if the text is the same and if all the token and sentence annotations have the same
    // offsets.
    JCas initialCas = documentService.createOrReadInitialCas(document);
    String initialText = initialCas.getDocumentText();
    String annotationText = annotationCas.getDocumentText();

    // If any of the texts contains trailing line breaks, we ignore that. We assume at the
    // moment that nobody will have created annotations over that trailing line breaks.
    initialText = StringUtils.chomp(initialText);
    annotationText = StringUtils.chomp(annotationText);

    if (ObjectUtils.notEqual(initialText, annotationText)) {
        int diffIndex = StringUtils.indexOfDifference(initialText, annotationText);
        // Null-safe excerpt of up to 20 characters starting at the first difference; a
        // document without any text must yield the incompatibility error, not an NPE.
        String expected = StringUtils.substring(initialText, diffIndex, diffIndex + 20);
        String actual = StringUtils.substring(annotationText, diffIndex, diffIndex + 20);
        throw new IncompatibleDocumentException(
                "Text of annotation document does not match text of source document at offset "
                        + "[%d]. Expected [%s] but found [%s].",
                diffIndex, expected, actual);
    }

    // Just in case we really had to chomp off a trailing line break from the annotation CAS,
    // make sure we copy over the proper text from the initial CAS
    // NOT AT HOME THIS YOU SHOULD TRY
    // SETTING THE SOFA STRING FORCEFULLY FOLLOWING THE DARK SIDE IS!
    forceSetFeatureValue(annotationCas.getSofa(), CAS.FEATURE_BASE_NAME_SOFASTRING,
            initialCas.getDocumentText());
    FSUtil.setFeature(annotationCas.getDocumentAnnotationFs(), CAS.FEATURE_BASE_NAME_END,
            initialCas.getDocumentText().length());

    // Sentences: same count and compatible offsets
    Collection<Sentence> annotationSentences = select(annotationCas, Sentence.class);
    Collection<Sentence> initialSentences = select(initialCas, Sentence.class);
    if (annotationSentences.size() != initialSentences.size()) {
        throw new IncompatibleDocumentException(
                "Expected [%d] sentences, but annotation document contains [%d] sentences.",
                initialSentences.size(), annotationSentences.size());
    }
    assertCompatibleOffsets(initialSentences, annotationSentences);

    // Tokens: same count and compatible offsets. The message must report the token counts
    // (not the sentence counts, which are known to be equal at this point).
    Collection<Token> annotationTokens = select(annotationCas, Token.class);
    Collection<Token> initialTokens = select(initialCas, Token.class);
    if (annotationTokens.size() != initialTokens.size()) {
        throw new IncompatibleDocumentException(
                "Expected [%d] tokens, but annotation document contains [%d] tokens.",
                initialTokens.size(), annotationTokens.size());
    }
    assertCompatibleOffsets(initialTokens, annotationTokens);

    return annotationCas;
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method makeJCasTwoSentences.
/**
 * Create a test JCas holding a fixed two-line text, tokenized with a {@link TokenBuilder}.
 * Asserts that the builder produced exactly two sentence annotations.
 *
 * @return the populated JCas.
 * @throws UIMAException
 *             if the JCas cannot be created or populated.
 */
private static JCas makeJCasTwoSentences() throws UIMAException {
    final String text = "He loves her .\nShe loves him not .";

    JCas twoSentenceCas = makeJCas();
    TokenBuilder<Token, Sentence> builder = new TokenBuilder<Token, Sentence>(Token.class,
            Sentence.class);
    builder.buildTokens(twoSentenceCas, text);

    // Sanity check on the fixture itself: one sentence per line of text is expected
    int sentenceCount = select(twoSentenceCas, Sentence.class).size();
    assertEquals(2, sentenceCount);

    return twoSentenceCas;
}
use of de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence in project webanno by webanno.
the class WebAnnoTsv3WriterTestBase method makeJCasOneSentence.
/**
 * Create a test JCas for the given text in which the tokens come from a {@link TokenBuilder}
 * but the whole text is covered by exactly one sentence annotation.
 *
 * @param aText
 *            the document text to tokenize.
 * @return the populated JCas with a single sentence spanning the entire document text.
 * @throws UIMAException
 *             if the JCas cannot be created or populated.
 */
private static JCas makeJCasOneSentence(String aText) throws UIMAException {
    JCas jcas = makeJCas();
    TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class, Sentence.class);
    tb.buildTokens(jcas, aText);

    // Remove the sentences generated by the token builder (presumably one per line, i.e. it
    // treats a line break as a sentence break). Iterate over a snapshot: the collection
    // returned by select() is backed by the CAS index, and removing annotations from an index
    // while iterating over it may fail with a ConcurrentModificationException.
    for (Sentence s : new java.util.ArrayList<>(select(jcas, Sentence.class))) {
        s.removeFromIndexes();
    }

    // Add a new sentence covering the whole text
    new Sentence(jcas, 0, jcas.getDocumentText().length()).addToIndexes();

    return jcas;
}
Aggregations