use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class PretokenisedSequence method addToken.
/**
 * Called when reconstructing a sentence from a previously annotated corpus,
 * adding the next string.
 * <p>
 * The search position starts at the end index of the last token already in
 * this sequence (or 0 if the sequence is empty), moves forward past any
 * whitespace in the sentence text, and then moves back by one character for
 * each leading whitespace character of {@code string} that is matched by
 * whitespace in the text. The text at the resulting position must be exactly
 * equal to {@code string}.
 *
 * @param string
 *          the exact text of the next token
 * @return the result of {@code addToken(start, end)} for the matched span
 * @throws TalismaneException
 *           if couldn't find the token at the next sentence position
 */
public Token addToken(String string) throws TalismaneException {
  CharSequence text = this.getSentence().getText();

  int start = 0;
  if (this.size() > 0) {
    start = this.get(this.size() - 1).getEndIndex();
  }

  // jump forward to first non-whitespace character
  while (start < text.length() && Character.isWhitespace(text.charAt(start))) {
    start++;
  }

  // go backwards along whitespace to match string
  for (int i = 0; i < string.length(); i++) {
    if (!Character.isWhitespace(string.charAt(i))) {
      break;
    }
    if (start <= 0) {
      // Already at the beginning of the text: there is nothing left to step
      // back over. Stopping here (instead of reading index -1) lets the
      // checks below report the mismatch as a TalismaneException.
      break;
    }
    start--;
    if (!Character.isWhitespace(text.charAt(start))) {
      break;
    }
  }

  int end = start + string.length();
  if (end > text.length()) {
    throw new TalismaneException("Add token failed: Expected |" + string + "| at positions " + start + ", " + end + ", but only remaining text (length "
        + text.length() + ") is |" + text.subSequence(start, text.length()) + "| in sentence: |" + text + "|");
  }

  if (!string.equals(text.subSequence(start, end).toString())) {
    throw new TalismaneException("Add token failed: Expected |" + string + "| but was |" + text.subSequence(start, end) + "| in sentence: |" + text + "|");
  }

  return this.addToken(start, end);
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class WordListFinder method addWordList.
/**
 * Add an external word list located in a scanner from a particular filename.
 * <p>
 * The first line read from the scanner must be a header of the form
 * {@code Type: <type>}. Only the type {@code WordList} is accepted; in that
 * case a {@code WordList} is built from the file name and the scanner and
 * registered through {@code addWordList(WordList)}.
 * <p>
 * If the scanner has no line at all, or the first line does not start with
 * {@code "Type: "}, a {@code JolicielException} is thrown (it is not declared
 * in the throws clause).
 *
 * @param fileName
 *          name used in log and error messages, and passed to the word list
 * @param scanner
 *          source of the word list content, positioned at the header line
 * @throws TalismaneException
 *           if unknown file type
 */
public void addWordList(String fileName, Scanner scanner) throws TalismaneException {
  LOG.debug("Reading " + fileName);

  final String typePrefix = "Type: ";

  // An empty source has no header line: report it like any other missing
  // header rather than letting Scanner.nextLine() fail with a
  // NoSuchElementException that does not mention the file.
  String typeLine = scanner.hasNextLine() ? scanner.nextLine() : null;
  if (typeLine == null || !typeLine.startsWith(typePrefix)) {
    throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");
  }

  String type = typeLine.substring(typePrefix.length());
  if (!"WordList".equals(type)) {
    throw new TalismaneException("Unexpected type in file: " + fileName + ": " + type);
  }

  WordList textFileWordList = new WordList(fileName, scanner);
  this.addWordList(textFileWordList);
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class PosTagEventStream method next.
/**
 * Builds the classification event for the next pos-tagged token of the
 * current sentence.
 * <p>
 * Every configured pos-tagger feature is checked against the token in the
 * context of the current history; non-null results are collected and paired
 * with the code of the token's tag. The token is then added to the current
 * history, and the current sentence is set to null once its last token has
 * been consumed.
 *
 * @return the event for the next token, or {@code null} if
 *         {@code hasNext()} returns false
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
  if (!this.hasNext()) {
    return null;
  }

  PosTaggedToken current = currentSentence.get(currentIndex++);
  String outcome = current.getTag().getCode();
  if (LOG.isDebugEnabled()) {
    LOG.debug("next event, token: " + current.getToken().getAnalyisText() + " : " + outcome);
  }

  PosTaggerContext context = new PosTaggerContextImpl(current.getToken(), currentHistory);
  List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
  for (PosTaggerFeature<?> feature : posTaggerFeatures) {
    // a fresh runtime environment is used for each feature check
    FeatureResult<?> result = feature.check(context, new RuntimeEnvironment());
    if (result != null) {
      results.add(result);
    }
  }

  if (LOG.isTraceEnabled()) {
    LOG.trace("Token: " + current.getToken().getAnalyisText());
    // log the feature results sorted by their string form, without duplicates
    SortedSet<String> sortedResults = new TreeSet<String>();
    for (FeatureResult<?> result : results) {
      sortedResults.add(result.toString());
    }
    for (String line : sortedResults) {
      LOG.trace(line);
    }
  }

  ClassificationEvent event = new ClassificationEvent(results, outcome);
  currentHistory.addPosTaggedToken(current);

  // last token of the sentence consumed: drop the sentence
  if (currentIndex == currentSentence.size()) {
    currentSentence = null;
  }
  return event;
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class PosTagRegexBasedCorpusReader method processSentence.
/**
 * Runs the parent sentence processing, then creates a new pos-tag sequence
 * over {@code tokenSequence} and converts each corpus line, in order, to a
 * pos-tagged token, recording it in {@code idTokenMap} under the line's
 * index.
 * <p>
 * If a {@code TalismaneException} is raised at any point,
 * {@code clearSentence()} is called before the exception is rethrown.
 */
@Override
protected void processSentence(Sentence sentence, List<CorpusLine> corpusLines) throws TalismaneException, IOException {
  try {
    super.processSentence(sentence, corpusLines);
    posTagSequence = new PosTagSequence(tokenSequence);

    // the n-th corpus line is paired with the n-th token of the sequence
    int position = 0;
    for (CorpusLine line : corpusLines) {
      PosTaggedToken tagged = this.convertToPosTaggedToken(line, posTagSequence, position, this.getCurrentFile());
      position++;
      this.idTokenMap.put(line.getIndex(), tagged);
    }
  } catch (TalismaneException failure) {
    this.clearSentence();
    throw failure;
  }
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class PosTagRegexBasedCorpusReader method convertToPosTaggedToken.
/**
 * Converts one corpus line into a pos-tagged token and appends it to the
 * given pos-tag sequence.
 * <p>
 * The token is taken from the sequence's token sequence at {@code index}, and
 * the pos-tag is looked up in the session's pos-tag set using the line's
 * {@code POSTAG} element. The line's {@code POSTAG_COMMENT} element and
 * lexical entry, when present, are copied onto the new token.
 *
 * @param corpusLine
 *          the corpus line describing the token
 * @param posTagSequence
 *          the sequence providing the token and receiving the result
 * @param index
 *          position of the token in the underlying token sequence
 * @param currentFile
 *          file being read, used only in the error message; may be null
 * @return the pos-tagged token that was added to the sequence
 * @throws TalismaneException
 *           if the line's pos-tag is unknown to the pos-tag set
 */
protected PosTaggedToken convertToPosTaggedToken(CorpusLine corpusLine, PosTagSequence posTagSequence, int index, File currentFile) throws TalismaneException {
  Token token = posTagSequence.getTokenSequence().get(index);
  PosTagSet posTagSet = TalismaneSession.get(sessionId).getPosTagSet();

  PosTag posTag = null;
  try {
    posTag = posTagSet.getPosTag(corpusLine.getElement(CorpusElement.POSTAG));
  } catch (UnknownPosTagException upte) {
    String fileName = currentFile != null ? currentFile.getPath() : "";
    TalismaneException failure = new TalismaneException(
        "Unknown posTag, " + fileName + ", on line " + corpusLine.getLineNumber() + ": " + corpusLine.getElement(CorpusElement.POSTAG));
    // Keep the original exception as the cause so its stack trace is not lost.
    // NOTE(review): assumes the TalismaneException(String) constructor leaves
    // the cause unset, as Throwable.initCause requires - confirm.
    failure.initCause(upte);
    throw failure;
  }

  Decision posTagDecision = new Decision(posTag.getCode());
  PosTaggedToken posTaggedToken = new PosTaggedToken(token, posTagDecision, sessionId);
  if (LOG.isTraceEnabled()) {
    LOG.trace(posTaggedToken.toString());
  }

  if (corpusLine.hasElement(CorpusElement.POSTAG_COMMENT)) {
    posTaggedToken.setComment(corpusLine.getElement(CorpusElement.POSTAG_COMMENT));
  }

  // set the lexical entry if we have one
  if (corpusLine.getLexicalEntry() != null) {
    List<LexicalEntry> lexicalEntries = new ArrayList<>(1);
    lexicalEntries.add(corpusLine.getLexicalEntry());
    posTaggedToken.setLexicalEntries(lexicalEntries);
  }

  posTagSequence.addPosTaggedToken(posTaggedToken);
  return posTaggedToken;
}
Aggregations