Search in sources :

Example 26 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class PretokenisedSequence method addToken.

/**
 * Called when reconstructing a sentence from a previously annotated corpus,
 * adding the next string.
 * <p>
 * The search starts at the end index of the last token already in this
 * sequence (or at 0 when the sequence is empty) and skips any whitespace in
 * the sentence text. If the string itself begins with whitespace, the start
 * position is moved back over the matching whitespace in the sentence text,
 * but never before position 0.
 *
 * @param string
 *          the text of the token expected at the next sentence position
 * @return the token returned by {@code addToken(start, end)} for the matched
 *         span
 * @throws TalismaneException
 *           if couldn't find the token at the next sentence position
 */
public Token addToken(String string) throws TalismaneException {
    CharSequence text = this.getSentence().getText();
    int start = 0;
    if (this.size() > 0) {
        start = this.get(this.size() - 1).getEndIndex();
    }
    // jump forward to first non-whitespace character
    while (start < text.length() && !(!Character.isWhitespace(text.charAt(start)))) {
        start++;
    }
    // go backwards along whitespace to match string
    for (int i = 0; i < string.length(); i++) {
        if (!Character.isWhitespace(string.charAt(i))) {
            break;
        }
        if (start == 0) {
            // Nothing precedes the start of the sentence: stepping back would
            // read index -1 and fail with an unchecked exception. Stop here
            // and let the checks below report the mismatch.
            break;
        }
        start--;
        if (!Character.isWhitespace(text.charAt(start))) {
            break;
        }
    }
    int end = start + string.length();
    if (end > text.length()) {
        throw new TalismaneException("Add token failed: Expected |" + string + "| at positions " + start + ", " + end + ", but only remaining text (length " + text.length() + ") is |" + text.subSequence(start, text.length()) + "| in sentence: |" + text + "|");
    }
    if (!string.equals(text.subSequence(start, end).toString())) {
        throw new TalismaneException("Add token failed: Expected |" + string + "| but was |" + text.subSequence(start, end) + "| in sentence: |" + text + "|");
    }
    return this.addToken(start, end);
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException)

Example 27 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class WordListFinder method addWordList.

/**
 * Add an external word list located in a scanner from a particular filename.
 * <p>
 * The first line read from the scanner must start with {@code "Type: "};
 * the remainder of that line is the type. Only the type {@code "WordList"}
 * is accepted, in which case a {@code WordList} is built from the file name
 * and the scanner and then registered via {@code addWordList(WordList)}.
 *
 * @param fileName
 *          the file name, used for building the word list and in log and
 *          error messages
 * @param scanner
 *          the scanner positioned on the type line
 * @throws TalismaneException
 *           if unknown file type
 * @throws JolicielException
 *           if the scanner has no line to read, or the first line does not
 *           start with {@code "Type: "}
 */
public void addWordList(String fileName, Scanner scanner) throws TalismaneException {
    final String typePrefix = "Type: ";
    LOG.debug("Reading " + fileName);
    // An empty source has no type line at all: report it like a malformed
    // header (with the file name) rather than letting nextLine() fail with
    // a context-free NoSuchElementException.
    if (!scanner.hasNextLine()) {
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");
    }
    String typeLine = scanner.nextLine();
    if (!typeLine.startsWith(typePrefix)) {
        throw new JolicielException("In file " + fileName + ", expected line starting with \"Type: \"");
    }
    String type = typeLine.substring(typePrefix.length());
    if (!"WordList".equals(type)) {
        throw new TalismaneException("Unexpected type in file: " + fileName + ": " + type);
    }
    WordList textFileWordList = new WordList(fileName, scanner);
    this.addWordList(textFileWordList);
}
Also used : JolicielException(com.joliciel.talismane.utils.JolicielException) TalismaneException(com.joliciel.talismane.TalismaneException)

Example 28 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class PosTagEventStream method next.

/**
 * Builds the classification event for the next pos-tagged token of the
 * current sentence.
 * <p>
 * The event's classification is the code of the token's tag, and its
 * features are the non-null results of checking every feature in
 * {@code posTaggerFeatures} against a context made of the token and the
 * current history. The token is then added to the current history, and the
 * current sentence is reset to {@code null} once its last token has been
 * consumed.
 *
 * @return the next event, or {@code null} if {@code hasNext()} is false
 */
@Override
public ClassificationEvent next() throws TalismaneException, IOException {
    if (!this.hasNext()) {
        return null;
    }

    PosTaggedToken current = currentSentence.get(currentIndex++);
    String outcome = current.getTag().getCode();
    if (LOG.isDebugEnabled()) {
        LOG.debug("next event, token: " + current.getToken().getAnalyisText() + " : " + outcome);
    }

    PosTaggerContext taggerContext = new PosTaggerContextImpl(current.getToken(), currentHistory);
    List<FeatureResult<?>> results = new ArrayList<>();
    for (PosTaggerFeature<?> feature : posTaggerFeatures) {
        // each feature is checked in its own fresh runtime environment
        FeatureResult<?> result = feature.check(taggerContext, new RuntimeEnvironment());
        if (result == null) {
            continue;
        }
        results.add(result);
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("Token: " + current.getToken().getAnalyisText());
        // sort the textual form of the results so the trace output is ordered
        SortedSet<String> sortedResults = new TreeSet<>();
        for (FeatureResult<?> result : results) {
            sortedResults.add(result.toString());
        }
        for (String line : sortedResults) {
            LOG.trace(line);
        }
    }

    ClassificationEvent event = new ClassificationEvent(results, outcome);
    currentHistory.addPosTaggedToken(current);
    if (currentIndex == currentSentence.size()) {
        currentSentence = null;
    }
    return event;
}
Also used : Logger(org.slf4j.Logger) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) Set(java.util.Set) IOException(java.io.IOException) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) Collectors(java.util.stream.Collectors) TreeSet(java.util.TreeSet) TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) PosTaggerFeature(com.joliciel.talismane.posTagger.features.PosTaggerFeature) List(java.util.List) ClassificationEventStream(com.joliciel.talismane.machineLearning.ClassificationEventStream) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult) Map(java.util.Map) RuntimeEnvironment(com.joliciel.talismane.machineLearning.features.RuntimeEnvironment) ArrayList(java.util.ArrayList) TreeSet(java.util.TreeSet) ClassificationEvent(com.joliciel.talismane.machineLearning.ClassificationEvent) FeatureResult(com.joliciel.talismane.machineLearning.features.FeatureResult)

Example 29 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class PosTagRegexBasedCorpusReader method processSentence.

/**
 * Runs the parent sentence processing, then builds a new pos-tag sequence
 * over {@code tokenSequence} and converts each corpus line, in order, into
 * a pos-tagged token which is recorded in {@code idTokenMap} under the
 * line's index.
 * <p>
 * If a {@link TalismaneException} is raised at any point, the sentence is
 * cleared via {@code clearSentence()} before the exception is rethrown.
 */
@Override
protected void processSentence(Sentence sentence, List<CorpusLine> corpusLines) throws TalismaneException, IOException {
    try {
        super.processSentence(sentence, corpusLines);
        posTagSequence = new PosTagSequence(tokenSequence);

        // position of the current line within the sentence
        int position = 0;
        for (CorpusLine line : corpusLines) {
            PosTaggedToken tagged = this.convertToPosTaggedToken(line, posTagSequence, position, this.getCurrentFile());
            position++;
            this.idTokenMap.put(line.getIndex(), tagged);
        }
    } catch (TalismaneException failure) {
        this.clearSentence();
        throw failure;
    }
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) CorpusLine(com.joliciel.talismane.corpus.CorpusLine)

Example 30 with TalismaneException

use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.

the class PosTagRegexBasedCorpusReader method convertToPosTaggedToken.

/**
 * Converts a corpus line into a pos-tagged token and appends it to the
 * given pos-tag sequence.
 * <p>
 * The token is the one at {@code index} in the sequence's token sequence;
 * the pos-tag is looked up in the session's pos-tag set from the line's
 * {@code POSTAG} element. A {@code POSTAG_COMMENT} element and a lexical
 * entry are copied onto the result when the line has them.
 *
 * @param corpusLine
 *          the line supplying the pos-tag, optional comment and optional
 *          lexical entry
 * @param posTagSequence
 *          the sequence providing the token and receiving the result
 * @param index
 *          the position of the token in the token sequence
 * @param currentFile
 *          the file being read, used only in the error message; may be
 *          {@code null}
 * @return the pos-tagged token that was added to the sequence
 * @throws TalismaneException
 *           if the pos-tag on the line is unknown to the pos-tag set
 */
protected PosTaggedToken convertToPosTaggedToken(CorpusLine corpusLine, PosTagSequence posTagSequence, int index, File currentFile) throws TalismaneException {
    Token token = posTagSequence.getTokenSequence().get(index);
    PosTagSet tagSet = TalismaneSession.get(sessionId).getPosTagSet();

    PosTag tag;
    try {
        tag = tagSet.getPosTag(corpusLine.getElement(CorpusElement.POSTAG));
    } catch (UnknownPosTagException upte) {
        String path = currentFile != null ? currentFile.getPath() : "";
        throw new TalismaneException("Unknown posTag, " + path + ", on line " + corpusLine.getLineNumber() + ": " + corpusLine.getElement(CorpusElement.POSTAG));
    }

    Decision decision = new Decision(tag.getCode());
    PosTaggedToken result = new PosTaggedToken(token, decision, sessionId);
    if (LOG.isTraceEnabled()) {
        LOG.trace(result.toString());
    }

    if (corpusLine.hasElement(CorpusElement.POSTAG_COMMENT)) {
        result.setComment(corpusLine.getElement(CorpusElement.POSTAG_COMMENT));
    }

    // set the lexical entry if we have one
    if (corpusLine.getLexicalEntry() != null) {
        List<LexicalEntry> entries = new ArrayList<>(1);
        entries.add(corpusLine.getLexicalEntry());
        result.setLexicalEntries(entries);
    }

    posTagSequence.addPosTaggedToken(result);
    return result;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) Token(com.joliciel.talismane.tokeniser.Token) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry) Decision(com.joliciel.talismane.machineLearning.Decision)

Aggregations

TalismaneException (com.joliciel.talismane.TalismaneException): 47, ArrayList (java.util.ArrayList): 27, Config (com.typesafe.config.Config): 14, File (java.io.File): 11, List (java.util.List): 10, TreeSet (java.util.TreeSet): 10, FeatureResult (com.joliciel.talismane.machineLearning.features.FeatureResult): 9, IOException (java.io.IOException): 9, HashMap (java.util.HashMap): 9, Set (java.util.Set): 9, Decision (com.joliciel.talismane.machineLearning.Decision): 8, RuntimeEnvironment (com.joliciel.talismane.machineLearning.features.RuntimeEnvironment): 8, PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken): 8, Token (com.joliciel.talismane.tokeniser.Token): 8, Map (java.util.Map): 8, SortedSet (java.util.SortedSet): 8, Collectors (java.util.stream.Collectors): 8, Logger (org.slf4j.Logger): 8, LoggerFactory (org.slf4j.LoggerFactory): 8, Sentence (com.joliciel.talismane.rawText.Sentence): 7