Search in sources :

Example 6 with LexicalEntry

use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.

The method getLexicalEntry of the class Token.

/**
 * Returns the preferred lexical entry for this token combined with the given
 * pos-tag, or null when no entry is available for that combination.
 * <p>
 * The list of entries found for a pos-tag is stored in
 * {@code lexicalEntryMap}; the merged lexicon of the current session is only
 * consulted when no list is stored yet for the requested pos-tag. The first
 * element of that list is the one returned.
 *
 * @param posTag
 *            the pos-tag to look up together with this token's text
 * @return the first lexical entry of the list, or null if the list is empty
 */
public LexicalEntry getLexicalEntry(PosTag posTag) {
    // create the per-postag cache on first use
    if (this.lexicalEntryMap == null) {
        this.lexicalEntryMap = new HashMap<PosTag, List<LexicalEntry>>();
    }

    List<LexicalEntry> entriesForPosTag = this.lexicalEntryMap.get(posTag);
    if (entriesForPosTag == null) {
        // nothing stored yet for this pos-tag: ask the lexicon and remember the answer
        entriesForPosTag = TalismaneSession.get(sessionId).getMergedLexicon().findLexicalEntries(this.getText(), posTag);
        this.lexicalEntryMap.put(posTag, entriesForPosTag);
    }

    return entriesForPosTag.isEmpty() ? null : entriesForPosTag.get(0);
}
Also used : PosTag(com.joliciel.talismane.posTagger.PosTag) ArrayList(java.util.ArrayList) List(java.util.List) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry)

Example 7 with LexicalEntry

use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.

The method convertToPosTaggedToken of the class PosTagRegexBasedCorpusReader.

/**
 * Creates the pos-tagged token for the token at position {@code index} of the
 * sequence, using the pos-tag read from the corpus line, and appends it to
 * the pos-tag sequence.
 *
 * @param corpusLine
 *            the corpus line providing the pos-tag, and optionally a pos-tag
 *            comment and a lexical entry
 * @param posTagSequence
 *            the sequence whose token is being tagged and to which the result
 *            is added
 * @param index
 *            the position of the token inside the sequence's token sequence
 * @param currentFile
 *            the file being read, only used for the error message; may be
 *            null
 * @return the pos-tagged token that was added to the sequence
 * @throws TalismaneException
 *             if the pos-tag on the corpus line is not part of the session's
 *             pos-tag set
 */
protected PosTaggedToken convertToPosTaggedToken(CorpusLine corpusLine, PosTagSequence posTagSequence, int index, File currentFile) throws TalismaneException {
    Token token = posTagSequence.getTokenSequence().get(index);
    PosTagSet posTagSet = TalismaneSession.get(sessionId).getPosTagSet();

    PosTag posTag;
    try {
        posTag = posTagSet.getPosTag(corpusLine.getElement(CorpusElement.POSTAG));
    } catch (UnknownPosTagException upte) {
        // report where the unknown pos-tag was found; the file may be unknown
        String fileName = currentFile != null ? currentFile.getPath() : "";
        throw new TalismaneException("Unknown posTag, " + fileName + ", on line " + corpusLine.getLineNumber() + ": " + corpusLine.getElement(CorpusElement.POSTAG));
    }

    Decision decision = new Decision(posTag.getCode());
    PosTaggedToken taggedToken = new PosTaggedToken(token, decision, sessionId);
    if (LOG.isTraceEnabled()) {
        LOG.trace(taggedToken.toString());
    }

    if (corpusLine.hasElement(CorpusElement.POSTAG_COMMENT)) {
        taggedToken.setComment(corpusLine.getElement(CorpusElement.POSTAG_COMMENT));
    }

    // when the corpus line carries a lexical entry, it becomes the token's only entry
    if (corpusLine.getLexicalEntry() != null) {
        List<LexicalEntry> singleEntry = new ArrayList<>(1);
        singleEntry.add(corpusLine.getLexicalEntry());
        taggedToken.setLexicalEntries(singleEntry);
    }

    posTagSequence.addPosTaggedToken(taggedToken);
    return taggedToken;
}
Also used : TalismaneException(com.joliciel.talismane.TalismaneException) ArrayList(java.util.ArrayList) Token(com.joliciel.talismane.tokeniser.Token) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry) Decision(com.joliciel.talismane.machineLearning.Decision)

Example 8 with LexicalEntry

use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.

The method checkInternal of the class AbstractLexicalAttributeFeature.

/**
 * Collects, over all lexical entries of the pos-tagged token, the distinct
 * combinations of values of the requested attributes, and returns them as
 * outcomes that all carry a weight of 1.0.
 * <p>
 * For a single lexical entry, the values of successive attributes are
 * combined with each other and joined with "|". An attribute for which the
 * entry has no values is skipped, and an entry with no values for any of the
 * attributes contributes nothing.
 *
 * @return the feature result, or null if there is no wrapper, no pos-tagged
 *         token, or no attribute value at all
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(T context, RuntimeEnvironment env) throws TalismaneException {
    PosTaggedTokenWrapper innerWrapper = this.getToken(context, env);
    if (innerWrapper == null) {
        return null;
    }
    PosTaggedToken posTaggedToken = innerWrapper.getPosTaggedToken();
    if (posTaggedToken == null) {
        return null;
    }

    List<String> attributes = this.getAttributes(innerWrapper, env);
    Set<String> distinctResults = new HashSet<>();

    for (LexicalEntry lexicalEntry : posTaggedToken.getLexicalEntries()) {
        boolean foundValue = false;
        // start from a single empty prefix so the first attribute's values are taken as-is
        Set<String> combinations = new HashSet<>();
        combinations.add("");

        for (String attribute : attributes) {
            List<String> values = lexicalEntry.getAttributeAsList(attribute);
            if (values.isEmpty()) {
                // attribute without values for this entry: keep the combinations built so far
                continue;
            }
            foundValue = true;
            Set<String> extended = new HashSet<>();
            for (String value : values) {
                for (String prefix : combinations) {
                    extended.add(prefix.isEmpty() ? value : prefix + "|" + value);
                }
            }
            combinations = extended;
        }

        if (foundValue) {
            distinctResults.addAll(combinations);
        }
    }

    if (distinctResults.isEmpty()) {
        return null;
    }

    List<WeightedOutcome<String>> outcomes = new ArrayList<>(distinctResults.size());
    for (String result : distinctResults) {
        outcomes.add(new WeightedOutcome<String>(result, 1.0));
    }
    return this.generateResult(outcomes);
}
Also used : PosTaggedToken(com.joliciel.talismane.posTagger.PosTaggedToken) ArrayList(java.util.ArrayList) WeightedOutcome(com.joliciel.talismane.utils.WeightedOutcome) List(java.util.List) ArrayList(java.util.ArrayList) LexicalEntry(com.joliciel.talismane.lexicon.LexicalEntry) HashSet(java.util.HashSet)

Aggregations

LexicalEntry (com.joliciel.talismane.lexicon.LexicalEntry)8 PosTaggedToken (com.joliciel.talismane.posTagger.PosTaggedToken)4 ArrayList (java.util.ArrayList)4 TreeSet (java.util.TreeSet)3 PosTag (com.joliciel.talismane.posTagger.PosTag)2 Token (com.joliciel.talismane.tokeniser.Token)2 List (java.util.List)2 TalismaneException (com.joliciel.talismane.TalismaneException)1 Decision (com.joliciel.talismane.machineLearning.Decision)1 NonDeterministicPosTagger (com.joliciel.talismane.posTagger.NonDeterministicPosTagger)1 PosTagSequence (com.joliciel.talismane.posTagger.PosTagSequence)1 Sentence (com.joliciel.talismane.rawText.Sentence)1 TokenSequence (com.joliciel.talismane.tokeniser.TokenSequence)1 WeightedOutcome (com.joliciel.talismane.utils.WeightedOutcome)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1