Use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
Class Token, method getLexicalEntry.
/**
 * Returns the first lexical entry (treated as the "best" one) found for this
 * token's text combined with the given pos-tag, or null when no entry is
 * found.
 * <p>
 * The list of entries obtained from the session's merged lexicon is stored
 * per pos-tag in {@code lexicalEntryMap}, so the lexicon is only queried the
 * first time a given pos-tag is requested.
 *
 * @param posTag
 *            the pos-tag to combine with this token's text for the lookup
 * @return the first entry of the list for this pos-tag, or null if that list
 *         is empty
 */
public LexicalEntry getLexicalEntry(PosTag posTag) {
    // The per-pos-tag cache is only created on first use.
    if (this.lexicalEntryMap == null) {
        this.lexicalEntryMap = new HashMap<>();
    }

    List<LexicalEntry> entries = this.lexicalEntryMap.get(posTag);
    if (entries == null) {
        // Cache miss: query the merged lexicon and remember the answer.
        entries = TalismaneSession.get(sessionId).getMergedLexicon().findLexicalEntries(this.getText(), posTag);
        this.lexicalEntryMap.put(posTag, entries);
    }

    return entries.isEmpty() ? null : entries.get(0);
}
Use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
Class PosTagRegexBasedCorpusReader, method convertToPosTaggedToken.
/**
 * Builds a {@link PosTaggedToken} for the token at the given index of the
 * sequence, using the pos-tag code read from the corpus line, and appends it
 * to the pos-tag sequence.
 * <p>
 * The comment and the lexical entry of the corpus line are copied onto the
 * new pos-tagged token when the corpus line provides them.
 *
 * @param corpusLine
 *            the corpus line supplying the pos-tag, and optionally a comment
 *            and a lexical entry
 * @param posTagSequence
 *            the sequence whose token sequence provides the token, and to
 *            which the new pos-tagged token is added
 * @param index
 *            the position of the token inside the token sequence
 * @param currentFile
 *            the file being read, only used in the error message; may be
 *            null
 * @return the pos-tagged token that was added to the sequence
 * @throws TalismaneException
 *             if the pos-tag on the corpus line is unknown to the pos-tag
 *             set of the current session
 */
protected PosTaggedToken convertToPosTaggedToken(CorpusLine corpusLine, PosTagSequence posTagSequence, int index, File currentFile) throws TalismaneException {
    Token token = posTagSequence.getTokenSequence().get(index);
    PosTagSet posTagSet = TalismaneSession.get(sessionId).getPosTagSet();

    PosTag posTag;
    try {
        posTag = posTagSet.getPosTag(corpusLine.getElement(CorpusElement.POSTAG));
    } catch (UnknownPosTagException unknownTag) {
        // The file path is optional context for the error message.
        String fileName = currentFile != null ? currentFile.getPath() : "";
        throw new TalismaneException("Unknown posTag, " + fileName + ", on line " + corpusLine.getLineNumber() + ": " + corpusLine.getElement(CorpusElement.POSTAG));
    }

    Decision decision = new Decision(posTag.getCode());
    PosTaggedToken taggedToken = new PosTaggedToken(token, decision, sessionId);
    if (LOG.isTraceEnabled()) {
        LOG.trace(taggedToken.toString());
    }

    if (corpusLine.hasElement(CorpusElement.POSTAG_COMMENT)) {
        taggedToken.setComment(corpusLine.getElement(CorpusElement.POSTAG_COMMENT));
    }

    // Attach the corpus line's lexical entry, if any, as a one-element list.
    if (corpusLine.getLexicalEntry() != null) {
        List<LexicalEntry> entries = new ArrayList<>(1);
        entries.add(corpusLine.getLexicalEntry());
        taggedToken.setLexicalEntries(entries);
    }

    posTagSequence.addPosTaggedToken(taggedToken);
    return taggedToken;
}
Use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
Class AbstractLexicalAttributeFeature, method checkInternal.
/**
 * Collects, over all lexical entries of the wrapped pos-tagged token, the
 * combinations of values of the requested attributes, and returns them as
 * outcomes of weight 1.0.
 * <p>
 * For each lexical entry, the values of the successive attributes are
 * combined with each other and joined with "|"; an attribute for which the
 * entry has no values is skipped. An entry with no values for any of the
 * attributes contributes nothing.
 *
 * @param context
 *            the context from which the token wrapper is obtained
 * @param env
 *            the runtime environment passed on to the token and attribute
 *            lookups
 * @return the generated result, or null if there is no token wrapper, no
 *         pos-tagged token, or no attribute combination was found
 * @throws TalismaneException
 *             declared by this method's signature
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(T context, RuntimeEnvironment env) throws TalismaneException {
    PosTaggedTokenWrapper wrapper = this.getToken(context, env);
    if (wrapper == null) {
        return null;
    }
    PosTaggedToken posTaggedToken = wrapper.getPosTaggedToken();
    if (posTaggedToken == null) {
        return null;
    }

    List<String> attributes = this.getAttributes(wrapper, env);
    Set<String> combinations = new HashSet<>();

    for (LexicalEntry entry : posTaggedToken.getLexicalEntries()) {
        boolean foundValue = false;
        // Start from a single empty prefix so the first attribute's values
        // are taken as-is.
        Set<String> prefixes = new HashSet<>();
        prefixes.add("");

        for (String attribute : attributes) {
            List<String> values = entry.getAttributeAsList(attribute);
            if (values.isEmpty()) {
                // Nothing for this attribute: keep the prefixes unchanged.
                continue;
            }
            foundValue = true;

            Set<String> extended = new HashSet<>();
            for (String value : values) {
                for (String prefix : prefixes) {
                    extended.add(prefix.isEmpty() ? value : prefix + "|" + value);
                }
            }
            prefixes = extended;
        }

        if (foundValue) {
            combinations.addAll(prefixes);
        }
    }

    if (combinations.isEmpty()) {
        return null;
    }

    List<WeightedOutcome<String>> outcomes = new ArrayList<>(combinations.size());
    for (String combination : combinations) {
        outcomes.add(new WeightedOutcome<>(combination, 1.0));
    }
    return this.generateResult(outcomes);
}
Aggregations