use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
the class LemmaFeature method checkInternal.
/**
 * Produces the lemma of the first lexical entry attached to the pos-tagged
 * token addressed by this feature.
 *
 * @return the generated result for that lemma, or {@code null} when no token
 *         wrapper, no pos-tagged token, or no lexical entry is available
 */
@Override
protected FeatureResult<String> checkInternal(T context, RuntimeEnvironment env) throws TalismaneException {
  PosTaggedTokenWrapper wrapper = this.getToken(context, env);
  PosTaggedToken taggedToken = wrapper != null ? wrapper.getPosTaggedToken() : null;
  if (taggedToken == null) {
    return null;
  }

  List<LexicalEntry> entries = taggedToken.getLexicalEntries();
  if (entries.isEmpty()) {
    return null;
  }

  // Only the first entry is consulted; any further entries are ignored.
  return this.generateResult(entries.get(0).getLemma());
}
use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
the class PosTaggerEvaluator method evaluate.
/**
 * Evaluate a given pos tagger.
 * <p>
 * For every sentence supplied by the corpus reader, the reference pos-tag
 * sequence is read, the sentence is optionally re-tokenised (when a tokeniser
 * has been set), the pos-tagger is applied, and all observers are notified
 * with the reference sequence and the list of guessed sequences. Once the
 * corpus is exhausted, observers are told that evaluation is complete.
 * <p>
 * The list of guessed sequences handed to observers is never {@code null}:
 * for a deterministic pos-tagger it contains the single guessed sequence.
 *
 * @throws TalismaneException
 *           if raised by any of the components invoked during evaluation
 * @throws IOException
 *           if raised by any of the components invoked during evaluation
 */
public void evaluate() throws TalismaneException, IOException {
  while (corpusReader.hasNextSentence()) {
    PosTagSequence realPosTagSequence = corpusReader.nextPosTagSequence();
    List<TokenSequence> tokenSequences = null;
    List<PosTagSequence> guessedSequences = null;
    TokenSequence tokenSequence = realPosTagSequence.getTokenSequence();
    PosTagSequence guessedSequence = null;

    if (this.tokeniser != null) {
      // Re-tokenise the raw sentence and continue with the first proposal.
      Sentence sentence = tokenSequence.getSentence();
      tokenSequences = tokeniser.tokenise(sentence);
      tokenSequence = tokenSequences.get(0);
    } else {
      // No tokeniser: use the reference tokenisation as the only candidate.
      tokenSequences = new ArrayList<TokenSequence>();
      tokenSequences.add(tokenSequence);
    }

    if (posTagger instanceof NonDeterministicPosTagger) {
      NonDeterministicPosTagger nonDeterministicPosTagger = (NonDeterministicPosTagger) posTagger;
      guessedSequences = nonDeterministicPosTagger.tagSentence(tokenSequences);
      guessedSequence = guessedSequences.get(0);
    } else {
      guessedSequence = posTagger.tagSentence(tokenSequence);
      // Wrap the single guess so that observers always receive a list,
      // mirroring what is done above for the token sequences. Previously
      // observers were handed null on this path.
      guessedSequences = new ArrayList<PosTagSequence>();
      guessedSequences.add(guessedSequence);
    }

    if (LOG.isDebugEnabled()) {
      StringBuilder stringBuilder = new StringBuilder();
      for (PosTaggedToken posTaggedToken : guessedSequence) {
        Set<String> lemmas = new TreeSet<String>();
        stringBuilder.append(posTaggedToken.getToken().getOriginalText());
        stringBuilder.append("[").append(posTaggedToken.getTag());

        List<LexicalEntry> entries = posTaggedToken.getLexicalEntries();
        // When several entries exist, skip (once) the entry whose lemma is
        // identical to the token text.
        boolean dropCurrentWord = entries.size() > 1;
        for (LexicalEntry entry : entries) {
          if (!lemmas.contains(entry.getLemma())) {
            if (dropCurrentWord && posTaggedToken.getToken().getText().equals(entry.getLemma())) {
              dropCurrentWord = false;
              continue;
            }
            stringBuilder.append("|").append(entry.getLemma());
            stringBuilder.append("/").append(entry.getMorphology());
            lemmas.add(entry.getLemma());
          }
        }
        stringBuilder.append("] ");
      }
      LOG.debug(stringBuilder.toString());
    }

    for (PosTagEvaluationObserver observer : this.observers) {
      observer.onNextPosTagSequence(realPosTagSequence, guessedSequences);
    }
  }

  for (PosTagEvaluationObserver observer : this.observers) {
    observer.onEvaluationComplete();
  }
}
use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
the class PosTaggedToken method getMorphologyForCoNLL.
/**
 * A string representation of all of the morpho-syntaxic information combined
 * in CoNLL-X format.
 * <p>
 * The result is built on first call and cached in {@code morphologyForCoNLL}.
 * Each attribute that has at least one value across this token's lexical
 * entries contributes a section {@code key=v1,v2,...} (values sorted and
 * de-duplicated); sections are separated by {@code |} and appear in the fixed
 * order {@code s, c, n, g, t, m, a, p, poss}.
 *
 * @return the combined morphology string, empty if no attribute has a value
 */
public String getMorphologyForCoNLL() {
  if (morphologyForCoNLL == null) {
    StringBuilder sb = new StringBuilder();

    // Sub-category is single-valued and may be an empty string, so it is
    // collected separately from the list-valued attributes below.
    Set<String> subCategories = new TreeSet<>();
    for (LexicalEntry lexicalEntry : this.getLexicalEntries()) {
      if (lexicalEntry.hasAttribute(LexicalAttribute.SubCategory)) {
        String subCategory = lexicalEntry.getSubCategory();
        if (subCategory.length() > 0) {
          subCategories.add(subCategory);
        }
      }
    }
    appendCoNLLSection(sb, "s=", subCategories);

    appendCoNLLSection(sb, "c=", collectCoNLLValues(LexicalAttribute.Case, LexicalEntry::getCase));
    appendCoNLLSection(sb, "n=", collectCoNLLValues(LexicalAttribute.Number, LexicalEntry::getNumber));
    appendCoNLLSection(sb, "g=", collectCoNLLValues(LexicalAttribute.Gender, LexicalEntry::getGender));
    appendCoNLLSection(sb, "t=", collectCoNLLValues(LexicalAttribute.Tense, LexicalEntry::getTense));
    appendCoNLLSection(sb, "m=", collectCoNLLValues(LexicalAttribute.Mood, LexicalEntry::getMood));
    appendCoNLLSection(sb, "a=", collectCoNLLValues(LexicalAttribute.Aspect, LexicalEntry::getAspect));
    appendCoNLLSection(sb, "p=", collectCoNLLValues(LexicalAttribute.Person, LexicalEntry::getPerson));
    appendCoNLLSection(sb, "poss=", collectCoNLLValues(LexicalAttribute.PossessorNumber, LexicalEntry::getPossessorNumber));

    morphologyForCoNLL = sb.toString();
  }
  return morphologyForCoNLL;
}

/**
 * Gathers, in sorted order and without duplicates, the values of one
 * list-valued attribute over all lexical entries that declare it.
 */
private Set<String> collectCoNLLValues(LexicalAttribute attribute,
    java.util.function.Function<LexicalEntry, java.util.Collection<? extends String>> valueGetter) {
  Set<String> values = new TreeSet<>();
  for (LexicalEntry lexicalEntry : this.getLexicalEntries()) {
    if (lexicalEntry.hasAttribute(attribute)) {
      values.addAll(valueGetter.apply(lexicalEntry));
    }
  }
  return values;
}

/**
 * Appends one {@code prefix + comma-joined values} section to the buffer,
 * preceded by {@code |} if the buffer is non-empty; does nothing when there
 * are no values.
 */
private void appendCoNLLSection(StringBuilder sb, String prefix, Set<String> values) {
  if (values.isEmpty()) {
    return;
  }
  if (sb.length() > 0) {
    sb.append("|");
  }
  sb.append(prefix);
  sb.append(String.join(",", values));
}
use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
the class AbstractLexicalAttributesFeature method checkInternal.
/**
 * Builds a single string from the values of the requested lexical attributes
 * of the addressed pos-tagged token.
 * <p>
 * For each attribute, the values found across all of the token's lexical
 * entries are sorted, de-duplicated and joined with {@code ;}. The
 * per-attribute strings (possibly empty) are then joined with {@code |} in
 * the order the attributes were supplied.
 *
 * @return the generated result, or {@code null} when there is no token
 *         wrapper, no pos-tagged token, or no attribute yielded any value
 */
@Override
public FeatureResult<String> checkInternal(T context, RuntimeEnvironment env) throws TalismaneException {
  PosTaggedTokenWrapper wrapper = this.getToken(context, env);
  PosTaggedToken taggedToken = wrapper != null ? wrapper.getPosTaggedToken() : null;
  if (taggedToken == null) {
    return null;
  }

  List<String> attributes = this.getAttributes(wrapper, env);

  StringBuilder joined = new StringBuilder();
  boolean anyValueFound = false;
  String separator = "";
  for (String attribute : attributes) {
    // Sorted, duplicate-free values of this attribute over every entry.
    Set<String> attributeValues = new TreeSet<>();
    for (LexicalEntry entry : taggedToken.getLexicalEntries()) {
      attributeValues.addAll(entry.getAttributeAsList(attribute));
    }

    // A separator is written before every attribute but the first, even
    // when the attribute itself contributes nothing.
    joined.append(separator);
    separator = "|";

    if (!attributeValues.isEmpty()) {
      anyValueFound = true;
      joined.append(String.join(";", attributeValues));
    }
  }

  if (!anyValueFound) {
    return null;
  }
  return this.generateResult(joined.toString());
}
use of com.joliciel.talismane.lexicon.LexicalEntry in project talismane by joliciel-informatique.
the class LemmaForPosTagFeature method checkInternal.
/**
 * Finds the lemma of the addressed token for the first candidate pos-tag that
 * has a lexical entry.
 * <p>
 * All pos-tag code features are evaluated first; the non-null outcomes are
 * then tried in order, and the first one for which the token has a lexical
 * entry supplies the lemma.
 *
 * @return the generated result for that lemma, or {@code null} when there is
 *         no token wrapper or no candidate pos-tag has a lexical entry
 */
@Override
public FeatureResult<String> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
  TokenWrapper wrapper = this.getToken(tokenWrapper, env);
  if (wrapper == null) {
    return null;
  }
  Token token = wrapper.getToken();

  // Phase 1: evaluate every pos-tag code feature, keeping the outcomes found.
  List<String> candidateCodes = new ArrayList<String>();
  for (StringFeature<TokenWrapper> codeFeature : posTagCodeFeatures) {
    FeatureResult<String> codeResult = codeFeature.check(wrapper, env);
    if (codeResult != null) {
      candidateCodes.add(codeResult.getOutcome());
    }
  }

  // Phase 2: return the lemma for the first code with a lexical entry.
  for (String code : candidateCodes) {
    PosTag posTag = TalismaneSession.get(sessionId).getPosTagSet().getPosTag(code);
    LexicalEntry entry = token.getLexicalEntry(posTag);
    if (entry != null) {
      return this.generateResult(entry.getLemma());
    }
  }
  return null;
}
Aggregations