use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class DependencyNode method autoPopulate.
/**
 * Builds the sub-tree below this node from the parse configuration: every
 * dependent that the configuration reports for this node's token is attached
 * as a child node, and each child is then populated in the same way
 * (depth-first recursion).
 *
 * @throws RuntimeException
 *             wrapping the {@link TalismaneException} if attaching a
 *             dependent fails; this is logged first and is not expected to
 *             occur.
 */
public void autoPopulate() {
    for (PosTaggedToken governedToken : parseConfiguration.getDependents(this.token)) {
        final DependencyNode subTree;
        try {
            subTree = this.addDependent(governedToken);
        } catch (TalismaneException unexpected) {
            // should never happen
            LOG.error(unexpected.getMessage(), unexpected);
            throw new RuntimeException(unexpected);
        }
        // recurse only once the child has been attached successfully
        subTree.autoPopulate();
    }
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class CorpusLineReader method read.
/**
 * Parses a single corpus line into a {@link CorpusLine}.
 * <p>
 * The line is matched against this reader's compiled pattern. For every
 * {@link CorpusElement} that has an entry in the placeholder index map, the
 * corresponding regex group is stored on the result: TOKEN and LEMMA values
 * are passed through the session's CoNLL formatter, any other element has a
 * bare underscore replaced by the empty string. If a lexical entry reader
 * is configured, a lexical entry is read from the same line and attached.
 * Finally all corpus rules are applied and the values they request are
 * written back onto the result.
 *
 * @param line
 *            the line to read
 * @param lineNumber
 *            the line number we reached, starting at 1.
 * @return the populated corpus line
 * @throws TalismaneException
 *             if the regex wasn't matched on a given line
 */
public CorpusLine read(String line, int lineNumber) throws TalismaneException {
    Matcher lineMatcher = this.pattern.matcher(line);
    boolean lineMatches = lineMatcher.matches();
    if (!lineMatches) {
        throw new TalismaneException("Didn't match pattern \"" + regex + "\". Compiled to: \"" + this.pattern.pattern() + "\". On line " + lineNumber + ": " + line);
    }

    CorpusLine result = new CorpusLine(line, lineNumber);

    // copy each configured regex group onto the result
    for (CorpusElement elementType : CorpusElement.values()) {
        if (!placeholderIndexMap.containsKey(elementType)) {
            continue;
        }
        String rawValue = lineMatcher.group(placeholderIndexMap.get(elementType));
        String storedValue;
        if (elementType == CorpusElement.TOKEN || elementType == CorpusElement.LEMMA) {
            storedValue = TalismaneSession.get(sessionId).getCoNLLFormatter().fromCoNLL(rawValue);
        } else if ("_".equals(rawValue)) {
            // a lone underscore stands for "no value"
            storedValue = "";
        } else {
            storedValue = rawValue;
        }
        result.setElement(elementType, storedValue);
    }

    if (this.lexicalEntryReader != null) {
        WritableLexicalEntry entry = new CompactLexicalEntry(lexicalEntrySupport);
        this.lexicalEntryReader.readEntry(line, entry);
        result.setLexicalEntry(entry);
    }

    // let the rules collect their replacements first, then apply them all
    Map<CorpusElement, String> replacements = new HashMap<>();
    for (CorpusRule rule : corpusRules) {
        rule.apply(result, replacements);
    }
    for (Map.Entry<CorpusElement, String> replacement : replacements.entrySet()) {
        CorpusElement target = replacement.getKey();
        String newValue = replacement.getValue();
        if (LOG.isTraceEnabled()) {
            LOG.trace("On line " + lineNumber + ", updating " + target.name() + " from '" + result.getElement(target) + "' to '" + newValue + "'");
        }
        result.setElement(target, newValue);
    }
    return result;
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class LinearSVMModelTrainer method getFeatureMatrix.
/**
 * Consumes the whole event stream and returns one array of features per
 * event.
 * <p>
 * While reading, the method also updates the structures passed in: an
 * outcome whose index in {@code outcomeIndexMap} is negative is given the
 * next index from {@code countingInfo}; the outcome index of every event is
 * appended to {@code outcomeList}; feature bookkeeping is delegated to
 * {@code addFeatureResult}; and {@code countingInfo.numEvents} is
 * incremented once per event.
 *
 * @param corpusEventStream
 *            the events to read
 * @param featureIndexMap
 *            feature name to index map, handed to {@code addFeatureResult}
 * @param outcomeIndexMap
 *            outcome name to index map, extended with unseen outcomes
 * @param outcomeList
 *            receives one outcome index per event, in stream order
 * @param featureCountMap
 *            feature counts, handed to {@code addFeatureResult}
 * @param countingInfo
 *            running counters for events and outcome indexes
 * @return the feature rows; the outer array has
 *         {@code countingInfo.numEvents} entries
 * @throws RuntimeException
 *             wrapping any {@link TalismaneException} or {@link IOException}
 *             raised while reading, after logging it
 */
private Feature[][] getFeatureMatrix(ClassificationEventStream corpusEventStream, TObjectIntMap<String> featureIndexMap,
        TObjectIntMap<String> outcomeIndexMap, TIntList outcomeList, TIntIntMap featureCountMap, CountingInfo countingInfo) {
    try {
        int maxFeatureCount = 0;
        List<Feature[]> rows = new ArrayList<Feature[]>();

        while (corpusEventStream.hasNext()) {
            ClassificationEvent event = corpusEventStream.next();

            // resolve the outcome index, registering the outcome on first sight
            String classification = event.getClassification();
            int outcomeIndex = outcomeIndexMap.get(classification);
            if (outcomeIndex < 0) {
                outcomeIndex = countingInfo.currentOutcomeIndex++;
                outcomeIndexMap.put(classification, outcomeIndex);
            }
            outcomeList.add(outcomeIndex);

            // sorted map, so that the row comes out ordered by key
            Map<Integer, Feature> rowFeatures = new TreeMap<Integer, Feature>();
            for (FeatureResult<?> featureResult : event.getFeatureResults()) {
                Object outcome = featureResult.getOutcome();
                if (outcome instanceof List) {
                    // one feature per weighted string outcome
                    @SuppressWarnings("unchecked")
                    FeatureResult<List<WeightedOutcome<String>>> listResult = (FeatureResult<List<WeightedOutcome<String>>>) featureResult;
                    for (WeightedOutcome<String> weighted : listResult.getOutcome()) {
                        String featureName = featureResult.getTrainingName() + "|" + featureResult.getTrainingOutcome(weighted.getOutcome());
                        this.addFeatureResult(featureName, weighted.getWeight(), rowFeatures, featureIndexMap, featureCountMap, countingInfo);
                    }
                    continue;
                }

                // numeric results carry their own value, anything else counts as 1.0
                double weight = 1.0;
                if (outcome instanceof Double) {
                    @SuppressWarnings("unchecked")
                    FeatureResult<Double> numericResult = (FeatureResult<Double>) featureResult;
                    weight = numericResult.getOutcome().doubleValue();
                }
                this.addFeatureResult(featureResult.getTrainingName(), weight, rowFeatures, featureIndexMap, featureCountMap, countingInfo);
            }

            maxFeatureCount = Math.max(maxFeatureCount, rowFeatures.size());

            // convert to array immediately, to avoid double storage
            rows.add(rowFeatures.values().toArray(new Feature[rowFeatures.size()]));

            countingInfo.numEvents++;
            if (countingInfo.numEvents % 1000 == 0) {
                LOG.debug("Processed " + countingInfo.numEvents + " events.");
            }
        }

        Feature[][] featureMatrix = new Feature[countingInfo.numEvents][];
        for (int row = 0; row < rows.size(); row++) {
            featureMatrix[row] = rows.get(row);
        }

        LOG.debug("Event count: " + countingInfo.numEvents);
        LOG.debug("Feature count: " + featureIndexMap.size());
        return featureMatrix;
    } catch (TalismaneException | IOException e) {
        LOG.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class CorpusProjectifier method onNextParseConfiguration.
/**
 * Rewrites the dependencies of the given configuration until
 * {@code getNextPair} no longer finds a pair of conflicting arcs.
 * <p>
 * If at least one pair is found initially, every arc returned by
 * {@code getNonProjectiveDependencies()} is first recorded as a manual
 * non-projective dependency, so that the original analysis is kept.
 * <p>
 * For each pair, both arcs are examined: starting from the governor of the
 * arc's head, the ancestors are walked upwards until {@code isProjective}
 * accepts the span between that ancestor and the arc's dependent with
 * respect to the <em>other</em> arc of the pair. When both arcs have such a
 * candidate head, the configured strategy decides which one is re-attached
 * (ties fall through to the next criterion). The chosen arc is removed and
 * re-added under its new head, its label extended by
 * {@code nonProjectiveArcSuffix} unless it is empty or already present; the
 * other arc's current governing dependency is replaced by the arc as it
 * appears in the pair.
 *
 * @param parseConfiguration
 *            the configuration to modify in place
 * @throws TalismaneException
 *             if exactly one replacement head could not be determined for a
 *             pair (no candidate found for either arc, or the strategy left
 *             both candidates in place)
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
    List<DependencyArc> arcs = new ArrayList<DependencyArc>(parseConfiguration.getNonProjectiveDependencies());
    NonProjectivePair pair = this.getNextPair(arcs);
    if (pair != null) {
        // set so that it stays untouched
        for (DependencyArc arc : arcs) {
            parseConfiguration.addManualNonProjectiveDependency(arc.getHead(), arc.getDependent(), arc.getLabel());
        }
    }

    while (pair != null) {
        // candidate head for arc1: first ancestor accepted against arc2
        PosTaggedToken newHead1 = null;
        PosTaggedToken parent1 = parseConfiguration.getHead(pair.arc1.getHead());
        int depIndex1 = pair.arc1.getDependent().getToken().getIndex();
        int depthDelta1 = 1;
        while (parent1 != null) {
            int headIndex = parent1.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex1);
            int endIndex = Math.max(headIndex, depIndex1);
            if (isProjective(startIndex, endIndex, pair.arc2)) {
                newHead1 = parent1;
                break;
            }
            parent1 = parseConfiguration.getHead(parent1);
            depthDelta1++;
        }

        // candidate head for arc2: first ancestor accepted against arc1
        PosTaggedToken newHead2 = null;
        PosTaggedToken parent2 = parseConfiguration.getHead(pair.arc2.getHead());
        int depIndex2 = pair.arc2.getDependent().getToken().getIndex();
        int depthDelta2 = 1;
        while (parent2 != null) {
            int headIndex = parent2.getToken().getIndex();
            int startIndex = Math.min(headIndex, depIndex2);
            int endIndex = Math.max(headIndex, depIndex2);
            // must be tested against the OTHER arc of the pair (arc1), mirroring
            // the arc1 search above; testing arc2 against itself was a copy-paste slip
            if (isProjective(startIndex, endIndex, pair.arc1)) {
                newHead2 = parent2;
                break;
            }
            parent2 = parseConfiguration.getHead(parent2);
            depthDelta2++;
        }

        if (newHead1 != null && newHead2 != null) {
            // both arcs could be moved: let the strategy pick exactly one
            int linearDistance1 = Math.abs(newHead1.getIndex() - depIndex1);
            int linearDistance2 = Math.abs(newHead2.getIndex() - depIndex2);

            // number of governors above each candidate head
            int rootDepthDelta1 = 0;
            PosTaggedToken parent = parseConfiguration.getHead(newHead1);
            while (parent != null) {
                rootDepthDelta1++;
                parent = parseConfiguration.getHead(parent);
            }
            int rootDepthDelta2 = 0;
            parent = parseConfiguration.getHead(newHead2);
            while (parent != null) {
                rootDepthDelta2++;
                parent = parseConfiguration.getHead(parent);
            }

            switch (strategy) {
            case LeastLinearDistance:
                if (linearDistance1 < linearDistance2) {
                    newHead2 = null;
                    break;
                } else if (linearDistance2 < linearDistance1) {
                    newHead1 = null;
                    break;
                }
                // break left out on purpose
            case LeastDepthDifference:
                if (depthDelta1 < depthDelta2) {
                    newHead2 = null;
                    break;
                } else if (depthDelta2 < depthDelta1) {
                    newHead1 = null;
                    break;
                }
                // break left out on purpose
            case GreatestDepth:
                if (rootDepthDelta1 < rootDepthDelta2) {
                    newHead1 = null;
                    break;
                } else {
                    newHead2 = null;
                    break;
                }
            }
        }

        if (newHead1 != null && newHead2 == null) {
            // re-attach arc1 under its new head
            parseConfiguration.removeDependency(pair.arc1);
            String newLabel = pair.arc1.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix))
                newLabel += this.nonProjectiveArcSuffix;
            parseConfiguration.addDependency(newHead1, pair.arc1.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc2.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc2.getHead(), pair.arc2.getDependent(), pair.arc2.getLabel(), null);
        } else if (newHead1 == null && newHead2 != null) {
            // re-attach arc2 under its new head
            parseConfiguration.removeDependency(pair.arc2);
            String newLabel = pair.arc2.getLabel();
            if (this.nonProjectiveArcSuffix.length() > 0 && !newLabel.endsWith(this.nonProjectiveArcSuffix))
                newLabel += this.nonProjectiveArcSuffix;
            parseConfiguration.addDependency(newHead2, pair.arc2.getDependent(), newLabel, null);
            // for the other arc, copy the non-projective version, in case
            // there is an attempt at manual projectivisation
            DependencyArc otherProjArc = parseConfiguration.getGoverningDependency(pair.arc1.getDependent());
            parseConfiguration.removeDependency(otherProjArc);
            parseConfiguration.addDependency(pair.arc1.getHead(), pair.arc1.getDependent(), pair.arc1.getLabel(), null);
        } else {
            throw new TalismaneException("Cannot deprojectify " + pair + ". Could not find projective parents.");
        }

        // the dependency set changed: drop cached state and look for the next pair
        parseConfiguration.clearMemory();
        arcs = new ArrayList<DependencyArc>(parseConfiguration.getDependencies());
        pair = this.getNextPair(arcs);
    }
}
use of com.joliciel.talismane.TalismaneException in project talismane by joliciel-informatique.
the class StandoffReader method hasNextSentence.
/**
 * Indicates whether a parse configuration is available, building the next
 * one from the stand-off sentences when none is pending.
 * <p>
 * Nothing is built when a positive maximum sentence count has been reached,
 * when a configuration is already pending, or when all sentences have been
 * consumed. Otherwise the next sentence's tokens are joined into a text
 * (spacing decided by the session's linguistic rules), annotated, turned
 * into a pos-tagged sequence, and linked by the dependencies found in the
 * relation map. A punctuation token without a relation is attached, when a
 * punctuation label is configured, to the closest preceding token that is
 * not itself punctuation.
 *
 * @return {@code true} if a configuration is pending after this call
 * @throws TalismaneException
 *             if a relation refers to a token id that is unknown in the
 *             current sentence
 * @throws IOException
 *             declared by the overridden method
 * @throws RuntimeException
 *             if the session has no linguistic rules
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    if (this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount()) {
        // we've reached the end, do nothing
        return (configuration != null);
    }
    if (configuration != null || sentenceIndex >= sentences.size()) {
        // either a configuration is still pending, or there is nothing left to read
        return (configuration != null);
    }

    List<StandoffToken> tokens = sentences.get(sentenceIndex++);

    LinguisticRules rules = TalismaneSession.get(sessionId).getLinguisticRules();
    if (rules == null) {
        throw new RuntimeException("Linguistic rules have not been set.");
    }

    // rebuild the sentence text, letting the rules decide where spaces go
    String sentenceText = "";
    for (StandoffToken standoffToken : tokens) {
        String word = standoffToken.text;
        if (rules.shouldAddSpace(sentenceText, word)) {
            sentenceText += " ";
        }
        sentenceText += word;
    }

    Sentence sentence = new Sentence(sentenceText, sessionId);
    for (SentenceAnnotator annotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
        annotator.annotate(sentence);
    }

    PretokenisedSequence tokenSequence = new PretokenisedSequence(sentence, sessionId);
    PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);
    Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();

    // one pos-tagged token per stand-off token, remembered by stand-off id
    for (StandoffToken standoffToken : tokens) {
        Token token = tokenSequence.addToken(standoffToken.text);
        Decision posTagDecision = new Decision(standoffToken.posTag.getCode());
        PosTaggedToken posTaggedToken = new PosTaggedToken(token, posTagDecision, sessionId);
        if (LOG.isTraceEnabled()) {
            LOG.trace(posTaggedToken.toString());
        }
        posTaggedToken.setComment(standoffToken.comment);
        posTagSequence.addPosTaggedToken(posTaggedToken);
        tokensById.put(standoffToken.id, posTaggedToken);
        LOG.debug("Found token " + standoffToken.id + ", " + posTaggedToken);
    }

    tokenSequence.setWithRoot(true);
    configuration = new ParseConfiguration(posTagSequence);

    for (StandoffToken standoffToken : tokens) {
        StandoffRelation relation = relationMap.get(standoffToken.id);
        if (relation != null) {
            // explicit relation: both ends must be known tokens of this sentence
            PosTaggedToken head = tokensById.get(relation.fromToken);
            PosTaggedToken dependent = tokensById.get(relation.toToken);
            if (head == null) {
                throw new TalismaneException("No token found for head id: " + relation.fromToken);
            }
            if (dependent == null) {
                throw new TalismaneException("No token found for dependent id: " + relation.toToken);
            }
            DependencyArc arc = configuration.addDependency(head, dependent, relation.label, null);
            arc.setComment(relation.comment);
            continue;
        }

        boolean isPunctuation = standoffToken.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION;
        if (!isPunctuation || punctuationDepLabel == null) {
            continue;
        }

        // unattached punctuation: look backwards for the nearest non-punctuation token
        PosTaggedToken punctuation = tokensById.get(standoffToken.id);
        PosTaggedToken governor = null;
        for (int i = punctuation.getIndex() - 1; i >= 0 && governor == null; i--) {
            PosTaggedToken candidate = posTagSequence.get(i);
            if (candidate.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                governor = candidate;
            }
        }
        if (governor != null) {
            configuration.addDependency(governor, punctuation, punctuationDepLabel, null);
        }
    }

    return (configuration != null);
}
Aggregations