use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class ParseConfiguration method addDependency.
/**
 * Creates a dependency arc from {@code head} to {@code dependent}, registers it
 * in this configuration and assigns it a probability derived from the decisions
 * that have not yet been attributed to an arc.
 *
 * @param head
 *            the governor of the new arc
 * @param dependent
 *            the dependent of the new arc
 * @param label
 *            the dependency label
 * @param transition
 *            the transition generating this dependency
 * @return the newly added arc, with its probability set
 * @throws CircularDependencyException
 *             if this would create a circular dependency
 */
public DependencyArc addDependency(PosTaggedToken head, PosTaggedToken dependent, String label, Transition transition) throws CircularDependencyException {
    final DependencyArc newArc = new DependencyArc(head, dependent, label);
    if (LOG.isTraceEnabled()) {
        LOG.trace("Adding arc " + newArc + " with transition " + transition);
    }
    this.addDependency(newArc);
    // remember which transition attached this dependent
    this.dependentTransitionMap[dependent.getIndex()] = transition;

    if (LOG.isTraceEnabled()) {
        LOG.trace("Prob for " + newArc.toString());
    }

    // accumulate the log-probabilities of every decision taken since the
    // last arc was scored
    double logProbSum = 0.0;
    int pendingDecisions = 0;
    int index = lastProbApplied;
    while (index < this.decisions.size()) {
        final Decision current = decisions.get(index);
        logProbSum += current.getProbabilityLog();
        if (LOG.isTraceEnabled()) {
            LOG.trace(current.getOutcome() + ", *= " + current.getProbability());
        }
        pendingDecisions++;
        index++;
    }

    // dividing the summed logs by the count yields the log of the geometric mean
    if (useGeometricMeanForProbs && pendingDecisions > 0) {
        logProbSum /= pendingDecisions;
    }

    newArc.setProbability(Math.exp(logProbSum));
    // every decision up to this point has now been consumed
    this.lastProbApplied = this.decisions.size();

    if (LOG.isTraceEnabled()) {
        LOG.trace("prob=" + newArc.getProbability());
    }
    return newArc;
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class LinearSVMOneVsRestDecisionMaker method decide.
/**
 * Scores every outcome against the given feature results using one binary
 * model per outcome.
 * <p>
 * When no usable features are found, every outcome receives the same
 * probability {@code 1.0 / outcomes.size()}.
 *
 * @param featureResults
 *            the feature results describing the current context
 * @return one decision per outcome, in the iteration order of a
 *         {@link TreeSet} (i.e. the natural ordering of {@code Decision})
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
    List<Feature> featureList = LinearSVMUtils.prepareData(featureResults, featureIndexMap);
    TreeSet<Decision> outcomeSet = new TreeSet<Decision>();
    if (featureList.isEmpty()) {
        LOG.info("No features for current context.");
        // 1.0 forces floating-point division: the former "1 / outcomes.size()"
        // was integer division and yielded 0.0 for two or more outcomes
        double uniformProb = 1.0 / outcomes.size();
        for (String outcome : outcomes) {
            outcomeSet.add(new Decision(outcome, uniformProb));
        }
    } else {
        Feature[] instance = featureList.toArray(new Feature[0]);
        int i = 0;
        for (Model model : models) {
            // find the position of label 1 in this model's label array;
            // presumably label 1 marks the "is this outcome" class - TODO confirm
            int[] labels = model.getLabels();
            int myLabel = 0;
            for (int j = 0; j < labels.length; j++) {
                if (labels[j] == 1) {
                    myLabel = j;
                }
            }
            double[] probabilities = new double[2];
            Linear.predictProbability(model, instance, probabilities);
            // models are paired with outcomes by position
            outcomeSet.add(new Decision(outcomes.get(i), probabilities[myLabel]));
            i++;
        }
    }
    return new ArrayList<Decision>(outcomeSet);
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class StandoffReader method hasNextSentence.
/**
 * Reports whether a parse configuration is available, building one from the
 * next stored sentence when none is currently pending.
 *
 * @return {@code true} if a configuration is pending after this call
 * @throws TalismaneException
 *             if a relation refers to a token id that is not in the sentence
 * @throws IOException
 *             declared by the signature
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
    // we've reached the configured maximum: do not build anything further
    if (this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount()) {
        return (configuration != null);
    }
    // a configuration is already pending, or there are no more sentences
    if (configuration != null || sentenceIndex >= sentences.size()) {
        return (configuration != null);
    }

    List<StandoffToken> sentenceTokens = sentences.get(sentenceIndex++);
    LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
    if (linguisticRules == null) {
        throw new RuntimeException("Linguistic rules have not been set.");
    }

    // rebuild the raw sentence text, letting the rules decide on spacing
    String rawText = "";
    for (StandoffToken current : sentenceTokens) {
        String form = current.text;
        if (linguisticRules.shouldAddSpace(rawText, form)) {
            rawText += " ";
        }
        rawText += form;
    }

    Sentence sentence = new Sentence(rawText, sessionId);
    for (SentenceAnnotator annotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
        annotator.annotate(sentence);
    }

    // create the token and pos-tag sequences, indexing tokens by standoff id
    PretokenisedSequence tokenSequence = new PretokenisedSequence(sentence, sessionId);
    PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);
    Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
    for (StandoffToken current : sentenceTokens) {
        Token token = tokenSequence.addToken(current.text);
        Decision tagDecision = new Decision(current.posTag.getCode());
        PosTaggedToken taggedToken = new PosTaggedToken(token, tagDecision, sessionId);
        if (LOG.isTraceEnabled()) {
            LOG.trace(taggedToken.toString());
        }
        taggedToken.setComment(current.comment);
        posTagSequence.addPosTaggedToken(taggedToken);
        tokensById.put(current.id, taggedToken);
        LOG.debug("Found token " + current.id + ", " + taggedToken);
    }
    tokenSequence.setWithRoot(true);

    configuration = new ParseConfiguration(posTagSequence);
    for (StandoffToken current : sentenceTokens) {
        StandoffRelation relation = relationMap.get(current.id);
        if (relation == null) {
            // no explicit relation: optionally attach punctuation to the
            // nearest preceding non-punctuation token
            boolean isPunctuation = current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION;
            if (isPunctuation && punctuationDepLabel != null) {
                PosTaggedToken punctuation = tokensById.get(current.id);
                for (int i = punctuation.getIndex() - 1; i >= 0; i--) {
                    PosTaggedToken governor = posTagSequence.get(i);
                    if (governor.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
                        configuration.addDependency(governor, punctuation, punctuationDepLabel, null);
                        break;
                    }
                }
            }
            continue;
        }

        PosTaggedToken head = tokensById.get(relation.fromToken);
        if (head == null) {
            throw new TalismaneException("No token found for head id: " + relation.fromToken);
        }
        PosTaggedToken dependent = tokensById.get(relation.toToken);
        if (dependent == null) {
            throw new TalismaneException("No token found for dependent id: " + relation.toToken);
        }
        DependencyArc arc = configuration.addDependency(head, dependent, relation.label, null);
        arc.setComment(relation.comment);
    }

    return (configuration != null);
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class LinearSVMDecisionMaker method decide.
/**
 * Scores every outcome against the given feature results using a single
 * multi-class model.
 * <p>
 * When no usable features are found, every outcome receives the same
 * probability {@code 1.0 / outcomes.size()}.
 *
 * @param featureResults
 *            the feature results describing the current context
 * @return the decisions, in the iteration order of a {@link TreeSet} (i.e.
 *         the natural ordering of {@code Decision})
 */
@Override
public List<Decision> decide(List<FeatureResult<?>> featureResults) {
    List<Feature> featureList = LinearSVMUtils.prepareData(featureResults, featureIndexMap);
    TreeSet<Decision> outcomeSet = new TreeSet<Decision>();
    if (featureList.isEmpty()) {
        LOG.info("No features for current context.");
        // 1.0 forces floating-point division: the former "1 / outcomes.size()"
        // was integer division and yielded 0.0 for two or more outcomes
        double uniformProb = 1.0 / outcomes.size();
        for (String outcome : outcomes) {
            outcomeSet.add(new Decision(outcome, uniformProb));
        }
    } else {
        Feature[] instance = featureList.toArray(new Feature[0]);
        // one probability slot per label known to the model
        int labelCount = model.getLabels().length;
        double[] probabilities = new double[labelCount];
        Linear.predictProbability(model, instance, probabilities);
        // probabilities are paired with outcomes by position
        for (int i = 0; i < labelCount; i++) {
            outcomeSet.add(new Decision(outcomes.get(i), probabilities[i]));
        }
    }
    return new ArrayList<Decision>(outcomeSet);
}
use of com.joliciel.talismane.machineLearning.Decision in project talismane by joliciel-informatique.
the class SentenceDetectorTest method testDetectSentences2.
/**
 * Runs sentence detection on a text whose analysis window starts after an
 * already-detected first sentence, using a decision maker that always answers
 * IS_BOUNDARY, together with no-sentence-break markers, an explicit sentence
 * break marker and a pre-existing boundary annotation.
 */
@Test
public void testDetectSentences2() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();

    final String sessionId = "test";

    // stub decision maker: every candidate is a boundary with probability 1
    DecisionMaker decisionMaker = new DecisionMaker() {
        @Override
        public ScoringStrategy<ClassificationSolution> getDefaultScoringStrategy() {
            return new GeometricMeanScoringStrategy();
        }

        @Override
        public List<Decision> decide(List<FeatureResult<?>> featureResults) {
            List<Decision> result = new ArrayList<>();
            result.add(new Decision(SentenceDetectorOutcome.IS_BOUNDARY.name(), 1.0));
            return result;
        }
    };

    String[] labels = new String[0];
    Set<SentenceDetectorFeature<?>> features = new HashSet<>();
    SentenceDetector sentenceDetector = new SentenceDetector(decisionMaker, features, sessionId);

    String text = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After";

    // character offsets used below, each expressed as the length of a prefix of the text
    final int textStart = "".length();
    final int firstSentenceEnd = "Before analysis.".length();
    final int analysisStart = "Before analysis. ".length();
    final int firstMrStart = "Before analysis. Hello ".length();
    final int firstMrEnd = "Before analysis. Hello Mr.".length();
    final int newlineStart = "Before analysis. Hello Mr. Jones".length();
    final int newlineEnd = "Before analysis. Hello Mr. Jones\n".length();
    final int secondMrStart = "Before analysis. Hello Mr. Jones\nHow are you, ".length();
    final int secondMrEnd = "Before analysis. Hello Mr. Jones\nHow are you, Mr.".length();
    final int questionEnd = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones?".length();
    final int textEnd = "Before analysis. Hello Mr. Jones\nHow are you, Mr. Jones? After".length();

    AnnotatedText annotatedText = new AnnotatedText(text, analysisStart, text.length());

    // the two "Mr." spans must not produce a sentence break
    List<Annotation<RawTextNoSentenceBreakMarker>> noSentenceBreakMarkers = new ArrayList<>();
    noSentenceBreakMarkers.add(new Annotation<>(firstMrStart, firstMrEnd, new RawTextNoSentenceBreakMarker("me"), labels));
    noSentenceBreakMarkers.add(new Annotation<>(secondMrStart, secondMrEnd, new RawTextNoSentenceBreakMarker("me"), labels));
    annotatedText.addAnnotations(noSentenceBreakMarkers);

    // boundary already known before the analysis window
    List<Annotation<SentenceBoundary>> existingBoundaries = new ArrayList<>();
    existingBoundaries.add(new Annotation<>(textStart, firstSentenceEnd, new SentenceBoundary(), labels));
    annotatedText.addAnnotations(existingBoundaries);

    // the newline is marked as an explicit sentence break
    List<Annotation<RawTextSentenceBreakMarker>> sentenceBreaks = new ArrayList<>();
    sentenceBreaks.add(new Annotation<>(newlineStart, newlineEnd, new RawTextSentenceBreakMarker("me"), labels));
    annotatedText.addAnnotations(sentenceBreaks);

    List<Integer> guessedBoundaries = sentenceDetector.detectSentences(annotatedText);

    assertEquals(2, guessedBoundaries.size());
    assertEquals(newlineEnd, guessedBoundaries.get(0).intValue());
    assertEquals(questionEnd, guessedBoundaries.get(1).intValue());

    List<Annotation<SentenceBoundary>> sentenceBoundaries = annotatedText.getAnnotations(SentenceBoundary.class);
    System.out.println(sentenceBoundaries.toString());
    assertEquals(4, sentenceBoundaries.size());

    assertEquals(textStart, sentenceBoundaries.get(0).getStart());
    assertEquals(firstSentenceEnd, sentenceBoundaries.get(0).getEnd());

    assertEquals(analysisStart, sentenceBoundaries.get(1).getStart());
    assertEquals(newlineEnd, sentenceBoundaries.get(1).getEnd());

    assertEquals(newlineEnd, sentenceBoundaries.get(2).getStart());
    assertEquals(questionEnd, sentenceBoundaries.get(2).getEnd());

    assertEquals(questionEnd, sentenceBoundaries.get(3).getStart());
    assertEquals(textEnd, sentenceBoundaries.get(3).getEnd());
}
Aggregations