use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.
the class ParseFeatureTester method onNextParseConfiguration.
/**
 * Replays the transitions of the supplied configuration one by one and, before
 * each transition is applied, evaluates every feature in {@code parseFeatures}
 * against the configuration reached so far.
 * <p>
 * Each non-null feature result is recorded in {@code featureResultMap}, keyed
 * first by the result's string form and then by the code of the transition about
 * to be applied; the stored value is a one-line rendering of the sentence in which
 * the tokens at the top of the stack and at the head of the buffer are wrapped in
 * {@code #[ ... ]#}.
 *
 * @param parseConfiguration the configuration whose transition history is replayed
 * @throws TalismaneException declared by the overridden method
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException {
  // Begin with a fresh configuration over the same pos-tag sequence.
  ParseConfiguration replayConfig = new ParseConfiguration(parseConfiguration.getPosTagSequence());

  for (Transition transition : parseConfiguration.getTransitions()) {
    // Render the sentence, marking the current stack top and buffer head.
    StringBuilder rendering = new StringBuilder();
    for (PosTaggedToken taggedToken : replayConfig.getPosTagSequence()) {
      boolean highlighted = taggedToken.equals(replayConfig.getStack().getFirst())
          || taggedToken.equals(replayConfig.getBuffer().getFirst());
      String tokenText = taggedToken.getToken().getOriginalText().replace(' ', '_') + "/" + taggedToken.getTag().toString();
      if (highlighted) {
        rendering.append(" #[").append(tokenText).append("]#");
      } else {
        rendering.append(" ").append(tokenText);
      }
    }
    rendering.append(" ## Line: ").append(parseConfiguration.getSentence().getStartLineNumber());
    String sentenceText = rendering.toString();

    if (LOG.isTraceEnabled()) {
      LOG.trace(sentenceText);
    }

    // Evaluate all features first; the map is only updated once every check has run.
    List<FeatureResult<?>> results = new ArrayList<FeatureResult<?>>();
    for (ParseConfigurationFeature<?> parseFeature : parseFeatures) {
      RuntimeEnvironment env = new RuntimeEnvironment();
      FeatureResult<?> result = parseFeature.check(replayConfig, env);
      if (result == null) {
        continue;
      }
      results.add(result);
      if (LOG.isTraceEnabled()) {
        LOG.trace(result.toString());
      }
    }

    // File the rendered sentence under (feature result, transition code).
    String transitionCode = transition.getCode();
    for (FeatureResult<?> result : results) {
      String resultKey = result.toString();
      Map<String, List<String>> byTransition = featureResultMap.get(resultKey);
      if (byTransition == null) {
        byTransition = new TreeMap<String, List<String>>();
        featureResultMap.put(resultKey, byTransition);
      }
      List<String> renderedSentences = byTransition.get(transitionCode);
      if (renderedSentences == null) {
        renderedSentences = new ArrayList<String>();
        byTransition.put(transitionCode, renderedSentences);
      }
      renderedSentences.add(sentenceText);
    }

    // Advance: copy the configuration and apply the transition to the copy.
    replayConfig = new ParseConfiguration(replayConfig);
    transition.apply(replayConfig);
  }
}
use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.
the class ParserFeatureParser method getRules.
/**
 * Builds parser rules from tab-separated rule descriptors.
 * <p>
 * Blank lines and lines starting with {@code #} are skipped. Every other line has
 * either two columns ({@code transitionCode, descriptor}) or three or more
 * ({@code transitionCode, name, descriptor}). A transition code starting with
 * {@code !} introduces a negative rule over a {@code ;}-separated list of
 * transition codes. When the transition code is empty the line must carry a name;
 * its descriptor is still parsed but no rule is produced for it (presumably so the
 * named descriptor can be referenced elsewhere - confirm against parseDescriptor).
 *
 * @param ruleDescriptors the raw descriptor lines
 * @return one rule per boolean feature produced by each line that names a transition
 * @throws TalismaneException if a line has fewer than two tab-separated columns, if
 *         a line without a transition has no name, or if a rule's feature is not boolean
 */
public List<ParserRule> getRules(List<String> ruleDescriptors) throws TalismaneException {
  List<ParserRule> rules = new ArrayList<ParserRule>();
  FunctionDescriptorParser descriptorParser = new FunctionDescriptorParser();
  for (String ruleDescriptor : ruleDescriptors) {
    LOG.debug(ruleDescriptor);
    if (ruleDescriptor.trim().length() > 0 && !ruleDescriptor.startsWith("#")) {
      String[] ruleParts = ruleDescriptor.split("\t");
      // String.split drops trailing empty columns, so "code" and "code\t" both
      // yield a single part; report that as a descriptor error instead of letting
      // ruleParts[1] fail with an ArrayIndexOutOfBoundsException.
      if (ruleParts.length < 2) {
        throw new TalismaneException("Rule must contain a transition column and a descriptor, separated by tabs: " + ruleDescriptor);
      }
      String transitionCode = ruleParts[0];
      Transition transition = null;
      Set<Transition> transitions = null;
      boolean negative = false;
      String descriptor = null;
      String descriptorName = null;
      if (ruleParts.length > 2) {
        descriptor = ruleParts[2];
        descriptorName = ruleParts[1];
      } else {
        descriptor = ruleParts[1];
      }
      if (transitionCode.length() == 0) {
        if (descriptorName == null) {
          throw new TalismaneException("Rule without Transition must have a name.");
        }
      } else {
        if (transitionCode.startsWith("!")) {
          // Negative rule: collect every transition listed after the "!".
          negative = true;
          String[] transitionCodes = transitionCode.substring(1).split(";");
          transitions = new HashSet<Transition>();
          for (String code : transitionCodes) {
            Transition oneTransition = TalismaneSession.get(sessionId).getTransitionSystem().getTransitionForCode(code);
            transitions.add(oneTransition);
          }
          // Only used below as a "this line defines a rule" marker.
          transition = transitions.iterator().next();
        } else {
          transition = TalismaneSession.get(sessionId).getTransitionSystem().getTransitionForCode(transitionCode);
        }
      }
      FunctionDescriptor functionDescriptor = descriptorParser.parseDescriptor(descriptor);
      if (descriptorName != null)
        functionDescriptor.setDescriptorName(descriptorName);
      List<ParseConfigurationFeature<?>> myFeatures = this.parseDescriptor(functionDescriptor);
      if (transition != null) {
        for (ParseConfigurationFeature<?> feature : myFeatures) {
          if (feature instanceof BooleanFeature) {
            @SuppressWarnings("unchecked") BooleanFeature<ParseConfigurationWrapper> condition = (BooleanFeature<ParseConfigurationWrapper>) feature;
            if (negative) {
              ParserRule rule = new ParserRule(condition, transitions, true);
              rules.add(rule);
            } else {
              ParserRule rule = new ParserRule(condition, transition, false);
              rules.add(rule);
            }
          } else {
            throw new TalismaneException("Rule must be based on a boolean feature.");
          }
        }
        // next feature
      }
      // is it a rule, or just a descriptor
    }
    // proper rule descriptor
  }
  // next rule descriptor
  return rules;
}
use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.
the class ParseEvaluationSentenceWriter method onParseEnd.
/**
 * Writes one sentence to {@code csvFileWriter} as a block of CSV rows: the real
 * configuration first (token text, pos-tag code, governing label, governor column),
 * then, for each of the first {@code guessCount} guessed configurations, the same
 * rows (token text only if {@code hasTokeniser}, pos-tag only if
 * {@code hasPosTagger}) followed by a row of transition probabilities.
 * <p>
 * Columns are aligned on token start index: every distinct start index found in
 * the real tokens (and, when a tokeniser or pos-tagger is evaluated, in the
 * considered guesses) gets its own column; a configuration with no token at that
 * start index gets an empty cell there. If fewer than {@code guessCount} guesses
 * exist, two empty lines are written for each missing guess.
 * <p>
 * A token without a governing arc gets an empty label cell, in the real rows as
 * well as in the guessed ones.
 *
 * @param realConfiguration the reference parse
 * @param guessedConfigurations the guessed parses, in the order supplied by the caller
 * @throws IOException if writing to the CSV writer fails
 */
@Override
public void onParseEnd(ParseConfiguration realConfiguration, List<ParseConfiguration> guessedConfigurations) throws IOException {
  // Collect every start index that needs its own column.
  TreeSet<Integer> startIndexes = new TreeSet<Integer>();
  this.addStartIndexes(realConfiguration.getPosTagSequence(), startIndexes);
  if (hasTokeniser || hasPosTagger) {
    int i = 0;
    for (ParseConfiguration guessedConfiguration : guessedConfigurations) {
      this.addStartIndexes(guessedConfiguration.getPosTagSequence(), startIndexes);
      i++;
      if (i == guessCount)
        break;
    }
  }

  // Map each start index to its column number, in ascending start-index order.
  Map<Integer, Integer> startIndexMap = new HashMap<Integer, Integer>();
  int j = 0;
  for (int startIndex : startIndexes) {
    startIndexMap.put(startIndex, j++);
  }

  PosTaggedToken[] realTokens = this.alignTokens(realConfiguration.getPosTagSequence(), startIndexMap);
  this.writeTextRow(realTokens);
  this.writeTagRow(realTokens);
  this.writeLabelRow(realConfiguration, realTokens);
  this.writeGovernorRow(realConfiguration, realTokens, startIndexMap);

  for (int i = 0; i < guessCount; i++) {
    if (i < guessedConfigurations.size()) {
      ParseConfiguration guessedConfiguration = guessedConfigurations.get(i);
      PosTaggedToken[] guessedTokens = this.alignTokens(guessedConfiguration.getPosTagSequence(), startIndexMap);
      if (hasTokeniser) {
        this.writeTextRow(guessedTokens);
      }
      if (hasPosTagger) {
        this.writeTagRow(guessedTokens);
      }
      this.writeLabelRow(guessedConfiguration, guessedTokens);
      this.writeGovernorRow(guessedConfiguration, guessedTokens, startIndexMap);
      this.writeProbabilityRow(guessedConfiguration, guessedTokens);
    } else {
      csvFileWriter.write("\n");
      csvFileWriter.write("\n");
    }
    // have more configurations
  }
  // next guessed configuration
  csvFileWriter.flush();
}

/** Adds the start index of every non-root token in the sequence to {@code startIndexes}. */
private void addStartIndexes(PosTagSequence posTagSequence, TreeSet<Integer> startIndexes) {
  for (PosTaggedToken posTaggedToken : posTagSequence) {
    if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
      Token token = posTaggedToken.getToken();
      startIndexes.add(token.getStartIndex());
    }
  }
}

/** Places each non-root token in the column assigned to its start index; unused columns stay null. */
private PosTaggedToken[] alignTokens(PosTagSequence posTagSequence, Map<Integer, Integer> startIndexMap) {
  PosTaggedToken[] alignedTokens = new PosTaggedToken[startIndexMap.size()];
  for (PosTaggedToken posTaggedToken : posTagSequence) {
    if (!posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG)) {
      alignedTokens[startIndexMap.get(posTaggedToken.getToken().getStartIndex())] = posTaggedToken;
    }
  }
  return alignedTokens;
}

/** Writes a row containing the original text of each aligned token. */
private void writeTextRow(PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      csvFileWriter.write(CSV.format(posTaggedToken.getToken().getOriginalText()));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes a row containing the pos-tag code of each aligned token. */
private void writeTagRow(PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      csvFileWriter.write(CSV.format(posTaggedToken.getTag().getCode()));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes a row with each token's governing label: "" when there is no arc, "null" when the arc has no label. */
private void writeLabelRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
      String label = "";
      if (arc != null) {
        label = arc.getLabel() == null ? "null" : arc.getLabel();
      }
      csvFileWriter.write(CSV.format(label));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes a row with the column label of each token's governor, or "ROOT" when there is no arc or the head is the root. */
private void writeGovernorRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens, Map<Integer, Integer> startIndexMap) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
      int startIndex = -1;
      if (arc != null) {
        PosTaggedToken head = arc.getHead();
        if (!head.getTag().equals(PosTag.ROOT_POS_TAG)) {
          startIndex = head.getToken().getStartIndex();
        }
      }
      if (startIndex < 0)
        csvFileWriter.write(CSV.format("ROOT"));
      else
        csvFileWriter.write(CSV.getColumnLabel(startIndexMap.get(startIndex)) + CSV.getCsvSeparator());
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}

/** Writes a row with the probability of the transition behind each token's governing arc; 1.0 when no arc or transition is found. */
private void writeProbabilityRow(ParseConfiguration configuration, PosTaggedToken[] alignedTokens) throws IOException {
  for (PosTaggedToken posTaggedToken : alignedTokens) {
    if (posTaggedToken != null) {
      DependencyArc arc = configuration.getGoverningDependency(posTaggedToken);
      double prob = 1.0;
      if (arc != null) {
        Transition transition = configuration.getTransition(arc);
        if (transition != null)
          prob = transition.getDecision().getProbability();
      }
      csvFileWriter.write(CSV.format(prob));
    } else {
      csvFileWriter.write(CSV.getCsvSeparator());
    }
  }
  csvFileWriter.write("\n");
}
use of com.joliciel.talismane.parser.Transition in project talismane by joliciel-informatique.
the class TransitionLogWriter method onNextParseConfiguration.
/**
 * Logs the transition history of a parse configuration to {@code writer}.
 * <p>
 * After a blank line, the initial top-of-stack and top-of-buffer are written; then
 * one tab-separated line per transition: the transition code, the top of stack and
 * top of buffer after applying it, and, if the transition added a dependency, that
 * dependency rendered as {@code label[headText|headTag,dependentText|dependentTag]}.
 *
 * @param parseConfiguration the configuration whose transitions are replayed
 * @throws TalismaneException declared by the overridden method
 * @throws IOException if writing to the log fails
 */
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration) throws TalismaneException, IOException {
  ParseConfiguration replayConfig = new ParseConfiguration(parseConfiguration.getPosTagSequence());
  writer.write("\n");
  writer.write("\t" + this.getTopOfStack(replayConfig) + "\t" + this.getTopOfBuffer(replayConfig) + "\t" + "\n");

  // Arcs already reported, so that each transition line shows only the arc it introduced.
  Set<DependencyArc> knownArcs = new HashSet<DependencyArc>();
  for (Transition transition : parseConfiguration.getTransitions()) {
    replayConfig = new ParseConfiguration(replayConfig);
    transition.apply(replayConfig);

    DependencyArc addedArc = null;
    if (replayConfig.getDependencies().size() > knownArcs.size()) {
      for (DependencyArc candidate : replayConfig.getDependencies()) {
        // Set.add returns true only for an arc that was not yet known.
        if (knownArcs.add(candidate)) {
          addedArc = candidate;
          break;
        }
      }
    }

    String addedArcText = "";
    if (addedArc != null) {
      String label = addedArc.getLabel();
      String headPart = addedArc.getHead().getToken().getOriginalText().replace(' ', '_') + "|" + addedArc.getHead().getTag().getCode();
      String dependentPart = addedArc.getDependent().getToken().getOriginalText().replace(' ', '_') + "|" + addedArc.getDependent().getTag().getCode();
      addedArcText = label + "[" + headPart + "," + dependentPart + "]";
    }

    String logLine = transition.getCode() + "\t" + this.getTopOfStack(replayConfig) + "\t" + this.getTopOfBuffer(replayConfig) + "\t" + addedArcText + "\n";
    writer.write(logLine);
  }
  writer.flush();
}
Aggregations