use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.
the class TalismaneAPIExamples method example2.
/**
 * Similar to example1, but starts from raw text: the text is first passed
 * through the session's raw-text annotators and the sentence detector. Each
 * detected sentence is then annotated, tokenised, pos-tagged and parsed, and
 * its pos-tag sequence, parse configuration and parse tree are printed to
 * standard output.
 *
 * @param sessionId
 *          identifier used to look up the Talismane session and its
 *          components
 * @throws Exception
 *           if any of the analysis stages fails
 */
public static void example2(String sessionId) throws Exception {
  String text = "Les gens qui voient de travers pensent que les bancs verts qu'on voit sur les trottoirs "
      + "sont faits pour les impotents ou les ventripotents. "
      + "Mais c'est une absurdité, car, à la vérité, ils sont là, c'est notoire, "
      + "pour accueillir quelque temps les amours débutants.";
  RawText rawText = new RawText(text, true, sessionId);

  // run every raw-text annotator registered on the session over the raw text
  for (RawTextAnnotator rawTextAnnotator : TalismaneSession.get(sessionId).getTextAnnotators()) {
    rawTextAnnotator.annotate(rawText);
  }

  // fetch the processed text once the annotators have run, then detect
  // sentences in it
  AnnotatedText processedText = rawText.getProcessedText();
  SentenceDetector.getInstance(sessionId).detectSentences(processedText);

  // the detected sentences are read back from the raw text itself, so that
  // annotations made on the sentences get reflected in the raw text
  for (Sentence sentence : rawText.getDetectedSentences()) {
    // apply the session's sentence annotators to this sentence
    for (SentenceAnnotator sentenceAnnotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
      sentenceAnnotator.annotate(sentence);
    }

    // tokenise the sentence
    TokenSequence tokens = Tokeniser.getInstance(sessionId).tokeniseSentence(sentence);

    // pos-tag the token sequence and print the result
    PosTagSequence tagged = PosTaggers.getPosTagger(sessionId).tagSentence(tokens);
    System.out.println(tagged);

    // parse the pos-tag sequence, then print the configuration and its tree
    ParseConfiguration parse = Parsers.getParser(sessionId).parseSentence(tagged);
    System.out.println(parse);
    System.out.println(new ParseTree(parse, true));
  }
}
use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.
the class StandoffReader method hasNextSentence.
/**
 * Reports whether a parse configuration is available, building the next one
 * from the standoff data when necessary.
 * <p>
 * Nothing is built when the maximum sentence count is positive and has been
 * reached, when a configuration is already pending, or when no sentences
 * remain. Otherwise the next sentence's standoff tokens are consumed
 * (advancing {@code sentenceIndex}), the sentence text is rebuilt using the
 * session's linguistic rules to decide on spacing, the sentence annotators
 * are applied, and a pos-tag sequence and parse configuration are created.
 * Dependencies are then added from {@code relationMap}; a punctuation token
 * without a relation is attached to the closest preceding non-punctuation
 * token using {@code punctuationDepLabel}, when that label is not null.
 *
 * @return true if a parse configuration is pending after this call
 * @throws TalismaneException
 *           if a relation refers to a head or dependent id for which no token
 *           was read
 * @throws IOException
 *           if raised by one of the called components
 */
@Override
public boolean hasNextSentence() throws TalismaneException, IOException {
  boolean limitReached = this.getMaxSentenceCount() > 0 && sentenceCount >= this.getMaxSentenceCount();
  if (limitReached || configuration != null || sentenceIndex >= sentences.size()) {
    // nothing new to build: just report whether a configuration is pending
    return configuration != null;
  }

  List<StandoffToken> standoffTokens = sentences.get(sentenceIndex++);
  LinguisticRules linguisticRules = TalismaneSession.get(sessionId).getLinguisticRules();
  if (linguisticRules == null) {
    throw new RuntimeException("Linguistic rules have not been set.");
  }

  // rebuild the sentence text, letting the linguistic rules decide where a
  // space goes between consecutive tokens
  String sentenceText = "";
  for (StandoffToken current : standoffTokens) {
    String separator = linguisticRules.shouldAddSpace(sentenceText, current.text) ? " " : "";
    sentenceText = sentenceText + separator + current.text;
  }

  Sentence sentence = new Sentence(sentenceText, sessionId);
  for (SentenceAnnotator sentenceAnnotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
    sentenceAnnotator.annotate(sentence);
  }

  // create one pos-tagged token per standoff token, indexed by standoff id
  PretokenisedSequence pretokenised = new PretokenisedSequence(sentence, sessionId);
  PosTagSequence tagged = new PosTagSequence(pretokenised);
  Map<String, PosTaggedToken> tokensById = new HashMap<String, PosTaggedToken>();
  for (StandoffToken current : standoffTokens) {
    Token token = pretokenised.addToken(current.text);
    PosTaggedToken taggedToken = new PosTaggedToken(token, new Decision(current.posTag.getCode()), sessionId);
    if (LOG.isTraceEnabled()) {
      LOG.trace(taggedToken.toString());
    }
    taggedToken.setComment(current.comment);
    tagged.addPosTaggedToken(taggedToken);
    tokensById.put(current.id, taggedToken);
    LOG.debug("Found token " + current.id + ", " + taggedToken);
  }

  pretokenised.setWithRoot(true);
  // the field is assigned before the dependencies are added, so it is
  // already set if adding a dependency fails below
  configuration = new ParseConfiguration(tagged);

  for (StandoffToken current : standoffTokens) {
    StandoffRelation relation = relationMap.get(current.id);
    if (relation != null) {
      PosTaggedToken head = tokensById.get(relation.fromToken);
      if (head == null) {
        throw new TalismaneException("No token found for head id: " + relation.fromToken);
      }
      PosTaggedToken dependent = tokensById.get(relation.toToken);
      if (dependent == null) {
        throw new TalismaneException("No token found for dependent id: " + relation.toToken);
      }
      DependencyArc arc = configuration.addDependency(head, dependent, relation.label, null);
      arc.setComment(relation.comment);
    } else if (current.posTag.getOpenClassIndicator() == PosTagOpenClassIndicator.PUNCTUATION && punctuationDepLabel != null) {
      // punctuation without an explicit relation: walk backwards to the
      // closest preceding token that is not itself punctuation
      PosTaggedToken dependent = tokensById.get(current.id);
      for (int i = dependent.getIndex() - 1; i >= 0; i--) {
        PosTaggedToken candidate = tagged.get(i);
        if (candidate.getTag().getOpenClassIndicator() != PosTagOpenClassIndicator.PUNCTUATION) {
          configuration.addDependency(candidate, dependent, punctuationDepLabel, null);
          break;
        }
      }
    }
  }

  return configuration != null;
}
use of com.joliciel.talismane.posTagger.PosTagSequence in project talismane by joliciel-informatique.
the class SerializationTest method testSerialize.
/**
 * Checks that a sentence, its token sequence, pos-tag sequence, parse
 * configuration and parse tree survive a round trip through Java
 * serialization: the objects are written to an in-memory byte array, read
 * back, and compared to the originals with {@code assertEquals}.
 *
 * @throws Exception
 *           if building the objects or (de)serializing them fails
 */
@Test
public void testSerialize() throws Exception {
  System.setProperty("config.file", "src/test/resources/test.conf");
  ConfigFactory.invalidateCaches();
  String sessionId = "test";

  // build the token sequence by character offsets into the sentence text
  Sentence sentence = new Sentence("Il aime les pommes", sessionId);
  TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
  tokenSequence.addToken("".length(), "Il".length());
  tokenSequence.addToken("Il ".length(), "Il aime".length());
  tokenSequence.addToken("Il aime ".length(), "Il aime les".length());
  tokenSequence.addToken("Il aime les ".length(), "Il aime les pommes".length());

  // assign one pos-tag decision per token, then prepend the root
  PosTagSequence posTagSequence = new PosTagSequence(tokenSequence);
  posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("CLS", 0.90), sessionId));
  posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("V", 0.70), sessionId));
  posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("DET", 0.60), sessionId));
  posTagSequence.addPosTaggedToken(new PosTaggedToken(posTagSequence.getNextToken(), new Decision("NC", 0.80), sessionId));
  posTagSequence.prependRoot();

  // apply the transitions by hand to build the parse configuration
  ParseConfiguration configuration = new ParseConfiguration(posTagSequence);
  LOG.debug(configuration.toString());
  // ROOT ... il
  new ShiftTransition().apply(configuration);
  LOG.debug("Shift -> " + configuration.toString());
  // ROOT il <- aime
  new LeftArcEagerTransition("suj").apply(configuration);
  LOG.debug("Left -> " + configuration.toString());
  // ROOT -> aime
  new RightArcEagerTransition("root").apply(configuration);
  LOG.debug("Right -> " + configuration.toString());
  // ROOT aime ... les
  new ShiftTransition().apply(configuration);
  LOG.debug("Shift -> " + configuration.toString());
  // ROOT aime les <- pommes
  new LeftArcEagerTransition("det").apply(configuration);
  LOG.debug("Left -> " + configuration.toString());
  // ROOT aime -> pommes
  new RightArcEagerTransition("obj").apply(configuration);
  LOG.debug("Right -> " + configuration.toString());
  ParseTree parseTree = new ParseTree(configuration, true);
  LOG.debug(parseTree.toString());

  // serialize all five objects; closing the object stream flushes it, so
  // the byte array is only read once everything has been written
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
    oos.writeObject(sentence);
    oos.writeObject(tokenSequence);
    oos.writeObject(posTagSequence);
    oos.writeObject(configuration);
    oos.writeObject(parseTree);
  }
  byte[] bytes = bos.toByteArray();

  // read the objects back in the order they were written and compare; the
  // input stream is closed even if a cast or an assertion fails
  try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
    Sentence sentence2 = (Sentence) ois.readObject();
    TokenSequence tokenSequence2 = (TokenSequence) ois.readObject();
    PosTagSequence posTagSequence2 = (PosTagSequence) ois.readObject();
    ParseConfiguration configuration2 = (ParseConfiguration) ois.readObject();
    ParseTree parseTree2 = (ParseTree) ois.readObject();
    assertEquals(sentence, sentence2);
    assertEquals(tokenSequence, tokenSequence2);
    assertEquals(posTagSequence, posTagSequence2);
    assertEquals(configuration, configuration2);
    assertEquals(parseTree, parseTree2);
  }
}
Aggregations