Use of com.joliciel.talismane.tokeniser.Tokeniser in the project talismane by joliciel-informatique.
From the class TalismaneAPIExamples, method example1.
/**
 * Runs one hard-coded French sentence through the tokeniser, the pos-tagger
 * and the parser in turn, printing the pos-tag sequence and then the parse
 * tree to standard output.
 *
 * @param sessionId
 *          the identifier passed to the tokeniser, pos-tagger and parser
 *          lookups
 * @throws Exception
 *           propagated from any of the analysis stages
 */
public static void example1(String sessionId) throws Exception {
    final String sentenceText = "Les amoureux qui se bécotent sur les bancs publics ont des petites gueules bien sympathiques.";

    // stage 1: split the sentence text into a token sequence
    final TokenSequence tokens = Tokeniser.getInstance(sessionId).tokeniseText(sentenceText);

    // stage 2: assign pos-tags to the tokens and show the result
    final PosTagSequence taggedTokens = PosTaggers.getPosTagger(sessionId).tagSentence(tokens);
    System.out.println(taggedTokens);

    // stage 3: parse the tagged sentence, then print it as a parse tree
    final ParseConfiguration parse = Parsers.getParser(sessionId).parseSentence(taggedTokens);
    System.out.println(new ParseTree(parse, true));
}
Use of com.joliciel.talismane.tokeniser.Tokeniser in the project talismane by joliciel-informatique.
From the class TalismaneAPIExamples, method example2.
/**
 * Like example1, but starts from a multi-sentence text: the session's text
 * annotators are applied first, sentences are detected, and each detected
 * sentence is then annotated, tokenised, pos-tagged and parsed. For every
 * sentence the pos-tag sequence, the parse configuration and the parse tree
 * are printed to standard output.
 *
 * @param sessionId
 *          the identifier passed to the session, sentence detector,
 *          tokeniser, pos-tagger and parser lookups
 * @throws Exception
 *           propagated from any of the analysis stages
 */
public static void example2(String sessionId) throws Exception {
    final String text = "Les gens qui voient de travers pensent que les bancs verts qu'on voit sur les trottoirs "
            + "sont faits pour les impotents ou les ventripotents. "
            + "Mais c'est une absurdité, car, à la vérité, ils sont là, c'est notoire, "
            + "pour accueillir quelque temps les amours débutants.";
    final RawText rawText = new RawText(text, true, sessionId);

    // run each of the session's text annotators over the raw text
    for (RawTextAnnotator textAnnotator : TalismaneSession.get(sessionId).getTextAnnotators()) {
        textAnnotator.annotate(rawText);
    }

    // sentence detection is run on the processed text, i.e. the text as
    // it stands once the annotators above have been applied
    final AnnotatedText filteredText = rawText.getProcessedText();
    SentenceDetector.getInstance(sessionId).detectSentences(filteredText);

    // The sentences are read back from the raw text itself, so that
    // annotations made on a sentence are reflected in the raw text.
    for (Sentence detected : rawText.getDetectedSentences()) {
        // run each of the session's sentence annotators over this sentence
        for (SentenceAnnotator sentenceAnnotator : TalismaneSession.get(sessionId).getSentenceAnnotators()) {
            sentenceAnnotator.annotate(detected);
        }

        // split the sentence into a token sequence
        final TokenSequence tokens = Tokeniser.getInstance(sessionId).tokeniseSentence(detected);

        // assign pos-tags to the tokens and show the result
        final PosTagSequence taggedTokens = PosTaggers.getPosTagger(sessionId).tagSentence(tokens);
        System.out.println(taggedTokens);

        // parse the tagged sentence; print the configuration first, then
        // the same configuration rendered as a parse tree
        final ParseConfiguration parse = Parsers.getParser(sessionId).parseSentence(taggedTokens);
        System.out.println(parse);
        System.out.println(new ParseTree(parse, true));
    }
}
Aggregations