Use of org.grupolys.samulan.util.TaggedTokenInformation in project uuusa by aghie.
From the class MaltParserWrapper, the method parse:
public SentimentDependencyGraph parse(List<TaggedTokenInformation> ttis) {
    SentimentDependencyGraph sdg = null;
    // Render each tagged token as a CoNLL line for MaltParser.
    String[] tokens = new String[ttis.size()];
    int i = 0;
    for (TaggedTokenInformation tti : ttis) {
        tokens[i] = tti.toConll();
        i += 1;
    }
    try {
        // Parse the sentence and rebuild a dependency graph from the CoNLL
        // output (one line per token, with head index and dependency type).
        String[] outputTokens = this.parser.parseTokens(tokens);
        sdg = new SentimentDependencyGraph(String.join("\n", outputTokens));
    } catch (MaltChainedException e1) {
        e1.printStackTrace();
    }
    // Terminate the parser model after use.
    try {
        this.parser.terminateParserModel();
    } catch (MaltChainedException e) {
        e.printStackTrace();
    }
    return sdg;
}
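The wrapper delegates to MaltParser's MaltParserService. Below is a minimal, self-contained sketch of that underlying API; the model name and option string follow the MaltParser documentation example and are assumptions, not uuusa's actual configuration.

import org.maltparser.MaltParserService;
import org.maltparser.core.exception.MaltChainedException;

public class MaltParseDemo {
    public static void main(String[] args) throws MaltChainedException {
        MaltParserService service = new MaltParserService();
        // Load a trained .mco model; the model name here is an assumption.
        service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w . -lfi parser.log");
        // One CoNLL line per token: ID, FORM, LEMMA, CPOSTAG, POSTAG, FEATS.
        String[] tokens = {
            "1\tThis\t_\tDT\tDT\t_",
            "2\trocks\t_\tVBZ\tVBZ\t_"
        };
        for (String line : service.parseTokens(tokens)) {
            System.out.println(line); // same lines with HEAD and DEPREL appended
        }
        service.terminateParserModel();
    }
}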
Use of org.grupolys.samulan.util.TaggedTokenInformation in project uuusa by aghie.
From the class MaxentStanfordTagger, the method tag:
@Override
public List<TaggedTokenInformation> tag(List<String> tokens) {
    ArrayList<TaggedTokenInformation> ttis = new ArrayList<TaggedTokenInformation>();
    String tag, token;
    // The Stanford tagger returns space-separated "token_TAG" pairs.
    String taggedText = this.tagger.tagTokenizedString(String.join(" ", tokens));
    short i = 1;
    for (String tagToken : taggedText.split(" ")) {
        // Split on the last separator so tokens containing it survive intact.
        token = tagToken.substring(0, tagToken.lastIndexOf(STANFORD_SEPARATOR));
        tag = tagToken.substring(tagToken.lastIndexOf(STANFORD_SEPARATOR) + 1);
        ttis.add(new TaggedTokenInformation(i, token, null, tag, tag, null));
        i += 1;
    }
    return ttis;
}
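The call behind this.tagger is Stanford CoreNLP's MaxentTagger.tagTokenizedString. A short sketch of the output format that tag parses; the model path is an assumption, and STANFORD_SEPARATOR is presumably the underscore the tagger emits by default.

import edu.stanford.nlp.tagger.maxent.MaxentTagger;

public class StanfordTagDemo {
    public static void main(String[] args) {
        // Model path is an assumption; use the model uuusa actually ships with.
        MaxentTagger tagger = new MaxentTagger("models/english-left3words-distsim.tagger");
        // Input must already be tokenized and space-separated; output is
        // "token_TAG" pairs, e.g. "The_DT movie_NN was_VBD great_JJ ._."
        System.out.println(tagger.tagTokenizedString("The movie was great ."));
    }
}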
Use of org.grupolys.samulan.util.TaggedTokenInformation in project uuusa by aghie.
From the class Processor, the method process:
public List<SentimentDependencyGraph> process(String text) {
    // (A commented-out block here once replaced emoticons with UUID
    // placeholders before processing and mapped them back afterwards.)
    List<SentimentDependencyGraph> sdgs = new ArrayList<SentimentDependencyGraph>();
    // Split the text into sentences with Stanford's DocumentPreprocessor.
    DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(text.concat(" ")));
    dp.setTokenizerFactory(PTBTokenizer.factory(new WordTokenFactory(), "ptb3Escaping=false"));
    for (List<HasWord> sentence : dp) {
        List<String> words = sentence.stream().map(w -> w.toString()).collect(Collectors.toList());
        // Tokenize, POS-tag, and dependency-parse each sentence in turn.
        List<String> tokens = this.tokenizer.tokenize(String.join(" ", words));
        List<TaggedTokenInformation> ttis = this.tagger.tag(tokens);
        sdgs.add(this.parser.parse(ttis));
    }
    return sdgs;
}
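The sentence splitting above uses Stanford CoreNLP's DocumentPreprocessor directly. A self-contained sketch of just that step, with the same tokenizer configuration as process; only the demo class name and sample text are invented.

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordTokenFactory;

public class SentenceSplitDemo {
    public static void main(String[] args) {
        DocumentPreprocessor dp =
                new DocumentPreprocessor(new StringReader("I came. I saw. I conquered."));
        // Same tokenizer configuration as Processor.process above.
        dp.setTokenizerFactory(PTBTokenizer.factory(new WordTokenFactory(), "ptb3Escaping=false"));
        for (List<HasWord> sentence : dp) {
            System.out.println(sentence); // one token list per sentence
        }
    }
}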