Use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.
The class Samulan, method analyse.
// Obtains the sentiment classification for each line of a TSV file (classifies new samples).
private static void analyse(String pathRawFiles, String encoding, Processor p, RuleBasedAnalyser rba,
        String pathOutput, String scale, boolean verbose, String pathToSaveParsedFile) {
    String text;
    BufferedReader br = null;
    try {
        br = new BufferedReader(new FileReader(pathRawFiles));
    } catch (FileNotFoundException e1) {
        e1.printStackTrace();
        return;
    }
    String line;
    Writer writer = null;
    try {
        if (pathOutput != null)
            writer = new PrintWriter(pathOutput, encoding);
        else
            writer = new BufferedWriter(new OutputStreamWriter(System.out));
    } catch (FileNotFoundException | UnsupportedEncodingException e1) {
        e1.printStackTrace();
        return;
    }
    Writer conllWriter = null;
    if (pathToSaveParsedFile != null) {
        try {
            conllWriter = new PrintWriter(pathToSaveParsedFile, encoding);
        } catch (FileNotFoundException | UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
    try {
        line = br.readLine();
        int conllTextId = 0;
        while (line != null) {
            // The text to classify is the last tab-separated field of the line.
            String[] ls = line.split("\t");
            text = ls[ls.length - 1];
            List<SentimentDependencyGraph> sdgs = p.process(text);
            // If the user provided a path, save the parsed graphs to a CoNLL file.
            if (pathToSaveParsedFile != null) {
                conllTextId += 1;
                conllWriter.write("### " + conllTextId + "\t" + ((ls.length > 1) ? ls[0] : "NotAvailable") + "\n");
                for (SentimentDependencyGraph dg : sdgs) {
                    conllWriter.write(dg.toConll() + "\n");
                }
            }
            // Analyse each sentence graph from its root (address 0) and merge the results.
            List<SentimentInformation> sis = sdgs.stream()
                    .map((SentimentDependencyGraph dg) -> rba.analyse(dg, (short) 0))
                    .collect(Collectors.toList());
            SentimentInformation siFinal = rba.merge(sis);
            writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t\t" + text + "\n");
            writer.flush();
            if (verbose) {
                sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
            }
            line = br.readLine();
        }
        br.close();
        writer.close();
        if (pathToSaveParsedFile != null) {
            conllWriter.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
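For context, a minimal sketch of how this method reads its input: the text to classify is the last tab-separated field of each TSV line, and the first field, when there is more than one, becomes the identifier in the "### <n>\t<id>" header of the optional CoNLL dump. The sample line below is hypothetical, not taken from the project.

String line = "tweet42\tpositive\tI really loved this film";  // hypothetical TSV line
String[] ls = line.split("\t");
String text = ls[ls.length - 1];                       // "I really loved this film"
String id = (ls.length > 1) ? ls[0] : "NotAvailable";  // "tweet42"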
Use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.
The class MaltParserWrapper, method parse.
public SentimentDependencyGraph parse(List<TaggedTokenInformation> ttis) {
    SentimentDependencyGraph sdg = null;
    // Render each tagged token as a CoNLL line for the parser.
    String[] tokens = new String[ttis.size()];
    int i = 0;
    for (TaggedTokenInformation tti : ttis) {
        tokens[i] = tti.toConll();
        i += 1;
    }
    // Parse the CoNLL-formatted tokens of the sentence.
    String[] outputTokens;
    try {
        outputTokens = this.parser.parseTokens(tokens);
        sdg = new SentimentDependencyGraph(String.join("\n", outputTokens));
    } catch (MaltChainedException e1) {
        e1.printStackTrace();
    }
    // Terminate the parser model.
    try {
        this.parser.terminateParserModel();
    } catch (MaltChainedException e) {
        e.printStackTrace();
    }
    return sdg;
}
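Note that parse terminates the parser model after every sentence, so the wrapper presumably reinitializes it between calls. For reference, a hedged sketch of how such a parser is typically set up, assuming this.parser is a org.maltparser.MaltParserService (whose parseTokens and terminateParserModel signatures match the calls above); the model name, options, and token lines are examples, not taken from this project:

import org.maltparser.MaltParserService;
import org.maltparser.core.exception.MaltChainedException;

static String[] parseExample() throws MaltChainedException {
    MaltParserService service = new MaltParserService();
    // Load a pre-trained model; "engmalt.linear-1.7" is a publicly distributed English model.
    service.initializeParserModel("-c engmalt.linear-1.7 -m parse -w . -lfi parser.log");
    String[] conllTokens = {
        "1\tI\t_\tPRP\tPRP\t_",
        "2\tloved\t_\tVBD\tVBD\t_",
        "3\tit\t_\tPRP\tPRP\t_"
    };
    return service.parseTokens(conllTokens);  // head and deprel columns appended to each line
}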
Use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.
The class Samulan, method analyse (CoNLL-file overload).
// Obtains a list of SentimentDependencyGraph given a CoNLL file.
// private static List<SentimentDependencyGraph> getGraphs(String path, String encoding) {
//     CoNLLReader conllReader = new CoNLLReader();
//     List<DependencyGraph> graphs = conllReader.read(path, encoding);
//     List<SentimentDependencyGraph> sgraphs = new ArrayList<SentimentDependencyGraph>();
//     for (DependencyGraph dg : graphs) {
//         HashMap<Short, DependencyNode> nodes = dg.getNodes();
//         HashMap<Short, DependencyNode> snodes = new HashMap<Short, DependencyNode>();
//         for (short address : nodes.keySet()) {
//             snodes.put(address, new SentimentDependencyNode(nodes.get(address), null));
//         }
//         sgraphs.add(new SentimentDependencyGraph(snodes));
//     }
//     return sgraphs;
// }
// Obtains the sentiment classification for each graph in a CoNLL file.
private static void analyse(String conllFile, String encoding, RuleBasedAnalyser rba,
        String pathOutput, String scale, boolean verbose) {
    BufferedReader br = null;
    String line, conll = null, textID = null, previousTextID = null;
    boolean newFileGraphs = false;
    boolean first = true;
    CoNLLReader conllReader = new CoNLLReader();
    List<SentimentDependencyGraph> sdgs = new ArrayList<SentimentDependencyGraph>();
    double totalAnalyseTime = 0, textAnalyseTime = 0;
    try {
        br = new BufferedReader(new FileReader(conllFile));
    } catch (FileNotFoundException e1) {
        System.err.println("File or directory: " + conllFile + " not found");
        e1.printStackTrace();
        return;
    }
    Writer writer = null;
    try {
        if (pathOutput != null)
            writer = new PrintWriter(pathOutput, encoding);
        else
            writer = new BufferedWriter(new OutputStreamWriter(System.out));
    } catch (FileNotFoundException | UnsupportedEncodingException e1) {
        e1.printStackTrace();
        return;
    }
    try {
        line = br.readLine();
        while (line != null) {
            // A new header line means the graphs of the previous text are complete: analyse them.
            if (newFileGraphs && (previousTextID != null)) {
                long initAnalyseTextTime = System.nanoTime();
                List<SentimentInformation> sis = sdgs.stream()
                        .map((SentimentDependencyGraph dg) -> rba.analyse(dg, (short) 0))
                        .collect(Collectors.toList());
                long stopAnalyseTextTime = System.nanoTime();
                String text = String.join(" ", sdgs.stream()
                        .map((SentimentDependencyGraph dg) -> dg.subgraphToString((short) 0))
                        .collect(Collectors.toList()));
                SentimentInformation siFinal = rba.merge(sis);
                try {
                    textAnalyseTime = (stopAnalyseTextTime - initAnalyseTextTime) / 1000000000.0;
                    totalAnalyseTime += textAnalyseTime;
                    writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative())
                            + "\t\t" + text + "\t [The analysis took: " + textAnalyseTime
                            + " sec.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                    writer.flush();
                    if (verbose) {
                        sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
                    }
                } catch (IOException e) {
                    e.printStackTrace();
                }
                sdgs.clear();
                previousTextID = null;
                newFileGraphs = false;
            }
            // Process the line.
            if (line.startsWith(CONLL_IDENTIFIER_SYMBOL)) {
                // Header line introducing the graphs of a new text.
                String lcleaned = line.replace(CONLL_IDENTIFIER_SYMBOL, "").replace(" ", "").replace("\n", "");
                if (!first) {
                    previousTextID = textID;
                }
                first = false;
                textID = lcleaned.split("\t")[1];
                conll = "";
                newFileGraphs = true;
            } else if (line.equals("")) {
                // A blank line closes the current graph: wrap its nodes as sentiment nodes.
                HashMap<Short, DependencyNode> nodes = conllReader.read(conll).getNodes();
                HashMap<Short, DependencyNode> snodes = new HashMap<Short, DependencyNode>();
                for (short address : nodes.keySet()) {
                    snodes.put(address, new SentimentDependencyNode(nodes.get(address), null));
                }
                sdgs.add(new SentimentDependencyGraph(snodes));
                conll = "";
            } else {
                // Still reading token lines of the current graph (same text).
                conll = conll.concat(line + "\n");
            }
            line = br.readLine();
        }
        // Analyse the graphs of the last text in the file.
        if (!sdgs.isEmpty()) {
            long initAnalyseTextTime = System.nanoTime();
            List<SentimentInformation> sis = sdgs.stream()
                    .map((SentimentDependencyGraph dg) -> rba.analyse(dg, (short) 0))
                    .collect(Collectors.toList());
            long stopAnalyseTextTime = System.nanoTime();
            String text = String.join(" ", sdgs.stream()
                    .map((SentimentDependencyGraph dg) -> dg.subgraphToString((short) 0))
                    .collect(Collectors.toList()));
            SentimentInformation siFinal = rba.merge(sis);
            try {
                textAnalyseTime = (stopAnalyseTextTime - initAnalyseTextTime) / 1000000000.0;
                totalAnalyseTime += textAnalyseTime;
                writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative())
                        + "\t\t" + text + "\t [The analysis took: " + textAnalyseTime
                        + " sec.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                writer.flush();
                if (verbose) {
                    sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            sdgs.clear();
            textID = null;
            newFileGraphs = false;
        }
        br.close();
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
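For reference, this overload consumes the file written by the TSV-based analyse above: a header line (CONLL_IDENTIFIER_SYMBOL is presumably "###", matching what that method writes) introduces the graphs of one text, each graph is a block of tab-separated token lines, and a blank line closes a graph. A hypothetical two-sentence text might look like the following; the exact column layout depends on the CoNLLReader, so the token lines are illustrative only:

### 1	tweet42
1	I	_	PRP	PRP	_	2	SBJ
2	loved	_	VBD	VBD	_	0	ROOT
3	it	_	PRP	PRP	_	2	OBJ

1	Great	_	JJ	JJ	_	2	NMOD
2	film	_	NN	NN	_	0	ROOT
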
Use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.
The class SyntacticRuleBasedAnalyser, method getAllQueuedOperations.
private List<QueuedOperationInformation> getAllQueuedOperations(SentimentInformation head, List<SentimentInformation> children) {
    List<QueuedOperationInformation> allQueuedOperations = new ArrayList<QueuedOperationInformation>(head.getQueuedOperations());
    for (SentimentInformation siChild : children) {
        for (QueuedOperationInformation oChild : siChild.getQueuedOperations()) {
            // Nested weighting operations. TODO: only double nesting is supported.
            short headAddress = siChild.getSentimentDependencyNode().getHead();
            SentimentDependencyGraph sdgChild = siChild.getSentimentDependencyGraph();
            SentimentDependencyNode headNode = sdgChild.getNode(headAddress);
            String headLemma = this.rm.getD().getLemma(headNode.getCpostag(), headNode.getWord());
            SentimentDependencyNode grandPaNode = sdgChild.getNode(headNode.getHead());
            String grandPaLemma = this.rm.getD().getLemma(grandPaNode.getCpostag(), grandPaNode.getWord());
            boolean grandPaIsSubjective = this.rm.getD().getValue(grandPaNode.getCpostag(), grandPaLemma, true) != 0;
            // If the child's head is a weighting word and its grandparent is subjective,
            // promote the queued operation one level up the graph.
            if (this.rm.getD().isWeight(headLemma) && grandPaIsSubjective) {
                oChild.setLevelsUp((short) (oChild.getLevelsUp() + 1));
            }
            allQueuedOperations.add(oChild);
        }
    }
    return allQueuedOperations;
}
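To make the condition concrete: under one plausible parse of "not very good" (a hypothetical example), the operation queued at "not" hangs from "very", which the dictionary marks as a weighting word, and "very" in turn hangs from the subjective "good", so the operation is promoted one level and is dispatched at the level of "good" rather than "very". A minimal sketch of the two dictionary checks, reusing the accessors visible above with hypothetical lemmas and tags:

boolean headIsWeight = rm.getD().isWeight("very");                          // weighting word?
boolean grandPaIsSubjective = rm.getD().getValue("JJ", "good", true) != 0;  // non-zero prior polarity?
if (headIsWeight && grandPaIsSubjective) {
    oChild.setLevelsUp((short) (oChild.getLevelsUp() + 1));  // apply one level higher
}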
Use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.
The class Processor, method process.
public List<SentimentDependencyGraph> process(String text) {
    // HashMap<String, String> emoLookupTable = new HashMap<String, String>();
    // for (String emoticon : emoticons) {
    //     String emouuid = UUID.randomUUID().toString();
    //     text.replaceAll(emoticon, emouuid);
    //     emoLookupTable.put(emouuid, emoticon);
    // }
    List<SentimentDependencyGraph> sdgs = new ArrayList<SentimentDependencyGraph>();
    // Split the text into sentences with Stanford's DocumentPreprocessor.
    DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(text.concat(" ")));
    dp.setTokenizerFactory(PTBTokenizer.factory(new WordTokenFactory(), "ptb3Escaping=false"));
    for (List<HasWord> sentence : dp) {
        List<String> words = sentence.stream().map(w -> w.toString()).collect(Collectors.toList());
        // Tokenize, tag, and parse each sentence into its own sentiment dependency graph.
        List<String> tokens = this.tokenizer.tokenize(String.join(" ", words));
        List<TaggedTokenInformation> ttis = this.tagger.tag(tokens);
        sdgs.add(this.parser.parse(ttis));
    }
    return sdgs;
}
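Since each sentence is tokenized, tagged, and parsed separately, the returned list holds one graph per sentence. A hedged usage sketch, assuming processor is an already configured Processor instance (its construction is not shown in this section) and using the hypothetical input text below:

List<SentimentDependencyGraph> graphs = processor.process("I loved this film. The plot is great.");
for (SentimentDependencyGraph sdg : graphs) {
    System.out.println(sdg.toConll());  // one CoNLL block per sentence, as consumed by analyse above
}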