Use of org.grupolys.nlputils.parser.CoNLLReader in project uuusa by aghie.
The class Samulan, method analyse.
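For orientation, a minimal sketch of the reader on its own, using only the two-argument read signature that appears verbatim in this file (a whole file plus an encoding); the file name is a placeholder and the import path for DependencyGraph is an assumption, so adjust it to wherever that class lives in uuusa:

import java.util.List;

import org.grupolys.nlputils.parser.CoNLLReader;
import org.grupolys.nlputils.parser.DependencyGraph; // assumed location of DependencyGraph

public class CoNLLReaderSketch {
    public static void main(String[] args) {
        CoNLLReader reader = new CoNLLReader();
        // Parse every dependency graph in a CoNLL file (path and encoding are placeholders)
        List<DependencyGraph> graphs = reader.read("reviews.conll", "UTF-8");
        System.out.println("Read " + graphs.size() + " graphs");
    }
}

The commented-out helper below shows this bulk read being wrapped into sentiment-aware graphs.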
// Obtains a list of SentimentDependencyGraph given a CoNLL file
// private static List<SentimentDependencyGraph> getGraphs(String path, String encoding){
//     CoNLLReader conllReader = new CoNLLReader();
//     List<DependencyGraph> graphs = conllReader.read(path, encoding);
//     List<SentimentDependencyGraph> sgraphs = new ArrayList<SentimentDependencyGraph>();
//
//     for (DependencyGraph dg : graphs){
//
//         HashMap<Short, DependencyNode> nodes = dg.getNodes();
//         HashMap<Short, DependencyNode> snodes = new HashMap<Short, DependencyNode>();
//         for (short address : nodes.keySet()){
//             snodes.put(address, new SentimentDependencyNode(nodes.get(address), null));
//         }
//
//         sgraphs.add(new SentimentDependencyGraph(snodes));
//     }
//     return sgraphs;
// }
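The live method below inlines this same wrapping step (each DependencyNode becomes a SentimentDependencyNode with no sentiment attached yet), but streams the file one graph at a time instead of materialising every graph up front.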
// Obtains the sentiment classification for each graph in a CoNLL file
private static void analyse(String conllFile, String encoding, RuleBasedAnalyser rba, String pathOutput, String scale, boolean verbose) {
    BufferedReader br = null;
    String line, conll = "", textID = null, previousTextID = null;
    boolean newFileGraphs = false;
    boolean first = true;
    CoNLLReader conllReader = new CoNLLReader();
    List<SentimentDependencyGraph> sdgs = new ArrayList<SentimentDependencyGraph>();
    double totalAnalyseTime = 0, textAnalyseTime = 0;
    try {
        br = new BufferedReader(new FileReader(conllFile));
    } catch (FileNotFoundException e1) {
        System.err.println("File or directory: " + conllFile + " not found");
        e1.printStackTrace();
        return; // no input file, nothing to analyse
    }
    Writer writer = null;
    try {
        if (pathOutput != null)
            writer = new PrintWriter(pathOutput, encoding);
        else
            writer = new BufferedWriter(new OutputStreamWriter(System.out));
    } catch (FileNotFoundException | UnsupportedEncodingException e1) {
        e1.printStackTrace();
        return; // the output destination could not be opened
    }
    try {
        line = br.readLine();
        while (line != null) {
            // A new text begins: analyse and flush the graphs buffered for the previous one
            if (newFileGraphs && (previousTextID != null)) {
                long initAnalyseTextTime = System.nanoTime();
                List<SentimentInformation> sis = sdgs.stream().map((SentimentDependencyGraph dg) -> (rba.analyse(dg, (short) 0))).collect(Collectors.toList());
                long stopAnalyseTextTime = System.nanoTime();
                String text = String.join(" ", sdgs.stream().map((SentimentDependencyGraph dg) -> dg.subgraphToString((short) 0)).collect(Collectors.toList()));
                SentimentInformation siFinal = rba.merge(sis);
                try {
                    textAnalyseTime = (stopAnalyseTextTime - initAnalyseTextTime) / 1000000000.0;
                    totalAnalyseTime += textAnalyseTime;
                    writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\t" + " [The analysis took: " + textAnalyseTime + " sec.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                    // writer.write(previousTextID + "\t" + printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\t" + " [The analysis took: " + textAnalyseTime + " sec.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                    writer.flush();
                    if (verbose) {
                        sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
                    }
                } catch (IOException e) {
                    // Keep processing the remaining texts even if one write fails
                    e.printStackTrace();
                }
                sdgs.clear();
                previousTextID = null;
                newFileGraphs = false;
            }
            // We process the line
            if (line.startsWith(CONLL_IDENTIFIER_SYMBOL)) {
                String lcleaned = line.replace(CONLL_IDENTIFIER_SYMBOL, "").replace(" ", "").replace("\n", "");
                if (!first) {
                    previousTextID = textID;
                }
                first = false;
                textID = lcleaned.split("\t")[1];
                conll = "";
                newFileGraphs = true;
            } else { // We are still reading conll graphs from the same text
                // We are reading a new conll graph, but from the same text
                if (line.equals("")) {
                    HashMap<Short, DependencyNode> nodes = conllReader.read(conll).getNodes();
                    HashMap<Short, DependencyNode> snodes = new HashMap<Short, DependencyNode>();
                    for (short address : nodes.keySet()) {
                        snodes.put(address, new SentimentDependencyNode(nodes.get(address), null));
                    }
                    sdgs.add(new SentimentDependencyGraph(snodes));
                    conll = "";
                } else {
                    conll = conll.concat(line + "\n");
                }
            }
            line = br.readLine();
        }
        // Last graph
        if (!sdgs.isEmpty()) {
            List<SentimentInformation> sis = sdgs.stream().map((SentimentDependencyGraph dg) -> (rba.analyse(dg, (short) 0))).collect(Collectors.toList());
            String text = String.join(" ", sdgs.stream().map((SentimentDependencyGraph dg) -> dg.subgraphToString((short) 0)).collect(Collectors.toList()));
            SentimentInformation siFinal = rba.merge(sis);
            try {
                writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\t" + " [The analysis took: " + textAnalyseTime + " sec.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                // writer.write(textID + "\t" + printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\t" + " [The analysis took: " + textAnalyseTime + " sec.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                writer.flush();
                if (verbose) {
                    sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
                }
            } catch (IOException e) {
                // Report a failed write for the final text
                e.printStackTrace();
            }
            sdgs.clear();
            textID = null;
            newFileGraphs = false;
        }
        br.close();
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
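The control flow above hinges on two sentinel line types: a line starting with CONLL_IDENTIFIER_SYMBOL opens a new text, and a blank line closes the CoNLL block buffered so far. A stripped-down, JDK-only sketch of that buffering pattern, assuming a hypothetical "#" identifier symbol and made-up input:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

public class BlockBufferingSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical input: '#'-prefixed identifier lines name a text,
        // and a blank line ends the graph accumulated so far.
        String input = "#id\tdoc-1\n1\tgood\n2\tmovie\n\n#id\tdoc-2\n1\tbad\n\n";
        BufferedReader br = new BufferedReader(new StringReader(input));
        List<String> blocks = new ArrayList<String>();
        StringBuilder block = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            if (line.startsWith("#")) {
                continue; // identifier line: marks a new text, not part of a graph
            }
            if (line.isEmpty()) {
                blocks.add(block.toString()); // blank line: one complete graph buffered
                block.setLength(0);
            } else {
                block.append(line).append('\n'); // accumulate one token line
            }
        }
        System.out.println("Buffered " + blocks.size() + " graph blocks"); // prints 2
    }
}

In the real method each flushed block goes through conllReader.read(conll) and is wrapped into a SentimentDependencyGraph instead of being kept as a string.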