Search in sources :

Example 1 with SentimentDependencyGraph

use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.

From the class Samulan, method analyse:

// Obtains the sentiment classification for line in a tsv file (it classifies new samples)
// Obtains the sentiment classification for each line in a TSV file (it classifies new samples).
// Each input line is tab-separated with the raw text in the last column; the first column,
// when present, is treated as the sample identifier. Results are written to pathOutput
// (or stdout when pathOutput is null); when pathToSaveParsedFile is non-null the parsed
// dependency graphs are also dumped there in CoNLL format.
private static void analyse(String pathRawFiles, String encoding, Processor p, RuleBasedAnalyser rba, String pathOutput, String scale, boolean verbose, String pathToSaveParsedFile) {
    // try-with-resources guarantees every reader/writer is closed even when an exception
    // is thrown mid-loop. The original swallowed FileNotFoundException and then
    // dereferenced a null BufferedReader, and leaked all three streams on failure.
    try (BufferedReader br = new BufferedReader(new FileReader(pathRawFiles));
         Writer writer = (pathOutput != null)
                 ? new PrintWriter(pathOutput, encoding)
                 : new BufferedWriter(new OutputStreamWriter(System.out));
         Writer conllWriter = (pathToSaveParsedFile != null)
                 ? new PrintWriter(pathToSaveParsedFile, encoding)
                 : null) {
        int conllTextId = 0;
        String line = br.readLine();
        while (line != null) {
            String[] ls = line.split("\t");
            // The raw text to classify is the last tab-separated column.
            String text = ls[ls.length - 1];
            List<SentimentDependencyGraph> sdgs = p.process(text);
            // If the user has provided a path to save the graphs in a conll file we save them
            if (conllWriter != null) {
                conllTextId += 1;
                conllWriter.write("### 	" + conllTextId + "\t" + ((ls.length > 1) ? ls[0] : "NotAvailable") + "\n");
                for (SentimentDependencyGraph dg : sdgs) {
                    conllWriter.write(dg.toConll() + "\n");
                }
            }
            // Analyse every sentence graph from its root (address 0) and merge the
            // per-sentence results into a single classification for the whole text.
            List<SentimentInformation> sis = sdgs.stream().map((SentimentDependencyGraph dg) -> (rba.analyse(dg, (short) 0))).collect(Collectors.toList());
            SentimentInformation siFinal = rba.merge(sis);
            writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\n");
            // Flush per line so output is visible immediately when writing to stdout.
            writer.flush();
            if (verbose) {
                sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
            }
            line = br.readLine();
        }
    } catch (IOException e) {
        // FileNotFoundException and UnsupportedEncodingException are IOException
        // subclasses, so this single handler preserves the original best-effort
        // behaviour: report the failure and return without classifying.
        e.printStackTrace();
    }
}
Also used : SentimentDependencyGraph(org.grupolys.samulan.util.SentimentDependencyGraph) FileNotFoundException(java.io.FileNotFoundException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) BufferedReader(java.io.BufferedReader) SentimentInformation(org.grupolys.samulan.util.SentimentInformation) FileReader(java.io.FileReader) OutputStreamWriter(java.io.OutputStreamWriter) OutputStreamWriter(java.io.OutputStreamWriter) PrintWriter(java.io.PrintWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) PrintWriter(java.io.PrintWriter)

Example 2 with SentimentDependencyGraph

use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.

From the class MaltParserWrapper, method parse:

// Parses a tagged sentence into a sentiment dependency graph.
// Returns null when the underlying MaltParser fails (callers must handle that).
public SentimentDependencyGraph parse(List<TaggedTokenInformation> ttis) {
    // Build one CoNLL-formatted line per tagged token as parser input.
    String[] conllLines = new String[ttis.size()];
    int idx = 0;
    for (TaggedTokenInformation tti : ttis) {
        conllLines[idx] = tti.toConll();
        idx += 1;
    }
    SentimentDependencyGraph graph = null;
    try {
        // Run the dependency parser and wrap its output into a sentiment graph.
        String[] parsedLines = this.parser.parseTokens(conllLines);
        graph = new SentimentDependencyGraph(String.join("\n", parsedLines));
    } catch (MaltChainedException e1) {
        // Best-effort: report and fall through with a null graph.
        e1.printStackTrace();
    }
    try {
        // Terminates the parser model.
        this.parser.terminateParserModel();
    } catch (MaltChainedException e) {
        e.printStackTrace();
    }
    return graph;
}
Also used : MaltChainedException(org.maltparser.core.exception.MaltChainedException) SentimentDependencyGraph(org.grupolys.samulan.util.SentimentDependencyGraph) TaggedTokenInformation(org.grupolys.samulan.util.TaggedTokenInformation)

Example 3 with SentimentDependencyGraph

use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.

the class Samulan method analyse.

// Obtains a list of SentimentDependencyGraph given a CoNLL file
// private static List<SentimentDependencyGraph> getGraphs(String path, String encoding){
// CoNLLReader conllReader = new CoNLLReader();
// List<DependencyGraph> graphs =conllReader.read(path, encoding);
// List<SentimentDependencyGraph> sgraphs = new ArrayList<SentimentDependencyGraph>();
// 
// for (DependencyGraph dg: graphs){
// 
// HashMap<Short, DependencyNode> nodes = dg.getNodes();
// HashMap<Short, DependencyNode> snodes = new HashMap<Short, DependencyNode>();
// for (short address: nodes.keySet()){
// snodes.put(address,new SentimentDependencyNode(nodes.get(address), null));
// }
// 
// sgraphs.add(new SentimentDependencyGraph(snodes));
// }
// return sgraphs;
// }
// Obtains the sentiment classification for each graph in a CoNLL file
// Obtains the sentiment classification for each text in a pre-parsed CoNLL file.
// The file interleaves header lines (starting with CONLL_IDENTIFIER_SYMBOL, carrying a
// text id) with one or more CoNLL graphs per text, separated by blank lines. Graphs are
// accumulated per text and analysed when the next header (or end of file) is reached.
// Results go to pathOutput, or stdout when pathOutput is null.
private static void analyse(String conllFile, String encoding, RuleBasedAnalyser rba, String pathOutput, String scale, boolean verbose) {
    BufferedReader br = null;
    // State machine over input lines: `conll` accumulates the current graph's lines,
    // `textID`/`previousTextID` track the header ids, and `newFileGraphs` marks that a
    // new header was seen so the buffered graphs belong to the previous text.
    String line, conll = null, textID = null, previousTextID = null;
    boolean newFileGraphs = false;
    // `first` suppresses flushing before any text has actually been read.
    boolean first = true;
    CoNLLReader conllReader = new CoNLLReader();
    List<SentimentDependencyGraph> sdgs = new ArrayList<SentimentDependencyGraph>();
    // Timing accumulators (seconds) reported alongside each classification.
    double totalAnalyseTime = 0, textAnalyseTime = 0;
    try {
        br = new BufferedReader(new FileReader(conllFile));
    } catch (FileNotFoundException e1) {
        System.err.println("File or directory: " + conllFile + " not found");
        e1.printStackTrace();
    }
    Writer writer = null;
    try {
        if (pathOutput != null)
            writer = new PrintWriter(pathOutput, encoding);
        else
            writer = new BufferedWriter(new OutputStreamWriter(System.out));
    } catch (FileNotFoundException | UnsupportedEncodingException e1) {
        // TODO Auto-generated catch block
        e1.printStackTrace();
    }
    try {
        line = br.readLine();
        while (line != null) {
            String[] ls = line.split("\t");
            // A new header was seen on a previous iteration: analyse and flush the
            // graphs accumulated for the previous text before processing this line.
            if (newFileGraphs && (previousTextID != null)) {
                long initAnalyseTextTime = System.nanoTime();
                // Analyse each graph from its root (address 0).
                List<SentimentInformation> sis = sdgs.stream().map((SentimentDependencyGraph dg) -> (rba.analyse(dg, (short) 0))).collect(Collectors.toList());
                long stopAnalyseTextTime = System.nanoTime();
                // Reconstruct the surface text from the graphs for the output line.
                String text = String.join(" ", sdgs.stream().map((SentimentDependencyGraph dg) -> dg.subgraphToString((short) 0)).collect(Collectors.toList()));
                SentimentInformation siFinal = rba.merge(sis);
                try {
                    // Convert nanoseconds to seconds.
                    textAnalyseTime = (stopAnalyseTextTime - initAnalyseTextTime) / 1000000000.0;
                    totalAnalyseTime += textAnalyseTime;
                    writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\t" + " [The analysis took: " + textAnalyseTime + " seg.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                    // writer.write(previousTextID+"\t"+printOutputScaled(siFinal,scale, rba.getAc().isBinaryNeutralAsNegative())+"\t"+"\t"+text+"\t"+" [The analysis took: "+textAnalyseTime+" seg.] [Accumulated time is: "+totalAnalyseTime+"]\n");
                    writer.flush();
                    if (verbose) {
                        sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
                    }
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
                // Reset state for the text whose header was just seen.
                sdgs.clear();
                previousTextID = null;
                newFileGraphs = false;
            }
            // We process the line
            if (line.startsWith(CONLL_IDENTIFIER_SYMBOL)) {
                // Header line: strip the marker and whitespace, then take the id
                // from the second tab-separated field.
                String lcleaned = line.replace(CONLL_IDENTIFIER_SYMBOL, "").replace(" ", "").replace("\n", "");
                if (!first) {
                    previousTextID = textID;
                }
                first = false;
                textID = lcleaned.split("\t")[1];
                conll = "";
                newFileGraphs = true;
            } else // We are still reading conll graphs from the same text
            {
                // We are reading a new conll graph, but from the same text
                if (line.equals("")) {
                    // Blank line ends a graph: parse the buffered CoNLL block and wrap
                    // its nodes as sentiment nodes (sentiment info filled in later).
                    HashMap<Short, DependencyNode> nodes = conllReader.read(conll).getNodes();
                    HashMap<Short, DependencyNode> snodes = new HashMap<Short, DependencyNode>();
                    for (short address : nodes.keySet()) {
                        snodes.put(address, new SentimentDependencyNode(nodes.get(address), null));
                    }
                    sdgs.add(new SentimentDependencyGraph(snodes));
                    conll = "";
                } else {
                    // Accumulate the current graph's CoNLL lines.
                    conll = conll.concat(line + "\n");
                }
            }
            line = br.readLine();
        }
        // Last graph
        // End of file: the final text never triggers the header-based flush above,
        // so analyse any remaining buffered graphs here (same pipeline as above).
        if (!sdgs.isEmpty()) {
            List<SentimentInformation> sis = sdgs.stream().map((SentimentDependencyGraph dg) -> (rba.analyse(dg, (short) 0))).collect(Collectors.toList());
            ;
            String text = String.join(" ", sdgs.stream().map((SentimentDependencyGraph dg) -> dg.subgraphToString((short) 0)).collect(Collectors.toList()));
            SentimentInformation siFinal = rba.merge(sis);
            try {
                // NOTE(review): reports the previous text's timing values here, since
                // this final block is not timed — confirm whether that is intended.
                writer.write(printOutputScaled(siFinal, scale, rba.getAc().isBinaryNeutralAsNegative()) + "\t" + "\t" + text + "\t" + " [The analysis took: " + textAnalyseTime + " seg.] [Accumulated time is: " + totalAnalyseTime + "]\n");
                // writer.write(textID+"\t"+printOutputScaled(siFinal,scale,rba.getAc().isBinaryNeutralAsNegative())+"\t"+"\t"+text+"\t"+" [The analysis took: "+textAnalyseTime+" seg.] [Accumulated time is: "+totalAnalyseTime+"]\n");
                writer.flush();
                if (verbose) {
                    sdgs.stream().forEach(sdg -> sdg.printLandscapeGraph((short) 0));
                }
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            sdgs.clear();
            textID = null;
            newFileGraphs = false;
        }
        br.close();
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Also used : SentimentDependencyNode(org.grupolys.samulan.util.SentimentDependencyNode) SentimentDependencyGraph(org.grupolys.samulan.util.SentimentDependencyGraph) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) CoNLLReader(org.grupolys.nlputils.parser.CoNLLReader) BufferedWriter(java.io.BufferedWriter) SentimentDependencyNode(org.grupolys.samulan.util.SentimentDependencyNode) DependencyNode(org.grupolys.nlputils.parser.DependencyNode) FileReader(java.io.FileReader) PrintWriter(java.io.PrintWriter) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) SentimentInformation(org.grupolys.samulan.util.SentimentInformation) OutputStreamWriter(java.io.OutputStreamWriter) OutputStreamWriter(java.io.OutputStreamWriter) PrintWriter(java.io.PrintWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer)

Example 4 with SentimentDependencyGraph

use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.

From the class SyntacticRuleBasedAnalyser, method getAllQueuedOperations:

// Collects the head's queued operations together with those of every child,
// bumping a child operation's levels-up counter when it sits under a weighting
// word whose grandparent node is itself subjective (nested weighting).
private List<QueuedOperationInformation> getAllQueuedOperations(SentimentInformation head, List<SentimentInformation> children) {
    // Start from the operations already queued on the head.
    List<QueuedOperationInformation> queued = new ArrayList<QueuedOperationInformation>(head.getQueuedOperations());
    for (SentimentInformation child : children) {
        for (QueuedOperationInformation childOp : child.getQueuedOperations()) {
            // Nesting weighting operations
            // TODO only supports double nesting
            short parentAddress = child.getSentimentDependencyNode().getHead();
            SentimentDependencyGraph childGraph = child.getSentimentDependencyGraph();
            SentimentDependencyNode parentNode = childGraph.getNode(parentAddress);
            String parentLemma = this.rm.getD().getLemma(parentNode.getCpostag(), parentNode.getWord());
            SentimentDependencyNode grandParentNode = childGraph.getNode(parentNode.getHead());
            String grandParentLemma = this.rm.getD().getLemma(grandParentNode.getCpostag(), grandParentNode.getWord());
            boolean grandParentIsSubjective = this.rm.getD().getValue(grandParentNode.getCpostag(), grandParentLemma, true) != 0;
            if (this.rm.getD().isWeight(parentLemma) && grandParentIsSubjective) {
                childOp.setLevelsUp((short) (childOp.getLevelsUp() + 1));
            }
            queued.add(childOp);
        }
    }
    return queued;
}
Also used : SentimentDependencyNode(org.grupolys.samulan.util.SentimentDependencyNode) SentimentDependencyGraph(org.grupolys.samulan.util.SentimentDependencyGraph) QueuedOperationInformation(org.grupolys.samulan.util.QueuedOperationInformation) ArrayList(java.util.ArrayList) SentimentInformation(org.grupolys.samulan.util.SentimentInformation)

Example 5 with SentimentDependencyGraph

use of org.grupolys.samulan.util.SentimentDependencyGraph in project uuusa by aghie.

the class Processor method process.

// Splits the raw text into sentences and runs the tokenize / tag / parse
// pipeline on each one, returning one sentiment dependency graph per sentence.
public List<SentimentDependencyGraph> process(String text) {
    // Sentence-split with Stanford's preprocessor; the trailing space works around
    // edge cases at the end of the input.
    DocumentPreprocessor sentenceSplitter = new DocumentPreprocessor(new StringReader(text.concat(" ")));
    sentenceSplitter.setTokenizerFactory(PTBTokenizer.factory(new WordTokenFactory(), "ptb3Escaping=false"));
    List<SentimentDependencyGraph> graphs = new ArrayList<SentimentDependencyGraph>();
    for (List<HasWord> sentence : sentenceSplitter) {
        // Rejoin the sentence's words, then re-tokenize with the configured tokenizer.
        List<String> words = sentence.stream().map(HasWord::toString).collect(Collectors.toList());
        List<String> tokens = this.tokenizer.tokenize(String.join(" ", words));
        // PoS-tag and dependency-parse the sentence.
        List<TaggedTokenInformation> ttis = this.tagger.tag(tokens);
        graphs.add(this.parser.parse(ttis));
    }
    return graphs;
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) PTBTokenizer(edu.stanford.nlp.process.PTBTokenizer) TreeTokenizerFactory(edu.stanford.nlp.trees.TreeTokenizerFactory) HashMap(java.util.HashMap) LexedTokenFactory(edu.stanford.nlp.process.LexedTokenFactory) ParserI(org.grupolys.samulan.processor.parser.ParserI) ArrayList(java.util.ArrayList) TokenizeI(org.grupolys.samulan.processor.tokenizer.TokenizeI) Twokenize(cmu.arktweetnlp.Twokenize) CoreLabelTokenFactory(edu.stanford.nlp.process.CoreLabelTokenFactory) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor) TokenizerFactory(edu.stanford.nlp.process.TokenizerFactory) WordTokenFactory(edu.stanford.nlp.process.WordTokenFactory) HasWord(edu.stanford.nlp.ling.HasWord) WhitespaceTokenizerFactory(edu.stanford.nlp.process.WhitespaceTokenizer.WhitespaceTokenizerFactory) Set(java.util.Set) UUID(java.util.UUID) LexerTokenizer(edu.stanford.nlp.process.LexerTokenizer) MaxentTagger(edu.stanford.nlp.tagger.maxent.MaxentTagger) Collectors(java.util.stream.Collectors) List(java.util.List) TaggerI(org.grupolys.samulan.processor.tagger.TaggerI) Stream(java.util.stream.Stream) StringReader(java.io.StringReader) SentimentDependencyGraph(org.grupolys.samulan.util.SentimentDependencyGraph) TaggedTokenInformation(org.grupolys.samulan.util.TaggedTokenInformation) SentimentDependencyGraph(org.grupolys.samulan.util.SentimentDependencyGraph) ArrayList(java.util.ArrayList) WordTokenFactory(edu.stanford.nlp.process.WordTokenFactory) StringReader(java.io.StringReader) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor) TaggedTokenInformation(org.grupolys.samulan.util.TaggedTokenInformation)

Aggregations

SentimentDependencyGraph (org.grupolys.samulan.util.SentimentDependencyGraph)5 ArrayList (java.util.ArrayList)3 SentimentInformation (org.grupolys.samulan.util.SentimentInformation)3 BufferedReader (java.io.BufferedReader)2 BufferedWriter (java.io.BufferedWriter)2 FileNotFoundException (java.io.FileNotFoundException)2 FileReader (java.io.FileReader)2 IOException (java.io.IOException)2 OutputStreamWriter (java.io.OutputStreamWriter)2 PrintWriter (java.io.PrintWriter)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 Writer (java.io.Writer)2 HashMap (java.util.HashMap)2 SentimentDependencyNode (org.grupolys.samulan.util.SentimentDependencyNode)2 TaggedTokenInformation (org.grupolys.samulan.util.TaggedTokenInformation)2 Twokenize (cmu.arktweetnlp.Twokenize)1 HasWord (edu.stanford.nlp.ling.HasWord)1 CoreLabelTokenFactory (edu.stanford.nlp.process.CoreLabelTokenFactory)1 DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)1 LexedTokenFactory (edu.stanford.nlp.process.LexedTokenFactory)1