Search in sources :

Example 21 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class JoinDistancePredicate method compareField.

/**
 * Checks whether the inner and outer tuples hold equal values for one attribute.
 *
 * @param innerTuple    first tuple to read the attribute from
 * @param outerTuple    second tuple to read the attribute from
 * @param attributeName name of the attribute looked up in both tuples
 * @return {@code true} only when both tuples have the field and the inner
 *         value {@code equals} the outer value; {@code false} when either
 *         tuple is missing the field or the values differ
 */
private boolean compareField(Tuple innerTuple, Tuple outerTuple, String attributeName) {
    IField fromInner = innerTuple.getField(attributeName);
    IField fromOuter = outerTuple.getField(attributeName);
    // A missing field on either side can never count as a match.
    boolean bothPresent = fromInner != null && fromOuter != null;
    return bothPresent && fromInner.getValue().equals(fromOuter.getValue());
}
Also used : IField(edu.uci.ics.texera.api.field.IField)

Example 22 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class EmojiSentimentOperator method getNextTuple.

/**
 * Produces the next output tuple: every field of the next input tuple,
 * followed by one {@code IntegerField} holding the computed sentiment score.
 *
 * @return the extended tuple built against {@code outputSchema}, or
 *         {@code null} when the cursor is {@code CLOSED} or the input
 *         operator returns {@code null}
 * @throws TexeraException propagated from the calls made here, if any
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    // The input operator is only consulted while this operator is not closed.
    Tuple inputTuple = (cursor == CLOSED) ? null : inputOperator.getNextTuple();
    if (inputTuple == null) {
        return null;
    }
    // Copy the incoming fields, then append the score as the last field.
    List<IField> extendedFields = new ArrayList<>(inputTuple.getFields());
    extendedFields.add(new IntegerField(computeSentimentScore(inputTuple)));
    return new Tuple(outputSchema, extendedFields);
}
Also used : IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 23 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class EmojiSentimentOperator method computeSentimentScore.

/*
 * Computes the sentiment score for the predicate's input attribute of the given tuple.
 *
 * Steps, in order:
 *   1. Start from SentimentConstants.NEUTRAL.
 *   2. Add one point if SMILEY_REGEX_PATTERN matches the text, and subtract one point
 *      if FROWNY_REGEX_PATTERN matches it.
 *   3. If EMOJI_REGEX matches the text, walk groups 0 .. groupCount() - 1 of the match.
 *      Every pair of adjacent chars in a group is combined into a code point and formatted
 *      as a hex string; the score goes up when that string is in the happy list, down when
 *      it is in the unhappy list, and is left alone when it is in the neutral list.
 *   4. A total below NEUTRAL is reported as NEGATIVE, a total above NEUTRAL as POSITIVE.
 *
 * NOTE(review): Matcher.matches() succeeds only when the entire text matches the pattern;
 * confirm the three patterns are written with that in mind.
 * NOTE(review): the group loop includes group 0 and stops before the last capturing
 * group; confirm this bound is intended.
 */
private Integer computeSentimentScore(Tuple inputTuple) {
    String text = inputTuple.<IField>getField(predicate.getInputAttributeName()).getValue().toString();
    Integer score = SentimentConstants.NEUTRAL;

    if (SMILEY_REGEX_PATTERN != null && SMILEY_REGEX_PATTERN.matcher(text).matches()) {
        score++;
    }
    if (FROWNY_REGEX_PATTERN != null && FROWNY_REGEX_PATTERN.matcher(text).matches()) {
        score--;
    }

    if (EMOJI_REGEX != null) {
        Matcher emojiMatcher = EMOJI_REGEX.matcher(text);
        if (emojiMatcher.matches()) {
            int groupIndex = 0;
            while (groupIndex < emojiMatcher.groupCount()) {
                char[] chars = emojiMatcher.group(groupIndex).toCharArray();
                // Slide a two-char window over the group; each window yields one hex string.
                for (int next = 1; next < chars.length; next++) {
                    int codePoint = Character.toCodePoint(chars[next - 1], chars[next]);
                    String hexCode = String.format("%04x", codePoint);
                    if (happy.contains(hexCode)) {
                        score++;
                    } else if (!neutral.contains(hexCode) && unhappy.contains(hexCode)) {
                        // Neutral entries leave the score alone; only unhappy ones lower it.
                        score--;
                    }
                }
                groupIndex++;
            }
        }
    }

    // Collapse the running total onto the sentiment constants.
    if (score < SentimentConstants.NEUTRAL) {
        score = SentimentConstants.NEGATIVE;
    }
    if (score > SentimentConstants.NEUTRAL) {
        score = SentimentConstants.POSITIVE;
    }
    return score;
}
Also used : Matcher(java.util.regex.Matcher) IField(edu.uci.ics.texera.api.field.IField)

Example 24 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class NlpSentimentOperator method computeSentimentScore.

/**
 * Computes the sentiment score for the predicate's input attribute of the given tuple.
 *
 * <p>The text is annotated with {@code sentimentPipeline}. The predicted class of the
 * longest sentence (by {@code toString()} length) is taken as the main sentiment and
 * passed through {@code normalizeSentimentScore}. When several sentences share the
 * maximum length, the first one wins. If no non-empty sentence is found, the main
 * sentiment stays at its initial value of 0.
 *
 * @param inputTuple tuple whose input attribute value is converted to text and analysed
 * @return the result of {@code normalizeSentimentScore} for the main sentiment
 */
private Integer computeSentimentScore(Tuple inputTuple) {
    String inputText = inputTuple.<IField>getField(predicate.getInputAttributeName()).getValue().toString();
    Annotation documentAnnotation = new Annotation(inputText);
    sentimentPipeline.annotate(documentAnnotation);
    // mainSentiment is the sentiment class of the longest sentence seen so far
    int mainSentiment = 0;
    int longestSentenceLength = 0;
    for (CoreMap sentence : documentAnnotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
        int sentenceLength = sentence.toString().length();
        if (sentenceLength > longestSentenceLength) {
            mainSentiment = sentiment;
            // Record the new maximum; without this every non-empty sentence would
            // overwrite the result and the last sentence would win instead of the longest.
            longestSentenceLength = sentenceLength;
        }
    }
    return normalizeSentimentScore(mainSentiment);
}
Also used : SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SentimentCoreAnnotations(edu.stanford.nlp.sentiment.SentimentCoreAnnotations) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) IField(edu.uci.ics.texera.api.field.IField) CoreMap(edu.stanford.nlp.util.CoreMap) Annotation(edu.stanford.nlp.pipeline.Annotation)

Example 25 with IField

use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.

the class NlpSentimentOperator method getNextTuple.

/**
 * Produces the next output tuple by appending the computed sentiment score,
 * as an {@code IntegerField}, to the fields of the next input tuple.
 *
 * @return the new tuple built against {@code outputSchema}, or {@code null}
 *         when the cursor is {@code CLOSED} or the input operator returns
 *         {@code null}
 * @throws TexeraException propagated from the calls made here, if any
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (cursor != CLOSED) {
        Tuple inputTuple = inputOperator.getNextTuple();
        if (inputTuple != null) {
            // Existing fields first, sentiment score last.
            List<IField> fieldsWithScore = new ArrayList<>(inputTuple.getFields());
            IntegerField scoreField = new IntegerField(computeSentimentScore(inputTuple));
            fieldsWithScore.add(scoreField);
            return new Tuple(outputSchema, fieldsWithScore);
        }
    }
    // Closed operator or exhausted input.
    return null;
}
Also used : ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Aggregations

IField (edu.uci.ics.texera.api.field.IField)145 ArrayList (java.util.ArrayList)113 Tuple (edu.uci.ics.texera.api.tuple.Tuple)106 TextField (edu.uci.ics.texera.api.field.TextField)100 Span (edu.uci.ics.texera.api.span.Span)99 Schema (edu.uci.ics.texera.api.schema.Schema)92 Test (org.junit.Test)84 StringField (edu.uci.ics.texera.api.field.StringField)79 IntegerField (edu.uci.ics.texera.api.field.IntegerField)78 DoubleField (edu.uci.ics.texera.api.field.DoubleField)63 DateField (edu.uci.ics.texera.api.field.DateField)58 Attribute (edu.uci.ics.texera.api.schema.Attribute)57 SimpleDateFormat (java.text.SimpleDateFormat)56 ListField (edu.uci.ics.texera.api.field.ListField)32 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)29 JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate)9 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)9 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)7 JsonNode (com.fasterxml.jackson.databind.JsonNode)4 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4