use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.
the class JoinDistancePredicate method compareField.
/**
 * Checks whether the inner and outer tuples carry equal values for one attribute.
 *
 * @param innerTuple    the tuple from the inner side of the join
 * @param outerTuple    the tuple from the outer side of the join
 * @param attributeName the name of the field to look up in both tuples
 * @return {@code true} only when both tuples return a non-null field for
 *         {@code attributeName} and the two field values are equal;
 *         {@code false} when either lookup yields {@code null}
 */
private boolean compareField(Tuple innerTuple, Tuple outerTuple, String attributeName) {
    // Both lookups happen before any null check, so each tuple is always queried.
    final IField lhs = innerTuple.getField(attributeName);
    final IField rhs = outerTuple.getField(attributeName);

    final boolean bothPresent = lhs != null && rhs != null;
    return bothPresent && lhs.getValue().equals(rhs.getValue());
}
use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.
the class EmojiSentimentOperator method getNextTuple.
/**
 * Produces the next output tuple: the next input tuple's fields followed by one
 * {@code IntegerField} holding the score from {@code computeSentimentScore}.
 *
 * @return the extended tuple, or {@code null} when the cursor is {@code CLOSED}
 *         or the input operator returns {@code null}
 * @throws TexeraException declared by the operator contract
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    if (cursor == CLOSED) {
        return null;
    }

    final Tuple source = inputOperator.getNextTuple();
    if (source == null) {
        return null;
    }

    // Copy the incoming fields, then append the sentiment score as the last field.
    final List<IField> extended = new ArrayList<>(source.getFields());
    extended.add(new IntegerField(computeSentimentScore(source)));
    return new Tuple(outputSchema, extended);
}
use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.
the class EmojiSentimentOperator method computeSentimentScore.
/**
 * Computes the sentiment score of the predicate's input attribute in the given tuple.
 *
 * <p>Starting from {@code SentimentConstants.NEUTRAL}, the score is adjusted as follows:
 * <ol>
 *   <li>+1 if the text matches {@code SMILEY_REGEX_PATTERN};</li>
 *   <li>-1 if the text matches {@code FROWNY_REGEX_PATTERN};</li>
 *   <li>if the text matches {@code EMOJI_REGEX}, every UTF-16 surrogate pair found in the
 *       visited groups is converted to its lower-case hexadecimal code point and looked up
 *       in the {@code happy} (+1), {@code neutral} (no change) and {@code unhappy} (-1)
 *       lists, in that order.</li>
 * </ol>
 * The raw score is finally collapsed to {@code SentimentConstants.NEGATIVE} when it is below
 * {@code NEUTRAL} and to {@code SentimentConstants.POSITIVE} when it is above.
 *
 * @param inputTuple the tuple whose input attribute is scored; the attribute is read through
 *                   {@code getValue().toString()}
 * @return {@code SentimentConstants.NEGATIVE}, the unchanged neutral score, or
 *         {@code SentimentConstants.POSITIVE}
 */
private Integer computeSentimentScore(Tuple inputTuple) {
    String inputText = inputTuple.<IField>getField(predicate.getInputAttributeName()).getValue().toString();
    // A primitive accumulator avoids re-boxing an Integer on every increment/decrement.
    int score = SentimentConstants.NEUTRAL;

    // NOTE(review): Matcher.matches() succeeds only when the ENTIRE text matches the pattern.
    // Presumably the patterns are written to span the whole input; if not, find() is what is
    // wanted here - confirm against the pattern definitions.
    if (SMILEY_REGEX_PATTERN != null && SMILEY_REGEX_PATTERN.matcher(inputText).matches()) {
        score++;
    }
    if (FROWNY_REGEX_PATTERN != null && FROWNY_REGEX_PATTERN.matcher(inputText).matches()) {
        score--;
    }

    if (EMOJI_REGEX != null) {
        Matcher matcher = EMOJI_REGEX.matcher(inputText);
        if (matcher.matches()) {
            // NOTE(review): this visits groups 0 .. groupCount()-1, i.e. the whole match plus all
            // capture groups except the last one. The bound is kept as-is; confirm it is intended.
            for (int i = 0; i < matcher.groupCount(); i++) {
                String matchedString = matcher.group(i);
                if (matchedString == null) {
                    // A capture group that did not participate in the match yields null.
                    continue;
                }
                for (int j = 0; j < matchedString.length() - 1; j++) {
                    char high = matchedString.charAt(j);
                    char low = matchedString.charAt(j + 1);
                    // Only a genuine high/low surrogate pair encodes a supplementary code point;
                    // any other adjacent pair would produce a meaningless value.
                    if (!Character.isSurrogatePair(high, low)) {
                        continue;
                    }
                    String unicodeString = String.format("%04x", Character.toCodePoint(high, low));
                    if (happy.contains(unicodeString)) {
                        score++;
                    } else if (neutral.contains(unicodeString)) {
                        // neutral emoji do not affect the score
                    } else if (unhappy.contains(unicodeString)) {
                        score--;
                    }
                }
            }
        }
    }

    // Collapse the raw score to one of the sentiment classes.
    if (score < SentimentConstants.NEUTRAL) {
        score = SentimentConstants.NEGATIVE;
    }
    if (score > SentimentConstants.NEUTRAL) {
        score = SentimentConstants.POSITIVE;
    }
    return score;
}
use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.
the class NlpSentimentOperator method computeSentimentScore.
/**
 * Computes the sentiment score of the predicate's input attribute in the given tuple.
 *
 * <p>The text is annotated with {@code sentimentPipeline}; the predicted sentiment class of the
 * longest sentence (by the length of {@code sentence.toString()}, first one wins on ties) is
 * passed to {@code normalizeSentimentScore}. When the annotation yields no sentence with
 * non-empty text, the class passed on is {@code 0}.
 *
 * @param inputTuple the tuple whose input attribute is scored; the attribute is read through
 *                   {@code getValue().toString()}
 * @return the result of {@code normalizeSentimentScore} for the longest sentence's class
 */
private Integer computeSentimentScore(Tuple inputTuple) {
    String inputText = inputTuple.<IField>getField(predicate.getInputAttributeName()).getValue().toString();
    Annotation documentAnnotation = new Annotation(inputText);
    sentimentPipeline.annotate(documentAnnotation);

    // mainSentiment is the sentiment class of the longest sentence seen so far
    int mainSentiment = 0;
    int longestSentenceLength = 0;
    for (CoreMap sentence : documentAnnotation.get(CoreAnnotations.SentencesAnnotation.class)) {
        Tree tree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
        int sentiment = RNNCoreAnnotations.getPredictedClass(tree);
        int sentenceLength = sentence.toString().length();
        if (sentenceLength > longestSentenceLength) {
            mainSentiment = sentiment;
            // Record the new maximum; without this every non-empty sentence would overwrite
            // the result and the last sentence, not the longest, would win.
            longestSentenceLength = sentenceLength;
        }
    }
    return normalizeSentimentScore(mainSentiment);
}
use of edu.uci.ics.texera.api.field.IField in project textdb by TextDB.
the class NlpSentimentOperator method getNextTuple.
/**
 * Returns the next input tuple extended with one trailing {@code IntegerField} that holds
 * the value of {@code computeSentimentScore} for that tuple.
 *
 * @return the extended tuple, or {@code null} when the cursor is {@code CLOSED}
 *         or the input operator has no further tuple
 * @throws TexeraException declared by the operator contract
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    // The input operator is only consulted while this operator is not closed.
    final Tuple current = (cursor == CLOSED) ? null : inputOperator.getNextTuple();
    if (current == null) {
        return null;
    }

    final List<IField> fieldsWithScore = new ArrayList<>(current.getFields());
    final IntegerField scoreField = new IntegerField(computeSentimentScore(current));
    fieldsWithScore.add(scoreField);
    return new Tuple(outputSchema, fieldsWithScore);
}
Aggregations