use of edu.uci.ics.texera.api.field.IntegerField in project textdb by TextDB.
the class NlpSentimentOperator method getNextTuple.
/**
 * Fetches the next tuple from the input operator and appends a sentiment score to it.
 *
 * @return a tuple built against {@code outputSchema} that holds every field of the input
 *         tuple followed by an {@code IntegerField} wrapping the value returned by
 *         {@code computeSentimentScore}; {@code null} when the cursor is {@code CLOSED}
 *         or the input operator returns {@code null}
 * @throws TexeraException declared by the overridden method
 */
@Override
public Tuple getNextTuple() throws TexeraException {
    // The input operator is only consulted while this operator is not closed.
    Tuple sourceTuple = (cursor == CLOSED) ? null : inputOperator.getNextTuple();
    if (sourceTuple == null) {
        return null;
    }

    // Copy the incoming fields, then put the sentiment score in the last position.
    List<IField> fieldsWithScore = new ArrayList<>(sourceTuple.getFields());
    IntegerField scoreField = new IntegerField(computeSentimentScore(sourceTuple));
    fieldsWithScore.add(scoreField);

    return new Tuple(outputSchema, fieldsWithScore);
}
use of edu.uci.ics.texera.api.field.IntegerField in project textdb by TextDB.
the class WordCloudSink method processTuples.
/**
 * Turns the word counts returned by {@code wordCount()} into (word, font size) tuples
 * and stores them in {@code this.result}.
 *
 * <p>Each frequency is linearly rescaled from the observed [min, max] frequency range
 * onto [{@code MIN_FONT_SIZE}, {@code MAX_FONT_SIZE}] and truncated to an int.
 * When every word has the same frequency (including the single-word case) the
 * frequency range is zero; in that case every word is assigned {@code MIN_FONT_SIZE}
 * instead of dividing by zero.
 *
 * @throws TexeraException declared by the overridden method
 */
@Override
public void processTuples() throws TexeraException {
    // calculate word frequencies
    List<Map.Entry<String, Integer>> wordCountList = wordCount();

    // Double.MIN_VALUE is the smallest *positive* double, so it is not a valid
    // "lowest possible" seed for a running maximum; use -Double.MAX_VALUE instead.
    double minValue = Double.MAX_VALUE;
    double maxValue = -Double.MAX_VALUE;
    for (Map.Entry<String, Integer> e : wordCountList) {
        int frequency = e.getValue();
        minValue = Math.min(minValue, frequency);
        maxValue = Math.max(maxValue, frequency);
    }
    double frequencyRange = maxValue - minValue;

    // normalize the font size for wordcloud js
    // https://github.com/timdream/wordcloud2.js/issues/53
    List<Tuple> tempList = new ArrayList<>();
    for (Map.Entry<String, Integer> e : wordCountList) {
        int frequency = e.getValue();
        // A zero range would yield 0.0 / 0.0 = NaN, and (int) NaN is 0, which would
        // produce a font size of 0 for every word. Fall back to a ratio of 0 instead.
        double ratio = frequencyRange > 0 ? (frequency - minValue) / frequencyRange : 0.0;
        int fontSize = (int) (ratio * (this.MAX_FONT_SIZE - this.MIN_FONT_SIZE) + this.MIN_FONT_SIZE);
        tempList.add(new Tuple(outputSchema, new StringField(e.getKey()), new IntegerField(fontSize)));
    }
    this.result = tempList;
}
use of edu.uci.ics.texera.api.field.IntegerField in project textdb by TextDB.
the class FuzzyTokenMatcherTest method TestFuzzyTokenMatcher2.
/**
 * Runs the fuzzy token query "Twelve Angry Men" with a threshold of 0.5 on the
 * description attribute of the people table and compares the returned tuples with
 * four expected tuples, each carrying one span for the token "angry".
 */
@Test
public void TestFuzzyTokenMatcher2() throws Exception {
    String query = "Twelve Angry Men";
    // The ratio of tokens that need to be matched
    double threshold = 0.5;
    ArrayList<String> attributeNames = new ArrayList<>(Arrays.asList(TestConstants.DESCRIPTION));

    Schema schema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS_ATTR).build();
    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

    // Expected tuple 1
    Span angrySpan1 = new Span(TestConstants.DESCRIPTION, 5, 10, "angry", "Angry", 1);
    List<Span> spanList1 = Arrays.asList(angrySpan1);
    IField[] fields1 = {
        new StringField("bruce"),
        new StringField("john Lee"),
        new IntegerField(46),
        new DoubleField(5.50),
        new DateField(dateFormat.parse("01-14-1970")),
        new TextField("Tall Angry"),
        new ListField<Span>(spanList1)
    };

    // Expected tuple 2
    Span angrySpan2 = new Span(TestConstants.DESCRIPTION, 6, 11, "angry", "Angry", 1);
    List<Span> spanList2 = Arrays.asList(angrySpan2);
    IField[] fields2 = {
        new StringField("brad lie angelina"),
        new StringField("pitt"),
        new IntegerField(44),
        new DoubleField(6.10),
        new DateField(dateFormat.parse("01-12-1972")),
        new TextField("White Angry"),
        new ListField<Span>(spanList2)
    };

    // Expected tuple 3
    Span angrySpan3 = new Span(TestConstants.DESCRIPTION, 40, 45, "angry", "Angry", 8);
    List<Span> spanList3 = Arrays.asList(angrySpan3);
    IField[] fields3 = {
        new StringField("george lin lin"),
        new StringField("lin clooney"),
        new IntegerField(43),
        new DoubleField(6.06),
        new DateField(dateFormat.parse("01-13-1973")),
        new TextField("Lin Clooney is Short and lin clooney is Angry"),
        new ListField<Span>(spanList3)
    };

    // Expected tuple 4
    Span angrySpan4 = new Span(TestConstants.DESCRIPTION, 6, 11, "angry", "angry", 1);
    List<Span> spanList4 = Arrays.asList(angrySpan4);
    IField[] fields4 = {
        new StringField("Mary brown"),
        new StringField("Lake Forest"),
        new IntegerField(42),
        new DoubleField(5.99),
        new DateField(dateFormat.parse("01-13-1974")),
        new TextField("Short angry"),
        new ListField<Span>(spanList4)
    };

    List<Tuple> expectedResultList = new ArrayList<>(Arrays.asList(
        new Tuple(schema, fields1),
        new Tuple(schema, fields2),
        new Tuple(schema, fields3),
        new Tuple(schema, fields4)));

    List<Tuple> results = FuzzyTokenMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, threshold, attributeNames);
    Assert.assertTrue(TestUtils.equals(expectedResultList, results));
}
use of edu.uci.ics.texera.api.field.IntegerField in project textdb by TextDB.
the class JoinDistanceTest method testBothOperatorsMultipleTuplesSpanWithinThreshold.
// --------------------<END of single tuple test cases>--------------------
/*
 * Scenario: both operators produce multiple tuples, and the spans to be
 * joined lie within the distance threshold.
 *
 * Expected: the join yields a list of tuples that carry the joined spans,
 * one tuple for each input tuple whose spans are within the threshold.
 */
@Test
public void testBothOperatorsMultipleTuplesSpanWithinThreshold() throws Exception {
    List<Tuple> booksToInsert = new ArrayList<>();
    booksToInsert.add(JoinTestConstants.bookGroup1.get(3));
    booksToInsert.add(JoinTestConstants.bookGroup2.get(2));
    booksToInsert.add(JoinTestConstants.bookGroup2.get(4));
    JoinTestHelper.insertToTable(BOOK_TABLE, booksToInsert);

    KeywordMatcherSourceOperator keywordSourceOuter = JoinTestHelper.getKeywordSource(BOOK_TABLE, "review", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner = JoinTestHelper.getKeywordSource(BOOK_TABLE, "book", conjunction);
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 12);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(
        keywordSourceInner, keywordSourceOuter, distancePredicate, Integer.MAX_VALUE, 0);

    Schema resultSchema = new Schema.Builder()
        .add(JoinTestConstants.BOOK_SCHEMA)
        .add(SchemaConstants.SPAN_LIST_ATTRIBUTE)
        .build();

    // All three expected tuples share this review text and this span list.
    String reviewText = "Review of a Book. This is a typical "
        + "review. This is a test. A book review "
        + "test. A test to test queries without "
        + "actually using actual review. From "
        + "here onwards, we can pretend this to "
        + "be actually a review even if it is not "
        + "your typical book review.";

    List<Span> spanList = new ArrayList<>();
    spanList.add(new Span(JoinTestConstants.REVIEW, 0, 16, "review_book", "Review of a Book"));
    spanList.add(new Span(JoinTestConstants.REVIEW, 62, 73, "review_book", "book review"));
    spanList.add(new Span(JoinTestConstants.REVIEW, 235, 246, "review_book", "book review"));

    IField[] book1 = {
        new IntegerField(54),
        new StringField("Andria Williams"),
        new StringField("The Longest Night: A Novel"),
        new IntegerField(400),
        new TextField(reviewText),
        new ListField<>(spanList)
    };
    IField[] book2 = {
        new IntegerField(65),
        new StringField("Sharon Guskin"),
        new StringField("The Forgetting Time: A Novel"),
        new IntegerField(368),
        new TextField(reviewText),
        new ListField<>(spanList)
    };
    IField[] book3 = {
        new IntegerField(63),
        new StringField("Paul Kalanithi"),
        new StringField("When Breath Becomes Air"),
        new IntegerField(256),
        new TextField(reviewText),
        new ListField<>(spanList)
    };

    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(new Tuple(resultSchema, book1));
    expectedResult.add(new Tuple(resultSchema, book2));
    expectedResult.add(new Tuple(resultSchema, book3));

    Assert.assertEquals(3, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
use of edu.uci.ics.texera.api.field.IntegerField in project textdb by TextDB.
the class JoinDistanceTest method testBothTheSpansAreSame.
// Scenario: the two spans to be joined are identical because both sides
// search for the same keyword.
// e.g.
// [<11, 18>]
// [<11, 18>]
// threshold = 20 (can be any non-negative number)
// [ ]
// [ ]
// Expected: the join returns that same span; its key and value are built
// from the (identical) keyword of both sides.
// [<11, 18>]
@Test
public void testBothTheSpansAreSame() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    // Outer and inner sources look for the same keyword.
    String keyword = "special";
    KeywordMatcherSourceOperator keywordSourceOuter = JoinTestHelper.getKeywordSource(BOOK_TABLE, keyword, conjunction);
    KeywordMatcherSourceOperator keywordSourceInner = JoinTestHelper.getKeywordSource(BOOK_TABLE, keyword, conjunction);
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(
        keywordSourceInner, keywordSourceOuter, distancePredicate, Integer.MAX_VALUE, 0);

    Schema resultSchema = new Schema.Builder()
        .add(JoinTestConstants.BOOK_SCHEMA)
        .add(SchemaConstants.SPAN_LIST_ATTRIBUTE)
        .build();

    List<Span> spanList = new ArrayList<>();
    spanList.add(new Span(JoinTestConstants.REVIEW, 11, 18, "special_special", "special"));

    String reviewText = "It takes a special kind "
        + "of writer to make topics ranging from death to our "
        + "gastrointestinal tract interesting (sometimes "
        + "hilariously so), and pop science writer Mary Roach is "
        + "always up to the task.";
    IField[] book1 = {
        new IntegerField(52),
        new StringField("Mary Roach"),
        new StringField("Grunt: The Curious Science of Humans at War"),
        new IntegerField(288),
        new TextField(reviewText),
        new ListField<>(spanList)
    };

    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(new Tuple(resultSchema, book1));

    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
Aggregations