Search in sources :

Example 91 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInStringFieldUsingPhrase.

/**
 * Scenario: verifies GetNextTuple of DictionaryMatcher and multiple word
 * queries in String Field using PHRASEOPERATOR
 */
@Test
public void testSingleWordQueryInStringFieldUsingPhrase() throws Exception {
    ArrayList<String> names = new ArrayList<String>(Arrays.asList("john Lee", "bruce"));
    Dictionary dictionary = new Dictionary(names);
    // create a data tuple first
    List<Span> list1 = new ArrayList<Span>();
    Span span1 = new Span("lastName", 0, 8, "john Lee", "john Lee");
    Span span2 = new Span("firstName", 0, 5, "bruce", "bruce");
    list1.add(span1);
    list1.add(span2);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = RESULTS_ATTRIBUTE;
    IField[] fields1 = { new StringField("bruce"), new StringField("john Lee"), new IntegerField(46), new DoubleField(5.50), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")), new TextField("Tall Angry"), new ListField<Span>(list1) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(tuple1);
    List<String> attributeNames = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(PEOPLE_TABLE, dictionary, attributeNames, KeywordMatchingType.PHRASE_INDEXBASED);
    boolean contains = TestUtils.equals(expectedResults, returnedResults);
    Assert.assertTrue(contains);
}
Also used : Dictionary(edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 92 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testSpansOverlapAndWithinThreshold.

/*
     * This case tests for the spans to be joined have some overlap and both
     * |(span 1 spanStartIndex) - (span 2 spanStartIndex)|,
     * |(span 1 spanEndIndex) - (span 2 spanEndIndex)| are within threshold.
     * 
     * e.g.
     * [<75, 97>]
     * [<92, 109>]
     * threshold = 20 (within threshold)
     * result: [<75, 109>]
     * 
     * Test result: The list contains a tuple with all the fields and a span
     * list consisting of the joined span. The joined span is made up of the
     * field name, start and stop index (computed as <min(span1 spanStartIndex,
     * span2 spanStartIndex), max(span1 spanEndIndex, span2 spanEndIndex)>)
     * key (combination of span1 key and span2 key) and value (combination of 
     * span1 value and span2 value).
     */
@Test
public void testSpansOverlapAndWithinThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));
    KeywordMatcherSourceOperator keywordSourceOuter = JoinTestHelper.getKeywordSource(BOOK_TABLE, "gastrointestinal tract", phrase);
    KeywordMatcherSourceOperator keywordSourceInner = JoinTestHelper.getKeywordSource(BOOK_TABLE, "tract interesting", phrase);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(keywordSourceInner, keywordSourceOuter, new JoinDistancePredicate(JoinTestConstants.REVIEW, 20), Integer.MAX_VALUE, 0);
    Schema resultSchema = new Schema.Builder().add(JoinTestConstants.BOOK_SCHEMA).add(SchemaConstants.SPAN_LIST_ATTRIBUTE).build();
    List<Span> spanList = new ArrayList<>();
    Span span1 = new Span(JoinTestConstants.REVIEW, 75, 109, "gastrointestinal tract_" + "tract interesting", "gastrointestinal " + "tract interesting");
    spanList.add(span1);
    IField[] book1 = { new IntegerField(52), new StringField("Mary Roach"), new StringField("Grunt: The Curious Science of Humans at War"), new IntegerField(288), new TextField("It takes a special kind " + "of writer to make topics ranging from death to our " + "gastrointestinal tract interesting (sometimes " + "hilariously so), and pop science writer Mary Roach is " + "always up to the task."), new ListField<>(spanList) };
    Tuple expectedTuple = new Tuple(resultSchema, book1);
    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(expectedTuple);
    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) JoinDistancePredicate(edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) Span(edu.uci.ics.texera.api.span.Span) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 93 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testForLimitWhenLimitIsLesserThanActualNumberOfResults.

// ---------------------<Limit and offset test cases.>---------------------
/*
     * This case tests for the scenario when limit is some integer greater than
     * 0 and less than the actual number of results and offset is 0 and join 
     * is performed.
     * Test result: A list of tuples with number of tuples equal to limit.
     */
@Test
public void testForLimitWhenLimitIsLesserThanActualNumberOfResults() throws Exception {
    List<Tuple> tuples = JoinTestConstants.bookGroup1.subList(1, 5);
    JoinTestHelper.insertToTable(BOOK_TABLE, tuples);
    KeywordMatcherSourceOperator keywordSourceOuter = JoinTestHelper.getKeywordSource(BOOK_TABLE, "typical", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner = JoinTestHelper.getKeywordSource(BOOK_TABLE, "actually", conjunction);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(keywordSourceInner, keywordSourceOuter, new JoinDistancePredicate(JoinTestConstants.REVIEW, 90), 3, 0);
    Schema resultSchema = new Schema.Builder().add(JoinTestConstants.BOOK_SCHEMA).add(SchemaConstants.SPAN_LIST_ATTRIBUTE).build();
    List<Span> spanList = new ArrayList<>();
    Span span1 = new Span(JoinTestConstants.REVIEW, 28, 119, "typical_actually", "typical review. " + "This is a test. A book review test. " + "A test to test queries without actually");
    spanList.add(span1);
    Span span2 = new Span(JoinTestConstants.REVIEW, 186, 234, "typical_actually", "actually a review " + "even if it is not your typical");
    spanList.add(span2);
    IField[] book1 = { new IntegerField(51), new StringField("author unknown"), new StringField("typical"), new IntegerField(300), new TextField("Review of a Book. This is a typical " + "review. This is a test. A book review " + "test. A test to test queries without " + "actually using actual review. From " + "here onwards, we can pretend this to " + "be actually a review even if it is not " + "your typical book review."), new ListField<>(spanList) };
    IField[] book2 = { new IntegerField(53), new StringField("Noah Hawley"), new StringField("Before the Fall"), new IntegerField(400), new TextField("Review of a Book. This is a typical " + "review. This is a test. A book review " + "test. A test to test queries without " + "actually using actual review. From " + "here onwards, we can pretend this to " + "be actually a review even if it is not " + "your typical book review."), new ListField<>(spanList) };
    IField[] book3 = { new IntegerField(54), new StringField("Andria Williams"), new StringField("The Longest Night: A Novel"), new IntegerField(400), new TextField("Review of a Book. This is a typical " + "review. This is a test. A book review " + "test. A test to test queries without " + "actually using actual review. From " + "here onwards, we can pretend this to " + "be actually a review even if it is not " + "your typical book review."), new ListField<>(spanList) };
    IField[] book4 = { new IntegerField(55), new StringField("Matti Friedman"), new StringField("Pumpkinflowers: A Soldier's " + "Story"), new IntegerField(256), new TextField("Review of a Book. This is a typical " + "review. This is a test. A book review " + "test. A test to test queries without " + "actually using actual review. From " + "here onwards, we can pretend this to " + "be actually a review even if it is not " + "your typical book review."), new ListField<>(spanList) };
    Tuple expectedTuple1 = new Tuple(resultSchema, book1);
    Tuple expectedTuple2 = new Tuple(resultSchema, book2);
    Tuple expectedTuple3 = new Tuple(resultSchema, book3);
    Tuple expectedTuple4 = new Tuple(resultSchema, book4);
    List<Tuple> expectedResult = new ArrayList<>(3);
    expectedResult.add(expectedTuple1);
    expectedResult.add(expectedTuple2);
    expectedResult.add(expectedTuple3);
    expectedResult.add(expectedTuple4);
    Assert.assertEquals(3, resultList.size());
    Assert.assertTrue(TestUtils.containsAll(expectedResult, resultList));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) JoinDistancePredicate(edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) Span(edu.uci.ics.texera.api.span.Span) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 94 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testIdsMatchFieldsMatchSpanWithinThreshold.

/*
     * This case tests for the scenario when the difference of keyword spans
     *  is within the given span threshold.
     *  e.g.
     *  [<11, 18>]
     *  [<27, 33>]
     *  threshold = 20 (within threshold)
     *  result: [<11, 33>]
     *  
     * Test result: The list contains a tuple with all the fields and a span
     * list consisting of the joined span. The joined span is made up of the
     * field name, start and stop index (computed as <min(span1 spanStartIndex,
     * span2 spanStartIndex), max(span1 spanEndIndex, span2 spanEndIndex)>)
     * key (combination of span1 key and span2 key) and value (combination of
     * span1 value and span2 value).
     * 
     */
@Test
public void testIdsMatchFieldsMatchSpanWithinThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));
    KeywordMatcherSourceOperator keywordSourceOuter = JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner = JoinTestHelper.getKeywordSource(BOOK_TABLE, "writer", conjunction);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(keywordSourceInner, keywordSourceOuter, new JoinDistancePredicate(JoinTestConstants.REVIEW, 20), Integer.MAX_VALUE, 0);
    Schema resultSchema = new Schema.Builder().add(JoinTestConstants.BOOK_SCHEMA).add(SchemaConstants.SPAN_LIST_ATTRIBUTE).build();
    List<Span> spanList = new ArrayList<>();
    Span span1 = new Span(JoinTestConstants.REVIEW, 11, 33, "special_writer", "special kind of " + "writer");
    spanList.add(span1);
    IField[] book1 = { new IntegerField(52), new StringField("Mary Roach"), new StringField("Grunt: The Curious Science of Humans at War"), new IntegerField(288), new TextField("It takes a special kind " + "of writer to make topics ranging from death to our " + "gastrointestinal tract interesting (sometimes " + "hilariously so), and pop science writer Mary Roach is " + "always up to the task."), new ListField<>(spanList) };
    Tuple expectedTuple = new Tuple(resultSchema, book1);
    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(expectedTuple);
    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) JoinDistancePredicate(edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) Span(edu.uci.ics.texera.api.span.Span) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 95 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testOneSpanEncompassesOtherAndDifferenceLessThanThreshold.

// This case tests for the scenario when one of the spans to be joined encompasses the other span
// and both |(span 1 spanStartIndex) - (span 2 spanStartIndex)|,
// |(span 1 spanEndIndex) - (span 2 spanEndIndex)| are within threshold.
// e.g.
// [<11, 18>]
// [<3, 33>]
// threshold = 20 (within threshold)
// Test result: The bigger span should be returned.
// [<3, 33>]
@Test
public void testOneSpanEncompassesOtherAndDifferenceLessThanThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));
    KeywordMatcherSourceOperator keywordSourceOuter = JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator keywordSourceInner = JoinTestHelper.getKeywordSource(BOOK_TABLE, "takes a special kind of writer", phrase);
    List<Tuple> resultList = JoinTestHelper.getJoinDistanceResults(keywordSourceInner, keywordSourceOuter, new JoinDistancePredicate(JoinTestConstants.REVIEW, 20), Integer.MAX_VALUE, 0);
    Schema resultSchema = new Schema.Builder().add(JoinTestConstants.BOOK_SCHEMA).add(SchemaConstants.SPAN_LIST_ATTRIBUTE).build();
    List<Span> spanList = new ArrayList<>();
    Span span1 = new Span(JoinTestConstants.REVIEW, 3, 33, "special_takes a special " + "kind of writer", "takes a special " + "kind of writer");
    spanList.add(span1);
    IField[] book1 = { new IntegerField(52), new StringField("Mary Roach"), new StringField("Grunt: The Curious Science of Humans at War"), new IntegerField(288), new TextField("It takes a special kind " + "of writer to make topics ranging from death to our " + "gastrointestinal tract interesting (sometimes " + "hilariously so), and pop science writer Mary Roach is " + "always up to the task."), new ListField<>(spanList) };
    Tuple expectedTuple = new Tuple(resultSchema, book1);
    List<Tuple> expectedResult = new ArrayList<>();
    expectedResult.add(expectedTuple);
    Assert.assertEquals(1, resultList.size());
    Assert.assertTrue(TestUtils.equals(expectedResult, resultList));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) JoinDistancePredicate(edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) Span(edu.uci.ics.texera.api.span.Span) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Tuple (edu.uci.ics.texera.api.tuple.Tuple)332 ArrayList (java.util.ArrayList)191 Test (org.junit.Test)178 IField (edu.uci.ics.texera.api.field.IField)130 Schema (edu.uci.ics.texera.api.schema.Schema)126 Span (edu.uci.ics.texera.api.span.Span)100 StringField (edu.uci.ics.texera.api.field.StringField)96 Attribute (edu.uci.ics.texera.api.schema.Attribute)95 IntegerField (edu.uci.ics.texera.api.field.IntegerField)92 TextField (edu.uci.ics.texera.api.field.TextField)90 DoubleField (edu.uci.ics.texera.api.field.DoubleField)65 DateField (edu.uci.ics.texera.api.field.DateField)60 SimpleDateFormat (java.text.SimpleDateFormat)58 DataWriter (edu.uci.ics.texera.storage.DataWriter)33 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)30 ListField (edu.uci.ics.texera.api.field.ListField)28 TupleSourceOperator (edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator)24 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)23 ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator)21 ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate)21