Search in sources :

Example 11 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInStringFieldUsingKeywordChinese.

/**
 * Scenario: checks the tuples obtained from DictionaryMatcher for the
 * single-word dictionary entries "无忌" and "长孙", queried over the first
 * name, last name and description attributes of CHINESE_TABLE with
 * KeywordMatchingType.CONJUNCTION_INDEXBASED.
 * Test in Chinese.
 */
@Test
public void testSingleWordQueryInStringFieldUsingKeywordChinese() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<String>(Arrays.asList("无忌", "长孙"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // Each expected tuple carries exactly one span: one on lastName, one on firstName.
    List<Span> lastNameSpans = new ArrayList<>();
    lastNameSpans.add(new Span("lastName", 0, 2, "长孙", "长孙"));
    List<Span> firstNameSpans = new ArrayList<>();
    firstNameSpans.add(new Span("firstName", 0, 2, "无忌", "无忌"));

    // Expected schema: every people attribute, followed by the results attribute.
    int peopleAttributeCount = TestConstantsChinese.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstantsChinese.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

    IField[] lastNameMatchFields = {
        new StringField("无忌"),
        new StringField("长孙"),
        new IntegerField(46),
        new DoubleField(5.50),
        new DateField(dateFormat.parse("01-14-1970")),
        new TextField("北京大学电气工程学院"),
        new ListField<Span>(lastNameSpans)
    };
    IField[] firstNameMatchFields = {
        new StringField("无忌"),
        new StringField("长孙"),
        new IntegerField(46),
        new DoubleField(5.50),
        new DateField(dateFormat.parse("01-14-1970")),
        new TextField("北京大学电气工程学院"),
        new ListField<Span>(firstNameSpans)
    };

    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(new Schema(schemaAttributes), lastNameMatchFields));
    expectedResults.add(new Tuple(new Schema(schemaAttributes), firstNameMatchFields));

    List<String> attributeNames = Arrays.asList(
            TestConstantsChinese.FIRST_NAME,
            TestConstantsChinese.LAST_NAME,
            TestConstantsChinese.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(
            CHINESE_TABLE, dictionary, attributeNames, KeywordMatchingType.CONJUNCTION_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 12 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInTextFieldUsingKeywordChinese.

/**
 * Scenario: checks the tuples obtained from DictionaryMatcher for the
 * single-word dictionary entry "北京大学", queried over the first name,
 * last name and description attributes of CHINESE_TABLE with
 * KeywordMatchingType.CONJUNCTION_INDEXBASED. Both expected tuples carry
 * the same span on the description attribute.
 */
@Test
public void testSingleWordQueryInTextFieldUsingKeywordChinese() throws Exception {
    ArrayList<String> dictionaryEntries = new ArrayList<String>(Arrays.asList("北京大学"));
    Dictionary dictionary = new Dictionary(dictionaryEntries);

    // One span list is shared by both expected tuples.
    List<Span> descriptionSpans = new ArrayList<>();
    descriptionSpans.add(new Span("description", 0, 4, "北京大学", "北京大学", 0));

    // Expected schema: every people attribute, followed by the results attribute.
    int peopleAttributeCount = TestConstantsChinese.ATTRIBUTES_PEOPLE.length;
    Attribute[] schemaAttributes = new Attribute[peopleAttributeCount + 1];
    System.arraycopy(TestConstantsChinese.ATTRIBUTES_PEOPLE, 0, schemaAttributes, 0, peopleAttributeCount);
    schemaAttributes[peopleAttributeCount] = RESULTS_ATTRIBUTE;

    SimpleDateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy");

    IField[] firstPersonFields = {
        new StringField("无忌"),
        new StringField("长孙"),
        new IntegerField(46),
        new DoubleField(5.50),
        new DateField(dateFormat.parse("01-14-1970")),
        new TextField("北京大学电气工程学院"),
        new ListField<Span>(descriptionSpans)
    };
    IField[] secondPersonFields = {
        new StringField("孔明"),
        new StringField("洛克贝尔"),
        new IntegerField(42),
        new DoubleField(5.99),
        new DateField(dateFormat.parse("01-13-1974")),
        new TextField("北京大学计算机学院"),
        new ListField<Span>(descriptionSpans)
    };

    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(new Schema(schemaAttributes), firstPersonFields));
    expectedResults.add(new Tuple(new Schema(schemaAttributes), secondPersonFields));

    List<String> attributeNames = Arrays.asList(
            TestConstantsChinese.FIRST_NAME,
            TestConstantsChinese.LAST_NAME,
            TestConstantsChinese.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(
            CHINESE_TABLE, dictionary, attributeNames, KeywordMatchingType.CONJUNCTION_INDEXBASED);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 13 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testWhenOpenOrCloseIsCalledTwiceAndTryToGetNextTupleWhenClosed.

// ------------------------<Test cases for cursor.>------------------------
/*
 * Scenario: open() and close() are each called twice on the join, and
 * getNextTuple() is then called on the closed operator.
 * Test result: the repeated open()/close() calls should make no noticeable
 * difference (four tuples are still expected), while getNextTuple() on the
 * closed operator should throw the DataFlowException this test expects.
 */
@Test(expected = DataFlowException.class)
public void testWhenOpenOrCloseIsCalledTwiceAndTryToGetNextTupleWhenClosed() throws Exception {
    List<Tuple> insertedBooks = JoinTestConstants.bookGroup1.subList(1, 5);
    JoinTestHelper.insertToTable(BOOK_TABLE, insertedBooks);

    KeywordMatcherSourceOperator outerSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "typical", conjunction);
    KeywordMatcherSourceOperator innerSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "actually", conjunction);

    Join join = new Join(new JoinDistancePredicate(JoinTestConstants.REVIEW, 90));
    join.setOuterInputOperator(outerSource);
    join.setInnerInputOperator(innerSource);

    List<Tuple> joinedTuples = new ArrayList<>();

    // Open twice, drain every tuple, then close twice.
    join.open();
    join.open();
    for (Tuple next = join.getNextTuple(); next != null; next = join.getNextTuple()) {
        joinedTuples.add(next);
    }
    join.close();
    join.close();

    Assert.assertEquals(4, joinedTuples.size());

    // The operator is already closed, so this call is expected to throw.
    Tuple afterClose = join.getNextTuple();
    if (afterClose != null) {
        joinedTuples.add(afterClose);
    }
}
Also used : ArrayList(java.util.ArrayList) Join(edu.uci.ics.textdb.exp.join.Join) JoinDistancePredicate(edu.uci.ics.textdb.exp.join.JoinDistancePredicate) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator) Test(org.junit.Test)

Example 14 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testOneOfTheOperatorResultIsEmpty.

// Scenario: one or both of the input operators find no suitable matches,
// so at least one of the result lists being joined is empty.
// Test result: Join should return an empty list.
@Test
public void testOneOfTheOperatorResultIsEmpty() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    KeywordMatcherSourceOperator outerSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator innerSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "book", conjunction);
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);

    List<Tuple> joinedTuples = JoinTestHelper.getJoinDistanceResults(
            innerSource, outerSource, distancePredicate, Integer.MAX_VALUE, 0);

    Assert.assertEquals(0, joinedTuples.size());
}
Also used : JoinDistancePredicate(edu.uci.ics.textdb.exp.join.JoinDistancePredicate) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator) Test(org.junit.Test)

Example 15 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class JoinDistanceTest method testBothTheSpansAreSame.

// Scenario: the two spans being joined are identical because both keyword
// sources search for the same keyword.
// e.g.
// outer: [<11, 18>]
// inner: [<11, 18>]
// threshold = 20 (can be any non-negative number)
// Test result: Join should return a single span with the same bounds and
// the same value, under the combined key.
// [<11, 18>]
@Test
public void testBothTheSpansAreSame() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    KeywordMatcherSourceOperator outerSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator innerSource = JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);

    List<Tuple> joinedTuples = JoinTestHelper.getJoinDistanceResults(
            innerSource, outerSource, distancePredicate, Integer.MAX_VALUE, 0);

    // Build the single tuple the join is expected to produce.
    Schema expectedSchema = Utils.createSpanSchema(JoinTestConstants.BOOK_SCHEMA);
    List<Span> expectedSpans = new ArrayList<>();
    expectedSpans.add(new Span(JoinTestConstants.REVIEW, 11, 18, "special_special", "special"));

    String reviewText = "It takes a special kind " + "of writer to make topics ranging from death to our " + "gastrointestinal tract interesting (sometimes " + "hilariously so), and pop science writer Mary Roach is " + "always up to the task.";
    IField[] expectedFields = {
        new IntegerField(52),
        new StringField("Mary Roach"),
        new StringField("Grunt: The Curious Science of Humans at War"),
        new IntegerField(288),
        new TextField(reviewText),
        new ListField<>(expectedSpans)
    };
    Tuple expectedTuple = new Tuple(expectedSchema, expectedFields);

    Assert.assertEquals(1, joinedTuples.size());

    List<Tuple> expectedTuples = new ArrayList<>();
    expectedTuples.add(expectedTuple);
    Assert.assertTrue(TestUtils.equals(expectedTuples, joinedTuples));
}
Also used : Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) JoinDistancePredicate(edu.uci.ics.textdb.exp.join.JoinDistancePredicate) Span(edu.uci.ics.textdb.api.span.Span) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Tuple (edu.uci.ics.textdb.api.tuple.Tuple)234 ArrayList (java.util.ArrayList)144 Test (org.junit.Test)135 IField (edu.uci.ics.textdb.api.field.IField)102 Schema (edu.uci.ics.textdb.api.schema.Schema)95 TextField (edu.uci.ics.textdb.api.field.TextField)92 StringField (edu.uci.ics.textdb.api.field.StringField)85 Attribute (edu.uci.ics.textdb.api.schema.Attribute)84 IntegerField (edu.uci.ics.textdb.api.field.IntegerField)80 Span (edu.uci.ics.textdb.api.span.Span)80 DoubleField (edu.uci.ics.textdb.api.field.DoubleField)64 DateField (edu.uci.ics.textdb.api.field.DateField)61 SimpleDateFormat (java.text.SimpleDateFormat)60 DataWriter (edu.uci.ics.textdb.storage.DataWriter)31 Dictionary (edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary)25 KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)20 RelationManager (edu.uci.ics.textdb.storage.RelationManager)19 JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate)18 ScanBasedSourceOperator (edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator)17 ScanSourcePredicate (edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate)17