Search in sources :

Example 56 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class KeywordConjunctionTest method testWordInMultipleFieldsQuery.

/**
     * Verifies: data source has multiple attributes, and an entity can appear
     * in all the fields and multiple times.
     * 
     * @throws Exception
     */
@Test
public void testWordInMultipleFieldsQuery() throws Exception {
    // Prepare the query
    String query = "lin clooney";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);
    // Prepare the expected result list
    List<Span> list = new ArrayList<>();
    Span span1 = new Span("lastName", 0, 11, "lin clooney", "lin clooney");
    Span span2 = new Span("description", 0, 3, "lin", "Lin", 0);
    Span span3 = new Span("description", 25, 28, "lin", "lin", 5);
    Span span4 = new Span("description", 4, 11, "clooney", "Clooney", 1);
    Span span5 = new Span("description", 29, 36, "clooney", "clooney", 6);
    list.add(span1);
    list.add(span2);
    list.add(span3);
    list.add(span4);
    list.add(span5);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = new Attribute(RESULTS, AttributeType.LIST);
    IField[] fields1 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(tuple1);
    // Perform the query
    List<Tuple> resultList = KeywordTestHelper.getQueryResults(PEOPLE_TABLE, query, attributeNames, conjunction);
    // check the results
    boolean contains = TestUtils.equals(expectedResultList, resultList);
    Assert.assertTrue(contains);
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 57 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInTextFieldUsingPhraseChinese.

/**
     * Scenario: verifies GetNextTuple of DictionaryMatcher and single word
     * queries in Text Field using PHRASE OPERATOR in Chinese.
     */
@Test
public void testSingleWordQueryInTextFieldUsingPhraseChinese() throws Exception {
    ArrayList<String> names = new ArrayList<String>(Arrays.asList("北京大学"));
    Dictionary dictionary = new Dictionary(names);
    // create a data tuple first
    List<Span> list = new ArrayList<Span>();
    Span span = new Span("description", 0, 4, "北京大学", "北京大学");
    list.add(span);
    Attribute[] schemaAttributes = new Attribute[TestConstantsChinese.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstantsChinese.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = RESULTS_ATTRIBUTE;
    IField[] fields1 = { new StringField("无忌"), new StringField("长孙"), new IntegerField(46), new DoubleField(5.50), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")), new TextField("北京大学电气工程学院"), new ListField<Span>(list) };
    IField[] fields2 = { new StringField("孔明"), new StringField("洛克贝尔"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("北京大学计算机学院"), new ListField<Span>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    Tuple tuple2 = new Tuple(new Schema(schemaAttributes), fields2);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(tuple1);
    expectedResults.add(tuple2);
    List<String> attributeNames = Arrays.asList(TestConstantsChinese.FIRST_NAME, TestConstantsChinese.LAST_NAME, TestConstantsChinese.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(CHINESE_TABLE, dictionary, attributeNames, KeywordMatchingType.PHRASE_INDEXBASED);
    boolean contains = TestUtils.equals(expectedResults, returnedResults);
    Assert.assertTrue(contains);
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 58 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInTextFieldUsingKeyword.

/**
     * Scenario: verifies GetNextTuple of DictionaryMatcher and single word
     * queries in Text Field using KEYWORD OPERATOR
     */
@Test
public void testSingleWordQueryInTextFieldUsingKeyword() throws Exception {
    ArrayList<String> names = new ArrayList<String>(Arrays.asList("tall"));
    Dictionary dictionary = new Dictionary(names);
    // create a data tuple first
    List<Span> list = new ArrayList<Span>();
    Span span = new Span("description", 0, 4, "tall", "Tall", 0);
    list.add(span);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = RESULTS_ATTRIBUTE;
    IField[] fields1 = { new StringField("bruce"), new StringField("john Lee"), new IntegerField(46), new DoubleField(5.50), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")), new TextField("Tall Angry"), new ListField<Span>(list) };
    IField[] fields2 = { new StringField("christian john wayne"), new StringField("rock bale"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("Tall Fair"), new ListField<Span>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    Tuple tuple2 = new Tuple(new Schema(schemaAttributes), fields2);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(tuple1);
    expectedResults.add(tuple2);
    List<String> attributeNames = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(PEOPLE_TABLE, dictionary, attributeNames, KeywordMatchingType.CONJUNCTION_INDEXBASED);
    boolean contains = TestUtils.equals(expectedResults, returnedResults);
    Assert.assertTrue(contains);
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 59 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testSingleWordQueryInTextFieldUsingScanChinese.

/**
     * Scenario: verifies GetNextTuple of DictionaryMatcher and single word
     * queries in Text Field using SCANOPERATOR in Chinese.
     */
@Test
public void testSingleWordQueryInTextFieldUsingScanChinese() throws Exception {
    ArrayList<String> names = new ArrayList<String>(Arrays.asList("学院"));
    Dictionary dictionary = new Dictionary(names);
    // create a data tuple first
    List<Span> list1 = new ArrayList<Span>();
    Span span1 = new Span("description", 8, 10, "学院", "学院");
    list1.add(span1);
    List<Span> list2 = new ArrayList<Span>();
    Span span2 = new Span("description", 7, 9, "学院", "学院");
    list2.add(span2);
    Attribute[] schemaAttributes = new Attribute[TestConstantsChinese.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstantsChinese.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = RESULTS_ATTRIBUTE;
    IField[] fields1 = { new StringField("无忌"), new StringField("长孙"), new IntegerField(46), new DoubleField(5.50), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-14-1970")), new TextField("北京大学电气工程学院"), new ListField<Span>(list1) };
    IField[] fields2 = { new StringField("孔明"), new StringField("洛克贝尔"), new IntegerField(42), new DoubleField(5.99), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1974")), new TextField("北京大学计算机学院"), new ListField<Span>(list2) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    Tuple tuple2 = new Tuple(new Schema(schemaAttributes), fields2);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(tuple1);
    expectedResults.add(tuple2);
    List<String> attributeNames = Arrays.asList(TestConstantsChinese.FIRST_NAME, TestConstantsChinese.LAST_NAME, TestConstantsChinese.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(CHINESE_TABLE, dictionary, attributeNames, KeywordMatchingType.SUBSTRING_SCANBASED);
    boolean contains = TestUtils.equals(expectedResults, returnedResults);
    Assert.assertTrue(contains);
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 60 with Tuple

use of edu.uci.ics.textdb.api.tuple.Tuple in project textdb by TextDB.

the class DictionaryMatcherTest method testWordInMultipleFieldsQueryUsingKeyword.

/**
     * Scenario S-12:verifies: data source has multiple attributes, and an
     * entity can appear in all the fields and multiple times using KEYWORD
     * OPERATOR.
     */
@Test
public void testWordInMultipleFieldsQueryUsingKeyword() throws Exception {
    ArrayList<String> names = new ArrayList<String>(Arrays.asList("lin clooney"));
    Dictionary dictionary = new Dictionary(names);
    // create a data tuple first
    List<Span> list = new ArrayList<Span>();
    Span span1 = new Span("lastName", 0, 11, "lin clooney", "lin clooney");
    Span span2 = new Span("description", 0, 3, "lin", "Lin", 0);
    Span span3 = new Span("description", 4, 11, "clooney", "Clooney", 1);
    Span span4 = new Span("description", 25, 28, "lin", "lin", 5);
    Span span5 = new Span("description", 29, 36, "clooney", "clooney", 6);
    list.add(span1);
    list.add(span2);
    list.add(span3);
    list.add(span4);
    list.add(span5);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = RESULTS_ATTRIBUTE;
    IField[] fields1 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<Span>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(tuple1);
    List<String> attributeNames = Arrays.asList(TestConstants.FIRST_NAME, TestConstants.LAST_NAME, TestConstants.DESCRIPTION);
    List<Tuple> returnedResults = DictionaryMatcherTestHelper.getQueryResults(PEOPLE_TABLE, dictionary, attributeNames, KeywordMatchingType.CONJUNCTION_INDEXBASED);
    boolean contains = TestUtils.equals(expectedResults, returnedResults);
    Assert.assertTrue(contains);
}
Also used : Dictionary(edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary) Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.textdb.api.field.IntegerField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) StringField(edu.uci.ics.textdb.api.field.StringField) TextField(edu.uci.ics.textdb.api.field.TextField) DateField(edu.uci.ics.textdb.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.textdb.api.field.DoubleField) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Tuple (edu.uci.ics.textdb.api.tuple.Tuple)234 ArrayList (java.util.ArrayList)144 Test (org.junit.Test)135 IField (edu.uci.ics.textdb.api.field.IField)102 Schema (edu.uci.ics.textdb.api.schema.Schema)95 TextField (edu.uci.ics.textdb.api.field.TextField)92 StringField (edu.uci.ics.textdb.api.field.StringField)85 Attribute (edu.uci.ics.textdb.api.schema.Attribute)84 IntegerField (edu.uci.ics.textdb.api.field.IntegerField)80 Span (edu.uci.ics.textdb.api.span.Span)80 DoubleField (edu.uci.ics.textdb.api.field.DoubleField)64 DateField (edu.uci.ics.textdb.api.field.DateField)61 SimpleDateFormat (java.text.SimpleDateFormat)60 DataWriter (edu.uci.ics.textdb.storage.DataWriter)31 Dictionary (edu.uci.ics.textdb.exp.dictionarymatcher.Dictionary)25 KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)20 RelationManager (edu.uci.ics.textdb.storage.RelationManager)19 JoinDistancePredicate (edu.uci.ics.textdb.exp.join.JoinDistancePredicate)18 ScanBasedSourceOperator (edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator)17 ScanSourcePredicate (edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate)17