Search in sources :

Example 31 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class KeywordConjunctionTest method testMultipleWordsQuery.

/**
 * Verifies the List<ITuple> returned by Keyword Matcher on multiple-word
 * queries
 *
 * @throws Exception
 */
@Test
public void testMultipleWordsQuery() throws Exception {
    // Prepare the query
    String query = "george lin lin";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);
    // Prepare the expected result list
    List<Span> list = new ArrayList<Span>();
    Span span1 = new Span("firstName", 0, 14, "george lin lin", "george lin lin");
    list.add(span1);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = new Attribute(RESULTS, AttributeType.LIST);
    IField[] fields1 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(tuple1);
    // Perform the query
    List<Tuple> resultList = KeywordTestHelper.getQueryResults(PEOPLE_TABLE, query, attributeNames, conjunction);
    // check the results
    boolean contains = TestUtils.equals(expectedResultList, resultList);
    Assert.assertTrue(contains);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 32 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class KeywordSubstringTest method testSubstringForStringField.

/**
 * Verifies List<ITuple> returned by Substring Matcher on query with
 * multiple spaces and stop words on a String Field
 *
 * @throws Exception
 */
@Test
public void testSubstringForStringField() throws Exception {
    // Prepare Query
    String query = "short and lin";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);
    // Prepare expected result list
    List<Span> list = new ArrayList<Span>();
    Span span1 = new Span("description", 15, 28, "short and lin", "Short and lin");
    list.add(span1);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = new Attribute(RESULTS, AttributeType.LIST);
    IField[] fields1 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(tuple1);
    // Perform Query
    List<Tuple> resultList = KeywordTestHelper.getScanSourceResults(PEOPLE_TABLE, query, attributeNames, substring, Integer.MAX_VALUE, 0);
    // Perform Check
    boolean contains = TestUtils.equals(expectedResultList, resultList);
    Assert.assertTrue(contains);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 33 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class KeywordSubstringTest method testCombinedSpanInMultipleFieldsQuery.

/**
 * Verifies: Verifies List<ITuple> returned multiple results by Substring
 * Matcher on query with spaces on both left side and right side.
 *
 * @throws Exception
 */
@Test
public void testCombinedSpanInMultipleFieldsQuery() throws Exception {
    // Prepare Query
    String query = " lin ";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);
    // Prepare expected result list
    List<Span> list = new ArrayList<>();
    Span span1 = new Span("description", 24, 29, " lin ", " lin ");
    list.add(span1);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = new Attribute(RESULTS, AttributeType.LIST);
    IField[] fields1 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(tuple1);
    // Perform Query
    List<Tuple> resultList = KeywordTestHelper.getScanSourceResults(PEOPLE_TABLE, query, attributeNames, substring, Integer.MAX_VALUE, 0);
    // Perform Check
    boolean contains = TestUtils.equals(expectedResultList, resultList);
    Assert.assertTrue(contains);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 34 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class KeywordSubstringTest method testSubstringWithStopwordQuery.

/**
 * Verifies: Verifies List<ITuple> returned multiple results by Substring
 * Matcher on query with spaces on both left side and right side.
 *
 * @throws Exception
 */
@Test
public void testSubstringWithStopwordQuery() throws Exception {
    // Prepare Query
    String query = "is";
    ArrayList<String> attributeNames = new ArrayList<>();
    attributeNames.add(TestConstants.FIRST_NAME);
    attributeNames.add(TestConstants.LAST_NAME);
    attributeNames.add(TestConstants.DESCRIPTION);
    // Prepare expected result list
    List<Span> list = new ArrayList<>();
    Span span1 = new Span("description", 12, 14, "is", "is");
    Span span2 = new Span("description", 37, 39, "is", "is");
    list.add(span1);
    list.add(span2);
    Attribute[] schemaAttributes = new Attribute[TestConstants.ATTRIBUTES_PEOPLE.length + 1];
    for (int count = 0; count < schemaAttributes.length - 1; count++) {
        schemaAttributes[count] = TestConstants.ATTRIBUTES_PEOPLE[count];
    }
    schemaAttributes[schemaAttributes.length - 1] = new Attribute(RESULTS, AttributeType.LIST);
    IField[] fields1 = { new StringField("george lin lin"), new StringField("lin clooney"), new IntegerField(43), new DoubleField(6.06), new DateField(new SimpleDateFormat("MM-dd-yyyy").parse("01-13-1973")), new TextField("Lin Clooney is Short and lin clooney is Angry"), new ListField<>(list) };
    Tuple tuple1 = new Tuple(new Schema(schemaAttributes), fields1);
    List<Tuple> expectedResultList = new ArrayList<>();
    expectedResultList.add(tuple1);
    // Perform Query
    List<Tuple> resultList = KeywordTestHelper.getScanSourceResults(PEOPLE_TABLE, query, attributeNames, substring, Integer.MAX_VALUE, 0);
    // Perform Check
    boolean contains = TestUtils.equals(expectedResultList, resultList);
    Assert.assertTrue(contains);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IntegerField(edu.uci.ics.texera.api.field.IntegerField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) StringField(edu.uci.ics.texera.api.field.StringField) TextField(edu.uci.ics.texera.api.field.TextField) DateField(edu.uci.ics.texera.api.field.DateField) SimpleDateFormat(java.text.SimpleDateFormat) DoubleField(edu.uci.ics.texera.api.field.DoubleField) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 35 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class RegexMatcherTest method testRegexText2.

@Test
public void testRegexText2() throws Exception {
    String query = "follow(-| )?up";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, query, Arrays.asList(RegexTestConstantsText.CONTENT));
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    // expected to match "followup"
    List<Tuple> data = RegexTestConstantsText.getSampleTextTuples();
    Schema spanSchema = new Schema.Builder().add(RegexTestConstantsText.SCHEMA_TEXT).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 28, 36, query, "followup"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 54, 62, query, "followup"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(4).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "follow up"
    spans.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 18, 27, query, "follow up"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 51, 60, query, "follow up"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "follow-up" & "followup"
    spans.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 24, 33, query, "follow-up"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 38, 46, query, "followup"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(6).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Span (edu.uci.ics.texera.api.span.Span)135 ArrayList (java.util.ArrayList)104 IField (edu.uci.ics.texera.api.field.IField)100 Tuple (edu.uci.ics.texera.api.tuple.Tuple)100 Schema (edu.uci.ics.texera.api.schema.Schema)91 Test (org.junit.Test)84 TextField (edu.uci.ics.texera.api.field.TextField)79 IntegerField (edu.uci.ics.texera.api.field.IntegerField)67 StringField (edu.uci.ics.texera.api.field.StringField)65 DoubleField (edu.uci.ics.texera.api.field.DoubleField)55 Attribute (edu.uci.ics.texera.api.schema.Attribute)53 DateField (edu.uci.ics.texera.api.field.DateField)52 SimpleDateFormat (java.text.SimpleDateFormat)52 ListField (edu.uci.ics.texera.api.field.ListField)41 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)30 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)13 AttributeType (edu.uci.ics.texera.api.schema.AttributeType)12 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)11 JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate)9 Collectors (java.util.stream.Collectors)9