Search in sources :

Example 16 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTupleCorpURL.

/**
 * Runs a URL regex over the URL attribute of the corp table and verifies that the
 * returned tuples equal sample tuples 1 and 2, each extended with a RESULTS list
 * field holding a single span.
 */
@Test
public void testGetNextTupleCorpURL() throws Exception {
    String urlRegex = "^(https?:\\/\\/)?([\\da-z\\.-]+)\\.([a-z\\.]{2,6})([\\/\\w \\.-]*)*\\/?$";
    List<Tuple> actualTuples = RegexMatcherTestHelper.getQueryResults(
            CORP_TABLE, urlRegex, Arrays.asList(RegexTestConstantsCorp.URL));

    List<Tuple> expectedTuples = new ArrayList<Tuple>();
    List<Tuple> sampleTuples = RegexTestConstantsCorp.getSampleCorpTuples();
    // Expected tuples carry the original corp fields plus one extra list attribute.
    Schema schemaWithResults = Utils.addAttributeToSchema(
            RegexTestConstantsCorp.SCHEMA_CORP, new Attribute(RESULTS, AttributeType.LIST));

    // Sample tuple 1: one span from offset 0 to 16 with value "http://weibo.com".
    List<Span> weiboSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsCorp.URL, 0, 16, urlRegex, "http://weibo.com")));
    List<IField> weiboFields = new ArrayList<IField>(sampleTuples.get(1).getFields());
    weiboFields.add(new ListField<Span>(weiboSpans));
    expectedTuples.add(new Tuple(schemaWithResults, weiboFields.toArray(new IField[weiboFields.size()])));

    // Sample tuple 2: one span from offset 0 to 32 with value "https://www.microsoft.com/en-us/".
    List<Span> microsoftSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsCorp.URL, 0, 32, urlRegex, "https://www.microsoft.com/en-us/")));
    List<IField> microsoftFields = new ArrayList<IField>(sampleTuples.get(2).getFields());
    microsoftFields.add(new ListField<Span>(microsoftSpans));
    expectedTuples.add(new Tuple(schemaWithResults, microsoftFields.toArray(new IField[microsoftFields.size()])));

    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 17 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testRegexWithLimit.

/**
 * Runs the regex "patient" over the CONTENT attribute of the text table with the extra
 * helper arguments {@code true, 2, 0} and checks that exactly two tuples come back, both
 * drawn from the three candidate tuples built below.
 *
 * NOTE(review): the trailing arguments are presumably (flag, limit = 2, offset = 0);
 * confirm against RegexMatcherTestHelper.getQueryResults.
 */
@Test
public void testRegexWithLimit() throws Exception {
    String query = "patient";
    List<Tuple> exactResultsWithLimit = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, query, Arrays.asList(RegexTestConstantsText.CONTENT), true, 2, 0);

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    List<Tuple> data = RegexTestConstantsText.getSampleTextTuples();
    // Candidate tuples carry the original text fields plus one extra list attribute.
    Schema spanSchema = Utils.addAttributeToSchema(RegexTestConstantsText.SCHEMA_TEXT, new Attribute(RESULTS, AttributeType.LIST));

    // Candidate from sample tuple 4: a single "patient" span at offsets 4-11.
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(4).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));

    // Candidate from sample tuple 5: two "patient" spans at offsets 4-11 and 65-72.
    // The span list is reused; ListField receives its own copy, so clearing is safe.
    spans.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 65, 72, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));

    // Candidate from sample tuple 6: a single "patient" span at offsets 4-11.
    spans.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(6).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));

    // Only two tuples are returned, so the check is containment in the candidate
    // set rather than equality with it.
    Assert.assertTrue(TestUtils.containsAll(expectedResults, exactResultsWithLimit));
    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages correct.
    Assert.assertEquals(3, expectedResults.size());
    Assert.assertEquals(2, exactResultsWithLimit.size());
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 18 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTuplePeopleFirstName.

/**
 * Runs the regex {@code g[^\s]*} over the FIRST_NAME attribute of the people table and
 * verifies that the returned tuples equal sample tuples 2 and 3, each extended with a
 * RESULTS list field holding a single span.
 */
@Test
public void testGetNextTuplePeopleFirstName() throws Exception {
    String firstNameRegex = "g[^\\s]*";
    List<Tuple> actualTuples = RegexMatcherTestHelper.getQueryResults(
            PEOPLE_TABLE, firstNameRegex, Arrays.asList(TestConstants.FIRST_NAME));

    List<Tuple> expectedTuples = new ArrayList<Tuple>();
    List<Tuple> samplePeople = TestConstants.getSamplePeopleTuples();
    // Expected tuples carry the original people fields plus one extra list attribute.
    Schema schemaWithResults = Utils.addAttributeToSchema(
            TestConstants.SCHEMA_PEOPLE, new Attribute(RESULTS, AttributeType.LIST));

    // Sample tuple 2 ("brad lie angelina"): the match starts at the 'g', offsets 11-17.
    List<Span> angelinaSpans = new ArrayList<Span>(Arrays.asList(
            new Span(TestConstants.FIRST_NAME, 11, 17, firstNameRegex, "gelina")));
    List<IField> angelinaFields = new ArrayList<IField>(samplePeople.get(2).getFields());
    angelinaFields.add(new ListField<Span>(angelinaSpans));
    expectedTuples.add(new Tuple(schemaWithResults, angelinaFields.toArray(new IField[angelinaFields.size()])));

    // Sample tuple 3 ("george lin lin"): the match is the leading word, offsets 0-6.
    List<Span> georgeSpans = new ArrayList<Span>(Arrays.asList(
            new Span(TestConstants.FIRST_NAME, 0, 6, firstNameRegex, "george")));
    List<IField> georgeFields = new ArrayList<IField>(samplePeople.get(3).getFields());
    georgeFields.add(new ListField<Span>(georgeSpans));
    expectedTuples.add(new Tuple(schemaWithResults, georgeFields.toArray(new IField[georgeFields.size()])));

    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 19 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testRegexText2.

/**
 * Runs the regex {@code follow(-| )?up} over the CONTENT attribute of the text table and
 * verifies that the returned tuples equal sample tuples 4, 5 and 6, each extended with a
 * RESULTS list field holding two spans.
 */
@Test
public void testRegexText2() throws Exception {
    String followUpRegex = "follow(-| )?up";
    List<Tuple> actualTuples = RegexMatcherTestHelper.getQueryResults(
            TEXT_TABLE, followUpRegex, Arrays.asList(RegexTestConstantsText.CONTENT));

    List<Tuple> expectedTuples = new ArrayList<Tuple>();
    List<Tuple> sampleTexts = RegexTestConstantsText.getSampleTextTuples();
    // Expected tuples carry the original text fields plus one extra list attribute.
    Schema schemaWithResults = Utils.addAttributeToSchema(
            RegexTestConstantsText.SCHEMA_TEXT, new Attribute(RESULTS, AttributeType.LIST));

    // Sample tuple 4: two "followup" spans (no separator).
    List<Span> joinedSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 28, 36, followUpRegex, "followup"),
            new Span(RegexTestConstantsText.CONTENT, 54, 62, followUpRegex, "followup")));
    List<IField> joinedFields = new ArrayList<IField>(sampleTexts.get(4).getFields());
    joinedFields.add(new ListField<Span>(joinedSpans));
    expectedTuples.add(new Tuple(schemaWithResults, joinedFields.toArray(new IField[joinedFields.size()])));

    // Sample tuple 5: two "follow up" spans (space separator).
    List<Span> spacedSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 18, 27, followUpRegex, "follow up"),
            new Span(RegexTestConstantsText.CONTENT, 51, 60, followUpRegex, "follow up")));
    List<IField> spacedFields = new ArrayList<IField>(sampleTexts.get(5).getFields());
    spacedFields.add(new ListField<Span>(spacedSpans));
    expectedTuples.add(new Tuple(schemaWithResults, spacedFields.toArray(new IField[spacedFields.size()])));

    // Sample tuple 6: one "follow-up" span followed by one "followup" span.
    List<Span> mixedSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantsText.CONTENT, 24, 33, followUpRegex, "follow-up"),
            new Span(RegexTestConstantsText.CONTENT, 38, 46, followUpRegex, "followup")));
    List<IField> mixedFields = new ArrayList<IField>(sampleTexts.get(6).getFields());
    mixedFields.add(new ListField<Span>(mixedSpans));
    expectedTuples.add(new Tuple(schemaWithResults, mixedFields.toArray(new IField[mixedFields.size()])));

    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 20 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTupleStaffEmail.

/**
 * Runs an e-mail regex over the EMAIL attribute of the staff table and verifies that the
 * returned tuples equal sample tuples 0 and 1, each extended with a RESULTS list field
 * holding a single span.
 */
@Test
public void testGetNextTupleStaffEmail() throws Exception {
    String emailRegex = "^([a-z0-9_\\.-]+)@([\\da-z\\.-]+)\\.([a-z\\.]{2,6})$";
    List<Tuple> actualTuples = RegexMatcherTestHelper.getQueryResults(
            STAFF_TABLE, emailRegex, Arrays.asList(RegexTestConstantStaff.EMAIL));

    List<Tuple> expectedTuples = new ArrayList<Tuple>();
    List<Tuple> sampleStaff = RegexTestConstantStaff.getSampleStaffTuples();
    // Expected tuples carry the original staff fields plus one extra list attribute.
    Schema schemaWithResults = Utils.addAttributeToSchema(
            RegexTestConstantStaff.SCHEMA_STAFF, new Attribute(RESULTS, AttributeType.LIST));

    // Sample tuple 0: one span at offsets 0-19 with value "m.bocanegra@164.com".
    List<Span> firstEmailSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantStaff.EMAIL, 0, 19, emailRegex, "m.bocanegra@164.com")));
    List<IField> firstEmailFields = new ArrayList<IField>(sampleStaff.get(0).getFields());
    firstEmailFields.add(new ListField<Span>(firstEmailSpans));
    expectedTuples.add(new Tuple(schemaWithResults, firstEmailFields.toArray(new IField[firstEmailFields.size()])));

    // Sample tuple 1: one span at offsets 0-18 with value "hwangk@ske.akb.edu".
    List<Span> secondEmailSpans = new ArrayList<Span>(Arrays.asList(
            new Span(RegexTestConstantStaff.EMAIL, 0, 18, emailRegex, "hwangk@ske.akb.edu")));
    List<IField> secondEmailFields = new ArrayList<IField>(sampleStaff.get(1).getFields());
    secondEmailFields.add(new ListField<Span>(secondEmailSpans));
    expectedTuples.add(new Tuple(schemaWithResults, secondEmailFields.toArray(new IField[secondEmailFields.size()])));

    Assert.assertTrue(TestUtils.equals(expectedTuples, actualTuples));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

ListField (edu.uci.ics.textdb.api.field.ListField)20 Span (edu.uci.ics.textdb.api.span.Span)18 IField (edu.uci.ics.textdb.api.field.IField)17 ArrayList (java.util.ArrayList)17 Attribute (edu.uci.ics.textdb.api.schema.Attribute)16 Schema (edu.uci.ics.textdb.api.schema.Schema)16 Tuple (edu.uci.ics.textdb.api.tuple.Tuple)14 Test (org.junit.Test)11 SchemaConstants (edu.uci.ics.textdb.api.constants.SchemaConstants)5 DataFlowException (edu.uci.ics.textdb.api.exception.DataFlowException)5 AttributeType (edu.uci.ics.textdb.api.schema.AttributeType)5 Collectors (java.util.stream.Collectors)5 Iterator (java.util.Iterator)4 List (java.util.List)4 ErrorMessages (edu.uci.ics.textdb.api.constants.ErrorMessages)3 TextDBException (edu.uci.ics.textdb.api.exception.TextDBException)3 Utils (edu.uci.ics.textdb.api.utils.Utils)3 AbstractSingleInputOperator (edu.uci.ics.textdb.exp.common.AbstractSingleInputOperator)3 DataflowUtils (edu.uci.ics.textdb.exp.utils.DataflowUtils)3 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)2