Search in sources :

Example 46 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTuplePeopleFirstName.

@Test
public void testGetNextTuplePeopleFirstName() throws Exception {
    String query = "g[^\\s]*";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, Arrays.asList(TestConstants.FIRST_NAME));
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    // expected to match "brad lie angelina"
    List<Tuple> data = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(TestConstants.FIRST_NAME, 11, 17, query, "gelina"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(2).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "george lin lin"
    spans.clear();
    spans.add(new Span(TestConstants.FIRST_NAME, 0, 6, query, "george"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(3).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 47 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class RegexMatcherTest method testRegexWithLimit.

@Test
public void testRegexWithLimit() throws Exception {
    String query = "patient";
    List<Tuple> exactResultsWithLimit = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, query, Arrays.asList(RegexTestConstantsText.CONTENT), true, 2, 0);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    List<Tuple> data = RegexTestConstantsText.getSampleTextTuples();
    Schema spanSchema = new Schema.Builder().add(RegexTestConstantsText.SCHEMA_TEXT).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(4).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    spans.clear();
    fields.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 65, 72, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    spans.clear();
    fields.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(6).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    Assert.assertTrue(TestUtils.containsAll(expectedResults, exactResultsWithLimit));
    Assert.assertEquals(expectedResults.size(), 3);
    Assert.assertEquals(exactResultsWithLimit.size(), 2);
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 48 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTupleStaffEmail.

@Test
public void testGetNextTupleStaffEmail() throws Exception {
    String query = "^([a-z0-9_\\.-]+)@([\\da-z\\.-]+)\\.([a-z\\.]{2,6})$";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(STAFF_TABLE, query, Arrays.asList(RegexTestConstantStaff.EMAIL));
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    // expected to match "k.bocanegra@uci.edu"
    List<Tuple> data = RegexTestConstantStaff.getSampleStaffTuples();
    Schema spanSchema = new Schema.Builder().add(RegexTestConstantStaff.SCHEMA_STAFF).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantStaff.EMAIL, 0, 19, query, "m.bocanegra@164.com"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(0).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "hwangl@ics.uci.edu"
    spans.clear();
    spans.add(new Span(RegexTestConstantStaff.EMAIL, 0, 18, query, "hwangk@ske.akb.edu"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(1).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 49 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTupleCorpIP.

@Test
public void testGetNextTupleCorpIP() throws Exception {
    String query = "^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(CORP_TABLE, query, Arrays.asList(RegexTestConstantsCorp.IP_ADDRESS));
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    // expected to match "66.220.144.0"
    List<Tuple> data = RegexTestConstantsCorp.getSampleCorpTuples();
    Schema spanSchema = new Schema.Builder().add(RegexTestConstantsCorp.SCHEMA_CORP).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantsCorp.IP_ADDRESS, 0, 12, query, "66.220.144.0"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(0).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "180.149.134.141"
    spans.clear();
    spans.add(new Span(RegexTestConstantsCorp.IP_ADDRESS, 0, 15, query, "180.149.134.141"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(1).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    // expected to match "131.107.0.89"
    spans.clear();
    spans.add(new Span(RegexTestConstantsCorp.IP_ADDRESS, 0, 12, query, "131.107.0.89"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(2).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 50 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class RegexMatcherTest method testRegexWithLimitOffset.

@Test
public void testRegexWithLimitOffset() throws Exception {
    String query = "patient";
    List<Tuple> exactResultsWithLimitOffset = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, query, Arrays.asList(RegexTestConstantsText.CONTENT), true, 2, 1);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    List<Tuple> data = RegexTestConstantsText.getSampleTextTuples();
    Schema spanSchema = new Schema.Builder().add(RegexTestConstantsText.SCHEMA_TEXT).add(RESULTS, AttributeType.LIST).build();
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(4).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    spans.clear();
    fields.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 65, 72, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    spans.clear();
    fields.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(6).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));
    Assert.assertTrue(TestUtils.containsAll(expectedResults, exactResultsWithLimitOffset));
    Assert.assertEquals(expectedResults.size(), 3);
    Assert.assertEquals(exactResultsWithLimitOffset.size(), 2);
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Span (edu.uci.ics.texera.api.span.Span)130 ArrayList (java.util.ArrayList)104 IField (edu.uci.ics.texera.api.field.IField)100 Tuple (edu.uci.ics.texera.api.tuple.Tuple)99 Schema (edu.uci.ics.texera.api.schema.Schema)90 Test (org.junit.Test)84 TextField (edu.uci.ics.texera.api.field.TextField)78 IntegerField (edu.uci.ics.texera.api.field.IntegerField)66 StringField (edu.uci.ics.texera.api.field.StringField)64 DoubleField (edu.uci.ics.texera.api.field.DoubleField)55 DateField (edu.uci.ics.texera.api.field.DateField)52 SimpleDateFormat (java.text.SimpleDateFormat)52 Attribute (edu.uci.ics.texera.api.schema.Attribute)51 ListField (edu.uci.ics.texera.api.field.ListField)40 Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary)30 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)11 AttributeType (edu.uci.ics.texera.api.schema.AttributeType)11 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)11 JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate)9 Collectors (java.util.stream.Collectors)8