Search in sources :

Example 56 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class SpanTupleTest method createSpanListField.

/**
 * Builds a list field holding two sample spans on the "description"
 * attribute, both keyed by "LOCATION".
 *
 * Note on the key carried by a span, per matcher:
 * - RegexMatcher: "n.*k"
 * - NamedEntityMatcher: LOCATION
 * - DictionaryMatcher: "new york" (key and value are the same)
 * - KeyWordMatcher: "new york" (the value can be "new" or "york")
 *
 * @return a ListField wrapping the two sample spans
 */
private IField createSpanListField() {
    List<Span> spans = new ArrayList<>();
    spans.add(new Span("description", 18, 26, "LOCATION", "new york"));
    spans.add(new Span("description", 52, 63, "LOCATION", "los angeles"));
    return new ListField<Span>(spans);
}
Also used : ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span)

Example 57 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class LabeledRegexMatcherTest method testMultipleLabeledRegex.

/**
 * Runs the labeled regex "<lab1>.*<lab2>" over the description attribute
 * (lab1 is given "short", lab2 is given "angry") and compares the RESULTS
 * attribute of the returned tuples against two expected span lists.
 */
@Test
public void testMultipleLabeledRegex() throws Exception {
    String query = "<lab1>.*<lab2>";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, "short", Arrays.asList(TestConstants.DESCRIPTION), "lab1", false, Integer.MAX_VALUE, 0, "angry", "lab2");

    List<Tuple> people = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();

    // First expected tuple: sample tuple 5, matching "Short angry".
    List<Span> shortAngrySpans = new ArrayList<>();
    shortAngrySpans.add(new Span(TestConstants.DESCRIPTION, 0, 11, query, "Short angry"));
    IField shortAngryField = new ListField<>(shortAngrySpans);
    List<IField> firstFields = new ArrayList<>(people.get(5).getFields());
    firstFields.add(shortAngryField);

    // Second expected tuple: sample tuple 3, matching "Short and lin clooney is Angry".
    List<Span> longMatchSpans = new ArrayList<>();
    longMatchSpans.add(new Span(TestConstants.DESCRIPTION, 15, 45, query, "Short and lin clooney is Angry"));
    IField longMatchField = new ListField<>(longMatchSpans);
    List<IField> secondFields = new ArrayList<>(people.get(3).getFields());
    secondFields.add(longMatchField);

    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(spanSchema, firstFields.toArray(new IField[0])));
    expectedResults.add(new Tuple(spanSchema, secondFields.toArray(new IField[0])));

    // Only the RESULTS attribute is passed to the comparison.
    List<String> attributeNames = new ArrayList<>(Arrays.asList(RESULTS));
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 58 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class LabeledRegexMatcherTest method testGetNextTupleLabeledRegex.

/**
 * Runs the labeled regex "<name>" over the first-name attribute, with the
 * label "name" given the keyword query "george lin lin", and compares the
 * RESULTS attribute of the returned tuples against one expected span.
 */
@Test
public void testGetNextTupleLabeledRegex() throws Exception {
    String query = "<name>";
    String keywordQuery = "george lin lin";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, keywordQuery, Arrays.asList(TestConstants.FIRST_NAME), "name", false, Integer.MAX_VALUE, 0);

    List<Tuple> people = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();

    // Single expected span: "george lin lin" at offsets 0-14 of the first name.
    List<Span> matchedSpans = new ArrayList<>();
    matchedSpans.add(new Span(TestConstants.FIRST_NAME, 0, 14, query, "george lin lin"));
    IField spanField = new ListField<>(matchedSpans);

    // Expected tuple = sample tuple 3 with the span list appended.
    List<IField> expectedFields = new ArrayList<>(people.get(3).getFields());
    expectedFields.add(spanField);

    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(spanSchema, expectedFields.toArray(new IField[0])));

    // Only the RESULTS attribute is passed to the comparison.
    List<String> attributeNames = new ArrayList<>(Arrays.asList(RESULTS));
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 59 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class LabeledRegexMatcherTest method testIgnoreCaseLabeledRegex.

/**
 * Runs the labeled regex "<NAME>" (upper case) while the label handed to the
 * helper is "name" (lower case), and compares the RESULTS attribute of the
 * returned tuples against one expected span.
 *
 * NOTE(review): judging by the method name, this is meant to show that label
 * lookup ignores case - confirm against the matcher implementation.
 */
@Test
public void testIgnoreCaseLabeledRegex() throws Exception {
    String query = "<NAME>";
    String keywordQuery = "george lin lin";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, keywordQuery, Arrays.asList(TestConstants.FIRST_NAME), "name", false, Integer.MAX_VALUE, 0);

    List<Tuple> people = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();

    // Single expected span: "george lin lin" at offsets 0-14 of the first name.
    List<Span> matchedSpans = new ArrayList<>();
    matchedSpans.add(new Span(TestConstants.FIRST_NAME, 0, 14, query, "george lin lin"));
    IField spanField = new ListField<>(matchedSpans);

    // Expected tuple = sample tuple 3 with the span list appended.
    List<IField> expectedFields = new ArrayList<>(people.get(3).getFields());
    expectedFields.add(spanField);

    List<Tuple> expectedResults = new ArrayList<>();
    expectedResults.add(new Tuple(spanSchema, expectedFields.toArray(new IField[0])));

    // Only the RESULTS attribute is passed to the comparison.
    List<String> attributeNames = new ArrayList<>(Arrays.asList(RESULTS));
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 60 with Span

use of edu.uci.ics.texera.api.span.Span in project textdb by TextDB.

the class LabeledRegexMatcherTest method testDisjunctionLabeledRegex.

/**
 * Runs the labeled regex "<lab1>|<lab2>" over the description attribute
 * (lab1 is given "short", lab2 is given "angry") and compares the RESULTS
 * attribute of the returned tuples against two expected span lists.
 *
 * The expected tuples are built from the same sample tuples that
 * testMultipleLabeledRegex uses for these descriptions: index 5 for the
 * short description (spans at 0-5 and 6-11) and index 3 for the long one
 * (spans at 15-20 and 40-45). Previously the two indices were swapped, which
 * went unnoticed because only the RESULTS attribute is compared.
 */
@Test
public void testDisjunctionLabeledRegex() throws Exception {
    String query = "<lab1>|<lab2>";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(PEOPLE_TABLE, query, "short", Arrays.asList(TestConstants.DESCRIPTION), "lab1", false, Integer.MAX_VALUE, 0, "angry", "lab2");
    List<Tuple> expectedResults = new ArrayList<>();
    List<Tuple> data = TestConstants.getSamplePeopleTuples();
    Schema spanSchema = new Schema.Builder().add(TestConstants.SCHEMA_PEOPLE).add(RESULTS, AttributeType.LIST).build();

    // expected to match "Short" and "angry" (sample tuple 5)
    List<Span> spans = new ArrayList<>();
    spans.add(new Span(TestConstants.DESCRIPTION, 0, 5, query, "Short"));
    spans.add(new Span(TestConstants.DESCRIPTION, 6, 11, query, "angry"));
    // Copy the list because spans is cleared and reused below.
    IField spanField = new ListField<>(new ArrayList<>(spans));
    List<IField> fields = new ArrayList<>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[0])));

    // expected to match "Short" and "Angry" (sample tuple 3)
    spans.clear();
    spans.add(new Span(TestConstants.DESCRIPTION, 15, 20, query, "Short"));
    spans.add(new Span(TestConstants.DESCRIPTION, 40, 45, query, "Angry"));
    spanField = new ListField<>(new ArrayList<>(spans));
    fields = new ArrayList<>(data.get(3).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[0])));

    // Only the RESULTS attribute is passed to the comparison.
    List<String> attributeNames = new ArrayList<>();
    attributeNames.add(RESULTS);
    Assert.assertTrue(TestUtils.attributeEquals(expectedResults, exactResults, attributeNames));
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.texera.api.field.ListField) IField(edu.uci.ics.texera.api.field.IField) Span(edu.uci.ics.texera.api.span.Span) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Span (edu.uci.ics.texera.api.span.Span): 130; ArrayList (java.util.ArrayList): 104; IField (edu.uci.ics.texera.api.field.IField): 100; Tuple (edu.uci.ics.texera.api.tuple.Tuple): 99; Schema (edu.uci.ics.texera.api.schema.Schema): 90; Test (org.junit.Test): 84; TextField (edu.uci.ics.texera.api.field.TextField): 78; IntegerField (edu.uci.ics.texera.api.field.IntegerField): 66; StringField (edu.uci.ics.texera.api.field.StringField): 64; DoubleField (edu.uci.ics.texera.api.field.DoubleField): 55; DateField (edu.uci.ics.texera.api.field.DateField): 52; SimpleDateFormat (java.text.SimpleDateFormat): 52; Attribute (edu.uci.ics.texera.api.schema.Attribute): 51; ListField (edu.uci.ics.texera.api.field.ListField): 40; Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary): 30; DataflowException (edu.uci.ics.texera.api.exception.DataflowException): 11; AttributeType (edu.uci.ics.texera.api.schema.AttributeType): 11; KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator): 11; JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate): 9; Collectors (java.util.stream.Collectors): 8