Search in sources :

Example 6 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testGetNextTupleCorpIP.

/**
 * Runs an anchored IPv4 regex over the IP_ADDRESS attribute of the corp table
 * and checks that the first three sample tuples come back, each carrying one
 * span whose offsets run from 0 to the length of the address.
 */
@Test
public void testGetNextTupleCorpIP() throws Exception {
    String query = "^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(CORP_TABLE, query, Arrays.asList(RegexTestConstantsCorp.IP_ADDRESS));

    List<Tuple> sampleTuples = RegexTestConstantsCorp.getSampleCorpTuples();
    Schema schemaWithSpans = Utils.addAttributeToSchema(RegexTestConstantsCorp.SCHEMA_CORP, new Attribute(RESULTS, AttributeType.LIST));

    // expectedAddresses[i] is the value expected to match in sample tuple i.
    String[] expectedAddresses = { "66.220.144.0", "180.149.134.141", "131.107.0.89" };

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    for (int row = 0; row < expectedAddresses.length; row++) {
        String address = expectedAddresses[row];
        Span addressSpan = new Span(RegexTestConstantsCorp.IP_ADDRESS, 0, address.length(), query, address);

        // Expected tuple = the sample tuple's own fields followed by the span list.
        List<IField> rowFields = new ArrayList<IField>(sampleTuples.get(row).getFields());
        rowFields.add(new ListField<Span>(new ArrayList<Span>(Arrays.asList(addressSpan))));
        expectedResults.add(new Tuple(schemaWithSpans, rowFields.toArray(new IField[rowFields.size()])));
    }

    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 7 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testRegexWithLimitOffset.

/**
 * Queries the text table for "patient" with the trailing helper arguments
 * (true, 2, 1) and checks that exactly two tuples are returned, both drawn
 * from the three candidate tuples built below (sample tuples 4, 5 and 6).
 */
@Test
public void testRegexWithLimitOffset() throws Exception {
    String query = "patient";
    List<Tuple> exactResultsWithLimitOffset = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, query, Arrays.asList(RegexTestConstantsText.CONTENT), true, 2, 1);
    List<Tuple> expectedResults = new ArrayList<Tuple>();
    List<Tuple> data = RegexTestConstantsText.getSampleTextTuples();
    Schema spanSchema = Utils.addAttributeToSchema(RegexTestConstantsText.SCHEMA_TEXT, new Attribute(RESULTS, AttributeType.LIST));

    // Candidate 1: sample tuple 4 with a single match at [4, 11).
    List<Span> spans = new ArrayList<Span>();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    IField spanField = new ListField<Span>(new ArrayList<Span>(spans));
    List<IField> fields = new ArrayList<IField>(data.get(4).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));

    // Candidate 2: sample tuple 5 with matches at [4, 11) and [65, 72).
    // spans is reused; the ListField above holds its own copy, so clearing is safe.
    spans.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spans.add(new Span(RegexTestConstantsText.CONTENT, 65, 72, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(5).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));

    // Candidate 3: sample tuple 6 with a single match at [4, 11).
    spans.clear();
    spans.add(new Span(RegexTestConstantsText.CONTENT, 4, 11, query, "patient"));
    spanField = new ListField<Span>(new ArrayList<Span>(spans));
    fields = new ArrayList<IField>(data.get(6).getFields());
    fields.add(spanField);
    expectedResults.add(new Tuple(spanSchema, fields.toArray(new IField[fields.size()])));

    // The returned tuples must come from the candidate set.
    Assert.assertTrue(TestUtils.containsAll(expectedResults, exactResultsWithLimitOffset));
    // JUnit's signature is assertEquals(expected, actual); keeping that order
    // makes a failure message report the right values.
    Assert.assertEquals(3, expectedResults.size());
    Assert.assertEquals(2, exactResultsWithLimitOffset.size());
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 8 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class RegexMatcherTest method testRegexText4.

/**
 * Matches a bracketed optional single character against the CONTENT attribute
 * and expects only sample tuple 10 to be returned, with spans for "[a]" and "[!]".
 */
@Test
public void testRegexText4() throws Exception {
    String query = "\\[(.)?\\]";
    List<Tuple> exactResults = RegexMatcherTestHelper.getQueryResults(TEXT_TABLE, query, Arrays.asList(RegexTestConstantsText.CONTENT));

    List<Tuple> sampleTuples = RegexTestConstantsText.getSampleTextTuples();
    Schema schemaWithSpans = Utils.addAttributeToSchema(RegexTestConstantsText.SCHEMA_TEXT, new Attribute(RESULTS, AttributeType.LIST));

    // The two bracketed tokens expected in the matching tuple, in order of appearance.
    List<Span> bracketSpans = new ArrayList<Span>();
    bracketSpans.add(new Span(RegexTestConstantsText.CONTENT, 110, 113, query, "[a]"));
    bracketSpans.add(new Span(RegexTestConstantsText.CONTENT, 120, 123, query, "[!]"));

    // Expected tuple = the sample tuple's own fields followed by the span list.
    List<IField> expectedFields = new ArrayList<IField>(sampleTuples.get(10).getFields());
    expectedFields.add(new ListField<Span>(new ArrayList<Span>(bracketSpans)));

    List<Tuple> expectedResults = new ArrayList<Tuple>();
    expectedResults.add(new Tuple(schemaWithSpans, expectedFields.toArray(new IField[expectedFields.size()])));

    Assert.assertTrue(TestUtils.equals(expectedResults, exactResults));
}
Also used : Attribute(edu.uci.ics.textdb.api.schema.Attribute) Schema(edu.uci.ics.textdb.api.schema.Schema) ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) Test(org.junit.Test)

Example 9 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class DataflowUtils method getSpanTuple.

/**
 * Builds a tuple whose fields are the given fields followed by one
 * {@code ListField} that holds a copy of the given spans.
 *
 * Neither input list is modified.
 *
 * @param fieldList  fields to place first in the tuple, in order
 * @param spanList   spans to wrap in the trailing list field
 * @param spanSchema schema passed to the new tuple
 * @return a new tuple with {@code fieldList.size() + 1} fields
 */
public static Tuple getSpanTuple(List<IField> fieldList, List<Span> spanList, Schema spanSchema) {
    IField trailingSpans = new ListField<Span>(new ArrayList<>(spanList));
    int originalCount = fieldList.size();
    // One extra slot is reserved at the end for the span list field.
    IField[] combined = fieldList.toArray(new IField[originalCount + 1]);
    combined[originalCount] = trailingSpans;
    return new Tuple(spanSchema, combined);
}
Also used : ArrayList(java.util.ArrayList) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField)

Example 10 with ListField

use of edu.uci.ics.textdb.api.field.ListField in project textdb by TextDB.

the class SimilarityJoinPredicate method mergeTuples.

/**
 * Builds the joined output tuple by walking the output schema's attribute
 * names in order and producing one field per attribute:
 * <ul>
 *   <li>{@code _ID}: a freshly generated random UUID;</li>
 *   <li>{@code SPAN_LIST}: the supplied {@code mergeSpanList};</li>
 *   <li>{@code PAYLOAD}: the inner payload spans followed by the outer payload
 *       spans, each passed through {@code addFieldPrefix};</li>
 *   <li>names starting with {@code INNER_PREFIX} / {@code OUTER_PREFIX}: the
 *       field of the inner / outer tuple with the prefix stripped.</li>
 * </ul>
 * Attributes matching none of the above contribute no field.
 *
 * @param innerTuple    tuple from the inner input
 * @param outerTuple    tuple from the outer input
 * @param outputSchema  schema of the joined tuple; drives field order
 * @param mergeSpanList spans to store under {@code SPAN_LIST}
 * @return the merged tuple
 */
private Tuple mergeTuples(Tuple innerTuple, Tuple outerTuple, Schema outputSchema, List<Span> mergeSpanList) {
    List<IField> resultFields = new ArrayList<>();
    for (String attrName : outputSchema.getAttributeNames()) {
        if (attrName.equals(SchemaConstants._ID)) {
            // generate a new _ID field for this tuple
            IDField newID = new IDField(UUID.randomUUID().toString());
            resultFields.add(newID);
        } else if (attrName.equals(SchemaConstants.SPAN_LIST)) {
            // use the generated spanList
            resultFields.add(new ListField<Span>(mergeSpanList));
        } else if (attrName.equals(SchemaConstants.PAYLOAD)) {
            // put the payload of two tuples together
            ListField<Span> innerPayloadField = innerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> innerPayload = innerPayloadField.getValue();
            ListField<Span> outerPayloadField = outerTuple.getField(SchemaConstants.PAYLOAD);
            List<Span> outerPayload = outerPayloadField.getValue();
            List<Span> resultPayload = new ArrayList<>();
            resultPayload.addAll(innerPayload.stream().map(span -> addFieldPrefix(span, INNER_PREFIX)).collect(Collectors.toList()));
            // NOTE(review): the literal "outer_" presumably equals OUTER_PREFIX;
            // confirm and switch to the constant to mirror the inner case.
            resultPayload.addAll(outerPayload.stream().map(span -> addFieldPrefix(span, "outer_")).collect(Collectors.toList()));
            // Store the merged payload; without this the field list ends up one
            // entry shorter than the schema whenever it contains PAYLOAD.
            resultFields.add(new ListField<Span>(resultPayload));
        } else if (attrName.startsWith(INNER_PREFIX)) {
            // add other fields from the inner tuple
            resultFields.add(innerTuple.getField(attrName.substring(INNER_PREFIX.length())));
        } else if (attrName.startsWith(OUTER_PREFIX)) {
            // add other fields from the outer tuple
            resultFields.add(outerTuple.getField(attrName.substring(OUTER_PREFIX.length())));
        }
    }
    return new Tuple(outputSchema, resultFields.toArray(new IField[0]));
}
Also used : IDField(edu.uci.ics.textdb.api.field.IDField) ListField(edu.uci.ics.textdb.api.field.ListField) IField(edu.uci.ics.textdb.api.field.IField) Span(edu.uci.ics.textdb.api.span.Span)

Aggregations

ListField (edu.uci.ics.textdb.api.field.ListField)20 Span (edu.uci.ics.textdb.api.span.Span)18 IField (edu.uci.ics.textdb.api.field.IField)17 ArrayList (java.util.ArrayList)17 Attribute (edu.uci.ics.textdb.api.schema.Attribute)16 Schema (edu.uci.ics.textdb.api.schema.Schema)16 Tuple (edu.uci.ics.textdb.api.tuple.Tuple)14 Test (org.junit.Test)11 SchemaConstants (edu.uci.ics.textdb.api.constants.SchemaConstants)5 DataFlowException (edu.uci.ics.textdb.api.exception.DataFlowException)5 AttributeType (edu.uci.ics.textdb.api.schema.AttributeType)5 Collectors (java.util.stream.Collectors)5 Iterator (java.util.Iterator)4 List (java.util.List)4 ErrorMessages (edu.uci.ics.textdb.api.constants.ErrorMessages)3 TextDBException (edu.uci.ics.textdb.api.exception.TextDBException)3 Utils (edu.uci.ics.textdb.api.utils.Utils)3 AbstractSingleInputOperator (edu.uci.ics.textdb.exp.common.AbstractSingleInputOperator)3 DataflowUtils (edu.uci.ics.textdb.exp.utils.DataflowUtils)3 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)2