Search in sources :

Example 81 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class RegexMatcherTestHelper method getRegexSourceResults.

/**
 * Builds a keyword-source -> keyword-matcher -> regex-matcher pipeline and
 * returns every tuple the regex matcher emits.
 *
 * @param tableName      table the keyword source operator reads from
 * @param keywordQuery1  query for the keyword source operator
 * @param regex          regular expression handed to the regex matcher
 * @param attributeNames attributes all three predicates are applied to
 * @param matchingType   keyword matching type used by both keyword predicates
 * @param spanListName1  span list name passed to the keyword source predicate
 * @param limit          limit set on the regex matcher
 * @param offset         offset set on the regex matcher
 * @param keywordQuery2  query for the intermediate keyword matcher
 * @param spanListName2  span list name passed to the intermediate keyword predicate
 * @return all tuples produced by the regex matcher, in emission order
 * @throws TexeraException if any operator in the pipeline fails
 */
public static List<Tuple> getRegexSourceResults(String tableName, String keywordQuery1, String regex, List<String> attributeNames, KeywordMatchingType matchingType, String spanListName1, int limit, int offset, String keywordQuery2, String spanListName2) throws TexeraException {
    KeywordSourcePredicate keywordSourcePredicate = new KeywordSourcePredicate(keywordQuery1, attributeNames, LuceneAnalyzerConstants.standardAnalyzerString(), matchingType, tableName, spanListName1);
    KeywordMatcherSourceOperator keywordSource = new KeywordMatcherSourceOperator(keywordSourcePredicate);
    KeywordPredicate keywordPredicate1 = new KeywordPredicate(keywordQuery2, attributeNames, LuceneAnalyzerConstants.standardAnalyzerString(), matchingType, spanListName2);
    KeywordMatcher keywordMatcher1 = new KeywordMatcher(keywordPredicate1);
    keywordMatcher1.setInputOperator(keywordSource);
    // RESULTS is a constant declared outside this method (presumably the
    // name of the attribute that receives the regex spans -- confirm).
    RegexPredicate regexPredicate = new RegexPredicate(regex, attributeNames, RESULTS);
    RegexMatcher regexMatcher = new RegexMatcher(regexPredicate);
    regexMatcher.setLimit(limit);
    regexMatcher.setOffset(offset);
    regexMatcher.setInputOperator(keywordMatcher1);
    List<Tuple> results = new ArrayList<>();
    regexMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = regexMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close even when iteration throws, so the opened operator chain
        // is not left open after a failing test.
        regexMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 82 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class RegexMatcherTestHelper method getScanSourceResults.

/**
 * Builds a scan-source -> keyword-matcher -> keyword-matcher -> regex-matcher
 * pipeline and returns every tuple the regex matcher emits.
 *
 * @param tableName      table the scan source operator reads from
 * @param keywordQuery1  query for the first keyword matcher (fed by the scan)
 * @param regex          regular expression handed to the regex matcher
 * @param attributeNames attributes all three predicates are applied to
 * @param matchingType   keyword matching type used by both keyword predicates
 * @param spanListName1  span list name passed to the first keyword predicate
 * @param limit          limit set on the regex matcher
 * @param offset         offset set on the regex matcher
 * @param keywordQuery2  query for the second keyword matcher
 * @param spanListName2  span list name passed to the second keyword predicate
 * @return all tuples produced by the regex matcher, in emission order
 * @throws TexeraException if any operator in the pipeline fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String keywordQuery1, String regex, List<String> attributeNames, KeywordMatchingType matchingType, String spanListName1, int limit, int offset, String keywordQuery2, String spanListName2) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    KeywordPredicate keywordPredicate = new KeywordPredicate(keywordQuery1, attributeNames, LuceneAnalyzerConstants.standardAnalyzerString(), matchingType, spanListName1);
    KeywordMatcher keywordMatcher = new KeywordMatcher(keywordPredicate);
    KeywordPredicate keywordPredicate1 = new KeywordPredicate(keywordQuery2, attributeNames, LuceneAnalyzerConstants.standardAnalyzerString(), matchingType, spanListName2);
    KeywordMatcher keywordMatcher1 = new KeywordMatcher(keywordPredicate1);
    keywordMatcher1.setInputOperator(keywordMatcher);
    keywordMatcher.setInputOperator(scanSource);
    // RESULTS is a constant declared outside this method (presumably the
    // name of the attribute that receives the regex spans -- confirm).
    RegexPredicate regexPredicate = new RegexPredicate(regex, attributeNames, RESULTS);
    RegexMatcher regexMatcher = new RegexMatcher(regexPredicate);
    regexMatcher.setLimit(limit);
    regexMatcher.setOffset(offset);
    regexMatcher.setInputOperator(keywordMatcher1);
    List<Tuple> results = new ArrayList<>();
    regexMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = regexMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close even when iteration throws, so the opened operator chain
        // is not left open after a failing test.
        regexMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 83 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class RegexSplitOperatorTest method test9.

/*
     * Test in OneToOne mode.
     *
     * Splits the DESCRIPTION attribute on the regex "ana" with the STANDALONE
     * split type and checks the span list of every result tuple against the
     * expected pieces, in order.
     */
@Test
public void test9() throws TexeraException {
    String splitRegex = "ana";
    String splitAttrName = TestConstantsRegexSplit.DESCRIPTION;
    List<String> splitResult1 = new ArrayList<>();
    splitResult1.add("b");
    splitResult1.add("ana");
    splitResult1.add("na");
    List<String> splitResult2 = new ArrayList<>();
    splitResult2.add("mississippi");
    List<List<String>> splitResults = new ArrayList<>();
    splitResults.add(splitResult1);
    splitResults.add(splitResult2);
    List<Tuple> results = computeRegexSplitResultsOnetoOne(REGEX_TABLE, splitAttrName, splitRegex, RegexSplitPredicate.SplitType.STANDALONE);
    // Without this check an empty (or short) result list would make the
    // loop below pass without verifying anything.
    Assert.assertEquals(splitResults.size(), results.size());
    int i = 0;
    for (Tuple tuple : results) {
        // JUnit's contract is assertEquals(expected, actual).
        Assert.assertEquals(splitResults.get(i), getTupleSpanListString(tuple, RESULT_ATTR));
        i++;
    }
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 84 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class OneToNBroadcastConnectorTest method testTwoOutputsWithProjection.

/*
     * Connects a two-output broadcast connector to two projection operators
     * (both projecting the DESCRIPTION field) and verifies that each
     * projection yields the expected description-only tuples, and that the
     * two outputs match each other.
     */
@Test
public void testTwoOutputsWithProjection() throws TexeraException {
    IOperator peopleScan = new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));

    // Expected output: one single-field tuple per description.
    Schema descriptionSchema = new Schema(TestConstants.DESCRIPTION_ATTR);
    List<Tuple> expected = Arrays.asList(
            new Tuple(descriptionSchema, new IField[] { new TextField("Tall Angry") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Short Brown") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("White Angry") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Lin Clooney is Short and lin clooney is Angry") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Tall Fair") }),
            new Tuple(descriptionSchema, new IField[] { new TextField("Short angry") }));

    // Both projections share one predicate instance.
    ProjectionPredicate descriptionOnly = new ProjectionPredicate(Arrays.asList(TestConstants.DESCRIPTION));
    ProjectionOperator firstProjection = new ProjectionOperator(descriptionOnly);
    ProjectionOperator secondProjection = new ProjectionOperator(descriptionOnly);

    OneToNBroadcastConnector broadcast = new OneToNBroadcastConnector(2);
    broadcast.setInputOperator(peopleScan);
    firstProjection.setInputOperator(broadcast.getOutputOperator(0));
    secondProjection.setInputOperator(broadcast.getOutputOperator(1));

    // Drain the first branch completely before opening the second one.
    firstProjection.open();
    List<Tuple> firstOutput = new ArrayList<>();
    for (Tuple t = firstProjection.getNextTuple(); t != null; t = firstProjection.getNextTuple()) {
        firstOutput.add(t);
    }
    firstProjection.close();

    secondProjection.open();
    List<Tuple> secondOutput = new ArrayList<>();
    for (Tuple t = secondProjection.getNextTuple(); t != null; t = secondProjection.getNextTuple()) {
        secondOutput.add(t);
    }
    secondProjection.close();

    Assert.assertTrue(TestUtils.equals(expected, firstOutput));
    Assert.assertTrue(TestUtils.equals(expected, secondOutput));
    Assert.assertTrue(TestUtils.equals(firstOutput, secondOutput));
}
Also used : ProjectionOperator(edu.uci.ics.texera.dataflow.projection.ProjectionOperator) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Schema(edu.uci.ics.texera.api.schema.Schema) ArrayList(java.util.ArrayList) IField(edu.uci.ics.texera.api.field.IField) ProjectionPredicate(edu.uci.ics.texera.dataflow.projection.ProjectionPredicate) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) TextField(edu.uci.ics.texera.api.field.TextField) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 85 with Tuple

use of edu.uci.ics.texera.api.tuple.Tuple in project textdb by TextDB.

the class OneToNBroadcastConnectorTest method testThreeOutputsWithItself.

/*
     * Opens all three outputs of a three-way broadcast connector fed by a
     * scan of the people table, drains them one after another, and checks
     * that every output matches the sample people tuples.
     */
@Test
public void testThreeOutputsWithItself() throws Exception {
    IOperator peopleScan = new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));
    OneToNBroadcastConnector broadcast = new OneToNBroadcastConnector(3);
    broadcast.setInputOperator(peopleScan);

    IOperator first = broadcast.getOutputOperator(0);
    IOperator second = broadcast.getOutputOperator(1);
    IOperator third = broadcast.getOutputOperator(2);

    // All three outputs are opened up front, before any tuple is pulled.
    first.open();
    second.open();
    third.open();

    List<Tuple> firstTuples = new ArrayList<>();
    for (Tuple t = first.getNextTuple(); t != null; t = first.getNextTuple()) {
        firstTuples.add(t);
    }

    List<Tuple> secondTuples = new ArrayList<>();
    for (Tuple t = second.getNextTuple(); t != null; t = second.getNextTuple()) {
        secondTuples.add(t);
    }

    List<Tuple> thirdTuples = new ArrayList<>();
    for (Tuple t = third.getNextTuple(); t != null; t = third.getNextTuple()) {
        thirdTuples.add(t);
    }

    first.close();
    second.close();
    third.close();

    List<Tuple> expected = TestConstants.getSamplePeopleTuples();
    Assert.assertTrue(TestUtils.equals(expected, firstTuples));
    Assert.assertTrue(TestUtils.equals(expected, secondTuples));
    Assert.assertTrue(TestUtils.equals(expected, thirdTuples));
}
Also used : IOperator(edu.uci.ics.texera.api.dataflow.IOperator) ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Aggregations

Tuple (edu.uci.ics.texera.api.tuple.Tuple) 332, ArrayList (java.util.ArrayList) 191, Test (org.junit.Test) 178, IField (edu.uci.ics.texera.api.field.IField) 130, Schema (edu.uci.ics.texera.api.schema.Schema) 126, Span (edu.uci.ics.texera.api.span.Span) 100, StringField (edu.uci.ics.texera.api.field.StringField) 96, Attribute (edu.uci.ics.texera.api.schema.Attribute) 95, IntegerField (edu.uci.ics.texera.api.field.IntegerField) 92, TextField (edu.uci.ics.texera.api.field.TextField) 90, DoubleField (edu.uci.ics.texera.api.field.DoubleField) 65, DateField (edu.uci.ics.texera.api.field.DateField) 60, SimpleDateFormat (java.text.SimpleDateFormat) 58, DataWriter (edu.uci.ics.texera.storage.DataWriter) 33, Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) 30, ListField (edu.uci.ics.texera.api.field.ListField) 28, TupleSourceOperator (edu.uci.ics.texera.dataflow.source.tuple.TupleSourceOperator) 24, DataflowException (edu.uci.ics.texera.api.exception.DataflowException) 23, ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) 21, ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) 21