Search in sources :

Example 1 with RegexPredicate

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate in project textdb by TextDB.

the class PredicateBaseTest method testRegexMatcher.

/**
 * Runs the shared predicate check against a RegexPredicate and then a
 * RegexSourcePredicate, each built from the same regex, attribute names and
 * span-list name (the source variant additionally takes a table name).
 */
@Test
public void testRegexMatcher() throws Exception {
    // Plain regex matcher predicate.
    testPredicate(new RegexPredicate("regex", attributeNames, "spanListName"));

    // Source variant of the regex predicate, which also names a table.
    testPredicate(new RegexSourcePredicate("regex", attributeNames, "tableName", "spanListName"));
}
Also used : RegexSourcePredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexSourcePredicate) RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) Test(org.junit.Test)

Example 2 with RegexPredicate

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate in project textdb by TextDB.

the class JoinTestHelper method getRegexMatcher.

/**
 * Creates a RegexMatcher whose input is a scan-based source over the given table.
 *
 * @param tableName name passed to the ScanSourcePredicate of the input operator
 * @param query     regex handed to the RegexPredicate
 * @param attrName  the single attribute name the predicate is built with
 * @return the wired-up matcher, or {@code null} if a DataflowException is thrown
 *         while constructing or connecting the operators
 */
public static RegexMatcher getRegexMatcher(String tableName, String query, String attrName) {
    try {
        // Build the source first, then the matcher, then connect them.
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(tableName);
        ScanBasedSourceOperator source = new ScanBasedSourceOperator(scanPredicate);

        RegexPredicate matcherPredicate = new RegexPredicate(query, Arrays.asList(attrName), SchemaConstants.SPAN_LIST);
        RegexMatcher matcher = new RegexMatcher(matcherPredicate);

        matcher.setInputOperator(source);
        return matcher;
    } catch (DataflowException e) {
        // Failure is reported on stderr and signalled to the caller as null.
        e.printStackTrace();
        return null;
    }
}
Also used : RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate)

Example 3 with RegexPredicate

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate in project textdb by TextDB.

the class LogicalPlanTest method testInvalidLogicalPlan4.

/*
 * Test an operator graph with a disconnected component.
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 * RegexMatcher2 --> NlpEntityOperator
 * (a disconnected graph)
 *
 * The test passes only if a TexeraException is thrown.
 */
@Test(expected = TexeraException.class)
public void testInvalidLogicalPlan4() throws Exception {
    LogicalPlan logicalPlan = new LogicalPlan();
    String REGEX_ID_2 = "regex 2";
    RegexPredicate regexPredicate2 = new RegexPredicate("ca(lifornia)?", Arrays.asList("location", "content"), "regexResults");
    // Assign the ID that the third link below refers to; otherwise no operator
    // in this plan is explicitly given REGEX_ID_2 and the test may fail for a
    // reason other than the disconnected component it is meant to exercise.
    regexPredicate2.setID(REGEX_ID_2);

    // First component: KeywordSource --> RegexMatcher --> TupleSink
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addOperator(tupleSinkPredicate);
    // Second component: RegexMatcher2 --> NlpEntityOperator
    logicalPlan.addOperator(regexPredicate2);
    logicalPlan.addOperator(nlpEntityPredicate);

    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID, TUPLE_SINK_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID_2, NLP_ENTITY_ID));

    logicalPlan.buildQueryPlan();
}
Also used : RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) Test(org.junit.Test)

Example 4 with RegexPredicate

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate in project textdb by TextDB.

the class LogicalPlanTest method testInvalidLogicalPlan3.

/*
 * Test an operator graph with a disconnected component.
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 * RegexMatcher2 --> NlpEntityOperator
 * (a disconnected graph)
 *
 * The test passes only if a TexeraException is thrown.
 */
@Test(expected = TexeraException.class)
public void testInvalidLogicalPlan3() throws Exception {
    LogicalPlan logicalPlan = new LogicalPlan();
    String REGEX_ID_2 = "regex 2";
    RegexPredicate regexPredicate2 = new RegexPredicate("ca(lifornia)?", Arrays.asList("location", "content"), "regexResults");
    // Assign the ID that the third link below refers to; otherwise no operator
    // in this plan is explicitly given REGEX_ID_2 and the test may fail for a
    // reason other than the disconnected component it is meant to exercise.
    regexPredicate2.setID(REGEX_ID_2);

    // First component: KeywordSource --> RegexMatcher --> TupleSink
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addOperator(tupleSinkPredicate);
    // Second component: RegexMatcher2 --> NlpEntityOperator
    logicalPlan.addOperator(regexPredicate2);
    logicalPlan.addOperator(nlpEntityPredicate);

    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID, TUPLE_SINK_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID_2, NLP_ENTITY_ID));

    logicalPlan.buildQueryPlan();
}
Also used : RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) Test(org.junit.Test)

Example 5 with RegexPredicate

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate in project textdb by TextDB.

the class LogicalPlanTest method testGetOutputSchema5.

/*
 * Test getOperatorOutputSchema on an operator graph with a disconnected component.
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 * RegexMatcher2 --> NlpEntityOperator
 * (a disconnected graph)
 *
 * Reference schemas are read from the operators of the query plan built from
 * getLogicalPlan1(), and compared with the schemas the disconnected logical
 * plan reports for the keyword source and the first regex matcher.
 * The test passes only if a TexeraException is thrown; presumably that is
 * raised by the final schema request for the second regex matcher.
 */
@Test(expected = TexeraException.class)
public void testGetOutputSchema5() throws Exception {
    // Build the reference plan and pick up its sink.
    LogicalPlan validLogicalPlan = getLogicalPlan1();
    Plan queryPlan = validLogicalPlan.buildQueryPlan();
    HashMap<String, ISink> sinkHashMap = queryPlan.getSinkMap();
    Assert.assertEquals(1, sinkHashMap.size());
    ISink tupleSink = null;
    for (ISink sink : sinkHashMap.values()) {
        tupleSink = sink;
    }
    Assert.assertNotNull(tupleSink);

    // Walk backwards from the sink: TupleSink <-- RegexMatcher <-- KeywordSource.
    IOperator regexMatcher = ((TupleSink) tupleSink).getInputOperator();
    IOperator keywordSource = ((RegexMatcher) regexMatcher).getInputOperator();

    // The expected schemas are read while the matcher is open.
    regexMatcher.open();
    Schema expectedSourceOutputSchema = keywordSource.getOutputSchema();
    Schema expectedMatcherOutputSchema = regexMatcher.getOutputSchema();
    regexMatcher.close();

    // Build the logical plan with the extra, disconnected component.
    LogicalPlan logicalPlan = new LogicalPlan();
    String REGEX_ID_2 = "regex 2";
    RegexPredicate regexPredicate2 = new RegexPredicate("ca(lifornia)?", Arrays.asList("location", "content"), "regexResults");
    regexPredicate2.setID(REGEX_ID_2);
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addOperator(tupleSinkPredicate);
    logicalPlan.addOperator(regexPredicate2);
    logicalPlan.addOperator(nlpEntityPredicate);
    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID, TUPLE_SINK_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID_2, NLP_ENTITY_ID));

    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);
    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);

    // The result is intentionally discarded: the call is made only for the
    // exception it is expected to raise.
    logicalPlan.getOperatorOutputSchema(REGEX_ID_2);
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) HashMap(java.util.HashMap) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) Schema(edu.uci.ics.texera.api.schema.Schema) Plan(edu.uci.ics.texera.api.engine.Plan) ISink(edu.uci.ics.texera.api.dataflow.ISink) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Test(org.junit.Test)

Aggregations

RegexPredicate (edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate): 5, Test (org.junit.Test): 4, RegexMatcher (edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher): 2, IOperator (edu.uci.ics.texera.api.dataflow.IOperator): 1, ISink (edu.uci.ics.texera.api.dataflow.ISink): 1, Plan (edu.uci.ics.texera.api.engine.Plan): 1, DataflowException (edu.uci.ics.texera.api.exception.DataflowException): 1, Schema (edu.uci.ics.texera.api.schema.Schema): 1, RegexSourcePredicate (edu.uci.ics.texera.dataflow.regexmatcher.RegexSourcePredicate): 1, TupleSink (edu.uci.ics.texera.dataflow.sink.tuple.TupleSink): 1, ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator): 1, ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate): 1, HashMap (java.util.HashMap): 1