Search in sources :

Example 6 with RegexMatcher

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.

the class LogicalPlanTest method testLogicalPlan1.

/*
 * Checks the shape of the query plan built from the first logical plan:
 *
 *   KeywordSource --> RegexMatcher --> TupleSink
 *
 * The plan is walked backwards from its root sink, and the runtime type of
 * each operator along the way is asserted.
 */
@Test
public void testLogicalPlan1() throws Exception {
    // Root of the built plan must be the tuple sink.
    ISink root = getLogicalPlan1().buildQueryPlan().getRoot();
    Assert.assertTrue(root instanceof TupleSink);

    // The sink is fed by the regex matcher.
    IOperator sinkInput = ((TupleSink) root).getInputOperator();
    Assert.assertTrue(sinkInput instanceof RegexMatcher);

    // The regex matcher is fed by the keyword source.
    IOperator matcherInput = ((RegexMatcher) sinkInput).getInputOperator();
    Assert.assertTrue(matcherInput instanceof KeywordMatcherSourceOperator);
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) Test(org.junit.Test)

Example 7 with RegexMatcher

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.

the class SimilarityJoinTest method test4.

/*
 * Similarity join over two similar phone names:
 *   Galaxy S8
 *   Galaxy Note 7
 * With a similarity (NormalizedLevenshtein) threshold of 0.8 the two names
 * should NOT be joined, so the result list is expected to be empty.
 */
@Test
public void test4() throws TexeraException {
    // One news tuple goes into each side of the join.
    JoinTestHelper.insertToTable(NEWS_TABLE_OUTER, JoinTestConstants.getNewsTuples().get(2));
    JoinTestHelper.insertToTable(NEWS_TABLE_INNER, JoinTestConstants.getNewsTuples().get(3));

    // Both sides extract phone names from the news body with the same regex.
    String galaxyPattern = "[Gg]alaxy.{1,6}\\d";
    RegexMatcher innerMatcher = JoinTestHelper.getRegexMatcher(
            JoinTestHelper.NEWS_TABLE_INNER, galaxyPattern, JoinTestConstants.NEWS_BODY);
    RegexMatcher outerMatcher = JoinTestHelper.getRegexMatcher(
            JoinTestHelper.NEWS_TABLE_OUTER, galaxyPattern, JoinTestConstants.NEWS_BODY);

    List<Tuple> joined = JoinTestHelper.getJoinDistanceResults(
            innerMatcher,
            outerMatcher,
            new SimilarityJoinPredicate(JoinTestConstants.NEWS_BODY, 0.8),
            Integer.MAX_VALUE,
            0);

    Assert.assertTrue(joined.isEmpty());
}
Also used : SimilarityJoinPredicate(edu.uci.ics.texera.dataflow.join.SimilarityJoinPredicate) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Test(org.junit.Test)

Example 8 with RegexMatcher

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.

the class LogicalPlanTest method testGetOutputSchema4.

/*
 * Test getOutputSchema on an operator graph without a sink operator
 *
 * KeywordSource --> RegexMatcher
 *
 * The expected schemas are read from the operators of a fully built query
 * plan (getLogicalPlan1), then compared with the schemas the sink-less
 * logical plan reports for the same two operators.
 */
@Test
public void testGetOutputSchema4() throws Exception {
    // Reference schemas: taken from the operators of a built, valid plan.
    LogicalPlan validLogicalPlan = getLogicalPlan1();
    Plan queryPlan = validLogicalPlan.buildQueryPlan();
    ISink tupleSink = queryPlan.getRoot();
    IOperator regexMatcher = ((TupleSink) tupleSink).getInputOperator();
    IOperator keywordSource = ((RegexMatcher) regexMatcher).getInputOperator();

    Schema expectedSourceOutputSchema;
    Schema expectedMatcherOutputSchema;
    regexMatcher.open();
    try {
        expectedSourceOutputSchema = keywordSource.getOutputSchema();
        expectedMatcherOutputSchema = regexMatcher.getOutputSchema();
    } finally {
        // Always release the operator, even if reading a schema throws.
        regexMatcher.close();
    }

    // Plan under test: same source and matcher, but no sink attached.
    LogicalPlan logicalPlan = new LogicalPlan();
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));

    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);

    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Schema(edu.uci.ics.texera.api.schema.Schema) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) Test(org.junit.Test)

Example 9 with RegexMatcher

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.

the class LogicalPlanTest method testGetOutputSchema5.

/*
 * Test an operator graph with a disconnected component
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 * RegexMatcher --> NlpEntityOperator
 * (a disconnected graph)
 *
 * The schemas of the component that contains the source are compared with
 * those of a fully built plan; asking for the schema of the second regex
 * operator (which has no source upstream) is the call expected to throw.
 *
 * NOTE(review): @Test(expected = ...) applies to the whole method, so a
 * TexeraException thrown by any earlier statement would also make this test
 * pass. Consider narrowing the expectation to the final call.
 */
@Test(expected = TexeraException.class)
public void testGetOutputSchema5() throws Exception {
    // Reference schemas: taken from the operators of a built, valid plan.
    LogicalPlan validLogicalPlan = getLogicalPlan1();
    Plan queryPlan = validLogicalPlan.buildQueryPlan();
    ISink tupleSink = queryPlan.getRoot();
    IOperator regexMatcher = ((TupleSink) tupleSink).getInputOperator();
    IOperator keywordSource = ((RegexMatcher) regexMatcher).getInputOperator();

    Schema expectedSourceOutputSchema;
    Schema expectedMatcherOutputSchema;
    regexMatcher.open();
    try {
        expectedSourceOutputSchema = keywordSource.getOutputSchema();
        expectedMatcherOutputSchema = regexMatcher.getOutputSchema();
    } finally {
        // Always release the operator, even if reading a schema throws.
        regexMatcher.close();
    }

    // Second regex operator: it only feeds the NLP operator and is never
    // linked to the keyword source.
    String secondRegexId = "regex 2";
    RegexPredicate regexPredicate2 = new RegexPredicate("ca(lifornia)?", Arrays.asList("location", "content"), "regexResults");
    regexPredicate2.setID(secondRegexId);

    LogicalPlan logicalPlan = new LogicalPlan();
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addOperator(tupleSinkPredicate);
    logicalPlan.addOperator(regexPredicate2);
    logicalPlan.addOperator(nlpEntityPredicate);
    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID, TUPLE_SINK_ID));
    logicalPlan.addLink(new OperatorLink(secondRegexId, NLP_ENTITY_ID));

    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);
    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);

    // Expected to throw: the operator belongs to the source-less component.
    logicalPlan.getOperatorOutputSchema(secondRegexId);
}
Also used : ISink(edu.uci.ics.texera.api.dataflow.ISink) TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) RegexPredicate(edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate) Schema(edu.uci.ics.texera.api.schema.Schema) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) Plan(edu.uci.ics.texera.api.engine.Plan) Test(org.junit.Test)

Example 10 with RegexMatcher

use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.

the class LogicalPlanTest method testLogicalPlan2.

/*
 * Test a valid operator graph.
 *
 *                  -> RegexMatcher ------>
 * KeywordSource --<                       >-- Join --> TupleSink
 *                  -> NlpEntityOperator -->
 *
 * The built plan is walked backwards from the sink. Both join inputs must
 * read from distinct outputs of one shared broadcast connector, and that
 * connector must be fed by the keyword source.
 */
@Test
public void testLogicalPlan2() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan2();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);

    IOperator join = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join instanceof Join);

    // Inner side of the join is the regex matcher, outer side the NLP operator.
    IOperator joinInput1 = ((Join) join).getInnerInputOperator();
    Assert.assertTrue(joinInput1 instanceof RegexMatcher);
    IOperator joinInput2 = ((Join) join).getOuterInputOperator();
    Assert.assertTrue(joinInput2 instanceof NlpEntityOperator);

    // Each branch reads from an output operator of a connector.
    IOperator connectorOut1 = ((RegexMatcher) joinInput1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) joinInput2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);

    // The two branches must use different output indices of the connector.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    // JUnit's argument order is (expected, actual).
    Assert.assertEquals(2, connectorIndices.size());

    // Both output operators must belong to the very same connector instance.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    Assert.assertSame(connector1, connector2);

    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Also used : TupleSink(edu.uci.ics.texera.dataflow.sink.tuple.TupleSink) IOperator(edu.uci.ics.texera.api.dataflow.IOperator) Join(edu.uci.ics.texera.dataflow.join.Join) Plan(edu.uci.ics.texera.api.engine.Plan) KeywordMatcherSourceOperator(edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) ISink(edu.uci.ics.texera.api.dataflow.ISink) ConnectorOutputOperator(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator) NlpEntityOperator(edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator) RegexMatcher(edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher) OneToNBroadcastConnector(edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

RegexMatcher (edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher)12 Test (org.junit.Test)11 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)7 ISink (edu.uci.ics.texera.api.dataflow.ISink)7 Plan (edu.uci.ics.texera.api.engine.Plan)7 TupleSink (edu.uci.ics.texera.dataflow.sink.tuple.TupleSink)7 Schema (edu.uci.ics.texera.api.schema.Schema)6 Tuple (edu.uci.ics.texera.api.tuple.Tuple)4 SimilarityJoinPredicate (edu.uci.ics.texera.dataflow.join.SimilarityJoinPredicate)4 OneToNBroadcastConnector (edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector)3 ConnectorOutputOperator (edu.uci.ics.texera.dataflow.connector.OneToNBroadcastConnector.ConnectorOutputOperator)3 Join (edu.uci.ics.texera.dataflow.join.Join)3 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)3 NlpEntityOperator (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator)3 IDField (edu.uci.ics.texera.api.field.IDField)2 IntegerField (edu.uci.ics.texera.api.field.IntegerField)2 TextField (edu.uci.ics.texera.api.field.TextField)2 Span (edu.uci.ics.texera.api.span.Span)2 RegexPredicate (edu.uci.ics.texera.dataflow.regexmatcher.RegexPredicate)2 HashSet (java.util.HashSet)2