use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.
the class LogicalPlanTest method testLogicalPlan1.
/*
 * Verifies the operator chain produced for a valid, linear operator graph.
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 *
 * The query plan is walked backwards from its root, checking the
 * concrete type of every operator on the way.
 */
@Test
public void testLogicalPlan1() throws Exception {
    Plan plan = getLogicalPlan1().buildQueryPlan();

    // The root of the plan must be the tuple sink.
    ISink root = plan.getRoot();
    Assert.assertTrue(root instanceof TupleSink);
    TupleSink sink = (TupleSink) root;

    // The sink is fed by the regex matcher.
    IOperator sinkInput = sink.getInputOperator();
    Assert.assertTrue(sinkInput instanceof RegexMatcher);
    RegexMatcher matcher = (RegexMatcher) sinkInput;

    // The regex matcher is fed by the keyword source.
    IOperator matcherInput = matcher.getInputOperator();
    Assert.assertTrue(matcherInput instanceof KeywordMatcherSourceOperator);
}
use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.
the class SimilarityJoinTest method test4.
/*
 * Similarity Join Predicate on two similar phrases:
 *   Galaxy S8
 *   Galaxy Note 7
 * With a similarity (NormalizedLevenshtein) threshold of 0.8 the two
 * phrases should NOT be joined, so the result list must be empty.
 */
@Test
public void test4() throws TexeraException {
    // One news tuple goes into each side of the join.
    JoinTestHelper.insertToTable(NEWS_TABLE_OUTER, JoinTestConstants.getNewsTuples().get(2));
    JoinTestHelper.insertToTable(NEWS_TABLE_INNER, JoinTestConstants.getNewsTuples().get(3));

    final String galaxyPattern = "[Gg]alaxy.{1,6}\\d";
    final double similarityThreshold = 0.8;

    // Both sides run the same regex over the news body.
    RegexMatcher innerMatcher = JoinTestHelper.getRegexMatcher(
            JoinTestHelper.NEWS_TABLE_INNER, galaxyPattern, JoinTestConstants.NEWS_BODY);
    RegexMatcher outerMatcher = JoinTestHelper.getRegexMatcher(
            JoinTestHelper.NEWS_TABLE_OUTER, galaxyPattern, JoinTestConstants.NEWS_BODY);

    SimilarityJoinPredicate joinPredicate =
            new SimilarityJoinPredicate(JoinTestConstants.NEWS_BODY, similarityThreshold);

    List<Tuple> joinedTuples = JoinTestHelper.getJoinDistanceResults(
            innerMatcher, outerMatcher, joinPredicate, Integer.MAX_VALUE, 0);

    Assert.assertTrue(joinedTuples.isEmpty());
}
use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.
the class LogicalPlanTest method testGetOutputSchema4.
/*
 * Test getOutputSchema on an operator graph without a sink operator.
 *
 * KeywordSource --> RegexMatcher
 *
 * The expected schemas are read from the operators of the query plan
 * built from getLogicalPlan1(). They are compared with the schemas that
 * getOperatorOutputSchema reports for a logical plan that holds only
 * keywordSourcePredicate and regexPredicate, linked source --> regex.
 */
@Test
public void testGetOutputSchema4() throws Exception {
    LogicalPlan validLogicalPlan = getLogicalPlan1();
    Plan queryPlan = validLogicalPlan.buildQueryPlan();
    ISink tupleSink = queryPlan.getRoot();
    IOperator regexMatcher = ((TupleSink) tupleSink).getInputOperator();
    IOperator keywordSource = ((RegexMatcher) regexMatcher).getInputOperator();

    final Schema expectedSourceOutputSchema;
    final Schema expectedMatcherOutputSchema;
    regexMatcher.open();
    try {
        expectedSourceOutputSchema = keywordSource.getOutputSchema();
        expectedMatcherOutputSchema = regexMatcher.getOutputSchema();
    } finally {
        // Close the operator even when reading a schema fails, so an
        // opened operator is never left behind by this test.
        regexMatcher.close();
    }

    // Sink-less logical plan: source and regex matcher only.
    LogicalPlan logicalPlan = new LogicalPlan();
    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));

    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);

    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);
}
use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.
the class LogicalPlanTest method testGetOutputSchema5.
/*
 * Test an operator graph with a disconnected component.
 *
 * KeywordSource --> RegexMatcher --> TupleSink
 * RegexMatcher2 --> NlpEntityOperator
 * (a disconnected graph)
 *
 * The schemas of the source and the first regex matcher are checked
 * against the ones read from the query plan of getLogicalPlan1().
 * Requesting the output schema of the second regex matcher, which has
 * no incoming link, is the call expected to raise the TexeraException.
 */
@Test(expected = TexeraException.class)
public void testGetOutputSchema5() throws Exception {
    LogicalPlan validLogicalPlan = getLogicalPlan1();
    Plan queryPlan = validLogicalPlan.buildQueryPlan();
    ISink tupleSink = queryPlan.getRoot();
    IOperator regexMatcher = ((TupleSink) tupleSink).getInputOperator();
    IOperator keywordSource = ((RegexMatcher) regexMatcher).getInputOperator();

    final Schema expectedSourceOutputSchema;
    final Schema expectedMatcherOutputSchema;
    regexMatcher.open();
    try {
        expectedSourceOutputSchema = keywordSource.getOutputSchema();
        expectedMatcherOutputSchema = regexMatcher.getOutputSchema();
    } finally {
        // Close the operator even when reading a schema fails, so an
        // opened operator is never left behind by this test.
        regexMatcher.close();
    }

    LogicalPlan logicalPlan = new LogicalPlan();

    // Second regex matcher: the head of the disconnected component.
    String regexId2 = "regex 2";
    RegexPredicate regexPredicate2 = new RegexPredicate("ca(lifornia)?", Arrays.asList("location", "content"), "regexResults");
    regexPredicate2.setID(regexId2);

    logicalPlan.addOperator(keywordSourcePredicate);
    logicalPlan.addOperator(regexPredicate);
    logicalPlan.addOperator(tupleSinkPredicate);
    logicalPlan.addOperator(regexPredicate2);
    logicalPlan.addOperator(nlpEntityPredicate);

    logicalPlan.addLink(new OperatorLink(KEYWORD_SOURCE_ID, REGEX_ID));
    logicalPlan.addLink(new OperatorLink(REGEX_ID, TUPLE_SINK_ID));
    logicalPlan.addLink(new OperatorLink(regexId2, NLP_ENTITY_ID));

    Schema sourceOutputSchema = logicalPlan.getOperatorOutputSchema(KEYWORD_SOURCE_ID);
    Schema matcherOutputSchema = logicalPlan.getOperatorOutputSchema(REGEX_ID);

    Assert.assertEquals(expectedSourceOutputSchema, sourceOutputSchema);
    Assert.assertEquals(expectedMatcherOutputSchema, matcherOutputSchema);

    // Expected to throw; the returned schema is irrelevant.
    logicalPlan.getOperatorOutputSchema(regexId2);
}
use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcher in project textdb by TextDB.
the class LogicalPlanTest method testLogicalPlan2.
/*
 * Test a valid operator graph.
 *                  -> RegexMatcher -->
 * KeywordSource --<                     >-- Join --> TupleSink
 *                  -> NlpEntityOperator -->
 *
 * The query plan is walked backwards from the sink. Both join inputs
 * must read from distinct outputs of one shared broadcast connector,
 * and that connector must read from the keyword source.
 */
@Test
public void testLogicalPlan2() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan2();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);

    IOperator join = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join instanceof Join);

    IOperator joinInput1 = ((Join) join).getInnerInputOperator();
    Assert.assertTrue(joinInput1 instanceof RegexMatcher);

    IOperator joinInput2 = ((Join) join).getOuterInputOperator();
    Assert.assertTrue(joinInput2 instanceof NlpEntityOperator);

    IOperator connectorOut1 = ((RegexMatcher) joinInput1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);

    IOperator connectorOut2 = ((NlpEntityOperator) joinInput2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);

    // The two connector outputs must carry different output indices;
    // a set of size 2 proves the indices are distinct.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    // JUnit's argument order is (expected, actual).
    Assert.assertEquals(2, connectorIndices.size());

    // Both outputs must belong to the very same connector instance.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    Assert.assertSame(connector1, connector2);

    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Aggregations