Use of edu.uci.ics.textdb.exp.nlp.entity.NlpEntityOperator in project textdb by TextDB:
class NlpExtractorPerformanceTest, method matchNLP.
/**
 * Runs NLP entity extraction for {@code tokenType} over the source built from
 * {@code tableName} and accumulates the outcome into the class-level totals.
 *
 * <p>An {@code NlpEntityOperator} is configured on the
 * {@code MedlineIndexWriter.ABSTRACT} attribute and fed by a
 * {@code ScanBasedSourceOperator}. Every tuple is drained and the sizes of the
 * {@code SchemaConstants.SPAN_LIST} fields are summed. The elapsed wall-clock
 * time in seconds (covering open, iteration and close) is added to
 * {@code totalMatchingTime}; the span count is added to {@code totalResults}.
 *
 * @param tableName name passed to the {@code ScanSourcePredicate} that feeds the operator
 * @param tokenType NLP entity type handed to the {@code NlpEntityPredicate}
 * @throws Exception if opening, iterating or closing the operator fails; the
 *                   totals are left untouched in that case
 */
public static void matchNLP(String tableName, NlpEntityType tokenType) throws Exception {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    ISourceOperator sourceOperator = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(tokenType, attributeNames, null);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(sourceOperator);

    int counter = 0;
    long startMatchTime = System.currentTimeMillis();
    nlpEntityOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
            ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
            List<Span> spanList = spanListField.getValue();
            counter += spanList.size();
        }
    } finally {
        // Always release the operator, even when iteration fails midway.
        nlpEntityOperator.close();
    }
    long endMatchTime = System.currentTimeMillis();

    // Convert milliseconds to seconds before accumulating.
    double matchTime = (endMatchTime - startMatchTime) / 1000.0;
    totalMatchingTime += matchTime;
    totalResults += counter;
}
Use of edu.uci.ics.textdb.exp.nlp.entity.NlpEntityOperator in project textdb by TextDB:
class LogicalPlanTest, method testLogicalPlan3.
/*
 * Test a valid operator graph.
 *
 *                  --> RegexMatcher ------->
 *                  |                        >-- Join1 -->
 * KeywordSource --< --> NlpEntityOperator ->             >-- Join2 --> TupleSink
 *                  |                                    /
 *                  --> FuzzyTokenMatcher -------------->
 *
 * The built query plan is walked backwards from the sink, checking the type
 * of every operator and that all three branches hang off the same connector.
 */
@Test
public void testLogicalPlan3() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan3();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    // Root of the plan: TupleSink fed by Join2.
    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);
    IOperator join2 = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join2 instanceof Join);

    // Join2: outer input is Join1, inner input is the FuzzyTokenMatcher.
    IOperator join2Input1 = ((Join) join2).getOuterInputOperator();
    Assert.assertTrue(join2Input1 instanceof Join);
    IOperator join2Input2 = ((Join) join2).getInnerInputOperator();
    Assert.assertTrue(join2Input2 instanceof FuzzyTokenMatcher);

    // Join1: inner input is the RegexMatcher, outer input is the NlpEntityOperator.
    IOperator join1Input1 = ((Join) join2Input1).getInnerInputOperator();
    Assert.assertTrue(join1Input1 instanceof RegexMatcher);
    IOperator join1Input2 = ((Join) join2Input1).getOuterInputOperator();
    Assert.assertTrue(join1Input2 instanceof NlpEntityOperator);

    // Each of the three matchers must read from a connector output.
    IOperator connectorOut1 = ((RegexMatcher) join1Input1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) join1Input2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);
    IOperator connectorOut3 = ((FuzzyTokenMatcher) join2Input2).getInputOperator();
    Assert.assertTrue(connectorOut3 instanceof ConnectorOutputOperator);

    // The three outputs must carry distinct output indices.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut3).getOutputIndex());
    Assert.assertEquals(3, connectorIndices.size());

    // All outputs must belong to one and the same connector instance.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    OneToNBroadcastConnector connector3 = ((ConnectorOutputOperator) connectorOut3).getOwnerConnector();
    Assert.assertSame(connector1, connector2);
    Assert.assertSame(connector1, connector3);

    // The shared connector is fed by the keyword source.
    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Use of edu.uci.ics.textdb.exp.nlp.entity.NlpEntityOperator in project textdb by TextDB:
class LogicalPlanTest, method testLogicalPlan2.
/*
 * Test a valid operator graph.
 *
 *                  --> RegexMatcher ------->
 * KeywordSource --<                         >-- Join --> TupleSink
 *                  --> NlpEntityOperator -->
 *
 * The built query plan is walked backwards from the sink, checking the type
 * of every operator and that both branches hang off the same connector.
 */
@Test
public void testLogicalPlan2() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan2();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    // Root of the plan: TupleSink fed by the Join.
    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);
    IOperator join = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join instanceof Join);

    // Join: inner input is the RegexMatcher, outer input is the NlpEntityOperator.
    IOperator joinInput1 = ((Join) join).getInnerInputOperator();
    Assert.assertTrue(joinInput1 instanceof RegexMatcher);
    IOperator joinInput2 = ((Join) join).getOuterInputOperator();
    Assert.assertTrue(joinInput2 instanceof NlpEntityOperator);

    // Both matchers must read from a connector output.
    IOperator connectorOut1 = ((RegexMatcher) joinInput1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) joinInput2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);

    // The two outputs must carry distinct output indices.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    Assert.assertEquals(2, connectorIndices.size());

    // Both outputs must belong to one and the same connector instance.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    Assert.assertSame(connector1, connector2);

    // The shared connector is fed by the keyword source.
    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Aggregations