use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class JoinDistanceTest method testIdsMatchFieldsMatchSpanExceedThreshold.
/*
 * Verifies that no join result is produced when the keyword spans to be
 * joined lie farther apart than the join distance threshold.
 *
 * [<11, 18>]
 * [<42, 48>]
 * threshold = 20 (the spans are beyond the threshold)
 *
 * Expected result: the returned list is empty.
 */
@Test
public void testIdsMatchFieldsMatchSpanExceedThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    // One keyword source per keyword, both reading from the same book table.
    KeywordMatcherSourceOperator specialSource =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator topicsSource =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "topics", conjunction);

    // Join on the REVIEW attribute with a distance threshold of 20.
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 20);

    List<Tuple> joinedTuples = JoinTestHelper.getJoinDistanceResults(
            topicsSource, specialSource, distancePredicate, Integer.MAX_VALUE, 0);

    Assert.assertEquals(0, joinedTuples.size());
}
use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class JoinTestHelper method getKeywordSource.
/**
 * Builds a KeywordMatcherSourceOperator over a test table for a given keyword.
 * (KeywordMatcher is used in most of the Join test cases.)
 *
 * The predicate lists the AUTHOR, TITLE and REVIEW constants of
 * JoinTestConstants as its attributes, takes the analyzer string that
 * RelationManager reports for the table, and uses SchemaConstants.SPAN_LIST
 * as the span list name.
 *
 * @param tableName name of the table the source operator reads from
 * @param query keyword query handed to the predicate
 * @param matchingType keyword matching type handed to the predicate
 * @return a newly constructed KeywordMatcherSourceOperator
 * @throws TexeraException declared by this helper; propagated from the calls it makes
 */
public static KeywordMatcherSourceOperator getKeywordSource(String tableName, String query, KeywordMatchingType matchingType) throws TexeraException {
    KeywordSourcePredicate sourcePredicate = new KeywordSourcePredicate(
            query,
            Arrays.asList(JoinTestConstants.AUTHOR, JoinTestConstants.TITLE, JoinTestConstants.REVIEW),
            RelationManager.getInstance().getTableAnalyzerString(tableName),
            matchingType,
            tableName,
            SchemaConstants.SPAN_LIST);
    return new KeywordMatcherSourceOperator(sourcePredicate);
}
use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class DictionaryMatcherSourceOperator method open.
/**
 * Opens this operator: fetches the first dictionary entry, builds and opens
 * the underlying operator that matches the predicate's keyword matching
 * type, and records that operator's output schema.
 *
 * The call does nothing unless the cursor is in the CLOSED state.
 *
 * @throws TexeraException declared by this method; propagated from the operators it opens
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }

    currentDictionaryEntry = predicate.getDictionary().getNextEntry();

    KeywordMatchingType matchingType = predicate.getKeywordMatchingType();
    boolean scanBased = matchingType == KeywordMatchingType.SUBSTRING_SCANBASED
            || matchingType == KeywordMatchingType.REGEX;

    if (!scanBased) {
        // For the other keyword matching types (CONJUNCTION and PHRASE),
        // create an index-based keyword source operator for the current entry.
        KeywordSourcePredicate keywordSourcePredicate = new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                matchingType,
                predicate.getTableName(),
                predicate.getSpanListName());
        keywordSource = new KeywordMatcherSourceOperator(keywordSourcePredicate);
        keywordSource.open();
        // These matching types go through a KeywordMatcher, so the output
        // schema is the keyword source's schema.
        outputSchema = keywordSource.getOutputSchema();
    } else {
        // For substring and regex matching, create a scan source operator
        // followed by a dictionary matcher.
        indexSource = new ScanBasedSourceOperator(new ScanSourcePredicate(predicate.getTableName()));
        DictionaryPredicate dictionaryPredicate = new DictionaryPredicate(
                predicate.getDictionary(),
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                matchingType,
                predicate.getSpanListName());
        dictionaryMatcher = new DictionaryMatcher(dictionaryPredicate);
        dictionaryMatcher.setInputOperator(indexSource);
        dictionaryMatcher.open();
        outputSchema = dictionaryMatcher.getOutputSchema();
    }

    cursor = OPENED;
}
use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class DictionaryMatcherSourceOperator method computeMatchingResults.
/**
 * Collects the keyword matching results of every dictionary entry, grouped
 * by tuple ID.
 *
 * For each tuple returned by the current keyword source, its span list is
 * appended to tupleResultMap under the tuple's ID. The first time an ID is
 * seen, the tuple (with the span list attribute removed) is also stored in
 * tupleIDMap. When the current keyword source is drained, it is closed and
 * replaced by a new one for the next dictionary entry. The method returns
 * as soon as the dictionary's getNextEntry() returns null.
 */
@SuppressWarnings("unchecked")
private void computeMatchingResults() {
    for (;;) {
        // Drain every tuple the keyword source yields for the current entry.
        for (Tuple matchedTuple = keywordSource.getNextTuple();
                matchedTuple != null;
                matchedTuple = keywordSource.getNextTuple()) {
            String tupleID = matchedTuple.getField(SchemaConstants._ID).getValue().toString();
            String spanListName = predicate.getSpanListName();
            ListField<Span> spanField = matchedTuple.getField(spanListName, ListField.class);
            List<Span> spans = spanField.getValue();

            if (!tupleResultMap.containsKey(tupleID)) {
                // First match for this tuple: keep the tuple without its span
                // list and start its result list with a copy of the spans.
                tupleIDMap.put(tupleID, new Tuple.Builder(matchedTuple).remove(spanListName).build());
                tupleResultMap.put(tupleID, new ArrayList<>(spans));
                continue;
            }
            tupleResultMap.get(tupleID).addAll(spans);
        }

        // Advance to the next dictionary entry; stop when there is none.
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry == null) {
            return;
        }

        // Replace the drained keyword source with one for the new entry.
        keywordSource.close();
        keywordSource = new KeywordMatcherSourceOperator(new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                predicate.getKeywordMatchingType(),
                predicate.getTableName(),
                predicate.getSpanListName()));
        keywordSource.open();
    }
}
use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class LogicalPlanTest method testLogicalPlan2.
/*
 * Test a valid operator graph.
 *
 *                  -> RegexMatcher      -->
 * KeywordSource --<                         >-- Join --> TupleSink
 *                  -> NlpEntityOperator -->
 *
 * The built query plan is walked backwards from the sink, and the type of
 * each operator on the way is checked.
 */
@Test
public void testLogicalPlan2() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan2();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    // The root of the plan is the tuple sink, fed by the join.
    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);
    IOperator join = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join instanceof Join);

    // The join's inner input is the regex matcher, its outer input the NLP entity operator.
    IOperator joinInput1 = ((Join) join).getInnerInputOperator();
    Assert.assertTrue(joinInput1 instanceof RegexMatcher);
    IOperator joinInput2 = ((Join) join).getOuterInputOperator();
    Assert.assertTrue(joinInput2 instanceof NlpEntityOperator);

    // Each join input reads from a connector output operator.
    IOperator connectorOut1 = ((RegexMatcher) joinInput1).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) joinInput2).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);

    // The two connector outputs must have different output indices.
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    // JUnit's argument order is (expected, actual).
    Assert.assertEquals(2, connectorIndices.size());

    // Both connector outputs must belong to one and the same connector.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    Assert.assertSame(connector1, connector2);

    // The shared connector is fed by the keyword source.
    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Aggregations