use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class JoinDistanceTest method testOneSpanEncompassesOtherAndDifferenceGreaterThanThreshold.
/*
 * Scenario: one of the two spans being joined completely contains the other,
 * and the gap between their start indexes and/or between their end indexes
 * is larger than the join threshold.
 *
 * e.g.
 * keyword span: [<11, 18>]
 * phrase span:  [<3, 33>]
 * threshold = 10 (the spans are beyond the threshold)
 *
 * Expected: the join produces no tuples.
 */
@Test
public void testOneSpanEncompassesOtherAndDifferenceGreaterThanThreshold() throws Exception {
    JoinTestHelper.insertToTable(BOOK_TABLE, JoinTestConstants.bookGroup1.get(0));

    // Single keyword source (conjunction matching) and the longer phrase source that contains it.
    KeywordMatcherSourceOperator outerKeywordSource =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "special", conjunction);
    KeywordMatcherSourceOperator innerPhraseSource =
            JoinTestHelper.getKeywordSource(BOOK_TABLE, "takes a special kind of writer", phrase);

    // Distance join on the review attribute with a threshold of 10.
    JoinDistancePredicate distancePredicate = new JoinDistancePredicate(JoinTestConstants.REVIEW, 10);
    List<Tuple> joinedTuples = JoinTestHelper.getJoinDistanceResults(
            innerPhraseSource, outerKeywordSource, distancePredicate, Integer.MAX_VALUE, 0);

    Assert.assertEquals(0, joinedTuples.size());
}
use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class JoinTestHelper method getKeywordSource.
/**
 * Creates a KeywordMatcherSourceOperator over a test table for the given query.
 * (KeywordMatcher is used in most of the Join test cases.)
 *
 * The predicate searches the AUTHOR, TITLE and REVIEW attributes, uses the analyzer
 * string registered for the table in the RelationManager, and names the span-list
 * attribute SchemaConstants.SPAN_LIST.
 *
 * @param tableName name of the table to read from
 * @param query keyword query passed to the predicate
 * @param matchingType keyword matching type passed to the predicate
 * @return a new, not yet opened, keyword source operator
 * @throws TextDBException
 */
public static KeywordMatcherSourceOperator getKeywordSource(String tableName, String query, KeywordMatchingType matchingType) throws TextDBException {
    return new KeywordMatcherSourceOperator(
            new KeywordSourcePredicate(
                    query,
                    Arrays.asList(JoinTestConstants.AUTHOR, JoinTestConstants.TITLE, JoinTestConstants.REVIEW),
                    RelationManager.getRelationManager().getTableAnalyzerString(tableName),
                    matchingType,
                    tableName,
                    SchemaConstants.SPAN_LIST));
}
use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class DictionaryMatcherSourceOperator method open.
/**
 * Opens the dictionary matcher. open() must be called before getNextTuple().
 *
 * Reads the first dictionary entry, then sets up the underlying source:
 * a scan-based source for SUBSTRING_SCANBASED matching, or an index-based
 * KeywordMatcherSourceOperator for every other matching type. The input and
 * output schemas are initialized from that source.
 *
 * @throws DataFlowException if the dictionary has no entries, if the scan
 *         source's schema already contains the span-list attribute, or if any
 *         other failure occurs while opening the underlying source (the
 *         original exception is kept as the cause)
 */
@Override
public void open() throws DataFlowException {
    try {
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry == null) {
            throw new DataFlowException("Dictionary is empty");
        }
        if (predicate.getKeywordMatchingType() == KeywordMatchingType.SUBSTRING_SCANBASED) {
            // For substring matching, create a scan source operator.
            indexSource = new ScanBasedSourceOperator(new ScanSourcePredicate(predicate.getTableName()));
            indexSource.open();
            // The scan source does not produce a span list, so the output schema
            // is the input schema plus the span-list attribute.
            inputSchema = indexSource.getOutputSchema();
            outputSchema = inputSchema;
            if (inputSchema.containsField(predicate.getSpanListName())) {
                throw new DataFlowException(ErrorMessages.DUPLICATE_ATTRIBUTE(predicate.getSpanListName(), inputSchema));
            }
            outputSchema = Utils.addAttributeToSchema(outputSchema, new Attribute(predicate.getSpanListName(), AttributeType.LIST));
        } else {
            // For the other keyword matching types (conjunction and phrase),
            // create an index-based keyword matcher for the current entry.
            keywordSource = new KeywordMatcherSourceOperator(new KeywordSourcePredicate(currentDictionaryEntry, predicate.getAttributeNames(), predicate.getAnalyzerString(), predicate.getKeywordMatchingType(), predicate.getTableName(), predicate.getSpanListName()));
            keywordSource.open();
            // These matching types delegate to the keyword matcher, so the
            // output schema is the same as the keyword matcher's schema.
            inputSchema = keywordSource.getOutputSchema();
            outputSchema = keywordSource.getOutputSchema();
        }
    } catch (DataFlowException e) {
        // Already the declared exception type: propagate it unchanged instead of
        // wrapping it in a second DataFlowException with a duplicated message.
        throw e;
    } catch (Exception e) {
        throw new DataFlowException(e.getMessage(), e);
    }
}
use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class KeywordMatcherPerformanceTest method match.
/*
 * Runs keyword matching for every query in queryList and records the results.
 *
 * For each query a KeywordMatcherSourceOperator is created over the ABSTRACT
 * attribute of the given table, opened, drained and closed. The elapsed time
 * in seconds (rounded to 4 decimal places) is appended to timeResults, and the
 * number of spans found is added to totalResultCount.
 *
 * The operator is closed even if iterating over the results fails, and the
 * rounding uses a locale-independent format so that it also works on systems
 * whose default locale uses a decimal comma.
 */
public static void match(ArrayList<String> queryList, KeywordMatchingType opType, String luceneAnalyzerStr, String tableName) throws TextDBException, IOException {
    Attribute[] attributeList = new Attribute[] { MedlineIndexWriter.ABSTRACT_ATTR };
    for (String query : queryList) {
        // NOTE(review): the predicate is built with a null span-list name, yet the
        // loop below reads SchemaConstants.SPAN_LIST from every tuple - confirm the
        // operator still emits that field in this configuration.
        KeywordSourcePredicate predicate = new KeywordSourcePredicate(query, Utils.getAttributeNames(attributeList), luceneAnalyzerStr, opType, tableName, null);
        KeywordMatcherSourceOperator keywordSource = new KeywordMatcherSourceOperator(predicate);

        long startMatchTime = System.currentTimeMillis();
        keywordSource.open();
        int counter = 0;
        try {
            Tuple nextTuple;
            while ((nextTuple = keywordSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Release the operator's resources even when iteration throws.
            keywordSource.close();
        }
        long endMatchTime = System.currentTimeMillis();

        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        // Locale.ROOT guarantees a '.' decimal separator, which Double.parseDouble requires.
        timeResults.add(Double.parseDouble(String.format(java.util.Locale.ROOT, "%.4f", matchTime)));
        totalResultCount += counter;
    }
}
use of edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator in project textdb by TextDB.
the class LogicalPlanTest method testLogicalPlan3.
/*
 * Test a valid operator graph.
 *
 *                  --> RegexMatcher ------->
 *                  |                         >-- Join1 -->
 * KeywordSource --< --> NlpEntityOperator -->             >-- Join2 --> TupleSink
 *                  |                                     /
 *                  --> FuzzyTokenMatcher --------------->
 *
 * The built query plan is walked backwards from the sink, checking the type of
 * every operator, that the three branches come from three distinct outputs of
 * one shared broadcast connector, and that the connector is fed by a
 * KeywordMatcherSourceOperator.
 */
@Test
public void testLogicalPlan3() throws Exception {
    LogicalPlan logicalPlan = getLogicalPlan3();
    Plan queryPlan = logicalPlan.buildQueryPlan();

    // Sink <-- Join2
    ISink tupleSink = queryPlan.getRoot();
    Assert.assertTrue(tupleSink instanceof TupleSink);
    IOperator join2 = ((TupleSink) tupleSink).getInputOperator();
    Assert.assertTrue(join2 instanceof Join);

    // Join2 <-- (outer) Join1, (inner) FuzzyTokenMatcher
    IOperator join1 = ((Join) join2).getOuterInputOperator();
    Assert.assertTrue(join1 instanceof Join);
    IOperator fuzzyTokenMatcher = ((Join) join2).getInnerInputOperator();
    Assert.assertTrue(fuzzyTokenMatcher instanceof FuzzyTokenMatcher);

    // Join1 <-- (inner) RegexMatcher, (outer) NlpEntityOperator
    IOperator regexMatcher = ((Join) join1).getInnerInputOperator();
    Assert.assertTrue(regexMatcher instanceof RegexMatcher);
    IOperator nlpEntityOperator = ((Join) join1).getOuterInputOperator();
    Assert.assertTrue(nlpEntityOperator instanceof NlpEntityOperator);

    // Each of the three branches reads from a connector output operator.
    IOperator connectorOut1 = ((RegexMatcher) regexMatcher).getInputOperator();
    Assert.assertTrue(connectorOut1 instanceof ConnectorOutputOperator);
    IOperator connectorOut2 = ((NlpEntityOperator) nlpEntityOperator).getInputOperator();
    Assert.assertTrue(connectorOut2 instanceof ConnectorOutputOperator);
    IOperator connectorOut3 = ((FuzzyTokenMatcher) fuzzyTokenMatcher).getInputOperator();
    Assert.assertTrue(connectorOut3 instanceof ConnectorOutputOperator);

    // The three outputs must use three different output indices...
    HashSet<Integer> connectorIndices = new HashSet<>();
    connectorIndices.add(((ConnectorOutputOperator) connectorOut1).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut2).getOutputIndex());
    connectorIndices.add(((ConnectorOutputOperator) connectorOut3).getOutputIndex());
    Assert.assertEquals(3, connectorIndices.size());

    // ...and belong to one and the same broadcast connector.
    OneToNBroadcastConnector connector1 = ((ConnectorOutputOperator) connectorOut1).getOwnerConnector();
    OneToNBroadcastConnector connector2 = ((ConnectorOutputOperator) connectorOut2).getOwnerConnector();
    OneToNBroadcastConnector connector3 = ((ConnectorOutputOperator) connectorOut3).getOwnerConnector();
    Assert.assertSame(connector1, connector2);
    Assert.assertSame(connector1, connector3);

    // The connector itself is fed by the keyword source.
    IOperator keywordSource = connector1.getInputOperator();
    Assert.assertTrue(keywordSource instanceof KeywordMatcherSourceOperator);
}
Aggregations