Use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.
Class KeywordMatcherPerformanceTest, method match.
/**
 * Runs a keyword match for every query in {@code queryList} and records the results.
 *
 * For each query, a {@link KeywordMatcherSourceOperator} is built over the
 * {@code MedlineIndexWriter.ABSTRACT} attribute, opened, drained, and closed.
 * The elapsed wall-clock time in seconds (rounded to four decimals) is appended
 * to {@code timeResults}, and the number of spans found is added to
 * {@code totalResultCount}.
 *
 * @param queryList         the queries to run, one operator per query
 * @param opType            the keyword matching type passed to the predicate
 * @param luceneAnalyzerStr the analyzer string passed to the predicate
 * @param tableName         the table name passed to the predicate
 * @throws TexeraException propagated from the operator calls
 * @throws IOException     declared by the original signature
 */
public static void match(ArrayList<String> queryList, KeywordMatchingType opType, String luceneAnalyzerStr, String tableName) throws TexeraException, IOException {
    // Loop-invariant: the attribute list is the same for every query.
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    for (String query : queryList) {
        KeywordSourcePredicate predicate = new KeywordSourcePredicate(query, attributeNames, luceneAnalyzerStr, opType, tableName, SchemaConstants.SPAN_LIST);
        KeywordMatcherSourceOperator keywordSource = new KeywordMatcherSourceOperator(predicate);
        long startMatchTime = System.currentTimeMillis();
        int counter = 0;
        keywordSource.open();
        try {
            Tuple nextTuple;
            while ((nextTuple = keywordSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Close the operator even when reading a tuple fails.
            keywordSource.close();
        }
        long endMatchTime = System.currentTimeMillis();
        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        // Format with a fixed locale: the default locale may use a decimal comma,
        // which Double.parseDouble rejects with a NumberFormatException.
        timeResults.add(Double.parseDouble(String.format(java.util.Locale.ROOT, "%.4f", matchTime)));
        totalResultCount += counter;
    }
}
Use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.
Class PredicateBaseTest, method testKeyword.
/**
 * Passes a {@link KeywordPredicate} and then a {@link KeywordSourcePredicate},
 * both using the "standard" analyzer and index-based conjunction matching,
 * to {@code testPredicate}.
 */
@Test
public void testKeyword() throws Exception {
    // Plain keyword predicate (no table name).
    testPredicate(new KeywordPredicate("keyword", attributeNames, "standard", KeywordMatchingType.CONJUNCTION_INDEXBASED, "keywordResults"));

    // Source variant, which additionally carries a table name.
    testPredicate(new KeywordSourcePredicate("keyword", attributeNames, "standard", KeywordMatchingType.CONJUNCTION_INDEXBASED, "tableName", "keywordSourceResults"));
}
Use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.
Class JoinTestHelper, method getKeywordSource.
/**
 * Builds a {@link KeywordMatcherSourceOperator} that searches the AUTHOR, TITLE
 * and REVIEW attributes of a table for the given keyword query.
 * (KeywordMatcher is used in most of the Join test cases.)
 *
 * @param tableName    the table to search; also used to look up the analyzer
 *                     string through {@code RelationManager}
 * @param query        the keyword query
 * @param matchingType the keyword matching type for the predicate
 * @return a new, not yet opened, keyword source operator
 * @throws TexeraException propagated from the relation manager or the constructors
 */
public static KeywordMatcherSourceOperator getKeywordSource(String tableName, String query, KeywordMatchingType matchingType) throws TexeraException {
    List<String> searchedAttributes = Arrays.asList(JoinTestConstants.AUTHOR, JoinTestConstants.TITLE, JoinTestConstants.REVIEW);
    String analyzerString = RelationManager.getInstance().getTableAnalyzerString(tableName);
    KeywordSourcePredicate sourcePredicate = new KeywordSourcePredicate(
            query,
            searchedAttributes,
            analyzerString,
            matchingType,
            tableName,
            SchemaConstants.SPAN_LIST);
    return new KeywordMatcherSourceOperator(sourcePredicate);
}
Use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.
Class DictionaryMatcherSourceOperator, method open.
/**
 * Opens this operator if it is currently closed; otherwise does nothing.
 *
 * Reads the first dictionary entry, then wires up one of two pipelines
 * depending on the predicate's keyword matching type:
 * a scan source feeding a {@link DictionaryMatcher} for SUBSTRING_SCANBASED
 * and REGEX, or a {@link KeywordMatcherSourceOperator} for every other type.
 * The output schema is taken from whichever operator was opened.
 *
 * @throws TexeraException propagated from the underlying operators
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }

    currentDictionaryEntry = predicate.getDictionary().getNextEntry();

    boolean scanBased = predicate.getKeywordMatchingType() == KeywordMatchingType.SUBSTRING_SCANBASED
            || predicate.getKeywordMatchingType() == KeywordMatchingType.REGEX;

    if (!scanBased) {
        // Other keyword matching types (CONJUNCTION and PHRASE) use an
        // index-based keyword source for the current dictionary entry.
        KeywordSourcePredicate keywordSourcePredicate = new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                predicate.getKeywordMatchingType(),
                predicate.getTableName(),
                predicate.getSpanListName());
        keywordSource = new KeywordMatcherSourceOperator(keywordSourcePredicate);
        keywordSource.open();
        // The output schema is the keyword source's schema.
        outputSchema = keywordSource.getOutputSchema();
    } else {
        // Substring and regex matching: a scan source operator followed by
        // a dictionary matcher.
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(predicate.getTableName());
        indexSource = new ScanBasedSourceOperator(scanPredicate);
        DictionaryPredicate dictionaryPredicate = new DictionaryPredicate(
                predicate.getDictionary(),
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                predicate.getKeywordMatchingType(),
                predicate.getSpanListName());
        dictionaryMatcher = new DictionaryMatcher(dictionaryPredicate);
        dictionaryMatcher.setInputOperator(indexSource);
        dictionaryMatcher.open();
        outputSchema = dictionaryMatcher.getOutputSchema();
    }

    cursor = OPENED;
}
Use of edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate in project textdb by TextDB.
Class DictionaryMatcherSourceOperator, method computeMatchingResults.
/**
 * Collects the keyword matching results of every remaining dictionary entry,
 * grouped by tuple ID.
 *
 * Drains the current keyword source, then repeatedly advances to the next
 * dictionary entry, replaces the keyword source with one built for that entry,
 * and drains it too. Returns once the dictionary yields no further entry.
 * For each tuple ID, {@code tupleIDMap} keeps the first tuple seen (with its
 * span list field removed) and {@code tupleResultMap} accumulates the spans.
 */
@SuppressWarnings("unchecked")
private void computeMatchingResults() {
    for (;;) {
        // Drain all tuples produced for the current dictionary entry.
        for (Tuple inputTuple = keywordSource.getNextTuple(); inputTuple != null; inputTuple = keywordSource.getNextTuple()) {
            String tupleID = inputTuple.getField(SchemaConstants._ID).getValue().toString();
            ListField<Span> keywordResultsField = inputTuple.getField(predicate.getSpanListName(), ListField.class);
            List<Span> keywordResults = keywordResultsField.getValue();

            if (!tupleResultMap.containsKey(tupleID)) {
                // First time this tuple shows up: remember it without its span list.
                tupleIDMap.put(tupleID, new Tuple.Builder(inputTuple).remove(predicate.getSpanListName()).build());
                tupleResultMap.put(tupleID, new ArrayList<>(keywordResults));
            } else {
                tupleResultMap.get(tupleID).addAll(keywordResults);
            }
        }

        // Move on to the next dictionary entry; stop when there is none.
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry == null) {
            return;
        }

        // Swap in a keyword source for the new entry.
        keywordSource.close();
        keywordSource = new KeywordMatcherSourceOperator(new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                predicate.getKeywordMatchingType(),
                predicate.getTableName(),
                predicate.getSpanListName()));
        keywordSource.open();
    }
}
Aggregations