use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
the class DictionaryMatcherTestHelper method getScanSourceResults.
/**
 * Gets the results from a DictionaryMatcher fed by a ScanSource operator
 * (the table is scanned first and the scanned data is fed into the dictionary matcher).
 *
 * @param tableName name of the table to scan; also used to look up the table's analyzer string
 * @param dictionary dictionary passed to the {@code DictionaryPredicate}
 * @param attributeNames attribute names passed to the {@code DictionaryPredicate}
 * @param matchingType keyword matching type passed to the {@code DictionaryPredicate}
 * @param limit value passed to {@code DictionaryMatcher.setLimit}
 * @param offset value passed to {@code DictionaryMatcher.setOffset}
 * @return every tuple produced by the dictionary matcher, in the order it was produced
 * @throws TexeraException if any operator in the chain fails
 */
public static List<Tuple> getScanSourceResults(String tableName, Dictionary dictionary, List<String> attributeNames, KeywordMatchingType matchingType, int limit, int offset) throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    String luceneAnalyzerStr = relationManager.getTableAnalyzerString(tableName);

    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    DictionaryPredicate dictionaryPredicate = new DictionaryPredicate(dictionary, attributeNames, luceneAnalyzerStr, matchingType, RESULTS);
    DictionaryMatcher dictionaryMatcher = new DictionaryMatcher(dictionaryPredicate);
    dictionaryMatcher.setLimit(limit);
    dictionaryMatcher.setOffset(offset);
    dictionaryMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    dictionaryMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = dictionaryMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the matcher even when reading a tuple fails, so the operator is not left open.
        dictionaryMatcher.close();
    }
    return results;
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
the class FuzzyTokenMatcherTestHelper method getScanSourceResults.
/**
 * Gets the query results by scanning the table and passing the data into a FuzzyTokenMatcher.
 *
 * @param tableName name of the table to scan; also used to look up the table's analyzer string
 * @param query query string passed to the {@code FuzzyTokenPredicate}
 * @param threshold threshold passed to the {@code FuzzyTokenPredicate}
 * @param attributeNames attribute names passed to the {@code FuzzyTokenPredicate}
 * @param limit value passed to {@code FuzzyTokenMatcher.setLimit}
 * @param offset value passed to {@code FuzzyTokenMatcher.setOffset}
 * @return every tuple produced by the fuzzy token matcher, in the order it was produced
 * @throws TexeraException if any operator in the chain fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String query, double threshold, List<String> attributeNames, int limit, int offset) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    FuzzyTokenPredicate fuzzyTokenPredicate = new FuzzyTokenPredicate(query, attributeNames, RelationManager.getInstance().getTableAnalyzerString(tableName), threshold, RESULTS);
    FuzzyTokenMatcher fuzzyTokenMatcher = new FuzzyTokenMatcher(fuzzyTokenPredicate);
    fuzzyTokenMatcher.setLimit(limit);
    fuzzyTokenMatcher.setOffset(offset);
    fuzzyTokenMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    fuzzyTokenMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = fuzzyTokenMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the matcher even when reading a tuple fails, so the operator is not left open.
        fuzzyTokenMatcher.close();
    }
    return results;
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
the class DictionaryMatcherSourceOperator method open.
/**
 * Opens this operator and builds the underlying operator that will produce the results.
 * Does nothing when the cursor is not in the CLOSED state.
 *
 * The first dictionary entry is fetched, then one of two pipelines is set up
 * depending on the predicate's keyword matching type:
 * a scan source followed by a DictionaryMatcher (SUBSTRING_SCANBASED and REGEX),
 * or a KeywordMatcherSourceOperator for the current dictionary entry (all other types).
 * The output schema is taken from whichever operator was opened.
 *
 * @throws TexeraException if opening the underlying operator fails
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }

    currentDictionaryEntry = predicate.getDictionary().getNextEntry();

    KeywordMatchingType matchingType = predicate.getKeywordMatchingType();
    boolean scanBased = matchingType == KeywordMatchingType.SUBSTRING_SCANBASED
            || matchingType == KeywordMatchingType.REGEX;

    if (!scanBased) {
        // Other keyword matching types (CONJUNCTION and PHRASE) use an
        // index-based keyword source operator for the current dictionary entry.
        KeywordSourcePredicate keywordPredicate = new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                matchingType,
                predicate.getTableName(),
                predicate.getSpanListName());
        keywordSource = new KeywordMatcherSourceOperator(keywordPredicate);
        keywordSource.open();
        // The output schema is the keyword source's schema in this case.
        outputSchema = keywordSource.getOutputSchema();
    } else {
        // Substring and regex matching: scan the whole table and feed it to a dictionary matcher.
        indexSource = new ScanBasedSourceOperator(new ScanSourcePredicate(predicate.getTableName()));
        DictionaryPredicate matcherPredicate = new DictionaryPredicate(
                predicate.getDictionary(),
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                matchingType,
                predicate.getSpanListName());
        dictionaryMatcher = new DictionaryMatcher(matcherPredicate);
        dictionaryMatcher.setInputOperator(indexSource);
        dictionaryMatcher.open();
        outputSchema = dictionaryMatcher.getOutputSchema();
    }

    cursor = OPENED;
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
the class SamplerTest method matchSamplerTable.
/**
 * Tests whether the sampled tuples are equal to the first K tuples of the sampler table,
 * in both order and content, where K is the size of {@code sampleList}.
 *
 * @param sampleList the sampled tuples to compare against the table scan
 * @return true if every sampled tuple equals the table tuple at the same position;
 *         false at the first mismatch (including when the table yields fewer tuples,
 *         since a sampled tuple is then compared against null)
 * @throws TexeraException if scanning the sampler table fails
 */
public static boolean matchSamplerTable(List<Tuple> sampleList) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(SAMPLER_TABLE));
    scanSource.open();
    try {
        for (Tuple sampledTuple : sampleList) {
            Tuple tableTuple = scanSource.getNextTuple();
            if (!sampledTuple.equals(tableTuple)) {
                return false;
            }
        }
        return true;
    } finally {
        // Single close point: runs on match, mismatch, and when the scan throws.
        scanSource.close();
    }
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
the class SamplerTest method computeSampleResults.
/**
 * Scans the given table, feeds the tuples into a Sampler, and collects the sampler's output.
 *
 * @param tableName name of the table to scan
 * @param k value passed to the {@code SamplerPredicate}
 * @param sampleType sample type passed to the {@code SamplerPredicate}
 * @return every tuple produced by the sampler, in the order it was produced
 * @throws TexeraException if any operator in the chain fails
 */
public static List<Tuple> computeSampleResults(String tableName, int k, SampleType sampleType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    Sampler tupleSampler = new Sampler(new SamplerPredicate(k, sampleType));
    tupleSampler.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    tupleSampler.open();
    try {
        Tuple tuple;
        while ((tuple = tupleSampler.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the sampler even when reading a tuple fails, so the operator is not left open.
        tupleSampler.close();
    }
    return results;
}
Aggregations