use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.
the class RegexSplitOperatorTest method computeRegexSplitResultsOneToMany.
/**
 * Scans {@code tableName}, feeds the scan into a {@code RegexSplitOperator} configured with
 * {@code RegexOutputType.ONE_TO_MANY}, and collects every tuple the operator emits.
 *
 * @param tableName     name of the table handed to the scan source predicate
 * @param splitAttrName attribute name handed to the regex split predicate
 * @param splitRegex    regular expression handed to the regex split predicate
 * @param splitType     split type handed to the regex split predicate
 * @return the tuples returned by the operator, in the order they were returned
 * @throws TexeraException propagated from the operators
 */
public static List<Tuple> computeRegexSplitResultsOneToMany(String tableName, String splitAttrName, String splitRegex, RegexSplitPredicate.SplitType splitType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    RegexSplitOperator regexSplit = new RegexSplitOperator(new RegexSplitPredicate(splitRegex, splitAttrName, RegexOutputType.ONE_TO_MANY, splitType, RESULT_ATTR));
    regexSplit.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    // open() stays outside the try: only a successfully opened operator is closed below.
    regexSplit.open();
    try {
        Tuple tuple;
        // A null tuple marks the end of the operator's output.
        while ((tuple = regexSplit.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close even when getNextTuple() throws, so the operator is not left open.
        regexSplit.close();
    }
    return results;
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.
the class SamplerTest method computeSampleResults.
/**
 * Scans {@code tableName}, feeds the scan into a {@code Sampler} built from
 * {@code new SamplerPredicate(k, sampleType)}, and collects every tuple the sampler emits.
 *
 * @param tableName  name of the table handed to the scan source predicate
 * @param k          first argument of the sampler predicate
 * @param sampleType sample type handed to the sampler predicate
 * @return the tuples returned by the sampler, in the order they were returned
 * @throws TexeraException propagated from the operators
 */
public static List<Tuple> computeSampleResults(String tableName, int k, SampleType sampleType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    Sampler tupleSampler = new Sampler(new SamplerPredicate(k, sampleType));
    tupleSampler.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    // open() stays outside the try: only a successfully opened operator is closed below.
    tupleSampler.open();
    try {
        Tuple tuple;
        // A null tuple marks the end of the sampler's output.
        while ((tuple = tupleSampler.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close even when getNextTuple() throws, so the operator is not left open.
        tupleSampler.close();
    }
    return results;
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.
the class SamplerTest method matchSamplerTable.
/**
 * Tests whether the sampled tuples are equal to the first K tuples of the sampler table,
 * in both order and content, where K is the size of {@code sampleList}.
 *
 * @param sampleList the sampled tuples, in the order they were produced
 * @return {@code true} if every sampled tuple equals the tuple the table scan returns at
 *         the same position; {@code false} as soon as one pair differs
 * @throws TexeraException propagated from the scan operator
 */
public static boolean matchSamplerTable(List<Tuple> sampleList) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(SAMPLER_TABLE));
    // open() stays outside the try: only a successfully opened operator is closed below.
    scanSource.open();
    try {
        for (Tuple sampledTuple : sampleList) {
            // NOTE(review): if the table has fewer tuples than the sample, the scan presumably
            // returns null here, and the comparison is then expected to fail — confirm.
            Tuple tableTuple = scanSource.getNextTuple();
            if (!sampledTuple.equals(tableTuple)) {
                return false;
            }
        }
        return true;
    } finally {
        // Single close point for every exit path, including exceptions.
        scanSource.close();
    }
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.
the class SamplerTest method containedInSamplerTable.
/**
 * Tests whether all the sampled tuples are in the sampler table.
 *
 * <p>Reads every tuple of the sampler table into a list and delegates the check to
 * {@code TestUtils.containsAll(tableTuples, sampleList)}.
 *
 * @param sampleList the sampled tuples to look for
 * @return the result of {@code TestUtils.containsAll} for the table tuples and the sample
 * @throws TexeraException propagated from the scan operator
 */
public static boolean containedInSamplerTable(List<Tuple> sampleList) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(SAMPLER_TABLE));
    List<Tuple> tableTuples = new ArrayList<>();
    // open() stays outside the try: only a successfully opened operator is closed below.
    scanSource.open();
    try {
        Tuple nextTuple;
        // A null tuple marks the end of the scan.
        while ((nextTuple = scanSource.getNextTuple()) != null) {
            tableTuples.add(nextTuple);
        }
    } finally {
        // Close even when getNextTuple() throws, so the operator is not left open.
        scanSource.close();
    }
    return TestUtils.containsAll(tableTuples, sampleList);
}
use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.
the class DictionaryMatcherSourceOperator method open.
/**
 * Opens this operator by building and opening the underlying operator that matches the
 * predicate's keyword matching type, and records that operator's output schema.
 *
 * <p>Does nothing when the cursor is not in the {@code CLOSED} state.
 *
 * @throws TexeraException propagated from the underlying operators
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }

    currentDictionaryEntry = predicate.getDictionary().getNextEntry();

    KeywordMatchingType matchingType = predicate.getKeywordMatchingType();
    boolean scanBased = matchingType == KeywordMatchingType.SUBSTRING_SCANBASED
            || matchingType == KeywordMatchingType.REGEX;

    if (!scanBased) {
        // Remaining keyword matching types (CONJUNCTION and PHRASE):
        // use an index-based keyword source operator for the current dictionary entry.
        KeywordSourcePredicate keywordPredicate = new KeywordSourcePredicate(
                currentDictionaryEntry,
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                matchingType,
                predicate.getTableName(),
                predicate.getSpanListName());
        keywordSource = new KeywordMatcherSourceOperator(keywordPredicate);
        keywordSource.open();
        // The keyword source produces the output here, so its schema is the output schema.
        outputSchema = keywordSource.getOutputSchema();
    } else {
        // Substring and regex matching: a scan source operator feeds a dictionary matcher.
        indexSource = new ScanBasedSourceOperator(new ScanSourcePredicate(predicate.getTableName()));
        DictionaryPredicate dictionaryPredicate = new DictionaryPredicate(
                predicate.getDictionary(),
                predicate.getAttributeNames(),
                predicate.getAnalyzerString(),
                matchingType,
                predicate.getSpanListName());
        dictionaryMatcher = new DictionaryMatcher(dictionaryPredicate);
        dictionaryMatcher.setInputOperator(indexSource);
        dictionaryMatcher.open();
        outputSchema = dictionaryMatcher.getOutputSchema();
    }

    cursor = OPENED;
}
Aggregations