Use of edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate in project textdb by TextDB.
The class DictionaryMatcherSourceOperator, method open().
/**
 * @about Opens the dictionary matcher. Must call open() before calling getNextTuple().
 */
@Override
public void open() throws DataFlowException {
    try {
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry == null) {
            throw new DataFlowException("Dictionary is empty");
        }

        if (predicate.getKeywordMatchingType() == KeywordMatchingType.SUBSTRING_SCANBASED) {
            // For substring matching, create a scan-based source operator.
            indexSource = new ScanBasedSourceOperator(new ScanSourcePredicate(predicate.getTableName()));
            indexSource.open();

            // Substring matching's output schema needs to contain a span list.
            inputSchema = indexSource.getOutputSchema();
            outputSchema = inputSchema;
            if (inputSchema.containsField(predicate.getSpanListName())) {
                throw new DataFlowException(
                        ErrorMessages.DUPLICATE_ATTRIBUTE(predicate.getSpanListName(), inputSchema));
            }
            outputSchema = Utils.addAttributeToSchema(outputSchema,
                    new Attribute(predicate.getSpanListName(), AttributeType.LIST));
        } else {
            // For other keyword matching types (conjunction and phrase),
            // create a keyword matcher based on the index.
            keywordSource = new KeywordMatcherSourceOperator(new KeywordSourcePredicate(
                    currentDictionaryEntry,
                    predicate.getAttributeNames(),
                    predicate.getAnalyzerString(),
                    predicate.getKeywordMatchingType(),
                    predicate.getTableName(),
                    predicate.getSpanListName()));
            keywordSource.open();

            // Other keyword matching types use a KeywordMatcher, so the
            // output schema is the same as the KeywordMatcher's schema.
            inputSchema = keywordSource.getOutputSchema();
            outputSchema = keywordSource.getOutputSchema();
        }
    } catch (Exception e) {
        throw new DataFlowException(e.getMessage(), e);
    }
}
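For context, the substring branch above relies only on the scan source and schema utilities, so it can be exercised in isolation. The following is a minimal sketch, assuming the same ScanBasedSourceOperator, ScanSourcePredicate, Utils, Attribute, and Schema APIs shown above; the table name "medline" and span-list attribute name "spanList" are hypothetical placeholders, not values from the original code.

// Minimal sketch of the scan-source branch; "medline" and "spanList" are placeholders.
ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate("medline"));
scanSource.open();

// Start from the scan source's schema and, if needed, append a LIST-typed attribute
// for spans, mirroring the substring branch of open() above.
Schema scanSchema = scanSource.getOutputSchema();
Schema extendedSchema = scanSchema;
if (!scanSchema.containsField("spanList")) {
    extendedSchema = Utils.addAttributeToSchema(scanSchema, new Attribute("spanList", AttributeType.LIST));
}

scanSource.close();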
Use of edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate in project textdb by TextDB.
The class NlpExtractorPerformanceTest, method matchNLP().
/*
 * This function performs NLP entity matching based on the given token type.
 */
public static void matchNLP(String tableName, NlpEntityType tokenType) throws Exception {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);

    ISourceOperator sourceOperator = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(tokenType, attributeNames, null);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(sourceOperator);

    long startMatchTime = System.currentTimeMillis();
    nlpEntityOperator.open();

    Tuple nextTuple = null;
    int counter = 0;
    while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
        ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
        List<Span> spanList = spanListField.getValue();
        counter += spanList.size();
    }

    nlpEntityOperator.close();
    long endMatchTime = System.currentTimeMillis();
    double matchTime = (endMatchTime - startMatchTime) / 1000.0;

    totalMatchingTime += matchTime;
    totalResults += counter;
}
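As a usage note, the helper above can be driven for several entity types against one table. The sketch below is illustrative only: the table name "medline" is a placeholder, and the NlpEntityType constants are assumed values (check the NlpEntityType enum for the actual names).

// Hypothetical driver for the performance helper; table name and entity types are placeholders.
String tableName = "medline";
for (NlpEntityType tokenType : Arrays.asList(NlpEntityType.LOCATION, NlpEntityType.ORGANIZATION)) {
    matchNLP(tableName, tokenType);
}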
Use of edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate in project textdb by TextDB.
The class RegexSplitOperatorTest, method test8().
/*
 * ID test: verify that no newly split tuple's _ID conflicts with an _ID in the original table.
 */
@Test
public void test8() throws TextDBException {
    String splitRegex = "ana";
    String splitAttrName = TestConstantsRegexSplit.DESCRIPTION;
    List<Tuple> results = computeRegexSplitResults(REGEX_TABLE, splitAttrName, splitRegex,
            RegexSplitPredicate.SplitType.STANDALONE);

    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(REGEX_TABLE));
    Tuple tupleTable;
    scanSource.open();
    while ((tupleTable = scanSource.getNextTuple()) != null) {
        for (Tuple tuple : results) {
            Assert.assertFalse(tuple.getField(SchemaConstants._ID).equals(tupleTable.getField(SchemaConstants._ID)));
        }
    }
    scanSource.close();
}
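As a side note, the same check can be written with a single pass over the table by collecting its _ID fields into a set first. The sketch below is an illustrative variant, assuming the scan and field APIs used in the test above and that the _ID field type implements hashCode consistently with equals.

// Illustrative variant of the ID check: one scan of the table, then set-membership tests.
Set<Object> existingIds = new HashSet<>();
ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(REGEX_TABLE));
scanSource.open();
Tuple tableTuple;
while ((tableTuple = scanSource.getNextTuple()) != null) {
    existingIds.add(tableTuple.getField(SchemaConstants._ID));
}
scanSource.close();

for (Tuple splitTuple : results) {
    // Each split tuple must carry an _ID that does not appear in the source table.
    Assert.assertFalse(existingIds.contains(splitTuple.getField(SchemaConstants._ID)));
}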
Use of edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate in project textdb by TextDB.
The class SamplerTest, method computeSampleResults().
public static List<Tuple> computeSampleResults(String tableName, int k, SampleType sampleType) throws TextDBException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    Sampler tupleSampler = new Sampler(new SamplerPredicate(k, sampleType));
    tupleSampler.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    Tuple tuple;
    tupleSampler.open();
    while ((tuple = tupleSampler.getNextTuple()) != null) {
        results.add(tuple);
    }
    tupleSampler.close();
    return results;
}
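As a usage note, a caller would typically assert that the sampler returns at most k tuples. The helper below is an illustrative sketch: its name is hypothetical, and the SampleType value is supplied by the caller so no particular enum constant is assumed.

// Illustrative helper (hypothetical name): verify the sampler never returns more than k tuples.
public static void assertSampleSizeAtMostK(String tableName, int k, SampleType sampleType) throws TextDBException {
    List<Tuple> sample = computeSampleResults(tableName, k, sampleType);
    Assert.assertTrue(sample.size() <= k);
}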
Use of edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate in project textdb by TextDB.
The class KeywordTestHelper, method getScanSourceResults().
public static List<Tuple> getScanSourceResults(String tableName, String keywordQuery, List<String> attributeNames,
        KeywordMatchingType matchingType, int limit, int offset) throws TextDBException {
    RelationManager relationManager = RelationManager.getRelationManager();

    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    KeywordPredicate keywordPredicate = new KeywordPredicate(keywordQuery, attributeNames,
            relationManager.getTableAnalyzerString(tableName), matchingType, RESULTS, limit, offset);
    KeywordMatcher keywordMatcher = new KeywordMatcher(keywordPredicate);
    keywordMatcher.setInputOperator(scanSource);

    Tuple tuple;
    List<Tuple> results = new ArrayList<>();
    keywordMatcher.open();
    while ((tuple = keywordMatcher.getNextTuple()) != null) {
        results.add(tuple);
    }
    keywordMatcher.close();
    return results;
}
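As a usage note, the helper above is typically called with one of the KeywordMatchingType constants, such as SUBSTRING_SCANBASED from the first example. The sketch below is illustrative: the table name, keyword, attribute name, limit, and offset are placeholder values, not taken from the original tests.

// Illustrative caller; all argument values here are placeholders.
List<String> attributeNames = Arrays.asList("content");
List<Tuple> scanResults = getScanSourceResults("medline", "cancer", attributeNames,
        KeywordMatchingType.SUBSTRING_SCANBASED, 100, 0);
System.out.println("scan-based keyword results: " + scanResults.size());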