Search in sources :

Example 26 with ScanSourcePredicate

Use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in the project textdb by TextDB.

The method getScanSourceResults of the class DictionaryMatcherTestHelper.

/**
 * Runs a DictionaryMatcher on top of a ScanBasedSourceOperator and collects
 * every tuple the matcher emits.
 *
 * The scan source is built from {@code tableName} and set as the matcher's
 * input operator; the matcher is then opened, drained until
 * {@code getNextTuple()} returns {@code null}, and closed. The matcher is
 * closed even if draining fails, so an exception raised while reading tuples
 * does not leave the operator open.
 *
 * @param tableName      name of the table to scan; also used to look up the
 *                       table's analyzer string via the RelationManager
 * @param dictionary     dictionary passed to the DictionaryPredicate
 * @param attributeNames attribute names passed to the DictionaryPredicate
 * @param matchingType   keyword matching type passed to the DictionaryPredicate
 * @param limit          value forwarded to {@code DictionaryMatcher.setLimit}
 * @param offset         value forwarded to {@code DictionaryMatcher.setOffset}
 * @return the tuples produced by the matcher, in the order they were returned
 *         (an empty list if the matcher produced none)
 * @throws TexeraException propagated from the underlying operators
 */
public static List<Tuple> getScanSourceResults(String tableName, Dictionary dictionary, List<String> attributeNames, KeywordMatchingType matchingType, int limit, int offset) throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    String luceneAnalyzerStr = relationManager.getTableAnalyzerString(tableName);
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    // RESULTS is defined outside this method (presumably the name of the
    // result/span attribute added by the matcher -- confirm against the class).
    DictionaryPredicate dictionaryPredicate = new DictionaryPredicate(dictionary, attributeNames, luceneAnalyzerStr, matchingType, RESULTS);
    DictionaryMatcher dictionaryMatcher = new DictionaryMatcher(dictionaryPredicate);
    dictionaryMatcher.setLimit(limit);
    dictionaryMatcher.setOffset(offset);
    dictionaryMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    dictionaryMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = dictionaryMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Always release the operator chain, including when getNextTuple() throws.
        dictionaryMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) DictionaryPredicate(edu.uci.ics.texera.dataflow.dictionarymatcher.DictionaryPredicate) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager)

Aggregations

ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate)26 ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator)25 Tuple (edu.uci.ics.texera.api.tuple.Tuple)21 ArrayList (java.util.ArrayList)15 Test (org.junit.Test)9 IField (edu.uci.ics.texera.api.field.IField)3 TextField (edu.uci.ics.texera.api.field.TextField)3 Schema (edu.uci.ics.texera.api.schema.Schema)3 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)2 RelationManager (edu.uci.ics.texera.storage.RelationManager)2 ISourceOperator (edu.uci.ics.texera.api.dataflow.ISourceOperator)1 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)1 StringField (edu.uci.ics.texera.api.field.StringField)1 Span (edu.uci.ics.texera.api.span.Span)1 DictionaryPredicate (edu.uci.ics.texera.dataflow.dictionarymatcher.DictionaryPredicate)1 FuzzyTokenPredicate (edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenPredicate)1 KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator)1 KeywordSourcePredicate (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate)1 NlpEntityOperator (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator)1 NlpEntityPredicate (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityPredicate)1