Search in sources :

Example 21 with ScanSourcePredicate

use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.

the class NlpExtractorPerformanceTest method matchNLP.

/**
 * Runs NLP entity extraction for the given entity type over every tuple of a table
 * and accumulates the outcome into the class-level performance counters.
 *
 * <p>The measured interval starts just before the operator is opened and ends after
 * it has been closed. On success the elapsed time (in seconds) is added to
 * {@code totalMatchingTime} and the summed size of each tuple's
 * {@code SchemaConstants.SPAN_LIST} field is added to {@code totalResults}.
 * If extraction fails, the counters are left untouched and the exception propagates.
 *
 * @param tableName name of the table handed to the scan source
 * @param tokenType entity type handed to the {@code NlpEntityPredicate}
 * @throws Exception if opening, reading from, or closing the operator fails
 */
public static void matchNLP(String tableName, NlpEntityType tokenType) throws Exception {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    ISourceOperator sourceOperator = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(tokenType, attributeNames, SchemaConstants.SPAN_LIST);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(sourceOperator);

    long startMatchTime = System.currentTimeMillis();
    int counter = 0;
    // open() stays outside the try so that close() is only invoked on an operator
    // that was opened successfully.
    nlpEntityOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
            ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
            List<Span> spanList = spanListField.getValue();
            counter += spanList.size();
        }
    } finally {
        // Release the operator even when iteration throws part-way through.
        nlpEntityOperator.close();
    }
    long endMatchTime = System.currentTimeMillis();

    // Milliseconds -> seconds.
    double matchTime = (endMatchTime - startMatchTime) / 1000.0;
    totalMatchingTime += matchTime;
    totalResults += counter;
}
Also used : NlpEntityPredicate(edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityPredicate) Span(edu.uci.ics.texera.api.span.Span) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ISourceOperator(edu.uci.ics.texera.api.dataflow.ISourceOperator) NlpEntityOperator(edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 22 with ScanSourcePredicate

use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.

the class FuzzyTokenMatcherTestHelper method getScanSourceResults.

/**
 * Gets the query results by scanning the table and passing the data into a
 * FuzzyTokenMatcher.
 *
 * @param tableName      name of the table to scan; also used to look up the table's
 *                       analyzer string through the {@code RelationManager}
 * @param query          query string handed to the {@code FuzzyTokenPredicate}
 * @param threshold      threshold handed to the {@code FuzzyTokenPredicate}
 * @param attributeNames attribute names handed to the {@code FuzzyTokenPredicate}
 * @param limit          value passed to {@code FuzzyTokenMatcher.setLimit}
 * @param offset         value passed to {@code FuzzyTokenMatcher.setOffset}
 * @return every tuple produced by the matcher, in the order it produced them
 * @throws TexeraException if building, opening, reading from, or closing the operators fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String query, double threshold, List<String> attributeNames, int limit, int offset) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    FuzzyTokenPredicate fuzzyTokenPredicate = new FuzzyTokenPredicate(query, attributeNames, RelationManager.getInstance().getTableAnalyzerString(tableName), threshold, RESULTS);
    FuzzyTokenMatcher fuzzyTokenMatcher = new FuzzyTokenMatcher(fuzzyTokenPredicate);
    fuzzyTokenMatcher.setLimit(limit);
    fuzzyTokenMatcher.setOffset(offset);
    fuzzyTokenMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    // open() stays outside the try so that close() is only invoked on a matcher
    // that was opened successfully.
    fuzzyTokenMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = fuzzyTokenMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Release the matcher even when iteration throws part-way through.
        fuzzyTokenMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple) FuzzyTokenPredicate(edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenPredicate)

Example 23 with ScanSourcePredicate

use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.

the class PredicateBaseTest method testScanSource.

/**
 * Passes a ScanSourcePredicate built for the table name "tableName" to
 * {@code testPredicate}.
 */
@Test
public void testScanSource() throws Exception {
    testPredicate(new ScanSourcePredicate("tableName"));
}
Also used : ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Test(org.junit.Test)

Example 24 with ScanSourcePredicate

use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.

the class KeywordTestHelper method getScanSourceResults.

/**
 * Gets the query results by scanning the table and passing the data into a
 * KeywordMatcher.
 *
 * @param tableName      name of the table to scan; also used to look up the table's
 *                       analyzer string through the {@code RelationManager}
 * @param keywordQuery   query string handed to the {@code KeywordPredicate}
 * @param attributeNames attribute names handed to the {@code KeywordPredicate}
 * @param matchingType   matching type handed to the {@code KeywordPredicate}
 * @param limit          value passed to {@code KeywordMatcher.setLimit}
 * @param offset         value passed to {@code KeywordMatcher.setOffset}
 * @return every tuple produced by the matcher, in the order it produced them
 * @throws TexeraException if building, opening, reading from, or closing the operators fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String keywordQuery, List<String> attributeNames, KeywordMatchingType matchingType, int limit, int offset) throws TexeraException {
    RelationManager relationManager = RelationManager.getInstance();
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    KeywordPredicate keywordPredicate = new KeywordPredicate(keywordQuery, attributeNames, relationManager.getTableAnalyzerString(tableName), matchingType, RESULTS);
    KeywordMatcher keywordMatcher = new KeywordMatcher(keywordPredicate);
    keywordMatcher.setLimit(limit);
    keywordMatcher.setOffset(offset);
    keywordMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    // open() stays outside the try so that close() is only invoked on a matcher
    // that was opened successfully.
    keywordMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = keywordMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Release the matcher even when iteration throws part-way through.
        keywordMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple) RelationManager(edu.uci.ics.texera.storage.RelationManager)

Example 25 with ScanSourcePredicate

use of edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate in project textdb by TextDB.

the class NlpEntityTest method getQueryResults.

/**
 * Gets the query results by scanning the table and passing the data into an
 * NlpEntityOperator.
 *
 * @param tableName      name of the table to scan
 * @param attributeNames attribute names handed to the {@code NlpEntityPredicate}
 * @param nlpEntityType  entity type handed to the {@code NlpEntityPredicate}
 * @param limit          value passed to {@code NlpEntityOperator.setLimit}
 * @param offset         value passed to {@code NlpEntityOperator.setOffset}
 * @return every tuple produced by the operator, in the order it produced them
 * @throws Exception if opening, reading from, or closing the operator fails
 */
public List<Tuple> getQueryResults(String tableName, List<String> attributeNames, NlpEntityType nlpEntityType, int limit, int offset) throws Exception {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(nlpEntityType, attributeNames, RESULTS);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(scanSource);
    nlpEntityOperator.setLimit(limit);
    nlpEntityOperator.setOffset(offset);

    List<Tuple> results = new ArrayList<Tuple>();
    // open() stays outside the try so that close() is only invoked on an operator
    // that was opened successfully.
    nlpEntityOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
            results.add(nextTuple);
        }
    } finally {
        // Release the operator even when iteration throws part-way through.
        nlpEntityOperator.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Aggregations

- ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate): 26
- ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator): 25
- Tuple (edu.uci.ics.texera.api.tuple.Tuple): 21
- ArrayList (java.util.ArrayList): 15
- Test (org.junit.Test): 9
- IField (edu.uci.ics.texera.api.field.IField): 3
- TextField (edu.uci.ics.texera.api.field.TextField): 3
- Schema (edu.uci.ics.texera.api.schema.Schema): 3
- IOperator (edu.uci.ics.texera.api.dataflow.IOperator): 2
- RelationManager (edu.uci.ics.texera.storage.RelationManager): 2
- ISourceOperator (edu.uci.ics.texera.api.dataflow.ISourceOperator): 1
- DataflowException (edu.uci.ics.texera.api.exception.DataflowException): 1
- StringField (edu.uci.ics.texera.api.field.StringField): 1
- Span (edu.uci.ics.texera.api.span.Span): 1
- DictionaryPredicate (edu.uci.ics.texera.dataflow.dictionarymatcher.DictionaryPredicate): 1
- FuzzyTokenPredicate (edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenPredicate): 1
- KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator): 1
- KeywordSourcePredicate (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate): 1
- NlpEntityOperator (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator): 1
- NlpEntityPredicate (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityPredicate): 1