Search in sources :

Example 1 with ScanBasedSourceOperator

use of edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class DictionaryMatcherSourceOperator method open.

/**
 * Opens the dictionary matcher. Must be called before getNextTuple().
 *
 * <p>The first dictionary entry is fetched up front. Depending on the
 * predicate's keyword matching type, either a scan-based source (substring
 * matching) or an index-based keyword matcher source (all other types) is
 * created and opened, and the input/output schemas are derived from it.
 *
 * @throws DataFlowException if the dictionary has no entries, if the span
 *         list attribute name already exists in the scanned schema, or if
 *         any call made here fails (the failure is wrapped, keeping its
 *         message and cause)
 */
@Override
public void open() throws DataFlowException {
    try {
        currentDictionaryEntry = predicate.getDictionary().getNextEntry();
        if (currentDictionaryEntry == null) {
            throw new DataFlowException("Dictionary is empty");
        }

        boolean scanBased = predicate.getKeywordMatchingType() == KeywordMatchingType.SUBSTRING_SCANBASED;
        if (!scanBased) {
            // Non-substring matching types are served by a keyword matcher
            // source for the current dictionary entry.
            KeywordSourcePredicate keywordPredicate = new KeywordSourcePredicate(
                    currentDictionaryEntry,
                    predicate.getAttributeNames(),
                    predicate.getAnalyzerString(),
                    predicate.getKeywordMatchingType(),
                    predicate.getTableName(),
                    predicate.getSpanListName());
            keywordSource = new KeywordMatcherSourceOperator(keywordPredicate);
            keywordSource.open();
            // The keyword matcher's output schema is used as both the input
            // and the output schema of this operator.
            inputSchema = keywordSource.getOutputSchema();
            outputSchema = keywordSource.getOutputSchema();
            return;
        }

        // Substring matching reads every tuple through a scan source.
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(predicate.getTableName());
        indexSource = new ScanBasedSourceOperator(scanPredicate);
        indexSource.open();

        // The output schema is the scanned schema plus a span list
        // attribute, so that attribute name must not already be present.
        inputSchema = indexSource.getOutputSchema();
        outputSchema = inputSchema;
        if (inputSchema.containsField(predicate.getSpanListName())) {
            throw new DataFlowException(ErrorMessages.DUPLICATE_ATTRIBUTE(predicate.getSpanListName(), inputSchema));
        }
        Attribute spanListAttribute = new Attribute(predicate.getSpanListName(), AttributeType.LIST);
        outputSchema = Utils.addAttributeToSchema(outputSchema, spanListAttribute);
    } catch (Exception failure) {
        throw new DataFlowException(failure.getMessage(), failure);
    }
}
Also used : KeywordSourcePredicate(edu.uci.ics.textdb.exp.keywordmatcher.KeywordSourcePredicate) Attribute(edu.uci.ics.textdb.api.schema.Attribute) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) ScanBasedSourceOperator(edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate) DataFlowException(edu.uci.ics.textdb.api.exception.DataFlowException) TextDBException(edu.uci.ics.textdb.api.exception.TextDBException) KeywordMatcherSourceOperator(edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator)

Example 2 with ScanBasedSourceOperator

use of edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class NlpExtractorPerformanceTest method matchNLP.

/**
 * Runs NLP entity extraction of the given token type over the ABSTRACT
 * attribute of every tuple in a table, and adds the elapsed time and the
 * number of extracted spans to the running totals.
 *
 * <p>The elapsed time (in seconds) covers opening the operator, draining all
 * tuples and closing the operator. The totals are only updated when the whole
 * run completes without an exception.
 *
 * @param tableName name of the table to scan
 * @param tokenType entity type passed to the NLP entity predicate
 * @throws Exception if opening, iterating or closing the operator fails
 */
public static void matchNLP(String tableName, NlpEntityType tokenType) throws Exception {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    ISourceOperator sourceOperator = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    NlpEntityPredicate nlpEntityPredicate = new NlpEntityPredicate(tokenType, attributeNames, null);
    NlpEntityOperator nlpEntityOperator = new NlpEntityOperator(nlpEntityPredicate);
    nlpEntityOperator.setInputOperator(sourceOperator);

    long startMatchTime = System.currentTimeMillis();
    int counter = 0;
    nlpEntityOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = nlpEntityOperator.getNextTuple()) != null) {
            ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
            List<Span> spanList = spanListField.getValue();
            counter += spanList.size();
        }
    } finally {
        // Close the operator even when iteration fails, so the opened
        // operator chain is not left open.
        nlpEntityOperator.close();
    }
    long endMatchTime = System.currentTimeMillis();

    double matchTime = (endMatchTime - startMatchTime) / 1000.0;
    totalMatchingTime += matchTime;
    totalResults += counter;
}
Also used : NlpEntityPredicate(edu.uci.ics.textdb.exp.nlp.entity.NlpEntityPredicate) Span(edu.uci.ics.textdb.api.span.Span) ScanBasedSourceOperator(edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator) ISourceOperator(edu.uci.ics.textdb.api.dataflow.ISourceOperator) NlpEntityOperator(edu.uci.ics.textdb.exp.nlp.entity.NlpEntityOperator) ScanSourcePredicate(edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 3 with ScanBasedSourceOperator

use of edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class RegexSplitOperatorTest method test8.

/*
 * ID test: verifies that no tuple produced by the regex split shares its _ID
 * with any tuple of the original table.
 *
 * The scan source is closed in a finally block so that a failing assertion
 * or an iteration error does not leave it open.
 */
@Test
public void test8() throws TextDBException {
    String splitRegex = "ana";
    String splitAttrName = TestConstantsRegexSplit.DESCRIPTION;
    List<Tuple> results = computeRegexSplitResults(REGEX_TABLE, splitAttrName, splitRegex, RegexSplitPredicate.SplitType.STANDALONE);

    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(REGEX_TABLE));
    scanSource.open();
    try {
        Tuple tupleTable;
        while ((tupleTable = scanSource.getNextTuple()) != null) {
            // Compare every split result against the current original tuple.
            for (Tuple tuple : results) {
                Assert.assertFalse(tuple.getField(SchemaConstants._ID).equals(tupleTable.getField(SchemaConstants._ID)));
            }
        }
    } finally {
        scanSource.close();
    }
}
Also used : Tuple(edu.uci.ics.textdb.api.tuple.Tuple) ScanBasedSourceOperator(edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate) Test(org.junit.Test)

Example 4 with ScanBasedSourceOperator

use of edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class SamplerTest method computeSampleResults.

/**
 * Scans a table through a Sampler operator and collects every tuple the
 * sampler emits.
 *
 * @param tableName  name of the table to scan
 * @param k          sample size passed to the sampler predicate
 * @param sampleType sampling strategy passed to the sampler predicate
 * @return the tuples returned by the sampler, in the order they were emitted
 * @throws TextDBException if opening, iterating or closing the sampler fails
 */
public static List<Tuple> computeSampleResults(String tableName, int k, SampleType sampleType) throws TextDBException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    Sampler tupleSampler = new Sampler(new SamplerPredicate(k, sampleType));
    tupleSampler.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    tupleSampler.open();
    try {
        Tuple tuple;
        while ((tuple = tupleSampler.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the sampler even if iteration fails part-way.
        tupleSampler.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.textdb.api.tuple.Tuple)

Example 5 with ScanBasedSourceOperator

use of edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class KeywordTestHelper method getScanSourceResults.

/**
 * Runs a keyword query over a table using a scan source feeding a
 * KeywordMatcher, and collects every matching tuple.
 *
 * <p>The analyzer string is looked up from the RelationManager for the given
 * table; RESULTS is passed to the keyword predicate along with the query,
 * attribute names, matching type, limit and offset.
 *
 * @param tableName      name of the table to scan
 * @param keywordQuery   keyword query to match
 * @param attributeNames attributes to match the query against
 * @param matchingType   keyword matching type
 * @param limit          limit passed to the keyword predicate
 * @param offset         offset passed to the keyword predicate
 * @return the tuples returned by the keyword matcher, in emission order
 * @throws TextDBException if opening, iterating or closing the matcher fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String keywordQuery, List<String> attributeNames, KeywordMatchingType matchingType, int limit, int offset) throws TextDBException {
    RelationManager relationManager = RelationManager.getRelationManager();
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    KeywordPredicate keywordPredicate = new KeywordPredicate(keywordQuery, attributeNames, relationManager.getTableAnalyzerString(tableName), matchingType, RESULTS, limit, offset);
    KeywordMatcher keywordMatcher = new KeywordMatcher(keywordPredicate);
    keywordMatcher.setInputOperator(scanSource);

    List<Tuple> results = new ArrayList<>();
    keywordMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = keywordMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the matcher even if iteration fails part-way.
        keywordMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.textdb.api.tuple.Tuple) RelationManager(edu.uci.ics.textdb.storage.RelationManager)

Aggregations

ScanBasedSourceOperator (edu.uci.ics.textdb.exp.source.scan.ScanBasedSourceOperator): 20; ScanSourcePredicate (edu.uci.ics.textdb.exp.source.scan.ScanSourcePredicate): 20; Tuple (edu.uci.ics.textdb.api.tuple.Tuple): 17; ArrayList (java.util.ArrayList): 11; Test (org.junit.Test): 6; IField (edu.uci.ics.textdb.api.field.IField): 3; TextField (edu.uci.ics.textdb.api.field.TextField): 3; Schema (edu.uci.ics.textdb.api.schema.Schema): 3; IOperator (edu.uci.ics.textdb.api.dataflow.IOperator): 2; DataFlowException (edu.uci.ics.textdb.api.exception.DataFlowException): 2; RelationManager (edu.uci.ics.textdb.storage.RelationManager): 2; ISourceOperator (edu.uci.ics.textdb.api.dataflow.ISourceOperator): 1; TextDBException (edu.uci.ics.textdb.api.exception.TextDBException): 1; StringField (edu.uci.ics.textdb.api.field.StringField): 1; Attribute (edu.uci.ics.textdb.api.schema.Attribute): 1; Span (edu.uci.ics.textdb.api.span.Span): 1; DictionaryPredicate (edu.uci.ics.textdb.exp.dictionarymatcher.DictionaryPredicate): 1; FuzzyTokenPredicate (edu.uci.ics.textdb.exp.fuzzytokenmatcher.FuzzyTokenPredicate): 1; KeywordMatcherSourceOperator (edu.uci.ics.textdb.exp.keywordmatcher.KeywordMatcherSourceOperator): 1; KeywordSourcePredicate (edu.uci.ics.textdb.exp.keywordmatcher.KeywordSourcePredicate): 1