Search in sources :

Example 21 with ScanBasedSourceOperator

use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class RegexMatcherTestHelper method getScanSourceResults.

/**
 * Runs a {@link RegexMatcher} on top of a scan of the given table and collects
 * every tuple the matcher emits.
 *
 * @param tableName      name handed to the {@link ScanSourcePredicate} of the scan source
 * @param regex          regular expression handed to the {@link RegexPredicate}
 * @param attributeNames attribute names handed to the {@link RegexPredicate}
 * @param limit          value passed to {@code RegexMatcher.setLimit}
 * @param offset         value passed to {@code RegexMatcher.setOffset}
 * @return the tuples returned by the matcher, in the order they were produced
 * @throws TexeraException if opening, reading from, or closing the operator fails
 */
public static List<Tuple> getScanSourceResults(String tableName, String regex, List<String> attributeNames, int limit, int offset) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    RegexPredicate regexPredicate = new RegexPredicate(regex, attributeNames, RESULTS);
    RegexMatcher regexMatcher = new RegexMatcher(regexPredicate);
    regexMatcher.setLimit(limit);
    regexMatcher.setOffset(offset);
    regexMatcher.setInputOperator(scanSource);
    List<Tuple> results = new ArrayList<>();
    regexMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = regexMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the operator even when getNextTuple() throws, so a failing
        // read does not leave it open.
        regexMatcher.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 22 with ScanBasedSourceOperator

use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class RegexSplitOperatorTest method computeRegexSplitResultsOnetoOne.

/**
 * Runs a {@link RegexSplitOperator} configured with {@code RegexOutputType.ONE_TO_ONE}
 * on top of a scan of the given table and collects every tuple it emits.
 *
 * @param tableName     name handed to the {@link ScanSourcePredicate} of the scan source
 * @param splitAttrName attribute name handed to the {@link RegexSplitPredicate}
 * @param splitRegex    regular expression handed to the {@link RegexSplitPredicate}
 * @param splitType     split type handed to the {@link RegexSplitPredicate}
 * @return the tuples returned by the split operator, in the order they were produced
 * @throws TexeraException if opening, reading from, or closing the operator fails
 */
public static List<Tuple> computeRegexSplitResultsOnetoOne(String tableName, String splitAttrName, String splitRegex, RegexSplitPredicate.SplitType splitType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    RegexSplitOperator regexSplit = new RegexSplitOperator(new RegexSplitPredicate(RegexOutputType.ONE_TO_ONE, splitRegex, splitAttrName, splitType, RESULT_ATTR));
    regexSplit.setInputOperator(scanSource);
    List<Tuple> results = new ArrayList<>();
    regexSplit.open();
    try {
        Tuple tuple;
        while ((tuple = regexSplit.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the operator even when getNextTuple() throws, so a failing
        // read does not leave it open.
        regexSplit.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 23 with ScanBasedSourceOperator

use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class RegexSplitOperatorTest method test8.

/*
 * ID test: checks that the ID of every newly-split tuple does not conflict
 * with the ID of any tuple in the original table.
 */
@Test
public void test8() throws TexeraException {
    String splitRegex = "ana";
    String splitAttrName = TestConstantsRegexSplit.DESCRIPTION;
    List<Tuple> results = computeRegexSplitResultsOneToMany(REGEX_TABLE, splitAttrName, splitRegex, RegexSplitPredicate.SplitType.STANDALONE);
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(REGEX_TABLE));
    scanSource.open();
    try {
        Tuple tupleTable;
        // Compare each tuple of the original table against every split result.
        while ((tupleTable = scanSource.getNextTuple()) != null) {
            for (Tuple tuple : results) {
                Assert.assertFalse(tuple.getField(SchemaConstants._ID).equals(tupleTable.getField(SchemaConstants._ID)));
            }
        }
    } finally {
        // A failed assertion or a scan error exits the loop by throwing;
        // close the scan source on that path as well.
        scanSource.close();
    }
}
Also used : Tuple(edu.uci.ics.texera.api.tuple.Tuple) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Test(org.junit.Test)

Example 24 with ScanBasedSourceOperator

use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.

the class RegexSplitOperatorTest method computeRegexSplitResultsOneToMany.

/**
 * Runs a {@link RegexSplitOperator} configured with {@code RegexOutputType.ONE_TO_MANY}
 * on top of a scan of the given table and collects every tuple it emits.
 *
 * @param tableName     name handed to the {@link ScanSourcePredicate} of the scan source
 * @param splitAttrName attribute name handed to the {@link RegexSplitPredicate}
 * @param splitRegex    regular expression handed to the {@link RegexSplitPredicate}
 * @param splitType     split type handed to the {@link RegexSplitPredicate}
 * @return the tuples returned by the split operator, in the order they were produced
 * @throws TexeraException if opening, reading from, or closing the operator fails
 */
public static List<Tuple> computeRegexSplitResultsOneToMany(String tableName, String splitAttrName, String splitRegex, RegexSplitPredicate.SplitType splitType) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));
    RegexSplitOperator regexSplit = new RegexSplitOperator(new RegexSplitPredicate(RegexOutputType.ONE_TO_MANY, splitRegex, splitAttrName, splitType, RESULT_ATTR));
    regexSplit.setInputOperator(scanSource);
    List<Tuple> results = new ArrayList<>();
    regexSplit.open();
    try {
        Tuple tuple;
        while ((tuple = regexSplit.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the operator even when getNextTuple() throws, so a failing
        // read does not leave it open.
        regexSplit.close();
    }
    return results;
}
Also used : ArrayList(java.util.ArrayList) ScanBasedSourceOperator(edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) ScanSourcePredicate(edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Aggregations

ScanBasedSourceOperator (edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator) 24, ScanSourcePredicate (edu.uci.ics.texera.dataflow.source.scan.ScanSourcePredicate) 24, Tuple (edu.uci.ics.texera.api.tuple.Tuple) 21, ArrayList (java.util.ArrayList) 15, Test (org.junit.Test) 8, IField (edu.uci.ics.texera.api.field.IField) 3, TextField (edu.uci.ics.texera.api.field.TextField) 3, Schema (edu.uci.ics.texera.api.schema.Schema) 3, IOperator (edu.uci.ics.texera.api.dataflow.IOperator) 2, RelationManager (edu.uci.ics.texera.storage.RelationManager) 2, ISourceOperator (edu.uci.ics.texera.api.dataflow.ISourceOperator) 1, DataflowException (edu.uci.ics.texera.api.exception.DataflowException) 1, StringField (edu.uci.ics.texera.api.field.StringField) 1, Span (edu.uci.ics.texera.api.span.Span) 1, DictionaryPredicate (edu.uci.ics.texera.dataflow.dictionarymatcher.DictionaryPredicate) 1, FuzzyTokenPredicate (edu.uci.ics.texera.dataflow.fuzzytokenmatcher.FuzzyTokenPredicate) 1, KeywordMatcherSourceOperator (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordMatcherSourceOperator) 1, KeywordSourcePredicate (edu.uci.ics.texera.dataflow.keywordmatcher.KeywordSourcePredicate) 1, NlpEntityOperator (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityOperator) 1, NlpEntityPredicate (edu.uci.ics.texera.dataflow.nlp.entity.NlpEntityPredicate) 1