Use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
Class JoinTestHelper, method getRegexMatcher.
/**
 * Creates a {@link RegexMatcher} that reads its input from a scan of the given table.
 *
 * @param tableName name of the table handed to the scan source predicate
 * @param query     regular expression handed to the regex predicate
 * @param attrName  the single attribute name the regex predicate is built with
 * @return the matcher with its input operator set, or {@code null} if a
 *         {@link DataflowException} was raised while building it (the stack
 *         trace is printed in that case)
 */
public static RegexMatcher getRegexMatcher(String tableName, String query, String attrName) {
    RegexMatcher matcher = null;
    try {
        // Build the source first, then the matcher, then connect the two.
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(tableName);
        ScanBasedSourceOperator tableScan = new ScanBasedSourceOperator(scanPredicate);

        RegexPredicate regexPredicate =
                new RegexPredicate(query, Arrays.asList(attrName), SchemaConstants.SPAN_LIST);
        RegexMatcher candidate = new RegexMatcher(regexPredicate);
        candidate.setInputOperator(tableScan);

        matcher = candidate;
    } catch (DataflowException e) {
        e.printStackTrace();
    }
    return matcher;
}
Use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
Class ProjectionOperatorTest, method testProjection1.
/**
 * Projects the people table onto the description attribute and checks that
 * the returned tuples equal the six expected single-field tuples.
 */
@Test
public void testProjection1() throws Exception {
    List<String> projectionFields = Arrays.asList(TestConstants.DESCRIPTION);
    Schema projectionSchema = new Schema(TestConstants.DESCRIPTION_ATTR);

    // Expected description values, in the order the expected list is built.
    String[] descriptions = {
        "Tall Angry",
        "Short Brown",
        "White Angry",
        "Lin Clooney is Short and lin clooney is Angry",
        "Tall Fair",
        "Short angry"
    };

    Tuple[] expectedTuples = new Tuple[descriptions.length];
    for (int i = 0; i < descriptions.length; i++) {
        IField[] fields = { new TextField(descriptions[i]) };
        expectedTuples[i] = new Tuple(projectionSchema, fields);
    }
    List<Tuple> expectedResults = Arrays.asList(expectedTuples);

    ScanBasedSourceOperator peopleScan =
            new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));
    List<Tuple> returnedResults = getProjectionResults(peopleScan, projectionFields);

    Assert.assertTrue(TestUtils.equals(expectedResults, returnedResults));
}
Use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
Class RegexMatcherTestHelper, method getScanSourceResults.
/**
 * Runs the pipeline scan -> keyword matcher (query 1) -> keyword matcher (query 2)
 * -> regex matcher over the given table and collects every tuple the regex
 * matcher produces.
 *
 * @param tableName      table handed to the scan source predicate
 * @param keywordQuery1  query of the first keyword matcher (fed directly by the scan)
 * @param regex          regular expression for the final regex matcher
 * @param attributeNames attribute names passed to both keyword predicates and the regex predicate
 * @param matchingType   keyword matching type used by both keyword predicates
 * @param spanListName1  span list name passed to the first keyword predicate
 * @param limit          limit set on the regex matcher
 * @param offset         offset set on the regex matcher
 * @param keywordQuery2  query of the second keyword matcher (fed by the first)
 * @param spanListName2  span list name passed to the second keyword predicate
 * @return all tuples returned by the regex matcher, in the order produced
 * @throws TexeraException propagated from the operators
 */
public static List<Tuple> getScanSourceResults(String tableName, String keywordQuery1, String regex, List<String> attributeNames, KeywordMatchingType matchingType, String spanListName1, int limit, int offset, String keywordQuery2, String spanListName2) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    KeywordPredicate firstKeywordPredicate = new KeywordPredicate(keywordQuery1, attributeNames,
            LuceneAnalyzerConstants.standardAnalyzerString(), matchingType, spanListName1);
    KeywordMatcher firstKeywordMatcher = new KeywordMatcher(firstKeywordPredicate);

    KeywordPredicate secondKeywordPredicate = new KeywordPredicate(keywordQuery2, attributeNames,
            LuceneAnalyzerConstants.standardAnalyzerString(), matchingType, spanListName2);
    KeywordMatcher secondKeywordMatcher = new KeywordMatcher(secondKeywordPredicate);

    // Chain the operators: scan -> first keyword matcher -> second keyword matcher.
    secondKeywordMatcher.setInputOperator(firstKeywordMatcher);
    firstKeywordMatcher.setInputOperator(scanSource);

    RegexPredicate regexPredicate = new RegexPredicate(regex, attributeNames, RESULTS);
    RegexMatcher regexMatcher = new RegexMatcher(regexPredicate);
    regexMatcher.setLimit(limit);
    regexMatcher.setOffset(offset);
    regexMatcher.setInputOperator(secondKeywordMatcher);

    List<Tuple> results = new ArrayList<>();
    regexMatcher.open();
    try {
        Tuple tuple;
        while ((tuple = regexMatcher.getNextTuple()) != null) {
            results.add(tuple);
        }
    } finally {
        // Close the pipeline even if reading a tuple fails, so the operators are not left open.
        regexMatcher.close();
    }
    return results;
}
Use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
Class ScanBasedSourceOperatorTest, method testFlow.
/**
 * Scans the people table from open to exhaustion and checks that the tuples
 * returned match the sample people tuples, both in count and in content.
 */
@Test
public void testFlow() throws TexeraException, ParseException {
    List<Tuple> expectedTuples = TestConstants.getSamplePeopleTuples();
    ScanBasedSourceOperator scanBasedSourceOperator = new ScanBasedSourceOperator(new ScanSourcePredicate(PEOPLE_TABLE));

    List<Tuple> returnedTuples = new ArrayList<Tuple>();
    scanBasedSourceOperator.open();
    try {
        Tuple nextTuple;
        while ((nextTuple = scanBasedSourceOperator.getNextTuple()) != null) {
            returnedTuples.add(nextTuple);
        }
    } finally {
        // Close before asserting so a failed assertion or read error cannot leave the scan open.
        scanBasedSourceOperator.close();
    }

    Assert.assertEquals(expectedTuples.size(), returnedTuples.size());
    Assert.assertTrue(TestUtils.equals(expectedTuples, returnedTuples));
}
Use of edu.uci.ics.texera.dataflow.source.scan.ScanBasedSourceOperator in project textdb by TextDB.
Class WordCountTest, method computePayLoadWordCount.
/**
 * Computes word counts for a table by running a {@link WordCountOperator}
 * over a scan of it and collecting the operator's (word, count) output tuples.
 *
 * <p>Only {@code COUNT_TABLE} (standard analyzer) and {@code COUNT_CHINESE_TABLE}
 * (Chinese analyzer) are supported; each uses its own fixed description attribute.
 *
 * @param tableName table to scan; must equal one of the two supported table constants
 * @param attribute not read by this method (the attribute is chosen from the table name)
 * @return map from each word to its count, as reported by the operator
 * @throws IllegalArgumentException if {@code tableName} is not a supported table
 * @throws TexeraException propagated from the operators
 */
public static HashMap<String, Integer> computePayLoadWordCount(String tableName, String attribute) throws TexeraException {
    ScanBasedSourceOperator scanSource = new ScanBasedSourceOperator(new ScanSourcePredicate(tableName));

    final WordCountOperatorPredicate predicate;
    if (tableName.equals(COUNT_TABLE)) {
        predicate = new WordCountOperatorPredicate(TestConstants.DESCRIPTION, LuceneAnalyzerConstants.standardAnalyzerString());
    } else if (tableName.equals(COUNT_CHINESE_TABLE)) {
        predicate = new WordCountOperatorPredicate(TestConstantsChineseWordCount.DESCRIPTION, LuceneAnalyzerConstants.chineseAnalyzerString());
    } else {
        // Previously this case fell through with a null operator and failed with a bare NullPointerException.
        throw new IllegalArgumentException("Unsupported word count table: " + tableName);
    }

    WordCountOperator wordCount = new WordCountOperator(predicate);
    wordCount.setInputOperator(scanSource);

    HashMap<String, Integer> result = new HashMap<String, Integer>();
    wordCount.open();
    try {
        Tuple tuple;
        while ((tuple = wordCount.getNextTuple()) != null) {
            String word = (String) tuple.getField(WordCountOperator.WORD).getValue();
            Integer count = (Integer) tuple.getField(WordCountOperator.COUNT).getValue();
            result.put(word, count);
        }
    } finally {
        // Close the operator even if reading or casting a tuple fails.
        wordCount.close();
    }
    return result;
}
Aggregations