Use of edu.uci.ics.texera.dataflow.regexmatcher.RegexSourcePredicate in project textdb by TextDB.
From the class PredicateBaseTest, method testRegexMatcher.
/**
 * Passes one {@code RegexPredicate} and one {@code RegexSourcePredicate},
 * both built with the literal pattern {@code "regex"}, through
 * {@code testPredicate}.
 *
 * @throws Exception if {@code testPredicate} fails for either predicate
 */
@Test
public void testRegexMatcher() throws Exception {
    // Plain predicate: regex, attribute names, span-list name.
    testPredicate(new RegexPredicate("regex", attributeNames, "spanListName"));

    // Source predicate: same arguments plus the table name.
    testPredicate(new RegexSourcePredicate("regex", attributeNames, "tableName", "spanListName"));
}
Use of edu.uci.ics.texera.dataflow.regexmatcher.RegexSourcePredicate in project textdb by TextDB.
From the class RegexMatcherPerformanceTest, method matchRegex.
/**
 * Runs each regex in {@code regexes} against the {@code MedlineIndexWriter.ABSTRACT}
 * attribute of the given table and accumulates the results into the class-level
 * counters {@code totalMatchingTime} (seconds) and {@code totalRegexResultCount}
 * (number of spans found).
 *
 * <p>The source operator for each regex is always closed, even when reading
 * tuples fails part-way through.
 *
 * @param regexes   the regex queries to run, one source operator per regex
 * @param tableName name of the table the source operator reads from
 * @throws TexeraException propagated from the source operator
 * @throws IOException     declared by the original signature; kept for callers
 */
public static void matchRegex(List<String> regexes, String tableName) throws TexeraException, IOException {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    for (String regex : regexes) {
        // analyzer should generate grams all in lower case to build a lower
        // case index.
        RegexSourcePredicate predicate = new RegexSourcePredicate(regex, attributeNames, tableName, SchemaConstants.SPAN_LIST);
        RegexMatcherSourceOperator regexSource = new RegexMatcherSourceOperator(predicate);

        // Timing covers open, iteration and close, as in the original measurement.
        long startMatchTime = System.currentTimeMillis();
        int counter = 0;
        regexSource.open();
        try {
            Tuple nextTuple;
            while ((nextTuple = regexSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Release the operator even if reading a tuple throws.
            regexSource.close();
        }
        long endMatchTime = System.currentTimeMillis();

        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        totalMatchingTime += matchTime;
        totalRegexResultCount += counter;
    }
}
Aggregations