Use of edu.uci.ics.texera.dataflow.regexmatcher.RegexMatcherSourceOperator in project textdb by TextDB.
From the class RegexMatcherPerformanceTest, method matchRegex.
/**
 * Runs each regex query against the given table and accumulates the results.
 *
 * <p>For every regex, a {@link RegexMatcherSourceOperator} is built over the
 * {@code MedlineIndexWriter.ABSTRACT} attribute, opened, drained tuple by tuple,
 * and closed. The number of spans in each tuple's {@code SchemaConstants.SPAN_LIST}
 * field is summed. After each query the elapsed wall-clock time (in seconds,
 * covering open, iteration and close) is added to {@code totalMatchingTime} and
 * the span count is added to {@code totalRegexResultCount}; both are declared
 * outside this method.
 *
 * <p>The operator is closed in a {@code finally} block so that it is released
 * even when reading a tuple fails. In that case the exception propagates and the
 * accumulators are not updated for the failing query.
 *
 * @param regexes   the regex queries to run, one operator per entry
 * @param tableName name of the table passed to each {@link RegexSourcePredicate}
 * @throws TexeraException declared by this method; propagated from the operator calls
 * @throws IOException     declared by this method; propagated from the operator calls
 */
public static void matchRegex(List<String> regexes, String tableName) throws TexeraException, IOException {
    List<String> attributeNames = Arrays.asList(MedlineIndexWriter.ABSTRACT);
    for (String regex : regexes) {
        // NOTE(review): the original comment here said the analyzer should generate
        // lower-case grams to build a lower-case index. No analyzer is configured in
        // this method -- confirm against the index-writing code.
        RegexSourcePredicate predicate = new RegexSourcePredicate(regex, attributeNames, tableName, SchemaConstants.SPAN_LIST);
        RegexMatcherSourceOperator regexSource = new RegexMatcherSourceOperator(predicate);

        long startMatchTime = System.currentTimeMillis();
        int counter = 0;
        // open() stays outside the try: if it fails, there is nothing to close.
        regexSource.open();
        try {
            Tuple nextTuple;
            while ((nextTuple = regexSource.getNextTuple()) != null) {
                ListField<Span> spanListField = nextTuple.getField(SchemaConstants.SPAN_LIST);
                List<Span> spanList = spanListField.getValue();
                counter += spanList.size();
            }
        } finally {
            // Always release the operator, even if iteration threw.
            regexSource.close();
        }
        long endMatchTime = System.currentTimeMillis();

        // Milliseconds -> seconds.
        double matchTime = (endMatchTime - startMatchTime) / 1000.0;
        totalMatchingTime += matchTime;
        totalRegexResultCount += counter;
    }
}
Aggregations