Use of edu.uci.ics.texera.dataflow.regexmatcher.label.LabeledRegexProcessor in project textdb by TextDB.
The class RegexMatcher, method setUp.
/**
 * Prepares this matcher for execution: verifies that an input operator is present,
 * validates the predicate's attribute names against the input schema, derives the
 * output schema, and builds the matching engine that fits the detected regex type.
 *
 * <p>When the predicate supplies a span list name, an attribute of type
 * {@code AttributeType.LIST} with that name is added to the output schema; otherwise
 * nothing is added to the schema copied from the input operator.
 *
 * <p>NOTE(review): the {@code Schema.check*} helpers presumably reject missing or
 * duplicate attribute names by throwing -- confirm against {@code Schema}.
 *
 * @throws DataflowException if no input operator has been specified
 */
@Override
protected void setUp() throws DataflowException {
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }

    final Schema upstreamSchema = inputOperator.getOutputSchema();
    final String spanListName = predicate.getSpanListName();

    // A result attribute is only produced when the predicate names one.
    this.addResultAttribute = (spanListName != null);

    Schema.checkAttributeExists(upstreamSchema, predicate.getAttributeNames());
    if (addResultAttribute) {
        Schema.checkAttributeNotExists(upstreamSchema, spanListName);
    }

    Schema.Builder schemaBuilder = new Schema.Builder(inputOperator.getOutputSchema());
    if (addResultAttribute) {
        schemaBuilder.add(spanListName, AttributeType.LIST);
    }
    outputSchema = schemaBuilder.build();

    findRegexType();

    // Plain regex without labels: compile it directly with java.util.regex.
    if (this.regexType == RegexType.NO_LABELS) {
        // A flags value of 0 is what the single-argument Pattern.compile uses.
        int compileFlags = predicate.isIgnoreCase() ? Pattern.CASE_INSENSITIVE : 0;
        regexPattern = Pattern.compile(predicate.getRegex(), compileFlags);
        return;
    }

    // Labeled regex that also carries qualifiers.
    if (this.regexType == RegexType.LABELED_WITH_QUALIFIERS) {
        labeledRegexProcessor = new LabeledRegexProcessor(predicate);
        return;
    }

    // Every remaining regex type is handled by the no-qualifier processor.
    labledRegexNoQualifierProcessor = new LabledRegexNoQualifierProcessor(predicate);
}
Aggregations