use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class KeywordMatcher method appendSubstringMatchingSpans.
/**
 * Collects the spans where {@code queryKeyword} occurs in the given attributes of a tuple.
 *
 * <ul>
 *   <li>STRING attributes match only when the whole field value equals the keyword
 *       (case-sensitive); a single span starting at 0 is produced.</li>
 *   <li>TEXT attributes are searched case-insensitively (both sides are lower-cased) and
 *       one span is produced for every occurrence, including occurrences that start one
 *       character after the previous one.</li>
 * </ul>
 *
 * @param inputTuple     tuple whose fields are searched
 * @param attributeNames names of the attributes to search
 * @param queryKeyword   keyword to look for
 * @return a new list with one span per match; empty when nothing matches
 * @throws DataflowException if any listed attribute is neither STRING nor TEXT
 */
private List<Span> appendSubstringMatchingSpans(Tuple inputTuple, List<String> attributeNames, String queryKeyword) throws DataflowException {
    List<Span> matchingResults = new ArrayList<>();
    for (String attributeName : attributeNames) {
        AttributeType attributeType = inputTuple.getSchema().getAttribute(attributeName).getType();
        // types other than TEXT and STRING: throw Exception for now
        // (checked before the value is read, so an unsupported attribute is always
        // reported through DataflowException)
        if (attributeType != AttributeType.STRING && attributeType != AttributeType.TEXT) {
            throw new DataflowException("KeywordMatcher: Fields other than STRING and TEXT are not supported yet");
        }
        String fieldValue = inputTuple.getField(attributeName).getValue().toString();
        if (attributeType == AttributeType.STRING) {
            // for STRING type, the query should match the fieldValue completely
            if (fieldValue.equals(queryKeyword)) {
                matchingResults.add(new Span(attributeName, 0, queryKeyword.length(), queryKeyword, fieldValue));
            }
        } else {
            // TEXT type: case-insensitive substring search
            String fieldValueLowerCase = fieldValue.toLowerCase();
            String queryKeywordLowerCase = queryKeyword.toLowerCase();
            int searchFrom = 0;
            while (searchFrom < fieldValueLowerCase.length()) {
                int index = fieldValueLowerCase.indexOf(queryKeywordLowerCase, searchFrom);
                if (index == -1) {
                    break;
                }
                // the span value is cut from the original field so its casing is preserved
                matchingResults.add(new Span(attributeName, index, index + queryKeyword.length(), queryKeyword, fieldValue.substring(index, index + queryKeyword.length())));
                // resume exactly one character after the match start; the previous
                // for-loop added an extra increment here and skipped a position
                searchFrom = index + 1;
            }
        }
    }
    return matchingResults;
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class KeywordMatcherSourceOperator method buildPhraseQuery.
/**
 * Builds the query used for phrase matching over the predicate's attributes.
 *
 * One SHOULD clause is added per attribute:
 * - STRING attribute: a term query on the predicate's query string as-is.
 * - TEXT attribute, exactly one entry in queryTokenList: a term query on the
 *   lower-cased query string.
 * - TEXT attribute otherwise: a phrase query built from queryTokensWithStopwords,
 *   where each token that is not in StandardAnalyzer.STOP_WORDS_SET is added
 *   lower-cased at its index in that list.
 *
 * @return the combined boolean query
 * @throws DataflowException if an attribute is neither STRING nor TEXT
 */
private Query buildPhraseQuery() throws DataflowException {
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    for (String attributeName : this.predicate.getAttributeNames()) {
        AttributeType type = this.inputSchema.getAttribute(attributeName).getType();

        if (type == AttributeType.STRING) {
            queryBuilder.add(new TermQuery(new Term(attributeName, predicate.getQuery())), BooleanClause.Occur.SHOULD);
            continue;
        }

        // types other than TEXT and STRING: throw Exception for now
        if (type != AttributeType.TEXT) {
            throw new DataflowException("KeywordPredicate: Fields other than STRING and TEXT are not supported yet");
        }

        if (queryTokenList.size() == 1) {
            queryBuilder.add(new TermQuery(new Term(attributeName, predicate.getQuery().toLowerCase())), BooleanClause.Occur.SHOULD);
            continue;
        }

        PhraseQuery.Builder phraseBuilder = new PhraseQuery.Builder();
        for (int position = 0; position < queryTokensWithStopwords.size(); position++) {
            String token = queryTokensWithStopwords.get(position);
            if (StandardAnalyzer.STOP_WORDS_SET.contains(token)) {
                // stopwords are left out, but the loop index still advances
                continue;
            }
            phraseBuilder.add(new Term(attributeName, token.toLowerCase()), position);
        }
        queryBuilder.add(phraseBuilder.build(), BooleanClause.Occur.SHOULD);
    }
    return queryBuilder.build();
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class EmojiSentimentOperator method open.
/**
 * Opens this operator: opens the input operator, validates the predicate's input
 * attribute against the input schema, and derives the output schema.
 *
 * Does nothing when the cursor is not CLOSED.
 *
 * @throws DataflowException if no input operator has been set
 * @throws TexeraException   if the input attribute is missing from the input schema
 *                           or is not of type STRING or TEXT
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }

    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();
    String inputAttributeName = predicate.getInputAttributeName();

    // the attribute to analyse must exist in the input schema
    if (!inputSchema.containsAttribute(inputAttributeName)) {
        throw new TexeraException(String.format("input attribute %s is not in the input schema %s", inputAttributeName, inputSchema.getAttributeNames()));
    }

    // only STRING and TEXT attributes are accepted
    AttributeType inputAttributeType = inputSchema.getAttribute(inputAttributeName).getType();
    if (!inputAttributeType.equals(AttributeType.STRING) && !inputAttributeType.equals(AttributeType.TEXT)) {
        throw new TexeraException(String.format("input attribute %s must have type String or Text, its actual type is %s", inputAttributeName, inputAttributeType));
    }

    // generate output schema by transforming the input schema
    outputSchema = transformSchema(inputOperator.getOutputSchema());
    cursor = OPENED;
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class NltkSentimentOperator method open.
/**
 * Opens this operator. The input operator is opened first, then the predicate's
 * input attribute is checked for presence and type in the input schema, and
 * finally the output schema is produced by transformSchema.
 *
 * Returns immediately when the cursor is not CLOSED.
 *
 * @throws DataflowException if the input operator is null
 * @throws TexeraException   if the input attribute is absent from the input schema
 *                           or its type is neither STRING nor TEXT
 */
@Override
public void open() throws TexeraException {
    if (cursor != CLOSED) {
        return;
    }
    if (inputOperator == null) {
        throw new DataflowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }

    inputOperator.open();
    Schema inputSchema = inputOperator.getOutputSchema();
    String attributeName = predicate.getInputAttributeName();

    // check that the input attribute is present in the schema
    boolean attributePresent = inputSchema.containsAttribute(attributeName);
    if (!attributePresent) {
        throw new TexeraException(String.format("input attribute %s is not in the input schema %s", attributeName, inputSchema.getAttributeNames()));
    }

    // check that the attribute type is valid
    AttributeType attributeType = inputSchema.getAttribute(attributeName).getType();
    boolean isString = attributeType.equals(AttributeType.STRING);
    if (!isString && !attributeType.equals(AttributeType.TEXT)) {
        throw new TexeraException(String.format("input attribute %s must have type String or Text, its actual type is %s", attributeName, attributeType));
    }

    // generate output schema by transforming the input schema
    outputSchema = transformSchema(inputOperator.getOutputSchema());
    cursor = OPENED;
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class ProjectionOperator method setUp.
/**
 * Prepares the projection: reads the input schema from the input operator and
 * builds the output schema from those input attributes whose lower-cased name
 * is contained in the predicate's projection fields. Attributes keep the order
 * they have in the input schema.
 *
 * @throws DataflowException if the number of matched attributes differs from
 *                           the number of projection fields
 */
@Override
protected void setUp() throws TexeraException {
    inputSchema = inputOperator.getOutputSchema();

    List<Attribute> outputAttributes = inputSchema.getAttributes()
            .stream()
            .filter(attr -> {
                String lowerCaseName = attr.getName().toLowerCase();
                return predicate.getProjectionFields().contains(lowerCaseName);
            })
            .collect(Collectors.toList());

    // every projection field must have matched an attribute
    boolean allFieldsMatched = outputAttributes.size() == predicate.getProjectionFields().size();
    if (!allFieldsMatched) {
        throw new DataflowException("input schema doesn't contain one of the attributes to be projected");
    }

    outputSchema = new Schema(outputAttributes.toArray(new Attribute[0]));
}
Aggregations