use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class ExcelSink method close.
/**
 * Closes this sink: closes the input operator, writes the workbook to the
 * output stream and then closes that stream.
 *
 * Calling this method when the sink is already closed is a no-op.
 * The output stream is closed even when writing the workbook fails, and the
 * sink is marked as closed afterwards so that a repeated call does not try
 * to write to an already-closed stream.
 *
 * @throws TextDBException if closing the input operator fails; a
 *         DataFlowException wrapping the underlying IOException is thrown
 *         when writing the workbook or closing the stream fails
 */
@Override
public void close() throws TextDBException {
    if (cursor == CLOSED) {
        return;
    }
    inputOperator.close();
    try {
        try {
            wb.write(fileOut);
        } finally {
            // release the file handle whether or not the write succeeded
            fileOut.close();
        }
    } catch (IOException e) {
        throw new DataFlowException(e);
    } finally {
        // the stream is gone at this point, so the sink cannot be reused
        cursor = CLOSED;
    }
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class KeywordMatcher method computeSubstringMatchingResult.
/**
 * Finds every occurrence of the predicate's query in the configured
 * attributes of the given tuple.
 *
 * For a STRING attribute the whole field value must equal the query
 * (case sensitive). For a TEXT attribute every case-insensitive occurrence
 * of the query inside the field value produces one span.
 *
 * The query is matched as literal text, never as a regular expression, so
 * queries containing characters such as '+', '(' or '.' are handled
 * correctly. Matching runs against the original field value, so the
 * reported offsets always index into that value.
 *
 * @param inputTuple the tuple whose fields are searched
 * @return the list of matching spans, empty when nothing matches
 * @throws DataFlowException if an attribute is neither STRING nor TEXT
 */
private List<Span> computeSubstringMatchingResult(Tuple inputTuple) throws DataFlowException {
    List<Span> matchingResults = new ArrayList<>();
    // compiled lazily: only needed when at least one TEXT attribute is present
    Pattern literalQueryPattern = null;

    for (String attributeName : this.predicate.getAttributeNames()) {
        AttributeType attributeType = this.inputSchema.getAttribute(attributeName).getAttributeType();
        // types other than TEXT and STRING: throw Exception for now
        if (attributeType != AttributeType.STRING && attributeType != AttributeType.TEXT) {
            throw new DataFlowException("KeywordMatcher: Fields other than STRING and TEXT are not supported yet");
        }
        String fieldValue = inputTuple.getField(attributeName).getValue().toString();

        // for STRING type, the query should match the fieldValue completely
        if (attributeType == AttributeType.STRING) {
            if (fieldValue.equals(predicate.getQuery())) {
                matchingResults.add(new Span(attributeName, 0, predicate.getQuery().length(), predicate.getQuery(), fieldValue));
            }
        }

        // for TEXT type, every case-insensitive occurrence of the query is a match
        if (attributeType == AttributeType.TEXT) {
            if (literalQueryPattern == null) {
                // LITERAL: treat the query as plain text, not as a regex.
                // UNICODE_CASE: make the case-insensitive comparison apply beyond ASCII.
                literalQueryPattern = Pattern.compile(predicate.getQuery(),
                        Pattern.LITERAL | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
            }
            // match on the original value so start/end are valid offsets into it
            Matcher matcher = literalQueryPattern.matcher(fieldValue);
            while (matcher.find()) {
                int start = matcher.start();
                int end = matcher.end();
                matchingResults.add(new Span(attributeName, start, end, predicate.getQuery(), fieldValue.substring(start, end)));
            }
        }
    }
    return matchingResults;
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class KeywordMatcherSourceOperator method buildPhraseQuery.
/**
 * Builds the Lucene query used for phrase matching.
 *
 * One sub-query is created per attribute in the predicate and all of them
 * are combined with SHOULD clauses:
 * - STRING attribute: a term query on the query string as given.
 * - TEXT attribute, single query token: a term query on the lower-cased
 *   query string.
 * - TEXT attribute, otherwise: a phrase query over the query tokens, where
 *   tokens found in the StandardAnalyzer stop word set are left out but the
 *   remaining tokens keep their original positions.
 *
 * @return the combined boolean query
 * @throws DataFlowException if an attribute is neither STRING nor TEXT
 */
private Query buildPhraseQuery() throws DataFlowException {
    BooleanQuery.Builder combinedBuilder = new BooleanQuery.Builder();

    for (String fieldName : this.predicate.getAttributeNames()) {
        AttributeType fieldType = this.inputSchema.getAttribute(fieldName).getAttributeType();
        Query fieldQuery;

        if (fieldType == AttributeType.STRING) {
            fieldQuery = new TermQuery(new Term(fieldName, predicate.getQuery()));
        } else if (fieldType == AttributeType.TEXT) {
            if (queryTokenList.size() == 1) {
                fieldQuery = new TermQuery(new Term(fieldName, predicate.getQuery().toLowerCase()));
            } else {
                PhraseQuery.Builder phraseBuilder = new PhraseQuery.Builder();
                int position = 0;
                for (String token : queryTokensWithStopwords) {
                    // stop words are skipped, but they still occupy a position
                    if (!StandardAnalyzer.STOP_WORDS_SET.contains(token)) {
                        phraseBuilder.add(new Term(fieldName, token.toLowerCase()), position);
                    }
                    position++;
                }
                fieldQuery = phraseBuilder.build();
            }
        } else {
            // types other than TEXT and STRING: throw Exception for now
            throw new DataFlowException("KeywordPredicate: Fields other than STRING and TEXT are not supported yet");
        }

        combinedBuilder.add(fieldQuery, BooleanClause.Occur.SHOULD);
    }

    return combinedBuilder.build();
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class Join method open.
/**
 * Opens the join operator.
 *
 * Does nothing unless the cursor is currently CLOSED. Otherwise it verifies
 * that both input operators are set, opens the inner and then the outer
 * operator, derives the output schema from their two schemas through the
 * join predicate, and finally marks the cursor as OPENED.
 *
 * @throws TextDBException as declared; a DataFlowException is thrown
 *         directly when the inner or outer input operator is not set
 */
@Override
public void open() throws TextDBException {
    // only a closed operator needs to be opened
    if (cursor != CLOSED) {
        return;
    }

    // both inputs are required before anything is opened
    if (innerOperator == null) {
        throw new DataFlowException("Inner Input Operator is not set.");
    }
    if (outerOperator == null) {
        throw new DataFlowException("Outer Input Operator is not set.");
    }

    // open each input and read its schema, inner side first
    innerOperator.open();
    Schema innerSchema = innerOperator.getOutputSchema();

    outerOperator.open();
    Schema outerSchema = outerOperator.getOutputSchema();

    // the join predicate decides what the combined schema looks like
    this.outputSchema = joinPredicate.generateOutputSchema(innerSchema, outerSchema);
    cursor = OPENED;
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class RelationManager method getTableAnalyzer.
/**
 * Gets the Lucene analyzer of a table.
 *
 * The analyzer string of the table is looked up first and then converted
 * into an analyzer object.
 *
 * @param tableName, the name of the table, case insensitive
 * @return the Lucene analyzer built from the table's analyzer string
 * @throws StorageException if the analyzer string cannot be converted into
 *         an analyzer (the underlying DataFlowException is wrapped)
 */
public Analyzer getTableAnalyzer(String tableName) throws StorageException {
    String analyzerName = getTableAnalyzerString(tableName);
    try {
        // convert a lucene analyzer string to an analyzer object
        return LuceneAnalyzerConstants.getLuceneAnalyzer(analyzerName);
    } catch (DataFlowException e) {
        throw new StorageException(e);
    }
}
Aggregations