use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class KeywordMatcher method computeConjunctionMatchingResult.
/**
 * Collects the spans that satisfy a conjunction keyword match on one tuple.
 *
 * <p>Every attribute named by the predicate is examined in turn:
 * <ul>
 *   <li>STRING attributes match only when the field value equals the whole query; one span
 *       covering offsets {@code 0..query.length()} is emitted for that attribute.</li>
 *   <li>TEXT attributes contribute the relevant payload spans belonging to that attribute,
 *       but only when {@code isAllQueryTokensPresent} accepts them for the query token set.</li>
 * </ul>
 *
 * @param inputTuple tuple whose payload and attribute fields are inspected
 * @return the matching spans across all predicate attributes; empty when nothing matched
 * @throws DataFlowException if a predicate attribute is neither STRING nor TEXT
 */
private List<Span> computeConjunctionMatchingResult(Tuple inputTuple) throws DataFlowException {
    ListField<Span> spanListField = inputTuple.getField(SchemaConstants.PAYLOAD);
    List<Span> candidateSpans = filterRelevantSpans(spanListField.getValue());
    List<Span> results = new ArrayList<>();

    for (String attributeName : this.predicate.getAttributeNames()) {
        AttributeType type = this.inputSchema.getAttribute(attributeName).getAttributeType();
        String fieldText = inputTuple.getField(attributeName).getValue().toString();

        // only STRING and TEXT attributes can be keyword-matched for now
        boolean supported = (type == AttributeType.STRING) || (type == AttributeType.TEXT);
        if (!supported) {
            throw new DataFlowException("KeywordMatcher: Fields other than STRING and TEXT are not supported yet");
        }

        if (type == AttributeType.STRING) {
            // STRING: the query has to equal the complete field value
            if (fieldText.equals(predicate.getQuery())) {
                results.add(new Span(attributeName, 0, predicate.getQuery().length(), predicate.getQuery(), fieldText));
            }
        } else {
            // TEXT: gather this attribute's spans, keep them only if every query token is present
            List<Span> spansOfAttribute = new ArrayList<>();
            for (Span candidate : candidateSpans) {
                if (candidate.getAttributeName().equals(attributeName)) {
                    spansOfAttribute.add(candidate);
                }
            }
            if (isAllQueryTokensPresent(spansOfAttribute, queryTokenSet)) {
                results.addAll(spansOfAttribute);
            }
        }
    }
    return results;
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class AbstractSingleInputOperator method open.
/**
 * Opens this operator: opens the input operator, runs {@code setUp()}, and marks the
 * cursor as OPENED. Calling it when the cursor is not CLOSED is a no-op.
 *
 * <p>A {@link DataFlowException} is propagated as-is rather than being wrapped in another
 * DataFlowException with the same message; any other exception raised while opening the
 * input or during set-up is wrapped in a DataFlowException that keeps the original as cause.
 *
 * @throws TextDBException if no input operator has been set, or if opening the input
 *         operator or setting up this operator fails
 */
@Override
public void open() throws TextDBException {
    if (cursor != CLOSED) {
        return;
    }
    // Validate before entering the try block so this error is not re-wrapped below.
    if (this.inputOperator == null) {
        throw new DataFlowException(ErrorMessages.INPUT_OPERATOR_NOT_SPECIFIED);
    }
    try {
        inputOperator.open();
        setUp();
    } catch (DataFlowException e) {
        // Already the right type: rethrow untouched to avoid a redundant cause chain
        // that grows by one level for every operator in the pipeline.
        throw e;
    } catch (Exception e) {
        throw new DataFlowException(e.getMessage(), e);
    }
    cursor = OPENED;
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class ComparableMatcher method compareInt.
/**
 * Compares the tuple's integer field (named by the predicate) with the predicate's
 * compare-to value, using the predicate's comparison type.
 *
 * <p>The compare-to value may be an {@code Integer} (compared as integers), a
 * {@code Double} (the field value is widened to double), or a {@code String} that is
 * parsed with {@link Double#parseDouble(String)} and then compared as doubles.
 *
 * @param inputTuple tuple holding the integer field to compare
 * @return the result of {@code compareValues} for the field value and the compare-to value
 * @throws DataFlowException if the compare-to value is a string that cannot be parsed as
 *         a number (the NumberFormatException is kept as the cause), or if it is null or
 *         of any type other than Integer, Double, or String
 */
private boolean compareInt(Tuple inputTuple) {
    Object compareToObject = predicate.getCompareToValue();
    Integer value = inputTuple.getField(predicate.getAttributeName(), IntegerField.class).getValue();
    // instanceof is null-safe, so a null compare-to value reaches the final branch and is
    // reported as an invalid number type instead of failing with a NullPointerException.
    if (compareToObject instanceof Integer) {
        return compareValues(value, (int) compareToObject, predicate.getComparisonType());
    } else if (compareToObject instanceof Double) {
        return compareValues((double) value, (double) compareToObject, predicate.getComparisonType());
    } else if (compareToObject instanceof String) {
        try {
            Double compareToValue = Double.parseDouble((String) compareToObject);
            return compareValues((double) value, compareToValue, predicate.getComparisonType());
        } catch (NumberFormatException e) {
            // keep the parse failure as the cause so the stack trace is not lost
            throw new DataFlowException("Unable to parse to number " + e.getMessage(), e);
        }
    } else {
        throw new DataFlowException("Value " + compareToObject + " is not a valid number type");
    }
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class RunTests method main.
/**
 * Writes the indices, then runs all performance tests.
 *
 * <p>Optional positional arguments:
 * <ol>
 *   <li>file folder path (where the data set is stored)</li>
 *   <li>result folder path (where performance test results are stored)</li>
 *   <li>standard index folder path (where the standard index is stored)</li>
 *   <li>trigram index folder path (where the trigram index is stored)</li>
 *   <li>queries folder path (where query files are stored)</li>
 * </ol>
 *
 * <p>Arguments that are not passed in fall back to the default paths (refer to
 * PerfTestUtils.java). If some of the arguments are not applicable, pass them as empty
 * strings.
 *
 * <p>Adjust the values below as needed, such as the query file name, the threshold list,
 * and regexQueries.
 *
 * @param args the optional folder paths described above, in that order
 */
public static void main(String[] args) {
    // Apply only the overrides that were actually supplied; checking the length up front
    // avoids using ArrayIndexOutOfBoundsException as control flow.
    if (args.length > 0) {
        PerfTestUtils.setFileFolder(args[0]);
    }
    if (args.length > 1) {
        PerfTestUtils.setResultFolder(args[1]);
    }
    if (args.length > 2) {
        PerfTestUtils.setStandardIndexFolder(args[2]);
    }
    if (args.length > 3) {
        PerfTestUtils.setTrigramIndexFolder(args[3]);
    }
    if (args.length > 4) {
        PerfTestUtils.setQueryFolder(args[4]);
    } else {
        System.out.println("missing arguments will be set to default");
    }

    try {
        // Rebuild both indices from scratch before measuring.
        PerfTestUtils.deleteDirectory(new File(PerfTestUtils.standardIndexFolder));
        PerfTestUtils.deleteDirectory(new File(PerfTestUtils.trigramIndexFolder));
        PerfTestUtils.writeStandardAnalyzerIndices();
        PerfTestUtils.writeTrigramIndices();

        List<Double> thresholds = Arrays.asList(0.8, 0.65, 0.5, 0.35);
        List<String> regexQueries = Arrays.asList("mosquitos?", "v[ir]{2}[us]{2}", "market(ing)?", "medic(ine|al|ation|are|aid)?", "[A-Z][aeiou|AEIOU][A-Za-z]*");

        KeywordMatcherPerformanceTest.runTest("sample_queries.txt");
        DictionaryMatcherPerformanceTest.runTest("sample_queries.txt");
        FuzzyTokenMatcherPerformanceTest.runTest("sample_queries.txt", thresholds);
        RegexMatcherPerformanceTest.runTest(regexQueries);
        NlpExtractorPerformanceTest.runTest();
    } catch (Exception e) {
        // Top-level boundary of the test runner: report any failure and finish.
        e.printStackTrace();
    }
}
use of edu.uci.ics.textdb.api.exception.DataFlowException in project textdb by TextDB.
the class JoinTestHelper method getRegexMatcher.
/**
 * Builds a {@link RegexMatcher} for the given query and attribute, fed by a scan-based
 * source operator over the named table.
 *
 * @param tableName name of the table the scan source reads from
 * @param query regular expression passed to the regex predicate
 * @param attrName the single attribute the regex is applied to
 * @return the wired-up matcher, or {@code null} if a DataFlowException occurred while
 *         building it (the stack trace is printed in that case)
 */
public static RegexMatcher getRegexMatcher(String tableName, String query, String attrName) {
    try {
        // source side: scan over the requested table
        ScanSourcePredicate scanPredicate = new ScanSourcePredicate(tableName);
        ScanBasedSourceOperator tableScan = new ScanBasedSourceOperator(scanPredicate);

        // matcher side: regex over the one attribute, results reported under SPAN_LIST
        RegexPredicate regexPredicate = new RegexPredicate(query, Arrays.asList(attrName), SchemaConstants.SPAN_LIST);
        RegexMatcher matcher = new RegexMatcher(regexPredicate);

        matcher.setInputOperator(tableScan);
        return matcher;
    } catch (DataFlowException e) {
        e.printStackTrace();
        return null;
    }
}
Aggregations