use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class ComparableMatcher method compareDateTime.
/**
 * Compares the tuple's date-time field with the predicate's compare-to value.
 *
 * The compare-to value is converted with {@code toString()} and parsed as a
 * {@link LocalDateTime} first. If that fails it is parsed as a {@link LocalDate},
 * in which case only the date part of the field takes part in the comparison.
 *
 * @param inputTuple the tuple holding the {@code DateTimeField} named by the predicate's attribute
 * @return the result of {@code compareValues} for the predicate's comparison type
 * @throws DataflowException if the compare-to value is neither a date-time nor a date string
 */
private boolean compareDateTime(Tuple inputTuple) throws DataflowException {
    LocalDateTime dateTime = inputTuple.getField(predicate.getAttributeName(), DateTimeField.class).getValue();
    String compareToString = predicate.getCompareToValue().toString();
    // try to parse the input as a date-time string first
    try {
        LocalDateTime compareToDateTime = LocalDateTime.parse(compareToString);
        return compareValues(dateTime, compareToDateTime, predicate.getComparisonType());
    } catch (DateTimeParseException notDateTime) {
        // if it fails, then try to parse as a date-only string and compare on the date part
        try {
            LocalDate compareToDate = LocalDate.parse(compareToString);
            return compareValues(dateTime.toLocalDate(), compareToDate, predicate.getComparisonType());
        } catch (DateTimeParseException notDate) {
            // keep both parse failures attached so the root cause is not lost
            DataflowException failure =
                    new DataflowException("Unable to parse date or time: " + compareToString, notDate);
            failure.addSuppressed(notDateTime);
            throw failure;
        }
    }
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class ComparableMatcher method compareDouble.
/**
 * Compares the tuple's double field with the predicate's compare-to value.
 *
 * The compare-to value may be any {@link Number} (converted with {@code doubleValue()})
 * or a {@link String} that {@link Double#parseDouble(String)} accepts.
 *
 * @param inputTuple the tuple holding the {@code DoubleField} named by the predicate's attribute
 * @return the result of {@code compareValues} for the predicate's comparison type
 * @throws DataflowException if the compare-to value is not a number and cannot be parsed as one
 */
private boolean compareDouble(Tuple inputTuple) {
    Object compareToObject = predicate.getCompareToValue();
    Double value = inputTuple.getField(predicate.getAttributeName(), DoubleField.class).getValue();

    Double compareToValue;
    if (compareToObject instanceof Number) {
        // covers Integer and Double as before, plus Long, Float, Short, BigDecimal, ...
        compareToValue = ((Number) compareToObject).doubleValue();
    } else if (compareToObject instanceof String) {
        try {
            compareToValue = Double.parseDouble((String) compareToObject);
        } catch (NumberFormatException e) {
            throw new DataflowException("Unable to parse to number " + e.getMessage(), e);
        }
    } else {
        // also reached for a null compare-to value, which is reported instead of an NPE
        throw new DataflowException("Value " + compareToObject + " is not a valid number type");
    }
    return compareValues(value, compareToValue, predicate.getComparisonType());
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class ComparableMatcher method compareInt.
/**
 * Compares the tuple's integer field with the predicate's compare-to value.
 *
 * An {@link Integer} compare-to value is compared as an integer. Any other
 * {@link Number}, or a {@link String} that {@link Double#parseDouble(String)} accepts,
 * is compared as a double after widening the field value.
 *
 * @param inputTuple the tuple holding the {@code IntegerField} named by the predicate's attribute
 * @return the result of {@code compareValues} for the predicate's comparison type
 * @throws DataflowException if the compare-to value is not a number and cannot be parsed as one
 */
private boolean compareInt(Tuple inputTuple) {
    Object compareToObject = predicate.getCompareToValue();
    Integer value = inputTuple.getField(predicate.getAttributeName(), IntegerField.class).getValue();

    if (compareToObject instanceof Integer) {
        // exact integer comparison, no floating-point conversion involved
        return compareValues(value, (Integer) compareToObject, predicate.getComparisonType());
    }

    double compareToValue;
    if (compareToObject instanceof Number) {
        // covers Double as before, plus Long, Float, Short, BigDecimal, ...
        compareToValue = ((Number) compareToObject).doubleValue();
    } else if (compareToObject instanceof String) {
        try {
            compareToValue = Double.parseDouble((String) compareToObject);
        } catch (NumberFormatException e) {
            throw new DataflowException("Unable to parse to number " + e.getMessage(), e);
        }
    } else {
        // also reached for a null compare-to value, which is reported instead of an NPE
        throw new DataflowException("Value " + compareToObject + " is not a valid number type");
    }
    return compareValues(value.doubleValue(), compareToValue, predicate.getComparisonType());
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class RunTests method main.
/*
 * Writes the indices, then runs all performance tests.
 *
 * Optional positional arguments:
 *   args[0] file folder path (where the data set is stored)
 *   args[1] result folder path (where performance test results are stored)
 *   args[2] standard index folder path (where the standard index is stored)
 *   args[3] trigram index folder path (where the trigram index is stored)
 *   args[4] queries folder path (where query files are stored)
 *
 * Arguments that are not passed in keep their default paths (refer to
 * PerfTestUtils.java). If some of the arguments are not applicable,
 * define them as empty string.
 *
 * Make necessary changes for arguments, such as query file name, threshold
 * list, and regexQueries.
 */
public static void main(String[] args) {
    // Apply only the arguments that were actually supplied, in positional order.
    if (args.length > 0) {
        PerfTestUtils.setFileFolder(args[0]);
    }
    if (args.length > 1) {
        PerfTestUtils.setResultFolder(args[1]);
    }
    if (args.length > 2) {
        PerfTestUtils.setStandardIndexFolder(args[2]);
    }
    if (args.length > 3) {
        PerfTestUtils.setTrigramIndexFolder(args[3]);
    }
    if (args.length > 4) {
        PerfTestUtils.setQueryFolder(args[4]);
    } else {
        System.out.println("missing arguments will be set to default");
    }

    try {
        // rebuild both indices from scratch before measuring
        PerfTestUtils.deleteDirectory(new File(PerfTestUtils.standardIndexFolder));
        PerfTestUtils.deleteDirectory(new File(PerfTestUtils.trigramIndexFolder));
        PerfTestUtils.writeStandardAnalyzerIndices();
        PerfTestUtils.writeTrigramIndices();

        List<Double> thresholds = Arrays.asList(0.8, 0.65, 0.5, 0.35);
        List<String> regexQueries = Arrays.asList("mosquitos?", "v[ir]{2}[us]{2}", "market(ing)?", "medic(ine|al|ation|are|aid)?", "[A-Z][aeiou|AEIOU][A-Za-z]*");

        KeywordMatcherPerformanceTest.runTest("sample_queries.txt");
        DictionaryMatcherPerformanceTest.runTest("sample_queries.txt");
        FuzzyTokenMatcherPerformanceTest.runTest("sample_queries.txt", thresholds);
        RegexMatcherPerformanceTest.runTest(regexQueries);
        NlpExtractorPerformanceTest.runTest();
    } catch (Exception e) {
        // top-level boundary of a command-line tool: report any failure and exit normally
        e.printStackTrace();
    }
}
use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.
the class RelationManager method createTable.
/**
 * Creates a new table.
 * Table name must be unique (case insensitive).
 * LuceneAnalyzer must be a valid analyzer string.
 *
 * The "_id" attribute will be added to the table schema.
 * System automatically generates a unique ID for each tuple inserted to a table,
 * the generated ID will be in "_id" field.
 *
 * @param tableName the name of the table, must be unique, case is not sensitive
 * @param indexDirectory the directory to store the index and data, must not duplicate with other tables' directories
 * @param schema the schema of the table
 * @param luceneAnalyzerString the string representing the lucene analyzer used
 * @throws StorageException if the table already exists, the index directory cannot be
 *         created or resolved, the directory is already taken by another table,
 *         or the analyzer string is not valid
 */
public void createTable(String tableName, Path indexDirectory, Schema schema, String luceneAnalyzerString) throws StorageException {
    // convert the table name to lower case
    tableName = tableName.toLowerCase();
    // table should not exist
    if (checkTableExistence(tableName)) {
        throw new StorageException(String.format("Table %s already exists.", tableName));
    }
    // create folder if it's not there
    // and convert the index directory to its absolute path
    String indexDirectoryStr;
    try {
        if (Files.notExists(indexDirectory)) {
            Files.createDirectories(indexDirectory);
        }
        indexDirectoryStr = indexDirectory.toRealPath().toString();
    } catch (IOException e) {
        throw new StorageException(e);
    }
    // check if the indexDirectory overlaps with another table's index directory
    Query indexDirectoryQuery = new TermQuery(new Term(CatalogConstants.TABLE_DIRECTORY, indexDirectoryStr));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, indexDirectoryQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    Tuple nextTuple;
    try {
        nextTuple = tableCatalogDataReader.getNextTuple();
    } finally {
        // release the catalog reader even if reading fails
        tableCatalogDataReader.close();
    }
    // if the index directory is already taken by another table, throws an exception
    if (nextTuple != null) {
        String overlapTableName = nextTuple.getField(CatalogConstants.TABLE_NAME).getValue().toString();
        throw new StorageException(String.format("Table %s already takes the index directory %s. Please choose another directory.", overlapTableName, indexDirectory));
    }
    // check if the lucene analyzer string is valid
    Analyzer luceneAnalyzer;
    try {
        luceneAnalyzer = LuceneAnalyzerConstants.getLuceneAnalyzer(luceneAnalyzerString);
    } catch (DataflowException e) {
        // keep the original failure as the cause for diagnosis
        throw new StorageException("Lucene Analyzer String is not valid.", e);
    }
    // create the directory and clear all data in the index directory
    Schema tableSchema = Schema.Builder.getSchemaWithID(schema);
    DataStore tableDataStore = new DataStore(indexDirectory, tableSchema);
    DataWriter dataWriter = new DataWriter(tableDataStore, luceneAnalyzer);
    dataWriter.open();
    try {
        dataWriter.clearData();
    } finally {
        // release the writer even if clearing fails
        dataWriter.close();
    }
    // write table info to catalog
    writeTableInfoToCatalog(tableName, indexDirectory, schema, luceneAnalyzerString);
}
Aggregations