Search in sources :

Example 26 with DataflowException

use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.

the class ComparableMatcher method compareDateTime.

/**
 * Compares the date-time field named by the predicate against the predicate's compare-to value.
 *
 * The compare-to value is converted with {@code toString()} and parsed first as a
 * {@link LocalDateTime}. If that fails it is parsed as a {@link LocalDate}, in which case
 * only the date part of the tuple's field takes part in the comparison.
 *
 * @param inputTuple the tuple holding the {@code DateTimeField} to test
 * @return the result of {@code compareValues} for the predicate's comparison type
 * @throws DataflowException if the compare-to value is neither a parsable date-time nor a parsable date
 */
private boolean compareDateTime(Tuple inputTuple) throws DataflowException {
    LocalDateTime dateTime = inputTuple.getField(predicate.getAttributeName(), DateTimeField.class).getValue();
    String compareToString = predicate.getCompareToValue().toString();
    // try to parse the input as a full date-time string first
    try {
        LocalDateTime compareToDateTime = LocalDateTime.parse(compareToString);
        return compareValues(dateTime, compareToDateTime, predicate.getComparisonType());
    } catch (DateTimeParseException dateTimeFailure) {
        // not a date-time: fall back to a date-only string and compare on the date part
        try {
            LocalDate compareToDate = LocalDate.parse(compareToString);
            return compareValues(dateTime.toLocalDate(), compareToDate, predicate.getComparisonType());
        } catch (DateTimeParseException dateFailure) {
            // keep the parser's exception as the cause so the offending position is not lost
            throw new DataflowException("Unable to parse date or time: " + compareToString, dateFailure);
        }
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) DateTimeParseException(java.time.format.DateTimeParseException) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) DateTimeField(edu.uci.ics.texera.api.field.DateTimeField) LocalDate(java.time.LocalDate)

Example 27 with DataflowException

use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.

the class ComparableMatcher method compareDouble.

/**
 * Compares the double field named by the predicate against the predicate's compare-to value.
 *
 * The compare-to value may be an {@code Integer} (widened to double), a {@code Double},
 * or a {@code String} that {@link Double#parseDouble(String)} accepts.
 *
 * @param inputTuple the tuple holding the {@code DoubleField} to test
 * @return the result of {@code compareValues} for the predicate's comparison type
 * @throws DataflowException if the compare-to value is a string that is not a number,
 *         or is null or of any other type
 */
private boolean compareDouble(Tuple inputTuple) {
    Object compareToObject = predicate.getCompareToValue();
    Double value = inputTuple.getField(predicate.getAttributeName(), DoubleField.class).getValue();
    // instanceof (rather than getClass()) sends a null compare-to value to the
    // DataflowException at the bottom instead of failing with a NullPointerException
    if (compareToObject instanceof Integer) {
        return compareValues(value, (double) (int) compareToObject, predicate.getComparisonType());
    }
    if (compareToObject instanceof Double) {
        return compareValues(value, (double) compareToObject, predicate.getComparisonType());
    }
    if (compareToObject instanceof String) {
        try {
            Double compareToValue = Double.parseDouble((String) compareToObject);
            return compareValues(value, compareToValue, predicate.getComparisonType());
        } catch (NumberFormatException e) {
            // keep the original exception as the cause
            throw new DataflowException("Unable to parse to number " + e.getMessage(), e);
        }
    }
    throw new DataflowException("Value " + compareToObject + " is not a valid number type");
}
Also used : DataflowException(edu.uci.ics.texera.api.exception.DataflowException) DoubleField(edu.uci.ics.texera.api.field.DoubleField)

Example 28 with DataflowException

use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.

the class ComparableMatcher method compareInt.

/**
 * Compares the integer field named by the predicate against the predicate's compare-to value.
 *
 * An {@code Integer} compare-to value is compared as an integer. A {@code Double}, or a
 * {@code String} that {@link Double#parseDouble(String)} accepts, is compared as a double
 * after widening the field value.
 *
 * @param inputTuple the tuple holding the {@code IntegerField} to test
 * @return the result of {@code compareValues} for the predicate's comparison type
 * @throws DataflowException if the compare-to value is a string that is not a number,
 *         or is null or of any other type
 */
private boolean compareInt(Tuple inputTuple) {
    Object compareToObject = predicate.getCompareToValue();
    Integer value = inputTuple.getField(predicate.getAttributeName(), IntegerField.class).getValue();
    // instanceof (rather than getClass()) sends a null compare-to value to the
    // DataflowException at the bottom instead of failing with a NullPointerException
    if (compareToObject instanceof Integer) {
        return compareValues(value, (int) compareToObject, predicate.getComparisonType());
    }
    if (compareToObject instanceof Double) {
        return compareValues((double) value, (double) compareToObject, predicate.getComparisonType());
    }
    if (compareToObject instanceof String) {
        try {
            Double compareToValue = Double.parseDouble((String) compareToObject);
            return compareValues((double) value, compareToValue, predicate.getComparisonType());
        } catch (NumberFormatException e) {
            // keep the original exception as the cause
            throw new DataflowException("Unable to parse to number " + e.getMessage(), e);
        }
    }
    throw new DataflowException("Value " + compareToObject + " is not a valid number type");
}
Also used : DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IntegerField(edu.uci.ics.texera.api.field.IntegerField)

Example 29 with DataflowException

use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.

the class RunTests method main.

/**
 * Writes the indices and runs all performance tests.
 *
 * Positional arguments, in order:
 * <ol>
 *   <li>file folder path (where the data set is stored)</li>
 *   <li>result folder path (where performance test results are stored)</li>
 *   <li>standard index folder path (where the standard index is stored)</li>
 *   <li>trigram index folder path (where the trigram index is stored)</li>
 *   <li>queries folder path (where query files are stored)</li>
 * </ol>
 *
 * Arguments that are not passed keep their default paths (refer to PerfTestUtils.java).
 * If some of the arguments are not applicable, define them as empty strings.
 *
 * Adjust the query file name, threshold list and regexQueries below as needed.
 *
 * @param args the optional folder paths described above
 */
public static void main(String[] args) {
    // Apply every override that was supplied, in positional order; stop at the first missing one.
    if (args.length > 0) {
        PerfTestUtils.setFileFolder(args[0]);
    }
    if (args.length > 1) {
        PerfTestUtils.setResultFolder(args[1]);
    }
    if (args.length > 2) {
        PerfTestUtils.setStandardIndexFolder(args[2]);
    }
    if (args.length > 3) {
        PerfTestUtils.setTrigramIndexFolder(args[3]);
    }
    if (args.length > 4) {
        PerfTestUtils.setQueryFolder(args[4]);
    } else {
        System.out.println("missing arguments will be set to default");
    }
    try {
        // start from clean index folders, then rebuild both kinds of index
        PerfTestUtils.deleteDirectory(new File(PerfTestUtils.standardIndexFolder));
        PerfTestUtils.deleteDirectory(new File(PerfTestUtils.trigramIndexFolder));
        PerfTestUtils.writeStandardAnalyzerIndices();
        PerfTestUtils.writeTrigramIndices();
        List<Double> thresholds = Arrays.asList(0.8, 0.65, 0.5, 0.35);
        List<String> regexQueries = Arrays.asList("mosquitos?", "v[ir]{2}[us]{2}", "market(ing)?", "medic(ine|al|ation|are|aid)?", "[A-Z][aeiou|AEIOU][A-Za-z]*");
        KeywordMatcherPerformanceTest.runTest("sample_queries.txt");
        DictionaryMatcherPerformanceTest.runTest("sample_queries.txt");
        FuzzyTokenMatcherPerformanceTest.runTest("sample_queries.txt", thresholds);
        RegexMatcherPerformanceTest.runTest(regexQueries);
        NlpExtractorPerformanceTest.runTest();
    } catch (Exception e) {
        // Top-level boundary of a command-line tool: report any failure (storage, dataflow, I/O or other) and exit normally.
        e.printStackTrace();
    }
}
Also used : DataflowException(edu.uci.ics.texera.api.exception.DataflowException) IOException(java.io.IOException) File(java.io.File) StorageException(edu.uci.ics.texera.api.exception.StorageException)

Example 30 with DataflowException

use of edu.uci.ics.texera.api.exception.DataflowException in project textdb by TextDB.

the class RelationManager method createTable.

/**
 * Creates a new table.
 *   Table name must be unique (case insensitive).
 *   LuceneAnalyzer must be a valid analyzer string.
 *
 * The "_id" attribute will be added to the table schema.
 * System automatically generates a unique ID for each tuple inserted to a table,
 *   the generated ID will be in "_id" field.
 *
 * The index directory is created if it does not exist, and any data already in it is cleared.
 *
 * @param tableName the name of the table; must be unique, compared case-insensitively
 * @param indexDirectory the directory to store the index and data; must not be used by another table
 * @param schema the schema of the table
 * @param luceneAnalyzerString the string representing the lucene analyzer used
 * @throws StorageException if the table already exists, the directory cannot be created or resolved,
 *         the directory is already taken by another table, or the analyzer string is not valid
 */
public void createTable(String tableName, Path indexDirectory, Schema schema, String luceneAnalyzerString) throws StorageException {
    // convert the table name to lower case
    tableName = tableName.toLowerCase();
    // table should not exist
    if (checkTableExistence(tableName)) {
        throw new StorageException(String.format("Table %s already exists.", tableName));
    }
    // create folder if it's not there
    // and convert the index directory to its absolute path
    String indexDirectoryStr;
    try {
        if (Files.notExists(indexDirectory)) {
            Files.createDirectories(indexDirectory);
        }
        indexDirectoryStr = indexDirectory.toRealPath().toString();
    } catch (IOException e) {
        throw new StorageException(e);
    }
    // check if the indexDirectory overlaps with another table's index directory
    Query indexDirectoryQuery = new TermQuery(new Term(CatalogConstants.TABLE_DIRECTORY, indexDirectoryStr));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, indexDirectoryQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    Tuple nextTuple;
    try {
        nextTuple = tableCatalogDataReader.getNextTuple();
    } finally {
        // release the catalog reader even if the lookup fails
        tableCatalogDataReader.close();
    }
    // if the index directory is already taken by another table, throws an exception
    if (nextTuple != null) {
        String overlapTableName = nextTuple.getField(CatalogConstants.TABLE_NAME).getValue().toString();
        throw new StorageException(String.format("Table %s already takes the index directory %s. Please choose another directory.", overlapTableName, indexDirectory));
    }
    // check if the lucene analyzer string is valid
    Analyzer luceneAnalyzer;
    try {
        luceneAnalyzer = LuceneAnalyzerConstants.getLuceneAnalyzer(luceneAnalyzerString);
    } catch (DataflowException e) {
        // keep the original exception as the cause
        throw new StorageException("Lucene Analyzer String is not valid.", e);
    }
    // create the directory and clear all data in the index directory
    Schema tableSchema = Schema.Builder.getSchemaWithID(schema);
    DataStore tableDataStore = new DataStore(indexDirectory, tableSchema);
    DataWriter dataWriter = new DataWriter(tableDataStore, luceneAnalyzer);
    dataWriter.open();
    try {
        dataWriter.clearData();
    } finally {
        // release the writer even if clearing fails
        dataWriter.close();
    }
    // write table info to catalog
    writeTableInfoToCatalog(tableName, indexDirectory, schema, luceneAnalyzerString);
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) Schema(edu.uci.ics.texera.api.schema.Schema) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) StorageException(edu.uci.ics.texera.api.exception.StorageException) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Aggregations

DataflowException (edu.uci.ics.texera.api.exception.DataflowException)56 TexeraException (edu.uci.ics.texera.api.exception.TexeraException)23 AttributeType (edu.uci.ics.texera.api.schema.AttributeType)20 Schema (edu.uci.ics.texera.api.schema.Schema)20 Tuple (edu.uci.ics.texera.api.tuple.Tuple)18 IOException (java.io.IOException)14 Span (edu.uci.ics.texera.api.span.Span)11 Collectors (java.util.stream.Collectors)10 SchemaConstants (edu.uci.ics.texera.api.constants.SchemaConstants)9 ArrayList (java.util.ArrayList)9 Attribute (edu.uci.ics.texera.api.schema.Attribute)8 IOperator (edu.uci.ics.texera.api.dataflow.IOperator)7 IField (edu.uci.ics.texera.api.field.IField)7 ListField (edu.uci.ics.texera.api.field.ListField)7 List (java.util.List)7 AbstractSingleInputOperator (edu.uci.ics.texera.dataflow.common.AbstractSingleInputOperator)6 ErrorMessages (edu.uci.ics.texera.api.constants.ErrorMessages)5 StorageException (edu.uci.ics.texera.api.exception.StorageException)5 IntegerField (edu.uci.ics.texera.api.field.IntegerField)4 DataflowUtils (edu.uci.ics.texera.dataflow.utils.DataflowUtils)4