Search in sources :

Example 66 with Schema

use of edu.uci.ics.texera.api.schema.Schema in project textdb by TextDB.

the class RelationManager method createTable.

/**
 * Creates a new table.
 *   Table name must be unique (case insensitive).
 *   LuceneAnalyzer must be a valid analyzer string.
 *
 * The "_id" attribute will be added to the table schema.
 * System automatically generates a unique ID for each tuple inserted to a table,
 *   the generated ID will be in "_id" field.
 *
 * @param tableName the name of the table, must be unique, case is not sensitive
 * @param indexDirectory the directory to store the index and data, must not duplicate
 *   with other tables' directories; it is created if it does not exist yet
 * @param schema the schema of the table (without the "_id" attribute)
 * @param luceneAnalyzerString the string representing the lucene analyzer used
 * @throws StorageException if the table already exists, the index directory cannot be
 *   created or resolved, the index directory is already used by another table,
 *   or the analyzer string is not valid
 */
public void createTable(String tableName, Path indexDirectory, Schema schema, String luceneAnalyzerString) throws StorageException {
    // table names are case insensitive: normalize to lower case
    tableName = tableName.toLowerCase();
    // table should not exist
    if (checkTableExistence(tableName)) {
        throw new StorageException(String.format("Table %s already exists.", tableName));
    }
    // create folder if it's not there
    // and convert the index directory to its absolute path
    String indexDirectoryStr;
    try {
        if (Files.notExists(indexDirectory)) {
            Files.createDirectories(indexDirectory);
        }
        indexDirectoryStr = indexDirectory.toRealPath().toString();
    } catch (IOException e) {
        throw new StorageException(e);
    }
    // check if the indexDirectory overlaps with another table's index directory
    Query indexDirectoryQuery = new TermQuery(new Term(CatalogConstants.TABLE_DIRECTORY, indexDirectoryStr));
    DataReader tableCatalogDataReader = new DataReader(CatalogConstants.TABLE_CATALOG_DATASTORE, indexDirectoryQuery);
    tableCatalogDataReader.setPayloadAdded(false);
    tableCatalogDataReader.open();
    Tuple nextTuple;
    try {
        // a single matching tuple is enough to prove the directory is taken
        nextTuple = tableCatalogDataReader.getNextTuple();
    } finally {
        // always release the catalog reader, even if reading fails
        tableCatalogDataReader.close();
    }
    // if the index directory is already taken by another table, throws an exception
    if (nextTuple != null) {
        String overlapTableName = nextTuple.getField(CatalogConstants.TABLE_NAME).getValue().toString();
        throw new StorageException(String.format("Table %s already takes the index directory %s. Please choose another directory.", overlapTableName, indexDirectory));
    }
    // check if the lucene analyzer string is valid
    Analyzer luceneAnalyzer;
    try {
        luceneAnalyzer = LuceneAnalyzerConstants.getLuceneAnalyzer(luceneAnalyzerString);
    } catch (DataflowException e) {
        // NOTE(review): the original cause is dropped here; attach it if StorageException
        // offers a (message, cause) constructor -- confirm against its declaration
        throw new StorageException("Lucene Analyzer String is not valid.");
    }
    // create the directory and clear all data in the index directory
    Schema tableSchema = Schema.Builder.getSchemaWithID(schema);
    DataStore tableDataStore = new DataStore(indexDirectory, tableSchema);
    DataWriter dataWriter = new DataWriter(tableDataStore, luceneAnalyzer);
    dataWriter.open();
    try {
        dataWriter.clearData();
    } finally {
        // always release the writer, even if clearing fails
        dataWriter.close();
    }
    // write table info to catalog
    writeTableInfoToCatalog(tableName, indexDirectory, schema, luceneAnalyzerString);
}
Also used : TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) TermQuery(org.apache.lucene.search.TermQuery) Schema(edu.uci.ics.texera.api.schema.Schema) IOException(java.io.IOException) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) DataflowException(edu.uci.ics.texera.api.exception.DataflowException) StorageException(edu.uci.ics.texera.api.exception.StorageException) Tuple(edu.uci.ics.texera.api.tuple.Tuple)

Example 67 with Schema

use of edu.uci.ics.texera.api.schema.Schema in project textdb by TextDB.

the class RelationManagerTest method test15.

/*
 * Test that creating a second table on an index directory that is already
 * used by another table is rejected with a StorageException.
 */
@Test
public void test15() throws Exception {
    String tableName1 = "relation_manager_test_table_15_1";
    String tableName2 = "relation_manager_test_table_15_2";
    String indexDirectory = "./index/test_table/relation_manager_test_table_15";
    Schema schema = new Schema(new Attribute("content", AttributeType.TEXT));
    String luceneAnalyzerString = "standard";
    // start from a clean state in case a previous run left the tables behind
    relationManager.deleteTable(tableName1);
    relationManager.deleteTable(tableName2);
    relationManager.createTable(tableName1, Paths.get(indexDirectory), schema, luceneAnalyzerString);
    try {
        // create another table with the same directory should fail
        try {
            relationManager.createTable(tableName2, Paths.get(indexDirectory), schema, luceneAnalyzerString);
            Assert.fail("Storage exception should be thrown because of duplicate index directories");
        } catch (StorageException expected) {
            // expected: the index directory is already taken by tableName1
        }
    } finally {
        // clean up even when the assertion above fails, including the second
        // table in case it was wrongly created
        relationManager.deleteTable(tableName2);
        relationManager.deleteTable(tableName1);
    }
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) StorageException(edu.uci.ics.texera.api.exception.StorageException) Test(org.junit.Test)

Example 68 with Schema

use of edu.uci.ics.texera.api.schema.Schema in project textdb by TextDB.

the class RelationManagerTest method test17.

/*
 * Test on getMetaData() to see if it successfully gets metadata from "relation_manager_test_table".
 * The returned schema is expected to list the "_id" attribute first, followed by the
 * attributes the table was created with.
 */
@Test
public void test17() throws Exception {
    String tableName = "relation_manager_test_table";
    String tableDirectory = "./index/test_table";
    Schema tableSchema = new Schema(new Attribute("content", AttributeType.STRING), new Attribute("number", AttributeType.STRING));
    RelationManager relationManager = RelationManager.getInstance();
    relationManager.deleteTable(tableName);
    relationManager.createTable(tableName, Paths.get(tableDirectory), tableSchema, LuceneAnalyzerConstants.standardAnalyzerString());
    try {
        List<TableMetadata> metaData = relationManager.getMetaData();
        // result should contain metadata about test table "relation_manager_test_table"
        List<TableMetadata> result = metaData.stream().filter(x -> x.getTableName().equals(tableName)).collect(Collectors.toList());
        // JUnit expects (expected, actual); the swapped order produced a misleading failure message
        Assert.assertEquals(1, result.size());
        TableMetadata testTable = result.get(0);
        List<String> testTableSchema = testTable.getSchema().getAttributeNames();
        Assert.assertEquals(tableName, testTable.getTableName());
        Assert.assertEquals("_id", testTableSchema.get(0));
        Assert.assertEquals("content", testTableSchema.get(1));
        Assert.assertEquals("number", testTableSchema.get(2));
    } finally {
        // remove the test table even when an assertion fails
        relationManager.deleteTable(tableName);
    }
}
Also used : Query(org.apache.lucene.search.Query) LuceneAnalyzerConstants(edu.uci.ics.texera.storage.constants.LuceneAnalyzerConstants) Analyzer(org.apache.lucene.analysis.Analyzer) Tuple(edu.uci.ics.texera.api.tuple.Tuple) Term(org.apache.lucene.index.Term) TexeraException(edu.uci.ics.texera.api.exception.TexeraException) Test(org.junit.Test) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) TermQuery(org.apache.lucene.search.TermQuery) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Paths(java.nio.file.Paths) StringField(edu.uci.ics.texera.api.field.StringField) AttributeType(edu.uci.ics.texera.api.schema.AttributeType) Schema(edu.uci.ics.texera.api.schema.Schema) StorageException(edu.uci.ics.texera.api.exception.StorageException) Attribute(edu.uci.ics.texera.api.schema.Attribute) Assert(org.junit.Assert) IDField(edu.uci.ics.texera.api.field.IDField) Before(org.junit.Before) Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) Test(org.junit.Test)

Example 69 with Schema

use of edu.uci.ics.texera.api.schema.Schema in project textdb by TextDB.

the class RelationManagerTest method test7.

/*
 * Test creating and deleting multiple tables in relation manager:
 * create a batch of tables, verify the directory and schema of each one,
 * delete them all, then verify that looking up any of them fails.
 */
@Test
public void test7() throws Exception {
    final String baseName = "relation_manager_test_table";
    final String baseDirectory = "./index/test_table";
    Schema tableSchema = new Schema(
            new Attribute("city", AttributeType.STRING),
            new Attribute("description", AttributeType.TEXT),
            new Attribute("tax rate", AttributeType.DOUBLE),
            new Attribute("population", AttributeType.INTEGER),
            new Attribute("record time", AttributeType.DATE));
    final int tableCount = 10;
    RelationManager relationManager = RelationManager.getInstance();

    // create tables, dropping any leftovers from a previous run first
    for (int idx = 0; idx < tableCount; idx++) {
        String name = baseName + '_' + idx;
        relationManager.deleteTable(name);
        relationManager.createTable(name, Paths.get(baseDirectory + '_' + idx), tableSchema,
                LuceneAnalyzerConstants.standardAnalyzerString());
    }

    // assert tables are correctly created
    for (int idx = 0; idx < tableCount; idx++) {
        String name = baseName + '_' + idx;
        String expectedDirectory = new File(baseDirectory + '_' + idx).getCanonicalPath();
        String actualDirectory = relationManager.getTableDirectory(name);
        Assert.assertEquals(expectedDirectory, actualDirectory);

        Schema expectedSchema = Schema.Builder.getSchemaWithID(tableSchema);
        Schema actualSchema = relationManager.getTableSchema(name);
        Assert.assertEquals(expectedSchema, actualSchema);
    }

    // delete tables
    for (int idx = 0; idx < tableCount; idx++) {
        relationManager.deleteTable(baseName + '_' + idx);
    }

    // assert tables are correctly deleted: every lookup must raise a StorageException
    int failedLookups = 0;
    for (int idx = 0; idx < tableCount; idx++) {
        try {
            relationManager.getTableDirectory(baseName + '_' + idx);
        } catch (StorageException e) {
            failedLookups += 1;
        }
    }
    Assert.assertEquals(tableCount, failedLookups);
}
Also used : Attribute(edu.uci.ics.texera.api.schema.Attribute) Schema(edu.uci.ics.texera.api.schema.Schema) File(java.io.File) StorageException(edu.uci.ics.texera.api.exception.StorageException) Test(org.junit.Test)

Example 70 with Schema

use of edu.uci.ics.texera.api.schema.Schema in project textdb by TextDB.

the class RelationManagerTest method test1.

/*
 * Test the information about "table catalog" itself is stored properly:
 * its directory, its lucene analyzer and its schema (with the "_id" attribute added).
 */
@Test
public void test1() throws Exception {
    String tableCatalogDirectory = relationManager.getTableDirectory(CatalogConstants.TABLE_CATALOG);
    Analyzer tableCatalogLuceneAnalyzer = relationManager.getTableAnalyzer(CatalogConstants.TABLE_CATALOG);
    Schema tableCatalogSchema = relationManager.getTableSchema(CatalogConstants.TABLE_CATALOG);
    // JUnit expects (expected, actual): the catalog constants are the expected values
    Assert.assertEquals(CatalogConstants.TABLE_CATALOG_DIRECTORY.toRealPath().toString(), tableCatalogDirectory);
    Assert.assertTrue(tableCatalogLuceneAnalyzer instanceof StandardAnalyzer);
    Assert.assertEquals(Schema.Builder.getSchemaWithID(CatalogConstants.TABLE_CATALOG_SCHEMA), tableCatalogSchema);
}
Also used : Schema(edu.uci.ics.texera.api.schema.Schema) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Analyzer(org.apache.lucene.analysis.Analyzer) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Test(org.junit.Test)

Aggregations

Schema (edu.uci.ics.texera.api.schema.Schema) 134, Test (org.junit.Test) 109, Tuple (edu.uci.ics.texera.api.tuple.Tuple) 106, ArrayList (java.util.ArrayList) 97, IField (edu.uci.ics.texera.api.field.IField) 96, Span (edu.uci.ics.texera.api.span.Span) 86, TextField (edu.uci.ics.texera.api.field.TextField) 77, Attribute (edu.uci.ics.texera.api.schema.Attribute) 76, StringField (edu.uci.ics.texera.api.field.StringField) 72, IntegerField (edu.uci.ics.texera.api.field.IntegerField) 71, DoubleField (edu.uci.ics.texera.api.field.DoubleField) 60, DateField (edu.uci.ics.texera.api.field.DateField) 57, SimpleDateFormat (java.text.SimpleDateFormat) 54, Dictionary (edu.uci.ics.texera.dataflow.dictionarymatcher.Dictionary) 29, ListField (edu.uci.ics.texera.api.field.ListField) 21, IOperator (edu.uci.ics.texera.api.dataflow.IOperator) 15, DataflowException (edu.uci.ics.texera.api.exception.DataflowException) 14, AttributeType (edu.uci.ics.texera.api.schema.AttributeType) 13, TexeraException (edu.uci.ics.texera.api.exception.TexeraException) 9, JoinDistancePredicate (edu.uci.ics.texera.dataflow.join.JoinDistancePredicate) 9