Search in sources :

Example 31 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreServiceFileSetTestRun method testTPFSWithDateTimestamp.

/**
 * Creates an explorable time-partitioned file set backed by an Avro schema that contains a
 * DATE and a TIMESTAMP_MILLIS logical type, writes one partition, and checks that Hive returns
 * the date and timestamp columns. Afterwards the dataset is dropped and created again to check
 * that the Hive table disappears and reappears accordingly.
 */
@Test
public void testTPFSWithDateTimestamp() throws Exception {
    // NOTE(review): this replaces the JVM-wide default time zone and nothing in this method
    // restores it - confirm that other tests are fine with UTC staying in effect.
    TimeZone.setDefault(TimeZone.getTimeZone("UTC"));

    final DatasetId fileSetId = NAMESPACE_ID.dataset("dtfs");
    final String hiveTable = getDatasetHiveName(fileSetId);
    final Schema recordSchema = Schema.recordOf(
        "dt",
        Schema.Field.of("id", Schema.of(Schema.Type.INT)),
        Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))),
        Schema.Field.of("dt", Schema.of(Schema.LogicalType.DATE)),
        Schema.Field.of("ts", Schema.nullableOf(Schema.of(Schema.LogicalType.TIMESTAMP_MILLIS))));

    // Values shared by both dataset creations and by the "show tables" checks
    final String datasetType = "timePartitionedFileSet";
    final String basePath = "somePath";
    final String serDe = "org.apache.hadoop.hive.serde2.avro.AvroSerDe";
    final String inputFormat = "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat";
    final String outputFormat = "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat";
    final String schemaLiteral = recordSchema.toString();
    final String showTables = "show tables";
    final String deserializerComment = "from deserializer";

    // Step 1: create the file set with explore enabled
    datasetFramework.addInstance(
        datasetType,
        fileSetId,
        FileSetProperties.builder()
            .setBasePath(basePath)
            .setEnableExploreOnCreate(true)
            .setSerDe(serDe)
            .setExploreInputFormat(inputFormat)
            .setExploreOutputFormat(outputFormat)
            .setTableProperty("avro.schema.literal", schemaLiteral)
            .build());

    // Step 2: the Hive table backing the file set must now be listed
    runCommand(
        NAMESPACE_ID,
        showTables,
        true,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, deserializerComment)),
        Lists.newArrayList(new QueryResult(Lists.newArrayList(hiveTable))));

    // Step 3: write an Avro file through the dataset instance
    TimePartitionedFileSet fileSet = datasetFramework.getDataset(fileSetId, DatasetDefinition.NO_ARGUMENTS, null);
    Assert.assertNotNull(fileSet);
    Location avroFile = fileSet.getEmbeddedFileSet().getLocation("file1/nn");
    generateAvroFile(avroFile.getOutputStream(), recordSchema);

    // Step 4: register the partition. Hive expects a partition to be a directory, hence the
    // directory "file1" holding a single file.
    long partitionTime = DATE_FORMAT.parse("12/10/14 1:00 am").getTime();
    addTimePartition(fileSet, partitionTime, "file1");

    // Step 5: the date and timestamp written to the file must be readable through Hive
    runCommand(
        NAMESPACE_ID,
        "SELECT id, name, dt, ts FROM " + hiveTable + " LIMIT 50",
        true,
        Lists.newArrayList(
            new ColumnDesc("id", "INT", 1, null),
            new ColumnDesc("name", "STRING", 2, null),
            new ColumnDesc("dt", "DATE", 3, null),
            new ColumnDesc("ts", "TIMESTAMP", 4, null)),
        Lists.newArrayList(
            new QueryResult(Lists.newArrayList(1, "alice", "1970-01-01", "2018-09-07 16:09:50.595"))));

    // Step 6: dropping the dataset must remove the Hive table
    datasetFramework.deleteInstance(fileSetId);
    runCommand(
        NAMESPACE_ID,
        showTables,
        false,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, deserializerComment)),
        Collections.emptyList());

    // Step 7: creating the same file set again must bring the Hive table back
    datasetFramework.addInstance(
        datasetType,
        fileSetId,
        FileSetProperties.builder()
            .setBasePath(basePath)
            .setEnableExploreOnCreate(true)
            .setSerDe(serDe)
            .setExploreInputFormat(inputFormat)
            .setExploreOutputFormat(outputFormat)
            .setTableProperty("avro.schema.literal", schemaLiteral)
            .build());
    runCommand(
        NAMESPACE_ID,
        showTables,
        true,
        Lists.newArrayList(new ColumnDesc("tab_name", "STRING", 1, deserializerComment)),
        Lists.newArrayList(new QueryResult(Lists.newArrayList(hiveTable))));
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) Schema(io.cdap.cdap.api.data.schema.Schema) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) TimePartitionedFileSet(io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet) DatasetId(io.cdap.cdap.proto.id.DatasetId) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 32 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreServiceTestRun method testJoin.

/**
 * Runs inner, right outer, left outer and full outer joins between the table named by
 * MY_TABLE_NAME and a second keyStructValueTable dataset. The second dataset is created here,
 * filled inside a short transaction, and deleted again in the finally block.
 */
@Test
public void testJoin() throws Exception {
    DatasetId otherTableId = NAMESPACE_ID.dataset("my_table_1");
    String otherTable = getDatasetHiveName(otherTableId);

    // Hive type of the value column and the serialized rows the assertions expect.
    // NOTE(review): the rows for MY_TABLE_NAME are presumably written by the shared test setup,
    // which is not visible here.
    String structType = "struct<name:string,ints:array<int>>";
    String mineFirst = "{\"name\":\"first\",\"ints\":[1,2,3,4,5]}";
    String mineTwo = "{\"name\":\"two\",\"ints\":[10,11,12,13,14]}";
    String otherTwo = "{\"name\":\"two\",\"ints\":[20,21,22,23,24]}";
    String otherThird = "{\"name\":\"third\",\"ints\":[30,31,32,33,34]}";

    // Admin operation: create the second dataset instance
    datasetFramework.addInstance("keyStructValueTable", otherTableId, DatasetProperties.EMPTY);
    try {
        Transaction tx = transactionManager.startShort(100);

        // Data operations on the new dataset, all inside the transaction started above
        KeyStructValueTableDefinition.KeyStructValueTable table =
            datasetFramework.getDataset(otherTableId, DatasetDefinition.NO_ARGUMENTS, null);
        Assert.assertNotNull(table);
        table.startTx(tx);

        KeyValue.Value twoValue = new KeyValue.Value("two", Lists.newArrayList(20, 21, 22, 23, 24));
        KeyValue.Value thirdValue = new KeyValue.Value("third", Lists.newArrayList(30, 31, 32, 33, 34));
        table.put("2", twoValue);
        table.put("3", thirdValue);
        Assert.assertEquals(twoValue, table.get("2"));

        // Commit sequence; the order of these calls is kept exactly as it was
        Assert.assertTrue(table.commitTx());
        transactionManager.canCommit(tx.getTransactionId(), table.getTxChanges());
        transactionManager.commit(tx.getTransactionId(), tx.getWritePointer());
        table.postTxCommit();

        // Inner join: only key "2" exists on both sides
        String query = String.format(
            "select %s.key, %s.value from %s join %s on (%s.key=%s.key)",
            MY_TABLE_NAME, MY_TABLE_NAME, MY_TABLE_NAME, otherTable, MY_TABLE_NAME, otherTable);
        runCommand(
            NAMESPACE_ID,
            query,
            true,
            Lists.newArrayList(
                new ColumnDesc(MY_TABLE_NAME + ".key", "STRING", 1, null),
                new ColumnDesc(MY_TABLE_NAME + ".value", structType, 2, null)),
            Lists.newArrayList(new QueryResult(Lists.<Object>newArrayList("2", mineTwo))));

        // Right outer join: every row of the new dataset, matched where possible
        query = String.format(
            "select %s.key, %s.value, %s.key, %s.value from %s right outer join %s on (%s.key=%s.key)",
            MY_TABLE_NAME, MY_TABLE_NAME, otherTable, otherTable,
            MY_TABLE_NAME, otherTable, MY_TABLE_NAME, otherTable);
        runCommand(
            NAMESPACE_ID,
            query,
            true,
            Lists.newArrayList(
                new ColumnDesc(MY_TABLE_NAME + ".key", "STRING", 1, null),
                new ColumnDesc(MY_TABLE_NAME + ".value", structType, 2, null),
                new ColumnDesc(otherTable + ".key", "STRING", 3, null),
                new ColumnDesc(otherTable + ".value", structType, 4, null)),
            Lists.newArrayList(
                new QueryResult(Lists.<Object>newArrayList("2", mineTwo, "2", otherTwo)),
                new QueryResult(Lists.<Object>newArrayList(null, null, "3", otherThird))));

        // Left outer join: every row of MY_TABLE_NAME, matched where possible
        query = String.format(
            "select %s.key, %s.value, %s.key, %s.value from %s left outer join %s on (%s.key=%s.key)",
            MY_TABLE_NAME, MY_TABLE_NAME, otherTable, otherTable,
            MY_TABLE_NAME, otherTable, MY_TABLE_NAME, otherTable);
        runCommand(
            NAMESPACE_ID,
            query,
            true,
            Lists.newArrayList(
                new ColumnDesc(MY_TABLE_NAME + ".key", "STRING", 1, null),
                new ColumnDesc(MY_TABLE_NAME + ".value", structType, 2, null),
                new ColumnDesc(otherTable + ".key", "STRING", 3, null),
                new ColumnDesc(otherTable + ".value", structType, 4, null)),
            Lists.newArrayList(
                new QueryResult(Lists.<Object>newArrayList("1", mineFirst, null, null)),
                new QueryResult(Lists.<Object>newArrayList("2", mineTwo, "2", otherTwo))));

        // Full outer join: rows of both tables
        query = String.format(
            "select %s.key, %s.value, %s.key, %s.value from %s full outer join %s on (%s.key=%s.key)",
            MY_TABLE_NAME, MY_TABLE_NAME, otherTable, otherTable,
            MY_TABLE_NAME, otherTable, MY_TABLE_NAME, otherTable);
        runCommand(
            NAMESPACE_ID,
            query,
            true,
            Lists.newArrayList(
                new ColumnDesc(MY_TABLE_NAME + ".key", "STRING", 1, null),
                new ColumnDesc(MY_TABLE_NAME + ".value", structType, 2, null),
                new ColumnDesc(otherTable + ".key", "STRING", 3, null),
                new ColumnDesc(otherTable + ".value", structType, 4, null)),
            Lists.newArrayList(
                new QueryResult(Lists.<Object>newArrayList("1", mineFirst, null, null)),
                new QueryResult(Lists.<Object>newArrayList("2", mineTwo, "2", otherTwo)),
                new QueryResult(Lists.<Object>newArrayList(null, null, "3", otherThird))));
    } finally {
        datasetFramework.deleteInstance(otherTableId);
    }
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) KeyValue(io.cdap.cdap.explore.service.datasets.KeyStructValueTableDefinition.KeyValue) Transaction(org.apache.tephra.Transaction) KeyStructValueTableDefinition(io.cdap.cdap.explore.service.datasets.KeyStructValueTableDefinition) KeyValue(io.cdap.cdap.explore.service.datasets.KeyStructValueTableDefinition.KeyValue) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Example 33 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreServiceTestRun method testNamespaceCreationDeletion.

/**
 * Checks the schema listing returned by the explore client before a namespace is added,
 * after it is added (a "cdap_"-prefixed entry for it is expected), and after it is removed.
 */
@Test
public void testNamespaceCreationDeletion() throws Exception {
    NamespaceId namespaceId = new NamespaceId("test");

    // Every schema listing below is expected to have the same two columns
    ImmutableList<ColumnDesc> schemaColumns = ImmutableList.of(
        new ColumnDesc("TABLE_SCHEM", "STRING", 1, "Schema name."),
        new ColumnDesc("TABLE_CATALOG", "STRING", 2, "Catalog name."));

    // Listing before the namespace is added
    ListenableFuture<ExploreExecutionResult> initialSchemas = exploreClient.schemas(null, null);
    assertStatementResult(
        initialSchemas,
        true,
        schemaColumns,
        ImmutableList.of(
            new QueryResult(Lists.newArrayList(NAMESPACE_DATABASE, "")),
            new QueryResult(Lists.newArrayList(OTHER_NAMESPACE_DATABASE, "")),
            new QueryResult(Lists.newArrayList(DEFAULT_DATABASE, ""))));

    // Add the namespace in explore and wait for the operation to finish
    exploreClient.addNamespace(new NamespaceMeta.Builder().setName("test").build()).get();
    createNamespace(namespaceId);
    try {
        // The new namespace must be listed
        ListenableFuture<ExploreExecutionResult> schemasWithNamespace = exploreClient.schemas(null, null);
        assertStatementResult(
            schemasWithNamespace,
            true,
            schemaColumns,
            ImmutableList.of(
                new QueryResult(Lists.newArrayList(NAMESPACE_DATABASE, "")),
                new QueryResult(Lists.newArrayList(OTHER_NAMESPACE_DATABASE, "")),
                new QueryResult(Lists.newArrayList("cdap_" + namespaceId.getNamespace(), "")),
                new QueryResult(Lists.newArrayList(DEFAULT_DATABASE, ""))));

        // Remove the namespace from explore and wait for the operation to finish
        exploreClient.removeNamespace(namespaceId).get();

        // The listing must match the initial one again
        ListenableFuture<ExploreExecutionResult> schemasAfterRemoval = exploreClient.schemas(null, null);
        assertStatementResult(
            schemasAfterRemoval,
            true,
            schemaColumns,
            ImmutableList.of(
                new QueryResult(Lists.<Object>newArrayList(NAMESPACE_DATABASE, "")),
                new QueryResult(Lists.<Object>newArrayList(OTHER_NAMESPACE_DATABASE, "")),
                new QueryResult(Lists.<Object>newArrayList(DEFAULT_DATABASE, ""))));
    } finally {
        deleteNamespace(namespaceId);
    }
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) NamespaceId(io.cdap.cdap.proto.id.NamespaceId) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) ExploreExecutionResult(io.cdap.cdap.explore.client.ExploreExecutionResult) Test(org.junit.Test)

Example 34 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreServiceTimeoutTest method testTimeoutFetchAllResults.

/**
 * Runs a query, fetches all of its results, and checks that the transactions started for the
 * query are no longer in progress, that status and schema can still be read through the handle,
 * and that the handle is no longer found once it has been closed and the inactivity timeout has
 * passed.
 */
@Test
public void testTimeoutFetchAllResults() throws Exception {
    // Transactions in progress before and after starting the query; the difference belongs to the query
    Set<Long> txnsBeforeQuery = transactionManager.getCurrentState().getInProgress().keySet();
    QueryHandle handle = exploreService.execute(NAMESPACE_ID, "select key, value from " + MY_TABLE_NAME);
    Set<Long> txnsAfterQuery = transactionManager.getCurrentState().getInProgress().keySet();
    Set<Long> queryTxns = Sets.difference(txnsAfterQuery, txnsBeforeQuery);
    Assert.assertFalse(queryTxns.isEmpty());

    QueryStatus status = waitForCompletionStatus(handle, 200, TimeUnit.MILLISECONDS, 20);
    Assert.assertEquals(QueryStatus.OpStatus.FINISHED, status.getStatus());
    Assert.assertTrue(status.hasResults());

    List<ColumnDesc> schema = exploreService.getResultSchema(handle);

    // Drain the result set in batches of 100 until an empty batch comes back
    boolean drained;
    do {
        drained = exploreService.nextResults(handle, 100).isEmpty();
    } while (!drained);

    // Give the transaction some time to get closed
    TimeUnit.SECONDS.sleep(1);

    // None of the query's transactions may still be in progress
    Set<Long> inProgressNow = transactionManager.getCurrentState().getInProgress().keySet();
    Set<Long> stillOpen = Sets.intersection(queryTxns, inProgressNow).immutableCopy();
    Assert.assertEquals(ImmutableSet.<Long>of(), stillOpen);

    // Calls on the now inactive handle must still work
    Assert.assertEquals(status, exploreService.getStatus(handle));
    Assert.assertEquals(schema, exploreService.getResultSchema(handle));
    exploreService.close(handle);

    // Wait until the inactivity timeout has passed
    TimeUnit.SECONDS.sleep(INACTIVE_OPERATION_TIMEOUT_SECS + 3);

    // After cleanup the handle must be unknown
    boolean handleNotFound = false;
    try {
        exploreService.getStatus(handle);
    } catch (HandleNotFoundException expected) {
        // Expected exception due to timeout
        handleNotFound = true;
    }
    Assert.assertTrue("Should throw HandleNotFoundException due to operation cleanup", handleNotFound);
}
Also used : QueryHandle(io.cdap.cdap.proto.QueryHandle) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) QueryStatus(io.cdap.cdap.proto.QueryStatus) Test(org.junit.Test)

Example 35 with ColumnDesc

use of io.cdap.cdap.proto.ColumnDesc in project cdap by cdapio.

the class HiveExploreStructuredRecordTestRun method testRecordScannableAndWritableIsOK.

/**
 * Creates a "TableWrapper" dataset with a single string field "x", enables explore for it and
 * checks that "describe dataset_tabul" reports that one column. The dataset is deleted in the
 * finally block.
 */
@Test
public void testRecordScannableAndWritableIsOK() throws Exception {
    DatasetId tableId = NAMESPACE_ID.dataset("tabul");
    Schema recordSchema = Schema.recordOf("intRecord", Schema.Field.of("x", Schema.of(Schema.Type.STRING)));
    String deserializerComment = "from deserializer";

    datasetFramework.addInstance(
        "TableWrapper",
        tableId,
        DatasetProperties.builder()
            .add(DatasetProperties.SCHEMA, recordSchema.toString())
            .build());
    DatasetSpecification tableSpec = datasetFramework.getDatasetSpec(tableId);
    try {
        exploreTableManager.enableDataset(tableId, tableSpec, false);
        // The describe output is expected to list exactly the field from the schema above
        runCommand(
            NAMESPACE_ID,
            "describe dataset_tabul",
            true,
            Lists.newArrayList(
                new ColumnDesc("col_name", "STRING", 1, deserializerComment),
                new ColumnDesc("data_type", "STRING", 2, deserializerComment),
                new ColumnDesc("comment", "STRING", 3, deserializerComment)),
            Lists.newArrayList(
                new QueryResult(Lists.<Object>newArrayList("x", "string", deserializerComment))));
    } finally {
        datasetFramework.deleteInstance(tableId);
    }
}
Also used : QueryResult(io.cdap.cdap.proto.QueryResult) DatasetSpecification(io.cdap.cdap.api.dataset.DatasetSpecification) ColumnDesc(io.cdap.cdap.proto.ColumnDesc) DatasetId(io.cdap.cdap.proto.id.DatasetId) Test(org.junit.Test)

Aggregations

ColumnDesc (io.cdap.cdap.proto.ColumnDesc): 72, QueryResult (io.cdap.cdap.proto.QueryResult): 46, Test (org.junit.Test): 42, DatasetId (io.cdap.cdap.proto.id.DatasetId): 32, ExploreExecutionResult (io.cdap.cdap.explore.client.ExploreExecutionResult): 24, TimePartitionedFileSet (io.cdap.cdap.api.dataset.lib.TimePartitionedFileSet): 18, Location (org.apache.twill.filesystem.Location): 16, PartitionedFileSet (io.cdap.cdap.api.dataset.lib.PartitionedFileSet): 14, SQLException (java.sql.SQLException): 14, FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 12, Schema (io.cdap.cdap.api.data.schema.Schema): 8, Table (io.cdap.cdap.api.dataset.table.Table): 8, ExploreClient (io.cdap.cdap.explore.client.ExploreClient): 8, MockExploreClient (io.cdap.cdap.explore.client.MockExploreClient): 8, QueryStatus (io.cdap.cdap.proto.QueryStatus): 8, ResultSet (java.sql.ResultSet): 8, ImmutableList (com.google.common.collect.ImmutableList): 6, PartitionKey (io.cdap.cdap.api.dataset.lib.PartitionKey): 6, PartitionedFileSetProperties (io.cdap.cdap.api.dataset.lib.PartitionedFileSetProperties): 6, QueryHandle (io.cdap.cdap.proto.QueryHandle): 6