Example 1 with QueryBasedHiveConversionEntity

Use of org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity in project incubator-gobblin by apache.

From the class HiveAvroToOrcConverterTest, method testNestedSchemaDDLandDML.

/**
 * Test nested DDL and DML generation.
 * @throws Exception
 */
@Test
public void testNestedSchemaDDLandDML() throws Exception {
    String dbName = "testdb";
    String tableName = "testtable";
    String tableSdLoc = "/tmp/testtable";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    Table table = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");
    WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);
    wus.getJobState().setProp("orc.table.flatten.schema", "false");
    try (HiveAvroToNestedOrcConverter converter = new HiveAvroToNestedOrcConverter()) {
        Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
            .put("destinationFormats", "nestedOrc")
            .put("nestedOrc.destination.tableName", "testtable_orc_nested")
            .put("nestedOrc.destination.dbName", dbName)
            .put("nestedOrc.destination.dataPath", "file:/tmp/testtable_orc_nested")
            .build());
        ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);
        List<QueryBasedHiveConversionEntity> conversionEntities = Lists.newArrayList(converter.convertRecord(
            converter.convertSchema(schema, wus),
            new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));
        Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");
        QueryBasedHiveConversionEntity queryBasedHiveConversionEntity = conversionEntities.get(0);
        List<String> queries = queryBasedHiveConversionEntity.getQueries();
        Assert.assertEquals(queries.size(), 4, "Four queries (DDL and DML) should be returned");
        // Compare only the part after the first bracket in the DDL and after the SELECT clause in the DML,
        // because the staging table name contains a random component
        String actualDDLQuery = StringUtils.substringAfter(queries.get(0).trim(), "(");
        String actualDMLQuery = StringUtils.substringAfter(queries.get(0).trim(), "SELECT");
        String expectedDDLQuery = StringUtils.substringAfter(
            ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.ddl"), "(");
        String expectedDMLQuery = StringUtils.substringAfter(
            ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.dml"), "SELECT");
        Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
        Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
    }
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) SchemaAwareHiveTable(org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Config(com.typesafe.config.Config) ConvertibleHiveDataset(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset) Schema(org.apache.avro.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) QueryBasedHiveConversionEntity(org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity) ConvertibleHiveDatasetTest(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest) Test(org.testng.annotations.Test)
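
The assertions above keep only the part of each query after the first bracket (DDL) or after SELECT (DML), because the staging table name carries a random component. Below is a minimal stand-alone sketch of that normalization using org.apache.commons.lang3.StringUtils; the class name and the query strings are hypothetical and not part of the Gobblin test.

import org.apache.commons.lang3.StringUtils;

public class QueryNormalizationSketch {

    // Keep only the column list and body of a CREATE TABLE statement.
    static String normalizeDdl(String ddl) {
        return StringUtils.substringAfter(ddl.trim(), "(");
    }

    // Keep only the projection and source of an INSERT ... SELECT statement.
    static String normalizeDml(String dml) {
        return StringUtils.substringAfter(dml.trim(), "SELECT");
    }

    public static void main(String[] args) {
        String ddl = "CREATE TABLE testtable_orc_nested_a1b2c3 (id int, name string)";
        String dml = "INSERT OVERWRITE TABLE testtable_orc_nested_a1b2c3 SELECT id, name FROM testtable";
        // Both normalized strings are independent of the random staging suffix.
        System.out.println(normalizeDdl(ddl));
        System.out.println(normalizeDml(dml));
    }
}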

Example 2 with QueryBasedHiveConversionEntity

Use of org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity in project incubator-gobblin by apache.

From the class HiveAvroToOrcConverterTest, method testFlattenSchemaDDLandDML.

/**
 * Test flattened DDL and DML generation.
 * @throws Exception
 */
@Test
public void testFlattenSchemaDDLandDML() throws Exception {
    String dbName = "testdb";
    String tableName = "testtable";
    String tableSdLoc = "/tmp/testtable";
    this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
    Table table = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
    Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");
    WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);
    try (HiveAvroToFlattenedOrcConverter converter = new HiveAvroToFlattenedOrcConverter()) {
        Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
            .put("destinationFormats", "flattenedOrc")
            .put("flattenedOrc.destination.dbName", dbName)
            .put("flattenedOrc.destination.tableName", tableName + "_orc")
            .put("flattenedOrc.destination.dataPath", "file:" + tableSdLoc + "_orc")
            .build());
        ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);
        List<QueryBasedHiveConversionEntity> conversionEntities = Lists.newArrayList(converter.convertRecord(
            converter.convertSchema(schema, wus),
            new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));
        Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");
        QueryBasedHiveConversionEntity queryBasedHiveConversionEntity = conversionEntities.get(0);
        List<String> queries = queryBasedHiveConversionEntity.getQueries();
        Assert.assertEquals(queries.size(), 4, "Four queries (DDL and DML) should be returned");
        // Compare only the part after the first bracket in the DDL and after the SELECT clause in the DML,
        // because the staging table name contains a random component
        String actualDDLQuery = StringUtils.substringAfter(queries.get(0).trim(), "(");
        String actualDMLQuery = StringUtils.substringAfter(queries.get(0).trim(), "SELECT");
        String expectedDDLQuery = StringUtils.substringAfter(
            ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_flattened.ddl"), "(");
        String expectedDMLQuery = StringUtils.substringAfter(
            ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_flattened.dml"), "SELECT");
        Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
        Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
    }
}
Also used : Table(org.apache.hadoop.hive.metastore.api.Table) SchemaAwareHiveTable(org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Config(com.typesafe.config.Config) ConvertibleHiveDataset(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset) Schema(org.apache.avro.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) QueryBasedHiveConversionEntity(org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity) ConvertibleHiveDatasetTest(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest) Test(org.testng.annotations.Test)
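
The flattened run differs from the nested one mainly in the destination-format prefix of the dataset config. A small sketch of building such a config and reading it back with the Typesafe Config API follows; the key names and values mirror the test above, but the class itself is illustrative rather than Gobblin code.

import com.google.common.collect.ImmutableMap;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

public class DestinationConfigSketch {

    public static void main(String[] args) {
        // Same shape of config as the test passes to createTestConvertibleDataset.
        Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
            .put("destinationFormats", "flattenedOrc")
            .put("flattenedOrc.destination.dbName", "testdb")
            .put("flattenedOrc.destination.tableName", "testtable_orc")
            .put("flattenedOrc.destination.dataPath", "file:/tmp/testtable_orc")
            .build());

        // "destinationFormats" names the prefix under which the destination properties live.
        String format = config.getString("destinationFormats");
        System.out.println(config.getString(format + ".destination.dbName"));    // testdb
        System.out.println(config.getString(format + ".destination.tableName")); // testtable_orc
        System.out.println(config.getString(format + ".destination.dataPath"));  // file:/tmp/testtable_orc
    }
}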

Example 3 with QueryBasedHiveConversionEntity

Use of org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity in project incubator-gobblin by apache.

From the class HiveAvroToOrcConverterTest, method dropReplacedPartitionsTest.

@Test
public void dropReplacedPartitionsTest() throws Exception {
    Table table = ConvertibleHiveDatasetTest.getTestTable("dbName", "tableName");
    table.setTableType("VIRTUAL_VIEW");
    table.setPartitionKeys(ImmutableList.of(new FieldSchema("year", "string", ""), new FieldSchema("month", "string", "")));
    Partition part = new Partition();
    part.setParameters(ImmutableMap.of("gobblin.replaced.partitions", "2015,12|2016,01"));
    SchemaAwareHiveTable hiveTable = new SchemaAwareHiveTable(table, null);
    SchemaAwareHivePartition partition = new SchemaAwareHivePartition(table, part, null);
    QueryBasedHiveConversionEntity conversionEntity = new QueryBasedHiveConversionEntity(null, hiveTable, Optional.of(partition));
    List<ImmutableMap<String, String>> expected = ImmutableList.of(ImmutableMap.of("year", "2015", "month", "12"), ImmutableMap.of("year", "2016", "month", "01"));
    Assert.assertEquals(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity), expected);
    // Make sure that a partition itself is not dropped
    Partition replacedSelf = new Partition();
    replacedSelf.setParameters(ImmutableMap.of("gobblin.replaced.partitions", "2015,12|2016,01|2016,02"));
    replacedSelf.setValues(ImmutableList.of("2016", "02"));
    conversionEntity = new QueryBasedHiveConversionEntity(null, hiveTable, Optional.of(new SchemaAwareHivePartition(table, replacedSelf, null)));
    Assert.assertEquals(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity), expected);
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) SchemaAwareHivePartition(org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHivePartition) Table(org.apache.hadoop.hive.metastore.api.Table) SchemaAwareHiveTable(org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) QueryBasedHiveConversionEntity(org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity) ImmutableMap(com.google.common.collect.ImmutableMap) ConvertibleHiveDatasetTest(org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest) Test(org.testng.annotations.Test)
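
The test above relies on the "gobblin.replaced.partitions" parameter, which packs each replaced partition as a comma-separated value list and joins the lists with "|", pairing values positionally with the table's partition keys (year, month). The sketch below illustrates only that decoding step; it is a hypothetical helper, not the real AbstractAvroToOrcConverter.getDropPartitionsDDLInfo, and it omits the check that keeps a partition from dropping itself.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ReplacedPartitionsSketch {

    // Decodes "2015,12|2016,01" against partition keys [year, month] into
    // [{year=2015, month=12}, {year=2016, month=01}].
    static List<Map<String, String>> decode(String replacedPartitions, List<String> partitionKeys) {
        List<Map<String, String>> specs = new ArrayList<>();
        for (String spec : replacedPartitions.split("\\|")) {
            String[] values = spec.split(",");
            Map<String, String> partitionSpec = new LinkedHashMap<>();
            for (int i = 0; i < partitionKeys.size() && i < values.length; i++) {
                partitionSpec.put(partitionKeys.get(i), values[i]);
            }
            specs.add(partitionSpec);
        }
        return specs;
    }

    public static void main(String[] args) {
        System.out.println(decode("2015,12|2016,01", Arrays.asList("year", "month")));
        // prints: [{year=2015, month=12}, {year=2016, month=01}]
    }
}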

Aggregations

ConvertibleHiveDatasetTest (org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDatasetTest): 3
QueryBasedHiveConversionEntity (org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity): 3
SchemaAwareHiveTable (org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable): 3
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3
Table (org.apache.hadoop.hive.metastore.api.Table): 3
Test (org.testng.annotations.Test): 3
Config (com.typesafe.config.Config): 2
Schema (org.apache.avro.Schema): 2
WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 2
ConvertibleHiveDataset (org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset): 2
ImmutableMap (com.google.common.collect.ImmutableMap): 1
SchemaAwareHivePartition (org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHivePartition): 1
Partition (org.apache.hadoop.hive.metastore.api.Partition): 1