Use of org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable in project incubator-gobblin by apache.
From class HiveAvroToOrcConverterTest, method testNestedSchemaDDLandDML.
/**
 * Test nested DDL and DML generation.
 * @throws Exception if test table setup or conversion fails
 */
@Test
public void testNestedSchemaDDLandDML() throws Exception {
  String dbName = "testdb";
  String tableName = "testtable";
  String tableSdLoc = "/tmp/testtable";
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
  Table table = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
  Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");

  WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);
  // Disable schema flattening so the nested ORC converter keeps the nested structure.
  wus.getJobState().setProp("orc.table.flatten.schema", "false");

  try (HiveAvroToNestedOrcConverter converter = new HiveAvroToNestedOrcConverter()) {
    Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
        .put("destinationFormats", "nestedOrc")
        .put("nestedOrc.destination.tableName", "testtable_orc_nested")
        .put("nestedOrc.destination.dbName", dbName)
        .put("nestedOrc.destination.dataPath", "file:/tmp/testtable_orc_nested")
        .build());
    ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);

    List<QueryBasedHiveConversionEntity> conversionEntities = Lists.newArrayList(converter.convertRecord(
        converter.convertSchema(schema, wus),
        new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));
    Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");

    QueryBasedHiveConversionEntity queryBasedHiveConversionEntity = conversionEntities.get(0);
    List<String> queries = queryBasedHiveConversionEntity.getQueries();
    Assert.assertEquals(queries.size(), 4, "Four queries (DDL statements plus the DML) should be returned");
    // Ignore everything before the first bracket in the DDL and before the SELECT clause in the
    // DML, because the staging table name contains a random component.
    // Note: arguments to StringUtils.substringAfter are (string, separator); the DML is assumed
    // to be the last query in the list.
    String actualDDLQuery = StringUtils.substringAfter(queries.get(0).trim(), "(");
    String actualDMLQuery = StringUtils.substringAfter(queries.get(queries.size() - 1).trim(), "SELECT");
    String expectedDDLQuery = StringUtils.substringAfter(
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.ddl"), "(");
    String expectedDMLQuery = StringUtils.substringAfter(
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_nested.dml"), "SELECT");

    Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
    Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
  }
}
Use of org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable in project incubator-gobblin by apache.
From class HiveAvroToOrcConverterTest, method testFlattenSchemaDDLandDML.
/**
 * Test flattened DDL and DML generation.
 * @throws Exception if test table setup or conversion fails
 */
@Test
public void testFlattenSchemaDDLandDML() throws Exception {
  String dbName = "testdb";
  String tableName = "testtable";
  String tableSdLoc = "/tmp/testtable";
  this.hiveMetastoreTestUtils.getLocalMetastoreClient().dropDatabase(dbName, false, true, true);
  Table table = this.hiveMetastoreTestUtils.createTestAvroTable(dbName, tableName, tableSdLoc, Optional.<String>absent());
  Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");
  WorkUnitState wus = ConversionHiveTestUtils.createWus(dbName, tableName, 0);

  try (HiveAvroToFlattenedOrcConverter converter = new HiveAvroToFlattenedOrcConverter()) {
    Config config = ConfigFactory.parseMap(ImmutableMap.<String, String>builder()
        .put("destinationFormats", "flattenedOrc")
        .put("flattenedOrc.destination.dbName", dbName)
        .put("flattenedOrc.destination.tableName", tableName + "_orc")
        .put("flattenedOrc.destination.dataPath", "file:" + tableSdLoc + "_orc")
        .build());
    ConvertibleHiveDataset cd = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);

    List<QueryBasedHiveConversionEntity> conversionEntities = Lists.newArrayList(converter.convertRecord(
        converter.convertSchema(schema, wus),
        new QueryBasedHiveConversionEntity(cd, new SchemaAwareHiveTable(table, schema)), wus));
    Assert.assertEquals(conversionEntities.size(), 1, "Only one query entity should be returned");

    QueryBasedHiveConversionEntity queryBasedHiveConversionEntity = conversionEntities.get(0);
    List<String> queries = queryBasedHiveConversionEntity.getQueries();
    Assert.assertEquals(queries.size(), 4, "Four queries (DDL statements plus the DML) should be returned");
    // Ignore everything before the first bracket in the DDL and before the SELECT clause in the
    // DML, because the staging table name contains a random component.
    // Note: arguments to StringUtils.substringAfter are (string, separator); the DML is assumed
    // to be the last query in the list.
    String actualDDLQuery = StringUtils.substringAfter(queries.get(0).trim(), "(");
    String actualDMLQuery = StringUtils.substringAfter(queries.get(queries.size() - 1).trim(), "SELECT");
    String expectedDDLQuery = StringUtils.substringAfter(
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_flattened.ddl"), "(");
    String expectedDMLQuery = StringUtils.substringAfter(
        ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_flattened.dml"), "SELECT");

    Assert.assertEquals(actualDDLQuery, expectedDDLQuery);
    Assert.assertEquals(actualDMLQuery, expectedDMLQuery);
  }
}
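Both conversion tests follow the same pattern around SchemaAwareHiveTable: pair the metastore Table with the Avro Schema that describes it, wrap the pair in a QueryBasedHiveConversionEntity, and pass it through the converter's convertSchema/convertRecord calls. The fragment below is a minimal sketch of that shared pattern, not additional project code; it assumes the same test-class context as above (the hiveMetastoreTestUtils and resourceDir fields and the static test helpers).

// Minimal sketch of the shared conversion pattern (illustrative only; assumes the
// test-class context shown in the methods above).
Table table = this.hiveMetastoreTestUtils
    .createTestAvroTable("testdb", "testtable", "/tmp/testtable", Optional.<String>absent());
Schema schema = ConversionHiveTestUtils
    .readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json");
WorkUnitState wus = ConversionHiveTestUtils.createWus("testdb", "testtable", 0);

// Destination config; switch to the "nestedOrc.*" keys (and set orc.table.flatten.schema=false)
// to exercise the nested converter instead.
Config config = ConfigFactory.parseMap(ImmutableMap.of(
    "destinationFormats", "flattenedOrc",
    "flattenedOrc.destination.dbName", "testdb",
    "flattenedOrc.destination.tableName", "testtable_orc",
    "flattenedOrc.destination.dataPath", "file:/tmp/testtable_orc"));
ConvertibleHiveDataset dataset = ConvertibleHiveDatasetTest.createTestConvertibleDataset(config);

try (HiveAvroToFlattenedOrcConverter converter = new HiveAvroToFlattenedOrcConverter()) {
  // SchemaAwareHiveTable binds the Hive table to the Avro schema describing it.
  SchemaAwareHiveTable schemaAwareTable = new SchemaAwareHiveTable(table, schema);
  QueryBasedHiveConversionEntity entity = new QueryBasedHiveConversionEntity(dataset, schemaAwareTable);

  // convertSchema prepares the output schema; convertRecord emits the DDL/DML queries.
  List<QueryBasedHiveConversionEntity> converted =
      Lists.newArrayList(converter.convertRecord(converter.convertSchema(schema, wus), entity, wus));
  List<String> queries = converted.get(0).getQueries();
}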
Use of org.apache.gobblin.data.management.conversion.hive.entities.SchemaAwareHiveTable in project incubator-gobblin by apache.
From class HiveAvroToOrcConverterTest, method dropReplacedPartitionsTest.
@Test
public void dropReplacedPartitionsTest() throws Exception {
  Table table = ConvertibleHiveDatasetTest.getTestTable("dbName", "tableName");
  table.setTableType("VIRTUAL_VIEW");
  table.setPartitionKeys(ImmutableList.of(new FieldSchema("year", "string", ""), new FieldSchema("month", "string", "")));

  Partition part = new Partition();
  // Pipe-separated list of replaced partitions; values within a partition are comma-separated.
  part.setParameters(ImmutableMap.of("gobblin.replaced.partitions", "2015,12|2016,01"));

  SchemaAwareHiveTable hiveTable = new SchemaAwareHiveTable(table, null);
  SchemaAwareHivePartition partition = new SchemaAwareHivePartition(table, part, null);

  QueryBasedHiveConversionEntity conversionEntity = new QueryBasedHiveConversionEntity(null, hiveTable, Optional.of(partition));
  List<ImmutableMap<String, String>> expected = ImmutableList.of(
      ImmutableMap.of("year", "2015", "month", "12"),
      ImmutableMap.of("year", "2016", "month", "01"));
  Assert.assertEquals(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity), expected);

  // Make sure a partition is not dropped by itself, even if it appears in its own replaced list.
  Partition replacedSelf = new Partition();
  replacedSelf.setParameters(ImmutableMap.of("gobblin.replaced.partitions", "2015,12|2016,01|2016,02"));
  replacedSelf.setValues(ImmutableList.of("2016", "02"));

  conversionEntity = new QueryBasedHiveConversionEntity(null, hiveTable, Optional.of(new SchemaAwareHivePartition(table, replacedSelf, null)));
  Assert.assertEquals(AbstractAvroToOrcConverter.getDropPartitionsDDLInfo(conversionEntity), expected);
}
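The test above relies on the format of the "gobblin.replaced.partitions" parameter: partitions are separated by '|', the values inside a partition are comma-separated in partition-key order, and the partition's own values are excluded from the drop list. The sketch below only illustrates that format by reproducing the structure the assertions expect; it is a hypothetical helper, not the parsing logic inside AbstractAvroToOrcConverter.getDropPartitionsDDLInfo.

// Hypothetical helper illustrating the "gobblin.replaced.partitions" format used above.
// It maps each pipe-separated partition onto the table's partition keys in order.
static List<ImmutableMap<String, String>> parseReplacedPartitions(String replaced, List<String> partitionKeys) {
  List<ImmutableMap<String, String>> specs = Lists.newArrayList();
  for (String partitionSpec : replaced.split("\\|")) {
    String[] values = partitionSpec.split(",");
    ImmutableMap.Builder<String, String> spec = ImmutableMap.builder();
    for (int i = 0; i < partitionKeys.size(); i++) {
      spec.put(partitionKeys.get(i), values[i]);
    }
    specs.add(spec.build());
  }
  return specs;
}

// parseReplacedPartitions("2015,12|2016,01", ImmutableList.of("year", "month")) yields
// [{year=2015, month=12}, {year=2016, month=01}], matching the expected value asserted above.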