Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergPartitions, the method testBucketTransform.
@Test
public void testBucketTransform() throws IOException {
  Schema schema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "part_field", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("part_field", 2).build();
  List<Record> records = TestHelper.RecordsBuilder.newInstance(schema)
      .add(1L, "Part1")
      .add(2L, "Part2")
      .add(3L, "Art3")
      .build();
  Table table = testTables.createTable(shell, "part_test", schema, spec, fileFormat, records);
  HiveIcebergTestUtils.validateData(table, records, 0);
  HiveIcebergTestUtils.validateDataWithSQL(shell, "part_test", records, "id");
}
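The bucket("part_field", 2) transform above hashes the partition source column and assigns each row to one of two buckets; Iceberg derives the partition field name from the source column by default. A minimal standalone sketch of the same spec, outside the Hive test harness (the schema and field IDs here are illustrative):

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.optional;

public class BucketSpecSketch {
  public static void main(String[] args) {
    // Same shape as the test schema: a long id and a string partition source column.
    Schema schema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "part_field", Types.StringType.get()));

    // Hash "part_field" into 2 buckets; identity, truncate and the time-based
    // transforms (year/month/day/hour) can be chained on the same builder.
    PartitionSpec spec = PartitionSpec.builderFor(schema)
        .bucket("part_field", 2)
        .build();

    // Prints "part_field_bucket", the default name of the derived partition field.
    System.out.println(spec.fields().get(0).name());
  }
}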
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class HiveTableUtil, the method importFiles.
/**
 * Import files from given partitions to an Iceberg table.
 * @param sourceLocation location of the HMS table
 * @param format inputformat class name of the HMS table
 * @param partitionSpecProxy list of HMS table partitions wrapped in partitionSpecProxy
 * @param partitionKeys list of partition keys
 * @param icebergTableProperties destination iceberg table properties
 * @param conf a Hadoop configuration
 */
public static void importFiles(String sourceLocation, String format, PartitionSpecProxy partitionSpecProxy,
    List<FieldSchema> partitionKeys, Properties icebergTableProperties, Configuration conf) throws MetaException {
  RemoteIterator<LocatedFileStatus> filesIterator = null;
  // this operation must be done before the iceberg table is created
  if (partitionSpecProxy.size() == 0) {
    filesIterator = getFilesIterator(new Path(sourceLocation), conf);
  }
  Table icebergTable = Catalogs.createTable(conf, icebergTableProperties);
  AppendFiles append = icebergTable.newAppend();
  PartitionSpec spec = icebergTable.spec();
  MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTable.properties());
  String nameMappingString = icebergTable.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
  NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
  try {
    if (partitionSpecProxy.size() == 0) {
      List<DataFile> dataFiles =
          getDataFiles(filesIterator, Collections.emptyMap(), format, spec, metricsConfig, nameMapping, conf);
      dataFiles.forEach(append::appendFile);
    } else {
      PartitionSpecProxy.PartitionIterator partitionIterator = partitionSpecProxy.getPartitionIterator();
      List<Callable<Void>> tasks = new ArrayList<>();
      while (partitionIterator.hasNext()) {
        Partition partition = partitionIterator.next();
        Callable<Void> task = () -> {
          Path partitionPath = new Path(partition.getSd().getLocation());
          String partitionName = Warehouse.makePartName(partitionKeys, partition.getValues());
          Map<String, String> partitionSpec = Warehouse.makeSpecFromName(partitionName);
          RemoteIterator<LocatedFileStatus> iterator = getFilesIterator(partitionPath, conf);
          List<DataFile> dataFiles =
              getDataFiles(iterator, partitionSpec, format.toLowerCase(), spec, metricsConfig, nameMapping, conf);
          synchronized (append) {
            dataFiles.forEach(append::appendFile);
          }
          return null;
        };
        tasks.add(task);
      }
      int numThreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_ICEBERG_METADATA_GENERATOR_THREADS);
      ExecutorService executor = Executors.newFixedThreadPool(numThreads,
          new ThreadFactoryBuilder().setNameFormat("iceberg-metadata-generator-%d").setDaemon(true).build());
      executor.invokeAll(tasks);
      executor.shutdown();
    }
    append.commit();
  } catch (IOException | InterruptedException e) {
    throw new MetaException("Cannot import hive data into iceberg table.\n" + e.getMessage());
  }
}
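importFiles relies on a getDataFiles helper (not shown in this snippet) to turn the listed files into Iceberg DataFile objects before appending them. Below is a rough sketch of what one such append can look like with Iceberg's public DataFiles builder; this is an assumption for illustration, not the Hive helper itself, and the path, size, and row-count values are placeholders:

import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;

public class ManualAppendSketch {
  // Describes one pre-existing Parquet file and appends it to the table;
  // every file attribute below is a placeholder, not real data.
  static void appendOneFile(Table table) {
    PartitionSpec spec = table.spec();
    DataFile dataFile = DataFiles.builder(spec)
        .withPath("/warehouse/part_test/data/part_field_bucket=0/00000-0.parquet")
        .withFormat(FileFormat.PARQUET)
        .withPartitionPath("part_field_bucket=0")  // omit for an unpartitioned spec
        .withFileSizeInBytes(1024L)
        .withRecordCount(3L)
        .build();

    AppendFiles append = table.newAppend();
    append.appendFile(dataFile);
    // One commit publishes all appended files atomically in a new snapshot,
    // which is why importFiles collects files from all partitions before committing.
    append.commit();
  }
}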
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveCatalog, the method testCreateTableBuilder.
@Test
public void testCreateTableBuilder() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();
  try {
    Table table = catalog.buildTable(tableIdent, schema)
        .withPartitionSpec(spec)
        .withLocation(location)
        .withProperty("key1", "value1")
        .withProperty("key2", "value2")
        .create();
    Assert.assertEquals(location, table.location());
    Assert.assertEquals(2, table.schema().columns().size());
    Assert.assertEquals(1, table.spec().fields().size());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    catalog.dropTable(tableIdent);
  }
}
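The fluent buildTable(...).create() chain is equivalent to the catalog's one-shot createTable overload. A short sketch, assuming the same catalog, tableIdent, schema, spec, and location from the test are in scope; the builder form is still the one to reach for when a sort order or a replace transaction is needed:

// Equivalent one-shot creation with the same arguments as the builder chain above.
Map<String, String> props = ImmutableMap.of("key1", "value1", "key2", "value2");
Table table = catalog.createTable(tableIdent, schema, spec, location, props);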
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveCatalog, the method testReplaceTxnBuilder.
@Test
public void testReplaceTxnBuilder() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();
  try {
    Transaction createTxn = catalog.buildTable(tableIdent, schema)
        .withPartitionSpec(spec)
        .withLocation(location)
        .withProperty("key1", "value1")
        .createOrReplaceTransaction();
    createTxn.commitTransaction();
    Table table = catalog.loadTable(tableIdent);
    Assert.assertEquals(1, table.spec().fields().size());
    String newLocation = temp.newFolder("tbl-2").toString();
    Transaction replaceTxn = catalog.buildTable(tableIdent, schema)
        .withProperty("key2", "value2")
        .withLocation(newLocation)
        .replaceTransaction();
    replaceTxn.commitTransaction();
    table = catalog.loadTable(tableIdent);
    Assert.assertEquals(newLocation, table.location());
    Assert.assertNull(table.currentSnapshot());
    PartitionSpec v1Expected = PartitionSpec.builderFor(table.schema())
        .alwaysNull("data", "data_bucket")
        .withSpecId(1)
        .build();
    Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    catalog.dropTable(tableIdent);
  }
}
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveCatalog, the method testCreateTableWithCaching.
@Test
public void testCreateTableWithCaching() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();
  ImmutableMap<String, String> properties = ImmutableMap.of("key1", "value1", "key2", "value2");
  Catalog cachingCatalog = CachingCatalog.wrap(catalog);
  try {
    Table table = cachingCatalog.createTable(tableIdent, schema, spec, location, properties);
    Assert.assertEquals(location, table.location());
    Assert.assertEquals(2, table.schema().columns().size());
    Assert.assertEquals(1, table.spec().fields().size());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    cachingCatalog.dropTable(tableIdent);
  }
}
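CachingCatalog.wrap decorates the underlying catalog so that repeated loads of the same identifier can be served from an in-memory cache instead of the metastore. A small sketch of that behavior, under the assumption that cached Table instances are reused until they are explicitly invalidated:

Catalog cachingCatalog = CachingCatalog.wrap(catalog);

// Both loads go through the cache; the second is expected to return the same cached instance.
Table first = cachingCatalog.loadTable(tableIdent);
Table second = cachingCatalog.loadTable(tableIdent);
Assert.assertSame(first, second);

// Invalidating the entry forces the next loadTable to hit the metastore again.
cachingCatalog.invalidateTable(tableIdent);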