Use of org.apache.iceberg.PartitionKey in project hive by apache.
From the class TestHiveIcebergPartitions, method testPartitionPruning.
@Test
public void testPartitionPruning() throws IOException {
  Schema salesSchema = new Schema(
      required(1, "ss_item_sk", Types.IntegerType.get()),
      required(2, "ss_sold_date_sk", Types.IntegerType.get()));
  PartitionSpec salesSpec = PartitionSpec.builderFor(salesSchema).identity("ss_sold_date_sk").build();
  Schema dimSchema = new Schema(
      required(1, "d_date_sk", Types.IntegerType.get()),
      required(2, "d_moy", Types.IntegerType.get()));
  List<Record> salesRecords = TestHelper.RecordsBuilder.newInstance(salesSchema)
      .add(51, 5).add(61, 6).add(71, 7).add(81, 8).add(91, 9).build();
  // The dimension records are built against dimSchema, not salesSchema
  List<Record> dimRecords = TestHelper.RecordsBuilder.newInstance(dimSchema)
      .add(1, 10).add(2, 20).add(3, 30).add(4, 40).add(5, 50).build();
  Table salesTable = testTables.createTable(shell, "x1_store_sales", salesSchema, salesSpec, fileFormat, null);
  PartitionKey partitionKey = new PartitionKey(salesSpec, salesSchema);
  for (Record r : salesRecords) {
    // Compute the partition tuple for this record, then append the record to that partition
    partitionKey.partition(r);
    testTables.appendIcebergTable(shell.getHiveConf(), salesTable, fileFormat, partitionKey, ImmutableList.of(r));
  }
  testTables.createTable(shell, "x1_date_dim", dimSchema, fileFormat, dimRecords);
  String query = "select s.ss_item_sk from x1_store_sales s, x1_date_dim d " +
      "where s.ss_sold_date_sk=d.d_date_sk*2 and d.d_moy=30";
  // Check the query results
  List<Object[]> rows = shell.executeStatement(query);
  Assert.assertEquals(1, rows.size());
  Assert.assertArrayEquals(new Object[] { 61 }, rows.get(0));
  // Check if Dynamic Partitioning is used
  Assert.assertTrue(shell.executeStatement("explain " + query).stream()
      .filter(a -> ((String) a[0]).contains("Dynamic Partitioning Event Operator"))
      .findAny().isPresent());
}
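The write loop above relies on PartitionKey deriving the partition tuple from each record before the append. A minimal sketch of that pattern in isolation follows; the schema, column names, and values here are hypothetical and only illustrate the API, they are not part of the test above.

// Minimal sketch of the PartitionKey pattern; schema, column names and values are hypothetical.
Schema schema = new Schema(
    required(1, "id", Types.IntegerType.get()),
    required(2, "dept_id", Types.IntegerType.get()));
PartitionSpec spec = PartitionSpec.builderFor(schema).identity("dept_id").build();

PartitionKey key = new PartitionKey(spec, schema);
Record record = GenericRecord.create(schema);
record.setField("id", 1);
record.setField("dept_id", 42);

// partition() fills the key with the partition values derived from the record;
// the same PartitionKey instance can be reused for subsequent records.
key.partition(record);
// key.get(0, Integer.class) now returns 42, the identity-partition value.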
Use of org.apache.iceberg.PartitionKey in project hive by apache.
From the class HiveIcebergTestUtils, method createEqualityDeleteFile.
/**
 * @param table The table to create the delete file for
 * @param deleteFilePath The path where the delete file should be created, relative to the table location root
 * @param equalityFields List of field names that should play a role in the equality check
 * @param fileFormat The file format that should be used for writing out the delete file
 * @param rowsToDelete The rows that should be deleted. It is enough to fill out the fields that are relevant for
 *                     the equality check, as listed in equalityFields; the rest of the fields are ignored
 * @return The DeleteFile created
 * @throws IOException If there is an error during the DeleteFile write
 */
public static DeleteFile createEqualityDeleteFile(Table table, String deleteFilePath, List<String> equalityFields,
    FileFormat fileFormat, List<Record> rowsToDelete) throws IOException {
  // Resolve the equality field names to their Iceberg field ids
  List<Integer> equalityFieldIds = equalityFields.stream()
      .map(fieldName -> table.schema().findField(fieldName).fieldId())
      .collect(Collectors.toList());
  Schema eqDeleteRowSchema = table.schema().select(equalityFields.toArray(new String[] {}));
  FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(),
      ArrayUtil.toIntArray(equalityFieldIds), eqDeleteRowSchema, null);
  EncryptedOutputFile outputFile = table.encryption().encrypt(HadoopOutputFile.fromPath(
      new org.apache.hadoop.fs.Path(table.location(), deleteFilePath), new Configuration()));
  // Derive the partition key from the first row; all rows are expected to belong to the same partition
  PartitionKey part = new PartitionKey(table.spec(), eqDeleteRowSchema);
  part.partition(rowsToDelete.get(0));
  EqualityDeleteWriter<Record> eqWriter = appenderFactory.newEqDeleteWriter(outputFile, fileFormat, part);
  try (EqualityDeleteWriter<Record> writer = eqWriter) {
    writer.deleteAll(rowsToDelete);
  }
  return eqWriter.toDeleteFile();
}
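A hedged usage sketch of this helper: the table variable, the customer_id column, and the file name below are assumptions for illustration, not part of the helper itself. The returned DeleteFile still has to be committed, for example through a RowDelta, before readers apply the equality delete.

// Hypothetical usage of createEqualityDeleteFile; table, field name and path are illustrative only.
Record keyToDelete = GenericRecord.create(table.schema())
    .copy(ImmutableMap.of("customer_id", 1L));   // only the equality fields need real values

DeleteFile eqDeletes = HiveIcebergTestUtils.createEqualityDeleteFile(
    table,
    "data/eq-deletes-00001",                     // path relative to the table location root
    ImmutableList.of("customer_id"),             // fields participating in the equality check
    FileFormat.PARQUET,
    ImmutableList.of(keyToDelete));

// Commit the delete file so the matching rows disappear from reads.
table.newRowDelta().addDeletes(eqDeletes).commit();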
Use of org.apache.iceberg.PartitionKey in project hive by apache.
From the class HiveIcebergTestUtils, method createPositionalDeleteFile.
/**
 * @param table The table to create the delete file for
 * @param deleteFilePath The path where the delete file should be created, relative to the table location root
 * @param fileFormat The file format that should be used for writing out the delete file
 * @param partitionValues A map of partition values (partitionKey=partitionVal, ...) to be used for the delete file
 * @param deletes The list of position deletes, each containing the data file path, the position of the row in the
 *                data file, and the row itself that should be deleted
 * @return The DeleteFile created
 * @throws IOException If there is an error during the DeleteFile write
 */
public static DeleteFile createPositionalDeleteFile(Table table, String deleteFilePath, FileFormat fileFormat,
    Map<String, Object> partitionValues, List<PositionDelete<Record>> deletes) throws IOException {
  // Only write a row schema if the deletes carry the deleted rows themselves
  Schema posDeleteRowSchema = deletes.get(0).row() == null ? null : table.schema();
  FileAppenderFactory<Record> appenderFactory =
      new GenericAppenderFactory(table.schema(), table.spec(), null, null, posDeleteRowSchema);
  EncryptedOutputFile outputFile = table.encryption().encrypt(HadoopOutputFile.fromPath(
      new org.apache.hadoop.fs.Path(table.location(), deleteFilePath), new Configuration()));
  PartitionKey partitionKey = null;
  if (partitionValues != null) {
    // Build a record holding the partition values and derive the partition key from it
    Record record = GenericRecord.create(table.schema()).copy(partitionValues);
    partitionKey = new PartitionKey(table.spec(), table.schema());
    partitionKey.partition(record);
  }
  PositionDeleteWriter<Record> posWriter = appenderFactory.newPosDeleteWriter(outputFile, fileFormat, partitionKey);
  try (PositionDeleteWriter<Record> writer = posWriter) {
    deletes.forEach(del -> writer.delete(del.path(), del.pos(), del.row()));
  }
  return posWriter.toDeleteFile();
}
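A hedged usage sketch, assuming an unpartitioned table and an already written DataFile named dataFile; the file name and position below are illustrative only, not values from the helper.

// Hypothetical usage of createPositionalDeleteFile; dataFile, path and position are illustrative only.
PositionDelete<Record> positionDelete = PositionDelete.create();
positionDelete.set(dataFile.path(), 0L, null);   // delete row 0 of the data file, no row payload

DeleteFile posDeletes = HiveIcebergTestUtils.createPositionalDeleteFile(
    table,
    "data/pos-deletes-00001",    // path relative to the table location root
    FileFormat.PARQUET,
    null,                        // no partition values for an unpartitioned table
    ImmutableList.of(positionDelete));

// Commit the positional delete file so the referenced row is no longer returned by reads.
table.newRowDelta().addDeletes(posDeletes).commit();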