Example 51 with Record

use of org.apache.iceberg.data.Record in project hive by apache.

the class HiveIcebergTestUtils method getTestRecord.

/**
 * Generates a test record where every field has a value.
 * @return Record with every field set
 */
public static Record getTestRecord() {
    Record record = GenericRecord.create(HiveIcebergTestUtils.FULL_SCHEMA);
    record.set(0, true);
    record.set(1, 1);
    record.set(2, 2L);
    record.set(3, 3.1f);
    record.set(4, 4.2d);
    record.set(5, LocalDate.of(2020, 1, 21));
    // Iceberg timestamps have microsecond precision, so the nano-of-second argument is 0
    record.set(6, OffsetDateTime.of(2017, 11, 22, 11, 30, 7, 0, ZoneOffset.ofHours(2)));
    record.set(7, LocalDateTime.of(2019, 2, 22, 9, 44, 54));
    record.set(8, "kilenc");
    record.set(9, new byte[] { 0, 1, 2 });
    record.set(10, ByteBuffer.wrap(new byte[] { 0, 1, 2, 3 }));
    record.set(11, new BigDecimal("0.0000000013"));
    record.set(12, LocalTime.of(11, 33));
    record.set(13, UUID.fromString("73689599-d7fc-4dfb-b94e-106ff20284a5"));
    return record;
}
Also used : GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) BigDecimal(java.math.BigDecimal)
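
The shape of HiveIcebergTestUtils.FULL_SCHEMA can be inferred from the setters above. A minimal sketch follows, assuming optional fields; the field names and the decimal precision are illustrative assumptions, while the types and their ordering follow from the values set:

// Hypothetical reconstruction of FULL_SCHEMA; field names are assumptions.
// Assumes: import static org.apache.iceberg.types.Types.NestedField.optional;
private static final Schema FULL_SCHEMA = new Schema(
    optional(1, "boolean_type", Types.BooleanType.get()),
    optional(2, "integer_type", Types.IntegerType.get()),
    optional(3, "long_type", Types.LongType.get()),
    optional(4, "float_type", Types.FloatType.get()),
    optional(5, "double_type", Types.DoubleType.get()),
    optional(6, "date_type", Types.DateType.get()),
    // OffsetDateTime maps to timestamp with time zone
    optional(7, "timestamptz_type", Types.TimestampType.withZone()),
    // LocalDateTime maps to timestamp without time zone
    optional(8, "timestamp_type", Types.TimestampType.withoutZone()),
    optional(9, "string_type", Types.StringType.get()),
    // a 3-byte array maps to fixed(3)
    optional(10, "fixed_type", Types.FixedType.ofLength(3)),
    optional(11, "binary_type", Types.BinaryType.get()),
    // "0.0000000013" needs scale 10; the precision of 38 is an assumption
    optional(12, "decimal_type", Types.DecimalType.of(38, 10)),
    optional(13, "time_type", Types.TimeType.get()),
    optional(14, "uuid_type", Types.UUIDType.get()));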

Example 52 with Record

use of org.apache.iceberg.data.Record in project hive by apache.

the class HiveIcebergTestUtils method createEqualityDeleteFile.

/**
 * @param table The table to create the delete file for
 * @param deleteFilePath The path where the delete file should be created, relative to the table location root
 * @param equalityFields List of field names that should play a role in the equality check
 * @param fileFormat The file format that should be used for writing out the delete file
 * @param rowsToDelete The rows that should be deleted. It's enough to fill out the fields that are relevant for the
 *                     equality check, as listed in equalityFields; the rest of the fields are ignored
 * @return The DeleteFile created
 * @throws IOException If there is an error during DeleteFile write
 */
public static DeleteFile createEqualityDeleteFile(Table table, String deleteFilePath, List<String> equalityFields,
        FileFormat fileFormat, List<Record> rowsToDelete) throws IOException {
    List<Integer> equalityFieldIds = equalityFields.stream()
        .map(name -> table.schema().findField(name).fieldId())
        .collect(Collectors.toList());
    Schema eqDeleteRowSchema = table.schema().select(equalityFields.toArray(new String[] {}));
    FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(),
            ArrayUtil.toIntArray(equalityFieldIds), eqDeleteRowSchema, null);
    EncryptedOutputFile outputFile = table.encryption().encrypt(HadoopOutputFile.fromPath(
            new org.apache.hadoop.fs.Path(table.location(), deleteFilePath), new Configuration()));
    PartitionKey part = new PartitionKey(table.spec(), eqDeleteRowSchema);
    part.partition(rowsToDelete.get(0));
    EqualityDeleteWriter<Record> eqWriter = appenderFactory.newEqDeleteWriter(outputFile, fileFormat, part);
    try (EqualityDeleteWriter<Record> writer = eqWriter) {
        writer.deleteAll(rowsToDelete);
    }
    return eqWriter.toDeleteFile();
}
Also used : Arrays(java.util.Arrays) HadoopOutputFile(org.apache.iceberg.hadoop.HadoopOutputFile) Types(org.apache.iceberg.types.Types) Text(org.apache.hadoop.io.Text) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) StandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector) JobID(org.apache.hadoop.mapred.JobID) LongWritable(org.apache.hadoop.io.LongWritable) ByteBuffer(java.nio.ByteBuffer) BigDecimal(java.math.BigDecimal) TimestampUtils(org.apache.hadoop.hive.common.type.TimestampUtils) ArrayUtil(org.apache.iceberg.util.ArrayUtil) ByteBuffers(org.apache.iceberg.util.ByteBuffers) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) GenericRecord(org.apache.iceberg.data.GenericRecord) PositionDeleteWriter(org.apache.iceberg.deletes.PositionDeleteWriter) LocalTime(java.time.LocalTime) PartitionKey(org.apache.iceberg.PartitionKey) ZoneOffset(java.time.ZoneOffset) Path(java.nio.file.Path) IntWritable(org.apache.hadoop.io.IntWritable) CloseableIterable(org.apache.iceberg.io.CloseableIterable) Timestamp(java.sql.Timestamp) UUID(java.util.UUID) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) List(java.util.List) OffsetDateTime(java.time.OffsetDateTime) BooleanWritable(org.apache.hadoop.io.BooleanWritable) EncryptedOutputFile(org.apache.iceberg.encryption.EncryptedOutputFile) LocalDate(java.time.LocalDate) GenericAppenderFactory(org.apache.iceberg.data.GenericAppenderFactory) PositionDelete(org.apache.iceberg.deletes.PositionDelete) LocalDateTime(java.time.LocalDateTime) IcebergGenerics(org.apache.iceberg.data.IcebergGenerics) DoubleWritable(org.apache.hadoop.io.DoubleWritable) ArrayList(java.util.ArrayList) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) Files(java.nio.file.Files) Table(org.apache.iceberg.Table) EqualityDeleteWriter(org.apache.iceberg.deletes.EqualityDeleteWriter) IOException(java.io.IOException) FileFormat(org.apache.iceberg.FileFormat) File(java.io.File) Record(org.apache.iceberg.data.Record) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) Paths(java.nio.file.Paths) TimestampTZUtil(org.apache.hadoop.hive.common.type.TimestampTZUtil) PrimitiveObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) FileAppenderFactory(org.apache.iceberg.io.FileAppenderFactory) DeleteFile(org.apache.iceberg.DeleteFile) Comparator(java.util.Comparator) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Assert(org.junit.Assert) FloatWritable(org.apache.hadoop.io.FloatWritable)
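
A minimal call site for this helper, mirroring the flow of Example 55 below; the table variable, the relative file path, and the file format here are assumptions:

// Build delete rows containing only the equality column, then write the
// delete file and commit it. "customer_id" and the path are illustrative.
Schema deleteRowSchema = table.schema().select("customer_id");
List<Record> toDelete = TestHelper.RecordsBuilder.newInstance(deleteRowSchema).add(1L).build();
DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(
        table, "data/eq-deletes-1", ImmutableList.of("customer_id"), FileFormat.PARQUET, toDelete);
// After the commit, rows with customer_id = 1 are invisible to readers.
table.newRowDelta().addDeletes(deleteFile).commit();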

Example 53 with Record

use of org.apache.iceberg.data.Record in project hive by apache.

the class TestHiveIcebergTruncateTable method testTruncateTablePartitionedIcebergTable.

@Test
public void testTruncateTablePartitionedIcebergTable() throws IOException, TException, InterruptedException {
    // Create a partitioned Iceberg table with some initial data and run a truncate table command on this table.
    // Then check that the data is deleted and the table statistics are reset to 0.
    String databaseName = "default";
    String tableName = "customers";
    PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name").build();
    List<Record> records = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
        .add(0L, "Alice", "Brown")
        .add(1L, "Bob", "Brown")
        .add(2L, "Trudy", "Green")
        .add(3L, "John", "Pink")
        .add(4L, "Jane", "Pink")
        .build();
    Table icebergTable = testTables.createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat, records);
    testTruncateTable(databaseName, tableName, icebergTable, records, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
}
Also used : Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
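
The shared testTruncateTable helper is not shown on this page. A plausible sketch of its core steps, under the assumption that it truncates via HiveQL and verifies emptiness (the real helper presumably also checks table statistics and metastore state):

// Hypothetical sketch, not the actual helper: truncate through Hive, then
// assert that a full scan returns no rows.
shell.executeStatement("TRUNCATE TABLE " + databaseName + "." + tableName);
List<Object[]> rows = shell.executeStatement("SELECT * FROM " + databaseName + "." + tableName);
Assert.assertEquals(0, rows.size());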

Example 54 with Record

use of org.apache.iceberg.data.Record in project hive by apache.

the class TestHiveIcebergTruncateTable method testMultipleTruncateTable.

@Test
public void testMultipleTruncateTable() throws IOException, TException, InterruptedException {
    // Create an Iceberg table with some records in it, then execute a truncate table command
    // and check the result. Then insert some new data and run another truncate table command.
    // The purpose of this test is to make sure that multiple truncate table commands can
    // run after each other without any issue (like issues with locking).
    String databaseName = "default";
    String tableName = "customers";
    Table icebergTable = testTables.createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    testTruncateTable(databaseName, tableName, icebergTable, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
    List<Record> newRecords = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
        .add(3L, "Jane", "Purple")
        .add(4L, "Tim", "Grey")
        .add(5L, "Eva", "Yellow")
        .add(6L, "James", "White")
        .add(7L, "Jack", "Black")
        .build();
    shell.executeStatement("INSERT INTO default.customers values (3, 'Jane', 'Purple'), (4, 'Tim', 'Grey')," + "(5, 'Eva', 'Yellow'), (6, 'James', 'White'), (7, 'Jack', 'Black')");
    icebergTable = testTables.loadTable(TableIdentifier.of(databaseName, tableName));
    testTruncateTable(databaseName, tableName, icebergTable, newRecords, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
}
Also used : Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)

Example 55 with Record

use of org.apache.iceberg.data.Record in project hive by apache.

the class TestHiveIcebergV2 method testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied.

@Test
public void testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied() throws IOException {
    Assume.assumeFalse("Reading V2 tables with delete files is currently only supported " + "in non-vectorized mode and only for Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
    PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("customer_id").build();
    Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
    // add one more row to the same partition
    shell.executeStatement("insert into customers values (1, 'Bob', 'Hoover')");
    // delete all rows with id=1 and first_name=Bob
    Schema shorterSchema = new Schema(optional(1, "id", Types.LongType.get()), optional(2, "name", Types.StringType.get()));
    List<Record> toDelete = TestHelper.RecordsBuilder.newInstance(shorterSchema).add(1L, "Bob").build();
    DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(tbl, "dummyPath", ImmutableList.of("customer_id", "first_name"), fileFormat, toDelete);
    tbl.newRowDelta().addDeletes(deleteFile).commit();
    List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");
    Assert.assertEquals(2, objects.size());
    Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
    Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(1));
}
Also used : Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) DeleteFile(org.apache.iceberg.DeleteFile) Test(org.junit.Test)
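
The same outcome can also be verified at the Iceberg level rather than through Hive. A sketch using the generics reader (IcebergGenerics appears among the imports in Example 52); the assertion mirrors the Hive-side checks above:

// Scan the table directly with Iceberg's generic reader and assert that no
// surviving row has customer_id = 1.
try (CloseableIterable<Record> rows = IcebergGenerics.read(tbl).build()) {
    rows.forEach(r -> Assert.assertNotEquals(1L, r.getField("customer_id")));
}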

Aggregations

Record (org.apache.iceberg.data.Record): 114 usages
Test (org.junit.Test): 99 usages
Schema (org.apache.iceberg.Schema): 68 usages
Table (org.apache.iceberg.Table): 51 usages
GenericRecord (org.apache.iceberg.data.GenericRecord): 51 usages
PartitionSpec (org.apache.iceberg.PartitionSpec): 19 usages
ArrayList (java.util.ArrayList): 14 usages
List (java.util.List): 13 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 12 usages
HashMap (java.util.HashMap): 11 usages
IcebergBaseTest (org.apache.drill.metastore.iceberg.IcebergBaseTest): 11 usages
TestHelper (org.apache.iceberg.mr.TestHelper): 11 usages
ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList): 10 usages
Types (org.apache.iceberg.types.Types): 10 usages
Map (java.util.Map): 9 usages
IOException (java.io.IOException): 8 usages
ImmutableMap (org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap): 8 usages
FileFormat (org.apache.iceberg.FileFormat): 7 usages
DeleteFile (org.apache.iceberg.DeleteFile): 6 usages
NestedField.optional (org.apache.iceberg.types.Types.NestedField.optional): 6 usages