Search in sources:

Example 1 with StructLikeSet

use of org.apache.iceberg.util.StructLikeSet in project hive by apache.

the class TestInputFormatReaderDeletes method rowSet.

/**
 * Reads the rows of {@code table}, projected to {@code columns}, through the input format
 * selected by the {@code inputFormat} field and gathers them into a {@link StructLikeSet}.
 *
 * @param name    table name; not referenced by this implementation
 * @param table   table whose location is read and whose schema is projected
 * @param columns column names passed to {@code Schema.select}
 * @return a set typed by the projected struct containing every record that was read
 */
@Override
public StructLikeSet rowSet(String name, Table table, String... columns) {
    InputFormatConfig.ConfigBuilder configBuilder =
        new InputFormatConfig.ConfigBuilder(conf).readFrom(table.location());
    Schema projection = table.schema().select(columns);
    StructLikeSet result = StructLikeSet.create(projection.asStruct());

    TestIcebergInputFormats.TESTED_INPUT_FORMATS.stream()
        // Only the factory whose name matches the configured input format is exercised.
        .filter(factory -> factory.name().equals(inputFormat))
        .flatMap(factory -> factory.create(configBuilder.project(projection).conf()).getRecords().stream())
        // A separate wrapper is built for each record, exactly as the records are wrapped one by one.
        .map(record -> new InternalRecordWrapper(projection.asStruct()).wrap(record))
        .forEach(result::add);

    return result;
}
Also used : BaseTable(org.apache.iceberg.BaseTable) InternalRecordWrapper(org.apache.iceberg.data.InternalRecordWrapper) Table(org.apache.iceberg.Table) StructLikeSet(org.apache.iceberg.util.StructLikeSet) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) RunWith(org.junit.runner.RunWith) IOException(java.io.IOException) Schema(org.apache.iceberg.Schema) TestIcebergInputFormats(org.apache.iceberg.mr.hive.TestIcebergInputFormats) Collectors(java.util.stream.Collectors) FileFormat(org.apache.iceberg.FileFormat) File(java.io.File) TableMetadata(org.apache.iceberg.TableMetadata) List(java.util.List) TableOperations(org.apache.iceberg.TableOperations) Configuration(org.apache.hadoop.conf.Configuration) PartitionSpec(org.apache.iceberg.PartitionSpec) DeleteReadTests(org.apache.iceberg.data.DeleteReadTests) Assert(org.junit.Assert) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before) Schema(org.apache.iceberg.Schema) InternalRecordWrapper(org.apache.iceberg.data.InternalRecordWrapper) StructLikeSet(org.apache.iceberg.util.StructLikeSet)

Example 2 with StructLikeSet

use of org.apache.iceberg.util.StructLikeSet in project hive by apache.

the class DeleteReadTests method testPositionDeletes.

/**
 * Commits a position-delete file for three positions of the data file and checks that the
 * table read back no longer contains the rows with the corresponding ids.
 */
@Test
public void testPositionDeletes() throws IOException {
    // Positions to delete; the trailing comments give the id of the row at each position.
    List<Pair<CharSequence, Long>> positions = Lists.newArrayList(
        Pair.of(dataFile.path(), 0L), // id = 29
        Pair.of(dataFile.path(), 3L), // id = 89
        Pair.of(dataFile.path(), 6L)); // id = 122

    Pair<DeleteFile, Set<CharSequence>> written =
        FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), positions);

    // The second element of the pair is handed to validateDataFilesExist before committing.
    table.newRowDelta()
        .addDeletes(written.first())
        .validateDataFilesExist(written.second())
        .commit();

    StructLikeSet expectedRows = rowSetWithoutIds(29, 89, 122);
    StructLikeSet actualRows = rowSet(tableName, table, "*");

    Assert.assertEquals("Table should contain expected rows", expectedRows, actualRows);
}
Also used : StructLikeSet(org.apache.iceberg.util.StructLikeSet) Set(java.util.Set) StructLikeSet(org.apache.iceberg.util.StructLikeSet) Pair(org.apache.iceberg.util.Pair) DeleteFile(org.apache.iceberg.DeleteFile) Test(org.junit.Test)

Example 3 with StructLikeSet

use of org.apache.iceberg.util.StructLikeSet in project hive by apache.

the class DeleteReadTests method selectColumns.

/**
 * Projects every row of {@code rows} from the table schema down to {@code columns}.
 *
 * @param rows    rows laid out according to {@code table.schema()}
 * @param columns column names passed to {@code Schema.select}
 * @return a new set, typed by the projected struct, holding the projected rows
 */
private StructLikeSet selectColumns(StructLikeSet rows, String... columns) {
    Schema selected = table.schema().select(columns);
    StructLikeSet projectedRows = StructLikeSet.create(selected.asStruct());
    // A fresh projection is created for each row before it is added to the result.
    rows.forEach(row -> projectedRows.add(StructProjection.create(table.schema(), selected).wrap(row)));
    return projectedRows;
}
Also used : Types(org.apache.iceberg.types.Types) Table(org.apache.iceberg.Table) StructLikeSet(org.apache.iceberg.util.StructLikeSet) Set(java.util.Set) Pair(org.apache.iceberg.util.Pair) IOException(java.io.IOException) Test(org.junit.Test) Schema(org.apache.iceberg.Schema) Row(org.apache.iceberg.TestHelpers.Row) Sets(org.apache.iceberg.relocated.com.google.common.collect.Sets) List(java.util.List) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) Rule(org.junit.Rule) ArrayUtil(org.apache.iceberg.util.ArrayUtil) After(org.junit.After) PartitionSpec(org.apache.iceberg.PartitionSpec) DeleteFile(org.apache.iceberg.DeleteFile) DataFile(org.apache.iceberg.DataFile) Assert(org.junit.Assert) StructProjection(org.apache.iceberg.util.StructProjection) TemporaryFolder(org.junit.rules.TemporaryFolder) Files(org.apache.iceberg.Files) Before(org.junit.Before) Schema(org.apache.iceberg.Schema) StructLikeSet(org.apache.iceberg.util.StructLikeSet)

Example 4 with StructLikeSet

use of org.apache.iceberg.util.StructLikeSet in project hive by apache.

the class DeleteReadTests method testMixedPositionAndEqualityDeletes.

/**
 * Commits an equality-delete file on the {@code data} column together with a position-delete
 * file in a single row delta, then checks that the rows targeted by either are gone.
 */
@Test
public void testMixedPositionAndEqualityDeletes() throws IOException {
    // Equality deletes keyed on "data"; trailing comments give the id of the matching row.
    Schema deleteSchema = table.schema().select("data");
    Record template = GenericRecord.create(deleteSchema);
    List<Record> equalityRows = Lists.newArrayList(
        template.copy("data", "a"), // id = 29
        template.copy("data", "d"), // id = 89
        template.copy("data", "g")); // id = 122
    DeleteFile eqDeleteFile = FileHelpers.writeDeleteFile(
        table, Files.localOutput(temp.newFile()), Row.of(0), equalityRows, deleteSchema);

    // Position deletes; id 89 is also covered by the equality deletes above.
    List<Pair<CharSequence, Long>> positions = Lists.newArrayList(
        Pair.of(dataFile.path(), 3L), // id = 89
        Pair.of(dataFile.path(), 5L)); // id = 121
    Pair<DeleteFile, Set<CharSequence>> posDeleteResult =
        FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), positions);

    table.newRowDelta()
        .addDeletes(eqDeleteFile)
        .addDeletes(posDeleteResult.first())
        .validateDataFilesExist(posDeleteResult.second())
        .commit();

    StructLikeSet expectedRows = rowSetWithoutIds(29, 89, 121, 122);
    StructLikeSet actualRows = rowSet(tableName, table, "*");

    Assert.assertEquals("Table should contain expected rows", expectedRows, actualRows);
}
Also used : StructLikeSet(org.apache.iceberg.util.StructLikeSet) Set(java.util.Set) Schema(org.apache.iceberg.Schema) StructLikeSet(org.apache.iceberg.util.StructLikeSet) DeleteFile(org.apache.iceberg.DeleteFile) Pair(org.apache.iceberg.util.Pair) Test(org.junit.Test)

Example 5 with StructLikeSet

use of org.apache.iceberg.util.StructLikeSet in project hive by apache.

the class DeleteReadTests method testEqualityDeletesSpanningMultipleDataFiles.

/**
 * Appends a second data file that shares a {@code data} value with existing rows, commits
 * equality deletes on {@code data}, and checks that the matching rows are removed from the
 * table read back.
 */
@Test
public void testEqualityDeletesSpanningMultipleDataFiles() throws IOException {
    // Add another DataFile with common values: id 144 reuses data = "a".
    GenericRecord rowTemplate = GenericRecord.create(table.schema());
    records.add(rowTemplate.copy("id", 144, "data", "a"));
    this.dataFile = FileHelpers.writeDataFile(
        table, Files.localOutput(temp.newFile()), Row.of(0), records);
    table.newAppend()
        .appendFile(dataFile)
        .commit();

    // Equality deletes keyed on "data"; trailing comments give the ids of the matching rows.
    Schema deleteSchema = table.schema().select("data");
    Record deleteTemplate = GenericRecord.create(deleteSchema);
    List<Record> equalityRows = Lists.newArrayList(
        deleteTemplate.copy("data", "a"), // id = 29, 144
        deleteTemplate.copy("data", "d"), // id = 89
        deleteTemplate.copy("data", "g")); // id = 122
    DeleteFile eqDeleteFile = FileHelpers.writeDeleteFile(
        table, Files.localOutput(temp.newFile()), Row.of(0), equalityRows, deleteSchema);
    table.newRowDelta()
        .addDeletes(eqDeleteFile)
        .commit();

    StructLikeSet expectedRows = rowSetWithoutIds(29, 89, 122, 144);
    StructLikeSet actualRows = rowSet(tableName, table, "*");

    Assert.assertEquals("Table should contain expected rows", expectedRows, actualRows);
}
Also used : Schema(org.apache.iceberg.Schema) StructLikeSet(org.apache.iceberg.util.StructLikeSet) DeleteFile(org.apache.iceberg.DeleteFile) Test(org.junit.Test)

Aggregations

StructLikeSet (org.apache.iceberg.util.StructLikeSet): 10; DeleteFile (org.apache.iceberg.DeleteFile): 9; Schema (org.apache.iceberg.Schema): 9; Test (org.junit.Test): 9; Set (java.util.Set): 4; Pair (org.apache.iceberg.util.Pair): 4; IOException (java.io.IOException): 3; List (java.util.List): 3; DataFile (org.apache.iceberg.DataFile): 3; PartitionSpec (org.apache.iceberg.PartitionSpec): 3; Table (org.apache.iceberg.Table): 3; Assert (org.junit.Assert): 3; Before (org.junit.Before): 3; Files (org.apache.iceberg.Files): 2; Row (org.apache.iceberg.TestHelpers.Row): 2; Lists (org.apache.iceberg.relocated.com.google.common.collect.Lists): 2; Sets (org.apache.iceberg.relocated.com.google.common.collect.Sets): 2; Types (org.apache.iceberg.types.Types): 2; ArrayUtil (org.apache.iceberg.util.ArrayUtil): 2; StructProjection (org.apache.iceberg.util.StructProjection): 2