Use of org.apache.iceberg.DataFile in project hive by apache: class HiveCreateReplaceTableTest, method testCreateTableTxnAndAppend.
@Test
public void testCreateTableTxnAndAppend() {
  Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
  Transaction txn = catalog.newCreateTableTransaction(
      TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
  AppendFiles append = txn.newAppend();
  DataFile dataFile = DataFiles.builder(SPEC)
      .withPath("/path/to/data-a.parquet")
      .withFileSizeInBytes(0)
      .withRecordCount(1)
      .build();
  append.appendFile(dataFile);
  append.commit();
  txn.commitTransaction();
  Table table = catalog.loadTable(TABLE_IDENTIFIER);
  Snapshot snapshot = table.currentSnapshot();
  Assert.assertTrue("Table should have one manifest file", snapshot.allManifests().size() == 1);
}
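The builder above registers an unpartitioned file. As a rough sketch (not taken from the Hive source; the event_date column and its partition path are assumptions for illustration), the same builder can also carry format and partition metadata when the spec is partitioned:

// Sketch only: assumes SCHEMA contains a column named "event_date".
PartitionSpec partitionedSpec = PartitionSpec.builderFor(SCHEMA)
    .identity("event_date")
    .build();
DataFile partitionedFile = DataFiles.builder(partitionedSpec)
    .withPath("/path/to/data-b.parquet")
    .withFormat(FileFormat.PARQUET)               // can also be inferred from the path extension
    .withPartitionPath("event_date=2020-01-01")   // partition values as a Hive-style path
    .withFileSizeInBytes(10)
    .withRecordCount(1)
    .build();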
Use of org.apache.iceberg.DataFile in project hive by apache: class TestHiveTableConcurrency, method testConcurrentConnections.
@Test
public synchronized void testConcurrentConnections() throws InterruptedException {
  Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER);
  icebergTable.updateProperties()
      .set(COMMIT_NUM_RETRIES, "20")
      .set(COMMIT_MIN_RETRY_WAIT_MS, "25")
      .set(COMMIT_MAX_RETRY_WAIT_MS, "25")
      .commit();
  String fileName = UUID.randomUUID().toString();
  DataFile file = DataFiles.builder(icebergTable.spec())
      .withPath(FileFormat.PARQUET.addExtension(fileName))
      .withRecordCount(2)
      .withFileSizeInBytes(0)
      .build();
  ExecutorService executorService =
      MoreExecutors.getExitingExecutorService((ThreadPoolExecutor) Executors.newFixedThreadPool(7));
  for (int i = 0; i < 7; i++) {
    executorService.submit(() -> icebergTable.newAppend().appendFile(file).commit());
  }
  executorService.shutdown();
  Assert.assertTrue("Timeout", executorService.awaitTermination(3, TimeUnit.MINUTES));
  Assert.assertEquals(7, Iterables.size(icebergTable.snapshots()));
}
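The retry settings above use constants that appear to be statically imported from org.apache.iceberg.TableProperties. A minimal sketch spelling out the same configuration with the fully qualified constants (the key strings in the comments are what they resolve to in recent Iceberg releases):

// Sketch of the same property update written without static imports.
Table table = catalog.loadTable(TABLE_IDENTIFIER);
table.updateProperties()
    .set(TableProperties.COMMIT_NUM_RETRIES, "20")        // commit.retry.num-retries
    .set(TableProperties.COMMIT_MIN_RETRY_WAIT_MS, "25")  // commit.retry.min-wait-ms
    .set(TableProperties.COMMIT_MAX_RETRY_WAIT_MS, "25")  // commit.retry.max-wait-ms
    .commit();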
Use of org.apache.iceberg.DataFile in project hive by apache: class TestHiveTableConcurrency, method testConcurrentFastAppends.
@Test
public synchronized void testConcurrentFastAppends() {
  Table icebergTable = catalog.loadTable(TABLE_IDENTIFIER);
  String fileName = UUID.randomUUID().toString();
  DataFile file = DataFiles.builder(icebergTable.spec())
      .withPath(FileFormat.PARQUET.addExtension(fileName))
      .withRecordCount(2)
      .withFileSizeInBytes(0)
      .build();
  ExecutorService executorService =
      MoreExecutors.getExitingExecutorService((ThreadPoolExecutor) Executors.newFixedThreadPool(2));
  AtomicInteger barrier = new AtomicInteger(0);
  Tasks.range(2)
      .stopOnFailure()
      .throwFailureWhenFinished()
      .executeWith(executorService)
      .run(index -> {
        for (int numCommittedFiles = 0; numCommittedFiles < 10; numCommittedFiles++) {
          while (barrier.get() < numCommittedFiles * 2) {
            try {
              Thread.sleep(10);
            } catch (InterruptedException e) {
              throw new RuntimeException(e);
            }
          }
          icebergTable.newFastAppend().appendFile(file).commit();
          barrier.incrementAndGet();
        }
      });
  icebergTable.refresh();
  Assert.assertEquals(20, icebergTable.currentSnapshot().allManifests().size());
}
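The final assertion depends on the difference between the two append operations. A short sketch of that distinction, reusing icebergTable and file from the test above:

// FastAppend writes one new manifest per commit and never rewrites existing manifests,
// which is why the 20 commits above leave 20 manifests in the current snapshot.
icebergTable.newFastAppend().appendFile(file).commit();
// A regular append may merge small manifests on commit (governed by
// TableProperties.MANIFEST_MIN_MERGE_COUNT, "commit.manifest.min-count-to-merge"),
// trading extra commit work for fewer manifests to scan later.
icebergTable.newAppend().appendFile(file).commit();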
Use of org.apache.iceberg.DataFile in project hive by apache: class HiveTableUtil, method getDataFiles.
private static List<DataFile> getDataFiles(
    RemoteIterator<LocatedFileStatus> fileStatusIterator, Map<String, String> partitionKeys, String format,
    PartitionSpec spec, MetricsConfig metricsConfig, NameMapping nameMapping, Configuration conf)
    throws IOException {
  List<DataFile> dataFiles = new ArrayList<>();
  while (fileStatusIterator.hasNext()) {
    LocatedFileStatus fileStatus = fileStatusIterator.next();
    String fileName = fileStatus.getPath().getName();
    if (fileName.startsWith(".") || fileName.startsWith("_")) {
      continue;
    }
    dataFiles.addAll(TableMigrationUtil.listPartition(
        partitionKeys, fileStatus.getPath().toString(), format, spec, conf, metricsConfig, nameMapping));
  }
  return dataFiles;
}
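The caller of getDataFiles is not shown here. A plausible sketch of how such a list is typically committed to an Iceberg table (the table variable is an assumption, not HiveTableUtil's real caller):

// Hypothetical usage: register every collected file in one append so the import is atomic.
AppendFiles append = table.newAppend();
for (DataFile dataFile : dataFiles) {
  append.appendFile(dataFile);
}
append.commit();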
Use of org.apache.iceberg.DataFile in project hive by apache: class TestHiveIcebergV2, method testReadAndWriteFormatV2Unpartitioned_PosDelete.
@Test
public void testReadAndWriteFormatV2Unpartitioned_PosDelete() throws IOException {
  Assume.assumeFalse("Reading V2 tables with delete files is currently only supported in " +
      "non-vectorized mode and only for Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
  Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
      PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
  // delete one of the rows
  DataFile dataFile = StreamSupport.stream(tbl.currentSnapshot().addedFiles().spliterator(), false)
      .findFirst()
      .orElseThrow(() -> new RuntimeException("Did not find any data files for test table"));
  List<PositionDelete<Record>> deletes = ImmutableList.of(
      positionDelete(dataFile.path(), 2L, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)));
  DeleteFile deleteFile = HiveIcebergTestUtils.createPositionalDeleteFile(tbl, "dummyPath", fileFormat, null, deletes);
  tbl.newRowDelta().addDeletes(deleteFile).commit();
  List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");
  // only the other two rows are present
  Assert.assertEquals(2, objects.size());
  Assert.assertArrayEquals(new Object[] {0L, "Alice", "Brown"}, objects.get(0));
  Assert.assertArrayEquals(new Object[] {1L, "Bob", "Green"}, objects.get(1));
}
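HiveIcebergTestUtils.createPositionalDeleteFile is a test helper whose body is not shown above. As a hedged sketch only (not the helper's actual implementation), the DeleteFile metadata it ultimately hands to the row delta could be described with Iceberg's FileMetadata builder; the path, size, and record count below are placeholders, and the delete rows themselves would have to be written with a position delete writer first:

// Sketch: describe an already-written position delete file so it can be committed.
DeleteFile positionDeletes = FileMetadata.deleteFileBuilder(tbl.spec())
    .ofPositionDeletes()
    .withPath("/path/to/pos-deletes.parquet")
    .withFormat(FileFormat.PARQUET)
    .withFileSizeInBytes(10)
    .withRecordCount(1)
    .build();
tbl.newRowDelta().addDeletes(positionDeletes).commit();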