Use of org.apache.hudi.table.HoodieJavaCopyOnWriteTable in project hudi by apache.
From the class TestJavaCopyOnWriteActionExecutor, method testFileSizeUpsertRecords.
@Test
public void testFileSizeUpsertRecords() throws Exception {
HoodieWriteConfig config = makeHoodieClientConfigBuilder()
    .withStorageConfig(HoodieStorageConfig.newBuilder()
        .parquetMaxFileSize(64 * 1024)
        .hfileMaxFileSize(64 * 1024)
        .parquetBlockSize(64 * 1024)
        .parquetPageSize(64 * 1024)
        .build())
    .build();
String instantTime = makeNewCommitTime();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieJavaCopyOnWriteTable table = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, metaClient);
List<HoodieRecord> records = new ArrayList<>();
// Approx 1150 records are written for a block size of 64KB
for (int i = 0; i < 2000; i++) {
String recordStr = "{\"_row_key\":\"" + UUID.randomUUID().toString() + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}";
RawTripTestPayload rowChange = new RawTripTestPayload(recordStr);
records.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange));
}
// Insert new records
BaseJavaCommitActionExecutor actionExecutor = new JavaUpsertCommitActionExecutor(context, config, table, instantTime, records);
// The single-element stream drives handleInsert once and flattens the returned iterator of WriteStatus lists.
Arrays.asList(1).stream()
    .map(i -> actionExecutor.handleInsert(FSUtils.createNewFileIdPfx(), records.iterator()))
    .map(Transformations::flatten)
    .collect(Collectors.toList());
// Check the updated file
int counts = 0;
for (File file : Paths.get(basePath, "2016/01/31").toFile().listFiles()) {
if (file.getName().endsWith(table.getBaseFileExtension()) && FSUtils.getCommitTime(file.getName()).equals(instantTime)) {
LOG.info(file.getName() + "-" + file.length());
counts++;
}
}
assertEquals(3, counts, "If the number of records is more than 1150, then there should be a new file");
}
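The makeHoodieClientConfig() and makeHoodieClientConfigBuilder() helpers referenced in these snippets are not shown on this page. Below is a minimal sketch of what they might look like, assuming the basePath field and TRIP_EXAMPLE_SCHEMA constant from the surrounding test harness; the actual Hudi test helpers may differ.
// Hypothetical sketch of the config helpers; the real test helpers may differ.
// Assumes basePath and TRIP_EXAMPLE_SCHEMA come from the surrounding test harness.
private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() {
  return HoodieWriteConfig.newBuilder()
      .withEngineType(EngineType.JAVA)
      .withPath(basePath)
      .withSchema(TRIP_EXAMPLE_SCHEMA);
}

private HoodieWriteConfig makeHoodieClientConfig() {
  return makeHoodieClientConfigBuilder().build();
}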
Use of org.apache.hudi.table.HoodieJavaCopyOnWriteTable in project hudi by apache.
From the class TestJavaCopyOnWriteActionExecutor, method testBulkInsertRecords.
// bulkInsertMode is the name of a BulkInsertSortMode value (e.g. NONE, GLOBAL_SORT, PARTITION_SORT).
public void testBulkInsertRecords(String bulkInsertMode) throws Exception {
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath(basePath)
    .withSchema(TRIP_EXAMPLE_SCHEMA)
    .withBulkInsertParallelism(2)
    .withBulkInsertSortMode(bulkInsertMode)
    .build();
String instantTime = makeNewCommitTime();
HoodieJavaWriteClient writeClient = getHoodieWriteClient(config);
writeClient.startCommitWithTime(instantTime);
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieJavaCopyOnWriteTable table = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, metaClient);
// Insert new records
final List<HoodieRecord> inputRecords = generateTestRecordsForBulkInsert();
JavaBulkInsertCommitActionExecutor bulkInsertExecutor = new JavaBulkInsertCommitActionExecutor(context, config, table, instantTime, inputRecords, Option.empty());
List<WriteStatus> returnedStatuses = (List<WriteStatus>) bulkInsertExecutor.execute().getWriteStatuses();
verifyStatusResult(returnedStatuses, generateExpectedPartitionNumRecords(inputRecords));
}
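verifyStatusResult(...) and generateExpectedPartitionNumRecords(...) are test helpers that are not shown here. The sketch below illustrates the kind of check verifyStatusResult performs, assuming per-partition counts are derived from each WriteStatus's HoodieWriteStat; it is an illustration, not the actual Hudi helper.
// Hypothetical sketch of verifyStatusResult, not the actual Hudi test code:
// tally records written per partition from the WriteStatus stats and compare
// them with the expected per-partition counts.
private void verifyStatusResult(List<WriteStatus> statuses, Map<String, Long> expectedPartitionNumRecords) {
  Map<String, Long> actualPartitionNumRecords = new HashMap<>();
  for (WriteStatus status : statuses) {
    long written = status.getStat().getNumWrites() - status.getStat().getNumDeletes();
    actualPartitionNumRecords.merge(status.getPartitionPath(), written, Long::sum);
  }
  assertEquals(expectedPartitionNumRecords, actualPartitionNumRecords);
}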
Use of org.apache.hudi.table.HoodieJavaCopyOnWriteTable in project hudi by apache.
From the class TestJavaCopyOnWriteActionExecutor, method testInsertRecords.
@Test
public void testInsertRecords() throws Exception {
HoodieWriteConfig config = makeHoodieClientConfig();
String instantTime = makeNewCommitTime();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieJavaCopyOnWriteTable table = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, metaClient);
// Case 1:
// 10 records for partition 1, 1 record for partition 2.
List<HoodieRecord> records = newHoodieRecords(10, "2016-01-31T03:16:41.415Z");
records.addAll(newHoodieRecords(1, "2016-02-01T03:16:41.415Z"));
// Insert new records
final List<HoodieRecord> recs2 = records;
BaseJavaCommitActionExecutor actionExecutor = new JavaInsertPreppedCommitActionExecutor(context, config, table, instantTime, recs2);
final List<WriteStatus> returnedStatuses = new ArrayList<>();
actionExecutor.handleInsert(FSUtils.createNewFileIdPfx(), recs2.iterator()).forEachRemaining(x -> returnedStatuses.addAll((List<WriteStatus>) x));
assertEquals(2, returnedStatuses.size());
Map<String, Long> expectedPartitionNumRecords = new HashMap<>();
expectedPartitionNumRecords.put("2016/01/31", 10L);
expectedPartitionNumRecords.put("2016/02/01", 1L);
verifyStatusResult(returnedStatuses, expectedPartitionNumRecords);
// Case 2:
// 1 record for partition 1, 5 records for partition 2, 1 record for partition 3.
records = newHoodieRecords(1, "2016-01-31T03:16:41.415Z");
records.addAll(newHoodieRecords(5, "2016-02-01T03:16:41.415Z"));
records.addAll(newHoodieRecords(1, "2016-02-02T03:16:41.415Z"));
// Insert new records
final List<HoodieRecord> recs3 = records;
BaseJavaCommitActionExecutor newActionExecutor = new JavaUpsertPreppedCommitActionExecutor(context, config, table, instantTime, recs3);
final List<WriteStatus> returnedStatuses1 = new ArrayList<>();
newActionExecutor.handleInsert(FSUtils.createNewFileIdPfx(), recs3.iterator()).forEachRemaining(x -> returnedStatuses1.addAll((List<WriteStatus>) x));
assertEquals(3, returnedStatuses1.size());
expectedPartitionNumRecords.clear();
expectedPartitionNumRecords.put("2016/01/31", 1L);
expectedPartitionNumRecords.put("2016/02/01", 5L);
expectedPartitionNumRecords.put("2016/02/02", 1L);
verifyStatusResult(returnedStatuses1, expectedPartitionNumRecords);
}
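The newHoodieRecords(count, time) helper used above is not shown on this page. A minimal sketch of how it could build records, consistent with the RawTripTestPayload usage in the first snippet, follows; the partition path is derived from the payload's time field, and the names and details are assumptions.
// Hypothetical sketch of newHoodieRecords(count, time); the real helper is not
// shown on this page. The partition path comes from the time field of the payload.
private List<HoodieRecord> newHoodieRecords(int numRecords, String fieldTime) throws Exception {
  List<HoodieRecord> records = new ArrayList<>();
  for (int i = 0; i < numRecords; i++) {
    String recordStr = String.format(
        "{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}", UUID.randomUUID(), fieldTime, i);
    RawTripTestPayload payload = new RawTripTestPayload(recordStr);
    records.add(new HoodieAvroRecord(new HoodieKey(payload.getRowKey(), payload.getPartitionPath()), payload));
  }
  return records;
}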
Use of org.apache.hudi.table.HoodieJavaCopyOnWriteTable in project hudi by apache.
From the class TestJavaCopyOnWriteActionExecutor, method testInsertUpsertWithHoodieAvroPayload.
@Test
public void testInsertUpsertWithHoodieAvroPayload() throws Exception {
Schema schema = getSchemaFromResource(TestJavaCopyOnWriteActionExecutor.class, "/testDataGeneratorSchema.txt");
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withEngineType(EngineType.JAVA)
    .withPath(basePath)
    .withSchema(schema.toString())
    .withStorageConfig(HoodieStorageConfig.newBuilder()
        .parquetMaxFileSize(1000 * 1024)
        .hfileMaxFileSize(1000 * 1024)
        .build())
    .build();
metaClient = HoodieTableMetaClient.reload(metaClient);
final HoodieJavaCopyOnWriteTable table = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, metaClient);
String instantTime = "000";
// Perform inserts of 100 records to test CreateHandle and BufferedExecutor
final List<HoodieRecord> inserts = dataGen.generateInsertsWithHoodieAvroPayload(instantTime, 100);
BaseJavaCommitActionExecutor actionExecutor = new JavaInsertCommitActionExecutor(context, config, table, instantTime, inserts);
final List<List<WriteStatus>> ws = new ArrayList<>();
actionExecutor.handleInsert(UUID.randomUUID().toString(), inserts.iterator()).forEachRemaining(x -> ws.add((List<WriteStatus>) x));
WriteStatus writeStatus = ws.get(0).get(0);
String fileId = writeStatus.getFileId();
metaClient.getFs().create(new Path(Paths.get(basePath, ".hoodie", "000.commit").toString())).close();
// TODO : Find the race condition that causes the timeline to sometimes reflect 000.commit and sometimes not
final HoodieJavaCopyOnWriteTable reloadedTable = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, HoodieTableMetaClient.reload(metaClient));
final List<HoodieRecord> updates = dataGen.generateUpdatesWithHoodieAvroPayload(instantTime, inserts);
String partitionPath = writeStatus.getPartitionPath();
long numRecordsInPartition = updates.stream().filter(u -> u.getPartitionPath().equals(partitionPath)).count();
BaseJavaCommitActionExecutor newActionExecutor = new JavaUpsertCommitActionExecutor(context, config, reloadedTable, instantTime, updates);
taskContextSupplier.reset();
final List<List<WriteStatus>> updateStatus = new ArrayList<>();
newActionExecutor.handleUpdate(partitionPath, fileId, updates.iterator()).forEachRemaining(x -> updateStatus.add((List<WriteStatus>) x));
assertEquals(updates.size() - numRecordsInPartition, updateStatus.get(0).get(0).getTotalErrorRecords());
}
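The final assertion relies on the fact that updates handed to a file handle for a different partition are marked as errors. A small hypothetical helper, not part of the Hudi test, that sums error records across all returned WriteStatus lists could back an equivalent assertion.
// Hypothetical helper, not part of the Hudi test: sums error records across all
// WriteStatus lists a handle returns; updates routed to a handle for a different
// partition are counted as errors, which is what the assertion above checks.
private static long totalErrorRecords(List<List<WriteStatus>> statuses) {
  return statuses.stream()
      .flatMap(List::stream)
      .mapToLong(WriteStatus::getTotalErrorRecords)
      .sum();
}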
Use of org.apache.hudi.table.HoodieJavaCopyOnWriteTable in project hudi by apache.
From the class TestJavaCopyOnWriteActionExecutor, method testMetadataAggregateFromWriteStatus.
// Check that record-level metadata is aggregated properly at the end of the write.
@Test
public void testMetadataAggregateFromWriteStatus() throws Exception {
// Prepare the AvroParquetIO
HoodieWriteConfig config = makeHoodieClientConfigBuilder().withWriteStatusClass(MetadataMergeWriteStatus.class).build();
String firstCommitTime = makeNewCommitTime();
metaClient = HoodieTableMetaClient.reload(metaClient);
HoodieJavaCopyOnWriteTable table = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, metaClient);
// Get some records belonging to the same partition (2016/01/31)
String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}";
String recordStr2 = "{\"_row_key\":\"8eb5b87b-1feu-4edd-87b4-6ec96dc405a0\"," + "\"time\":\"2016-01-31T03:20:41.415Z\",\"number\":100}";
String recordStr3 = "{\"_row_key\":\"8eb5b87c-1fej-4edd-87b4-6ec96dc405a0\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}";
List<HoodieRecord> records = new ArrayList<>();
RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1);
records.add(new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1));
RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2);
records.add(new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2));
RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3);
records.add(new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3));
// Insert new records
BaseJavaCommitActionExecutor actionExecutor = new JavaInsertCommitActionExecutor(context, config, table, firstCommitTime, records);
List<WriteStatus> writeStatuses = new ArrayList<>();
actionExecutor.handleInsert(FSUtils.createNewFileIdPfx(), records.iterator()).forEachRemaining(x -> writeStatuses.addAll((List<WriteStatus>) x));
Map<String, String> allWriteStatusMergedMetadataMap = MetadataMergeWriteStatus.mergeMetadataForWriteStatuses(writeStatuses);
assertTrue(allWriteStatusMergedMetadataMap.containsKey("InputRecordCount_1506582000"));
// For the metadata key InputRecordCount_1506582000, the value is 2 for each record,
// so the sum across the three records should be 2 * 3 = 6
assertEquals("6", allWriteStatusMergedMetadataMap.get("InputRecordCount_1506582000"));
}
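MetadataMergeWriteStatus.mergeMetadataForWriteStatuses(...) aggregates record-level metadata across write statuses. The sketch below shows the merge idea of summing numeric metadata values per key; it is a simplified illustration, not the actual MetadataMergeWriteStatus implementation, and the method name is an assumption.
// Simplified illustration of the merge idea, not the actual MetadataMergeWriteStatus
// implementation: numeric metadata values are summed per key across write statuses,
// so three records contributing 2 each yield "6" for InputRecordCount_1506582000.
static Map<String, String> mergeMetadataMaps(List<Map<String, String>> perStatusMetadata) {
  Map<String, Long> totals = new HashMap<>();
  for (Map<String, String> metadata : perStatusMetadata) {
    metadata.forEach((key, value) -> totals.merge(key, Long.parseLong(value), Long::sum));
  }
  Map<String, String> merged = new HashMap<>();
  totals.forEach((key, total) -> merged.put(key, String.valueOf(total)));
  return merged;
}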