Use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.
In the class BulkInsertWriterHelper: the method toWriteStatus.
/**
* Tool to convert {@link HoodieInternalWriteStatus} into {@link WriteStatus}.
*/
private static WriteStatus toWriteStatus(HoodieInternalWriteStatus internalWriteStatus) {
  WriteStatus writeStatus = new WriteStatus(false, 0.1);
  writeStatus.setStat(internalWriteStatus.getStat());
  writeStatus.setFileId(internalWriteStatus.getFileId());
  writeStatus.setGlobalError(internalWriteStatus.getGlobalError());
  writeStatus.setTotalRecords(internalWriteStatus.getTotalRecords());
  writeStatus.setTotalErrorRecords(internalWriteStatus.getTotalErrorRecords());
  return writeStatus;
}
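For orientation, a hedged usage sketch: a caller holding a list of HoodieInternalWriteStatus results could convert them in bulk with this helper. The toWriteStatuses method below is hypothetical and not part of the project; since toWriteStatus is private to BulkInsertWriterHelper, a real caller would live in that class or keep its own copy of the conversion.

// Hypothetical helper, assuming access to toWriteStatus(...) above.
// Requires java.util.ArrayList and java.util.List in addition to the two Hudi status classes.
private static List<WriteStatus> toWriteStatuses(List<HoodieInternalWriteStatus> internalStatuses) {
  List<WriteStatus> converted = new ArrayList<>(internalStatuses.size());
  for (HoodieInternalWriteStatus internalStatus : internalStatuses) {
    converted.add(toWriteStatus(internalStatus));
  }
  return converted;
}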
Use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.
In the class HoodieBulkInsertInternalWriterTestBase: the method assertWriteStatuses.
protected void assertWriteStatuses(List<HoodieInternalWriteStatus> writeStatuses, int batches, int size, boolean areRecordsSorted, Option<List<String>> fileAbsPaths, Option<List<String>> fileNames) {
  if (areRecordsSorted) {
    assertEquals(batches, writeStatuses.size());
  } else {
    assertEquals(Math.min(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS.length, batches), writeStatuses.size());
  }
  Map<String, Long> sizeMap = new HashMap<>();
  if (!areRecordsSorted) {
    // compute the expected record count per partition path across all batches
    for (int i = 0; i < batches; i++) {
      String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
      if (!sizeMap.containsKey(partitionPath)) {
        sizeMap.put(partitionPath, 0L);
      }
      sizeMap.put(partitionPath, sizeMap.get(partitionPath) + size);
    }
  }
  int counter = 0;
  for (HoodieInternalWriteStatus writeStatus : writeStatuses) {
    // verify write status
    assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3], writeStatus.getPartitionPath());
    if (areRecordsSorted) {
      assertEquals(writeStatus.getTotalRecords(), size);
    } else {
      assertEquals(writeStatus.getTotalRecords(), sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]));
    }
    assertNull(writeStatus.getGlobalError());
    assertEquals(writeStatus.getFailedRowsSize(), 0);
    assertEquals(writeStatus.getTotalErrorRecords(), 0);
    assertFalse(writeStatus.hasErrors());
    assertNotNull(writeStatus.getFileId());
    String fileId = writeStatus.getFileId();
    if (fileAbsPaths.isPresent()) {
      fileAbsPaths.get().add(basePath + "/" + writeStatus.getStat().getPath());
    }
    if (fileNames.isPresent()) {
      fileNames.get().add(writeStatus.getStat().getPath().substring(writeStatus.getStat().getPath().lastIndexOf('/') + 1));
    }
    HoodieWriteStat writeStat = writeStatus.getStat();
    if (areRecordsSorted) {
      assertEquals(size, writeStat.getNumInserts());
      assertEquals(size, writeStat.getNumWrites());
    } else {
      assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumInserts());
      assertEquals(sizeMap.get(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter % 3]), writeStat.getNumWrites());
    }
    assertEquals(fileId, writeStat.getFileId());
    assertEquals(HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[counter++ % 3], writeStat.getPartitionPath());
    assertEquals(0, writeStat.getNumDeletes());
    assertEquals(0, writeStat.getNumUpdateWrites());
    assertEquals(0, writeStat.getTotalWriteErrors());
  }
}
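For context, a hedged example of how a subclass test might invoke this helper; the writeStatuses variable is assumed to have been collected from the bulk-insert internal writer under test, and the batches/size values are purely illustrative.

// Collect generated file paths and names so they can be checked against the table contents later.
List<String> fileAbsPaths = new ArrayList<>();
List<String> fileNames = new ArrayList<>();
assertWriteStatuses(writeStatuses, 3 /* batches */, 100 /* records per batch */, false /* areRecordsSorted */,
    Option.of(fileAbsPaths), Option.of(fileNames));
// Pass Option.empty() for either list when the collected paths are not needed.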
Use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.
In the class TestHoodieRowCreateHandle: the method testGlobalFailure.
/**
 * Issues some corrupted or wrongly-schematized InternalRows after a few valid InternalRows so that a global error
 * is raised: write batch 1 of valid records, then a batch of invalid records; a global error should be thrown.
 */
@Test
public void testGlobalFailure() throws Exception {
  // init config and table
  HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).build();
  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[0];
  // init some args
  String fileId = UUID.randomUUID().toString();
  String instantTime = "000";
  HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, cfg, partitionPath, fileId, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE);
  int size = 10 + RANDOM.nextInt(1000);
  int totalFailures = 5;
  // generate the first batch of valid rows
  Dataset<Row> inputRows = SparkDatasetTestUtils.getRandomRows(sqlContext, size / 2, partitionPath, false);
  List<InternalRow> internalRows = SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
  // generate some failure rows
  for (int i = 0; i < totalFailures; i++) {
    internalRows.add(SparkDatasetTestUtils.getInternalRowWithError(partitionPath));
  }
  // generate the 2nd batch of valid rows
  Dataset<Row> inputRows2 = SparkDatasetTestUtils.getRandomRows(sqlContext, size / 2, partitionPath, false);
  internalRows.addAll(SparkDatasetTestUtils.toInternalRows(inputRows2, SparkDatasetTestUtils.ENCODER));
  // issue writes
  try {
    for (InternalRow internalRow : internalRows) {
      handle.write(internalRow);
    }
    fail("Should have failed");
  } catch (Throwable e) {
    // expected: the corrupted rows trigger a failure during write
  }
  // close the create handle
  HoodieInternalWriteStatus writeStatus = handle.close();
  List<String> fileNames = new ArrayList<>();
  fileNames.add(handle.getFileName());
  // verify write status
  assertNotNull(writeStatus.getGlobalError());
  assertTrue(writeStatus.getGlobalError().getMessage().contains("java.lang.String cannot be cast to org.apache.spark.unsafe.types.UTF8String"));
  assertEquals(writeStatus.getFileId(), fileId);
  assertEquals(writeStatus.getPartitionPath(), partitionPath);
  // verify rows
  Dataset<Row> result = sqlContext.read().parquet(basePath + "/" + partitionPath);
  // pass only the first batch of inputRows, since the global error is thrown right after that batch
  assertRows(inputRows, result, instantTime, fileNames);
}
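The global error asserted above originates from a ClassCastException: judging by the asserted message, SparkDatasetTestUtils.getInternalRowWithError produces a row whose string field holds a plain java.lang.String rather than Spark's UTF8String, so reading that field during the parquet write fails. A minimal, standalone sketch of that failure mode (the row construction here is illustrative, not the actual getInternalRowWithError implementation):

import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.unsafe.types.UTF8String;

public class Utf8CastFailureSketch {
  public static void main(String[] args) {
    // A string field in an InternalRow must be stored as UTF8String; storing a plain
    // java.lang.String corrupts the row and surfaces as a ClassCastException on read.
    InternalRow corrupted = new GenericInternalRow(new Object[] {"plain-java-string"});
    try {
      UTF8String value = corrupted.getUTF8String(0); // throws ClassCastException
    } catch (ClassCastException e) {
      // message contains: java.lang.String cannot be cast to org.apache.spark.unsafe.types.UTF8String
      System.out.println(e.getMessage());
    }
  }
}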
Use of org.apache.hudi.client.HoodieInternalWriteStatus in project hudi by apache.
In the class TestHoodieRowCreateHandle: the method testRowCreateHandle.
@Test
public void testRowCreateHandle() throws Exception {
  // init config and table
  HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort).build();
  HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient);
  List<String> fileNames = new ArrayList<>();
  List<String> fileAbsPaths = new ArrayList<>();
  Dataset<Row> totalInputRows = null;
  // one round per partition
  for (int i = 0; i < 5; i++) {
    String partitionPath = HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS[i % 3];
    // init some args
    String fileId = UUID.randomUUID().toString();
    String instantTime = "000";
    HoodieRowCreateHandle handle = new HoodieRowCreateHandle(table, cfg, partitionPath, fileId, instantTime, RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE);
    int size = 10 + RANDOM.nextInt(1000);
    // Generate inputs
    Dataset<Row> inputRows = SparkDatasetTestUtils.getRandomRows(sqlContext, size, partitionPath, false);
    if (totalInputRows == null) {
      totalInputRows = inputRows;
    } else {
      totalInputRows = totalInputRows.union(inputRows);
    }
    // issue writes
    HoodieInternalWriteStatus writeStatus = writeAndGetWriteStatus(inputRows, handle);
    fileAbsPaths.add(basePath + "/" + writeStatus.getStat().getPath());
    fileNames.add(handle.getFileName());
    // verify output
    assertOutput(writeStatus, size, fileId, partitionPath, instantTime, totalInputRows, fileNames, fileAbsPaths);
  }
}
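The writeAndGetWriteStatus helper referenced above is not part of this excerpt. A plausible sketch, assuming it follows the same write-then-close sequence used in testGlobalFailure; the body below is an assumption for illustration, not the project's exact implementation.

// Hypothetical sketch of the helper used above: feed every InternalRow to the handle,
// then close it to obtain the HoodieInternalWriteStatus for the written file.
private HoodieInternalWriteStatus writeAndGetWriteStatus(Dataset<Row> inputRows, HoodieRowCreateHandle handle) throws Exception {
  List<InternalRow> internalRows = SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
  for (InternalRow internalRow : internalRows) {
    handle.write(internalRow);
  }
  return handle.close();
}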