use of org.apache.hudi.common.table.log.block.HoodieDataBlock in project hudi by apache.
the class TestHoodieLogFormat method testAppendAndReadOnCorruptedLogInReverse.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) throws IOException, URISyntaxException, InterruptedException {
  Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
      .overBaseCommit("100").withFs(fs).build();
  Schema schema = getSimpleSchema();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
  HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header);
  writer.appendBlock(dataBlock);
  writer.close();
  FileCreateUtils.createDeltaCommit(basePath, "100", fs);
  // Append some arbitrary bytes to the end of the log (mimics a partially written commit)
  fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
  FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
  // Start a block: magic bytes followed by the block type
  outputStream.write(HoodieLogFormat.MAGIC);
  outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
  // Write out a length that does not conform to the content
  outputStream.writeInt(1000);
  // Write out footer length
  outputStream.writeInt(1);
  // Write out some metadata
  // TODO : test for failure to write metadata - NA ?
  outputStream.write(HoodieLogBlock.getLogMetadataBytes(header));
  outputStream.write("something-random".getBytes());
  outputStream.flush();
  outputStream.close();
  // Should be able to append a new block
  writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1")
      .overBaseCommit("100").withFs(fs).build();
  records = SchemaTestUtil.generateTestRecords(0, 100);
  dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header);
  writer.appendBlock(dataBlock);
  writer.close();
  // Reverse reads: the last appended block is readable, then the corrupt block fails the read
  HoodieLogFileReader reader = new HoodieLogFileReader(fs,
      new HoodieLogFile(writer.getLogFile().getPath(), fs.getFileStatus(writer.getLogFile().getPath()).getLen()),
      schema, bufferSize, readBlocksLazily, true);
  assertTrue(reader.hasPrev(), "Last block should be available");
  HoodieLogBlock block = reader.prev();
  assertTrue(block instanceof HoodieDataBlock, "Last block should be a data block");
  assertTrue(reader.hasPrev(), "Corrupt block should still be reported as available");
  assertThrows(CorruptedLogFileException.class, () -> reader.prev());
  reader.close();
}
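For contrast, a minimal sketch (not part of the original test) of reading the same corrupted file front to back. The assumption here is that the forward reader surfaces the hand-written partial bytes as a corrupt block rather than throwing, which is why only the reverse read above fails with CorruptedLogFileException.

Reader forwardReader = HoodieLogFormat.newReader(fs, writer.getLogFile(), schema);
while (forwardReader.hasNext()) {
  HoodieLogBlock next = forwardReader.next();
  // Blocks appended through the writer come back as HoodieDataBlock; the partial
  // bytes written by hand are assumed to come back as a corrupt, non-data block.
  System.out.println(next.getBlockType());
}
forwardReader.close();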
use of org.apache.hudi.common.table.log.block.HoodieDataBlock in project hudi by apache.
the class TestHoodieBackedMetadata method verifyMetadataRawRecords.
/**
 * Verify the metadata table's on-disk raw records. When populate meta fields is enabled,
 * these records should carry the additional meta fields in the payload. When key
 * deduplication is enabled, the key in the payload of these on-disk records should be an
 * empty string.
 *
 * @param table
 * @param logFiles - Metadata table log files to be verified
 * @param enableMetaFields - Enable meta fields for records
 * @throws IOException
 */
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles, boolean enableMetaFields) throws IOException {
  for (HoodieLogFile logFile : logFiles) {
    FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
    MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
    if (writerSchemaMsg == null) {
      // not a data block
      continue;
    }
    Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
    HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
    while (logFileReader.hasNext()) {
      HoodieLogBlock logBlock = logFileReader.next();
      if (logBlock instanceof HoodieDataBlock) {
        try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
          recordItr.forEachRemaining(indexRecord -> {
            final GenericRecord record = (GenericRecord) indexRecord;
            if (enableMetaFields) {
              // Metadata table records should have meta fields
              assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
              assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
            } else {
              // Metadata table records should not have meta fields
              assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
              assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
            }
            final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
            assertFalse(key.isEmpty());
            if (enableMetaFields) {
              assertEquals(key, String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)));
            }
          });
        }
      }
    }
  }
}
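For reference, a short sketch of the field names the assertions above read from each record. The literal values in the comments are Hudi's standard meta-field names and are stated from general knowledge of the project, not from this snippet.

String recordKeyField = HoodieRecord.RECORD_KEY_METADATA_FIELD;   // "_hoodie_record_key"
String commitTimeField = HoodieRecord.COMMIT_TIME_METADATA_FIELD; // "_hoodie_commit_time"
String payloadKeyField = HoodieMetadataPayload.KEY_FIELD_NAME;    // the metadata payload's own key field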
use of org.apache.hudi.common.table.log.block.HoodieDataBlock in project hudi by apache.
the class TestHoodieBackedTableMetadata method verifyMetadataRawRecords.
/**
 * Verify the metadata table's on-disk raw records. When populate meta fields is enabled,
 * these records should carry the additional meta fields in the payload. When key
 * deduplication is enabled, the key in the payload of these on-disk records should be an
 * empty string.
 *
 * @param table
 * @param logFiles - Metadata table log files to be verified
 * @throws IOException
 */
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles) throws IOException {
  for (HoodieLogFile logFile : logFiles) {
    FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
    MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
    if (writerSchemaMsg == null) {
      // not a data block
      continue;
    }
    Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
    HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
    while (logFileReader.hasNext()) {
      HoodieLogBlock logBlock = logFileReader.next();
      if (logBlock instanceof HoodieDataBlock) {
        try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
          recordItr.forEachRemaining(indexRecord -> {
            final GenericRecord record = (GenericRecord) indexRecord;
            // Meta fields are not populated for metadata table records in this setup
            assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
            assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
            final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
            assertFalse(key.isEmpty());
          });
        }
      }
    }
  }
}
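A hedged usage sketch for the helper above: collect the metadata table log files for a partition's latest file slices and pass them in. The file-system-view calls and the "files" partition name are assumptions about how a caller might gather the log files, not lines from the original test.

List<HoodieLogFile> logFiles = table.getSliceView()
    .getLatestFileSlices("files") // metadata table partition name, illustrative
    .flatMap(FileSlice::getLogFiles)
    .collect(Collectors.toList());
verifyMetadataRawRecords(table, logFiles);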
use of org.apache.hudi.common.table.log.block.HoodieDataBlock in project hudi by apache.
the class LogReaderUtils method readSchemaFromLogFileInReverse.
private static Schema readSchemaFromLogFileInReverse(FileSystem fs, HoodieActiveTimeline activeTimeline, HoodieLogFile hoodieLogFile) throws IOException {
  // Set length for the HoodieLogFile as it will be leveraged by HoodieLogFormat.Reader with reverseReading enabled
  Reader reader = HoodieLogFormat.newReader(fs, hoodieLogFile, null, true, true);
  Schema writerSchema = null;
  HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
  while (reader.hasPrev()) {
    HoodieLogBlock block = reader.prev();
    if (block instanceof HoodieDataBlock) {
      HoodieDataBlock lastBlock = (HoodieDataBlock) block;
      if (completedTimeline.containsOrBeforeTimelineStarts(lastBlock.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME))) {
        writerSchema = new Schema.Parser().parse(lastBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
        break;
      }
    }
  }
  reader.close();
  return writerSchema;
}
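A sketch of how a caller might drive the helper above across all log files of a file slice, newest first, returning the first schema found under a completed instant. The wrapper name and the sorting are illustrative, not taken from the snippet.

public static Schema readLatestSchemaFromLogFiles(FileSystem fs, HoodieActiveTimeline activeTimeline, List<HoodieLogFile> logFiles) throws IOException {
  // Newest log file first, so the most recently committed schema wins
  logFiles.sort(Comparator.comparingInt(HoodieLogFile::getLogVersion).reversed());
  for (HoodieLogFile logFile : logFiles) {
    Schema schema = readSchemaFromLogFileInReverse(fs, activeTimeline, logFile);
    if (schema != null) {
      return schema;
    }
  }
  return null;
}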
use of org.apache.hudi.common.table.log.block.HoodieDataBlock in project hudi by apache.
the class TestHoodieLogFormat method testConcurrentAppend.
private void testConcurrentAppend(boolean logFileExists, boolean newLogFileFormat) throws Exception {
  HoodieLogFormat.WriterBuilder builder1 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100").withFs(fs);
  HoodieLogFormat.WriterBuilder builder2 = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath)
      .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100").withFs(fs);
  if (newLogFileFormat && logFileExists) {
    // Assume there is an existing log file with a write token
    builder1 = builder1.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
    builder2 = builder2.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
  } else if (newLogFileFormat) {
    // First log file of the file slice
    builder1 = builder1.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
    builder2 = builder2.withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
  } else {
    builder1 = builder1.withLogVersion(1).withRolloverLogWriteToken(HoodieLogFormat.UNKNOWN_WRITE_TOKEN);
  }
  Writer writer = builder1.build();
  List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
  Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
  header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
  header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString());
  HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header);
  writer.appendBlock(dataBlock);
  Writer writer2 = builder2.build();
  writer2.appendBlock(dataBlock);
  HoodieLogFile logFile1 = writer.getLogFile();
  HoodieLogFile logFile2 = writer2.getLogFile();
  writer.close();
  writer2.close();
  assertNotNull(logFile1.getLogWriteToken());
  assertEquals(logFile1.getLogVersion(), logFile2.getLogVersion() - 1, "Concurrent writers must roll over to consecutive log versions");
}
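As a small, illustrative follow-up (not part of the test), the rolled-over versions can also be seen by listing the partition and parsing each log file name; getLogVersion and getLogWriteToken appear in the test itself, while getFileName is assumed to be an existing HoodieLogFile accessor.

for (FileStatus status : fs.listStatus(partitionPath)) {
  HoodieLogFile logFile = new HoodieLogFile(status.getPath());
  System.out.println(logFile.getFileName() + " version=" + logFile.getLogVersion() + " writeToken=" + logFile.getLogWriteToken());
}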