Use of org.apache.hudi.common.table.log.block.HoodieLogBlock in project hudi by apache.
The class TestHoodieLogFormat, method testV0Format.
@Test
public void testV0Format() throws IOException, URISyntaxException {
// HoodieLogFormatVersion.DEFAULT_VERSION has been deprecated, so we cannot
// create a writer for it. These tests therefore only cover the older-version
// HoodieAvroDataBlock.
Schema schema = getSimpleSchema();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
List<IndexedRecord> recordsCopy = new ArrayList<>(records);
assertEquals(100, records.size());
assertEquals(100, recordsCopy.size());
HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, schema);
byte[] content = dataBlock.getBytes(schema);
assertTrue(content.length > 0);
HoodieLogBlock logBlock = HoodieAvroDataBlock.getBlock(content, schema);
assertEquals(HoodieLogBlockType.AVRO_DATA_BLOCK, logBlock.getBlockType());
List<IndexedRecord> readRecords = getRecords((HoodieAvroDataBlock) logBlock);
assertEquals(readRecords.size(), recordsCopy.size());
for (int i = 0; i < recordsCopy.size(); ++i) {
assertEquals(recordsCopy.get(i), readRecords.get(i));
}
// Reader schema is optional if it is the same as the writer schema
logBlock = HoodieAvroDataBlock.getBlock(content, null);
assertEquals(HoodieLogBlockType.AVRO_DATA_BLOCK, logBlock.getBlockType());
readRecords = getRecords((HoodieAvroDataBlock) logBlock);
assertEquals(readRecords.size(), recordsCopy.size());
for (int i = 0; i < recordsCopy.size(); ++i) {
assertEquals(recordsCopy.get(i), readRecords.get(i));
}
}
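The getRecords helper referenced above is not shown on this page. A minimal sketch of what it might look like, assuming HoodieDataBlock#getRecordItr returns a ClosableIterator<IndexedRecord> as in the later examples here:
private static List<IndexedRecord> getRecords(HoodieAvroDataBlock dataBlock) throws IOException {
    // Drain the block's record iterator into a list; ClosableIterator is
    // AutoCloseable, so release it with try-with-resources.
    List<IndexedRecord> elements = new ArrayList<>();
    try (ClosableIterator<IndexedRecord> itr = dataBlock.getRecordItr()) {
        itr.forEachRemaining(elements::add);
    }
    return elements;
}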
Use of org.apache.hudi.common.table.log.block.HoodieLogBlock in project hudi by apache.
The class TestHoodieLogFormat, method testAppendAndReadOnCorruptedLogInReverse.
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) throws IOException, URISyntaxException, InterruptedException {
Writer writer = HoodieLogFormat.newWriterBuilder()
    .onParentPath(partitionPath)
    .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
    .withFileId("test-fileid1")
    .overBaseCommit("100")
    .withFs(fs)
    .build();
Schema schema = getSimpleSchema();
List<IndexedRecord> records = SchemaTestUtil.generateTestRecords(0, 100);
Map<HoodieLogBlock.HeaderMetadataType, String> header = new HashMap<>();
header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100");
header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header);
writer.appendBlock(dataBlock);
writer.close();
FileCreateUtils.createDeltaCommit(basePath, "100", fs);
// Append some arbitrary byte[] to the end of the log (mimics a partially written commit)
fs = FSUtils.getFs(fs.getUri().toString(), fs.getConf());
FSDataOutputStream outputStream = fs.append(writer.getLogFile().getPath());
// Start what looks like a new block: magic bytes followed by a block type
outputStream.write(HoodieLogFormat.MAGIC);
outputStream.writeInt(HoodieLogBlockType.AVRO_DATA_BLOCK.ordinal());
// Write out a length that does not conform to the content
outputStream.writeInt(1000);
// Write out footer length
outputStream.writeInt(1);
// Write out some metadata
// TODO : test for failure to write metadata - NA ?
outputStream.write(HoodieLogBlock.getLogMetadataBytes(header));
outputStream.write("something-random".getBytes());
outputStream.flush();
outputStream.close();
// Should be able to append a new block
writer = HoodieLogFormat.newWriterBuilder()
    .onParentPath(partitionPath)
    .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
    .withFileId("test-fileid1")
    .overBaseCommit("100")
    .withFs(fs)
    .build();
records = SchemaTestUtil.generateTestRecords(0, 100);
dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header);
writer.appendBlock(dataBlock);
writer.close();
// Reading in reverse: the last (valid) block should be readable, then the corrupt block should fail
HoodieLogFileReader reader = new HoodieLogFileReader(fs,
    new HoodieLogFile(writer.getLogFile().getPath(),
        fs.getFileStatus(writer.getLogFile().getPath()).getLen()),
    schema, bufferSize, readBlocksLazily, true);
assertTrue(reader.hasPrev(), "Last block should be available");
HoodieLogBlock block = reader.prev();
assertTrue(block instanceof HoodieDataBlock, "Last block should be a data block");
assertTrue(reader.hasPrev(), "A previous block should still be detected (the corrupt one)");
assertThrows(CorruptedLogFileException.class, () -> {
reader.prev();
});
reader.close();
}
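For contrast, a hedged sketch (not part of the test above) of scanning the same file forward: Hudi's forward reader is expected to surface the partially written region as a corrupt block rather than throwing, assuming the same HoodieLogFileReader constructor with reverse reading disabled and the HoodieLogBlockType.CORRUPT_BLOCK marker.
HoodieLogFileReader forwardReader = new HoodieLogFileReader(fs,
    new HoodieLogFile(writer.getLogFile().getPath(),
        fs.getFileStatus(writer.getLogFile().getPath()).getLen()),
    schema, bufferSize, readBlocksLazily, false);
while (forwardReader.hasNext()) {
    HoodieLogBlock next = forwardReader.next();
    if (next.getBlockType() == HoodieLogBlockType.CORRUPT_BLOCK) {
        // The unparseable bytes were wrapped in a corrupt block; valid blocks
        // before and after it remain readable.
    }
}
forwardReader.close();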
Use of org.apache.hudi.common.table.log.block.HoodieLogBlock in project hudi by apache.
The class TestHoodieBackedMetadata, method verifyMetadataRawRecords.
/**
* Verify the metadata table on-disk raw records. When populate meta fields is enabled,
* these records should have additional meta fields in the payload. When key deduplication
* is enabled, the on-disk records should carry the key in the payload as an empty string.
*
* @param table - Hoodie table instance
* @param logFiles - Metadata table log files to be verified
* @param enableMetaFields - Enable meta fields for records
* @throws IOException
*/
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles, boolean enableMetaFields) throws IOException {
for (HoodieLogFile logFile : logFiles) {
FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
if (writerSchemaMsg == null) {
// not a data block
continue;
}
Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
while (logFileReader.hasNext()) {
HoodieLogBlock logBlock = logFileReader.next();
if (logBlock instanceof HoodieDataBlock) {
try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
recordItr.forEachRemaining(indexRecord -> {
final GenericRecord record = (GenericRecord) indexRecord;
if (enableMetaFields) {
// Metadata table records should have meta fields!
assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
} else {
// Metadata table records should not have meta fields!
assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
}
final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
assertFalse(key.isEmpty());
if (enableMetaFields) {
assertEquals(String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)), key);
}
});
}
}
}
}
}
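The meta-field assertions above could be factored into a small helper. A hedged sketch (the helper name is an assumption, not part of the test class) built only from the checks shown in the lambda:
private static void assertMetaFields(GenericRecord record, boolean enableMetaFields) {
    if (enableMetaFields) {
        // Populated meta fields: the standard Hudi meta columns must be present
        assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
        assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
    } else {
        // Meta fields disabled: the same columns must be absent from the payload
        assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
        assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
    }
}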
Use of org.apache.hudi.common.table.log.block.HoodieLogBlock in project hudi by apache.
The class TestHoodieBackedTableMetadata, method verifyMetadataRawRecords.
/**
* Verify the metadata table on-disk raw records. When populate meta fields is enabled,
* these records should have additional meta fields in the payload. When key deduplication
* is enabled, the on-disk records should carry the key in the payload as an empty string.
*
* @param table - Hoodie table instance
* @param logFiles - Metadata table log files to be verified
* @throws IOException
*/
private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles) throws IOException {
for (HoodieLogFile logFile : logFiles) {
FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
if (writerSchemaMsg == null) {
// not a data block
continue;
}
Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
while (logFileReader.hasNext()) {
HoodieLogBlock logBlock = logFileReader.next();
if (logBlock instanceof HoodieDataBlock) {
try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
recordItr.forEachRemaining(indexRecord -> {
final GenericRecord record = (GenericRecord) indexRecord;
assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
assertFalse(key.isEmpty());
});
}
}
}
}
}
Use of org.apache.hudi.common.table.log.block.HoodieLogBlock in project hudi by apache.
The class LogReaderUtils, method readSchemaFromLogFileInReverse.
private static Schema readSchemaFromLogFileInReverse(FileSystem fs, HoodieActiveTimeline activeTimeline, HoodieLogFile hoodieLogFile) throws IOException {
// Set the length on the HoodieLogFile, as it will be leveraged by HoodieLogFormat.Reader when reverse reading is enabled
Reader reader = HoodieLogFormat.newReader(fs, hoodieLogFile, null, true, true);
Schema writerSchema = null;
HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants();
while (reader.hasPrev()) {
HoodieLogBlock block = reader.prev();
if (block instanceof HoodieDataBlock) {
HoodieDataBlock lastBlock = (HoodieDataBlock) block;
if (completedTimeline.containsOrBeforeTimelineStarts(lastBlock.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME))) {
writerSchema = new Schema.Parser().parse(lastBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
break;
}
}
}
reader.close();
return writerSchema;
}
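A hypothetical caller sketch (metaClient and latestLogFile are placeholders, not from this page): the method walks the log backwards and returns the writer schema of the newest data block whose instant time is in the completed commits timeline, or null if none qualifies.
// Hypothetical usage; metaClient and latestLogFile are assumed to exist.
Schema writerSchema = readSchemaFromLogFileInReverse(
    metaClient.getFs(), metaClient.getActiveTimeline(), latestLogFile);
if (writerSchema == null) {
    // No committed data block was found in this log file.
}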