Example use of org.apache.parquet.io.PositionOutputStream in the project "drill" by Apache, from the writeMetadataFile method of the ParquetFileWriter class.
/**
 * Creates a standalone metadata file at {@code outputPath} containing the Parquet
 * magic bytes followed by the serialized footer.
 *
 * @param outputPath     path of the metadata file to create
 * @param metadataFooter footer content to serialize into the file
 * @param fs             file system used to create the output file
 * @throws IOException if the file cannot be created or written
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
private static void writeMetadataFile(Path outputPath, ParquetMetadata metadataFooter, FileSystem fs) throws IOException {
  // try-with-resources guarantees the stream is closed even if serializeFooter throws,
  // fixing a resource leak in the original (close() was only reached on success).
  try (PositionOutputStream metadata = HadoopStreams.wrap(fs.create(outputPath))) {
    metadata.write(MAGIC);
    serializeFooter(metadataFooter, metadata, null);
  }
}
Example use of org.apache.parquet.io.PositionOutputStream in the project "parquet-mr" by Apache, from the testCorruptedPage method of the TestDataPageV1Checksums class.
/**
 * Test whether corruption in the page content is detected by checksum verification.
 *
 * <p>Writes a small file with page checksums enabled, flips one byte in two of its
 * pages, rewrites the file, then reads it back twice: once with verification off
 * (corruption goes unnoticed) and once with verification on (an error is expected).
 */
@Test
public void testCorruptedPage() throws IOException {
  Configuration conf = new Configuration();
  conf.setBoolean(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, true);
  Path path = writeSimpleParquetFile(conf, CompressionCodecName.UNCOMPRESSED);

  // Read the entire file into memory. The try-with-resources scope now ends as soon
  // as the stream is fully consumed; the original called close() explicitly inside
  // the try block, causing a redundant double close.
  InputFile inputFile = HadoopInputFile.fromPath(path, conf);
  int fileLen = (int) inputFile.getLength();
  byte[] fileBytes = new byte[fileLen];
  try (SeekableInputStream inputStream = inputFile.newStream()) {
    inputStream.readFully(fileBytes);
  }

  // There are 4 pages in total (2 per column), we corrupt the first page of the first
  // column and the second page of the second column. We do this by altering a byte
  // roughly in the middle of each page to be corrupted.
  fileBytes[fileLen / 8]++;
  fileBytes[fileLen / 8 + ((fileLen / 4) * 3)]++;

  // Rewrite the corrupted bytes; closing the try-with-resources block here flushes
  // the file before it is reopened for reading below (the original kept the output
  // stream's try block open around the reads and closed the stream manually).
  OutputFile outputFile = HadoopOutputFile.fromPath(path, conf);
  try (PositionOutputStream outputStream = outputFile.createOrOverwrite(1024 * 1024)) {
    outputStream.write(fileBytes);
  }

  // First we disable checksum verification, the corruption will go undetected as it
  // is in the data section of the page.
  conf.setBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, false);
  try (ParquetFileReader reader = getParquetFileReader(path, conf, Arrays.asList(colADesc, colBDesc))) {
    PageReadStore pageReadStore = reader.readNextRowGroup();

    DataPageV1 colAPage1 = readNextPage(colADesc, pageReadStore);
    assertFalse("Data in page was not corrupted", Arrays.equals(colAPage1.getBytes().toByteArray(), colAPage1Bytes));

    // Skip over the pages that were left intact.
    readNextPage(colADesc, pageReadStore);
    readNextPage(colBDesc, pageReadStore);

    DataPageV1 colBPage2 = readNextPage(colBDesc, pageReadStore);
    assertFalse("Data in page was not corrupted", Arrays.equals(colBPage2.getBytes().toByteArray(), colBPage2Bytes));
  }

  // Now we enable checksum verification, the corruption should be detected.
  conf.setBoolean(ParquetInputFormat.PAGE_VERIFY_CHECKSUM_ENABLED, true);
  try (ParquetFileReader reader = getParquetFileReader(path, conf, Arrays.asList(colADesc, colBDesc))) {
    // We expect an exception on the first encountered corrupt page (in readAllPages)
    assertVerificationFailed(reader);
  }
}
Example use of org.apache.parquet.io.PositionOutputStream in the project "parquet-mr" by Apache, from the writeMetadataFile method of the ParquetFileWriter class.
/**
 * Creates a standalone metadata file at {@code outputPath} containing the Parquet
 * magic bytes followed by the serialized footer.
 *
 * @param outputPath     path of the metadata file to create
 * @param metadataFooter footer content to serialize into the file
 * @param fs             file system used to create the output file
 * @throws IOException if the file cannot be created or written
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
private static void writeMetadataFile(Path outputPath, ParquetMetadata metadataFooter, FileSystem fs) throws IOException {
  // try-with-resources guarantees the stream is closed even if serializeFooter throws,
  // fixing a resource leak in the original (close() was only reached on success).
  try (PositionOutputStream metadata = HadoopStreams.wrap(fs.create(outputPath))) {
    metadata.write(MAGIC);
    serializeFooter(metadataFooter, metadata, null);
  }
}
Aggregations