Use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project drill by apache.
The class ParquetFileWriter, method serializeOffsetIndexes:
private static void serializeOffsetIndexes(
    List<List<OffsetIndex>> offsetIndexes,
    List<BlockMetaData> blocks,
    PositionOutputStream out,
    InternalFileEncryptor fileEncryptor) throws IOException {
  LOG.debug("{}: offset indexes", out.getPos());
  for (int bIndex = 0, bSize = blocks.size(); bIndex < bSize; ++bIndex) {
    BlockMetaData block = blocks.get(bIndex);
    List<ColumnChunkMetaData> columns = block.getColumns();
    List<OffsetIndex> blockOffsetIndexes = offsetIndexes.get(bIndex);
    for (int cIndex = 0, cSize = columns.size(); cIndex < cSize; ++cIndex) {
      OffsetIndex offsetIndex = blockOffsetIndexes.get(cIndex);
      if (offsetIndex == null) {
        continue; // no offset index was collected for this column chunk
      }
      ColumnChunkMetaData column = columns.get(cIndex);
      BlockCipher.Encryptor offsetIndexEncryptor = null;
      byte[] offsetIndexAAD = null;
      if (null != fileEncryptor) {
        InternalColumnEncryptionSetup columnEncryptionSetup =
            fileEncryptor.getColumnSetup(column.getPath(), false, cIndex);
        if (columnEncryptionSetup.isEncrypted()) {
          offsetIndexEncryptor = columnEncryptionSetup.getMetaDataEncryptor();
          offsetIndexAAD = AesCipher.createModuleAAD(fileEncryptor.getFileAAD(), ModuleType.OffsetIndex,
              block.getOrdinal(), columnEncryptionSetup.getOrdinal(), -1);
        }
      }
      long offset = out.getPos();
      Util.writeOffsetIndex(ParquetMetadataConverter.toParquetOffsetIndex(offsetIndex), out,
          offsetIndexEncryptor, offsetIndexAAD);
      // Record where the index landed so the footer can point readers at it
      column.setOffsetIndexReference(new IndexReference(offset, (int) (out.getPos() - offset)));
    }
  }
}
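The pattern to note is the offset/length bookkeeping around each write: the stream position is captured before the index is serialized, and the position difference after the write becomes the length stored in the IndexReference. A minimal sketch of that pattern, using a hypothetical position-tracking stream as a stand-in for Parquet's PositionOutputStream:

// Minimal sketch (not parquet-mr code): PositionTrackingStream is a
// hypothetical stand-in for org.apache.parquet.io.PositionOutputStream.
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.parquet.internal.hadoop.metadata.IndexReference;

public class IndexReferenceSketch {

  // Tracks the write position, like PositionOutputStream.getPos().
  static class PositionTrackingStream extends ByteArrayOutputStream {
    long getPos() {
      return size();
    }
  }

  public static void main(String[] args) throws IOException {
    PositionTrackingStream out = new PositionTrackingStream();
    out.write(new byte[128]); // pretend column data was already written

    long offset = out.getPos(); // position before the serialized index
    out.write("serialized-offset-index".getBytes()); // stand-in payload
    IndexReference ref = new IndexReference(offset, (int) (out.getPos() - offset));

    System.out.println(ref.getOffset() + ", " + ref.getLength()); // 128, 23
  }
}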
Use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project drill by apache.
The class ParquetFileWriter, method serializeColumnIndexes:
private static void serializeColumnIndexes(
    List<List<ColumnIndex>> columnIndexes,
    List<BlockMetaData> blocks,
    PositionOutputStream out,
    InternalFileEncryptor fileEncryptor) throws IOException {
  LOG.debug("{}: column indexes", out.getPos());
  for (int bIndex = 0, bSize = blocks.size(); bIndex < bSize; ++bIndex) {
    BlockMetaData block = blocks.get(bIndex);
    List<ColumnChunkMetaData> columns = block.getColumns();
    List<ColumnIndex> blockColumnIndexes = columnIndexes.get(bIndex);
    for (int cIndex = 0, cSize = columns.size(); cIndex < cSize; ++cIndex) {
      ColumnChunkMetaData column = columns.get(cIndex);
      org.apache.parquet.format.ColumnIndex columnIndex =
          ParquetMetadataConverter.toParquetColumnIndex(column.getPrimitiveType(), blockColumnIndexes.get(cIndex));
      if (columnIndex == null) {
        continue; // no column index was collected for this column chunk
      }
      BlockCipher.Encryptor columnIndexEncryptor = null;
      byte[] columnIndexAAD = null;
      if (null != fileEncryptor) {
        InternalColumnEncryptionSetup columnEncryptionSetup =
            fileEncryptor.getColumnSetup(column.getPath(), false, cIndex);
        if (columnEncryptionSetup.isEncrypted()) {
          columnIndexEncryptor = columnEncryptionSetup.getMetaDataEncryptor();
          columnIndexAAD = AesCipher.createModuleAAD(fileEncryptor.getFileAAD(), ModuleType.ColumnIndex,
              block.getOrdinal(), columnEncryptionSetup.getOrdinal(), -1);
        }
      }
      long offset = out.getPos();
      Util.writeColumnIndex(columnIndex, out, columnIndexEncryptor, columnIndexAAD);
      // Record where the index landed so the footer can point readers at it
      column.setColumnIndexReference(new IndexReference(offset, (int) (out.getPos() - offset)));
    }
  }
}
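Both serializers build a module AAD (additional authenticated data) before encrypting. The AAD binds each encrypted index to its module type, row group ordinal, and column ordinal, so encrypted modules cannot be swapped between positions in the file without failing authentication. A loose sketch of the idea follows; the exact byte layout is defined by AesCipher.createModuleAAD in parquet-mr, and the helper below is a simplified illustration only:

import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class ModuleAadSketch {

  // Simplified illustration only: suffix the file AAD with the module type
  // and ordinals so every encrypted module is tied to one exact position.
  static byte[] moduleAad(byte[] fileAad, byte moduleType,
                          short rowGroupOrdinal, short columnOrdinal) {
    ByteBuffer buf = ByteBuffer.allocate(fileAad.length + 1 + 2 + 2)
        .order(ByteOrder.LITTLE_ENDIAN);
    buf.put(fileAad);              // file-level AAD prefix
    buf.put(moduleType);           // e.g. ColumnIndex vs. OffsetIndex
    buf.putShort(rowGroupOrdinal); // which row group
    buf.putShort(columnOrdinal);   // which column chunk
    return buf.array();
  }

  public static void main(String[] args) {
    byte[] aad = moduleAad(new byte[] {0x01, 0x02}, (byte) 6, (short) 0, (short) 3);
    System.out.println(aad.length); // 7
  }
}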
Use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project parquet-mr by apache.
The class ParquetFileReader, method readOffsetIndex:
/**
 * @param column
 *          the column chunk which the offset index is to be returned for
 * @return the offset index for the specified column chunk or {@code null} if there is no index
 * @throws IOException
 *           if any I/O error occurs during reading the file
 */
@Private
public OffsetIndex readOffsetIndex(ColumnChunkMetaData column) throws IOException {
  IndexReference ref = column.getOffsetIndexReference();
  if (ref == null) {
    return null; // the footer carries no offset-index location for this chunk
  }
  f.seek(ref.getOffset());
  BlockCipher.Decryptor offsetIndexDecryptor = null;
  byte[] offsetIndexAAD = null;
  if (null != fileDecryptor && !fileDecryptor.plaintextFile()) {
    InternalColumnDecryptionSetup columnDecryptionSetup = fileDecryptor.getColumnSetup(column.getPath());
    if (columnDecryptionSetup.isEncrypted()) {
      offsetIndexDecryptor = columnDecryptionSetup.getMetaDataDecryptor();
      offsetIndexAAD = AesCipher.createModuleAAD(fileDecryptor.getFileAAD(), ModuleType.OffsetIndex,
          column.getRowGroupOrdinal(), columnDecryptionSetup.getOrdinal(), -1);
    }
  }
  return ParquetMetadataConverter.fromParquetOffsetIndex(Util.readOffsetIndex(f, offsetIndexDecryptor, offsetIndexAAD));
}
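On the read side, this method is typically reached through a ParquetFileReader opened on a file whose footer has already been parsed; the IndexReference stored in the footer is what makes the seek possible. A hedged usage sketch, where the input path and Configuration are assumptions for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.internal.column.columnindex.OffsetIndex;

public class OffsetIndexDump {
  public static void main(String[] args) throws Exception {
    Path file = new Path("/tmp/data.parquet"); // hypothetical input file
    try (ParquetFileReader reader = ParquetFileReader.open(
        HadoopInputFile.fromPath(file, new Configuration()))) {
      for (BlockMetaData block : reader.getFooter().getBlocks()) {
        for (ColumnChunkMetaData column : block.getColumns()) {
          OffsetIndex idx = reader.readOffsetIndex(column); // null if absent
          System.out.println(column.getPath() + " -> "
              + (idx == null ? "no offset index" : idx.getPageCount() + " pages"));
        }
      }
    }
  }
}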
Use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project parquet-mr by apache.
The class ParquetMetadataConverter, method addRowGroup:
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block,
    InternalFileEncryptor fileEncryptor) {
  // rowGroup.total_byte_size = ;
  List<ColumnChunkMetaData> columns = block.getColumns();
  List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
  int rowGroupOrdinal = rowGroups.size();
  int columnOrdinal = -1;
  ByteArrayOutputStream tempOutStream = null;
  for (ColumnChunkMetaData columnMetaData : columns) {
    // verify this is the right offset
    ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset());
    // they are in the same file for now
    columnChunk.file_path = block.getPath();
    InternalColumnEncryptionSetup columnSetup = null;
    boolean writeCryptoMetadata = false;
    boolean encryptMetaData = false;
    ColumnPath path = columnMetaData.getPath();
    if (null != fileEncryptor) {
      columnOrdinal++;
      columnSetup = fileEncryptor.getColumnSetup(path, false, columnOrdinal);
      writeCryptoMetadata = columnSetup.isEncrypted();
      encryptMetaData = fileEncryptor.encryptColumnMetaData(columnSetup);
    }
    ColumnMetaData metaData = new ColumnMetaData(
        getType(columnMetaData.getType()),
        toFormatEncodings(columnMetaData.getEncodings()),
        Arrays.asList(columnMetaData.getPath().toArray()),
        toFormatCodec(columnMetaData.getCodec()),
        columnMetaData.getValueCount(),
        columnMetaData.getTotalUncompressedSize(),
        columnMetaData.getTotalSize(),
        columnMetaData.getFirstDataPageOffset());
    if (columnMetaData.getEncodingStats() != null && columnMetaData.getEncodingStats().hasDictionaryPages()) {
      metaData.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset());
    }
    long bloomFilterOffset = columnMetaData.getBloomFilterOffset();
    if (bloomFilterOffset >= 0) {
      metaData.setBloom_filter_offset(bloomFilterOffset);
    }
    if (columnMetaData.getStatistics() != null && !columnMetaData.getStatistics().isEmpty()) {
      metaData.setStatistics(toParquetStatistics(columnMetaData.getStatistics(), this.statisticsTruncateLength));
    }
    if (columnMetaData.getEncodingStats() != null) {
      metaData.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats()));
    }
    if (!encryptMetaData) {
      columnChunk.setMeta_data(metaData);
    } else {
      // Serialize and encrypt ColumnMetadata separately
      byte[] columnMetaDataAAD = AesCipher.createModuleAAD(fileEncryptor.getFileAAD(), ModuleType.ColumnMetaData,
          rowGroupOrdinal, columnSetup.getOrdinal(), -1);
      if (null == tempOutStream) {
        tempOutStream = new ByteArrayOutputStream();
      } else {
        tempOutStream.reset();
      }
      try {
        writeColumnMetaData(metaData, tempOutStream, columnSetup.getMetaDataEncryptor(), columnMetaDataAAD);
      } catch (IOException e) {
        throw new ParquetCryptoRuntimeException(
            "Failed to serialize and encrypt ColumnMetadata for " + columnMetaData.getPath(), e);
      }
      columnChunk.setEncrypted_column_metadata(tempOutStream.toByteArray());
      // Keep redacted metadata version for old readers
      if (!fileEncryptor.isFooterEncrypted()) {
        ColumnMetaData metaDataRedacted = metaData.deepCopy();
        if (metaDataRedacted.isSetStatistics()) {
          metaDataRedacted.unsetStatistics();
        }
        if (metaDataRedacted.isSetEncoding_stats()) {
          metaDataRedacted.unsetEncoding_stats();
        }
        columnChunk.setMeta_data(metaDataRedacted);
      }
    }
    if (writeCryptoMetadata) {
      columnChunk.setCrypto_metadata(columnSetup.getColumnCryptoMetaData());
    }
    // columnChunk.meta_data.index_page_offset = ;
    // columnChunk.meta_data.key_value_metadata = ; // nothing yet
    // Copy the index locations recorded at write time into the thrift footer
    IndexReference columnIndexRef = columnMetaData.getColumnIndexReference();
    if (columnIndexRef != null) {
      columnChunk.setColumn_index_offset(columnIndexRef.getOffset());
      columnChunk.setColumn_index_length(columnIndexRef.getLength());
    }
    IndexReference offsetIndexRef = columnMetaData.getOffsetIndexReference();
    if (offsetIndexRef != null) {
      columnChunk.setOffset_index_offset(offsetIndexRef.getOffset());
      columnChunk.setOffset_index_length(offsetIndexRef.getLength());
    }
    parquetColumns.add(columnChunk);
  }
  RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
  rowGroup.setFile_offset(block.getStartingPos());
  rowGroup.setTotal_compressed_size(block.getCompressedSize());
  rowGroup.setOrdinal((short) rowGroupOrdinal);
  rowGroups.add(rowGroup);
}
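addRowGroup is the write-side half of a round trip: the IndexReference captured during serialization is flattened into the optional column_index_offset/length and offset_index_offset/length thrift fields. The read side reverses this when parsing the footer; a sketch of that inverse against the same thrift API (the helper name is an assumption, parquet-mr performs this conversion inside ParquetMetadataConverter):

import org.apache.parquet.format.ColumnChunk;
import org.apache.parquet.internal.hadoop.metadata.IndexReference;

public class IndexRefFromThrift {

  // Rebuild the column-index reference from the optional thrift fields;
  // returns null when no column index was written for this chunk.
  static IndexReference toColumnIndexReference(ColumnChunk chunk) {
    if (chunk.isSetColumn_index_offset() && chunk.isSetColumn_index_length()) {
      return new IndexReference(chunk.getColumn_index_offset(),
                                chunk.getColumn_index_length());
    }
    return null;
  }
}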