use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project presto by prestodb.
the class HdfsParquetDataSource method readColumnIndex.
/**
 * Reads the column index belonging to the given column chunk.
 *
 * @param column metadata of the column chunk whose column index is requested
 * @return the converted column index, or {@link Optional#empty()} when the chunk metadata
 *         holds no column index reference
 * @throws IOException if seeking or reading {@code inputStream} fails
 */
@Override
public Optional<ColumnIndex> readColumnIndex(ColumnChunkMetaData column) throws IOException {
    IndexReference reference = column.getColumnIndexReference();
    if (reference != null) {
        // Move the stream to the recorded offset of the index before decoding it.
        inputStream.seek(reference.getOffset());
        return Optional.of(
                ParquetMetadataConverter.fromParquetColumnIndex(
                        column.getPrimitiveType(),
                        Util.readColumnIndex(inputStream)));
    }
    // No reference in the chunk metadata: there is nothing to read.
    return Optional.empty();
}
use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project parquet-mr by apache.
the class ParquetFileReader method readColumnIndex.
/**
 * Reads the column index of a column chunk, handing a decryptor and AAD to the reader when
 * the file decryptor reports the column as encrypted.
 *
 * @param column
 *          the column chunk whose column index should be read
 * @return the column index of the given column chunk, or {@code null} when the chunk has no
 *         column index reference
 * @throws IOException
 *           if an I/O error occurs while reading the file
 */
@Private
public ColumnIndex readColumnIndex(ColumnChunkMetaData column) throws IOException {
    IndexReference indexReference = column.getColumnIndexReference();
    if (indexReference == null) {
        return null;
    }

    // Position the input at the recorded offset of the index before anything else.
    f.seek(indexReference.getOffset());

    // Both stay null unless the column turns out to be encrypted.
    BlockCipher.Decryptor decryptor = null;
    byte[] aad = null;

    boolean fileMayBeEncrypted = fileDecryptor != null && !fileDecryptor.plaintextFile();
    if (fileMayBeEncrypted) {
        InternalColumnDecryptionSetup setup = fileDecryptor.getColumnSetup(column.getPath());
        if (setup.isEncrypted()) {
            decryptor = setup.getMetaDataDecryptor();
            aad = AesCipher.createModuleAAD(
                    fileDecryptor.getFileAAD(),
                    ModuleType.ColumnIndex,
                    column.getRowGroupOrdinal(),
                    setup.getOrdinal(),
                    -1);
        }
    }

    return ParquetMetadataConverter.fromParquetColumnIndex(
            column.getPrimitiveType(),
            Util.readColumnIndex(f, decryptor, aad));
}
use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project parquet-mr by apache.
the class ParquetFileWriter method serializeOffsetIndexes.
/**
 * Writes every non-null offset index to {@code out} and stores, on the matching column chunk
 * metadata, an {@link IndexReference} holding the start position and byte length of what was
 * written.
 *
 * @param offsetIndexes offset indexes, addressed first by block position and then by column position
 * @param blocks        the blocks whose column chunks receive the offset index references
 * @param out           the stream the offset indexes are written to
 * @param fileEncryptor the file encryptor, or {@code null} to write every index without encryption
 * @throws IOException if querying the stream position or writing to the stream fails
 */
private static void serializeOffsetIndexes(List<List<OffsetIndex>> offsetIndexes, List<BlockMetaData> blocks, PositionOutputStream out, InternalFileEncryptor fileEncryptor) throws IOException {
    LOG.debug("{}: offset indexes", out.getPos());
    final int blockCount = blocks.size();
    for (int blockPos = 0; blockPos < blockCount; blockPos++) {
        BlockMetaData currentBlock = blocks.get(blockPos);
        List<ColumnChunkMetaData> chunks = currentBlock.getColumns();
        List<OffsetIndex> indexesOfBlock = offsetIndexes.get(blockPos);
        final int chunkCount = chunks.size();
        for (int columnPos = 0; columnPos < chunkCount; columnPos++) {
            OffsetIndex currentIndex = indexesOfBlock.get(columnPos);
            if (currentIndex == null) {
                // Nothing to write for this column chunk; its reference is left untouched.
                continue;
            }
            ColumnChunkMetaData chunk = chunks.get(columnPos);

            // Encryptor and AAD stay null unless the column is reported as encrypted.
            BlockCipher.Encryptor encryptor = null;
            byte[] aad = null;
            if (fileEncryptor != null) {
                InternalColumnEncryptionSetup setup = fileEncryptor.getColumnSetup(chunk.getPath(), false, columnPos);
                if (setup.isEncrypted()) {
                    encryptor = setup.getMetaDataEncryptor();
                    aad = AesCipher.createModuleAAD(
                            fileEncryptor.getFileAAD(),
                            ModuleType.OffsetIndex,
                            currentBlock.getOrdinal(),
                            setup.getOrdinal(),
                            -1);
                }
            }

            // The stream position before and after the write yields offset and length.
            long start = out.getPos();
            Util.writeOffsetIndex(ParquetMetadataConverter.toParquetOffsetIndex(currentIndex), out, encryptor, aad);
            long end = out.getPos();
            int length = (int) (end - start);
            chunk.setOffsetIndexReference(new IndexReference(start, length));
        }
    }
}
use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project parquet-mr by apache.
the class ParquetFileWriter method serializeColumnIndexes.
/**
 * Converts each column index to its Parquet format representation, writes the non-null
 * results to {@code out}, and stores on the matching column chunk metadata an
 * {@link IndexReference} holding the start position and byte length of what was written.
 *
 * @param columnIndexes column indexes, addressed first by block position and then by column position
 * @param blocks        the blocks whose column chunks receive the column index references
 * @param out           the stream the column indexes are written to
 * @param fileEncryptor the file encryptor, or {@code null} to write every index without encryption
 * @throws IOException if querying the stream position or writing to the stream fails
 */
private static void serializeColumnIndexes(List<List<ColumnIndex>> columnIndexes, List<BlockMetaData> blocks, PositionOutputStream out, InternalFileEncryptor fileEncryptor) throws IOException {
    LOG.debug("{}: column indexes", out.getPos());
    final int blockCount = blocks.size();
    for (int blockPos = 0; blockPos < blockCount; blockPos++) {
        BlockMetaData currentBlock = blocks.get(blockPos);
        List<ColumnChunkMetaData> chunks = currentBlock.getColumns();
        List<ColumnIndex> indexesOfBlock = columnIndexes.get(blockPos);
        final int chunkCount = chunks.size();
        for (int columnPos = 0; columnPos < chunkCount; columnPos++) {
            ColumnChunkMetaData chunk = chunks.get(columnPos);
            org.apache.parquet.format.ColumnIndex converted = ParquetMetadataConverter.toParquetColumnIndex(
                    chunk.getPrimitiveType(),
                    indexesOfBlock.get(columnPos));
            if (converted == null) {
                // The conversion produced nothing to write for this column chunk.
                continue;
            }

            // Encryptor and AAD stay null unless the column is reported as encrypted.
            BlockCipher.Encryptor encryptor = null;
            byte[] aad = null;
            if (fileEncryptor != null) {
                InternalColumnEncryptionSetup setup = fileEncryptor.getColumnSetup(chunk.getPath(), false, columnPos);
                if (setup.isEncrypted()) {
                    encryptor = setup.getMetaDataEncryptor();
                    aad = AesCipher.createModuleAAD(
                            fileEncryptor.getFileAAD(),
                            ModuleType.ColumnIndex,
                            currentBlock.getOrdinal(),
                            setup.getOrdinal(),
                            -1);
                }
            }

            // The stream position before and after the write yields offset and length.
            long start = out.getPos();
            Util.writeColumnIndex(converted, out, encryptor, aad);
            long end = out.getPos();
            int length = (int) (end - start);
            chunk.setColumnIndexReference(new IndexReference(start, length));
        }
    }
}
use of org.apache.parquet.internal.hadoop.metadata.IndexReference in project presto by prestodb.
the class HdfsParquetDataSource method readOffsetIndex.
/**
 * Reads the offset index belonging to the given column chunk.
 *
 * @param column metadata of the column chunk whose offset index is requested
 * @return the converted offset index, or {@link Optional#empty()} when the chunk metadata
 *         holds no offset index reference
 * @throws IOException if seeking or reading {@code inputStream} fails
 */
@Override
public Optional<OffsetIndex> readOffsetIndex(ColumnChunkMetaData column) throws IOException {
    IndexReference reference = column.getOffsetIndexReference();
    if (reference != null) {
        // Move the stream to the recorded offset of the index before decoding it.
        inputStream.seek(reference.getOffset());
        return Optional.of(
                ParquetMetadataConverter.fromParquetOffsetIndex(
                        Util.readOffsetIndex(inputStream)));
    }
    // No reference in the chunk metadata: there is nothing to read.
    return Optional.empty();
}
Aggregations