Example 21 with Encoding

Use of org.apache.parquet.column.Encoding in project drill by apache.

From the class ParquetFileWriter, the method writeColumnChunk:

void writeColumnChunk(ColumnDescriptor descriptor, long valueCount,
        CompressionCodecName compressionCodecName, DictionaryPage dictionaryPage,
        BytesInput bytes, long uncompressedTotalPageSize, long compressedTotalPageSize,
        Statistics<?> totalStats, ColumnIndexBuilder columnIndexBuilder,
        OffsetIndexBuilder offsetIndexBuilder, BloomFilter bloomFilter,
        Set<Encoding> rlEncodings, Set<Encoding> dlEncodings, List<Encoding> dataEncodings,
        BlockCipher.Encryptor headerBlockEncryptor, int rowGroupOrdinal, int columnOrdinal,
        byte[] fileAAD) throws IOException {
    startColumn(descriptor, valueCount, compressionCodecName);
    state = state.write();
    if (dictionaryPage != null) {
        byte[] dictionaryPageHeaderAAD = null;
        if (null != headerBlockEncryptor) {
            // The AAD (additional authenticated data) binds the encrypted header to its
            // position in the file; -1 is the page ordinal used for dictionary page headers.
            dictionaryPageHeaderAAD = AesCipher.createModuleAAD(fileAAD, ModuleType.DictionaryPageHeader, rowGroupOrdinal, columnOrdinal, -1);
        }
        writeDictionaryPage(dictionaryPage, headerBlockEncryptor, dictionaryPageHeaderAAD);
    }
    if (bloomFilter != null) {
        // Write the bloom filter only when at least one data page is not dictionary encoded;
        // a fully dictionary-encoded column can be filtered via the dictionary itself.
        boolean isWriteBloomFilter = false;
        for (Encoding encoding : dataEncodings) {
            if (encoding != Encoding.RLE_DICTIONARY) {
                isWriteBloomFilter = true;
                break;
            }
        }
        if (isWriteBloomFilter) {
            currentBloomFilters.put(String.join(".", descriptor.getPath()), bloomFilter);
        }
    }
    LOG.debug("{}: write data pages", out.getPos());
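    // "bytes" holds the page headers as well as the page bodies, so the header
    // overhead is the buffer size minus the compressed page total.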
    long headersSize = bytes.size() - compressedTotalPageSize;
    this.uncompressedLength += uncompressedTotalPageSize + headersSize;
    this.compressedLength += compressedTotalPageSize + headersSize;
    LOG.debug("{}: write data pages content", out.getPos());
    currentChunkFirstDataPage = out.getPos();
    bytes.writeAllTo(out);
    encodingStatsBuilder.addDataEncodings(dataEncodings);
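    // V2 data pages RLE-encode repetition/definition levels without recording an
    // Encoding for them, so an empty repetition-level set implies V2 pages.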
    if (rlEncodings.isEmpty()) {
        encodingStatsBuilder.withV2Pages();
    }
    currentEncodings.addAll(rlEncodings);
    currentEncodings.addAll(dlEncodings);
    currentEncodings.addAll(dataEncodings);
    currentStatistics = totalStats;
    this.columnIndexBuilder = columnIndexBuilder;
    this.offsetIndexBuilder = offsetIndexBuilder;
    endColumn();
}
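
The encoding sets and EncodingStats recorded above are what readers consult later, for instance to decide whether every data page of a chunk is dictionary encoded before trusting dictionary-based filtering. A minimal reader-side sketch, assuming a ColumnChunkMetaData is already in hand (the helper name allPagesDictionaryEncoded and the fallback heuristic are ours, not Drill's):

import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;

static boolean allPagesDictionaryEncoded(ColumnChunkMetaData chunk) {
    EncodingStats stats = chunk.getEncodingStats();
    if (stats != null) {
        // Per-page stats were written by the encodingStatsBuilder above.
        return stats.hasDictionaryEncodedPages() && !stats.hasNonDictionaryEncodedPages();
    }
    // Older files carry no EncodingStats; the chunk-level encoding set is a rough
    // proxy: a PLAIN entry means at least one page fell back from the dictionary.
    return chunk.getEncodings().contains(Encoding.RLE_DICTIONARY)
            && !chunk.getEncodings().contains(Encoding.PLAIN);
}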
Also used: Encoding (org.apache.parquet.column.Encoding)

Aggregations

Encoding (org.apache.parquet.column.Encoding): 21
Path (org.apache.hadoop.fs.Path): 6
Test (org.junit.Test): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
EncodingStats (org.apache.parquet.column.EncodingStats): 4
PageReadStore (org.apache.parquet.column.page.PageReadStore): 4
BlockMetaData (org.apache.parquet.hadoop.metadata.BlockMetaData): 4
ColumnChunkMetaData (org.apache.parquet.hadoop.metadata.ColumnChunkMetaData): 4
File (java.io.File): 3
IOException (java.io.IOException): 3
HashMap (java.util.HashMap): 3
DrillRuntimeException (org.apache.drill.common.exceptions.DrillRuntimeException): 3
ParquetMetadata (org.apache.parquet.hadoop.metadata.ParquetMetadata): 3
MessageType (org.apache.parquet.schema.MessageType): 3
ByteBuffer (java.nio.ByteBuffer): 2
HashSet (java.util.HashSet): 2
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 2
BytesInput (org.apache.parquet.bytes.BytesInput): 2
ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 2