Search in sources:

Example 1 with WritableIntVector

Usage of org.apache.flink.table.data.columnar.vector.writable.WritableIntVector in the Apache Flink project.

The method readToVector of the class AbstractColumnReader:

/**
 * Reads {@code readNumber} values from this column reader into {@code vector}, pulling in
 * new data pages from the underlying page reader as each page is exhausted.
 *
 * <p>For dictionary-encoded pages, the dictionary ids are decoded into {@code dictionaryIds}
 * and either left for lazy resolution (the dictionary itself is attached to the vector) or
 * eagerly materialized via {@code readBatchFromDictionaryIds}, depending on whether the
 * vector already carries a dictionary and whether lazy decoding is supported.
 *
 * @param readNumber total number of values to read into the vector
 * @param vector the destination writable column vector
 * @throws IOException if reading a page from the page reader fails
 */
@Override
public final void readToVector(int readNumber, VECTOR vector) throws IOException {
    // Position in `vector` where the next batch of values will be written.
    int rowId = 0;
    WritableIntVector dictionaryIds = null;
    if (dictionary != null) {
        // Reserve space for dictionary ids up front; they are filled in per page below.
        dictionaryIds = vector.reserveDictionaryIds(readNumber);
    }
    while (readNumber > 0) {
        // Compute the number of values we want to read in this page.
        int leftInPage = (int) (endOfPageValueCount - valuesRead);
        if (leftInPage == 0) {
            // Current page is exhausted: fetch and initialize the next data page.
            DataPage page = pageReader.readPage();
            if (page instanceof DataPageV1) {
                readPageV1((DataPageV1) page);
            } else if (page instanceof DataPageV2) {
                readPageV2((DataPageV2) page);
            } else {
                throw new RuntimeException("Unsupported page type: " + page.getClass());
            }
            // readPageV1/V2 advance endOfPageValueCount; recompute the page budget.
            leftInPage = (int) (endOfPageValueCount - valuesRead);
        }
        int num = Math.min(readNumber, leftInPage);
        if (isCurrentPageDictionaryEncoded) {
            // Read and decode dictionary ids.
            runLenDecoder.readDictionaryIds(num, dictionaryIds, vector, rowId, maxDefLevel, this.dictionaryIdsDecoder);
            if (vector.hasDictionary() || (rowId == 0 && supportLazyDecode())) {
                // Column vector supports lazy decoding of dictionary values so just set the
                // dictionary.
                // We can't do this if rowId != 0 AND the column doesn't have a dictionary (i.e.
                // some
                // non-dictionary encoded values have already been added).
                vector.setDictionary(new ParquetDictionary(dictionary));
            } else {
                // Eagerly materialize the values this page contributed.
                readBatchFromDictionaryIds(rowId, num, vector, dictionaryIds);
            }
        } else {
            if (vector.hasDictionary() && rowId != 0) {
                // This batch already has dictionary encoded values but this new page is not.
                // The batch
                // does not support a mix of dictionary and not so we will decode the
                // dictionary.
                readBatchFromDictionaryIds(0, rowId, vector, vector.getDictionaryIds());
            }
            vector.setDictionary(null);
            readBatch(rowId, num, vector);
        }
        // Advance the reader-level and batch-level cursors by the values just consumed.
        valuesRead += num;
        rowId += num;
        readNumber -= num;
    }
}
Also used : DataPage(org.apache.parquet.column.page.DataPage) DataPageV2(org.apache.parquet.column.page.DataPageV2) DataPageV1(org.apache.parquet.column.page.DataPageV1) ParquetDictionary(org.apache.flink.formats.parquet.vector.ParquetDictionary) WritableIntVector(org.apache.flink.table.data.columnar.vector.writable.WritableIntVector)

Example 2 with WritableIntVector

Usage of org.apache.flink.table.data.columnar.vector.writable.WritableIntVector in the Apache Flink project.

The method readBatchFromDictionaryIds of the class FixedLenBytesColumnReader:

@Override
protected void readBatchFromDictionaryIds(int rowId, int num, VECTOR column, WritableIntVector dictionaryIds) {
    // Materializes dictionary-encoded fixed-length byte values into the concrete vector
    // type implied by the decimal precision: int-backed (<=32-bit decimals), long-backed
    // (<=64-bit decimals), or raw bytes otherwise. Null slots are left untouched.
    final int end = rowId + num;
    if (ParquetSchemaConverter.is32BitDecimal(precision)) {
        WritableIntVector target = (WritableIntVector) column;
        for (int pos = rowId; pos < end; pos++) {
            if (target.isNullAt(pos)) {
                continue;
            }
            // Decode the dictionary entry and narrow the unscaled value to an int.
            Binary decoded = dictionary.decodeToBinary(dictionaryIds.getInt(pos));
            target.setInt(pos, (int) heapBinaryToLong(decoded));
        }
    } else if (ParquetSchemaConverter.is64BitDecimal(precision)) {
        WritableLongVector target = (WritableLongVector) column;
        for (int pos = rowId; pos < end; pos++) {
            if (target.isNullAt(pos)) {
                continue;
            }
            target.setLong(pos, heapBinaryToLong(dictionary.decodeToBinary(dictionaryIds.getInt(pos))));
        }
    } else {
        WritableBytesVector target = (WritableBytesVector) column;
        for (int pos = rowId; pos < end; pos++) {
            if (target.isNullAt(pos)) {
                continue;
            }
            // Copy the decoded bytes verbatim into the bytes vector.
            byte[] bytes = dictionary.decodeToBinary(dictionaryIds.getInt(pos)).getBytes();
            target.appendBytes(pos, bytes, 0, bytes.length);
        }
    }
}
Also used : WritableBytesVector(org.apache.flink.table.data.columnar.vector.writable.WritableBytesVector) Binary(org.apache.parquet.io.api.Binary) WritableIntVector(org.apache.flink.table.data.columnar.vector.writable.WritableIntVector) WritableLongVector(org.apache.flink.table.data.columnar.vector.writable.WritableLongVector)

Aggregations

WritableIntVector (org.apache.flink.table.data.columnar.vector.writable.WritableIntVector)2 ParquetDictionary (org.apache.flink.formats.parquet.vector.ParquetDictionary)1 WritableBytesVector (org.apache.flink.table.data.columnar.vector.writable.WritableBytesVector)1 WritableLongVector (org.apache.flink.table.data.columnar.vector.writable.WritableLongVector)1 DataPage (org.apache.parquet.column.page.DataPage)1 DataPageV1 (org.apache.parquet.column.page.DataPageV1)1 DataPageV2 (org.apache.parquet.column.page.DataPageV2)1 Binary (org.apache.parquet.io.api.Binary)1