Search in sources :

Example 1 with DummyStatsCollector

use of org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector in project carbondata by apache.

the class ComplexColumnPage method initialize.

/**
 * Creates one column page, with its statistics collector, for every slot of
 * this complex column's {@code columnPages} array.
 *
 * <p>For each slot the local dictionary generator is looked up in
 * {@code columnToDictMap} by the slot's column name:
 * <ul>
 *   <li>generator present: a local-dictionary page over {@code BYTE_ARRAY}
 *       with a {@link DummyStatsCollector};</li>
 *   <li>generator absent and {@code isColumnPageBasedOnDataType(i)} is true:
 *       a page typed with the column's own data type and a
 *       {@code PrimitivePageStatsCollector} for that type;</li>
 *   <li>otherwise: a plain {@code BYTE_ARRAY} page with a
 *       {@link DummyStatsCollector}.</li>
 * </ul>
 *
 * @param columnToDictMap local dictionary generators keyed by column name;
 *                        a missing entry means "no local dictionary"
 * @param pageSize number of records each page is created for
 * @param columnCompressor compressor name passed into each page's encoder meta
 */
public void initialize(Map<String, LocalDictionaryGenerator> columnToDictMap, int pageSize, String columnCompressor) {
    for (int index = 0; index < this.columnPages.length; index++) {
        LocalDictionaryGenerator generator = columnToDictMap.get(complexColumnInfoList.get(index).getColumnNames());
        TableSpec.ColumnSpec columnSpec = getColumnSpec(index, generator);

        if (generator != null) {
            // local dictionary available: values are stored through the dictionary page
            ColumnPageEncoderMeta dictMeta = new ColumnPageEncoderMeta(columnSpec, DataTypes.BYTE_ARRAY, columnCompressor);
            this.columnPages[index] = ColumnPage.newLocalDictPage(dictMeta, pageSize, generator, true);
            this.columnPages[index].setStatsCollector(new DummyStatsCollector());
            continue;
        }

        DataType declaredType = complexColumnInfoList.get(index).getColumnDataTypes();
        if (!isColumnPageBasedOnDataType(index)) {
            // everything else is kept as raw bytes, with no real statistics
            ColumnPageEncoderMeta rawMeta = new ColumnPageEncoderMeta(columnSpec, DataTypes.BYTE_ARRAY, columnCompressor);
            this.columnPages[index] = ColumnPage.newPage(rawMeta, pageSize);
            this.columnPages[index].setStatsCollector(new DummyStatsCollector());
        } else {
            // no-dictionary primitive types need adaptive encoding, so the page
            // holds the actual value rather than a byte array
            ColumnPageEncoderMeta typedMeta = new ColumnPageEncoderMeta(columnSpec, declaredType, columnCompressor);
            this.columnPages[index] = ColumnPage.newPage(typedMeta, pageSize);
            this.columnPages[index].setStatsCollector(PrimitivePageStatsCollector.newInstance(declaredType));
        }
    }
}
Also used : TableSpec(org.apache.carbondata.core.datastore.TableSpec) ColumnPageEncoderMeta(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta) DummyStatsCollector(org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector) DataType(org.apache.carbondata.core.metadata.datatype.DataType) LocalDictionaryGenerator(org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)

Example 2 with DummyStatsCollector

use of org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector in project carbondata by apache.

the class PageLevelDictionary method getLocalDictionaryChunkForBlocklet.

/**
 * Builds the encoded local dictionary chunk that is written for a blocklet.
 *
 * <p>Every dictionary value whose bit is set in {@code usedDictionaryValues}
 * is copied into a temporary {@code BYTE_ARRAY} column page. That page is
 * encoded with a {@link DirectCompressCodec}, and the compressed bytes of
 * {@code usedDictionaryValues} are attached to the resulting chunk.
 *
 * <p>For complex-type primitives each key is stored with a length prefix:
 * a 2-byte short normally, or a 4-byte int when the data type is VARCHAR.
 *
 * <p>The temporary page is released in a {@code finally} block, so its memory
 * is freed even when filling or encoding the page fails.
 *
 * @TODO Support for numeric data type dictionary exclude columns
 * @return encoded local dictionary chunk
 * @throws IOException in case of problem in encoding
 */
public LocalDictionaryChunk getLocalDictionaryChunkForBlocklet() throws IOException {
    // TODO support for actual data type dictionary ColumnSPEC
    ColumnType columnType = ColumnType.PLAIN_VALUE;
    boolean isVarcharType = false;
    int lvSize = CarbonCommonConstants.SHORT_SIZE_IN_BYTE;
    if (DataTypes.VARCHAR == dataType) {
        // VARCHAR values use the long-value column type and an int-sized length prefix
        columnType = ColumnType.PLAIN_LONG_VALUE;
        lvSize = CarbonCommonConstants.INT_SIZE_IN_BYTE;
        isVarcharType = true;
    }
    TableSpec.ColumnSpec spec = TableSpec.ColumnSpec.newInstance(columnName, DataTypes.BYTE_ARRAY, columnType);
    // one row per dictionary value that is actually used
    ColumnPage dictionaryColumnPage = ColumnPage.newPage(new ColumnPageEncoderMeta(spec, DataTypes.BYTE_ARRAY, columnCompressor), usedDictionaryValues.cardinality());
    try {
        // TODO support data type specific stats collector for numeric data types
        dictionaryColumnPage.setStatsCollector(new DummyStatsCollector());
        int rowId = 0;
        for (int i = usedDictionaryValues.nextSetBit(0); i >= 0; i = usedDictionaryValues.nextSetBit(i + 1)) {
            byte[] dictionaryKeyBasedOnValue = localDictionaryGenerator.getDictionaryKeyBasedOnValue(i);
            if (!isComplexTypePrimitive) {
                dictionaryColumnPage.putData(rowId++, dictionaryKeyBasedOnValue);
            } else {
                // complex-type primitives are stored as <length><value>
                ByteBuffer byteBuffer = ByteBuffer.allocate(lvSize + dictionaryKeyBasedOnValue.length);
                if (!isVarcharType) {
                    // NOTE(review): the cast silently truncates lengths above
                    // Short.MAX_VALUE - confirm callers never produce such keys
                    byteBuffer.putShort((short) dictionaryKeyBasedOnValue.length);
                } else {
                    byteBuffer.putInt(dictionaryKeyBasedOnValue.length);
                }
                byteBuffer.put(dictionaryKeyBasedOnValue);
                dictionaryColumnPage.putData(rowId++, byteBuffer.array());
            }
        }
        // creating a encoder
        ColumnPageEncoder encoder = new DirectCompressCodec(DataTypes.BYTE_ARRAY).createEncoder(null);
        // get encoded dictionary values
        LocalDictionaryChunk localDictionaryChunk = encoder.encodeDictionary(dictionaryColumnPage);
        // set compressed dictionary values
        localDictionaryChunk.setDictionary_values(CompressorFactory.getInstance().getCompressor(columnCompressor).compressByte(usedDictionaryValues.toByteArray()));
        return localDictionaryChunk;
    } finally {
        // free the dictionary page memory on every exit path, including failures
        dictionaryColumnPage.freeMemory();
    }
}
Also used : ColumnPageEncoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder) TableSpec(org.apache.carbondata.core.datastore.TableSpec) ColumnType(org.apache.carbondata.core.datastore.ColumnType) LocalDictionaryChunk(org.apache.carbondata.format.LocalDictionaryChunk) DummyStatsCollector(org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector) ByteBuffer(java.nio.ByteBuffer) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ColumnPageEncoderMeta(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta) DirectCompressCodec(org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec)

Aggregations

TableSpec (org.apache.carbondata.core.datastore.TableSpec)2 ColumnPageEncoderMeta (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoderMeta)2 DummyStatsCollector (org.apache.carbondata.core.datastore.page.statistics.DummyStatsCollector)2 ByteBuffer (java.nio.ByteBuffer)1 ColumnType (org.apache.carbondata.core.datastore.ColumnType)1 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)1 ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder)1 DirectCompressCodec (org.apache.carbondata.core.datastore.page.encoding.compress.DirectCompressCodec)1 LocalDictionaryGenerator (org.apache.carbondata.core.localdictionary.generator.LocalDictionaryGenerator)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 LocalDictionaryChunk (org.apache.carbondata.format.LocalDictionaryChunk)1