Search in sources :

Example 1 with ColumnarSplitter

Use of org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter in the Apache CarbonData project.

Class SegmentProperties, method fillKeyGeneratorDetails.

/**
 * Builds the key-generator related state of this object from the dimension
 * metadata: the dimension partition sizes, the dimension and sort-column key
 * generators, the fixed length key splitter, and the per-column value sizes
 * for both plain and complex dimensions. The original author notes this is
 * needed by query execution as well as data loading.
 */
private void fillKeyGeneratorDetails() {
    // One entry per stored dictionary block: 1 for a columnar dimension,
    // or the number of members for a column group.
    List<Integer> partitionSizes = new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    // One entry per stored block, in dimension order: true when the block is
    // dictionary encoded, false otherwise.
    List<Boolean> dictionaryFlags = new ArrayList<Boolean>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    int previousGroupId = -1;
    for (int position = 0; position < dimensions.size(); position++) {
        CarbonDimension dimension = dimensions.get(position);
        if (!dimension.getEncoder().contains(Encoding.DICTIONARY)) {
            // not part of the mdkey: only record the flag; the previous
            // group id is intentionally left untouched
            dictionaryFlags.add(false);
            continue;
        }
        int groupId = dimension.columnGroupId();
        if (!dimension.isColumnar() && groupId == previousGroupId) {
            // same column group as the previous dimension: grow the last partition
            int lastPartition = partitionSizes.size() - 1;
            partitionSizes.set(lastPartition, partitionSizes.get(lastPartition) + 1);
        } else {
            // a columnar dimension, or the first member of a new column group
            partitionSizes.add(1);
            dictionaryFlags.add(true);
        }
        previousGroupId = groupId;
    }
    // partition sizes as a primitive array
    Integer[] boxedPartitions = partitionSizes.toArray(new Integer[partitionSizes.size()]);
    dimensionPartitions = ArrayUtils.toPrimitive(boxedPartitions);
    // bit length of every partition
    int[] bitLength = CarbonUtil.getDimensionBitLength(dimColumnsCardinality, dimensionPartitions);
    // key generator covering all dictionary dimensions
    this.dimensionKeyGenerator = new MultiDimKeyVarLengthGenerator(bitLength);
    int dictSortColumnCount = this.getNumberOfDictSortColumns();
    if (dictSortColumnCount != bitLength.length) {
        // only the leading columns are sort columns: give them their own generator
        int[] sortColumnBitLength = new int[dictSortColumnCount];
        System.arraycopy(bitLength, 0, sortColumnBitLength, 0, dictSortColumnCount);
        this.sortColumnsGenerator = new MultiDimKeyVarLengthGenerator(sortColumnBitLength);
    } else {
        // every dictionary column is a sort column: share the generator
        this.sortColumnsGenerator = this.dimensionKeyGenerator;
    }
    this.fixedLengthKeySplitter = new MultiDimKeyVarLengthVariableSplitGenerator(bitLength, dimensionPartitions);
    // size of each value in the file block; -1 marks a non-dictionary column
    int[] dictionaryBlockSizes = fixedLengthKeySplitter.getBlockKeySize();
    this.eachDimColumnValueSize = new int[dictionaryFlags.size()];
    int nextDictionaryBlock = 0;
    for (int column = 0; column < eachDimColumnValueSize.length; column++) {
        eachDimColumnValueSize[column] =
            dictionaryFlags.get(column) ? dictionaryBlockSizes[nextDictionaryBlock++] : -1;
    }
    if (complexDimensions.size() <= 0) {
        eachComplexDimColumnValueSize = new int[0];
        return;
    }
    // complex dimensions are stored in column format, so every partition has size one
    int[] complexPartitions = new int[complexDimColumnCardinality.length];
    Arrays.fill(complexPartitions, 1);
    int[] complexBitLength = CarbonUtil.getDimensionBitLength(complexDimColumnCardinality, complexPartitions);
    for (int column = 0; column < complexBitLength.length; column++) {
        // a cardinality of zero gets the full 64 bits
        if (complexDimColumnCardinality[column] == 0) {
            complexBitLength[column] = 64;
        }
    }
    ColumnarSplitter complexSplitter = new MultiDimKeyVarLengthVariableSplitGenerator(complexBitLength, complexPartitions);
    eachComplexDimColumnValueSize = complexSplitter.getBlockKeySize();
}
Also used : MultiDimKeyVarLengthVariableSplitGenerator(org.apache.carbondata.core.keygenerator.columnar.impl.MultiDimKeyVarLengthVariableSplitGenerator) ColumnarSplitter(org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter) MultiDimKeyVarLengthGenerator(org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 2 with ColumnarSplitter

Use of org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter in the Apache CarbonData project.

Class CarbonFactDataHandlerColumnar, method setWritingConfiguration.

/**
 * Configures fact file writing: resolves the page size, allocates the row
 * buffer, computes the key block sizes for the column stores, registers the
 * complex-type surrogate indexes, and finally creates and initializes the
 * fact data writer.
 *
 * @throws CarbonDataWriterException declared by the signature; presumably
 *         raised while creating or initializing the writer (confirm against
 *         getFactDataWriter / initializeWriter)
 */
private void setWritingConfiguration() throws CarbonDataWriterException {
    // page size comes from the blocklet size property ...
    this.pageSize = Integer.parseInt(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.BLOCKLET_SIZE, CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
    // ... except for the V3 format, which always uses its own fixed default
    if (version == ColumnarFormatVersion.V3) {
        this.pageSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
    }
    LOGGER.info("Number of rows per column blocklet " + pageSize);
    dataRows = new ArrayList<>(this.pageSize);
    int dimSet = Integer.parseInt(CarbonCommonConstants.DIMENSION_SPLIT_VALUE_IN_COLUMNAR_DEFAULTVALUE);
    int noOfColStore = colGrpModel.getNoOfColumnStore();
    // one slot per column store followed by one per expanded complex column
    // NOTE(review): keyBlockSize is filled below but never read again in this
    // method - confirm whether it was meant to be stored or passed on.
    int[] keyBlockSize = new int[noOfColStore + getExpandedComplexColsCount()];
    // the column splitter is only consulted when at least one dimension is present
    if (model.getDimLens().length > 0) {
        // Using Variable length variable split generator
        // This will help in splitting mdkey to columns. variable split is required because all
        // columns which are part of row store will be in single column store
        // e.g. if {0,1,2,3,4,5} is dimension and {0,1,2} is row store dimension
        // then the splitter will return columns as {0,1,2}{3}{4}{5}
        ColumnarSplitter columnarSplitter = model.getSegmentProperties().getFixedLengthKeySplitter();
        System.arraycopy(columnarSplitter.getBlockKeySize(), 0, keyBlockSize, 0, noOfColStore);
    }
    setComplexMapSurrogateIndex(model.getDimensionCount());
    // block key sizes from an equi-split generator over a copy of the
    // dimension lengths, extended for complex types
    int[] blockKeySize = getBlockKeySizeWithComplexTypes(new MultiDimKeyVarLengthEquiSplitGenerator(CarbonUtil.getIncrementedCardinalityFullyFilled(model.getDimLens().clone()), (byte) dimSet).getBlockKeySize());
    // the entries after the column stores come from the complex-aware sizes
    System.arraycopy(blockKeySize, noOfColStore, keyBlockSize, noOfColStore, blockKeySize.length - noOfColStore);
    this.dataWriter = getFactDataWriter();
    // initialize the channel
    this.dataWriter.initializeWriter();
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) MultiDimKeyVarLengthEquiSplitGenerator(org.apache.carbondata.core.keygenerator.columnar.impl.MultiDimKeyVarLengthEquiSplitGenerator) ColumnarSplitter(org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter) ArrayList(java.util.ArrayList) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Aggregations

ArrayList (java.util.ArrayList)2 ColumnarSplitter (org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 MultiDimKeyVarLengthEquiSplitGenerator (org.apache.carbondata.core.keygenerator.columnar.impl.MultiDimKeyVarLengthEquiSplitGenerator)1 MultiDimKeyVarLengthVariableSplitGenerator (org.apache.carbondata.core.keygenerator.columnar.impl.MultiDimKeyVarLengthVariableSplitGenerator)1 MultiDimKeyVarLengthGenerator (org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)1 GenericDataType (org.apache.carbondata.processing.datatypes.GenericDataType)1