Use of org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter in the Apache CarbonData project.
Class SegmentProperties, method fillKeyGeneratorDetails.
/**
 * Populates the key generator related state of this object: the dimension
 * partition layout, the mdkey generator over all dictionary dimensions, the
 * generator restricted to the dictionary sort columns, the columnar key
 * splitter, and the per-column value sizes of the primitive and complex
 * dimensions. These details are required both during query execution and
 * during data loading.
 */
private void fillKeyGeneratorDetails() {
  // partitionSizes holds one entry per stored dictionary column; the entry is the
  // number of dimensions kept together (1 for a columnar dimension, the member
  // count for a column group)
  List<Integer> partitionSizes =
      new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  // dictionaryFlags holds one entry per stored column: false for a dimension
  // without DICTIONARY encoding, true for each entry of partitionSizes
  List<Boolean> dictionaryFlags =
      new ArrayList<Boolean>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  int lastGroupId = -1;
  for (int dimIndex = 0; dimIndex < dimensions.size(); dimIndex++) {
    CarbonDimension dimension = dimensions.get(dimIndex);
    if (!dimension.getEncoder().contains(Encoding.DICTIONARY)) {
      // not a part of the mdkey, so it contributes no partition entry; note that
      // lastGroupId is deliberately left untouched for such a dimension
      dictionaryFlags.add(false);
      continue;
    }
    if (dimension.isColumnar() || dimension.columnGroupId() != lastGroupId) {
      // a columnar dimension is stored individually, and the first member of a
      // column group opens a new partition: both start with a count of one
      partitionSizes.add(1);
      dictionaryFlags.add(true);
    } else {
      // same column group as the previous dictionary dimension: grow the last
      // partition instead of opening a new one
      int lastPartition = partitionSizes.size() - 1;
      partitionSizes.set(lastPartition, partitionSizes.get(lastPartition) + 1);
    }
    lastGroupId = dimension.columnGroupId();
  }

  // partition layout as a primitive array
  dimensionPartitions =
      ArrayUtils.toPrimitive(partitionSizes.toArray(new Integer[partitionSizes.size()]));
  // bit length of each column, derived from the cardinalities and the layout
  int[] bitLength = CarbonUtil.getDimensionBitLength(dimColumnsCardinality, dimensionPartitions);
  this.dimensionKeyGenerator = new MultiDimKeyVarLengthGenerator(bitLength);

  if (this.getNumberOfDictSortColumns() != bitLength.length) {
    // only a leading subset of the columns are sort columns: build a dedicated
    // generator over the first bit lengths
    int sortColumnCount = this.getNumberOfDictSortColumns();
    int[] sortColumnBitLength = new int[sortColumnCount];
    System.arraycopy(bitLength, 0, sortColumnBitLength, 0, sortColumnCount);
    this.sortColumnsGenerator = new MultiDimKeyVarLengthGenerator(sortColumnBitLength);
  } else {
    // every column is a sort column, so the full generator is reused
    this.sortColumnsGenerator = this.dimensionKeyGenerator;
  }

  this.fixedLengthKeySplitter =
      new MultiDimKeyVarLengthVariableSplitGenerator(bitLength, dimensionPartitions);
  // size of each dictionary value in the file block, one per partition
  int[] dictionaryValueSizes = fixedLengthKeySplitter.getBlockKeySize();
  this.eachDimColumnValueSize = new int[dictionaryFlags.size()];
  int nextDictionarySlot = 0;
  for (int column = 0; column < eachDimColumnValueSize.length; column++) {
    // non-dictionary columns are marked with -1; dictionary columns consume the
    // splitter sizes in order
    eachDimColumnValueSize[column] =
        dictionaryFlags.get(column) ? dictionaryValueSizes[nextDictionarySlot++] : -1;
  }

  if (complexDimensions.size() <= 0) {
    eachComplexDimColumnValueSize = new int[0];
    return;
  }
  // complex dimensions are stored in column format, so every partition is one
  int[] complexPartitions = new int[complexDimColumnCardinality.length];
  Arrays.fill(complexPartitions, 1);
  int[] complexBitLength =
      CarbonUtil.getDimensionBitLength(complexDimColumnCardinality, complexPartitions);
  for (int column = 0; column < complexBitLength.length; column++) {
    // a complex column reported with cardinality 0 is given the full 64 bits
    if (complexDimColumnCardinality[column] == 0) {
      complexBitLength[column] = 64;
    }
  }
  ColumnarSplitter complexSplitter =
      new MultiDimKeyVarLengthVariableSplitGenerator(complexBitLength, complexPartitions);
  eachComplexDimColumnValueSize = complexSplitter.getBlockKeySize();
}
Use of org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter in the Apache CarbonData project.
Class CarbonFactDataHandlerColumnar, method setWritingConfiguration.
/**
 * Configures fact file writing: resolves the number of rows per column page,
 * allocates the row buffer, computes the key block sizes, registers the complex
 * type surrogate indexes and finally creates and initializes the fact data
 * writer.
 *
 * @throws CarbonDataWriterException declared by this method; presumably raised
 *         while creating or initializing the fact data writer (confirm against
 *         those methods)
 */
private void setWritingConfiguration() throws CarbonDataWriterException {
  // rows per page come from the blocklet size property, except for the V3 format
  // which always uses its own fixed default
  this.pageSize = Integer.parseInt(CarbonProperties.getInstance()
      .getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
          CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
  if (version == ColumnarFormatVersion.V3) {
    this.pageSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
  }
  LOGGER.info("Number of rows per column blocklet " + pageSize);
  dataRows = new ArrayList<>(this.pageSize);
  int dimSet =
      Integer.parseInt(CarbonCommonConstants.DIMENSION_SPLIT_VALUE_IN_COLUMNAR_DEFAULTVALUE);
  int noOfColStore = colGrpModel.getNoOfColumnStore();
  // one slot per column store followed by one slot per expanded complex column
  // NOTE(review): keyBlockSize is filled below but never read again inside this
  // method; it is kept because the calls feeding it are outside this view and may
  // have effects of their own - confirm whether it can be dropped.
  int[] keyBlockSize = new int[noOfColStore + getExpandedComplexColsCount()];
  // if at least one dimension is present, take the column store sizes from the
  // segment's key splitter; otherwise those slots keep their default of zero
  if (model.getDimLens().length > 0) {
    // Using Variable length variable split generator
    // This will help in splitting mdkey to columns. variable split is required because all
    // columns which are part of row store will be in single column store
    // e.g. if {0,1,2,3,4,5} is dimension and {0,1,2} is row store dimension
    // then below splitter will return column as {0,1,2}{3}{4}{5}
    ColumnarSplitter columnarSplitter = model.getSegmentProperties().getFixedLengthKeySplitter();
    System.arraycopy(columnarSplitter.getBlockKeySize(), 0, keyBlockSize, 0, noOfColStore);
  }
  // The former partition of measure indexes into BYTE/decimal and other data
  // types was removed here: its results were local and never used.
  setComplexMapSurrogateIndex(model.getDimensionCount());
  int[] blockKeySize = getBlockKeySizeWithComplexTypes(
      new MultiDimKeyVarLengthEquiSplitGenerator(
          CarbonUtil.getIncrementedCardinalityFullyFilled(model.getDimLens().clone()),
          (byte) dimSet).getBlockKeySize());
  // the slots after the column stores are taken from the complex-aware sizes
  System.arraycopy(blockKeySize, noOfColStore, keyBlockSize, noOfColStore,
      blockKeySize.length - noOfColStore);
  this.dataWriter = getFactDataWriter();
  // initialize the channel
  this.dataWriter.initializeWriter();
}
Aggregations