use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.
the class TablePage method addComplexColumn.
/**
 * Adds the complex-type value of one row to the internal complex dimension page.
 *
 * <p>The raw bytes are bit packed through the complex data type, split into one list of byte
 * arrays per depth level, and each list is stored in the page at the given row.
 *
 * @param index index of the page within the complex dimension page array
 * @param rowId id of the input row; row 0 creates the page before data is added
 * @param complexColumns byte array of the complex column to be added, extracted from the input row
 * @throws CarbonDataWriterException if bit packing or writing the complex value fails
 */
// TODO: this function should be refactored, ColumnPage should support complex type encoding
// directly instead of doing it here
private void addComplexColumn(int index, int rowId, byte[] complexColumns) {
  GenericDataType complexDataType =
      model.getComplexIndexMap().get(index + model.getPrimitiveDimLens().length);
  // initialize the page if first row
  if (rowId == 0) {
    int initialDepth = complexDataType.getColsCount();
    getComplexDimensionPage()[index] = new ComplexColumnPage(pageSize, initialDepth);
  }
  int depthInComplexColumn = getComplexDimensionPage()[index].getDepth();
  // this is the encoded columnar data which will be added to page,
  // size of this list is the depth of complex column, we will fill it by input data
  List<ArrayList<byte[]>> encodedComplexColumnar = new ArrayList<>();
  for (int k = 0; k < depthInComplexColumn; k++) {
    encodedComplexColumnar.add(new ArrayList<byte[]>());
  }
  // encode the complex type data and fill encodedComplexColumnar;
  // try-with-resources closes both streams on the success and the failure path
  try (ByteArrayOutputStream byteArrayOutput = new ByteArrayOutputStream();
      DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutput)) {
    ByteBuffer byteArrayInput = ByteBuffer.wrap(complexColumns);
    complexDataType.parseAndBitPack(
        byteArrayInput, dataOutputStream, model.getComplexDimensionKeyGenerator());
    complexDataType.getColumnarDataForComplexType(
        encodedComplexColumnar, ByteBuffer.wrap(byteArrayOutput.toByteArray()));
  } catch (IOException | KeyGenException e) {
    throw new CarbonDataWriterException(
        "Problem while bit packing and writing complex datatype", e);
  }
  for (int depth = 0; depth < depthInComplexColumn; depth++) {
    getComplexDimensionPage()[index]
        .putComplexData(rowId, depth, encodedComplexColumnar.get(depth));
  }
}
use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.
the class CarbonFactDataHandlerColumnar method isComplexTypes.
/**
 * Builds a flag array telling, per expanded column position, whether it belongs to a complex type.
 *
 * <p>For every column index {@code i} the complex index map is queried with
 * {@code i - noDictionaryCount}. A hit contributes {@code getColsCount()} entries of
 * {@code true}; a miss contributes a single {@code false}.
 *
 * @return array of length {@code getColsCount(noOfColumn)} holding the flags in column order
 */
private boolean[] isComplexTypes() {
  final int noDictCount = model.getNoDictionaryCount();
  final int columnCount =
      colGrpModel.getNoOfColumnStore() + noDictCount + getComplexColumnCount();
  final int expandedCount = getColsCount(columnCount);
  boolean[] result = new boolean[expandedCount];
  List<Boolean> flags = new ArrayList<Boolean>(expandedCount);
  for (int column = 0; column < columnCount; column++) {
    GenericDataType complexType = model.getComplexIndexMap().get(column - noDictCount);
    if (complexType == null) {
      // not a complex column: exactly one slot
      flags.add(false);
      continue;
    }
    // complex column: one slot per column reported by the data type
    for (int remaining = complexType.getColsCount(); remaining > 0; remaining--) {
      flags.add(true);
    }
  }
  for (int slot = 0; slot < expandedCount; slot++) {
    result[slot] = flags.get(slot);
  }
  return result;
}
use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.
the class CarbonDataProcessorUtil method getComplexTypesMap.
/**
 * Builds the complex data types described by the given data fields, keyed by root column name.
 *
 * <p>The description string from {@code getComplexTypeString} is split into hierarchies
 * (semicolon constant), each hierarchy into levels (hash constant), and each level into fields
 * (colon constant). Level 0 defines the root type; every further level is added to the root
 * through {@code addChildren}. Field 1 of a level selects array, struct, or primitive.
 *
 * @param dataFields fields from which the complex type description string is derived
 * @return insertion-ordered map from root name (field 0 of level 0) to its data type; empty
 *     when the description string is null or empty
 */
// TODO: need to simplify it. Not required create string first.
public static Map<String, GenericDataType> getComplexTypesMap(DataField[] dataFields) {
  String description = getComplexTypeString(dataFields);
  Map<String, GenericDataType> result = new LinkedHashMap<String, GenericDataType>();
  if (description == null || description.equals("")) {
    return result;
  }
  for (String hierarchy : description.split(CarbonCommonConstants.SEMICOLON_SPC_CHARACTER)) {
    String[] levels = hierarchy.split(CarbonCommonConstants.HASH_SPC_CHARACTER);
    String[] rootInfo = levels[0].split(CarbonCommonConstants.COLON_SPC_CHARACTER);
    GenericDataType root;
    if (rootInfo[1].equals(CarbonCommonConstants.ARRAY)) {
      root = new ArrayDataType(rootInfo[0], "", rootInfo[3]);
    } else {
      root = new StructDataType(rootInfo[0], "", rootInfo[3]);
    }
    result.put(rootInfo[0], root);
    for (int level = 1; level < levels.length; level++) {
      String[] childInfo = levels[level].split(CarbonCommonConstants.COLON_SPC_CHARACTER);
      String typeTag = childInfo[1];
      if (typeTag.equals(CarbonCommonConstants.ARRAY)) {
        root.addChildren(new ArrayDataType(childInfo[0], childInfo[2], childInfo[3]));
      } else if (typeTag.equals(CarbonCommonConstants.STRUCT)) {
        root.addChildren(new StructDataType(childInfo[0], childInfo[2], childInfo[3]));
      } else {
        // anything else is a primitive; field 4 is parsed as an int
        root.addChildren(new PrimitiveDataType(
            childInfo[0], childInfo[2], childInfo[3], Integer.parseInt(childInfo[4])));
      }
    }
  }
  return result;
}
use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.
the class CarbonFactDataHandlerModel method getCarbonFactDataHandlerModel.
/**
 * Creates a model object for the carbon fact data handler from a load model, a table and its
 * segment properties.
 *
 * <p>The complex index map is created empty and is not populated here.
 *
 * @param loadModel source of the database name, store path, partition id, segment id and the
 *     load schema whose table supplies the inverted-index flags
 * @param carbonTable table supplying the last-updated time, column schemas and block size
 * @param segmentProperties source of measures, dimensions and dimension cardinalities
 * @param tableName name of the table the model is built for
 * @param tempStoreLocation value stored as the model's store location
 * @return the populated model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String tempStoreLocation) {
  CarbonFactDataHandlerModel handlerModel = new CarbonFactDataHandlerModel();
  handlerModel.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
  handlerModel.setDatabaseName(loadModel.getDatabaseName());
  handlerModel.setTableName(tableName);
  handlerModel.setMeasureCount(segmentProperties.getMeasures().size());
  handlerModel.setStoreLocation(tempStoreLocation);
  handlerModel.setDimLens(segmentProperties.getDimColumnsCardinality());
  handlerModel.setSegmentProperties(segmentProperties);
  handlerModel.setNoDictionaryCount(segmentProperties.getNumberOfNoDictionaryDimension());
  // dimension count excludes the no-dictionary dimensions set just above
  handlerModel.setDimensionCount(
      segmentProperties.getDimensions().size() - handlerModel.getNoDictionaryCount());

  List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(
      carbonTable.getDimensionByTableName(tableName),
      carbonTable.getMeasureByTableName(tableName));
  handlerModel.setWrapperColumnSchema(columnSchemas);
  // cardinality for all the columns, including no dictionary columns
  handlerModel.setColCardinality(CarbonUtil.getFormattedCardinality(
      segmentProperties.getDimColumnsCardinality(), columnSchemas));

  // TO-DO Need to handle complex types here; the map stays empty for now
  Map<Integer, GenericDataType> emptyComplexIndexMap =
      new HashMap<>(segmentProperties.getComplexDimensions().size());
  handlerModel.setComplexIndexMap(emptyComplexIndexMap);

  DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
  int measurePos = 0;
  for (CarbonMeasure measure : segmentProperties.getMeasures()) {
    measureTypes[measurePos] = measure.getDataType();
    measurePos++;
  }
  handlerModel.setMeasureDataType(measureTypes);

  handlerModel.setCarbonDataDirectoryPath(
      CarbonDataProcessorUtil.checkAndCreateCarbonStoreLocation(
          loadModel.getStorePath(), loadModel.getDatabaseName(), tableName,
          loadModel.getPartitionId(), loadModel.getSegmentId()));

  List<CarbonDimension> tableDimensions =
      loadModel.getCarbonDataLoadSchema().getCarbonTable().getDimensionByTableName(tableName);
  boolean[] invertedIndexFlags = new boolean[tableDimensions.size()];
  for (int dim = 0; dim < invertedIndexFlags.length; dim++) {
    invertedIndexFlags[dim] = tableDimensions.get(dim).isUseInvertedIndex();
  }
  handlerModel.setIsUseInvertedIndex(invertedIndexFlags);

  handlerModel.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
  handlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
  return handlerModel;
}
use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.
the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.
/**
 * Creates the model using a {@link CarbonDataLoadConfiguration}.
 *
 * <p>When the table is not a sort table, every non-zero cardinality is replaced by
 * {@code Integer.MAX_VALUE} in the array returned by the cardinality finder. Complex types are
 * registered in the complex index map under consecutive keys starting at the number of simple
 * dimensions, and their primitive children receive consecutive surrogate indexes starting at
 * the same number.
 *
 * @param configuration load configuration supplying the table identifier, data fields,
 *     counts, cardinalities and load properties
 * @param storeLocation value stored as the model's store location
 * @param bucketId bucket id stored on the model
 * @param taskExtension task extension stored on the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId, int taskExtension) {
  CarbonTableIdentifier identifier = configuration.getTableIdentifier().getCarbonTableIdentifier();
  boolean[] invertedIndexFlags =
      CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());
  int[] cardinalities = configuration.getCardinalityFinder().getCardinality();
  if (!configuration.isSortTable()) {
    for (int pos = 0; pos < cardinalities.length; pos++) {
      if (cardinalities[pos] == 0) {
        continue;
      }
      cardinalities[pos] = Integer.MAX_VALUE;
    }
  }
  CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
      identifier.getDatabaseName() + CarbonCommonConstants.UNDERSCORE
          + identifier.getTableName());
  List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(
      carbonTable.getDimensionByTableName(identifier.getTableName()),
      carbonTable.getMeasureByTableName(identifier.getTableName()));
  int[] colCardinality = CarbonUtil.getFormattedCardinality(cardinalities, columnSchemas);
  SegmentProperties segmentProperties = new SegmentProperties(columnSchemas, colCardinality);
  int[] dimLens = configuration.calcDimensionLengths();
  int dimensionCount = configuration.getDimensionCount();
  int noDictionaryCount = configuration.getNoDictionaryCount();
  int complexDimensionCount = configuration.getComplexColumnCount();
  int measureCount = configuration.getMeasureCount();

  // simple dimensions: neither no-dictionary nor complex
  final int simpleDimsCount = dimensionCount - noDictionaryCount - complexDimensionCount;
  int[] simpleDimsLen = new int[simpleDimsCount];
  for (int dim = 0; dim < simpleDimsCount; dim++) {
    simpleDimsLen[dim] = dimLens[dim];
  }

  // To set MDKey index of each primitive type in complex type
  int nextSurrogateIndex = simpleDimsCount;
  int nextComplexKey = simpleDimsCount;
  Map<String, GenericDataType> complexTypes =
      CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields());
  Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
  for (GenericDataType complexType : complexTypes.values()) {
    complexType.setOutputArrayIndex(0);
    complexIndexMap.put(nextComplexKey, complexType);
    nextComplexKey++;
    List<GenericDataType> primitiveChildren = new ArrayList<GenericDataType>();
    complexType.getAllPrimitiveChildren(primitiveChildren);
    for (GenericDataType primitive : primitiveChildren) {
      primitive.setSurrogateIndex(nextSurrogateIndex);
      nextSurrogateIndex++;
    }
  }

  CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(
      Integer.parseInt(configuration.getTaskNo()),
      (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
  String carbonDataDirectoryPath = getCarbonDataFolderLocation(configuration);

  CarbonFactDataHandlerModel handlerModel = new CarbonFactDataHandlerModel();
  handlerModel.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
  handlerModel.setDatabaseName(identifier.getDatabaseName());
  handlerModel.setTableName(identifier.getTableName());
  handlerModel.setMeasureCount(measureCount);
  handlerModel.setStoreLocation(storeLocation);
  handlerModel.setDimLens(dimLens);
  handlerModel.setNoDictionaryCount(noDictionaryCount);
  handlerModel.setDimensionCount(configuration.getDimensionCount() - noDictionaryCount);
  handlerModel.setComplexIndexMap(complexIndexMap);
  handlerModel.setSegmentProperties(segmentProperties);
  handlerModel.setColCardinality(colCardinality);
  handlerModel.setMeasureDataType(configuration.getMeasureDataType());
  handlerModel.setWrapperColumnSchema(columnSchemas);
  handlerModel.setPrimitiveDimLens(simpleDimsLen);
  handlerModel.setCarbonDataFileAttributes(fileAttributes);
  handlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
  handlerModel.setIsUseInvertedIndex(invertedIndexFlags);
  handlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
  handlerModel.setComplexDimensionKeyGenerator(
      configuration.createKeyGeneratorForComplexDimension());
  handlerModel.bucketId = bucketId;
  handlerModel.segmentId = configuration.getSegmentId();
  handlerModel.taskExtension = taskExtension;
  return handlerModel;
}
Aggregations