Search in sources :

Example 1 with GenericDataType

use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.

the class TablePage method addComplexColumn.

/**
 * Adds one row of a complex column to the internal complex dimension page.
 *
 * <p>When {@code rowId} is 0 the page at {@code index} is (re)created, sized by
 * {@code pageSize} and the column count reported by the column's {@link GenericDataType}.
 * The raw bytes are then bit-packed, split into one list per depth level and stored
 * in the page for the given row.
 *
 * @param index          index of the complex dimension page
 * @param rowId          id of the input row
 * @param complexColumns byte array of the complex column to be added, extracted from the input row
 * @throws CarbonDataWriterException if bit packing or writing the complex data fails
 */
// TODO: this function should be refactored, ColumnPage should support complex type encoding
// directly instead of doing it here
private void addComplexColumn(int index, int rowId, byte[] complexColumns) {
    GenericDataType complexDataType = model.getComplexIndexMap().get(index + model.getPrimitiveDimLens().length);
    // initialize the page if first row
    if (rowId == 0) {
        getComplexDimensionPage()[index] = new ComplexColumnPage(pageSize, complexDataType.getColsCount());
    }
    ComplexColumnPage page = getComplexDimensionPage()[index];
    int depthInComplexColumn = page.getDepth();
    // this is the encoded columnar data which will be added to page,
    // size of this list is the depth of complex column, we will fill it by input data
    List<ArrayList<byte[]>> encodedComplexColumnar = new ArrayList<>();
    for (int k = 0; k < depthInComplexColumn; k++) {
        encodedComplexColumnar.add(new ArrayList<byte[]>());
    }
    // encode the complex type data and fill encodedComplexColumnar;
    // try-with-resources closes both streams on the success and the failure path
    try (ByteArrayOutputStream byteArrayOutput = new ByteArrayOutputStream();
         DataOutputStream dataOutputStream = new DataOutputStream(byteArrayOutput)) {
        ByteBuffer byteArrayInput = ByteBuffer.wrap(complexColumns);
        complexDataType.parseAndBitPack(byteArrayInput, dataOutputStream, model.getComplexDimensionKeyGenerator());
        // make sure everything written so far is visible in the backing byte array
        dataOutputStream.flush();
        complexDataType.getColumnarDataForComplexType(encodedComplexColumnar, ByteBuffer.wrap(byteArrayOutput.toByteArray()));
    } catch (IOException | KeyGenException e) {
        throw new CarbonDataWriterException("Problem while bit packing and writing complex datatype", e);
    }
    for (int depth = 0; depth < depthInComplexColumn; depth++) {
        page.putComplexData(rowId, depth, encodedComplexColumnar.get(depth));
    }
}
Also used : DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) CarbonDataWriterException(org.apache.carbondata.processing.store.writer.exception.CarbonDataWriterException) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) ComplexColumnPage(org.apache.carbondata.core.datastore.page.ComplexColumnPage)

Example 2 with GenericDataType

use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.

the class CarbonFactDataHandlerColumnar method isComplexTypes.

/**
 * Builds a per-column flag array marking which stored columns belong to a complex type.
 *
 * <p>For every logical column, a complex type found in the model's complex index map
 * contributes one {@code true} per column it reports via {@code getColsCount()};
 * any other column contributes a single {@code false}.
 *
 * @return flag array of length {@code getColsCount(noOfColumn)}
 */
private boolean[] isComplexTypes() {
    final int noDictCount = model.getNoDictionaryCount();
    final int columnCount = colGrpModel.getNoOfColumnStore() + noDictCount + getComplexColumnCount();
    final int totalCols = getColsCount(columnCount);
    boolean[] result = new boolean[totalCols];
    // flags are collected in a list first because a complex column expands to several entries
    List<Boolean> flags = new ArrayList<Boolean>(totalCols);
    for (int col = 0; col < columnCount; col++) {
        GenericDataType complexType = model.getComplexIndexMap().get(col - noDictCount);
        if (complexType == null) {
            flags.add(false);
            continue;
        }
        int expanded = complexType.getColsCount();
        int added = 0;
        while (added < expanded) {
            flags.add(true);
            added++;
        }
    }
    int pos = 0;
    while (pos < totalCols) {
        result[pos] = flags.get(pos);
        pos++;
    }
    return result;
}
Also used : GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) ArrayList(java.util.ArrayList) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean)

Example 3 with GenericDataType

use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.

the class CarbonDataProcessorUtil method getComplexTypesMap.

/**
 * Builds the complex data type objects described by the given data fields.
 *
 * <p>The fields are first rendered to a single string by {@code getComplexTypeString};
 * that string is split into hierarchies, each hierarchy into levels, and each level
 * into its colon-separated parts. The first level of a hierarchy becomes the root type
 * (array or struct) and every following level is added to that root as a child.
 *
 * @param dataFields fields to inspect
 * @return insertion-ordered map from root column name to its root type; empty when
 *         there is no complex type string
 */
// TODO: need to simplify it. Not required create string first.
public static Map<String, GenericDataType> getComplexTypesMap(DataField[] dataFields) {
    String encodedTypes = getComplexTypeString(dataFields);
    if (encodedTypes == null || encodedTypes.isEmpty()) {
        return new LinkedHashMap<>();
    }
    Map<String, GenericDataType> result = new LinkedHashMap<String, GenericDataType>();
    for (String hierarchy : encodedTypes.split(CarbonCommonConstants.SEMICOLON_SPC_CHARACTER)) {
        String[] levels = hierarchy.split(CarbonCommonConstants.HASH_SPC_CHARACTER);
        String[] rootInfo = levels[0].split(CarbonCommonConstants.COLON_SPC_CHARACTER);
        // the root of a hierarchy is either an array or, otherwise, a struct
        GenericDataType root;
        if (rootInfo[1].equals(CarbonCommonConstants.ARRAY)) {
            root = new ArrayDataType(rootInfo[0], "", rootInfo[3]);
        } else {
            root = new StructDataType(rootInfo[0], "", rootInfo[3]);
        }
        result.put(rootInfo[0], root);
        // remaining levels describe children, all of which are handed to the root
        for (int level = 1; level < levels.length; level++) {
            String[] childInfo = levels[level].split(CarbonCommonConstants.COLON_SPC_CHARACTER);
            String kind = childInfo[1];
            if (CarbonCommonConstants.ARRAY.equals(kind)) {
                root.addChildren(new ArrayDataType(childInfo[0], childInfo[2], childInfo[3]));
            } else if (CarbonCommonConstants.STRUCT.equals(kind)) {
                root.addChildren(new StructDataType(childInfo[0], childInfo[2], childInfo[3]));
            } else {
                root.addChildren(new PrimitiveDataType(childInfo[0], childInfo[2], childInfo[3], Integer.parseInt(childInfo[4])));
            }
        }
    }
    return result;
}
Also used : GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) PrimitiveDataType(org.apache.carbondata.processing.datatypes.PrimitiveDataType) StructDataType(org.apache.carbondata.processing.datatypes.StructDataType) ArrayDataType(org.apache.carbondata.processing.datatypes.ArrayDataType) LinkedHashMap(java.util.LinkedHashMap)

Example 4 with GenericDataType

use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.

the class CarbonFactDataHandlerModel method getCarbonFactDataHandlerModel.

/**
 * Creates and populates a model object for the carbon fact data handler.
 *
 * <p>Note that the complex index map set on the model is created empty here.
 *
 * @param loadModel         supplies the database name, store path, partition id, segment id
 *                          and the load schema used for the inverted-index flags
 * @param carbonTable       supplies the last-updated time, the dimension and measure lists
 *                          and the block size
 * @param segmentProperties supplies measures, dimensions and dimension cardinalities
 * @param tableName         name of the table the model is built for
 * @param tempStoreLocation value stored as the model's store location
 * @return the populated model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String tempStoreLocation) {
    CarbonFactDataHandlerModel handlerModel = new CarbonFactDataHandlerModel();
    handlerModel.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    handlerModel.setDatabaseName(loadModel.getDatabaseName());
    handlerModel.setTableName(tableName);
    handlerModel.setMeasureCount(segmentProperties.getMeasures().size());
    handlerModel.setStoreLocation(tempStoreLocation);
    handlerModel.setDimLens(segmentProperties.getDimColumnsCardinality());
    handlerModel.setSegmentProperties(segmentProperties);
    // the no-dictionary count must be set first: the dimension count below reads it back
    handlerModel.setNoDictionaryCount(segmentProperties.getNumberOfNoDictionaryDimension());
    handlerModel.setDimensionCount(segmentProperties.getDimensions().size() - handlerModel.getNoDictionaryCount());

    List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(tableName), carbonTable.getMeasureByTableName(tableName));
    handlerModel.setWrapperColumnSchema(columnSchemas);
    // cardinality for all the columns, including no dictionary columns
    int[] cardinality = CarbonUtil.getFormattedCardinality(segmentProperties.getDimColumnsCardinality(), columnSchemas);
    handlerModel.setColCardinality(cardinality);

    // TODO: need to handle complex types here; the map is left empty for now
    Map<Integer, GenericDataType> complexTypesByIndex = new HashMap<Integer, GenericDataType>(segmentProperties.getComplexDimensions().size());
    handlerModel.setComplexIndexMap(complexTypesByIndex);

    // one data type per measure, in measure order
    DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
    int measurePos = 0;
    for (CarbonMeasure measure : segmentProperties.getMeasures()) {
        measureTypes[measurePos] = measure.getDataType();
        measurePos++;
    }
    handlerModel.setMeasureDataType(measureTypes);

    String dataDirectory = CarbonDataProcessorUtil.checkAndCreateCarbonStoreLocation(loadModel.getStorePath(), loadModel.getDatabaseName(), tableName, loadModel.getPartitionId(), loadModel.getSegmentId());
    handlerModel.setCarbonDataDirectoryPath(dataDirectory);

    // one inverted-index flag per dimension of the table in the load schema
    List<CarbonDimension> tableDimensions = loadModel.getCarbonDataLoadSchema().getCarbonTable().getDimensionByTableName(tableName);
    boolean[] invertedIndexFlags = new boolean[tableDimensions.size()];
    int dimensionPos = 0;
    for (CarbonDimension tableDimension : tableDimensions) {
        invertedIndexFlags[dimensionPos] = tableDimension.isUseInvertedIndex();
        dimensionPos++;
    }
    handlerModel.setIsUseInvertedIndex(invertedIndexFlags);
    handlerModel.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
    handlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    return handlerModel;
}
Also used : HashMap(java.util.HashMap) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType)

Example 5 with GenericDataType

use of org.apache.carbondata.processing.datatypes.GenericDataType in project carbondata by apache.

the class CarbonFactDataHandlerModel method createCarbonFactDataHandlerModel.

/**
 * Creates the model using a {@link CarbonDataLoadConfiguration}.
 *
 * <p>When the configuration is not for a sort table, every non-zero entry of the
 * cardinality array obtained from the configuration is overwritten in place with
 * {@link Integer#MAX_VALUE} before it is used.
 *
 * @param configuration load configuration the model is derived from
 * @param storeLocation value stored as the model's store location
 * @param bucketId      bucket id stored on the model
 * @param taskExtension task extension stored on the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel createCarbonFactDataHandlerModel(CarbonDataLoadConfiguration configuration, String storeLocation, int bucketId, int taskExtension) {
    CarbonTableIdentifier tableId = configuration.getTableIdentifier().getCarbonTableIdentifier();
    boolean[] invertedIndexFlags = CarbonDataProcessorUtil.getIsUseInvertedIndex(configuration.getDataFields());
    int[] cardinalities = configuration.getCardinalityFinder().getCardinality();
    if (!configuration.isSortTable()) {
        for (int pos = 0; pos < cardinalities.length; pos++) {
            if (cardinalities[pos] == 0) {
                continue;
            }
            cardinalities[pos] = Integer.MAX_VALUE;
        }
    }
    CarbonMetadata metadata = CarbonMetadata.getInstance();
    CarbonTable carbonTable = metadata.getCarbonTable(tableId.getDatabaseName() + CarbonCommonConstants.UNDERSCORE + tableId.getTableName());
    List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(tableId.getTableName()), carbonTable.getMeasureByTableName(tableId.getTableName()));
    int[] colCardinality = CarbonUtil.getFormattedCardinality(cardinalities, columnSchemas);
    SegmentProperties segmentProperties = new SegmentProperties(columnSchemas, colCardinality);

    int[] dimLens = configuration.calcDimensionLengths();
    int dimensionCount = configuration.getDimensionCount();
    int noDictionaryCount = configuration.getNoDictionaryCount();
    int complexDimensionCount = configuration.getComplexColumnCount();
    int measureCount = configuration.getMeasureCount();

    // lengths of the dimensions that are neither no-dictionary nor complex
    int simpleDimsCount = dimensionCount - noDictionaryCount - complexDimensionCount;
    int[] simpleDimsLen = new int[simpleDimsCount];
    for (int pos = 0; pos < simpleDimsCount; pos++) {
        simpleDimsLen[pos] = dimLens[pos];
    }

    // To set MDKey index of each primitive type in complex type:
    // complex types are keyed starting right after the simple dimensions, and the
    // surrogate indexes of their primitive children continue from the same start
    int nextComplexIndex = simpleDimsCount;
    int nextSurrogateIndex = simpleDimsCount;
    Map<String, GenericDataType> complexTypes = CarbonDataProcessorUtil.getComplexTypesMap(configuration.getDataFields());
    Map<Integer, GenericDataType> complexIndexMap = new HashMap<>(complexDimensionCount);
    for (GenericDataType complexType : complexTypes.values()) {
        complexType.setOutputArrayIndex(0);
        complexIndexMap.put(nextComplexIndex, complexType);
        nextComplexIndex++;
        List<GenericDataType> primitives = new ArrayList<GenericDataType>();
        complexType.getAllPrimitiveChildren(primitives);
        for (GenericDataType primitive : primitives) {
            primitive.setSurrogateIndex(nextSurrogateIndex);
            nextSurrogateIndex++;
        }
    }

    CarbonDataFileAttributes fileAttributes = new CarbonDataFileAttributes(Integer.parseInt(configuration.getTaskNo()), (Long) configuration.getDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP));
    String dataDirectory = getCarbonDataFolderLocation(configuration);

    CarbonFactDataHandlerModel handlerModel = new CarbonFactDataHandlerModel();
    handlerModel.setSchemaUpdatedTimeStamp(configuration.getSchemaUpdatedTimeStamp());
    handlerModel.setDatabaseName(tableId.getDatabaseName());
    handlerModel.setTableName(tableId.getTableName());
    handlerModel.setMeasureCount(measureCount);
    handlerModel.setStoreLocation(storeLocation);
    handlerModel.setDimLens(dimLens);
    handlerModel.setNoDictionaryCount(noDictionaryCount);
    handlerModel.setDimensionCount(configuration.getDimensionCount() - noDictionaryCount);
    handlerModel.setComplexIndexMap(complexIndexMap);
    handlerModel.setSegmentProperties(segmentProperties);
    handlerModel.setColCardinality(colCardinality);
    handlerModel.setMeasureDataType(configuration.getMeasureDataType());
    handlerModel.setWrapperColumnSchema(columnSchemas);
    handlerModel.setPrimitiveDimLens(simpleDimsLen);
    handlerModel.setCarbonDataFileAttributes(fileAttributes);
    handlerModel.setCarbonDataDirectoryPath(dataDirectory);
    handlerModel.setIsUseInvertedIndex(invertedIndexFlags);
    handlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    handlerModel.setComplexDimensionKeyGenerator(configuration.createKeyGeneratorForComplexDimension());
    handlerModel.bucketId = bucketId;
    handlerModel.segmentId = configuration.getSegmentId();
    handlerModel.taskExtension = taskExtension;
    return handlerModel;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

GenericDataType (org.apache.carbondata.processing.datatypes.GenericDataType)8 ArrayList (java.util.ArrayList)5 HashMap (java.util.HashMap)2 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutputStream (java.io.DataOutputStream)1 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties)1 ComplexColumnPage (org.apache.carbondata.core.datastore.page.ComplexColumnPage)1 KeyGenException (org.apache.carbondata.core.keygenerator.KeyGenException)1 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)1 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)1