Search in sources :

Example 16 with CarbonMeasure

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.

The class StoreCreator, method writeDictionary.

/**
 * Writes a dictionary file and a sort-index file for every dimension of the table, using the
 * distinct values found in a comma-separated fact file.
 *
 * <p>The first line of the file is treated as a header and skipped. For each following line,
 * the value at column position {@code i} is added to the distinct-value set of dimension
 * {@code i}. NOTE(review): this assumes the dimension columns are the leading columns of the
 * file, in the same order as {@code getDimensionByTableName} returns them - confirm with callers.
 *
 * @param factFilePath path of the comma-separated fact file to read
 * @param table        table whose fact-table dimensions get dictionaries
 * @throws Exception if reading the fact file or writing any dictionary/sort-index file fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
    try {
        // The first line is the header; it carries no dictionary values, so it is skipped.
        reader.readLine();
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    } finally {
        // Release the file handle even when a malformed row aborts the scan.
        reader.close();
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            writer.close();
        }
        // Commit only after all values were written and the writer closed without error.
        writer.commit();
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        // An empty list is passed here; presumably the values just committed are read back
        // through the cached dictionary - TODO confirm against the preparator's contract.
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 17 with CarbonMeasure

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.

The class CarbonFactDataHandlerModel, method getCarbonFactDataHandlerModel.

/**
 * Builds a {@link CarbonFactDataHandlerModel} and fills it from the supplied load model,
 * table and segment properties.
 *
 * @param loadModel         supplies the database name, store path, partition id, segment id
 *                          and the load schema whose table provides the dimension list
 * @param carbonTable       supplies the last-updated timestamp, the column lists used for the
 *                          wrapper column schema, and the block size
 * @param segmentProperties supplies measures, dimensions, cardinalities and complex dimensions
 * @param tableName         table name stored on the model and used for column/path lookups
 * @param tempStoreLocation value stored as the model's store location
 * @return the populated model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String tempStoreLocation) {
    CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();

    // Identity and basic counts.
    model.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    model.setDatabaseName(loadModel.getDatabaseName());
    model.setTableName(tableName);
    model.setMeasureCount(segmentProperties.getMeasures().size());
    model.setStoreLocation(tempStoreLocation);
    model.setDimLens(segmentProperties.getDimColumnsCardinality());
    model.setSegmentProperties(segmentProperties);
    model.setNoDictionaryCount(segmentProperties.getNumberOfNoDictionaryDimension());
    // The no-dictionary count must already be set: the dimension count is derived from it.
    int totalDimensions = segmentProperties.getDimensions().size();
    model.setDimensionCount(totalDimensions - model.getNoDictionaryCount());

    List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(tableName), carbonTable.getMeasureByTableName(tableName));
    model.setWrapperColumnSchema(columnSchemas);

    // Cardinality of every column, no-dictionary columns included.
    model.setColCardinality(CarbonUtil.getFormattedCardinality(segmentProperties.getDimColumnsCardinality(), columnSchemas));

    // TODO: complex types still need to be handled here; the map is created empty.
    int complexDimensionCount = segmentProperties.getComplexDimensions().size();
    model.setComplexIndexMap(new HashMap<Integer, GenericDataType>(complexDimensionCount));

    // One data type per measure, in the order the segment properties list them.
    DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
    int measureIdx = 0;
    for (CarbonMeasure measure : segmentProperties.getMeasures()) {
        measureTypes[measureIdx] = measure.getDataType();
        measureIdx++;
    }
    model.setMeasureDataType(measureTypes);

    model.setCarbonDataDirectoryPath(CarbonDataProcessorUtil.checkAndCreateCarbonStoreLocation(loadModel.getStorePath(), loadModel.getDatabaseName(), tableName, loadModel.getPartitionId(), loadModel.getSegmentId()));

    // One inverted-index flag per dimension of the load schema's table.
    List<CarbonDimension> tableDimensions = loadModel.getCarbonDataLoadSchema().getCarbonTable().getDimensionByTableName(tableName);
    boolean[] invertedIndexFlags = new boolean[tableDimensions.size()];
    int flagIdx = 0;
    for (CarbonDimension tableDimension : tableDimensions) {
        invertedIndexFlags[flagIdx] = tableDimension.isUseInvertedIndex();
        flagIdx++;
    }
    model.setIsUseInvertedIndex(invertedIndexFlags);

    model.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
    model.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    return model;
}
Also used : HashMap(java.util.HashMap) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Example 18 with CarbonMeasure

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.

The class QueryModel, method setDimAndMsrColumnNode.

/**
 * Binds a column expression to the dimension or measure that carries its column name.
 *
 * <p>The expression is first bound to the result of the dimension lookup and flagged as a
 * dimension. Only when that lookup yields {@code null} is the measure lookup performed, the
 * carbon column replaced by the measure, and the dimension flag cleared.
 *
 * @param dimensions dimensions searched by column name
 * @param measures   measures searched when no dimension matches
 * @param col        column expression to update in place
 */
private static void setDimAndMsrColumnNode(List<CarbonDimension> dimensions, List<CarbonMeasure> measures, ColumnExpression col) {
    final String name = col.getColumnName();
    final CarbonDimension matchedDimension = CarbonUtil.findDimension(dimensions, name);

    // Optimistically treat the column as a dimension (these setters run even on a null match).
    col.setCarbonColumn(matchedDimension);
    col.setDimension(matchedDimension);
    col.setDimension(true);
    if (matchedDimension != null) {
        return;
    }

    // No dimension matched: fall back to the measure list.
    CarbonMeasure matchedMeasure = getCarbonMetadataMeasure(name, measures);
    col.setCarbonColumn(matchedMeasure);
    col.setDimension(false);
}
Also used : CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 19 with CarbonMeasure

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.

The class CarbonCompactionUtil, method updateColumnSchemaAndGetCardinality.

/**
 * Collects the cardinality of every fact-table dimension of the given table and appends the
 * column schemas of its dimensions, then measures, to {@code updatedColumnSchemaList}.
 *
 * <p>A dimension's cardinality is looked up in {@code columnCardinalityMap} by column id; when
 * the map has no entry, {@code getDimensionDefaultCardinality} supplies the value.
 *
 * @param columnCardinalityMap    cardinalities keyed by column id
 * @param carbonTable             table whose fact-table dimensions and measures are walked
 * @param updatedColumnSchemaList output list that receives the column schemas
 * @return one cardinality per dimension, in dimension order
 */
public static int[] updateColumnSchemaAndGetCardinality(Map<String, Integer> columnCardinalityMap, CarbonTable carbonTable, List<ColumnSchema> updatedColumnSchemaList) {
    final String factTable = carbonTable.getFactTableName();
    List<Integer> cardinalities = new ArrayList<>(columnCardinalityMap.size());

    for (CarbonDimension dim : carbonTable.getDimensionByTableName(factTable)) {
        Integer cardinality = columnCardinalityMap.get(dim.getColumnId());
        if (cardinality == null) {
            // Not present in the map: fall back to the dimension's default.
            cardinality = getDimensionDefaultCardinality(dim);
        }
        cardinalities.add(cardinality);
        updatedColumnSchemaList.add(dim.getColumnSchema());
    }

    // Measures contribute their schema only; they have no cardinality entry.
    for (CarbonMeasure msr : carbonTable.getMeasureByTableName(carbonTable.getFactTableName())) {
        updatedColumnSchemaList.add(msr.getColumnSchema());
    }

    Integer[] boxed = cardinalities.toArray(new Integer[cardinalities.size()]);
    return ArrayUtils.toPrimitive(boxed);
}
Also used : CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 20 with CarbonMeasure

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure in project carbondata by apache.

The class DataLoadProcessBuilder, method createConfiguration.

/**
 * Builds the {@link CarbonDataLoadConfiguration} for a load from the given load model.
 *
 * <p>Besides populating the configuration, this method creates the temp store directory,
 * registers the temp location and the store path in {@code CarbonProperties}, and adds the
 * table to {@code CarbonMetadata}.
 *
 * @param loadModel     source of table, segment, delimiter, bad-record and sort settings
 * @param storeLocation temp store directory for this load; created if missing
 * @return the populated configuration
 * @throws Exception declared for callers; propagated from the helpers invoked here
 */
private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    // File.mkdirs() also returns false when the directory already exists, so only report a
    // failure when the path is not a usable directory afterwards.
    File storeDir = new File(storeLocation);
    if (!storeDir.mkdirs() && !storeDir.isDirectory()) {
        LOGGER.error("Error while creating the temp store path: " + storeLocation);
    }
    CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = CarbonDataProcessorUtil.getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    configuration.setTableIdentifier(identifier);
    configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    configuration.setHeader(loadModel.getCsvHeaderColumns());
    configuration.setPartitionId(loadModel.getPartitionId());
    configuration.setSegmentId(loadModel.getSegmentId());
    configuration.setTaskNo(loadModel.getTaskNo());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
    // The following load-model values are comma-separated; the second element is the setting.
    // NOTE(review): presumably encoded as "key,value" - confirm; a value without a comma
    // would fail here with ArrayIndexOutOfBoundsException.
    configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
    CarbonMetadata.getInstance().addCarbonTable(carbonTable);
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
    Map<String, String> dateFormatMap = CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
    List<DataField> dataFields = new ArrayList<>();
    List<DataField> complexDataFields = new ArrayList<>();
    // Field order: non-complex dimensions first, then complex dimensions, then measures.
    for (CarbonColumn column : dimensions) {
        DataField dataField = new DataField(column);
        dataField.setDateFormat(dateFormatMap.get(column.getColName()));
        if (column.isComplex()) {
            complexDataFields.add(dataField);
        } else {
            dataFields.add(dataField);
        }
    }
    dataFields.addAll(complexDataFields);
    for (CarbonColumn column : measures) {
        // This dummy measure is added when no measure is present; it does not need loading.
        if (!(column.getColName().equals("default_dummy_measure"))) {
            dataFields.add(new DataField(column));
        }
    }
    configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
    configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
    // configuration for one pass load: dictionary server info
    configuration.setUseOnePass(loadModel.getUseOnePass());
    configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
    configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
    configuration.setPreFetch(loadModel.isPreFetch());
    configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
    configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());
    return configuration;
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) File(java.io.File)

Aggregations

CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)24 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)15 ArrayList (java.util.ArrayList)10 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)8 QueryMeasure (org.apache.carbondata.core.scan.model.QueryMeasure)5 HashSet (java.util.HashSet)4 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)4 DataType (org.apache.carbondata.core.metadata.datatype.DataType)3 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)3 QueryDimension (org.apache.carbondata.core.scan.model.QueryDimension)3 Test (org.junit.Test)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2 Set (java.util.Set)2 Cache (org.apache.carbondata.core.cache.Cache)2 Dictionary (org.apache.carbondata.core.cache.dictionary.Dictionary)2 DictionaryColumnUniqueIdentifier (org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier)2 ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier)2 BlockExecutionInfo (org.apache.carbondata.core.scan.executor.infos.BlockExecutionInfo)2 MeasureInfo (org.apache.carbondata.core.scan.executor.infos.MeasureInfo)2