Search in sources :

Example 46 with CarbonDimension

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension in project carbondata by apache.

the class RangeValueFilterExecuterImpl method initDimensionBlockIndexes.

/**
 * Looks up the filter dimension among the dimensions of the current block and,
 * when it is present, records the indexes needed to filter on it: the column
 * index on the evaluator info and the block index on this executer.
 * When the dimension is not part of the current block nothing is modified.
 */
private void initDimensionBlockIndexes() {
    CarbonDimension blockDimension = segmentProperties.getDimensionFromCurrentBlock(dimColEvaluatorInfo.getDimension());
    if (blockDimension == null) {
        // the filter dimension does not exist in this block, so there is nothing to initialise
        return;
    }
    dimColEvaluatorInfo.setColumnIndex(blockDimension.getOrdinal());
    // translate the dimension ordinal into the index of the block that stores it
    this.dimensionBlocksIndex = segmentProperties.getDimensionOrdinalToBlockMapping().get(blockDimension.getOrdinal());
    isDimensionPresentInCurrentBlock = true;
}
Also used : CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 47 with CarbonDimension

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension in project carbondata by apache.

the class FilterUtil method getStartKeyWithFilter.

/**
 * Updates the start key array from the include filters of each dictionary
 * dimension and records a snapshot of the start key per processed dimension.
 *
 * A dimension is skipped when it has no filter values, is not dictionary
 * encoded, has at least one exclude filter, or is not present in the current
 * block (restructure scenario).
 *
 * @param dimensionFilter   filters to apply, keyed by the dimension they belong to
 * @param segmentProperties used to resolve each dimension against the current block
 * @param startKey          start key array, raised in place where a filter's first
 *                          surrogate is larger than the current entry
 * @param startKeyList      receives a copy of {@code startKey} for every dimension
 *                          that was not skipped
 */
private static void getStartKeyWithFilter(Map<CarbonDimension, List<DimColumnFilterInfo>> dimensionFilter, SegmentProperties segmentProperties, long[] startKey, List<long[]> startKeyList) {
    for (Map.Entry<CarbonDimension, List<DimColumnFilterInfo>> filterEntry : dimensionFilter.entrySet()) {
        List<DimColumnFilterInfo> filterInfos = filterEntry.getValue();
        if (null == filterInfos || !filterEntry.getKey().hasEncoding(Encoding.DICTIONARY)) {
            continue;
        }
        // every filter is inspected; a single exclude filter disqualifies the dimension
        boolean hasExcludeFilter = false;
        for (DimColumnFilterInfo filterInfo : filterInfos) {
            hasExcludeFilter |= !filterInfo.isIncludeFilter();
        }
        if (hasExcludeFilter) {
            continue;
        }
        // a dimension missing from the current block cannot take part in start key
        // formation (applicable for restructure scenarios)
        CarbonDimension blockDimension = segmentProperties.getDimensionFromCurrentBlock(filterEntry.getKey());
        if (null == blockDimension) {
            continue;
        }
        int keyOrdinal = blockDimension.getKeyOrdinal();
        for (DimColumnFilterInfo filterInfo : filterInfos) {
            if (keyOrdinal >= startKey.length) {
                continue;
            }
            // keep the larger of the current entry and the filter's first surrogate
            if (startKey[keyOrdinal] < filterInfo.getFilterList().get(0)) {
                startKey[keyOrdinal] = filterInfo.getFilterList().get(0);
            }
        }
        // store an independent copy so later updates to startKey do not alter it
        startKeyList.add(startKey.clone());
    }
}
Also used : List(java.util.List) ArrayList(java.util.ArrayList) Map(java.util.Map) SortedMap(java.util.SortedMap) TreeMap(java.util.TreeMap) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 48 with CarbonDimension

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension in project carbondata by apache.

the class StoreCreator method writeDictionary.

/**
 * Writes a dictionary file and a sort index file for every dimension of the
 * fact table, using the distinct values found in the given delimited file.
 *
 * The first line of the file is treated as a header and skipped. Each following
 * line is split on {@code ","}, and the value at position {@code i} is collected
 * for the dimension at position {@code i}.
 *
 * @param factFilePath path of the comma separated fact file to read
 * @param table        table whose fact-table dimensions get a dictionary
 * @throws Exception if reading the file or writing any dictionary artifact fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    // collect the distinct values per dimension; the reader is closed even when
    // reading or splitting a line fails
    BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
    try {
        // the first line is the header and carries no data
        reader.readLine();
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    } finally {
        reader.close();
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            // release the writer even when a value could not be written
            writer.close();
        }
        // commit only after a successful write and close, as in the original call order
        writer.commit();
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        // no new distinct values are passed; the sort info is derived from the dictionary just written
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 49 with CarbonDimension

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension in project carbondata by apache.

the class SegmentProperties method intialiseColGroups.

/**
 * Builds the column group model from the dictionary encoded dimensions.
 * Consecutive dimensions that share a column group id other than -1 end up in
 * the same group; every other dictionary dimension forms a group of its own,
 * e.g. {{1},{2,3,4},{5},{6},{7,8,9}}. Dimensions without dictionary encoding
 * are ignored.
 *
 * NOTE(review): a group is only closed while visiting a dictionary dimension,
 * so a pending group followed solely by non-dictionary dimensions with the same
 * group id would never be added - confirm this cannot occur.
 */
private void intialiseColGroups() {
    List<List<Integer>> groupedOrdinals = new ArrayList<List<Integer>>();
    List<Integer> currentGroup = new ArrayList<Integer>();
    int lastIndex = dimensions.size() - 1;
    for (int idx = 0; idx <= lastIndex; idx++) {
        CarbonDimension current = dimensions.get(idx);
        if (current.hasEncoding(Encoding.DICTIONARY)) {
            currentGroup.add(current.getOrdinal());
            if (idx == lastIndex) {
                // the last dimension always closes the group that is still open
                groupedOrdinals.add(currentGroup);
            } else {
                int currentGroupId = current.columnGroupId();
                int followingGroupId = dimensions.get(idx + 1).columnGroupId();
                // close the group unless the next dimension continues the same real group
                if (currentGroupId != followingGroupId || currentGroupId == -1) {
                    groupedOrdinals.add(currentGroup);
                    currentGroup = new ArrayList<Integer>();
                }
            }
        }
    }
    // flatten the nested lists into the primitive layout expected by CarbonUtil
    int[][] colGroups = new int[groupedOrdinals.size()][];
    int row = 0;
    for (List<Integer> ordinals : groupedOrdinals) {
        int[] flattened = new int[ordinals.size()];
        for (int col = 0; col < flattened.length; col++) {
            flattened[col] = ordinals.get(col);
        }
        colGroups[row++] = flattened;
    }
    this.colGroupModel = CarbonUtil.getColGroupModel(colGroups);
}
Also used : ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 50 with CarbonDimension

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension in project carbondata by apache.

the class SegmentProperties method fillKeyGeneratorDetails.

/**
 * Fills the key generator details for both kinds of key generator. These are
 * required during both query execution and data loading.
 *
 * The method sets, in this order: dimensionPartitions, dimensionKeyGenerator,
 * sortColumnsGenerator, fixedLengthKeySplitter, eachDimColumnValueSize and
 * eachComplexDimColumnValueSize.
 */
private void fillKeyGeneratorDetails() {
    // dimensionPartitionList describes how the dictionary dimension values are stored:
    // one entry per stored column, holding the number of dimensions in it
    // (1 for a columnar dimension, the group size for a column group).
    // isDictionaryColumn gets one entry per non-dictionary dimension (false) and one
    // entry per entry of dimensionPartitionList (true), in dimension order.
    List<Integer> dimensionPartitionList = new ArrayList<Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    List<Boolean> isDictionaryColumn = new ArrayList<Boolean>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    int prvcolumnGroupId = -1;
    int counter = 0;
    while (counter < dimensions.size()) {
        CarbonDimension carbonDimension = dimensions.get(counter);
        // if dimension is not a part of mdkey then no need to add it to the partition list;
        // note that prvcolumnGroupId is left unchanged for such dimensions
        if (!carbonDimension.getEncoder().contains(Encoding.DICTIONARY)) {
            isDictionaryColumn.add(false);
            counter++;
            continue;
        }
        // a columnar dimension is stored on its own, so add a partition of size one
        if (carbonDimension.isColumnar()) {
            dimensionPartitionList.add(1);
            isDictionaryColumn.add(true);
        }
        // a non-columnar dimension belongs to a column group: either extend the
        // partition of the previous dimension or start a new one
        if (!carbonDimension.isColumnar() && carbonDimension.columnGroupId() == prvcolumnGroupId) {
            // incrementing the previous value of the list as it is in same column group
            dimensionPartitionList.set(dimensionPartitionList.size() - 1, dimensionPartitionList.get(dimensionPartitionList.size() - 1) + 1);
        } else if (!carbonDimension.isColumnar()) {
            // first dimension of a new column group
            dimensionPartitionList.add(1);
            isDictionaryColumn.add(true);
        }
        prvcolumnGroupId = carbonDimension.columnGroupId();
        counter++;
    }
    // get the partitioner
    dimensionPartitions = ArrayUtils.toPrimitive(dimensionPartitionList.toArray(new Integer[dimensionPartitionList.size()]));
    // get the bit length of each column
    int[] bitLength = CarbonUtil.getDimensionBitLength(dimColumnsCardinality, dimensionPartitions);
    // create a key generator
    this.dimensionKeyGenerator = new MultiDimKeyVarLengthGenerator(bitLength);
    // when every bit length entry belongs to a dictionary sort column the dimension key
    // generator is reused; otherwise a generator over the leading entries is created
    if (this.getNumberOfDictSortColumns() == bitLength.length) {
        this.sortColumnsGenerator = this.dimensionKeyGenerator;
    } else {
        int numberOfDictSortColumns = this.getNumberOfDictSortColumns();
        int[] sortColumnBitLength = new int[numberOfDictSortColumns];
        System.arraycopy(bitLength, 0, sortColumnBitLength, 0, numberOfDictSortColumns);
        this.sortColumnsGenerator = new MultiDimKeyVarLengthGenerator(sortColumnBitLength);
    }
    this.fixedLengthKeySplitter = new MultiDimKeyVarLengthVariableSplitGenerator(bitLength, dimensionPartitions);
    // get the size of each value in file block
    int[] dictionayDimColumnValueSize = fixedLengthKeySplitter.getBlockKeySize();
    // index walks dictionayDimColumnValueSize, which has no entries for non-dictionary columns
    int index = -1;
    this.eachDimColumnValueSize = new int[isDictionaryColumn.size()];
    for (int i = 0; i < eachDimColumnValueSize.length; i++) {
        if (!isDictionaryColumn.get(i)) {
            // non-dictionary columns are marked with -1 instead of a block key size
            eachDimColumnValueSize[i] = -1;
            continue;
        }
        eachDimColumnValueSize[i] = dictionayDimColumnValueSize[++index];
    }
    if (complexDimensions.size() > 0) {
        int[] complexDimesionParition = new int[complexDimColumnCardinality.length];
        // as complex dimension will be stored in column format add one
        Arrays.fill(complexDimesionParition, 1);
        bitLength = CarbonUtil.getDimensionBitLength(complexDimColumnCardinality, complexDimesionParition);
        for (int i = 0; i < bitLength.length; i++) {
            // a cardinality of 0 is given the full 64 bits
            if (complexDimColumnCardinality[i] == 0) {
                bitLength[i] = 64;
            }
        }
        ColumnarSplitter keySplitter = new MultiDimKeyVarLengthVariableSplitGenerator(bitLength, complexDimesionParition);
        eachComplexDimColumnValueSize = keySplitter.getBlockKeySize();
    } else {
        // no complex dimensions: use an empty array
        eachComplexDimColumnValueSize = new int[0];
    }
}
Also used : MultiDimKeyVarLengthVariableSplitGenerator(org.apache.carbondata.core.keygenerator.columnar.impl.MultiDimKeyVarLengthVariableSplitGenerator) ColumnarSplitter(org.apache.carbondata.core.keygenerator.columnar.ColumnarSplitter) MultiDimKeyVarLengthGenerator(org.apache.carbondata.core.keygenerator.mdkey.MultiDimKeyVarLengthGenerator) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Aggregations

CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)85 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)31 ArrayList (java.util.ArrayList)26 Test (org.junit.Test)23 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)15 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)13 QueryDimension (org.apache.carbondata.core.scan.model.QueryDimension)9 CarbonMetadata (org.apache.carbondata.core.metadata.CarbonMetadata)7 HashSet (java.util.HashSet)6 List (java.util.List)5 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)5 Expression (org.apache.carbondata.core.scan.expression.Expression)5 RangeExpression (org.apache.carbondata.core.scan.expression.logical.RangeExpression)5 FilterOptimizer (org.apache.carbondata.core.scan.filter.intf.FilterOptimizer)5 FilterOptimizerBasic (org.apache.carbondata.core.scan.filter.intf.FilterOptimizerBasic)5 RangeFilterOptmizer (org.apache.carbondata.core.scan.filter.optimizer.RangeFilterOptmizer)5 Map (java.util.Map)4 SortedMap (java.util.SortedMap)4 TreeMap (java.util.TreeMap)4 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)4