Search in sources :

Example 11 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class DictionaryDecodeReadSupport method initialize.

/**
 * Prepares per-column decoding state; this initialization is done inside the executor task.
 *
 * <p>The data type of every column is recorded. In addition, for each column that is
 * dictionary encoded, is not direct-dictionary encoded and is not complex, the forward
 * dictionary is fetched from the cache and stored at the same index as the column. The
 * dictionary slot of every other column is left {@code null}.
 *
 * @param carbonColumns column list
 * @param absoluteTableIdentifier table identifier
 * @throws IOException declared by the overridden method; propagated from the cache lookup
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException {
    this.carbonColumns = carbonColumns;
    dictionaries = new Dictionary[carbonColumns.length];
    dataTypes = new DataType[carbonColumns.length];
    for (int idx = 0; idx < carbonColumns.length; idx++) {
        CarbonColumn column = carbonColumns[idx];
        boolean needsForwardDictionary = column.hasEncoding(Encoding.DICTIONARY)
            && !column.hasEncoding(Encoding.DIRECT_DICTIONARY)
            && !column.isComplex();
        if (!needsForwardDictionary) {
            // Nothing to decode through a dictionary; only the data type is tracked.
            dataTypes[idx] = column.getDataType();
            continue;
        }
        Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
            CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY, absoluteTableIdentifier.getStorePath());
        dataTypes[idx] = column.getDataType();
        DictionaryColumnUniqueIdentifier dictionaryKey = new DictionaryColumnUniqueIdentifier(
            absoluteTableIdentifier.getCarbonTableIdentifier(), column.getColumnIdentifier(), dataTypes[idx]);
        dictionaries[idx] = forwardCache.get(dictionaryKey);
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CacheProvider(org.apache.carbondata.core.cache.CacheProvider)

Example 12 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class QueryUtil method getDictionaryMap.

/**
 * Builds the mapping from dictionary column id to the forward dictionary of that column.
 *
 * <p>The dictionaries are fetched from the forward dictionary cache in a single
 * {@code getAll} call and paired with the column ids by position.
 *
 * @param dictionaryColumnIdList  dictionary column list
 * @param absoluteTableIdentifier absolute table identifier
 * @return dictionary mapping, keyed by column id
 * @throws IOException propagated from the calls made while loading the dictionaries
 */
private static Map<String, Dictionary> getDictionaryMap(List<String> dictionaryColumnIdList, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException {
    // Cache keys, one per requested column id.
    List<DictionaryColumnUniqueIdentifier> cacheKeys =
        getDictionaryColumnUniqueIdentifierList(dictionaryColumnIdList, absoluteTableIdentifier.getCarbonTableIdentifier());
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
        CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY, absoluteTableIdentifier.getStorePath());
    List<Dictionary> loadedDictionaries = forwardCache.getAll(cacheKeys);
    Map<String, Dictionary> dictionaryByColumnId = new HashMap<>(loadedDictionaries.size());
    int position = 0;
    while (position < cacheKeys.size()) {
        // TODO: null check for column dictionary, if cache size is less it
        // might return null here, in that case throw exception
        String columnId = dictionaryColumnIdList.get(position);
        Dictionary dictionary = loadedDictionaries.get(position);
        dictionaryByColumnId.put(columnId, dictionary);
        position++;
    }
    return dictionaryByColumnId;
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) HashMap(java.util.HashMap) CacheProvider(org.apache.carbondata.core.cache.CacheProvider)

Example 13 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class StoreCreator method writeDictionary.

/**
 * Writes a dictionary file and a sort-index file for every dimension of {@code table},
 * using the distinct values found in a comma-separated fact file.
 *
 * <p>The first line of the fact file is treated as a header and skipped. For every following
 * line, the value at position {@code i} is collected for dimension {@code i}, so each line
 * must contain at least as many comma-separated fields as the table has dimensions.
 *
 * @param factFilePath path of the comma-separated fact file to read
 * @param table table whose dimensions receive dictionary and sort-index files
 * @throws Exception if the fact file cannot be read or any dictionary/sort-index write fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getTableName());
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    // try-with-resources: the reader is released even when reading or splitting a line fails.
    try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
        // The header line carries no data; consume and discard it.
        reader.readLine();
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier, columnIdentifier.getDataType());
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            // Close even if a write fails; on success the original close-then-commit order is kept.
            writer.close();
        }
        writer.commit();
        // NOTE(review): `identifier` is not declared in this method; it is presumably a field of
        // the enclosing class - confirm it refers to the same table as `table`.
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(identifier, columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 14 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class CarbonStreamInputFormat method fillChildren.

/**
 * Recursively attaches a query type for every child of {@code dimension} to
 * {@code parentQueryType}.
 *
 * <p>Array and struct children get an {@code ArrayQueryType} / {@code StructQueryType}; every
 * other child gets a {@code PrimitiveQueryType} whose dictionary is looked up in {@code cache}.
 * Each child is added to the parent exactly once, after which its own children (if any) are
 * processed with the newly created query type as their parent.
 *
 * @param carbontable table the dimension belongs to; supplies the table identifier and the
 *                    dictionary path table property
 * @param parentQueryType query type that receives the children
 * @param dimension dimension whose children are processed
 * @param parentBlockIndex block index of the parent; pre-incremented for each child created
 * @param cache dictionary cache used for the non-array, non-struct children
 * @throws IOException declared for the calls made in the body (e.g. the dictionary lookup)
 */
private static void fillChildren(CarbonTable carbontable, GenericQueryType parentQueryType, CarbonDimension dimension, int parentBlockIndex, Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache) throws IOException {
    for (int i = 0; i < dimension.getNumberOfChild(); i++) {
        CarbonDimension child = dimension.getListOfChildDimensions().get(i);
        DataType dataType = child.getDataType();
        GenericQueryType queryType = null;
        if (DataTypes.isArrayType(dataType)) {
            queryType = new ArrayQueryType(child.getColName(), dimension.getColName(), ++parentBlockIndex);
        } else if (DataTypes.isStructType(dataType)) {
            // The struct child is registered by the shared addChildren call below; adding it
            // here as well would put the same query type into the parent twice.
            queryType = new StructQueryType(child.getColName(), dimension.getColName(), ++parentBlockIndex);
        } else {
            boolean isDirectDictionary = CarbonUtil.hasEncoding(child.getEncoder(), Encoding.DIRECT_DICTIONARY);
            String dictionaryPath = carbontable.getTableInfo().getFactTable().getTableProperties().get(CarbonCommonConstants.DICTIONARY_PATH);
            DictionaryColumnUniqueIdentifier dictionarIdentifier = new DictionaryColumnUniqueIdentifier(carbontable.getAbsoluteTableIdentifier(), child.getColumnIdentifier(), child.getDataType(), dictionaryPath);
            queryType = new PrimitiveQueryType(child.getColName(), dimension.getColName(), ++parentBlockIndex, child.getDataType(), 4, cache.get(dictionarIdentifier), isDirectDictionary);
        }
        parentQueryType.addChildren(queryType);
        if (child.getNumberOfChild() > 0) {
            // NOTE(review): parentBlockIndex is an int passed by value, so increments made inside
            // the recursive call are not visible to later siblings in this loop - confirm that
            // is intended.
            fillChildren(carbontable, queryType, child, parentBlockIndex, cache);
        }
    }
}
Also used : DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) DataType(org.apache.carbondata.core.metadata.datatype.DataType) GenericQueryType(org.apache.carbondata.core.scan.filter.GenericQueryType) StructQueryType(org.apache.carbondata.core.scan.complextypes.StructQueryType) PrimitiveQueryType(org.apache.carbondata.core.scan.complextypes.PrimitiveQueryType) ArrayQueryType(org.apache.carbondata.core.scan.complextypes.ArrayQueryType) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 15 with DictionaryColumnUniqueIdentifier

use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.

the class CacheProvider method createDictionaryCacheForGivenType.

/**
 * Creates the cache implementation matching the given cache type and registers it in
 * {@code cacheTypeToCacheMap} under that type.
 *
 * <p>All created caches are backed by the shared {@code carbonLRUCache}. If the type matches
 * none of the known constants, {@code null} is registered for it.
 *
 * @param cacheType       type of cache
 */
private void createDictionaryCacheForGivenType(CacheType cacheType) {
    Cache cacheObject = null;
    // Constants are qualified with the class name; reading them through the instance
    // variable hides the fact that they are static.
    if (cacheType.equals(CacheType.REVERSE_DICTIONARY)) {
        cacheObject = new ReverseDictionaryCache<DictionaryColumnUniqueIdentifier, Dictionary>(carbonLRUCache);
    } else if (cacheType.equals(CacheType.FORWARD_DICTIONARY)) {
        cacheObject = new ForwardDictionaryCache<DictionaryColumnUniqueIdentifier, Dictionary>(carbonLRUCache);
    } else if (cacheType.equals(CacheType.EXECUTOR_BTREE)) {
        cacheObject = new BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex>(carbonLRUCache);
    } else if (cacheType.equals(CacheType.DRIVER_BTREE)) {
        cacheObject = new SegmentTaskIndexStore(carbonLRUCache);
    } else if (cacheType.equals(CacheType.DRIVER_BLOCKLET_DATAMAP)) {
        cacheObject = new BlockletDataMapIndexStore(carbonLRUCache);
    }
    cacheTypeToCacheMap.put(cacheType, cacheObject);
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) ForwardDictionaryCache(org.apache.carbondata.core.cache.dictionary.ForwardDictionaryCache) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) TableBlockUniqueIdentifier(org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore) BlockletDataMapIndexStore(org.apache.carbondata.core.indexstore.BlockletDataMapIndexStore) ReverseDictionaryCache(org.apache.carbondata.core.cache.dictionary.ReverseDictionaryCache) ForwardDictionaryCache(org.apache.carbondata.core.cache.dictionary.ForwardDictionaryCache)

Aggregations

DictionaryColumnUniqueIdentifier (org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier)22 Dictionary (org.apache.carbondata.core.cache.dictionary.Dictionary)14 ColumnIdentifier (org.apache.carbondata.core.metadata.ColumnIdentifier)11 CacheProvider (org.apache.carbondata.core.cache.CacheProvider)7 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)6 CarbonDictionaryWriter (org.apache.carbondata.core.writer.CarbonDictionaryWriter)6 ArrayList (java.util.ArrayList)5 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)5 CarbonDictionaryWriterImpl (org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl)5 CarbonDictionarySortIndexWriter (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter)5 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)4 CarbonDictionarySortIndexWriterImpl (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl)4 CarbonDictionarySortInfo (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo)4 CarbonDictionarySortInfoPreparator (org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator)4 BufferedReader (java.io.BufferedReader)3 HashSet (java.util.HashSet)3 Set (java.util.Set)3 Cache (org.apache.carbondata.core.cache.Cache)3 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)3 FileReader (java.io.FileReader)2