use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class DictionaryDecodeReadSupport method initialize.
/**
 * Prepares this read support for decoding; this runs inside the executor task.
 * The data type of every column is recorded, and for each column that is
 * dictionary encoded (but neither direct-dictionary encoded nor complex) the
 * forward dictionary is fetched from the cache and stored at the same index.
 * Entries of {@code dictionaries} for all other columns are left unset.
 *
 * @param carbonColumns column list
 * @param absoluteTableIdentifier table identifier
 * @throws IOException declared by the signature; presumably raised by the dictionary cache lookup
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException {
  this.carbonColumns = carbonColumns;
  final int columnCount = carbonColumns.length;
  dictionaries = new Dictionary[columnCount];
  dataTypes = new DataType[columnCount];
  for (int idx = 0; idx < columnCount; idx++) {
    CarbonColumn column = carbonColumns[idx];
    boolean needsForwardDictionary = column.hasEncoding(Encoding.DICTIONARY)
        && !column.hasEncoding(Encoding.DIRECT_DICTIONARY)
        && !column.isComplex();
    if (!needsForwardDictionary) {
      // No dictionary lookup required; only the data type is remembered.
      dataTypes[idx] = column.getDataType();
      continue;
    }
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
        CacheProvider.getInstance()
            .createCache(CacheType.FORWARD_DICTIONARY, absoluteTableIdentifier.getStorePath());
    DataType columnType = column.getDataType();
    dataTypes[idx] = columnType;
    DictionaryColumnUniqueIdentifier dictionaryKey = new DictionaryColumnUniqueIdentifier(
        absoluteTableIdentifier.getCarbonTableIdentifier(),
        column.getColumnIdentifier(),
        columnType);
    dictionaries[idx] = forwardCache.get(dictionaryKey);
  }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class QueryUtil method getDictionaryMap.
/**
 * Builds the mapping from dictionary column id to the dictionary loaded for
 * that column from the forward dictionary cache.
 *
 * @param dictionaryColumnIdList dictionary column list
 * @param absoluteTableIdentifier absolute table identifier
 * @return map keyed by column id, holding the dictionary returned by the cache
 *         at the same position
 * @throws IOException declared by the signature; presumably raised by the dictionary cache lookup
 */
private static Map<String, Dictionary> getDictionaryMap(List<String> dictionaryColumnIdList, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException {
  // One cache key per requested column id, in the same order as the id list.
  List<DictionaryColumnUniqueIdentifier> cacheKeys = getDictionaryColumnUniqueIdentifierList(
      dictionaryColumnIdList, absoluteTableIdentifier.getCarbonTableIdentifier());
  Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache = CacheProvider.getInstance()
      .createCache(CacheType.FORWARD_DICTIONARY, absoluteTableIdentifier.getStorePath());
  List<Dictionary> loadedDictionaries = forwardCache.getAll(cacheKeys);
  Map<String, Dictionary> dictionaryByColumnId = new HashMap<>(loadedDictionaries.size());
  int position = 0;
  while (position < cacheKeys.size()) {
    // TODO: null check for column dictionary, if cache size is less it
    // might return null here, in that case throw exception
    String columnId = dictionaryColumnIdList.get(position);
    Dictionary dictionary = loadedDictionaries.get(position);
    dictionaryByColumnId.put(columnId, dictionary);
    position++;
  }
  return dictionaryByColumnId;
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class StoreCreator method writeDictionary.
/**
 * Writes a dictionary file and the matching sort-index file for every
 * dimension of the given table, using the distinct values found in the
 * comma-separated fact file.
 *
 * The first line of the fact file is treated as a header and skipped. For
 * every following line, field {@code i} is collected as a value of the
 * {@code i}-th dimension.
 *
 * @param factFilePath path of the comma-separated fact file to read
 * @param table table whose dimensions get their dictionaries written
 * @throws Exception if reading the fact file or writing any dictionary or
 *         sort-index file fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
  List<CarbonDimension> dims;
  Set<String>[] set;
  // try-with-resources guarantees the reader is closed even when reading or
  // splitting a line throws; the reader is only needed for this collection phase.
  try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
    // Skip the header line; column order is taken from the table's dimensions instead.
    reader.readLine();
    dims = table.getDimensionByTableName(table.getTableName());
    set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
      set[i] = new HashSet<String>();
    }
    String line = reader.readLine();
    while (line != null) {
      String[] data = line.split(",");
      // Only the leading dims.size() fields of each row are dimension values.
      for (int i = 0; i < set.length; i++) {
        set[i].add(data[i]);
      }
      line = reader.readLine();
    }
  }

  Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
  for (int i = 0; i < set.length; i++) {
    ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
    DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier, columnIdentifier.getDataType());
    CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
    for (String value : set[i]) {
      writer.write(value);
    }
    // The writer is closed before commit, exactly as in the original sequence.
    writer.close();
    writer.commit();

    // Load the freshly written dictionary back through the reverse dictionary cache
    // so that the sort information can be derived from it.
    Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(identifier, columnIdentifier, dims.get(i).getDataType()));
    CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
    List<String> newDistinctValues = new ArrayList<String>();
    CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
    CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
    try {
      carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
      carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
    } finally {
      carbonDictionaryWriter.close();
    }
  }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class CarbonStreamInputFormat method fillChildren.
/**
 * Recursively attaches a query type for every child of {@code dimension} to
 * {@code parentQueryType}.
 *
 * Array children become an {@code ArrayQueryType}, struct children a
 * {@code StructQueryType}, and all other children a {@code PrimitiveQueryType}
 * whose dictionary is looked up in {@code cache}. Each child is added to the
 * parent exactly once; children that themselves have children are processed
 * recursively with the child's query type as the new parent.
 *
 * @param carbontable table the dimension belongs to; supplies the table identifier
 *        and the dictionary path table property
 * @param parentQueryType query type that receives the created children
 * @param dimension dimension whose child dimensions are processed
 * @param parentBlockIndex block index of the parent; pre-incremented for each child created
 * @param cache dictionary cache used for primitive children
 * @throws IOException declared by the signature; presumably raised by the dictionary cache lookup
 */
private static void fillChildren(CarbonTable carbontable, GenericQueryType parentQueryType, CarbonDimension dimension, int parentBlockIndex, Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache) throws IOException {
  for (int i = 0; i < dimension.getNumberOfChild(); i++) {
    CarbonDimension child = dimension.getListOfChildDimensions().get(i);
    DataType dataType = child.getDataType();
    GenericQueryType queryType = null;
    if (DataTypes.isArrayType(dataType)) {
      queryType = new ArrayQueryType(child.getColName(), dimension.getColName(), ++parentBlockIndex);
    } else if (DataTypes.isStructType(dataType)) {
      // The struct child is registered with the parent by the shared addChildren call
      // below; adding it here as well would attach the same child twice.
      queryType = new StructQueryType(child.getColName(), dimension.getColName(), ++parentBlockIndex);
    } else {
      boolean isDirectDictionary = CarbonUtil.hasEncoding(child.getEncoder(), Encoding.DIRECT_DICTIONARY);
      String dictionaryPath = carbontable.getTableInfo().getFactTable().getTableProperties().get(CarbonCommonConstants.DICTIONARY_PATH);
      DictionaryColumnUniqueIdentifier dictionarIdentifier = new DictionaryColumnUniqueIdentifier(carbontable.getAbsoluteTableIdentifier(), child.getColumnIdentifier(), child.getDataType(), dictionaryPath);
      // NOTE(review): the literal 4 is passed through unchanged; its meaning is defined by
      // PrimitiveQueryType and is not visible here.
      queryType = new PrimitiveQueryType(child.getColName(), dimension.getColName(), ++parentBlockIndex, child.getDataType(), 4, cache.get(dictionarIdentifier), isDirectDictionary);
    }
    parentQueryType.addChildren(queryType);
    if (child.getNumberOfChild() > 0) {
      // NOTE(review): parentBlockIndex is passed by value, so increments made in the
      // recursive call are not seen by later siblings here - confirm this is intended.
      fillChildren(carbontable, queryType, child, parentBlockIndex, cache);
    }
  }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class CacheProvider method createDictionaryCacheForGivenType.
/**
 * Creates the cache implementation for the given cache type and registers it
 * in {@code cacheTypeToCacheMap} under that type. Every cache created here is
 * backed by the shared {@code carbonLRUCache}.
 *
 * If {@code cacheType} matches none of the types handled below, {@code null}
 * is stored for it in the map.
 *
 * @param cacheType type of cache
 */
private void createDictionaryCacheForGivenType(CacheType cacheType) {
  Cache cacheObject = null;
  // The constants are static members, so they are referenced through the CacheType
  // class in every branch rather than through the cacheType instance.
  if (cacheType.equals(CacheType.REVERSE_DICTIONARY)) {
    cacheObject = new ReverseDictionaryCache<DictionaryColumnUniqueIdentifier, Dictionary>(carbonLRUCache);
  } else if (cacheType.equals(CacheType.FORWARD_DICTIONARY)) {
    cacheObject = new ForwardDictionaryCache<DictionaryColumnUniqueIdentifier, Dictionary>(carbonLRUCache);
  } else if (cacheType.equals(CacheType.EXECUTOR_BTREE)) {
    cacheObject = new BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex>(carbonLRUCache);
  } else if (cacheType.equals(CacheType.DRIVER_BTREE)) {
    cacheObject = new SegmentTaskIndexStore(carbonLRUCache);
  } else if (cacheType.equals(CacheType.DRIVER_BLOCKLET_DATAMAP)) {
    cacheObject = new BlockletDataMapIndexStore(carbonLRUCache);
  }
  cacheTypeToCacheMap.put(cacheType, cacheObject);
}
Aggregations