use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class IncrementalColumnDictionaryGenerator method writeSortIndex.
/**
 * Prepares the sort information for the given distinct values and writes both the
 * sort index and the inverted sort index through a sort index writer obtained from
 * the dictionary service. The writer is closed even when writing fails.
 *
 * @param distinctValues          values handed to the sort info preparator
 * @param dictionary              dictionary handed to the sort info preparator
 * @param dictionaryService       service used to obtain the sort index writer
 * @param absoluteTableIdentifier table part of the dictionary column identifier
 * @param columnIdentifier        column part of the dictionary column identifier;
 *                                its data type is also stored in that identifier
 * @throws IOException declared for failures while preparing, writing or closing
 */
private void writeSortIndex(List<String> distinctValues, Dictionary dictionary, DictionaryService dictionaryService, AbsoluteTableIdentifier absoluteTableIdentifier, ColumnIdentifier columnIdentifier) throws IOException {
    DictionaryColumnUniqueIdentifier uniqueIdentifier =
        new DictionaryColumnUniqueIdentifier(
            absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType());
    CarbonDictionarySortIndexWriter sortIndexWriter = null;
    try {
        // Sort info is computed with the data type of the dimension held by this generator.
        CarbonDictionarySortInfo sortInfo =
            new CarbonDictionarySortInfoPreparator()
                .getDictionarySortInfo(distinctValues, dictionary, dimension.getDataType());
        sortIndexWriter = dictionaryService.getDictionarySortIndexWriter(uniqueIdentifier);
        sortIndexWriter.writeSortIndex(sortInfo.getSortIndex());
        sortIndexWriter.writeInvertedSortIndex(sortInfo.getSortIndexInverted());
    } finally {
        // The writer stays null when preparation or writer creation failed.
        if (sortIndexWriter != null) {
            sortIndexWriter.close();
        }
    }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class IncrementalColumnDictionaryGenerator method writeDictionaryData.
/**
 * Writes the dictionary data of this generator's dimension: the dictionary itself,
 * the sort index (only when new distinct values were written) and the metadata.
 * Timings of the individual phases are reported through the audit log.
 *
 * <p>When a dictionary file already exists for the column, the reverse dictionary is
 * fetched from the cache first. That cache entry is released in a {@code finally}
 * block so that a failure in any write phase does not leave it held.
 *
 * @throws IOException declared for failures while reading or writing dictionary files
 */
@Override
public void writeDictionaryData() throws IOException {
    // initialize params
    AbsoluteTableIdentifier absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier();
    ColumnIdentifier columnIdentifier = dimension.getColumnIdentifier();
    DictionaryService dictionaryService = CarbonCommonFactory.getDictionaryService();
    // create dictionary cache from dictionary File
    DictionaryColumnUniqueIdentifier identifier = new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, columnIdentifier.getDataType());
    boolean isDictExists = CarbonUtil.isFileExistsForGivenColumn(identifier);
    Dictionary dictionary = null;
    try {
        long t1 = System.currentTimeMillis();
        if (isDictExists) {
            Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
            dictionary = dictCache.get(identifier);
        }
        long dictCacheTime = System.currentTimeMillis() - t1;
        long t2 = System.currentTimeMillis();
        // write dictionary
        CarbonDictionaryWriter dictionaryWriter = dictionaryService.getDictionaryWriter(identifier);
        List<String> distinctValues = writeDictionary(dictionaryWriter, isDictExists);
        long dictWriteTime = System.currentTimeMillis() - t2;
        long t3 = System.currentTimeMillis();
        // write sort index only when something new was added to the dictionary
        if (distinctValues.size() > 0) {
            writeSortIndex(distinctValues, dictionary, dictionaryService, absoluteTableIdentifier, columnIdentifier);
        }
        long sortIndexWriteTime = System.currentTimeMillis() - t3;
        // update Meta Data
        updateMetaData(dictionaryWriter);
        LOGGER.audit("\n columnName: " + dimension.getColName() + "\n columnId: " + dimension.getColumnId() + "\n new distinct values count: " + distinctValues.size() + "\n create dictionary cache: " + dictCacheTime + "\n sort list, distinct and write: " + dictWriteTime + "\n write sort info: " + sortIndexWriteTime);
    } finally {
        // Release the cached dictionary on both the success and the failure path.
        // A non-null dictionary implies that the dictionary file existed.
        if (null != dictionary) {
            CarbonUtil.clearDictionaryCache(dictionary);
        }
    }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class StoreCreator method writeDictionary.
/**
 * Builds dictionary and sort index files for the dimensions of {@code table} from a
 * comma separated fact file.
 *
 * <p>The file is read once as UTF-8; for the i-th dimension the distinct values of
 * the i-th field of every line are collected. The reader is closed as soon as reading
 * is finished (also on failure). Afterwards, per dimension, the distinct values are
 * written with a dictionary writer, the dictionary is loaded through the reverse
 * dictionary cache and the sort index plus inverted sort index are written.
 *
 * @param factFilePath path of the fact file to read
 * @param table        table whose dimensions get dictionary files
 * @throws Exception declared for any failure while reading the file or writing dictionaries
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims;
    List<Set<String>> distinctValuesPerDimension;
    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(factFilePath), "UTF-8"));
    try {
        dims = table.getDimensionByTableName(table.getTableName());
        distinctValuesPerDimension = new ArrayList<Set<String>>(dims.size());
        for (int i = 0; i < dims.size(); i++) {
            distinctValuesPerDimension.add(new HashSet<String>());
        }
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            // Field i of the line belongs to dimension i.
            for (int i = 0; i < distinctValuesPerDimension.size(); i++) {
                distinctValuesPerDimension.get(i).add(data[i]);
            }
            line = reader.readLine();
        }
    } finally {
        // The file content is fully consumed (or reading failed); do not keep the handle open
        // while the dictionaries are written.
        reader.close();
    }
    Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
    for (int i = 0; i < distinctValuesPerDimension.size(); i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier = new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier, columnIdentifier.getDataType());
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
        for (String value : distinctValuesPerDimension.get(i)) {
            writer.write(value);
        }
        writer.close();
        writer.commit();
        // NOTE(review): absoluteTableIdentifier is not declared in this method; presumably a
        // static field of the enclosing class. Confirm it matches table.getAbsoluteTableIdentifier().
        Dictionary dict = dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier, columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        // An empty list of new values is passed together with the dictionary loaded above.
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class CarbonDictionaryDecodeReadSupport method initialize.
/**
 * Stores the given columns and records the data type of each one. For every column
 * that is dictionary encoded, but neither direct-dictionary encoded nor complex, the
 * dictionary is additionally fetched from the forward dictionary cache.
 *
 * @param carbonColumns columns to prepare, in positional order
 * @param carbonTable   table supplying the absolute table identifier and the
 *                      dictionary path table property
 * @throws IOException declared for failures while fetching a dictionary from the cache
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, CarbonTable carbonTable) throws IOException {
    this.carbonColumns = carbonColumns;
    dictionaries = new Dictionary[carbonColumns.length];
    dataTypes = new DataType[carbonColumns.length];
    for (int idx = 0; idx < carbonColumns.length; idx++) {
        CarbonColumn column = carbonColumns[idx];
        boolean needsDictionary = column.hasEncoding(Encoding.DICTIONARY)
            && !(column.hasEncoding(Encoding.DIRECT_DICTIONARY) || column.isComplex());
        if (!needsDictionary) {
            // Only the data type is recorded; the dictionary slot stays null.
            dataTypes[idx] = column.getDataType();
            continue;
        }
        Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
            CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY);
        dataTypes[idx] = column.getDataType();
        String dictionaryPath = carbonTable.getTableInfo()
            .getFactTable()
            .getTableProperties()
            .get(CarbonCommonConstants.DICTIONARY_PATH);
        DictionaryColumnUniqueIdentifier dictionaryKey = new DictionaryColumnUniqueIdentifier(
            carbonTable.getAbsoluteTableIdentifier(),
            column.getColumnIdentifier(),
            dataTypes[idx],
            dictionaryPath);
        dictionaries[idx] = forwardCache.get(dictionaryKey);
    }
}
use of org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier in project carbondata by apache.
the class DictionaryDecodeReadSupport method initialize.
/**
 * Remembers the given columns, fills the data type array for all of them and loads a
 * dictionary from the forward dictionary cache for each column that has dictionary
 * encoding without direct-dictionary encoding and is not complex.
 *
 * @param carbonColumns columns to prepare, in positional order
 * @param carbonTable   table from which the absolute table identifier and the
 *                      dictionary path table property are read
 * @throws IOException declared for failures while loading a dictionary from the cache
 */
@Override
public void initialize(CarbonColumn[] carbonColumns, CarbonTable carbonTable) throws IOException {
    this.carbonColumns = carbonColumns;
    dictionaries = new Dictionary[carbonColumns.length];
    dataTypes = new DataType[carbonColumns.length];
    int position = 0;
    for (CarbonColumn current : carbonColumns) {
        boolean withoutDictionary = !current.hasEncoding(Encoding.DICTIONARY)
            || current.hasEncoding(Encoding.DIRECT_DICTIONARY)
            || current.isComplex();
        if (withoutDictionary) {
            // No dictionary lookup for this column; its dictionary slot remains null.
            dataTypes[position] = current.getDataType();
        } else {
            CacheProvider provider = CacheProvider.getInstance();
            Cache<DictionaryColumnUniqueIdentifier, Dictionary> cache =
                provider.createCache(CacheType.FORWARD_DICTIONARY);
            dataTypes[position] = current.getDataType();
            String dictionaryPath = carbonTable.getTableInfo().getFactTable().getTableProperties()
                .get(CarbonCommonConstants.DICTIONARY_PATH);
            DictionaryColumnUniqueIdentifier lookupKey = new DictionaryColumnUniqueIdentifier(
                carbonTable.getAbsoluteTableIdentifier(),
                current.getColumnIdentifier(),
                dataTypes[position],
                dictionaryPath);
            dictionaries[position] = cache.get(lookupKey);
        }
        position++;
    }
}
Aggregations