Use of org.apache.carbondata.core.metadata.ColumnIdentifier in the Apache CarbonData project.
Class ForwardDictionaryCacheTest, method writeSortIndexFile.
/**
 * Derives the sort index and the inverted sort index for the given dictionary values and
 * writes both to the sort index file of the column.
 *
 * <p>The value at position {@code i} of {@code data} is assigned surrogate key {@code i + 1}.
 * The sort index lists the surrogate keys in the natural order of their values; the inverted
 * sort index holds, for each surrogate key, the 1-based rank of its value in that order.
 *
 * @param data dictionary values, in surrogate key order
 * @param columnId id of the column the sort index belongs to
 * @throws IOException declared for failures while writing or closing the sort index file
 */
private void writeSortIndexFile(List<String> data, String columnId) throws IOException {
  ColumnIdentifier columnIdentifier = new ColumnIdentifier(columnId, null, null);
  DictionaryColumnUniqueIdentifier uniqueIdentifier =
      new DictionaryColumnUniqueIdentifier(identifier, columnIdentifier,
          columnIdentifier.getDataType());

  // Surrogate keys are the 1-based positions of the values in the input list.
  Map<String, Integer> surrogateKeyByValue = new HashMap<>(data.size());
  int nextSurrogateKey = 1;
  for (String value : data) {
    surrogateKeyByValue.put(value, nextSurrogateKey++);
  }

  List<String> sortedValues = new ArrayList<>(surrogateKeyByValue.keySet());
  Collections.sort(sortedValues);

  List<Integer> sortIndex = new ArrayList<>(data.size());
  int[] rankBySurrogateKey = new int[sortedValues.size()];
  int rank = 0;
  for (String value : sortedValues) {
    int surrogate = surrogateKeyByValue.get(value);
    sortIndex.add(surrogate);
    rank++;
    // Surrogate keys start at 1, array slots at 0.
    rankBySurrogateKey[surrogate - 1] = rank;
  }

  List<Integer> invertedSortIndex = new ArrayList<>(data.size());
  for (int valueRank : rankBySurrogateKey) {
    invertedSortIndex.add(valueRank);
  }

  CarbonDictionarySortIndexWriter sortIndexWriter =
      new CarbonDictionarySortIndexWriterImpl(uniqueIdentifier);
  try {
    sortIndexWriter.writeSortIndex(sortIndex);
    sortIndexWriter.writeInvertedSortIndex(invertedSortIndex);
  } finally {
    sortIndexWriter.close();
  }
}
Use of org.apache.carbondata.core.metadata.ColumnIdentifier in the Apache CarbonData project.
Class StoreCreator, method writeDictionary.
/**
 * Writes a dictionary file and a sort index file for every dimension of the given table,
 * using the distinct values found in a comma-separated fact file.
 *
 * <p>The first line of the file is treated as a header and skipped. In every following line,
 * the field at position {@code i} is collected as a value of the {@code i}-th dimension.
 *
 * @param factFilePath path of the comma-separated fact file to read
 * @param table table whose dimensions get dictionary and sort index files
 * @throws Exception if reading the fact file or writing any of the dictionary files fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
  List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
  int dimensionCount = dims.size();
  List<Set<String>> distinctValues = new ArrayList<Set<String>>(dimensionCount);
  for (int i = 0; i < dimensionCount; i++) {
    distinctValues.add(new HashSet<String>());
  }

  // try-with-resources closes the reader even when reading or splitting a line fails.
  try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
    // The first line is the header; it carries no dictionary values.
    reader.readLine();
    String line = reader.readLine();
    while (line != null) {
      String[] data = line.split(",");
      for (int i = 0; i < dimensionCount; i++) {
        distinctValues.get(i).add(data[i]);
      }
      line = reader.readLine();
    }
  }

  Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache = CacheProvider.getInstance()
      .createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
  for (int i = 0; i < dimensionCount; i++) {
    CarbonDimension dimension = dims.get(i);
    ColumnIdentifier columnIdentifier =
        new ColumnIdentifier(dimension.getColumnId(), null, null);

    CarbonDictionaryWriter writer =
        new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(),
            absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
    for (String value : distinctValues.get(i)) {
      writer.write(value);
    }
    // Order kept from the original: close first, then commit.
    writer.close();
    writer.commit();

    // Load the dictionary that was just written, then derive its sort information.
    Dictionary dict = dictCache.get(
        new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(),
            columnIdentifier, dimension.getDataType()));
    CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
    List<String> newDistinctValues = new ArrayList<String>();
    CarbonDictionarySortInfo dictionarySortInfo =
        preparator.getDictionarySortInfo(newDistinctValues, dict, dimension.getDataType());

    CarbonDictionarySortIndexWriter sortIndexWriter =
        new CarbonDictionarySortIndexWriterImpl(
            absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier,
            absoluteTableIdentifier.getStorePath());
    try {
      sortIndexWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
      sortIndexWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
    } finally {
      sortIndexWriter.close();
    }
  }
}
Use of org.apache.carbondata.core.metadata.ColumnIdentifier in the Apache CarbonData project.
Class ManageDictionaryAndBTree, method removeDictionaryColumnFromCache.
/**
 * Invalidates the dictionary of a single column in the reverse dictionary cache and then in
 * the forward dictionary cache.
 *
 * @param carbonTableIdentifier identifier of the table that owns the column
 * @param storePath store path used to obtain the caches
 * @param columnId id of the column whose dictionary is invalidated
 */
public static void removeDictionaryColumnFromCache(CarbonTableIdentifier carbonTableIdentifier, String storePath, String columnId) {
  Cache<DictionaryColumnUniqueIdentifier, Dictionary> reverseCache =
      CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, storePath);

  // The same key addresses the column in both caches.
  ColumnIdentifier column = new ColumnIdentifier(columnId, null, null);
  DictionaryColumnUniqueIdentifier cacheKey =
      new DictionaryColumnUniqueIdentifier(carbonTableIdentifier, column);
  reverseCache.invalidate(cacheKey);

  Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
      CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY, storePath);
  forwardCache.invalidate(cacheKey);
}
Use of org.apache.carbondata.core.metadata.ColumnIdentifier in the Apache CarbonData project.
Class StoreCreator, method writeDictionary.
/**
 * Writes a dictionary file and a sort index file for every dimension of the given table,
 * using the distinct values found in a comma-separated fact file.
 *
 * <p>The first line of the file is treated as a header and skipped. In every following line,
 * the field at position {@code i} is collected as a value of the {@code i}-th dimension.
 *
 * @param factFilePath path of the comma-separated fact file to read
 * @param table table whose dimensions get dictionary and sort index files
 * @throws Exception if reading the fact file or writing any of the dictionary files fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
  List<CarbonDimension> dims = table.getDimensionByTableName(table.getTableName());
  int dimensionCount = dims.size();
  List<Set<String>> distinctValues = new ArrayList<Set<String>>(dimensionCount);
  for (int i = 0; i < dimensionCount; i++) {
    distinctValues.add(new HashSet<String>());
  }

  // try-with-resources closes the reader even when reading or splitting a line fails.
  try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
    // The first line is the header; it carries no dictionary values.
    reader.readLine();
    String line = reader.readLine();
    while (line != null) {
      String[] data = line.split(",");
      for (int i = 0; i < dimensionCount; i++) {
        distinctValues.get(i).add(data[i]);
      }
      line = reader.readLine();
    }
  }

  Cache<DictionaryColumnUniqueIdentifier, Dictionary> dictCache =
      CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
  for (int i = 0; i < dimensionCount; i++) {
    CarbonDimension dimension = dims.get(i);
    ColumnIdentifier columnIdentifier =
        new ColumnIdentifier(dimension.getColumnId(), null, null);
    // NOTE(review): this key uses table.getAbsoluteTableIdentifier() and the data type of
    // columnIdentifier (built above with two null arguments), while the cache lookup below
    // uses the `identifier` field and the dimension's data type - confirm that both keys
    // are meant to address the same dictionary.
    DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier =
        new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(),
            columnIdentifier, columnIdentifier.getDataType());

    CarbonDictionaryWriter writer =
        new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
    for (String value : distinctValues.get(i)) {
      writer.write(value);
    }
    // Order kept from the original: close first, then commit.
    writer.close();
    writer.commit();

    // Load the dictionary that was just written, then derive its sort information.
    Dictionary dict = dictCache.get(
        new DictionaryColumnUniqueIdentifier(identifier, columnIdentifier,
            dimension.getDataType()));
    CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
    List<String> newDistinctValues = new ArrayList<String>();
    CarbonDictionarySortInfo dictionarySortInfo =
        preparator.getDictionarySortInfo(newDistinctValues, dict, dimension.getDataType());

    CarbonDictionarySortIndexWriter sortIndexWriter =
        new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
    try {
      sortIndexWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
      sortIndexWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
    } finally {
      sortIndexWriter.close();
    }
  }
}
Use of org.apache.carbondata.core.metadata.ColumnIdentifier in the Apache CarbonData project.
Class ManageDictionaryAndBTree, method removeDictionaryColumnFromCache.
/**
 * Invalidates the dictionary of a single column in the reverse dictionary cache and then in
 * the forward dictionary cache.
 *
 * @param carbonTableIdentifier identifier of the table that owns the column
 * @param columnId id of the column whose dictionary is invalidated
 */
public static void removeDictionaryColumnFromCache(AbsoluteTableIdentifier carbonTableIdentifier, String columnId) {
  Cache<DictionaryColumnUniqueIdentifier, Dictionary> reverseCache =
      CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);

  // The same key addresses the column in both caches.
  ColumnIdentifier column = new ColumnIdentifier(columnId, null, null);
  DictionaryColumnUniqueIdentifier cacheKey =
      new DictionaryColumnUniqueIdentifier(carbonTableIdentifier, column);
  reverseCache.invalidate(cacheKey);

  Cache<DictionaryColumnUniqueIdentifier, Dictionary> forwardCache =
      CacheProvider.getInstance().createCache(CacheType.FORWARD_DICTIONARY);
  forwardCache.invalidate(cacheKey);
}
Aggregations