Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.
The class TableIndex, method prune.
/**
 * Pass the valid segments and prune the index using the filter expression.
 *
 * @param allSegments segments to be pruned
 * @param filter filter expression used for pruning
 * @param partitions partition specs used for partition pruning on a partitioned table
 * @return the pruned blocklets
 */
public List<ExtendedBlocklet> prune(List<Segment> allSegments, final IndexFilter filter, final List<PartitionSpec> partitions) throws IOException {
final List<ExtendedBlocklet> blocklets = new ArrayList<>();
List<Segment> segments = getCarbonSegments(allSegments);
final Map<Segment, List<Index>> indexes;
boolean isFilterPresent = filter != null && !filter.isEmpty();
Set<Path> partitionLocations = getPartitionLocations(partitions);
if (table.isHivePartitionTable() && isFilterPresent && !partitionLocations.isEmpty()) {
indexes = indexFactory.getIndexes(segments, partitionLocations, filter);
} else {
indexes = indexFactory.getIndexes(segments, filter);
}
if (indexes.isEmpty()) {
return blocklets;
}
// count the index files and indexes up front; the totals decide between
// single-thread and multi-thread pruning for both filter and non-filter queries
int totalFiles = 0;
int indexesCount = 0;
// when a filter is present, keep only the segments that survived segment or partition pruning
if (isFilterPresent) {
segments = new ArrayList<>(indexes.keySet());
}
for (Segment segment : segments) {
for (Index index : indexes.get(segment)) {
totalFiles += index.getNumberOfEntries();
indexesCount++;
}
}
int numOfThreadsForPruning = CarbonProperties.getNumOfThreadsForPruning();
int carbonDriverPruningMultiThreadEnableFilesCount = CarbonProperties.getDriverPruningMultiThreadEnableFilesCount();
// multi-thread pruning is only worthwhile for filter queries over enough indexes and files; otherwise there is no need of multi-thread pruning
if (numOfThreadsForPruning == 1 || indexesCount < numOfThreadsForPruning || totalFiles < carbonDriverPruningMultiThreadEnableFilesCount || !isFilterPresent) {
// driver should have minimum threads opened to support multiple concurrent queries.
if (!isFilterPresent) {
// if filter is not passed, then return all the blocklets.
return pruneWithoutFilter(segments, partitionLocations, blocklets);
}
return pruneWithFilter(segments, filter, partitionLocations, blocklets, indexes);
}
// otherwise prune the indexes using multiple threads
return pruneMultiThread(segments, filter, blocklets, indexes, totalFiles);
}
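
To make the branch above easier to read, here is a minimal standalone sketch of the same decision, assuming a hypothetical helper name (shouldPruneInSingleThread is not CarbonData API); the two thresholds are the values read from CarbonProperties above.

final class PruningDecisionSketch {
  // returns true when pruning should stay in the current driver thread:
  // multi-thread pruning pays off only when a filter is present and the work is large enough
  static boolean shouldPruneInSingleThread(int numOfThreadsForPruning, int indexesCount,
      int totalFiles, int multiThreadEnableFilesCount, boolean isFilterPresent) {
    return numOfThreadsForPruning == 1
        || indexesCount < numOfThreadsForPruning
        || totalFiles < multiThreadEnableFilesCount
        || !isFilterPresent;
  }
}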
Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.
The class BlockletIndexStore, method put.
@Override
public void put(TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper, BlockletIndexWrapper wrapper) throws IOException {
// an existing entry is never overwritten here; to replace an entry, invalidate it first
// and then use the put interface
if (null == getIfPresent(tableBlockIndexUniqueIdentifierWrapper)) {
List<BlockIndex> indexes = wrapper.getIndexes();
try {
for (BlockIndex blockIndex : indexes) {
blockIndex.convertToUnsafeDMStore();
}
// get cacheExpirationTime for table from tableProperties
long expirationTime = CarbonUtil.getExpiration_time(tableBlockIndexUniqueIdentifierWrapper.getCarbonTable());
// Locking is not required here because the LRU cache's add method is synchronized, so only
// one entry is added at a time, and an existing key is never overwritten
lruCache.put(tableBlockIndexUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier().getUniqueTableSegmentIdentifier(), wrapper, wrapper.getMemorySize(), expirationTime);
} catch (Throwable e) {
// clear all the memory acquired by index in case of any failure
for (Index blockletIndex : indexes) {
blockletIndex.clear();
}
throw new IOException("Problem in adding index to cache.", e);
}
}
}
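
The interesting part of put is the failure handling: the indexes are converted to unsafe storage before caching, so any memory already acquired must be released if the cache insert fails. A minimal sketch of that pattern, using hypothetical stand-in types (OffHeapIndex and the Runnable parameter are not CarbonData API):

import java.io.IOException;
import java.util.List;

final class PutSketch {
  interface OffHeapIndex {
    void clear();   // releases memory acquired by the index
  }

  static void cacheOrRelease(List<OffHeapIndex> indexes, Runnable cachePut) throws IOException {
    try {
      cachePut.run();             // stands in for lruCache.put(key, wrapper, size, expirationTime)
    } catch (Throwable t) {
      for (OffHeapIndex index : indexes) {
        index.clear();            // free whatever each index already acquired
      }
      throw new IOException("Problem in adding index to cache.", t);
    }
  }
}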
Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.
The class BlockletIndexStore, method get.
public BlockletIndexWrapper get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper, Map<String, Map<String, BlockMetaInfo>> segInfoCache) {
TableBlockIndexUniqueIdentifier identifier = identifierWrapper.getTableBlockIndexUniqueIdentifier();
String lruCacheKey = identifier.getUniqueTableSegmentIdentifier();
BlockletIndexWrapper blockletIndexWrapper = (BlockletIndexWrapper) lruCache.get(lruCacheKey);
List<BlockIndex> indexes = new ArrayList<>();
if (blockletIndexWrapper == null) {
try {
SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore(identifierWrapper.getConfiguration());
Set<String> filesRead = new HashSet<>();
String segmentFilePath = identifier.getIndexFilePath();
if (segInfoCache == null) {
segInfoCache = new HashMap<>();
}
Map<String, BlockMetaInfo> carbonDataFileBlockMetaInfoMapping = segInfoCache.get(segmentFilePath);
if (carbonDataFileBlockMetaInfoMapping == null) {
carbonDataFileBlockMetaInfoMapping = BlockletIndexUtil.createCarbonDataFileBlockMetaInfoMapping(segmentFilePath, identifierWrapper.getConfiguration());
segInfoCache.put(segmentFilePath, carbonDataFileBlockMetaInfoMapping);
}
// if the identifier is not a merge file we can directly load the indexes
if (identifier.getMergeIndexFileName() == null) {
List<DataFileFooter> indexInfos = new ArrayList<>();
Map<String, BlockMetaInfo> blockMetaInfoMap = BlockletIndexUtil.getBlockMetaInfoMap(identifierWrapper, indexFileStore, filesRead, carbonDataFileBlockMetaInfoMapping, indexInfos);
BlockIndex blockIndex = loadAndGetIndex(identifier, indexFileStore, blockMetaInfoMap, identifierWrapper.getCarbonTable(), identifierWrapper.isAddToUnsafe(), identifierWrapper.getConfiguration(), identifierWrapper.isSerializeDmStore(), indexInfos);
indexes.add(blockIndex);
blockletIndexWrapper = new BlockletIndexWrapper(identifier.getSegmentId(), indexes);
} else {
// if the identifier is a merge file then collect the index files and load the indexes
List<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = BlockletIndexUtil.getIndexFileIdentifiersFromMergeFile(identifier, indexFileStore);
for (TableBlockIndexUniqueIdentifier blockIndexUniqueIdentifier : tableBlockIndexUniqueIdentifiers) {
List<DataFileFooter> indexInfos = new ArrayList<>();
Map<String, BlockMetaInfo> blockMetaInfoMap = BlockletIndexUtil.getBlockMetaInfoMap(new TableBlockIndexUniqueIdentifierWrapper(blockIndexUniqueIdentifier, identifierWrapper.getCarbonTable()), indexFileStore, filesRead, carbonDataFileBlockMetaInfoMapping, indexInfos);
if (!blockMetaInfoMap.isEmpty()) {
BlockIndex blockIndex = loadAndGetIndex(blockIndexUniqueIdentifier, indexFileStore, blockMetaInfoMap, identifierWrapper.getCarbonTable(), identifierWrapper.isAddToUnsafe(), identifierWrapper.getConfiguration(), identifierWrapper.isSerializeDmStore(), indexInfos);
indexes.add(blockIndex);
}
}
blockletIndexWrapper = new BlockletIndexWrapper(identifier.getSegmentId(), indexes);
}
if (identifierWrapper.isAddTableBlockToUnsafeAndLRUCache()) {
long expiration_time = CarbonUtil.getExpiration_time(identifierWrapper.getCarbonTable());
lruCache.put(identifier.getUniqueTableSegmentIdentifier(), blockletIndexWrapper, blockletIndexWrapper.getMemorySize(), expiration_time);
}
} catch (Throwable e) {
// clear all the memory used by indexes loaded
for (Index index : indexes) {
index.clear();
}
LOGGER.error("memory exception when loading index: " + e.getMessage(), e);
throw new RuntimeException(e);
}
}
return blockletIndexWrapper;
}
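
Note how segInfoCache is used in get: the block meta info mapping for a segment path is computed once and reused for every index file of that segment within the same call. A minimal sketch of that memoization, with a hypothetical computeMapping function standing in for BlockletIndexUtil.createCarbonDataFileBlockMetaInfoMapping:

import java.util.Map;
import java.util.function.Function;

final class SegInfoCacheSketch {
  static <V> V getOrCompute(Map<String, V> segInfoCache, String segmentFilePath,
      Function<String, V> computeMapping) {
    V mapping = segInfoCache.get(segmentFilePath);
    if (mapping == null) {
      mapping = computeMapping.apply(segmentFilePath);  // e.g. list the segment's data files once
      segInfoCache.put(segmentFilePath, mapping);
    }
    return mapping;
  }
}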
Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.
The class BlockletIndexStore, method getAll.
@Override
public List<BlockletIndexWrapper> getAll(List<TableBlockIndexUniqueIdentifierWrapper> tableSegmentUniqueIdentifiers) throws IOException {
Map<String, Map<String, BlockMetaInfo>> segInfoCache = new HashMap<String, Map<String, BlockMetaInfo>>();
List<BlockletIndexWrapper> blockletIndexWrappers = new ArrayList<>(tableSegmentUniqueIdentifiers.size());
List<TableBlockIndexUniqueIdentifierWrapper> missedIdentifiersWrapper = new ArrayList<>();
BlockletIndexWrapper blockletIndexWrapper = null;
// Get the indexes for each index file from cache.
try {
for (TableBlockIndexUniqueIdentifierWrapper identifierWrapper : tableSegmentUniqueIdentifiers) {
BlockletIndexWrapper indexWrapper = getIfPresent(identifierWrapper);
if (indexWrapper != null) {
blockletIndexWrappers.add(indexWrapper);
} else {
missedIdentifiersWrapper.add(identifierWrapper);
}
}
if (missedIdentifiersWrapper.size() > 0) {
for (TableBlockIndexUniqueIdentifierWrapper identifierWrapper : missedIdentifiersWrapper) {
blockletIndexWrapper = get(identifierWrapper, segInfoCache);
blockletIndexWrappers.add(blockletIndexWrapper);
}
}
} catch (Throwable e) {
if (null != blockletIndexWrapper) {
List<BlockIndex> indexes = blockletIndexWrapper.getIndexes();
for (Index index : indexes) {
index.clear();
}
}
throw new IOException("Problem in loading segment blocks: " + e.getMessage(), e);
}
return blockletIndexWrappers;
}
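
getAll is essentially a hit/miss split over the LRU cache: every identifier is first looked up in the cache, and only the misses are loaded, sharing one segInfoCache for the whole call. A minimal generic sketch of that shape, with the two Function parameters standing in for getIfPresent and get(identifierWrapper, segInfoCache):

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

final class GetAllSketch {
  static <K, V> List<V> getAll(List<K> keys, Function<K, V> getIfPresent, Function<K, V> loadOnMiss) {
    List<V> results = new ArrayList<>(keys.size());
    List<K> missed = new ArrayList<>();
    for (K key : keys) {
      V cached = getIfPresent.apply(key);   // cache hit
      if (cached != null) {
        results.add(cached);
      } else {
        missed.add(key);                    // remember the miss for the second pass
      }
    }
    for (K key : missed) {
      results.add(loadOnMiss.apply(key));   // load from disk, optionally populating the cache
    }
    return results;
  }
}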
Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.
The class TableIndex, method pruneMultiThread.
private List<ExtendedBlocklet> pruneMultiThread(List<Segment> segments, final IndexFilter filter, List<ExtendedBlocklet> blocklets, final Map<Segment, List<Index>> indexes, int totalFiles) {
/*
*********************************************************************************
* Below is an example of how this part of the code works.
* Consider a scenario with 5 segments and 10 indexes in each segment,
* where each index has one record, so there are 50 records in total.
*
* The indexes in each segment look like below.
* s0 [0-9], s1 [0-9], s2 [0-9], s3 [0-9], s4 [0-9]
*
* If the number of threads is 4, then filesPerEachThread = 50 / 4 = 12 files per thread.
*
* A SegmentIndexGroup looks like: [SegmentId, fromIndex, toIndex]
* In each segment only the indexes between fromIndex and toIndex are processed.
*
* The final result will be (4 lists are created as numOfThreadsForPruning is 4):
* Thread1 list: s0 [0-9], s1 [0-1] : 12 files
* Thread2 list: s1 [2-9], s2 [0-3] : 12 files
* Thread3 list: s2 [4-9], s3 [0-5] : 12 files
* Thread4 list: s3 [6-9], s4 [0-9] : 14 files
* so each thread processes an almost equal number of records.
*
*********************************************************************************
*/
int numOfThreadsForPruning = CarbonProperties.getNumOfThreadsForPruning();
int filesPerEachThread = totalFiles / numOfThreadsForPruning;
int prev;
int filesCount = 0;
int processedFileCount = 0;
List<List<SegmentIndexGroup>> indexListForEachThread = new ArrayList<>(numOfThreadsForPruning);
List<SegmentIndexGroup> segmentIndexGroupList = new ArrayList<>();
Set<String> missingSISegments = filter.getMissingSISegments();
for (Segment segment : segments) {
List<Index> eachSegmentIndexList = indexes.get(segment);
prev = 0;
for (int i = 0; i < eachSegmentIndexList.size(); i++) {
Index index = eachSegmentIndexList.get(i);
filesCount += index.getNumberOfEntries();
if (filesCount >= filesPerEachThread) {
if (indexListForEachThread.size() != numOfThreadsForPruning - 1) {
// not the last segmentList
segmentIndexGroupList.add(new SegmentIndexGroup(segment, prev, i));
// save the last value to process in next thread
prev = i + 1;
indexListForEachThread.add(segmentIndexGroupList);
segmentIndexGroupList = new ArrayList<>();
processedFileCount += filesCount;
filesCount = 0;
} else {
// add remaining in the end
processedFileCount += filesCount;
filesCount = 0;
}
}
}
if (prev == 0 || prev != eachSegmentIndexList.size()) {
// if prev == 0, add all indexes of this segment
// if prev != eachSegmentIndexList.size(), add the last remaining indexes of this segment
segmentIndexGroupList.add(new SegmentIndexGroup(segment, prev, eachSegmentIndexList.size() - 1));
}
}
// adding the last segmentList data
indexListForEachThread.add(segmentIndexGroupList);
processedFileCount += filesCount;
if (processedFileCount != totalFiles) {
// this should not happen
throw new RuntimeException(" not all the files processed ");
}
if (indexListForEachThread.size() < numOfThreadsForPruning) {
// If the indexes fit into fewer lists than numOfThreadsForPruning while grouping,
// launch only that many threads.
LOG.info("indexes is distributed in " + indexListForEachThread.size() + " threads");
numOfThreadsForPruning = indexListForEachThread.size();
}
LOG.info("Number of threads selected for multi-thread block pruning is " + numOfThreadsForPruning + ". total files: " + totalFiles + ". total segments: " + segments.size());
List<Future<Void>> results = new ArrayList<>(numOfThreadsForPruning);
final Map<Segment, List<ExtendedBlocklet>> prunedBlockletMap = new ConcurrentHashMap<>(segments.size());
final ExecutorService executorService = Executors.newFixedThreadPool(numOfThreadsForPruning);
final String threadName = Thread.currentThread().getName();
for (int i = 0; i < numOfThreadsForPruning; i++) {
final List<SegmentIndexGroup> segmentIndexGroups = indexListForEachThread.get(i);
results.add(executorService.submit(new Callable<Void>() {
@Override
public Void call() throws IOException {
Thread.currentThread().setName(threadName);
for (SegmentIndexGroup segmentIndexGroup : segmentIndexGroups) {
List<ExtendedBlocklet> pruneBlocklets = new ArrayList<>();
List<Index> indexList = indexes.get(segmentIndexGroup.getSegment());
SegmentProperties segmentProperties = segmentPropertiesFetcher.getSegmentPropertiesFromIndex(indexList.get(0));
Segment segment = segmentIndexGroup.getSegment();
boolean isExternalOrMissingSISegment = segment.getSegmentPath() != null || (missingSISegments != null && missingSISegments.contains(segment.getSegmentNo()));
if (filter.isResolvedOnSegment(segmentProperties)) {
FilterExecutor filterExecutor;
if (!isExternalOrMissingSISegment) {
filterExecutor = FilterUtil.getFilterExecutorTree(filter.getResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
} else {
filterExecutor = FilterUtil.getFilterExecutorTree(filter.getExternalSegmentResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
}
for (int i = segmentIndexGroup.getFromIndex(); i <= segmentIndexGroup.getToIndex(); i++) {
List<Blocklet> dmPruneBlocklets;
if (!isExternalOrMissingSISegment) {
dmPruneBlocklets = indexList.get(i).prune(filter.getResolver(), segmentProperties, filterExecutor, table);
} else {
dmPruneBlocklets = indexList.get(i).prune(filter.getExternalSegmentResolver(), segmentProperties, filterExecutor, table);
}
pruneBlocklets.addAll(addSegmentId(blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment), segment));
}
} else {
Expression filterExpression = filter.getNewCopyOfExpression();
FilterExecutor filterExecutor;
if (!isExternalOrMissingSISegment) {
filterExecutor = FilterUtil.getFilterExecutorTree(new IndexFilter(segmentProperties, table, filterExpression).getResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
} else {
filterExecutor = FilterUtil.getFilterExecutorTree(new IndexFilter(segmentProperties, table, filterExpression).getExternalSegmentResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
}
for (int i = segmentIndexGroup.getFromIndex(); i <= segmentIndexGroup.getToIndex(); i++) {
List<Blocklet> dmPruneBlocklets;
if (!isExternalOrMissingSISegment) {
dmPruneBlocklets = indexList.get(i).prune(filterExpression, segmentProperties, table, filterExecutor);
} else {
dmPruneBlocklets = indexList.get(i).prune(filter.getExternalSegmentFilter(), segmentProperties, table, filterExecutor);
}
pruneBlocklets.addAll(addSegmentId(blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment), segment));
}
}
synchronized (prunedBlockletMap) {
List<ExtendedBlocklet> pruneBlockletsExisting = prunedBlockletMap.get(segmentIndexGroup.getSegment());
if (pruneBlockletsExisting != null) {
pruneBlockletsExisting.addAll(pruneBlocklets);
} else {
prunedBlockletMap.put(segmentIndexGroup.getSegment(), pruneBlocklets);
}
}
}
return null;
}
}));
}
executorService.shutdown();
try {
executorService.awaitTermination(2, TimeUnit.HOURS);
} catch (InterruptedException e) {
LOG.error("Error in pruning index in multi-thread: " + e.getMessage());
}
// check for error
for (Future<Void> result : results) {
try {
result.get();
} catch (InterruptedException | ExecutionException e) {
throw new RuntimeException(e);
}
}
for (Map.Entry<Segment, List<ExtendedBlocklet>> entry : prunedBlockletMap.entrySet()) {
blocklets.addAll(entry.getValue());
}
return blocklets;
}
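
The grouping arithmetic described in the comment at the top of pruneMultiThread can be exercised in isolation. Below is a standalone sketch under the same assumptions (Group and split are hypothetical names, not CarbonData API); feeding it 5 segments of 10 single-entry indexes each and 4 threads reproduces the 12/12/12/14 split from the comment.

import java.util.ArrayList;
import java.util.List;

final class PruningGroupSketch {
  static final class Group {
    final int segment;
    final int fromIndex;
    final int toIndex;
    Group(int segment, int fromIndex, int toIndex) {
      this.segment = segment;
      this.fromIndex = fromIndex;
      this.toIndex = toIndex;
    }
  }

  // entriesPerIndex[s][i] is the number of entries (files) of index i in segment s
  static List<List<Group>> split(int[][] entriesPerIndex, int numOfThreadsForPruning) {
    int totalFiles = 0;
    for (int[] segment : entriesPerIndex) {
      for (int entries : segment) {
        totalFiles += entries;
      }
    }
    int filesPerEachThread = totalFiles / numOfThreadsForPruning;
    List<List<Group>> groupsForEachThread = new ArrayList<>();
    List<Group> currentGroups = new ArrayList<>();
    int filesCount = 0;
    for (int s = 0; s < entriesPerIndex.length; s++) {
      int prev = 0;
      for (int i = 0; i < entriesPerIndex[s].length; i++) {
        filesCount += entriesPerIndex[s][i];
        // cut a slice once it is full, unless this is already the last slice
        if (filesCount >= filesPerEachThread
            && groupsForEachThread.size() != numOfThreadsForPruning - 1) {
          currentGroups.add(new Group(s, prev, i));
          prev = i + 1;
          groupsForEachThread.add(currentGroups);
          currentGroups = new ArrayList<>();
          filesCount = 0;
        }
      }
      if (prev == 0 || prev != entriesPerIndex[s].length) {
        // add the whole segment (prev == 0) or its remaining indexes
        currentGroups.add(new Group(s, prev, entriesPerIndex[s].length - 1));
      }
    }
    groupsForEachThread.add(currentGroups);   // the last slice takes the remainder
    return groupsForEachThread;
  }
}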