Use of org.apache.carbondata.core.scan.filter.executer.FilterExecutor in project carbondata by Apache.
The class TableIndex, method pruneMultiThread.
private List<ExtendedBlocklet> pruneMultiThread(List<Segment> segments,
    final IndexFilter filter, List<ExtendedBlocklet> blocklets,
    final Map<Segment, List<Index>> indexes, int totalFiles) {
  /*
   *********************************************************************************
   * Below is an example of how this part of the code works.
   * Consider a scenario with 5 segments and 10 indexes in each segment,
   * where each index holds one record, so 50 records in total.
   *
   * The indexes in each segment look like this:
   * s0 [0-9], s1 [0-9], s2 [0-9], s3 [0-9], s4 [0-9]
   *
   * If the number of threads is 4, then filesPerEachThread = 50 / 4 = 12 files per thread.
   *
   * A SegmentIndexGroup looks like this: [SegmentId, fromIndex, toIndex].
   * In each segment, only the indexes between fromIndex and toIndex are processed.
   *
   * The final result will be (4 lists, as numOfThreadsForPruning is 4):
   * Thread1 list: s0 [0-9], s1 [0-1] : 12 files
   * Thread2 list: s1 [2-9], s2 [0-3] : 12 files
   * Thread3 list: s2 [4-9], s3 [0-5] : 12 files
   * Thread4 list: s3 [6-9], s4 [0-9] : 14 files
   * So each thread processes an almost equal number of records.
   * (A standalone sketch of this grouping follows the method.)
   *********************************************************************************
   */
  int numOfThreadsForPruning = CarbonProperties.getNumOfThreadsForPruning();
  int filesPerEachThread = totalFiles / numOfThreadsForPruning;
  int prev;
  int filesCount = 0;
  int processedFileCount = 0;
  List<List<SegmentIndexGroup>> indexListForEachThread =
      new ArrayList<>(numOfThreadsForPruning);
  List<SegmentIndexGroup> segmentIndexGroupList = new ArrayList<>();
  Set<String> missingSISegments = filter.getMissingSISegments();
  for (Segment segment : segments) {
    List<Index> eachSegmentIndexList = indexes.get(segment);
    prev = 0;
    for (int i = 0; i < eachSegmentIndexList.size(); i++) {
      Index index = eachSegmentIndexList.get(i);
      filesCount += index.getNumberOfEntries();
      if (filesCount >= filesPerEachThread) {
        if (indexListForEachThread.size() != numOfThreadsForPruning - 1) {
          // not the last segmentList
          segmentIndexGroupList.add(new SegmentIndexGroup(segment, prev, i));
          // remember where the next thread's group starts
          prev = i + 1;
          indexListForEachThread.add(segmentIndexGroupList);
          segmentIndexGroupList = new ArrayList<>();
          processedFileCount += filesCount;
          filesCount = 0;
        } else {
          // the remaining indexes are added at the end
          processedFileCount += filesCount;
          filesCount = 0;
        }
      }
    }
    if (prev == 0 || prev != eachSegmentIndexList.size()) {
      // if prev == 0, add all of this segment's indexes;
      // otherwise add the segment's last remaining indexes
      segmentIndexGroupList.add(
          new SegmentIndexGroup(segment, prev, eachSegmentIndexList.size() - 1));
    }
  }
  // add the last segmentList data
  indexListForEachThread.add(segmentIndexGroupList);
  processedFileCount += filesCount;
  if (processedFileCount != totalFiles) {
    // this should not happen
    throw new RuntimeException(" not all the files processed ");
  }
  if (indexListForEachThread.size() < numOfThreadsForPruning) {
    // If all the indexes fit into fewer lists than numOfThreadsForPruning,
    // launch only as many threads as lists were filled while grouping.
    LOG.info("indexes is distributed in " + indexListForEachThread.size() + " threads");
    numOfThreadsForPruning = indexListForEachThread.size();
  }
  LOG.info("Number of threads selected for multi-thread block pruning is "
      + numOfThreadsForPruning + ". total files: " + totalFiles
      + ". total segments: " + segments.size());
  List<Future<Void>> results = new ArrayList<>(numOfThreadsForPruning);
  final Map<Segment, List<ExtendedBlocklet>> prunedBlockletMap =
      new ConcurrentHashMap<>(segments.size());
  final ExecutorService executorService =
      Executors.newFixedThreadPool(numOfThreadsForPruning);
  final String threadName = Thread.currentThread().getName();
  for (int i = 0; i < numOfThreadsForPruning; i++) {
    final List<SegmentIndexGroup> segmentIndexGroups = indexListForEachThread.get(i);
    results.add(executorService.submit(new Callable<Void>() {
      @Override
      public Void call() throws IOException {
        Thread.currentThread().setName(threadName);
        for (SegmentIndexGroup segmentIndexGroup : segmentIndexGroups) {
          List<ExtendedBlocklet> pruneBlocklets = new ArrayList<>();
          List<Index> indexList = indexes.get(segmentIndexGroup.getSegment());
          SegmentProperties segmentProperties =
              segmentPropertiesFetcher.getSegmentPropertiesFromIndex(indexList.get(0));
          Segment segment = segmentIndexGroup.getSegment();
          boolean isExternalOrMissingSISegment = segment.getSegmentPath() != null
              || (missingSISegments != null
                  && missingSISegments.contains(segment.getSegmentNo()));
          if (filter.isResolvedOnSegment(segmentProperties)) {
            FilterExecutor filterExecutor;
            if (!isExternalOrMissingSISegment) {
              filterExecutor = FilterUtil.getFilterExecutorTree(filter.getResolver(),
                  segmentProperties, null,
                  table.getMinMaxCacheColumns(segmentProperties), false);
            } else {
              filterExecutor = FilterUtil.getFilterExecutorTree(
                  filter.getExternalSegmentResolver(), segmentProperties, null,
                  table.getMinMaxCacheColumns(segmentProperties), false);
            }
            for (int i = segmentIndexGroup.getFromIndex();
                i <= segmentIndexGroup.getToIndex(); i++) {
              List<Blocklet> dmPruneBlocklets;
              if (!isExternalOrMissingSISegment) {
                dmPruneBlocklets = indexList.get(i).prune(filter.getResolver(),
                    segmentProperties, filterExecutor, table);
              } else {
                dmPruneBlocklets = indexList.get(i).prune(
                    filter.getExternalSegmentResolver(), segmentProperties,
                    filterExecutor, table);
              }
              pruneBlocklets.addAll(addSegmentId(
                  blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment),
                  segment));
            }
          } else {
            Expression filterExpression = filter.getNewCopyOfExpression();
            FilterExecutor filterExecutor;
            if (!isExternalOrMissingSISegment) {
              filterExecutor = FilterUtil.getFilterExecutorTree(
                  new IndexFilter(segmentProperties, table, filterExpression).getResolver(),
                  segmentProperties, null,
                  table.getMinMaxCacheColumns(segmentProperties), false);
            } else {
              filterExecutor = FilterUtil.getFilterExecutorTree(
                  new IndexFilter(segmentProperties, table, filterExpression)
                      .getExternalSegmentResolver(),
                  segmentProperties, null,
                  table.getMinMaxCacheColumns(segmentProperties), false);
            }
            for (int i = segmentIndexGroup.getFromIndex();
                i <= segmentIndexGroup.getToIndex(); i++) {
              List<Blocklet> dmPruneBlocklets;
              if (!isExternalOrMissingSISegment) {
                dmPruneBlocklets = indexList.get(i).prune(filterExpression,
                    segmentProperties, table, filterExecutor);
              } else {
                dmPruneBlocklets = indexList.get(i).prune(
                    filter.getExternalSegmentFilter(), segmentProperties, table,
                    filterExecutor);
              }
              pruneBlocklets.addAll(addSegmentId(
                  blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment),
                  segment));
            }
          }
          synchronized (prunedBlockletMap) {
            List<ExtendedBlocklet> pruneBlockletsExisting =
                prunedBlockletMap.get(segmentIndexGroup.getSegment());
            if (pruneBlockletsExisting != null) {
              pruneBlockletsExisting.addAll(pruneBlocklets);
            } else {
              prunedBlockletMap.put(segmentIndexGroup.getSegment(), pruneBlocklets);
            }
          }
        }
        return null;
      }
    }));
  }
  executorService.shutdown();
  try {
    executorService.awaitTermination(2, TimeUnit.HOURS);
  } catch (InterruptedException e) {
    LOG.error("Error in pruning index in multi-thread: " + e.getMessage());
  }
  // check for errors
  for (Future<Void> result : results) {
    try {
      result.get();
    } catch (InterruptedException | ExecutionException e) {
      throw new RuntimeException(e);
    }
  }
  for (Map.Entry<Segment, List<ExtendedBlocklet>> entry : prunedBlockletMap.entrySet()) {
    blocklets.addAll(entry.getValue());
  }
  return blocklets;
}
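The grouping scheme described in the header comment above can be tried in isolation. Below is a minimal, self-contained sketch, not CarbonData code, that reproduces the split from the example: 5 segments with 10 single-entry indexes each, distributed over 4 threads. It simplifies the real method (no processedFileCount sanity check, one entry per index) but produces the same four lists.

// Standalone sketch of the pruneMultiThread grouping logic; the segment and
// index counts mirror the header-comment example. Not CarbonData code.
import java.util.ArrayList;
import java.util.List;

public class PruneSplitSketch {
  public static void main(String[] args) {
    int segments = 5, indexesPerSegment = 10, threads = 4;
    int totalFiles = segments * indexesPerSegment;  // 50
    int filesPerThread = totalFiles / threads;      // 12
    List<List<String>> perThread = new ArrayList<>();
    List<String> current = new ArrayList<>();
    int filesCount = 0;
    for (int s = 0; s < segments; s++) {
      int prev = 0;
      for (int i = 0; i < indexesPerSegment; i++) {
        filesCount++;  // each index holds exactly one entry in this example
        if (filesCount >= filesPerThread && perThread.size() != threads - 1) {
          current.add("s" + s + " [" + prev + "-" + i + "]");  // close this slice
          prev = i + 1;
          perThread.add(current);
          current = new ArrayList<>();
          filesCount = 0;
        }
      }
      if (prev != indexesPerSegment) {
        // remaining indexes of this segment go into the group under construction
        current.add("s" + s + " [" + prev + "-" + (indexesPerSegment - 1) + "]");
      }
    }
    perThread.add(current);  // the remainder lands in the last thread's list
    for (int t = 0; t < perThread.size(); t++) {
      System.out.println("Thread" + (t + 1) + " list: " + perThread.get(t));
    }
  }
}

Running it prints the same four lists as the comment, with the 14-file remainder in the last thread's list.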
Use of org.apache.carbondata.core.scan.filter.executer.FilterExecutor in project carbondata by Apache.
The class TableIndex, method prune.
/**
 * This method is used from any machine after the work is distributed. It takes the
 * distributable object to prune with the filters.
 *
 * @param indices the indexes of the segment to prune
 * @param distributable the distributable unit identifying the segment
 * @param filterExp the resolved filter expression to apply
 * @param partitions the partitions to restrict the pruning to
 * @return the pruned blocklets, enriched with block details
 */
public List<ExtendedBlocklet> prune(List<Index> indices, IndexInputSplit distributable,
    FilterResolverIntf filterExp, List<PartitionSpec> partitions) throws IOException {
  List<ExtendedBlocklet> detailedBlocklets = new ArrayList<>();
  List<Blocklet> blocklets = new ArrayList<>();
  Set<Path> partitionsToPrune = getPartitionLocations(partitions);
  SegmentProperties segmentProperties = segmentPropertiesFetcher
      .getSegmentProperties(distributable.getSegment(), partitionsToPrune);
  FilterExecutor filterExecutor = FilterUtil.getFilterExecutorTree(filterExp,
      segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
  for (Index index : indices) {
    blocklets.addAll(index.prune(filterExp, segmentProperties, filterExecutor, table));
  }
  BlockletSerializer serializer = new BlockletSerializer();
  String writePath = identifier.getTablePath() + CarbonCommonConstants.FILE_SEPARATOR
      + indexSchema.getIndexName();
  if (indexFactory.getIndexLevel() == IndexLevel.FG) {
    FileFactory.mkdirs(writePath);
  }
  for (Blocklet blocklet : blocklets) {
    ExtendedBlocklet detailedBlocklet =
        blockletDetailsFetcher.getExtendedBlocklet(blocklet, distributable.getSegment());
    if (indexFactory.getIndexLevel() == IndexLevel.FG) {
      String blockletWritePath =
          writePath + CarbonCommonConstants.FILE_SEPARATOR + System.nanoTime();
      detailedBlocklet.setIndexWriterPath(blockletWritePath);
      serializer.serializeBlocklet((FineGrainBlocklet) blocklet, blockletWritePath);
    }
    detailedBlocklet.setSegment(distributable.getSegment());
    detailedBlocklets.add(detailedBlocklet);
  }
  return detailedBlocklets;
}
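One detail worth isolating is the fine-grained (FG) branch: each FG blocklet's row-level detail is serialized to a file named with System.nanoTime() under the index's write path, and only that path is kept on the ExtendedBlocklet. The sketch below mimics this write-then-reference pattern with plain Java serialization; RowRanges and the directory layout are hypothetical stand-ins, not CarbonData's serializer or on-disk format.

// Hypothetical sketch of the FG write-then-reference pattern from prune().
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

public class FgBlockletSketch {
  // Stand-in for a fine-grained blocklet's row-level payload.
  static class RowRanges implements Serializable {
    final int[] rowIds;
    RowRanges(int[] rowIds) { this.rowIds = rowIds; }
  }

  // Serialize the payload to a uniquely named file and return its path, which
  // the real code records via detailedBlocklet.setIndexWriterPath(...).
  static String writeBlockletDetail(Path indexDir, RowRanges detail) throws IOException {
    Files.createDirectories(indexDir);  // analogue of FileFactory.mkdirs(writePath)
    Path file = indexDir.resolve(Long.toString(System.nanoTime()));
    try (ObjectOutputStream out = new ObjectOutputStream(Files.newOutputStream(file))) {
      out.writeObject(detail);  // analogue of serializer.serializeBlocklet(...)
    }
    return file.toString();
  }

  public static void main(String[] args) throws IOException {
    String path = writeBlockletDetail(
        Paths.get(System.getProperty("java.io.tmpdir"), "my_index"),
        new RowRanges(new int[] {3, 7, 42}));
    System.out.println("blocklet detail written to " + path);
  }
}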
Use of org.apache.carbondata.core.scan.filter.executer.FilterExecutor in project carbondata by Apache.
The class TableIndex, method pruneWithFilter.
private List<ExtendedBlocklet> pruneWithFilter(List<Segment> segments, IndexFilter filter,
    Set<Path> partitionLocations, List<ExtendedBlocklet> blocklets,
    Map<Segment, List<Index>> indexes) throws IOException {
  Set<String> missingSISegments = filter.getMissingSISegments();
  for (Segment segment : segments) {
    List<Index> segmentIndices = indexes.get(segment);
    if (segment == null || segmentIndices == null || segmentIndices.isEmpty()) {
      continue;
    }
    boolean isExternalOrMissingSISegment = segment.isExternalSegment()
        || (missingSISegments != null
            && missingSISegments.contains(segment.getSegmentNo()));
    List<Blocklet> pruneBlocklets = new ArrayList<>();
    SegmentProperties segmentProperties;
    if (segmentIndices.get(0) instanceof BlockIndex) {
      segmentProperties =
          segmentPropertiesFetcher.getSegmentPropertiesFromIndex(segmentIndices.get(0));
    } else {
      segmentProperties =
          segmentPropertiesFetcher.getSegmentProperties(segment, partitionLocations);
    }
    if (filter.isResolvedOnSegment(segmentProperties)) {
      FilterExecutor filterExecutor;
      if (!isExternalOrMissingSISegment) {
        filterExecutor = FilterUtil.getFilterExecutorTree(filter.getResolver(),
            segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
      } else {
        filterExecutor = FilterUtil.getFilterExecutorTree(
            filter.getExternalSegmentResolver(), segmentProperties, null,
            table.getMinMaxCacheColumns(segmentProperties), false);
      }
      for (Index index : segmentIndices) {
        if (!isExternalOrMissingSISegment) {
          pruneBlocklets.addAll(index.prune(filter.getResolver(), segmentProperties,
              filterExecutor, this.table));
        } else {
          pruneBlocklets.addAll(index.prune(filter.getExternalSegmentResolver(),
              segmentProperties, filterExecutor, this.table));
        }
      }
    } else {
      FilterExecutor filterExecutor;
      Expression expression = filter.getExpression();
      if (!isExternalOrMissingSISegment) {
        filterExecutor = FilterUtil.getFilterExecutorTree(
            new IndexFilter(segmentProperties, table, expression).getResolver(),
            segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
      } else {
        filterExecutor = FilterUtil.getFilterExecutorTree(
            new IndexFilter(segmentProperties, table, expression)
                .getExternalSegmentResolver(),
            segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
      }
      for (Index index : segmentIndices) {
        if (!isExternalOrMissingSISegment) {
          pruneBlocklets.addAll(index.prune(filter.getExpression(), segmentProperties,
              table, filterExecutor));
        } else {
          pruneBlocklets.addAll(index.prune(filter.getExternalSegmentFilter(),
              segmentProperties, table, filterExecutor));
        }
      }
    }
    blocklets.addAll(addSegmentId(
        blockletDetailsFetcher.getExtendedBlocklets(pruneBlocklets, segment), segment));
  }
  return blocklets;
}
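The pivotal check here is filter.isResolvedOnSegment(segmentProperties): a filter tree resolved against the table-level schema can only be reused on segments whose column layout matches; otherwise the raw expression is re-resolved against that segment's properties, as in "new IndexFilter(segmentProperties, table, expression)". A minimal sketch of this resolve-once-or-per-segment dispatch, using hypothetical Schema and Resolved types rather than CarbonData's:

// Hypothetical sketch of per-segment filter re-resolution, as in pruneWithFilter.
import java.util.HashMap;
import java.util.Map;

public class FilterResolutionSketch {
  static class Schema {
    final int version;
    Schema(int version) { this.version = version; }
    @Override public boolean equals(Object o) {
      return o instanceof Schema && ((Schema) o).version == version;
    }
    @Override public int hashCode() { return version; }
  }

  static class Resolved {
    final String expr;
    final Schema schema;
    Resolved(String expr, Schema schema) { this.expr = expr; this.schema = schema; }
  }

  private final Resolved tableLevel;                        // resolved once, up front
  private final Map<Schema, Resolved> perSegment = new HashMap<>();

  FilterResolutionSketch(String expr, Schema tableSchema) {
    this.tableLevel = new Resolved(expr, tableSchema);
  }

  // Analogue of the isResolvedOnSegment(...) branch: reuse the table-level
  // resolution when the schemas match, otherwise re-resolve for this segment.
  Resolved forSegment(Schema segmentSchema) {
    if (segmentSchema.equals(tableLevel.schema)) {
      return tableLevel;
    }
    return perSegment.computeIfAbsent(segmentSchema,
        s -> new Resolved(tableLevel.expr, s));
  }

  public static void main(String[] args) {
    FilterResolutionSketch f = new FilterResolutionSketch("a > 5", new Schema(1));
    System.out.println(f.forSegment(new Schema(1)) == f.tableLevel);  // true: reused
    System.out.println(f.forSegment(new Schema(2)) == f.tableLevel);  // false: re-resolved
  }
}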
Use of org.apache.carbondata.core.scan.filter.executer.FilterExecutor in project carbondata by Apache.
The class FilterUtil, method getFilterExecutorForRangeFilters.
/**
 * Below method will be used to get the filter executor instance for range filters
 * when a local dictionary is present in the blocklet.
 *
 * @param rawColumnChunk raw column chunk
 * @param exp filter expression
 * @param isNaturalSorted whether the data is already sorted
 * @return the include or exclude filter executor
 */
public static FilterExecutor getFilterExecutorForRangeFilters(
    DimensionRawColumnChunk rawColumnChunk, Expression exp, boolean isNaturalSorted) {
  BitSet includeDictionaryValues;
  try {
    includeDictionaryValues = FilterUtil.getIncludeDictFilterValuesForRange(exp,
        rawColumnChunk.getLocalDictionary());
  } catch (FilterUnsupportedException e) {
    throw new RuntimeException(e);
  }
  boolean isExclude = includeDictionaryValues.cardinality() > 1
      && FilterUtil.isExcludeFilterNeedsToApply(
          rawColumnChunk.getLocalDictionary().getDictionaryActualSize(),
          includeDictionaryValues.cardinality());
  byte[][] encodedFilterValues = FilterUtil.getEncodedFilterValuesForRange(
      includeDictionaryValues, rawColumnChunk.getLocalDictionary(), isExclude);
  FilterExecutor filterExecutor;
  if (!isExclude) {
    filterExecutor = new IncludeFilterExecutorImpl(encodedFilterValues, isNaturalSorted);
  } else {
    filterExecutor = new ExcludeFilterExecutorImpl(encodedFilterValues, isNaturalSorted);
  }
  return filterExecutor;
}
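The include/exclude flip is the core idea: when a range matches most of the local dictionary, encoding the complement as an exclude filter keeps the encoded value set small. Below is a minimal sketch of that decision under an assumed simple-majority threshold; CarbonData's actual isExcludeFilterNeedsToApply cutoff may differ.

// Sketch of the include-vs-exclude decision; the 50% cutoff is an assumption.
import java.util.BitSet;

public class IncludeExcludeSketch {
  static boolean excludeNeeded(int dictionaryActualSize, BitSet includeValues) {
    // More than one matching value, and the matches cover most of the
    // dictionary: the complement (exclude set) is cheaper to encode and test.
    return includeValues.cardinality() > 1
        && includeValues.cardinality() > dictionaryActualSize / 2;
  }

  public static void main(String[] args) {
    BitSet matches = new BitSet();
    matches.set(0, 90);  // 90 of 100 local-dictionary values fall in the range
    System.out.println(excludeNeeded(100, matches));  // true -> exclude executor
    matches.clear();
    matches.set(5);      // a single matching value
    System.out.println(excludeNeeded(100, matches));  // false -> include executor
  }
}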
Use of org.apache.carbondata.core.scan.filter.executer.FilterExecutor in project carbondata by Apache.
The class BlockletIndexFactory, method getTableBlockIndexUniqueIdentifierUsingSegmentMinMax.
/**
 * Using block-level min/max values, identify whether the segment has to be added for
 * further pruning and whether its segment index info has to be loaded into the cache.
 *
 * @param segment segment to be checked for whether its block indexes need loading
 * @param segmentMetaDataInfo block-level min and max values for the segment
 * @param filter filter expression
 * @param identifiers tableBlockIndexUniqueIdentifiers
 * @param tableBlockIndexUniqueIdentifierWrappers output list that collects the wrapped identifiers
 */
private void getTableBlockIndexUniqueIdentifierUsingSegmentMinMax(Segment segment,
    SegmentMetaDataInfo segmentMetaDataInfo, IndexFilter filter,
    Set<TableBlockIndexUniqueIdentifier> identifiers,
    List<TableBlockIndexUniqueIdentifierWrapper> tableBlockIndexUniqueIdentifierWrappers) {
  boolean isScanRequired = false;
  Map<String, SegmentColumnMetaDataInfo> segmentColumnMetaDataInfoMap =
      segmentMetaDataInfo.getSegmentColumnMetaDataInfoMap();
  int length = segmentColumnMetaDataInfoMap.size();
  // add columnSchemas based on the columns present in the segment
  List<ColumnSchema> columnSchemas = new ArrayList<>();
  byte[][] min = new byte[length][];
  byte[][] max = new byte[length][];
  boolean[] minMaxFlag = new boolean[length];
  int i = 0;
  // get the current columnSchema list for the table
  Map<String, ColumnSchema> tableColumnSchemas = this.getCarbonTable().getTableInfo()
      .getFactTable().getListOfColumns().stream()
      .collect(Collectors.toMap(ColumnSchema::getColumnUniqueId, ColumnSchema::clone));
  // fill min, max and columnSchema values
  for (Map.Entry<String, SegmentColumnMetaDataInfo> columnMetaData
      : segmentColumnMetaDataInfoMap.entrySet()) {
    ColumnSchema columnSchema = tableColumnSchemas.get(columnMetaData.getKey());
    if (null != columnSchema) {
      // get the segment's sort column and column drift info
      boolean isSortColumnInSegment = columnMetaData.getValue().isSortColumn();
      boolean isColumnDriftInSegment = columnMetaData.getValue().isColumnDrift();
      if (null != columnSchema.getColumnProperties()) {
        // get current sort column and column drift info from the current columnSchema
        String isSortColumn =
            columnSchema.getColumnProperties().get(CarbonCommonConstants.SORT_COLUMNS);
        String isColumnDrift =
            columnSchema.getColumnProperties().get(CarbonCommonConstants.COLUMN_DRIFT);
        if (null != isSortColumn) {
          if (isSortColumn.equalsIgnoreCase("true") && !isSortColumnInSegment) {
            // unset the column properties in the current column schema
            modifyColumnSchemaForSortColumn(columnSchema, isColumnDriftInSegment,
                isColumnDrift, false);
          } else if (isSortColumn.equalsIgnoreCase("false") && isSortColumnInSegment) {
            // set sort column to true in the current column schema's column properties
            modifyColumnSchemaForSortColumn(columnSchema, isColumnDriftInSegment,
                isColumnDrift, true);
          }
        } else {
          modifyColumnSchemaForSortColumn(columnSchema, isColumnDriftInSegment,
              isColumnDrift, false);
        }
      }
      columnSchemas.add(columnSchema);
      min[i] = columnMetaData.getValue().getColumnMinValue();
      max[i] = columnMetaData.getValue().getColumnMaxValue();
      minMaxFlag[i] = min[i].length != 0 && max[i].length != 0;
      i++;
    }
  }
  // get segmentProperties using the created columnSchemas list
  SegmentProperties segmentProperties = SegmentPropertiesAndSchemaHolder.getInstance()
      .addSegmentProperties(this.getCarbonTable(), columnSchemas, segment.getSegmentNo())
      .getSegmentProperties();
  FilterResolverIntf resolver = new IndexFilter(segmentProperties, this.getCarbonTable(),
      filter.getExpression()).getResolver();
  // prepare the filter executor using the IndexFilter resolver
  FilterExecutor filterExecutor =
      FilterUtil.getFilterExecutorTree(resolver, segmentProperties, null, null, false);
  // check whether the segment has to be scanned based on its min/max values
  BitSet scanRequired = filterExecutor.isScanRequired(max, min, minMaxFlag);
  if (!scanRequired.isEmpty()) {
    isScanRequired = true;
  }
  if (isScanRequired) {
    for (TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier : identifiers) {
      tableBlockIndexUniqueIdentifierWrappers.add(new TableBlockIndexUniqueIdentifierWrapper(
          tableBlockIndexUniqueIdentifier, this.getCarbonTable()));
    }
  }
}
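Conceptually, filterExecutor.isScanRequired(max, min, minMaxFlag) asks whether the filter value can fall inside each column's [min, max] range, with minMaxFlag marking columns whose stats were actually written. The sketch below shows the single-column, equals-filter case using unsigned lexicographic byte comparison; the real executor evaluates a full expression tree over all columns.

// Sketch of a segment-level min/max check for one equals filter; the real
// FilterExecutor walks an expression tree over all columns.
import java.util.Arrays;

public class MinMaxPruneSketch {
  static boolean isScanRequired(byte[] filterValue, byte[] min, byte[] max,
      boolean minMaxValid) {
    if (!minMaxValid) {
      return true;  // stats were not written for this column: must scan
    }
    // Unsigned lexicographic comparison of the encoded byte values.
    return Arrays.compareUnsigned(filterValue, min) >= 0
        && Arrays.compareUnsigned(filterValue, max) <= 0;
  }

  public static void main(String[] args) {
    byte[] min = {10}, max = {50};
    System.out.println(isScanRequired(new byte[] {30}, min, max, true));  // true
    System.out.println(isScanRequired(new byte[] {99}, min, max, true));  // false -> skip segment
  }
}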