
Example 1 with Index

Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.

The class TableIndex, method prune.

/**
 * Prune the index for the given valid segments using the filter expression.
 *
 * @param allSegments valid segments to prune
 * @param filter      filter to apply; may be null or empty for non-filter queries
 * @param partitions  partition specs used to restrict pruning on Hive partition tables
 * @return the pruned blocklets
 */
public List<ExtendedBlocklet> prune(List<Segment> allSegments, final IndexFilter filter, final List<PartitionSpec> partitions) throws IOException {
    final List<ExtendedBlocklet> blocklets = new ArrayList<>();
    List<Segment> segments = getCarbonSegments(allSegments);
    final Map<Segment, List<Index>> indexes;
    boolean isFilterPresent = filter != null && !filter.isEmpty();
    Set<Path> partitionLocations = getPartitionLocations(partitions);
    if (table.isHivePartitionTable() && isFilterPresent && !partitionLocations.isEmpty()) {
        indexes = indexFactory.getIndexes(segments, partitionLocations, filter);
    } else {
        indexes = indexFactory.getIndexes(segments, filter);
    }
    if (indexes.isEmpty()) {
        return blocklets;
    }
    // count the index entries (files) and indexes to decide whether
    // multi-threaded pruning is worthwhile
    int totalFiles = 0;
    int indexesCount = 0;
    // for filter queries, prune only the segments that actually have indexes
    // (getIndexes above may already have narrowed them by partition)
    if (isFilterPresent) {
        segments = new ArrayList<>(indexes.keySet());
    }
    for (Segment segment : segments) {
        for (Index index : indexes.get(segment)) {
            totalFiles += index.getNumberOfEntries();
            indexesCount++;
        }
    }
    int numOfThreadsForPruning = CarbonProperties.getNumOfThreadsForPruning();
    int carbonDriverPruningMultiThreadEnableFilesCount = CarbonProperties.getDriverPruningMultiThreadEnableFilesCount();
    // multi-threaded pruning pays off only for large file counts, and the driver should keep
    // threads free to serve concurrent queries; stay single-threaded when only one pruning
    // thread is configured, there are fewer indexes than threads, the total file count is
    // below the multi-thread threshold, or no filter is present
    if (numOfThreadsForPruning == 1 || indexesCount < numOfThreadsForPruning || totalFiles < carbonDriverPruningMultiThreadEnableFilesCount || !isFilterPresent) {
        if (!isFilterPresent) {
            // if filter is not passed, then return all the blocklets.
            return pruneWithoutFilter(segments, partitionLocations, blocklets);
        }
        return pruneWithFilter(segments, filter, partitionLocations, blocklets, indexes);
    }
    // handle by multi-thread
    return pruneMultiThread(segments, filter, blocklets, indexes, totalFiles);
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) Index(org.apache.carbondata.core.index.dev.Index) BlockIndex(org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) CoarseGrainIndex(org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex) List(java.util.List) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)
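
The single-thread versus multi-thread decision above hinges on one compound condition. Below is a minimal, self-contained restatement of that condition in plain Java; the class and parameter names are made up for this sketch and are not carbondata APIs.

// Hypothetical helper mirroring the threshold check in TableIndex.prune above.
// The real values come from CarbonProperties; here they are plain parameters.
public final class PruningModeSketch {

    static boolean useSingleThreadedPruning(int numOfThreadsForPruning, int indexesCount,
            int totalFiles, int multiThreadEnableFilesCount, boolean isFilterPresent) {
        // stay single-threaded when only one pruning thread is configured, there are fewer
        // indexes than threads, the file count is below the multi-thread threshold,
        // or there is no filter to evaluate
        return numOfThreadsForPruning == 1
            || indexesCount < numOfThreadsForPruning
            || totalFiles < multiThreadEnableFilesCount
            || !isFilterPresent;
    }

    public static void main(String[] args) {
        // 4 threads configured, 50 indexes, 200000 files, threshold 100000, filter present
        // -> prints false, i.e. multi-threaded pruning would be used
        System.out.println(useSingleThreadedPruning(4, 50, 200000, 100000, true));
    }
}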

Example 2 with Index

Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.

The class BlockletIndexStore, method put.

@Override
public void put(TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper, BlockletIndexWrapper wrapper) throws IOException {
    // only add when the entry is not already cached: overwriting would leak the unsafe memory
    // held by the existing entry; to replace an entry, invalidate it first and then use the put interface
    if (null == getIfPresent(tableBlockIndexUniqueIdentifierWrapper)) {
        List<BlockIndex> indexes = wrapper.getIndexes();
        try {
            for (BlockIndex blockIndex : indexes) {
                blockIndex.convertToUnsafeDMStore();
            }
            // get cacheExpirationTime for table from tableProperties
            long expirationTime = CarbonUtil.getExpiration_time(tableBlockIndexUniqueIdentifierWrapper.getCarbonTable());
            // Locking is not required here because in LRU cache map add method is synchronized to add
            // only one entry at a time and if a key already exists it will not overwrite the entry
            lruCache.put(tableBlockIndexUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier().getUniqueTableSegmentIdentifier(), wrapper, wrapper.getMemorySize(), expirationTime);
        } catch (Throwable e) {
            // clear all the memory acquired by index in case of any failure
            for (Index blockletIndex : indexes) {
                blockletIndex.clear();
            }
            throw new IOException("Problem in adding index to cache.", e);
        }
    }
}
Also used : Index(org.apache.carbondata.core.index.dev.Index) BlockIndex(org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) IOException(java.io.IOException)
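
The put above follows a guard-then-insert pattern: cache an entry only when its key is absent, convert each index to its unsafe store, and release whatever memory each index acquired if anything fails. A minimal, self-contained sketch of that pattern is below; Entry, convert() and clear() are illustrative stand-ins (roughly BlockIndex#convertToUnsafeDMStore() and Index#clear() in the snippet), not the carbondata API.

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical cache showing "insert only if absent, clean up on failure".
public final class GuardedCachePutSketch {

    interface Entry {
        void convert() throws Exception;   // may acquire off-heap memory
        void clear();                      // releases whatever was acquired
    }

    private final Map<String, List<Entry>> cache = new ConcurrentHashMap<>();

    public void put(String key, List<Entry> entries) throws IOException {
        if (cache.containsKey(key)) {
            // never overwrite: the existing value may already hold unsafe memory
            return;
        }
        try {
            for (Entry entry : entries) {
                entry.convert();
            }
            cache.put(key, entries);
        } catch (Throwable e) {
            // release the memory acquired by every entry before propagating the failure
            for (Entry entry : entries) {
                entry.clear();
            }
            throw new IOException("Problem in adding entries to cache.", e);
        }
    }
}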

Example 3 with Index

Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.

The class BlockletIndexStore, method get.

public BlockletIndexWrapper get(TableBlockIndexUniqueIdentifierWrapper identifierWrapper, Map<String, Map<String, BlockMetaInfo>> segInfoCache) {
    TableBlockIndexUniqueIdentifier identifier = identifierWrapper.getTableBlockIndexUniqueIdentifier();
    String lruCacheKey = identifier.getUniqueTableSegmentIdentifier();
    BlockletIndexWrapper blockletIndexWrapper = (BlockletIndexWrapper) lruCache.get(lruCacheKey);
    List<BlockIndex> indexes = new ArrayList<>();
    if (blockletIndexWrapper == null) {
        try {
            SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore(identifierWrapper.getConfiguration());
            Set<String> filesRead = new HashSet<>();
            String segmentFilePath = identifier.getIndexFilePath();
            if (segInfoCache == null) {
                segInfoCache = new HashMap<>();
            }
            Map<String, BlockMetaInfo> carbonDataFileBlockMetaInfoMapping = segInfoCache.get(segmentFilePath);
            if (carbonDataFileBlockMetaInfoMapping == null) {
                carbonDataFileBlockMetaInfoMapping = BlockletIndexUtil.createCarbonDataFileBlockMetaInfoMapping(segmentFilePath, identifierWrapper.getConfiguration());
                segInfoCache.put(segmentFilePath, carbonDataFileBlockMetaInfoMapping);
            }
            // if the identifier is not a merge file we can directly load the indexes
            if (identifier.getMergeIndexFileName() == null) {
                List<DataFileFooter> indexInfos = new ArrayList<>();
                Map<String, BlockMetaInfo> blockMetaInfoMap = BlockletIndexUtil.getBlockMetaInfoMap(identifierWrapper, indexFileStore, filesRead, carbonDataFileBlockMetaInfoMapping, indexInfos);
                BlockIndex blockIndex = loadAndGetIndex(identifier, indexFileStore, blockMetaInfoMap, identifierWrapper.getCarbonTable(), identifierWrapper.isAddToUnsafe(), identifierWrapper.getConfiguration(), identifierWrapper.isSerializeDmStore(), indexInfos);
                indexes.add(blockIndex);
                blockletIndexWrapper = new BlockletIndexWrapper(identifier.getSegmentId(), indexes);
            } else {
                // if the identifier is a merge file then collect the index files and load the indexes
                List<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = BlockletIndexUtil.getIndexFileIdentifiersFromMergeFile(identifier, indexFileStore);
                for (TableBlockIndexUniqueIdentifier blockIndexUniqueIdentifier : tableBlockIndexUniqueIdentifiers) {
                    List<DataFileFooter> indexInfos = new ArrayList<>();
                    Map<String, BlockMetaInfo> blockMetaInfoMap = BlockletIndexUtil.getBlockMetaInfoMap(new TableBlockIndexUniqueIdentifierWrapper(blockIndexUniqueIdentifier, identifierWrapper.getCarbonTable()), indexFileStore, filesRead, carbonDataFileBlockMetaInfoMapping, indexInfos);
                    if (!blockMetaInfoMap.isEmpty()) {
                        BlockIndex blockIndex = loadAndGetIndex(blockIndexUniqueIdentifier, indexFileStore, blockMetaInfoMap, identifierWrapper.getCarbonTable(), identifierWrapper.isAddToUnsafe(), identifierWrapper.getConfiguration(), identifierWrapper.isSerializeDmStore(), indexInfos);
                        indexes.add(blockIndex);
                    }
                }
                blockletIndexWrapper = new BlockletIndexWrapper(identifier.getSegmentId(), indexes);
            }
            if (identifierWrapper.isAddTableBlockToUnsafeAndLRUCache()) {
                long expirationTime = CarbonUtil.getExpiration_time(identifierWrapper.getCarbonTable());
                lruCache.put(identifier.getUniqueTableSegmentIdentifier(), blockletIndexWrapper, blockletIndexWrapper.getMemorySize(), expirationTime);
            }
        } catch (Throwable e) {
            // clear all the memory used by indexes loaded
            for (Index index : indexes) {
                index.clear();
            }
            LOGGER.error("memory exception when loading index: " + e.getMessage(), e);
            throw new RuntimeException(e);
        }
    }
    return blockletIndexWrapper;
}
Also used : SegmentIndexFileStore(org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore) ArrayList(java.util.ArrayList) Index(org.apache.carbondata.core.index.dev.Index) BlockIndex(org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) HashSet(java.util.HashSet)
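
The get above is a get-or-load: check the LRU cache first, and on a miss rebuild the wrapper from the index files, reusing the caller-supplied segInfoCache so that several identifiers belonging to the same segment share the expensive file-to-block-metadata mapping. A stripped-down, self-contained sketch of that shape is below; the class, the loader functions and the String-keyed metadata are illustrative placeholders, not the carbondata API.

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

// Hypothetical get-or-load cache; loadSegmentMetadata and loadValue stand in for
// BlockletIndexUtil.createCarbonDataFileBlockMetaInfoMapping and loadAndGetIndex above.
public final class GetOrLoadSketch<V> {

    private final Map<String, V> lruCache = new ConcurrentHashMap<>();
    private final Function<String, Map<String, Long>> loadSegmentMetadata;
    private final Function<Map<String, Long>, V> loadValue;

    public GetOrLoadSketch(Function<String, Map<String, Long>> loadSegmentMetadata,
            Function<Map<String, Long>, V> loadValue) {
        this.loadSegmentMetadata = loadSegmentMetadata;
        this.loadValue = loadValue;
    }

    public V get(String cacheKey, String segmentFilePath,
            Map<String, Map<String, Long>> segInfoCache) {
        V cached = lruCache.get(cacheKey);
        if (cached != null) {
            return cached;    // cache hit, nothing to load
        }
        // share the per-segment metadata across every miss that targets the same segment
        Map<String, Long> segmentMetadata =
            segInfoCache.computeIfAbsent(segmentFilePath, loadSegmentMetadata);
        V loaded = loadValue.apply(segmentMetadata);
        lruCache.put(cacheKey, loaded);
        return loaded;
    }
}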

Example 4 with Index

Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.

The class BlockletIndexStore, method getAll.

@Override
public List<BlockletIndexWrapper> getAll(List<TableBlockIndexUniqueIdentifierWrapper> tableSegmentUniqueIdentifiers) throws IOException {
    Map<String, Map<String, BlockMetaInfo>> segInfoCache = new HashMap<String, Map<String, BlockMetaInfo>>();
    List<BlockletIndexWrapper> blockletIndexWrappers = new ArrayList<>(tableSegmentUniqueIdentifiers.size());
    List<TableBlockIndexUniqueIdentifierWrapper> missedIdentifiersWrapper = new ArrayList<>();
    BlockletIndexWrapper blockletIndexWrapper = null;
    // Get the indexes for each index file from cache.
    try {
        for (TableBlockIndexUniqueIdentifierWrapper identifierWrapper : tableSegmentUniqueIdentifiers) {
            BlockletIndexWrapper indexWrapper = getIfPresent(identifierWrapper);
            if (indexWrapper != null) {
                blockletIndexWrappers.add(indexWrapper);
            } else {
                missedIdentifiersWrapper.add(identifierWrapper);
            }
        }
        if (missedIdentifiersWrapper.size() > 0) {
            for (TableBlockIndexUniqueIdentifierWrapper identifierWrapper : missedIdentifiersWrapper) {
                blockletIndexWrapper = get(identifierWrapper, segInfoCache);
                blockletIndexWrappers.add(blockletIndexWrapper);
            }
        }
    } catch (Throwable e) {
        if (null != blockletIndexWrapper) {
            List<BlockIndex> indexes = blockletIndexWrapper.getIndexes();
            for (Index index : indexes) {
                index.clear();
            }
        }
        throw new IOException("Problem in loading segment blocks: " + e.getMessage(), e);
    }
    return blockletIndexWrappers;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Index(org.apache.carbondata.core.index.dev.Index) BlockIndex(org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) IOException(java.io.IOException) List(java.util.List) Map(java.util.Map)
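
getAll above makes two passes: it first serves every identifier it can from the cache while collecting the misses, and then loads the misses one by one while sharing a single segInfoCache across them. A minimal, self-contained sketch of that two-pass shape is below; the generic names and functional parameters are illustrative, not the carbondata API.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
import java.util.function.Function;

// Hypothetical batch lookup: cache hits first, then load the misses with a shared context.
public final class BatchGetSketch {

    static <K, V, C> List<V> getAll(List<K> keys,
            Function<K, V> getIfPresent,                // cache lookup, returns null on a miss
            BiFunction<K, Map<String, C>, V> loader) {  // loads one miss, reusing shared context
        List<V> results = new ArrayList<>(keys.size());
        List<K> misses = new ArrayList<>();
        for (K key : keys) {
            V cached = getIfPresent.apply(key);
            if (cached != null) {
                results.add(cached);
            } else {
                misses.add(key);
            }
        }
        // one shared context (the segInfoCache equivalent) reused for all misses of this call
        Map<String, C> sharedContext = new HashMap<>();
        for (K key : misses) {
            results.add(loader.apply(key, sharedContext));
        }
        return results;
    }
}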

Example 5 with Index

Use of org.apache.carbondata.core.index.dev.Index in project carbondata by apache.

The class TableIndex, method pruneMultiThread.

private List<ExtendedBlocklet> pruneMultiThread(List<Segment> segments, final IndexFilter filter, List<ExtendedBlocklet> blocklets, final Map<Segment, List<Index>> indexes, int totalFiles) {
    /*
     *********************************************************************************
     * Below is the example of how this part of code works.
     * Consider a scenario of 5 segments with 10 indexes in each segment,
     * where each index holds one entry (file), so 50 files in total.
     *
     * The indexes in each segment look like below.
     * s0 [0-9], s1 [0-9], s2 [0-9], s3 [0-9], s4 [0-9]
     *
     * If the number of threads is 4, then filesPerEachThread = 50 / 4 = 12 files per thread.
     *
     * Each SegmentIndexGroup looks like below: [SegmentId, fromIndex, toIndex]
     * In each segment only the indexes between fromIndex and toIndex are processed.
     *
     * The final result will be (4 lists are created, as numOfThreadsForPruning is 4):
     * Thread1 list: s0 [0-9], s1 [0-1]  : 12 files
     * Thread2 list: s1 [2-9], s2 [0-3]  : 12 files
     * Thread3 list: s2 [4-9], s3 [0-5]  : 12 files
     * Thread4 list: s3 [6-9], s4 [0-9]  : 14 files
     * so each thread processes an almost equal number of files.
     *
     *********************************************************************************
     */
    int numOfThreadsForPruning = CarbonProperties.getNumOfThreadsForPruning();
    int filesPerEachThread = totalFiles / numOfThreadsForPruning;
    int prev;
    int filesCount = 0;
    int processedFileCount = 0;
    List<List<SegmentIndexGroup>> indexListForEachThread = new ArrayList<>(numOfThreadsForPruning);
    List<SegmentIndexGroup> segmentIndexGroupList = new ArrayList<>();
    Set<String> missingSISegments = filter.getMissingSISegments();
    for (Segment segment : segments) {
        List<Index> eachSegmentIndexList = indexes.get(segment);
        prev = 0;
        for (int i = 0; i < eachSegmentIndexList.size(); i++) {
            Index index = eachSegmentIndexList.get(i);
            filesCount += index.getNumberOfEntries();
            if (filesCount >= filesPerEachThread) {
                if (indexListForEachThread.size() != numOfThreadsForPruning - 1) {
                    // not the last thread's list: close the current group at index i
                    segmentIndexGroupList.add(new SegmentIndexGroup(segment, prev, i));
                    // the next group for this segment starts at the following index
                    prev = i + 1;
                    indexListForEachThread.add(segmentIndexGroupList);
                    segmentIndexGroupList = new ArrayList<>();
                    processedFileCount += filesCount;
                    filesCount = 0;
                } else {
                    // add remaining in the end
                    processedFileCount += filesCount;
                    filesCount = 0;
                }
            }
        }
        if (prev == 0 || prev != eachSegmentIndexList.size()) {
            // prev == 0: none of this segment's indexes were assigned yet, so add them all;
            // prev != eachSegmentIndexList.size(): add the remaining indexes of this segment
            segmentIndexGroupList.add(new SegmentIndexGroup(segment, prev, eachSegmentIndexList.size() - 1));
        }
    }
    // adding the last segmentList data
    indexListForEachThread.add(segmentIndexGroupList);
    processedFileCount += filesCount;
    if (processedFileCount != totalFiles) {
        // this should not happen
        throw new RuntimeException(" not all the files processed ");
    }
    if (indexListForEachThread.size() < numOfThreadsForPruning) {
        // if the indexes fit into fewer lists than numOfThreadsForPruning,
        // launch only as many threads as there are lists
        LOG.info("indexes is distributed in " + indexListForEachThread.size() + " threads");
        numOfThreadsForPruning = indexListForEachThread.size();
    }
    LOG.info("Number of threads selected for multi-thread block pruning is " + numOfThreadsForPruning + ". total files: " + totalFiles + ". total segments: " + segments.size());
    List<Future<Void>> results = new ArrayList<>(numOfThreadsForPruning);
    final Map<Segment, List<ExtendedBlocklet>> prunedBlockletMap = new ConcurrentHashMap<>(segments.size());
    final ExecutorService executorService = Executors.newFixedThreadPool(numOfThreadsForPruning);
    final String threadName = Thread.currentThread().getName();
    for (int i = 0; i < numOfThreadsForPruning; i++) {
        final List<SegmentIndexGroup> segmentIndexGroups = indexListForEachThread.get(i);
        results.add(executorService.submit(new Callable<Void>() {

            @Override
            public Void call() throws IOException {
                Thread.currentThread().setName(threadName);
                for (SegmentIndexGroup segmentIndexGroup : segmentIndexGroups) {
                    List<ExtendedBlocklet> pruneBlocklets = new ArrayList<>();
                    List<Index> indexList = indexes.get(segmentIndexGroup.getSegment());
                    SegmentProperties segmentProperties = segmentPropertiesFetcher.getSegmentPropertiesFromIndex(indexList.get(0));
                    Segment segment = segmentIndexGroup.getSegment();
                    boolean isExternalOrMissingSISegment = segment.getSegmentPath() != null || (missingSISegments != null && missingSISegments.contains(segment.getSegmentNo()));
                    if (filter.isResolvedOnSegment(segmentProperties)) {
                        FilterExecutor filterExecutor;
                        if (!isExternalOrMissingSISegment) {
                            filterExecutor = FilterUtil.getFilterExecutorTree(filter.getResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
                        } else {
                            filterExecutor = FilterUtil.getFilterExecutorTree(filter.getExternalSegmentResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
                        }
                        for (int i = segmentIndexGroup.getFromIndex(); i <= segmentIndexGroup.getToIndex(); i++) {
                            List<Blocklet> dmPruneBlocklets;
                            if (!isExternalOrMissingSISegment) {
                                dmPruneBlocklets = indexList.get(i).prune(filter.getResolver(), segmentProperties, filterExecutor, table);
                            } else {
                                dmPruneBlocklets = indexList.get(i).prune(filter.getExternalSegmentResolver(), segmentProperties, filterExecutor, table);
                            }
                            pruneBlocklets.addAll(addSegmentId(blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment), segment));
                        }
                    } else {
                        Expression filterExpression = filter.getNewCopyOfExpression();
                        FilterExecutor filterExecutor;
                        if (!isExternalOrMissingSISegment) {
                            filterExecutor = FilterUtil.getFilterExecutorTree(new IndexFilter(segmentProperties, table, filterExpression).getResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
                        } else {
                            filterExecutor = FilterUtil.getFilterExecutorTree(new IndexFilter(segmentProperties, table, filterExpression).getExternalSegmentResolver(), segmentProperties, null, table.getMinMaxCacheColumns(segmentProperties), false);
                        }
                        for (int i = segmentIndexGroup.getFromIndex(); i <= segmentIndexGroup.getToIndex(); i++) {
                            List<Blocklet> dmPruneBlocklets;
                            if (!isExternalOrMissingSISegment) {
                                dmPruneBlocklets = indexList.get(i).prune(filterExpression, segmentProperties, table, filterExecutor);
                            } else {
                                dmPruneBlocklets = indexList.get(i).prune(filter.getExternalSegmentFilter(), segmentProperties, table, filterExecutor);
                            }
                            pruneBlocklets.addAll(addSegmentId(blockletDetailsFetcher.getExtendedBlocklets(dmPruneBlocklets, segment), segment));
                        }
                    }
                    synchronized (prunedBlockletMap) {
                        List<ExtendedBlocklet> pruneBlockletsExisting = prunedBlockletMap.get(segmentIndexGroup.getSegment());
                        if (pruneBlockletsExisting != null) {
                            pruneBlockletsExisting.addAll(pruneBlocklets);
                        } else {
                            prunedBlockletMap.put(segmentIndexGroup.getSegment(), pruneBlocklets);
                        }
                    }
                }
                return null;
            }
        }));
    }
    executorService.shutdown();
    try {
        executorService.awaitTermination(2, TimeUnit.HOURS);
    } catch (InterruptedException e) {
        LOG.error("Error in pruning index in multi-thread: " + e.getMessage());
    }
    // check for error
    for (Future<Void> result : results) {
        try {
            result.get();
        } catch (InterruptedException | ExecutionException e) {
            throw new RuntimeException(e);
        }
    }
    for (Map.Entry<Segment, List<ExtendedBlocklet>> entry : prunedBlockletMap.entrySet()) {
        blocklets.addAll(entry.getValue());
    }
    return blocklets;
}
Also used : FilterExecutor(org.apache.carbondata.core.scan.filter.executer.FilterExecutor) ArrayList(java.util.ArrayList) Index(org.apache.carbondata.core.index.dev.Index) BlockIndex(org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) CoarseGrainIndex(org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex) Callable(java.util.concurrent.Callable) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ExecutionException(java.util.concurrent.ExecutionException) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet) Blocklet(org.apache.carbondata.core.indexstore.Blocklet) FineGrainBlocklet(org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet) Expression(org.apache.carbondata.core.scan.expression.Expression) ExecutorService(java.util.concurrent.ExecutorService) Future(java.util.concurrent.Future) SegmentProperties(org.apache.carbondata.core.datastore.block.SegmentProperties) HashMap(java.util.HashMap) Map(java.util.Map)
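
The grouping loop at the top of pruneMultiThread can be hard to follow on its own. Below is a small, self-contained sketch of the same splitting idea, run against the 5-segments, 10-indexes-per-segment, 1-file-per-index scenario from the comment block; Group and the other names are illustrative stand-ins (the processedFileCount sanity check of the real method is omitted), not carbondata classes.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Hypothetical illustration of how pruneMultiThread distributes indexes across threads.
public final class PruningSplitSketch {

    // stand-in for SegmentIndexGroup: a [segment, fromIndex, toIndex] triple
    static final class Group {
        final String segment;
        final int fromIndex;
        final int toIndex;

        Group(String segment, int fromIndex, int toIndex) {
            this.segment = segment;
            this.fromIndex = fromIndex;
            this.toIndex = toIndex;
        }

        @Override
        public String toString() {
            return segment + " [" + fromIndex + "-" + toIndex + "]";
        }
    }

    // entriesPerIndex[s][i] = number of files held by index i of segment s
    static List<List<Group>> split(int[][] entriesPerIndex, int numThreads) {
        int totalFiles = 0;
        for (int[] segment : entriesPerIndex) {
            for (int entries : segment) {
                totalFiles += entries;
            }
        }
        int filesPerThread = totalFiles / numThreads;
        List<List<Group>> groupsPerThread = new ArrayList<>();
        List<Group> current = new ArrayList<>();
        int filesCount = 0;
        for (int s = 0; s < entriesPerIndex.length; s++) {
            String segment = "s" + s;
            int prev = 0;
            for (int i = 0; i < entriesPerIndex[s].length; i++) {
                filesCount += entriesPerIndex[s][i];
                // close the current thread's list once it holds roughly filesPerThread files,
                // except for the last list, which absorbs whatever remains
                if (filesCount >= filesPerThread && groupsPerThread.size() != numThreads - 1) {
                    current.add(new Group(segment, prev, i));
                    prev = i + 1;
                    groupsPerThread.add(current);
                    current = new ArrayList<>();
                    filesCount = 0;
                }
            }
            if (prev == 0 || prev != entriesPerIndex[s].length) {
                // remaining indexes of this segment stay in the list being built
                current.add(new Group(segment, prev, entriesPerIndex[s].length - 1));
            }
        }
        groupsPerThread.add(current);
        return groupsPerThread;
    }

    public static void main(String[] args) {
        // 5 segments, 10 indexes each, 1 file per index, 4 pruning threads
        int[][] entries = new int[5][10];
        for (int[] row : entries) {
            Arrays.fill(row, 1);
        }
        // prints the same distribution as the comment block:
        // [s0 [0-9], s1 [0-1]], [s1 [2-9], s2 [0-3]], [s2 [4-9], s3 [0-5]], [s3 [6-9], s4 [0-9]]
        for (List<Group> threadList : split(entries, 4)) {
            System.out.println(threadList);
        }
    }
}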

Aggregations

Index (org.apache.carbondata.core.index.dev.Index) 10
ArrayList (java.util.ArrayList) 7
BlockIndex (org.apache.carbondata.core.indexstore.blockletindex.BlockIndex) 7
CoarseGrainIndex (org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex) 6
ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet) 5
IOException (java.io.IOException) 3
List (java.util.List) 3
SegmentProperties (org.apache.carbondata.core.datastore.block.SegmentProperties) 3
FineGrainBlocklet (org.apache.carbondata.core.index.dev.fgindex.FineGrainBlocklet) 3
Blocklet (org.apache.carbondata.core.indexstore.Blocklet) 3
FilterExecutor (org.apache.carbondata.core.scan.filter.executer.FilterExecutor) 3
HashMap (java.util.HashMap) 2
Map (java.util.Map) 2
CacheableIndex (org.apache.carbondata.core.index.dev.CacheableIndex) 2
BlockletIndexWrapper (org.apache.carbondata.core.indexstore.BlockletIndexWrapper) 2
TableBlockIndexUniqueIdentifierWrapper (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper) 2
Expression (org.apache.carbondata.core.scan.expression.Expression) 2
Path (org.apache.hadoop.fs.Path) 2
HashSet (java.util.HashSet) 1
Callable (java.util.concurrent.Callable) 1