Search in sources :

Example 1 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class CarbonInputFormat method getDataBlocksOfSegment.

/**
   * Collects the data blocks of the given segment that are relevant for the query.
   *
   * For every task index of the segment whose task id is set in
   * {@code matchedPartitions} (or for every task index when no partition set
   * is supplied), either all blocks of the index are taken (no filter resolver)
   * or only the blocks selected by the filter processor. After the blocks are
   * collected, a LOAD_BLOCKS_DRIVER statistic is recorded under the query id
   * read from the job configuration key "query.id".
   *
   * @param job                       job context; passed to the segment index lookup
   *                                  and used to read the query id
   * @param filterExpressionProcessor processor used to pick blocks when a resolver is given
   * @param absoluteTableIdentifier   identifier of the table being read
   * @param resolver                  filter resolver, or null to take all blocks
   * @param matchedPartitions         task ids to include, or null to include all tasks
   * @param segmentId                 id of the segment to read
   * @param cacheClient               cache client used to look up the segment indexes and
   *                                  to clear the segment access count afterwards
   * @param updateStatusManager       passed through to the segment index lookup
   * @return the selected data blocks; empty when the segment has no index map
   * @throws IOException declared by this method; the throwing call is not visible here
   */
private List<DataRefNode> getDataBlocksOfSegment(JobContext job, FilterExpressionProcessor filterExpressionProcessor, AbsoluteTableIdentifier absoluteTableIdentifier, FilterResolverIntf resolver, BitSet matchedPartitions, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskToIndex = null;
    try {
        QueryStatisticsRecorder driverRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
        QueryStatistic loadStatistic = new QueryStatistic();
        taskToIndex = getSegmentAbstractIndexs(job, absoluteTableIdentifier, segmentId, cacheClient, updateStatusManager);
        List<DataRefNode> selectedBlocks = new LinkedList<DataRefNode>();
        if (taskToIndex != null) {
            for (Map.Entry<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> indexEntry : taskToIndex.entrySet()) {
                int taskId = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(indexEntry.getKey().taskNo);
                // skip task indexes whose partition was not requested
                if (matchedPartitions != null && !matchedPartitions.get(taskId)) {
                    continue;
                }
                AbstractIndex index = indexEntry.getValue();
                if (resolver == null) {
                    // no filter: every block of the index qualifies
                    selectedBlocks.addAll(getDataBlocksOfIndex(index));
                } else {
                    // filter present: keep only the blocks the filter processor selects
                    selectedBlocks.addAll(filterExpressionProcessor.getFilterredBlocks(index.getDataRefNode(), resolver, index, absoluteTableIdentifier));
                }
            }
        }
        loadStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_DRIVER, System.currentTimeMillis());
        driverRecorder.recordStatisticsForDriver(loadStatistic, job.getConfiguration().get("query.id"));
        return selectedBlocks;
    } finally {
        // Clear the access count of this segment once the lookup is over, whether it
        // succeeded or failed (original note: lets low memory systems reuse the memory).
        if (taskToIndex != null) {
            List<TableSegmentUniqueIdentifier> segmentsToRelease = new ArrayList<>(1);
            segmentsToRelease.add(new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId));
            cacheClient.getSegmentAccessClient().clearAccessCount(segmentsToRelease);
        }
    }
}
Also used : DataRefNode(org.apache.carbondata.core.datastore.DataRefNode) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) QueryStatisticsRecorder(org.apache.carbondata.core.stats.QueryStatisticsRecorder) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore) QueryStatistic(org.apache.carbondata.core.stats.QueryStatistic)

Example 2 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class BlockIndexStoreTest method testLoadAndGetTaskIdToSegmentsMapForSingleSegment.

/**
 * Loads the index of a single table block through the cache and checks that exactly
 * one index is returned, then removes the blocks of that segment from the cache.
 *
 * @throws IOException declared by this test; the throwing call is not visible here
 */
@Test
public void testLoadAndGetTaskIdToSegmentsMapForSingleSegment() throws IOException {
    File file = getPartFile();
    TableBlockInfo info = new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    CarbonTableIdentifier carbonTableIdentifier = new CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
    AbsoluteTableIdentifier absoluteTableIdentifier = new AbsoluteTableIdentifier("/src/test/resources", carbonTableIdentifier);
    try {
        List<TableBlockUniqueIdentifier> tableBlockInfoList = getTableBlockUniqueIdentifierList(Arrays.asList(new TableBlockInfo[] { info }), absoluteTableIdentifier);
        List<AbstractIndex> loadAndGetBlocks = cache.getAll(tableBlockInfoList);
        assertTrue(loadAndGetBlocks.size() == 1);
    } catch (Exception e) {
        // Still fail the test, but keep the original exception as the cause so the
        // report shows why loading failed instead of a bare assertion failure.
        throw new AssertionError("Loading the block index for a single segment failed", e);
    }
    // remove the loaded blocks again so the cache does not keep them after this test
    List<String> segmentIds = new ArrayList<>();
    segmentIds.add(info.getSegmentId());
    cache.removeTableBlocks(segmentIds, absoluteTableIdentifier);
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) TableBlockUniqueIdentifier(org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) ArrayList(java.util.ArrayList) File(java.io.File) IOException(java.io.IOException) Test(org.junit.Test)

Example 3 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class SegmentTaskIndexStore method loadAndGetTaskIdToSegmentsMap.

/**
   * Loads the task-level indexes of a table segment into the LRU cache (unless
   * they are already cached and the segment has not been updated) and returns
   * the wrapper that holds the map of task bucket to segment index.
   *
   * One segment may have multiple tasks, so the blocks of the segment are
   * grouped by task and one index is loaded per task.
   *
   * The access count of the wrapper is incremented when it is found in the
   * cache and when it is newly created; it is not incremented when an already
   * cached wrapper is refreshed because the segment was updated.
   *
   * @param segmentToTableBlocksInfos    segment id to block info
   * @param absoluteTableIdentifier      absolute table identifier
   * @param tableSegmentUniqueIdentifier supplies the LRU cache key and the
   *                                     "segment updated" flag
   * @return wrapper holding the map of task id to segment index; null when
   *         segmentToTableBlocksInfos has no entries
   * @throws IOException declared by this method; an IndexBuilderException is
   *                     thrown when the LRU cache refuses the entry
   */
private SegmentTaskIndexWrapper loadAndGetTaskIdToSegmentsMap(Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos, AbsoluteTableIdentifier absoluteTableIdentifier, TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier) throws IOException {
    // iterator over the segment id to table block info entries
    Iterator<Map.Entry<String, List<TableBlockInfo>>> iteratorOverSegmentBlocksInfos = segmentToTableBlocksInfos.entrySet().iterator();
    Map<TaskBucketHolder, AbstractIndex> taskIdToSegmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    String segmentId = null;
    TaskBucketHolder taskBucketHolder = null;
    try {
        while (iteratorOverSegmentBlocksInfos.hasNext()) {
            // segment id to table block mapping
            Map.Entry<String, List<TableBlockInfo>> next = iteratorOverSegmentBlocksInfos.next();
            // group task id to table block info mapping for the segment
            // NOTE(review): the whole input map is passed here, not the current entry,
            // so the grouping is recomputed identically on every iteration - confirm intent
            Map<TaskBucketHolder, List<TableBlockInfo>> taskIdToTableBlockInfoMap = mappedAndGetTaskIdToTableBlockInfo(segmentToTableBlocksInfos);
            segmentId = next.getKey();
            // invalid timestamp range of the segment; its created-or-updated timestamp
            // is stored on the wrapper after loading
            UpdateVO updateVO = updateStatusManager.getInvalidTimestampRange(segmentId);
            // check if the segment is already loaded; if it is already loaded
            // there is no need to load the segment blocks again
            // NOTE(review): the cache key comes from tableSegmentUniqueIdentifier, not from
            // the segment id of this iteration, so it is the same for every entry
            String lruCacheKey = tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
            segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
            if (segmentTaskIndexWrapper == null || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
                // get the segment loader lock object; this is to avoid
                // the same segment getting loaded multiple times
                // in case of concurrent queries
                Object segmentLoderLockObject = segmentLockMap.get(lruCacheKey);
                if (null == segmentLoderLockObject) {
                    segmentLoderLockObject = addAndGetSegmentLock(lruCacheKey);
                }
                // acquire lock to load the segment
                synchronized (segmentLoderLockObject) {
                    // read the cache again under the lock: a concurrent query may have
                    // loaded the segment while this thread was waiting
                    segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
                    if (null == segmentTaskIndexWrapper || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
                        // if the segment was updated and a wrapper is already cached, reuse its
                        // existing task map so that it can be updated after loading the btree
                        if (tableSegmentUniqueIdentifier.isSegmentUpdated() && null != segmentTaskIndexWrapper) {
                            taskIdToSegmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
                        } else {
                            // creating a new map of task id to table segment and a new wrapper
                            // around it; only this branch increments the access count
                            taskIdToSegmentIndexMap = new HashMap<TaskBucketHolder, AbstractIndex>();
                            segmentTaskIndexWrapper = new SegmentTaskIndexWrapper(taskIdToSegmentIndexMap);
                            segmentTaskIndexWrapper.incrementAccessCount();
                        }
                        Iterator<Map.Entry<TaskBucketHolder, List<TableBlockInfo>>> iterator = taskIdToTableBlockInfoMap.entrySet().iterator();
                        // the required size is added on top of the size already recorded on the wrapper
                        long requiredSize = calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier);
                        segmentTaskIndexWrapper.setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize());
                        // the wrapper is put into the LRU cache before the indexes are loaded into its map
                        boolean isAddedToLruCache = lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize);
                        if (isAddedToLruCache) {
                            // load one index per task bucket and register it in the task map
                            while (iterator.hasNext()) {
                                Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList = iterator.next();
                                taskBucketHolder = taskToBlockInfoList.getKey();
                                taskIdToSegmentIndexMap.put(taskBucketHolder, loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(), absoluteTableIdentifier));
                            }
                        } else {
                            // the cache did not accept the entry
                            throw new IndexBuilderException("Can not load the segment. No Enough space available.");
                        }
                        // set the latest timestamp.
                        segmentTaskIndexWrapper.setRefreshedTimeStamp(updateVO.getCreatedOrUpdatedTimeStamp());
                        // tableSegmentMapTemp.put(next.getKey(), taskIdToSegmentIndexMap);
                        // removing from the segment lock map once the segment is loaded:
                        // if a concurrent query comes for the same segment
                        // it will wait on the lock, and afterwards the segment will already be
                        // loaded so the lock is not required; that is why the lock object
                        // is removed, as it won't be useful any more
                        segmentLockMap.remove(lruCacheKey);
                    } else {
                        // loaded by another thread in the meantime; only count the access
                        segmentTaskIndexWrapper.incrementAccessCount();
                    }
                }
            } else {
                // already cached and not updated; only count the access
                segmentTaskIndexWrapper.incrementAccessCount();
            }
        }
    } catch (IndexBuilderException e) {
        // NOTE(review): the exception itself is not passed to the logger; it is rethrown unchanged
        LOGGER.error("Problem while loading the segment");
        throw e;
    }
    return segmentTaskIndexWrapper;
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) Entry(java.util.Map.Entry) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IndexBuilderException(org.apache.carbondata.core.datastore.exception.IndexBuilderException)

Example 4 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class CacheProvider method createDictionaryCacheForGivenType.

/**
   * This method will create the cache for the given cache type and register it
   * in the cache type to cache map.
   *
   * Despite its name it handles the two dictionary cache types as well as the
   * executor and driver btree cache types. For any other cache type no cache is
   * created and null is stored in the map for that type.
   *
   * @param cacheType       type of cache
   * @param carbonStorePath store path
   */
private void createDictionaryCacheForGivenType(CacheType cacheType, String carbonStorePath) {
    Cache cacheObject = null;
    // The type constants are static members, so they are referenced through the
    // CacheType class in every branch rather than through the cacheType instance.
    if (cacheType.equals(CacheType.REVERSE_DICTIONARY)) {
        cacheObject = new ReverseDictionaryCache<DictionaryColumnUniqueIdentifier, Dictionary>(carbonStorePath, carbonLRUCache);
    } else if (cacheType.equals(CacheType.FORWARD_DICTIONARY)) {
        cacheObject = new ForwardDictionaryCache<DictionaryColumnUniqueIdentifier, Dictionary>(carbonStorePath, carbonLRUCache);
    } else if (cacheType.equals(CacheType.EXECUTOR_BTREE)) {
        cacheObject = new BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex>(carbonStorePath, carbonLRUCache);
    } else if (cacheType.equals(CacheType.DRIVER_BTREE)) {
        cacheObject = new SegmentTaskIndexStore(carbonStorePath, carbonLRUCache);
    }
    // NOTE(review): cacheObject is still null here for an unrecognised type - confirm
    // that the map implementation and its readers tolerate a null value
    cacheTypeToCacheMap.put(cacheType, cacheObject);
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) ForwardDictionaryCache(org.apache.carbondata.core.cache.dictionary.ForwardDictionaryCache) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) TableBlockUniqueIdentifier(org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore) ReverseDictionaryCache(org.apache.carbondata.core.cache.dictionary.ReverseDictionaryCache) ForwardDictionaryCache(org.apache.carbondata.core.cache.dictionary.ForwardDictionaryCache)

Example 5 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class BlockIndexStore method loadBlock.

/**
 * Creates an empty block index for the given table block, has it checked and
 * loaded under the block's LRU cache key, and then drops the block's entry from
 * the block info lock map.
 *
 * @param tableBlockUniqueIdentifier identifies the table block and its table
 * @return the block index instance that was handed to the loader
 * @throws IOException declared by this method; the throwing call is not visible here
 */
private AbstractIndex loadBlock(TableBlockUniqueIdentifier tableBlockUniqueIdentifier) throws IOException {
    final AbstractIndex blockIndex = new BlockIndex();
    // the BlockInfo wrapper is used both to build the cache key and as the lock map key
    final BlockInfo lockKey = new BlockInfo(tableBlockUniqueIdentifier.getTableBlockInfo());
    final String cacheKey = getLruCacheKey(tableBlockUniqueIdentifier.getAbsoluteTableIdentifier(), lockKey);
    checkAndLoadTableBlocks(blockIndex, tableBlockUniqueIdentifier, cacheKey);
    // the block is loaded now, so its lock object is no longer needed;
    // this line is not reached when loading throws
    blockInfoLock.remove(lockKey);
    return blockIndex;
}
Also used : BlockInfo(org.apache.carbondata.core.datastore.block.BlockInfo) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) BlockIndex(org.apache.carbondata.core.datastore.block.BlockIndex)

Aggregations

AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)19 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)10 ArrayList (java.util.ArrayList)9 TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier)7 SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper)5 List (java.util.List)4 SegmentTaskIndexStore (org.apache.carbondata.core.datastore.SegmentTaskIndexStore)4 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)4 QueryStatistic (org.apache.carbondata.core.stats.QueryStatistic)4 File (java.io.File)3 IOException (java.io.IOException)3 HashMap (java.util.HashMap)3 ExecutorService (java.util.concurrent.ExecutorService)3 TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier)3 IndexBuilderException (org.apache.carbondata.core.datastore.exception.IndexBuilderException)3 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)3 Test (org.junit.Test)3 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 Entry (java.util.Map.Entry)2