
Example 11 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class InMemoryBTreeIndex method getSegmentAbstractIndexs.

private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(JobContext job, AbsoluteTableIdentifier identifier) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    TableSegmentUniqueIdentifier segmentUniqueIdentifier = new TableSegmentUniqueIdentifier(identifier, segment.getId());
    try {
        SegmentTaskIndexWrapper segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(segmentUniqueIdentifier);
        if (null != segmentTaskIndexWrapper) {
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
        // if segment tree is not loaded, load the segment tree
        if (segmentIndexMap == null) {
            List<TableBlockInfo> tableBlockInfoList = getTableBlockInfo(job);
            Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
            segmentToTableBlocksInfos.put(segment.getId(), tableBlockInfoList);
            segmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
            // TODO: loadAndGetTaskIdToSegmentsMap can be optimized, use tableBlockInfoList as input
            // get Btree blocks for given segment
            segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(segmentUniqueIdentifier);
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
    } finally {
        cacheClient.close();
    }
    return segmentIndexMap;
}
Also used : CacheClient(org.apache.carbondata.hadoop.CacheClient) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) HashMap(java.util.HashMap) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier)
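
For context, a minimal sketch (not from the project) of how a caller might consume the per-task map this method returns; it uses only the types shown above plus standard java.util operations, and the class and method names here are hypothetical.

import java.util.Map;

import org.apache.carbondata.core.datastore.SegmentTaskIndexStore;
import org.apache.carbondata.core.datastore.block.AbstractIndex;

// Hypothetical consumer of the map returned by getSegmentAbstractIndexs:
// iterate the per-task entries and count how many B-tree indexes were loaded.
final class TaskIndexInspector {

    static int countLoadedIndexes(
        Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap) {
        int loaded = 0;
        for (Map.Entry<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> entry
            : segmentIndexMap.entrySet()) {
            // a null value means the task's index was filtered out or never loaded
            if (entry.getValue() != null) {
                loaded++;
            }
        }
        return loaded;
    }
}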

Example 12 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class SegmentTaskIndexStoreTest method checkExistenceOfSegmentBTree.

@Test
public void checkExistenceOfSegmentBTree() {
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, "SG100");
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = taskIndexStore.getIfPresent(tableSegmentUniqueIdentifier);
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> result = segmentTaskIndexWrapper != null ? segmentTaskIndexWrapper.getTaskIdToTableSegmentMap() : null;
    assertNull(result);
}
Also used : SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) Test(org.junit.Test)
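
getIfPresent is a pure cache peek: it never triggers a load, so a segment id that was never populated ("SG100" here) yields a null wrapper and the ternary collapses to null. For contrast, a minimal sketch of populating a segment before a positive lookup; this is an assumption, not project code, and it relies on the same taskIndexStore and absoluteTableIdentifier fixtures, a hypothetical blockInfos list, and the cache get/getIfPresent pattern shown in the other examples.

@Test
public void checkPresenceAfterExplicitLoad() throws IOException {
    TableSegmentUniqueIdentifier identifier =
        new TableSegmentUniqueIdentifier(absoluteTableIdentifier, "SG100");
    // attach the block infos so the store knows what to load for this segment
    // (blockInfos is a hypothetical, pre-collected List<TableBlockInfo>)
    Map<String, List<TableBlockInfo>> segmentToBlocks = new HashMap<>();
    segmentToBlocks.put("SG100", blockInfos);
    identifier.setSegmentToTableBlocksInfos(segmentToBlocks);
    // get(...) loads the segment B-tree and caches the wrapper ...
    SegmentTaskIndexWrapper loaded = taskIndexStore.get(identifier);
    // ... so a subsequent peek now returns it
    assertNotNull(taskIndexStore.getIfPresent(identifier));
    assertNotNull(loaded.getTaskIdToTableSegmentMap());
}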

Example 13 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class CarbonInputFormat method getBlockRowCount.

/**
   * Get the row count of each block and the mapping of segment id to block count.
   * @param job the Hadoop job context
   * @param absoluteTableIdentifier identifier of the carbon table
   * @return a BlockMappingVO holding the block-to-row-count and segment-to-block-count mappings
   * @throws IOException
   * @throws KeyGenException
   */
public BlockMappingVO getBlockRowCount(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier) throws IOException, KeyGenException {
    CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
    try {
        SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
        SegmentStatusManager.ValidAndInvalidSegmentsInfo validAndInvalidSegments = new SegmentStatusManager(absoluteTableIdentifier).getValidAndInvalidSegments();
        Map<String, Long> blockRowCountMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        Map<String, Long> segmentAndBlockCountMapping = new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        for (String eachValidSeg : validAndInvalidSegments.getValidSegments()) {
            long countOfBlocksInSeg = 0;
            Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskAbstractIndexMap = getSegmentAbstractIndexs(job, absoluteTableIdentifier, eachValidSeg, cacheClient, updateStatusManager);
            for (Map.Entry<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> taskMap : taskAbstractIndexMap.entrySet()) {
                AbstractIndex taskAbstractIndex = taskMap.getValue();
                countOfBlocksInSeg += new BlockLevelTraverser().getBlockRowMapping(taskAbstractIndex, blockRowCountMapping, eachValidSeg, updateStatusManager);
            }
            segmentAndBlockCountMapping.put(eachValidSeg, countOfBlocksInSeg);
        }
        return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
    } finally {
        cacheClient.close();
    }
}
Also used : BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) BlockLevelTraverser(org.apache.carbondata.hadoop.util.BlockLevelTraverser)
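
The loop fills two maps: a block-to-row-count mapping (populated by BlockLevelTraverser.getBlockRowMapping) and a segment-to-block-count mapping. A minimal sketch (hypothetical helper, plain java.util only) of how such maps roll up into totals:

import java.util.Map;

// Hypothetical roll-up over the maps carried by the returned BlockMappingVO:
// summing the per-block row counts gives the total row count across the valid
// segments; the same pattern applies to the per-segment block counts.
final class BlockMappingTotals {

    static long totalRows(Map<String, Long> blockRowCountMapping) {
        long total = 0;
        for (Long rows : blockRowCountMapping.values()) {
            total += rows;
        }
        return total;
    }
}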

Example 14 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class CarbonInputFormat method getSegmentAbstractIndexs.

/**
   * Returns the B-tree index for each task file of the given segment.
   * @param job the Hadoop job context
   * @param absoluteTableIdentifier identifier of the carbon table
   * @param segmentId id of the segment whose indexes are requested
   * @param cacheClient client used to access the segment index cache
   * @param updateStatusManager manager used to detect updated or invalidated blocks
   * @return mapping from task/bucket to its loaded AbstractIndex
   * @throws IOException
   */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    boolean isSegmentUpdated = false;
    Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
    segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
    UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
    if (null != segmentTaskIndexWrapper) {
        segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
            taskKeys = segmentIndexMap.keySet();
            isSegmentUpdated = true;
        }
    }
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null || isSegmentUpdated) {
        // if the segment was updated, only the TableBlockInfo instances of the updated
        // blocks have to be retrieved. They are then filtered against taskKeys: if a
        // block's task is already present, it is skipped because its btree is loaded.
        Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys = new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        List<TableBlockInfo> tableBlockInfoList = getTableBlockInfo(job, tableSegmentUniqueIdentifier, taskKeys, updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager, segmentId, validTaskKeys);
        if (!tableBlockInfoList.isEmpty()) {
            Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
            segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
            // get Btree blocks for given segment
            tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
            tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
            segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
        if (null != taskKeys) {
            Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> finalMap = new HashMap<>(validTaskKeys.size());
            for (SegmentTaskIndexStore.TaskBucketHolder key : validTaskKeys) {
                finalMap.put(key, segmentIndexMap.get(key));
            }
            segmentIndexMap = finalMap;
        }
    }
    return segmentIndexMap;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore)
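
When the segment was found updated, the final block narrows the cached map to the freshly validated task keys so stale entries are dropped. A standalone sketch of that filtering step with generic types (hypothetical name, not Carbon code):

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

// Keep only the entries whose key survived revalidation; like the original,
// a key that is valid but missing from the source map yields a null value.
final class MapFilter {

    static <K, V> Map<K, V> retainValidKeys(Map<K, V> all, Set<K> validKeys) {
        Map<K, V> filtered = new HashMap<>(validKeys.size());
        for (K key : validKeys) {
            filtered.put(key, all.get(key));
        }
        return filtered;
    }
}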

Example 15 with AbstractIndex

use of org.apache.carbondata.core.datastore.block.AbstractIndex in project carbondata by apache.

the class BlockIndexStoreTest method testloadAndGetTaskIdToSegmentsMapForSameBlockLoadedConcurrently.

@Test
public void testloadAndGetTaskIdToSegmentsMapForSameBlockLoadedConcurrently() throws IOException {
    String canonicalPath = new File(this.getClass().getResource("/").getPath() + "/../../").getCanonicalPath();
    File file = getPartFile();
    TableBlockInfo info = new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    TableBlockInfo info1 = new TableBlockInfo(file.getAbsolutePath(), 0, "0", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    TableBlockInfo info2 = new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    TableBlockInfo info3 = new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    TableBlockInfo info4 = new TableBlockInfo(file.getAbsolutePath(), 0, "1", new String[] { "loclhost" }, file.length(), ColumnarFormatVersion.V1);
    CarbonTableIdentifier carbonTableIdentifier = new CarbonTableIdentifier(CarbonCommonConstants.DATABASE_DEFAULT_NAME, "t3", "1");
    AbsoluteTableIdentifier absoluteTableIdentifier = new AbsoluteTableIdentifier("/src/test/resources", carbonTableIdentifier);
    ExecutorService executor = Executors.newFixedThreadPool(3);
    executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info, info1 }), absoluteTableIdentifier));
    executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info2, info3, info4 }), absoluteTableIdentifier));
    executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info, info1 }), absoluteTableIdentifier));
    executor.submit(new BlockLoaderThread(Arrays.asList(new TableBlockInfo[] { info2, info3, info4 }), absoluteTableIdentifier));
    executor.shutdown();
    try {
        executor.awaitTermination(1, TimeUnit.DAYS);
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
    List<TableBlockInfo> tableBlockInfos = Arrays.asList(new TableBlockInfo[] { info, info1, info2, info3, info4 });
    try {
        List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers = getTableBlockUniqueIdentifierList(tableBlockInfos, absoluteTableIdentifier);
        List<AbstractIndex> loadAndGetBlocks = cache.getAll(tableBlockUniqueIdentifiers);
        assertTrue(loadAndGetBlocks.size() == 5);
    } catch (Exception e) {
        assertTrue(false);
    }
    List<String> segmentIds = new ArrayList<>();
    for (TableBlockInfo tableBlockInfo : tableBlockInfos) {
        segmentIds.add(tableBlockInfo.getSegmentId());
    }
    cache.removeTableBlocks(segmentIds, absoluteTableIdentifier);
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) TableBlockUniqueIdentifier(org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier) ExecutorService(java.util.concurrent.ExecutorService) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) File(java.io.File) Test(org.junit.Test)
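
BlockLoaderThread is defined elsewhere in this test class and is not shown here. A plausible minimal sketch of an equivalent loader (an assumption, not the project's actual implementation, and it depends on the test class's cache field, its getTableBlockUniqueIdentifierList helper, and a java.util.concurrent.Callable import) resolves the unique identifiers for its block infos and pulls them through the shared cache, so several threads race to load the same blocks:

// Hypothetical equivalent of BlockLoaderThread: resolve identifiers for the
// given blocks and load them through the shared block index cache.
private final class BlockLoader implements Callable<Void> {

    private final List<TableBlockInfo> blockInfos;
    private final AbsoluteTableIdentifier identifier;

    BlockLoader(List<TableBlockInfo> blockInfos, AbsoluteTableIdentifier identifier) {
        this.blockInfos = blockInfos;
        this.identifier = identifier;
    }

    @Override public Void call() throws Exception {
        List<TableBlockUniqueIdentifier> identifiers =
            getTableBlockUniqueIdentifierList(blockInfos, identifier);
        // concurrent getAll calls for the same identifiers must still yield one
        // cached AbstractIndex per unique block
        cache.getAll(identifiers);
        return null;
    }
}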

Aggregations

AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex): 16
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 8
ArrayList (java.util.ArrayList): 6
TableBlockUniqueIdentifier (org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier): 6
SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper): 4
File (java.io.File): 3
IOException (java.io.IOException): 3
ExecutorService (java.util.concurrent.ExecutorService): 3
SegmentTaskIndexStore (org.apache.carbondata.core.datastore.SegmentTaskIndexStore): 3
TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier): 3
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 3
CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier): 3
QueryStatistic (org.apache.carbondata.core.stats.QueryStatistic): 3
Test (org.junit.Test): 3
HashMap (java.util.HashMap): 2
LinkedList (java.util.LinkedList): 2
List (java.util.List): 2
DataRefNode (org.apache.carbondata.core.datastore.DataRefNode): 2
BlockInfo (org.apache.carbondata.core.datastore.block.BlockInfo): 2
IndexBuilderException (org.apache.carbondata.core.datastore.exception.IndexBuilderException): 2