Search in sources :

Example 6 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class CarbonInputFormat method getSegmentAbstractIndexs.

/**
   * It returns index for each task file.
   * @param job
   * @param absoluteTableIdentifier
   * @param segmentId
   * @return
   * @throws IOException
   */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    boolean isSegmentUpdated = false;
    Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
    segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
    UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
    if (null != segmentTaskIndexWrapper) {
        segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
            taskKeys = segmentIndexMap.keySet();
            isSegmentUpdated = true;
        }
    }
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null || isSegmentUpdated) {
        // if the segment is updated only the updated blocks TableInfo instance has to be
        // retrieved. the same will be filtered based on taskKeys , if the task is same
        // for the block then dont add it since already its btree is loaded.
        Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys = new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
        List<TableBlockInfo> tableBlockInfoList = getTableBlockInfo(job, tableSegmentUniqueIdentifier, taskKeys, updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager, segmentId, validTaskKeys);
        if (!tableBlockInfoList.isEmpty()) {
            Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
            segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
            // get Btree blocks for given segment
            tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
            tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
            segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
            segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        }
        if (null != taskKeys) {
            Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> finalMap = new HashMap<>(validTaskKeys.size());
            for (SegmentTaskIndexStore.TaskBucketHolder key : validTaskKeys) {
                finalMap.put(key, segmentIndexMap.get(key));
            }
            segmentIndexMap = finalMap;
        }
    }
    return segmentIndexMap;
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) SegmentTaskIndexStore(org.apache.carbondata.core.datastore.SegmentTaskIndexStore)

Example 7 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class CarbonTableReader method getSegmentAbstractIndexs.

private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(/*JobContext job,*/
AbsoluteTableIdentifier absoluteTableIdentifier, CarbonTablePath tablePath, String segmentId, CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
    Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    boolean isSegmentUpdated = false;
    Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
    TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
    segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
    UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
    if (null != segmentTaskIndexWrapper) {
        segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
        if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
            taskKeys = segmentIndexMap.keySet();
            isSegmentUpdated = true;
        }
    }
    // if segment tree is not loaded, load the segment tree
    if (segmentIndexMap == null || isSegmentUpdated) {
        List<FileStatus> fileStatusList = new LinkedList<FileStatus>();
        List<String> segs = new ArrayList<>();
        segs.add(segmentId);
        FileSystem fs = getFileStatusOfSegments(new String[] { segmentId }, tablePath, fileStatusList);
        List<InputSplit> splits = getSplit(fileStatusList, fs);
        List<FileSplit> carbonSplits = new ArrayList<>();
        for (InputSplit inputSplit : splits) {
            FileSplit fileSplit = (FileSplit) inputSplit;
            String segId = CarbonTablePath.DataPathUtil.getSegmentId(//这里的seperator应该怎么加??
            fileSplit.getPath().toString());
            if (segId.equals(CarbonCommonConstants.INVALID_SEGMENT_ID)) {
                continue;
            }
            carbonSplits.add(fileSplit);
        }
        List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
        for (FileSplit inputSplit : carbonSplits) {
            if (isValidBlockBasedOnUpdateDetails(taskKeys, inputSplit, updateDetails, updateStatusManager, segmentId)) {
                BlockletInfos blockletInfos = new BlockletInfos(0, 0, //this level we do not need blocklet info!!!! Is this a trick?
                0);
                tableBlockInfoList.add(new TableBlockInfo(inputSplit.getPath().toString(), inputSplit.getStart(), segmentId, inputSplit.getLocations(), inputSplit.getLength(), blockletInfos, ColumnarFormatVersion.valueOf(CarbonCommonConstants.CARBON_DATA_FILE_DEFAULT_VERSION), null));
            }
        }
        Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
        segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
        // get Btree blocks for given segment
        tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
        tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
        segmentTaskIndexWrapper = cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
        segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
    }
    return segmentIndexMap;
}
Also used : ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) ImmutableList(com.google.common.collect.ImmutableList) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Example 8 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class CarbonTableReader method getInputSplits2.

public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel, Expression filters) throws Exception {
    // need apply filters to segment
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier = tableCacheModel.carbonTable.getAbsoluteTableIdentifier();
    CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
    List<String> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    // get all valid segments and set them into the configuration
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
    tableCacheModel.segments = segments.getValidSegments().toArray(new String[0]);
    if (segments.getValidSegments().size() == 0) {
        return new ArrayList<>(0);
    }
    // remove entry in the segment index if there are invalid segments
    invalidSegments.addAll(segments.getInvalidSegments());
    for (String invalidSegmentId : invalidSegments) {
        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
    }
    if (invalidSegments.size() > 0) {
        List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
        for (String segId : invalidSegments) {
            invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segId));
        }
        cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
    }
    // get filter for segment
    CarbonInputFormatUtil.processFilterExpression(filters, tableCacheModel.carbonTable);
    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filters, tableCacheModel.carbonTable.getAbsoluteTableIdentifier());
    List<CarbonLocalInputSplit> result = new ArrayList<>();
    // for each segment fetch blocks matching filter in Driver BTree
    for (String segmentNo : tableCacheModel.segments) {
        try {
            List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(filterExpressionProcessor, absoluteTableIdentifier, tableCacheModel.carbonTablePath, filterInterface, segmentNo, cacheClient, updateStatusManager);
            for (DataRefNode dataRefNode : dataRefNodes) {
                BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
                TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
                if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()), updateStatusManager)) {
                    continue;
                }
                result.add(new CarbonLocalInputSplit(segmentNo, tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), Arrays.asList(tableBlockInfo.getLocations()), tableBlockInfo.getBlockletInfos().getNoOfBlockLets(), tableBlockInfo.getVersion().number()));
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
    cacheClient.close();
    return result;
}
Also used : CacheClient(org.apache.carbondata.hadoop.CacheClient) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) IndexBuilderException(org.apache.carbondata.core.datastore.exception.IndexBuilderException) IOException(java.io.IOException) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)

Aggregations

UpdateVO (org.apache.carbondata.core.mutate.UpdateVO)8 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)3 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)3 FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)3 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)3 IOException (java.io.IOException)2 Map (java.util.Map)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier)2 AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex)2 SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper)2 IndexBuilderException (org.apache.carbondata.core.datastore.exception.IndexBuilderException)2 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)2 Expression (org.apache.carbondata.core.scan.expression.Expression)2 FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor)2 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)2 InputSplit (org.apache.hadoop.mapreduce.InputSplit)2 ImmutableList (com.google.common.collect.ImmutableList)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1