Example 1 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class SegmentUpdateStatusManager method getInvalidTimestampRange.

/**
   * Returns the invalid timestamp range of a segment.
   * @param segmentId id of the segment to look up
   * @return an UpdateVO holding the fact load time and, when update
   *         deltas exist, the delta start and end timestamps
   */
public UpdateVO getInvalidTimestampRange(String segmentId) {
    UpdateVO range = new UpdateVO();
    for (LoadMetadataDetails segment : segmentDetails) {
        if (segment.getLoadName().equalsIgnoreCase(segmentId)) {
            range.setSegmentId(segmentId);
            range.setFactTimestamp(segment.getLoadStartTime());
            if (!segment.getUpdateDeltaStartTimestamp().isEmpty() && !segment.getUpdateDeltaEndTimestamp().isEmpty()) {
                range.setUpdateDeltaStartTimestamp(CarbonUpdateUtil.getTimeStampAsLong(segment.getUpdateDeltaStartTimestamp()));
                range.setLatestUpdateTimestamp(CarbonUpdateUtil.getTimeStampAsLong(segment.getUpdateDeltaEndTimestamp()));
            }
        }
    }
    return range;
}
Also used : UpdateVO(org.apache.carbondata.core.mutate.UpdateVO)
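
For context, here is a minimal usage sketch, not taken from the CarbonData sources: it assumes an AbsoluteTableIdentifier obtained elsewhere, uses only calls visible in the examples on this page, and treats the segment id "0" as a placeholder.

import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.mutate.UpdateVO;
import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager;

public final class InvalidRangeSketch {
    static void printRange(AbsoluteTableIdentifier identifier) {
        SegmentUpdateStatusManager manager = new SegmentUpdateStatusManager(identifier);
        // "0" is a placeholder segment id
        UpdateVO range = manager.getInvalidTimestampRange("0");
        // the delta timestamps remain unset when the segment has no update deltas
        System.out.println("fact load time: " + range.getFactTimestamp());
        System.out.println("first update delta: " + range.getUpdateDeltaStartTimestamp());
    }
}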

Example 2 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class BlockIndexStore method removeTableBlocksIfHorizontalCompactionDone.

/**
   * Removes table blocks at executor level if horizontal compaction is done.
   * @param queryModel query model carrying the invalid-segment details
   */
public void removeTableBlocksIfHorizontalCompactionDone(QueryModel queryModel) {
    // get the invalid segments blocks details
    Map<String, UpdateVO> invalidBlocksVO = queryModel.getInvalidBlockVOForSegmentId();
    if (!invalidBlocksVO.isEmpty()) {
        UpdateVO updateMetadata;
        Iterator<Map.Entry<String, UpdateVO>> itr = invalidBlocksVO.entrySet().iterator();
        while (itr.hasNext()) {
            Map.Entry<String, UpdateVO> entry = itr.next();
            TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier = new TableSegmentUniqueIdentifier(queryModel.getAbsoluteTableIdentifier(), entry.getKey());
            List<BlockInfo> blockInfos = segmentIdToBlockListMap.get(tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier());
            if (null != blockInfos) {
                for (BlockInfo blockInfo : blockInfos) {
                    // the block's unique name ends with its timestamp; extract it
                    String blockUniqueName = blockInfo.getBlockUniqueName();
                    long blockTimeStamp = Long.parseLong(blockUniqueName.substring(blockUniqueName.lastIndexOf('-') + 1));
                    updateMetadata = entry.getValue();
                    if (CarbonUpdateUtil.isMaxQueryTimeoutExceeded(blockTimeStamp)) {
                        // evict blocks written after the fact load but before the
                        // first update delta: they reflect an already compacted state
                        if (blockTimeStamp > updateMetadata.getFactTimestamp() && updateMetadata.getUpdateDeltaStartTimestamp() != null && blockTimeStamp < updateMetadata.getUpdateDeltaStartTimestamp()) {
                            String lruCacheKey = getLruCacheKey(queryModel.getAbsoluteTableIdentifier(), blockInfo);
                            lruCache.remove(lruCacheKey);
                        }
                    }
                }
            }
        }
    }
}
Also used : BlockInfo(org.apache.carbondata.core.datastore.block.BlockInfo) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) Map(java.util.Map)
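
The eviction condition in the loop above reads more clearly as a standalone predicate. The helper below is an illustrative restatement, not CarbonData API; it assumes the UpdateVO getters return boxed Long values, as the null check in the original suggests.

import org.apache.carbondata.core.mutate.UpdateVO;

final class StaleBlockCheck {
    // a cached block is stale when it was written after the fact load but
    // before the first update delta, i.e. it reflects a pre-update state
    static boolean isStale(long blockTimestamp, UpdateVO updateMetadata) {
        Long deltaStart = updateMetadata.getUpdateDeltaStartTimestamp();
        return blockTimestamp > updateMetadata.getFactTimestamp()
            && deltaStart != null
            && blockTimestamp < deltaStart;
    }
}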

Example 3 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class SegmentTaskIndexStore method loadAndGetTaskIdToSegmentsMap.

/**
   * Below method will be used to load the segments. One segment may have
   * multiple tasks, so the table segment will be loaded based on task id
   * and the method will return the map of task id to table segment.
   *
   * @param segmentToTableBlocksInfos segment id to block info
   * @param absoluteTableIdentifier   absolute table identifier
   * @return map of task id to segment mapping
   * @throws IOException if the segment blocks cannot be read
   */
private SegmentTaskIndexWrapper loadAndGetTaskIdToSegmentsMap(Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos, AbsoluteTableIdentifier absoluteTableIdentifier, TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier) throws IOException {
    // task id to segment map
    Iterator<Map.Entry<String, List<TableBlockInfo>>> iteratorOverSegmentBlocksInfos = segmentToTableBlocksInfos.entrySet().iterator();
    Map<TaskBucketHolder, AbstractIndex> taskIdToSegmentIndexMap = null;
    SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    String segmentId = null;
    TaskBucketHolder taskBucketHolder = null;
    try {
        while (iteratorOverSegmentBlocksInfos.hasNext()) {
            // segment id to table block mapping
            Map.Entry<String, List<TableBlockInfo>> next = iteratorOverSegmentBlocksInfos.next();
            // group task id to table block info mapping for the segment
            Map<TaskBucketHolder, List<TableBlockInfo>> taskIdToTableBlockInfoMap = mappedAndGetTaskIdToTableBlockInfo(segmentToTableBlocksInfos);
            segmentId = next.getKey();
            // get the invalid timestamp range of the segment; it is used
            // below to stamp the refreshed time on the index wrapper
            UpdateVO updateVO = updateStatusManager.getInvalidTimestampRange(segmentId);
            // if the segment is already loaded there is no need
            // to load its blocks again
            String lruCacheKey = tableSegmentUniqueIdentifier.getUniqueTableSegmentIdentifier();
            segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
            if (segmentTaskIndexWrapper == null || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
                // get the segment loader lock object; this avoids the same
                // segment being loaded multiple times by concurrent queries
                Object segmentLoaderLockObject = segmentLockMap.get(lruCacheKey);
                if (null == segmentLoaderLockObject) {
                    segmentLoaderLockObject = addAndGetSegmentLock(lruCacheKey);
                }
                // acquire the lock to load the segment
                synchronized (segmentLoaderLockObject) {
                    segmentTaskIndexWrapper = (SegmentTaskIndexWrapper) lruCache.get(lruCacheKey);
                    if (null == segmentTaskIndexWrapper || tableSegmentUniqueIdentifier.isSegmentUpdated()) {
                        // if the segment was updated, reuse the existing task map
                        // so that it can be refreshed after loading the btree
                        if (tableSegmentUniqueIdentifier.isSegmentUpdated() && null != segmentTaskIndexWrapper) {
                            taskIdToSegmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
                        } else {
                            // create a map of task id to table segment
                            taskIdToSegmentIndexMap = new HashMap<TaskBucketHolder, AbstractIndex>();
                            segmentTaskIndexWrapper = new SegmentTaskIndexWrapper(taskIdToSegmentIndexMap);
                            segmentTaskIndexWrapper.incrementAccessCount();
                        }
                        Iterator<Map.Entry<TaskBucketHolder, List<TableBlockInfo>>> iterator = taskIdToTableBlockInfoMap.entrySet().iterator();
                        long requiredSize = calculateRequiredSize(taskIdToTableBlockInfoMap, absoluteTableIdentifier);
                        segmentTaskIndexWrapper.setMemorySize(requiredSize + segmentTaskIndexWrapper.getMemorySize());
                        boolean isAddedToLruCache = lruCache.put(lruCacheKey, segmentTaskIndexWrapper, requiredSize);
                        if (isAddedToLruCache) {
                            while (iterator.hasNext()) {
                                Map.Entry<TaskBucketHolder, List<TableBlockInfo>> taskToBlockInfoList = iterator.next();
                                taskBucketHolder = taskToBlockInfoList.getKey();
                                taskIdToSegmentIndexMap.put(taskBucketHolder, loadBlocks(taskBucketHolder, taskToBlockInfoList.getValue(), absoluteTableIdentifier));
                            }
                        } else {
                            throw new IndexBuilderException("Cannot load the segment. Not enough space available.");
                        }
                        // set the latest timestamp
                        segmentTaskIndexWrapper.setRefreshedTimeStamp(updateVO.getCreatedOrUpdatedTimeStamp());
                        // remove the entry from the segment lock map: once the
                        // segment is loaded, a concurrent query for the same
                        // segment waiting on the lock will find it already
                        // loaded, so the lock object is no longer useful
                        segmentLockMap.remove(lruCacheKey);
                    } else {
                        segmentTaskIndexWrapper.incrementAccessCount();
                    }
                }
            } else {
                segmentTaskIndexWrapper.incrementAccessCount();
            }
        }
    } catch (IndexBuilderException e) {
        LOGGER.error("Problem while loading the segment " + segmentId);
        throw e;
    }
    return segmentTaskIndexWrapper;
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) Entry(java.util.Map.Entry) SegmentTaskIndexWrapper(org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper) AbstractIndex(org.apache.carbondata.core.datastore.block.AbstractIndex) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IndexBuilderException(org.apache.carbondata.core.datastore.exception.IndexBuilderException)
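
The locking dance in this method is an instance of per-key double-checked locking: check the cache, synchronize on a per-key lock object, then check again before loading. A generic, self-contained sketch of the pattern, with illustrative names only:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

public final class PerKeyLoader<K, V> {
    private final Map<K, V> cache = new ConcurrentHashMap<>();
    private final Map<K, Object> locks = new ConcurrentHashMap<>();

    public V getOrLoad(K key, Function<K, V> loader) {
        V value = cache.get(key);
        if (value == null) {
            Object lock = locks.computeIfAbsent(key, k -> new Object());
            synchronized (lock) {
                // second check under the lock: another thread may have loaded
                value = cache.get(key);
                if (value == null) {
                    value = loader.apply(key);
                    cache.put(key, value);
                    // mirrors segmentLockMap.remove above: once loaded, waiters
                    // find the cached value and the lock is no longer needed
                    locks.remove(key);
                }
            }
        }
        return value;
    }
}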

Example 4 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class CarbonInputFormat method getSplits.

/**
   * {@inheritDoc}
   * The configuration FileInputFormat.INPUT_DIR is used to get the
   * table path to read.
   *
   * @param job the job context carrying the configuration
   * @return list of CarbonInputSplit as List<InputSplit>
   * @throws IOException if the schema or segment metadata cannot be read
   */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    try {
        List<String> invalidSegments = new ArrayList<>();
        List<UpdateVO> invalidTimestampsList = new ArrayList<>();
        // get all valid segments and set them into the configuration
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
            setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
            if (segments.getValidSegments().size() == 0) {
                return new ArrayList<>(0);
            }
            // remove entry in the segment index if there are invalid segments
            invalidSegments.addAll(segments.getInvalidSegments());
            for (String invalidSegmentId : invalidSegments) {
                invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
            }
            if (invalidSegments.size() > 0) {
                List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
                for (String segId : invalidSegments) {
                    invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
                }
                cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
            }
        }
        // process and resolve the expression
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
        // this will be null in case of corrupt schema file.
        if (null == carbonTable) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
        // prune partitions for filter query on partition table
        BitSet matchedPartitions = null;
        if (null != filter) {
            PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
            if (null != partitionInfo) {
                Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
                matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
                if (matchedPartitions.cardinality() == 0) {
                    // no partition is required
                    return new ArrayList<InputSplit>();
                }
                if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
                    // all partitions are required, no need to prune partitions
                    matchedPartitions = null;
                }
            }
        }
        FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
        // do block filtering and get split
        List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
        // pass the invalid segment to task side in order to remove index entry in task side
        if (invalidSegments.size() > 0) {
            for (InputSplit split : splits) {
                ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
                ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
            }
        }
        return splits;
    } finally {
        // close the cache client to clear the LRU cache memory
        cacheClient.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
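
The partition-pruning branch above encodes a small convention: an empty matched set means no split can qualify, while a fully populated set means pruning buys nothing and is dropped. A minimal sketch of that decision, with hypothetical names:

import java.util.BitSet;

final class PartitionPruneSketch {
    // returns null when every partition matched, signalling "skip pruning";
    // an empty BitSet tells the caller to return zero splits
    static BitSet normalize(BitSet matchedPartitions, int numPartitions) {
        if (matchedPartitions.cardinality() == numPartitions) {
            return null;
        }
        return matchedPartitions;
    }
}

Returning null for the all-partitions case lets the downstream split computation treat "no pruning" and "no filter" uniformly, which matches how getSplits above resets matchedPartitions.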

Example 5 with UpdateVO

use of org.apache.carbondata.core.mutate.UpdateVO in project carbondata by apache.

the class CarbonInputFormat method getQueryModel.

public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    CarbonTable carbonTable = getCarbonTable(configuration);
    // getting the table absoluteTableIdentifier from the carbonTable
    // to avoid unnecessary deserialization
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
    QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
    // set the filter to the query model in order to filter blocklet before scan
    Expression filter = getFilterPredicates(configuration);
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    queryModel.setFilterExpressionResolverTree(filterIntf);
    // update the file-level index store if there are invalid segments
    if (inputSplit instanceof CarbonMultiBlockSplit) {
        CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
        List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
        if (invalidSegments.size() > 0) {
            queryModel.setInvalidSegmentIds(invalidSegments);
        }
        List<UpdateVO> invalidTimestampRangeList = split.getAllSplits().get(0).getInvalidTimestampRange();
        if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
            queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
        }
    }
    return queryModel;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) Configuration(org.apache.hadoop.conf.Configuration) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
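
On the task side, the invalid-segment metadata travels on the split and is copied into the query model only when present. A minimal generic sketch of that guard, with hypothetical names; the real calls are the null/empty checks in getQueryModel above.

import java.util.List;
import java.util.function.Consumer;

final class HandoffSketch {
    // copy a list from the split into the model only when it carries data
    static <T> void copyIfPresent(List<T> fromSplit, Consumer<List<T>> intoModel) {
        if (fromSplit != null && !fromSplit.isEmpty()) {
            intoModel.accept(fromSplit);
        }
    }
}

For example, copyIfPresent(split.getAllSplits().get(0).getInvalidTimestampRange(), queryModel::setInvalidBlockForSegmentId) would reproduce the second guard above.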

Aggregations

UpdateVO (org.apache.carbondata.core.mutate.UpdateVO): 8
TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo): 3
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 3
FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf): 3
SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 3
IOException (java.io.IOException): 2
Map (java.util.Map): 2
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2
TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier): 2
AbstractIndex (org.apache.carbondata.core.datastore.block.AbstractIndex): 2
SegmentTaskIndexWrapper (org.apache.carbondata.core.datastore.block.SegmentTaskIndexWrapper): 2
IndexBuilderException (org.apache.carbondata.core.datastore.exception.IndexBuilderException): 2
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 2
Expression (org.apache.carbondata.core.scan.expression.Expression): 2
FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor): 2
SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 2
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 2
ImmutableList (com.google.common.collect.ImmutableList): 1
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1