Use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
The class CarbonFileInputFormat, method getSplits.
/**
* Get the list of blocks/blocklets and convert them to CarbonInputSplit
* @param job JobContext with Configuration
* @return list of CarbonInputSplit
* @throws IOException
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
if (null == carbonTable) {
throw new IOException("Missing/Corrupt schema file for table.");
}
AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
// get all valid segments and set them into the configuration
// check for externalTable segment (Segment_null)
// process and resolve the expression
ReadCommittedScope readCommittedScope;
if (carbonTable.isTransactionalTable()) {
readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath() + "/Fact/Part0/Segment_null/", job.getConfiguration());
} else {
readCommittedScope = getReadCommittedScope(job.getConfiguration());
if (readCommittedScope == null) {
readCommittedScope = new LatestFilesReadCommittedScope(identifier.getTablePath(), job.getConfiguration());
} else {
readCommittedScope.setConfiguration(job.getConfiguration());
}
}
// this will be null in case of corrupt schema file.
IndexFilter filter = getFilterPredicates(job.getConfiguration());
// if external table Segments are found, add it to the List
List<Segment> externalTableSegments = new ArrayList<>();
Segment seg;
if (carbonTable.isTransactionalTable()) {
// In some cases the SDK writes into the segment path instead of the table path, i.e. inside
// "Fact/Part0/Segment_null". The segment in this case is named "null". Such a table is
// treated by default as a transactional table and goes through the CarbonFileInputFormat
// path. That scenario is handled in the code below.
seg = new Segment("null", null, readCommittedScope);
externalTableSegments.add(seg);
} else {
LoadMetadataDetails[] loadMetadataDetails = readCommittedScope.getSegmentList();
for (LoadMetadataDetails load : loadMetadataDetails) {
seg = new Segment(load.getLoadName(), null, readCommittedScope);
if (fileLists != null) {
for (Object fileList : fileLists) {
String timestamp = CarbonTablePath.DataFileUtil.getTimeStampFromFileName(fileList.toString());
if (timestamp.equals(seg.getSegmentNo())) {
externalTableSegments.add(seg);
break;
}
}
} else {
externalTableSegments.add(seg);
}
}
}
List<InputSplit> splits = new ArrayList<>();
boolean useBlockIndex = job.getConfiguration().getBoolean("filter_blocks", true);
// scenarios
if (filter != null) {
filter.resolve(false);
}
if (useBlockIndex) {
// do block filtering and get split
splits = getSplits(job, filter, externalTableSegments);
} else {
List<CarbonFile> carbonFiles;
if (null != this.fileLists) {
carbonFiles = getAllCarbonDataFiles(this.fileLists);
} else {
carbonFiles = getAllCarbonDataFiles(carbonTable.getTablePath());
}
List<String> allDeleteDeltaFiles = getAllDeleteDeltaFiles(carbonTable.getTablePath());
for (CarbonFile carbonFile : carbonFiles) {
// Segment id is set to "null" because the SDK does not write carbondata files with respect
// to segments, so no specific segment name is present for this load.
CarbonInputSplit split = new CarbonInputSplit("null", carbonFile.getAbsolutePath(), 0, carbonFile.getLength(), carbonFile.getLocations(), FileFormat.COLUMNAR_V3);
split.setVersion(ColumnarFormatVersion.V3);
BlockletDetailInfo info = new BlockletDetailInfo();
split.setDetailInfo(info);
info.setBlockSize(carbonFile.getLength());
info.setVersionNumber(split.getVersion().number());
info.setUseMinMaxForPruning(false);
if (CollectionUtils.isNotEmpty(allDeleteDeltaFiles)) {
split.setDeleteDeltaFiles(getDeleteDeltaFiles(carbonFile.getAbsolutePath(), allDeleteDeltaFiles));
}
splits.add(split);
}
splits.sort(Comparator.comparing(o -> ((CarbonInputSplit) o).getFilePath()));
}
setAllColumnProjectionIfNotConfigured(job, carbonTable);
return splits;
}
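For reference, a minimal standalone sketch of the non-transactional branch above: it builds Segment objects from a LatestFilesReadCommittedScope over a hypothetical SDK-written table path. The path "/tmp/carbon_output" and the import locations are assumptions for the illustration; the constructor and method calls themselves are the ones the snippet uses.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.carbondata.core.index.Segment;
import org.apache.carbondata.core.readcommitter.LatestFilesReadCommittedScope;
import org.apache.carbondata.core.readcommitter.ReadCommittedScope;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;

import org.apache.hadoop.conf.Configuration;

public class SegmentListingSketch {
  public static void main(String[] args) throws IOException {
    // Hypothetical path of a table written by the CarbonData SDK (non-transactional).
    String tablePath = "/tmp/carbon_output";
    Configuration conf = new Configuration();
    // Same scope type that the else-branch of getSplits() falls back to.
    ReadCommittedScope scope = new LatestFilesReadCommittedScope(tablePath, conf);
    // One Segment per load, mirroring the non-transactional loop in getSplits().
    List<Segment> segments = new ArrayList<>();
    for (LoadMetadataDetails load : scope.getSegmentList()) {
      segments.add(new Segment(load.getLoadName(), null, scope));
    }
    for (Segment segment : segments) {
      System.out.println("segment: " + segment.getSegmentNo());
    }
  }
}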
Use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
The class CarbonOutputCommitter, method commitJobFinal.
private void commitJobFinal(JobContext context, CarbonLoadModel loadModel, OperationContext operationContext, CarbonTable carbonTable, String uniqueId) throws IOException {
if (operationContext != null) {
LoadEvents.LoadTablePostStatusUpdateEvent postStatusUpdateEvent = new LoadEvents.LoadTablePostStatusUpdateEvent(loadModel);
try {
OperationListenerBus.getInstance().fireEvent(postStatusUpdateEvent, operationContext);
} catch (Exception e) {
throw new IOException(e);
}
}
String updateTime = context.getConfiguration().get(CarbonTableOutputFormat.UPDATE_TIMESTAMP, uniqueId);
String segmentsToBeDeleted = context.getConfiguration().get(CarbonTableOutputFormat.SEGMENTS_TO_BE_DELETED, "");
List<Segment> segmentDeleteList = Collections.emptyList();
if (!segmentsToBeDeleted.trim().isEmpty()) {
segmentDeleteList = Segment.toSegmentList(segmentsToBeDeleted.split(","), null);
}
boolean isUpdateStatusFileUpdateRequired = (context.getConfiguration().get(CarbonTableOutputFormat.UPDATE_TIMESTAMP) != null);
if (updateTime != null) {
CarbonUpdateUtil.updateTableMetadataStatus(Collections.singleton(loadModel.getSegment()), carbonTable, updateTime, true, isUpdateStatusFileUpdateRequired, segmentDeleteList);
}
}
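As a small illustration of the Segment.toSegmentList call above, the following sketch converts a made-up comma-separated segment string (the same shape as the SEGMENTS_TO_BE_DELETED configuration value) into a Segment list; the value "0,2,3" is invented for the example.
import java.util.Collections;
import java.util.List;

import org.apache.carbondata.core.index.Segment;

public class SegmentDeleteListSketch {
  public static void main(String[] args) {
    // Hypothetical value of the SEGMENTS_TO_BE_DELETED configuration property.
    String segmentsToBeDeleted = "0,2,3";
    List<Segment> segmentDeleteList = Collections.emptyList();
    if (!segmentsToBeDeleted.trim().isEmpty()) {
      // Same conversion used by commitJobFinal(); the second argument
      // is passed as null here, exactly as the method above does.
      segmentDeleteList = Segment.toSegmentList(segmentsToBeDeleted.split(","), null);
    }
    for (Segment segment : segmentDeleteList) {
      System.out.println("to be deleted: " + segment.getSegmentNo());
    }
  }
}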
Use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
The class CarbonOutputCommitter, method overwritePartitions.
/**
* Overwrite the partitions in case of an overwrite query. It just updates the partition map files
* of all segment files.
*/
private String overwritePartitions(CarbonLoadModel loadModel, LoadMetadataDetails newMetaEntry, String uuid, List<String> partitionList, List<PartitionSpec> currentPartitionsOfTable) throws IOException {
CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();
if (partitionList != null && partitionList.size() > 0) {
// check if any partitions overlaps
List<String> overlappingPartitions = currentPartitionsOfTable.stream().map(partitionSpec -> partitionSpec.getLocation().toString()).filter(partitionList::contains).collect(Collectors.toList());
if (!overlappingPartitions.isEmpty()) {
List<LoadMetadataDetails> validLoadMetadataDetails = loadModel.getLoadMetadataDetails().stream().filter(loadMetadataDetail -> !loadMetadataDetail.getLoadName().equalsIgnoreCase(newMetaEntry.getLoadName())).collect(Collectors.toList());
String uniqueId = String.valueOf(System.currentTimeMillis());
List<String> toBeUpdatedSegments = new ArrayList<>(validLoadMetadataDetails.size());
List<String> toBeDeletedSegments = new ArrayList<>(validLoadMetadataDetails.size());
// First drop the partitions from partition mapper files of each segment
for (LoadMetadataDetails loadMetadataDetail : validLoadMetadataDetails) {
new SegmentFileStore(table.getTablePath(), loadMetadataDetail.getSegmentFile()).dropPartitions(loadMetadataDetail.getLoadName(), partitionList, uniqueId, toBeDeletedSegments, toBeUpdatedSegments);
}
newMetaEntry.setUpdateStatusFileName(uniqueId);
// Commit the removed partitions in carbon store.
CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, uuid, Segment.toSegmentList(toBeDeletedSegments, null), Segment.toSegmentList(toBeUpdatedSegments, null), false);
return uniqueId;
} else {
CarbonLoaderUtil.recordNewLoadMetadata(newMetaEntry, loadModel, false, false, uuid, false);
return null;
}
}
return null;
}
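To make the overlap check concrete, here is a minimal sketch with invented partition locations: it applies the same stream filter that overwritePartitions() uses to find the partitions that must be dropped from older segments.
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class PartitionOverlapSketch {
  public static void main(String[] args) {
    // Invented partition locations currently recorded for the table.
    List<String> currentLocations = Arrays.asList(
        "/store/db/tbl/country=US", "/store/db/tbl/country=IN");
    // Invented partition locations produced by the overwrite query.
    List<String> partitionList = Arrays.asList("/store/db/tbl/country=IN");

    // Same overlap check as overwritePartitions(): keep every current location
    // that also appears in the incoming partition list.
    List<String> overlappingPartitions = currentLocations.stream()
        .filter(partitionList::contains)
        .collect(Collectors.toList());
    System.out.println(overlappingPartitions);  // prints [/store/db/tbl/country=IN]
  }
}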
Use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
The class CarbonTableInputFormat, method updateLoadMetaDataDetailsToSegments.
public void updateLoadMetaDataDetailsToSegments(List<Segment> validSegments, List<org.apache.carbondata.hadoop.CarbonInputSplit> prunedSplits) {
Map<String, Segment> validSegmentsMap = validSegments.stream().collect(Collectors.toMap(Segment::getSegmentNo, segment -> segment, (e1, e2) -> e1));
for (CarbonInputSplit split : prunedSplits) {
Segment segment = split.getSegment();
if (segment.getLoadMetadataDetails() == null || segment.getReadCommittedScope() == null) {
if (validSegmentsMap.containsKey(segment.getSegmentNo())) {
segment.setLoadMetadataDetails(validSegmentsMap.get(segment.getSegmentNo()).getLoadMetadataDetails());
segment.setReadCommittedScope(validSegmentsMap.get(segment.getSegmentNo()).getReadCommittedScope());
}
}
}
}
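The toMap collector above keeps the first entry when two segments report the same segment number. Below is a minimal sketch of that behavior; the Segment instances are built with null placeholders (and a cast for the read-committed scope), which is an assumption made purely for illustration since real callers pass a proper scope.
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.apache.carbondata.core.index.Segment;
import org.apache.carbondata.core.readcommitter.ReadCommittedScope;

public class ValidSegmentsMapSketch {
  public static void main(String[] args) {
    // Null placeholders stand in for the segment file name and read-committed
    // scope that real callers would supply.
    List<Segment> validSegments = Arrays.asList(
        new Segment("0", null, (ReadCommittedScope) null),
        new Segment("1", null, (ReadCommittedScope) null),
        new Segment("1", null, (ReadCommittedScope) null));  // duplicate segment number

    // Same collector as updateLoadMetaDataDetailsToSegments(): key by segment
    // number and keep the first Segment when a number repeats.
    Map<String, Segment> validSegmentsMap = validSegments.stream()
        .collect(Collectors.toMap(Segment::getSegmentNo, segment -> segment, (e1, e2) -> e1));
    System.out.println(validSegmentsMap.size());  // prints 2
  }
}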
Use of org.apache.carbondata.core.index.Segment in project carbondata by apache.
The class CarbonInputFormat, method getPrunedBlocklets.
/**
* Prune the blocklets using the filter expression and the available indexes.
* Pruning is done first with the default blocklet index, then with the CG and FG indexes.
*/
public List<ExtendedBlocklet> getPrunedBlocklets(JobContext job, CarbonTable carbonTable, IndexFilter filter, List<Segment> validSegments, List<Segment> invalidSegments, List<String> segmentsToBeRefreshed) throws IOException {
ExplainCollector.addPruningInfo(carbonTable.getTableName());
filter = filter == null ? new IndexFilter(carbonTable, null) : filter;
ExplainCollector.setFilterStatement(filter.getExpression() == null ? "none" : filter.getExpression().getStatement());
boolean distributedCG = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.USE_DISTRIBUTED_INDEX, CarbonCommonConstants.USE_DISTRIBUTED_INDEX_DEFAULT));
IndexJob indexJob = IndexUtil.getIndexJob(job.getConfiguration());
List<PartitionSpec> partitionsToPrune = getPartitionsToPrune(job.getConfiguration());
// First prune using default index on driver side.
TableIndex defaultIndex = IndexStoreManager.getInstance().getDefaultIndex(carbonTable);
List<ExtendedBlocklet> prunedBlocklets;
// Log the event so that the user can follow the pruning progress in the logs.
LOG.info("Started block pruning ...");
boolean isDistributedPruningEnabled = CarbonProperties.getInstance().isDistributedPruningEnabled(carbonTable.getDatabaseName(), carbonTable.getTableName());
boolean isIndexServerContext = job.getConfiguration().get("isIndexServerContext", "false").equals("true");
if (isDistributedPruningEnabled && !isIndexServerContext) {
try {
prunedBlocklets = getDistributedSplit(carbonTable, filter.getResolver(), partitionsToPrune, validSegments, invalidSegments, segmentsToBeRefreshed, false, job.getConfiguration(), filter.getMissingSISegments());
} catch (Exception e) {
// If distributed pruning fails, fall back to driver-side pruning unless fallback is disabled.
if (CarbonProperties.getInstance().isFallBackDisabled()) {
throw e;
}
prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
}
} else {
if (carbonTable.isTransactionalTable()) {
IndexExprWrapper indexExprWrapper = IndexChooser.getDefaultIndex(getOrCreateCarbonTable(job.getConfiguration()), null);
IndexUtil.loadIndexes(carbonTable, indexExprWrapper, validSegments);
}
prunedBlocklets = defaultIndex.prune(validSegments, filter, partitionsToPrune);
if (ExplainCollector.enabled()) {
ExplainCollector.setDefaultIndexPruningBlockHit(getBlockCount(prunedBlocklets));
}
if (prunedBlocklets.size() == 0) {
return prunedBlocklets;
}
IndexChooser chooser = new IndexChooser(getOrCreateCarbonTable(job.getConfiguration()), isSecondaryIndexPruningEnabled(job.getConfiguration()));
// Get the available CG indexes and prune further.
IndexExprWrapper cgIndexExprWrapper = chooser.chooseCGIndex(filter.getResolver());
if (cgIndexExprWrapper != null) {
// Prune segments from already pruned blocklets
IndexUtil.pruneSegments(validSegments, prunedBlocklets);
List<ExtendedBlocklet> cgPrunedBlocklets = new ArrayList<>();
// If an SI is present in cgIndexExprWrapper, set on each segment the list of
// blocklets pruned by the default index; this list will be returned from the
// SI prune method if the segment is not present in the SI.
Map<String, List<ExtendedBlocklet>> segmentsToBlocklet = new HashMap<>();
for (ExtendedBlocklet extendedBlocklet : prunedBlocklets) {
List<ExtendedBlocklet> extendedBlockletList = segmentsToBlocklet.getOrDefault(extendedBlocklet.getSegmentId(), new ArrayList<>());
extendedBlockletList.add(extendedBlocklet);
segmentsToBlocklet.put(extendedBlocklet.getSegmentId(), extendedBlockletList);
}
for (Segment seg : validSegments) {
seg.setDefaultIndexPrunedBlocklets(segmentsToBlocklet.get(seg.getSegmentNo()));
}
boolean isCGPruneFallback = false;
// Again prune with CG index.
try {
if (distributedCG && indexJob != null) {
cgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments, IndexLevel.CG, new ArrayList<>(), job.getConfiguration());
} else {
cgPrunedBlocklets = cgIndexExprWrapper.prune(validSegments, partitionsToPrune);
}
} catch (Exception e) {
isCGPruneFallback = true;
LOG.error("CG index pruning failed.", e);
}
// If CG pruning failed, fall back to the default index result: no need to intersect, simply pass the prunedBlocklets from the default index
if (!isCGPruneFallback) {
if (isIndexServerContext) {
// For all blocklets initialize the detail info so that it can be serialized to driver
for (ExtendedBlocklet blocklet : cgPrunedBlocklets) {
blocklet.getDetailInfo();
blocklet.setCgIndexPresent(true);
}
}
// since the CG index prunes at segment scope,
// the result needs to be intersected with the previously pruned result
prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, cgPrunedBlocklets);
}
if (ExplainCollector.enabled()) {
ExplainCollector.recordCGIndexPruning(IndexWrapperSimpleInfo.fromIndexWrapper(cgIndexExprWrapper), prunedBlocklets.size(), getBlockCount(prunedBlocklets));
}
}
if (prunedBlocklets.size() == 0) {
return prunedBlocklets;
}
// Now try to prune with FG Index.
if (isFgIndexPruningEnable(job.getConfiguration()) && indexJob != null) {
IndexExprWrapper fgIndexExprWrapper = chooser.chooseFGIndex(filter.getResolver());
List<ExtendedBlocklet> fgPrunedBlocklets;
if (fgIndexExprWrapper != null) {
// Prune segments from already pruned blocklets
IndexUtil.pruneSegments(validSegments, prunedBlocklets);
fgPrunedBlocklets = IndexUtil.executeIndexJob(carbonTable, filter.getResolver(), indexJob, partitionsToPrune, validSegments, invalidSegments, fgIndexExprWrapper.getIndexLevel(), new ArrayList<>(), job.getConfiguration());
// note that the 'fgPrunedBlocklets' has extra index related info compared with
// 'prunedBlocklets', so the intersection should keep the elements in 'fgPrunedBlocklets'
prunedBlocklets = intersectFilteredBlocklets(carbonTable, prunedBlocklets, fgPrunedBlocklets);
ExplainCollector.recordFGIndexPruning(IndexWrapperSimpleInfo.fromIndexWrapper(fgIndexExprWrapper), prunedBlocklets.size(), getBlockCount(prunedBlocklets));
}
}
}
LOG.info("Finished block pruning ...");
return prunedBlocklets;
}
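The note above about keeping the elements of 'fgPrunedBlocklets' is easy to miss. The sketch below shows the shape of such an intersection with plain string identifiers standing in for blocklets (the ids are invented); it illustrates only the keep-the-second-list idea from the comment, not the actual intersectFilteredBlocklets implementation.
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class IntersectKeepsFgSketch {
  public static void main(String[] args) {
    // Blocklets that survived the default-index prune (invented ids).
    Set<String> defaultPruned =
        new HashSet<>(Arrays.asList("seg0/block0", "seg0/block1", "seg1/block0"));
    // Blocklets returned by the FG index; in the real code these carry the extra
    // FG-index detail, which is why the intersection keeps these instances.
    List<String> fgPruned = Arrays.asList("seg0/block1", "seg1/block0", "seg1/block5");

    List<String> intersected = fgPruned.stream()
        .filter(defaultPruned::contains)
        .collect(Collectors.toList());
    System.out.println(intersected);  // prints [seg0/block1, seg1/block0]
  }
}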