Search in sources :

Example 26 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class StreamPruner method listAllStreamFiles.

// TODO optimize and move the code to StreamSegment , but it's in the streaming module.
/**
 * Builds the list of stream files referenced by the stream index file of each given segment.
 * A segment whose stream index file does not exist contributes no entries. As a side effect,
 * the size of the resulting list is stored in {@code totalFileNums}.
 *
 * @param segments   segments whose stream index files are read
 * @param withMinMax when {@code true}, the converted min/max index is attached to every stream
 *                   file whose block index entry carries one
 * @return one {@code StreamFile} per block index entry read, in read order
 * @throws IOException propagated from the file-existence check or the index reader calls
 */
private List<StreamFile> listAllStreamFiles(List<Segment> segments, boolean withMinMax) throws IOException {
    List<StreamFile> collected = new ArrayList<>();
    for (Segment current : segments) {
        String segmentPath = CarbonTablePath.getSegmentPath(carbonTable.getAbsoluteTableIdentifier().getTablePath(), current.getSegmentNo());
        String streamIndexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentPath);
        // Nothing to read for a segment that has no stream index file.
        if (!FileFactory.isFileExist(streamIndexPath)) {
            continue;
        }
        CarbonIndexFileReader reader = new CarbonIndexFileReader();
        reader.openThriftReader(streamIndexPath);
        try {
            while (reader.hasNext()) {
                BlockIndex entry = reader.readBlockIndexInfo();
                String dataFilePath = segmentPath + File.separator + entry.getFile_name();
                long dataFileSize = entry.getFile_size();
                StreamFile file = new StreamFile(current.getSegmentNo(), dataFilePath, dataFileSize);
                collected.add(file);
                // Min/max is optional: attach it only when requested and present in the entry.
                boolean hasMinMax = withMinMax
                        && entry.getBlock_index() != null
                        && entry.getBlock_index().getMin_max_index() != null;
                if (hasMinMax) {
                    file.setMinMaxIndex(CarbonMetadataUtil.convertExternalMinMaxIndex(entry.getBlock_index().getMin_max_index()));
                }
            }
        } finally {
            // Always release the reader once it has been opened.
            reader.closeThriftReader();
        }
    }
    totalFileNums = collected.size();
    return collected;
}
Also used : CarbonIndexFileReader(org.apache.carbondata.core.reader.CarbonIndexFileReader) ArrayList(java.util.ArrayList) BlockIndex(org.apache.carbondata.format.BlockIndex) Segment(org.apache.carbondata.core.index.Segment)

Example 27 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class SecondaryIndexFactory method getIndexes.

/**
 * Resolves the coarse-grain indexes of every given segment by delegating to the
 * single-segment {@code getIndexes} overload.
 *
 * @param segments              segments to resolve indexes for
 * @param positionReferenceInfo passed through unchanged to the single-segment overload
 * @return a map from each segment to the index list returned for it
 * @throws IOException propagated from the single-segment overload
 */
private Map<Segment, List<CoarseGrainIndex>> getIndexes(List<Segment> segments, PositionReferenceInfo positionReferenceInfo) throws IOException {
    Map<Segment, List<CoarseGrainIndex>> indexesBySegment = new HashMap<>();
    // The single-segment overload receives the ids of ALL segments, so gather them once up front.
    List<String> segmentNumbers = segments.stream()
            .map(Segment::getSegmentNo)
            .collect(Collectors.toList());
    for (Segment current : segments) {
        List<CoarseGrainIndex> perSegment = this.getIndexes(current, segmentNumbers, positionReferenceInfo);
        indexesBySegment.put(current, perSegment);
    }
    return indexesBySegment;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) Segment(org.apache.carbondata.core.index.Segment)

Example 28 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class BlockletIndexFactory method getAllUncached.

/**
 * Returns the subset of the given splits whose block index identifier has no entry in
 * {@code cache}. Each returned split has that identifier set on it before being added.
 *
 * @param distributableList splits to check; each one is cast to {@code BlockletIndexInputSplit}
 * @return the splits that are not present in the cache, in input order
 * @throws IOException propagated from the identifier lookup
 */
@Override
public List<IndexInputSplit> getAllUncached(List<IndexInputSplit> distributableList) throws IOException {
    List<IndexInputSplit> uncached = new ArrayList<>(distributableList.size());
    for (IndexInputSplit split : distributableList) {
        Set<TableBlockIndexUniqueIdentifier> candidates = getTableBlockIndexUniqueIdentifiers(split.getSegment());
        BlockletIndexInputSplit blockletSplit = (BlockletIndexInputSplit) split;
        // filter out the tableBlockIndexUniqueIdentifiers based on distributable
        TableBlockIndexUniqueIdentifier matching = BlockletIndexUtil.filterIdentifiersBasedOnDistributable(candidates, blockletSplit);
        TableBlockIndexUniqueIdentifierWrapper cacheKey = new TableBlockIndexUniqueIdentifierWrapper(matching, this.getCarbonTable());
        if (cache.getIfPresent(cacheKey) != null) {
            // Already cached: nothing to load for this split.
            continue;
        }
        blockletSplit.setTableBlockIndexUniqueIdentifier(matching);
        uncached.add(split);
    }
    return uncached;
}
Also used : TableBlockIndexUniqueIdentifierWrapper(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper) IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) Segment(org.apache.carbondata.core.index.Segment)

Example 29 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class BlockletIndexFactory method getAllUncached.

/**
 * For every valid segment, walks the segment's block index identifiers and collects the
 * segment's distributable once for each identifier that either has no index file path or has
 * no entry in {@code cache}. The identifier is set on the distributable before it is added.
 *
 * NOTE(review): if {@code getDistributable()} returns the same instance on every call, a
 * segment with several uncached identifiers appends that one instance repeatedly and it keeps
 * only the last identifier set on it. Confirm this is intended.
 *
 * @param validSegments    segments to inspect
 * @param indexExprWrapper used to obtain the distributable wrapper for each segment
 * @return the distributables that still need loading
 * @throws IOException propagated from the distributable or identifier lookups
 */
@Override
public List<IndexInputSplit> getAllUncached(List<Segment> validSegments, IndexExprWrapper indexExprWrapper) throws IOException {
    List<IndexInputSplit> uncached = new ArrayList<>();
    for (Segment current : validSegments) {
        IndexInputSplitWrapper splitWrapper = indexExprWrapper.toDistributableSegment(current);
        Set<TableBlockIndexUniqueIdentifier> segmentIdentifiers = getTableSegmentUniqueIdentifiers(current);
        for (TableBlockIndexUniqueIdentifier candidate : segmentIdentifiers) {
            TableBlockIndexUniqueIdentifierWrapper cacheKey = new TableBlockIndexUniqueIdentifierWrapper(candidate, this.getCarbonTable());
            BlockletIndexWrapper cachedEntry = cache.getIfPresent(cacheKey);
            // Skip identifiers that have an index file path and are already cached.
            if (candidate.getIndexFilePath() != null && cachedEntry != null) {
                continue;
            }
            ((BlockletIndexInputSplit) splitWrapper.getDistributable()).setTableBlockIndexUniqueIdentifier(candidate);
            uncached.add(splitWrapper.getDistributable());
        }
    }
    return uncached;
}
Also used : TableBlockIndexUniqueIdentifierWrapper(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper) IndexInputSplitWrapper(org.apache.carbondata.core.index.dev.expr.IndexInputSplitWrapper) IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) Segment(org.apache.carbondata.core.index.Segment) BlockletIndexWrapper(org.apache.carbondata.core.indexstore.BlockletIndexWrapper)

Example 30 with Segment

use of org.apache.carbondata.core.index.Segment in project carbondata by apache.

the class BlockletIndexFactory method getIndexes.

/**
 * Get the index for all segments
 *
 * Collects the block index identifier wrappers to load for each segment, fetches them from
 * {@code cache} in a single {@code getAll} call, and groups the resulting indexes by segment.
 * The wrappers are chosen per segment as follows:
 * - when {@code partitionLocations} is non-empty, via
 *   {@code getTableBlockUniqueIdentifierWrappers};
 * - otherwise, when loading all segment indexes is disabled, segment metadata is present and
 *   the filter has an expression with no implicit filter expression, via
 *   {@code getTableBlockIndexUniqueIdentifierUsingSegmentMinMax};
 * - otherwise every identifier of the segment is wrapped.
 *
 * @param segments           segments to resolve indexes for
 * @param partitionLocations partition locations; must not be null
 * @param filter             filter considered for the min/max path; may be null
 * @return a map from segment to its indexes; segments with no loaded wrapper have no entry
 * @throws IOException propagated from the identifier lookups or the cache
 */
public Map<Segment, List<CoarseGrainIndex>> getIndexes(List<Segment> segments, Set<Path> partitionLocations, IndexFilter filter) throws IOException {
    List<TableBlockIndexUniqueIdentifierWrapper> wrappersToLoad = new ArrayList<>();
    Map<Segment, List<CoarseGrainIndex>> indexesBySegment = new HashMap<>();
    Map<String, Segment> segmentsByNo = new HashMap<>();
    for (Segment current : segments) {
        segmentsByNo.put(current.getSegmentNo(), current);
        Set<TableBlockIndexUniqueIdentifier> identifiers = getTableBlockIndexUniqueIdentifiers(current);
        if (!partitionLocations.isEmpty()) {
            // get tableBlockIndexUniqueIdentifierWrappers from segment file info
            getTableBlockUniqueIdentifierWrappers(partitionLocations, wrappersToLoad, identifiers);
            continue;
        }
        SegmentMetaDataInfo metaDataInfo = current.getSegmentMetaDataInfo();
        boolean loadAllIndexes = Boolean.parseBoolean(CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_LOAD_ALL_SEGMENT_INDEXES_TO_CACHE, CarbonCommonConstants.CARBON_LOAD_ALL_SEGMENT_INDEXES_TO_CACHE_DEFAULT));
        // Short-circuit order matters: filter is dereferenced only after its null check.
        boolean useSegmentMinMax = !loadAllIndexes
                && metaDataInfo != null
                && filter != null
                && !filter.isEmpty()
                && filter.getExpression() != null
                && FilterUtil.getImplicitFilterExpression(filter.getExpression()) == null;
        if (useSegmentMinMax) {
            getTableBlockIndexUniqueIdentifierUsingSegmentMinMax(current, metaDataInfo, filter, identifiers, wrappersToLoad);
        } else {
            for (TableBlockIndexUniqueIdentifier identifier : identifiers) {
                wrappersToLoad.add(new TableBlockIndexUniqueIdentifierWrapper(identifier, this.getCarbonTable()));
            }
        }
    }
    for (BlockletIndexWrapper loaded : cache.getAll(wrappersToLoad)) {
        // Map the loaded wrapper back to its Segment through the segment number.
        Segment owner = segmentsByNo.get(loaded.getSegmentId());
        indexesBySegment.computeIfAbsent(owner, key -> new ArrayList<>()).addAll(loaded.getIndexes());
    }
    return indexesBySegment;
}
Also used : SegmentMetaDataInfo(org.apache.carbondata.core.segmentmeta.SegmentMetaDataInfo) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) Segment(org.apache.carbondata.core.index.Segment) TableBlockIndexUniqueIdentifierWrapper(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper) CoarseGrainIndex(org.apache.carbondata.core.index.dev.cgindex.CoarseGrainIndex) List(java.util.List) ArrayList(java.util.ArrayList) BlockletIndexWrapper(org.apache.carbondata.core.indexstore.BlockletIndexWrapper)

Aggregations

Segment (org.apache.carbondata.core.index.Segment)35 ArrayList (java.util.ArrayList)24 IOException (java.io.IOException)18 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)14 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)11 HashMap (java.util.HashMap)10 List (java.util.List)9 Map (java.util.Map)8 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)8 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)8 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)8 CarbonCommonConstants (org.apache.carbondata.core.constants.CarbonCommonConstants)7 HashSet (java.util.HashSet)6 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)6 FileFactory (org.apache.carbondata.core.datastore.impl.FileFactory)6 TableIndex (org.apache.carbondata.core.index.TableIndex)6 Collectors (java.util.stream.Collectors)5 LogServiceFactory (org.apache.carbondata.common.logging.LogServiceFactory)5 IndexFilter (org.apache.carbondata.core.index.IndexFilter)5 PartitionSpec (org.apache.carbondata.core.indexstore.PartitionSpec)5