Search in sources :

Example 1 with TableDataMap

use of org.apache.carbondata.core.datamap.TableDataMap in project carbondata by apache.

From the class DataMapWriterListener, method registerAllWriter.

/**
 * Registers a datamap writer for every datamap defined on the given table,
 * targeting the specified segment and data write path.
 */
public void registerAllWriter(CarbonTable carbonTable, String segmentId, String dataWritePath) {
    List<TableDataMap> allDataMaps = DataMapStoreManager.getInstance().getAllDataMap(carbonTable);
    if (allDataMaps == null) {
        // Nothing to register when the table has no datamaps.
        return;
    }
    for (TableDataMap dataMap : allDataMaps) {
        register(dataMap.getDataMapFactory(), segmentId, dataWritePath);
    }
}
Also used : TableDataMap(org.apache.carbondata.core.datamap.TableDataMap) DataMapFactory(org.apache.carbondata.core.datamap.dev.DataMapFactory)

Example 2 with TableDataMap

use of org.apache.carbondata.core.datamap.TableDataMap in project carbondata by apache.

From the class CarbonTableInputFormat, method getBlockRowCount.

/**
 * Computes, for the valid (non-streaming) segments of the given table, the row count of
 * each block and the number of blocks per segment.
 *
 * @param job        job used to resolve the segment filter
 * @param table      table whose blocks are counted
 * @param partitions partition specs used to prune blocklets; may be null
 * @return a {@link BlockMappingVO} holding (segmentId/blockName -> rowCount) and
 *         (segmentId -> blockCount) mappings
 * @throws IOException if the table status file or the datamap cannot be read
 */
public BlockMappingVO getBlockRowCount(Job job, CarbonTable table, List<PartitionSpec> partitions) throws IOException {
    AbsoluteTableIdentifier identifier = table.getAbsoluteTableIdentifier();
    TableDataMap blockletMap = DataMapStoreManager.getInstance().getDefaultDataMap(table);
    LoadMetadataDetails[] loadMetadataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(table, loadMetadataDetails);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo allSegments = new SegmentStatusManager(identifier).getValidAndInvalidSegments(loadMetadataDetails);
    // segmentId/blockName -> total row count of that block
    Map<String, Long> blockRowCountMapping = new HashMap<>();
    // segmentId -> number of distinct blocks seen in that segment
    Map<String, Long> segmentAndBlockCountMapping = new HashMap<>();
    // TODO: currently only batch segment is supported, add support for streaming table
    List<Segment> filteredSegment = getFilteredSegment(job, allSegments.getValidSegments(), false);
    List<ExtendedBlocklet> blocklets = blockletMap.prune(filteredSegment, null, partitions);
    for (ExtendedBlocklet blocklet : blocklets) {
        // Normalize the blocklet path to the carbondata file name it belongs to.
        String blockName = CarbonTablePath.getCarbonDataFileName(blocklet.getPath()) + CarbonTablePath.getCarbonDataExtension();
        String key = CarbonUpdateUtil.getSegmentBlockNameKey(blocklet.getSegmentId(), blockName);
        // If the block was invalidated by an update/delete operation, skip its count.
        SegmentUpdateDetails details = updateStatusManager.getDetailsForABlock(key);
        if (null == details || !CarbonUpdateUtil.isBlockInvalid(details.getSegmentStatus())) {
            // First time we see this block: it contributes one to its segment's block count.
            if (!blockRowCountMapping.containsKey(key)) {
                segmentAndBlockCountMapping.merge(blocklet.getSegmentId(), 1L, Long::sum);
            }
            // Accumulate the blocklet's rows into the block's total.
            blockRowCountMapping.merge(key, blocklet.getDetailInfo().getRowCount(), Long::sum);
        }
    }
    return new BlockMappingVO(blockRowCountMapping, segmentAndBlockCountMapping);
}
Also used : BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) HashMap(java.util.HashMap) TableDataMap(org.apache.carbondata.core.datamap.TableDataMap) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) Segment(org.apache.carbondata.core.datamap.Segment) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Example 3 with TableDataMap

use of org.apache.carbondata.core.datamap.TableDataMap in project carbondata by apache.

From the class DistributableDataMapFormat, method createRecordReader.

@Override
public RecordReader<Void, ExtendedBlocklet> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
    // Record reader that yields the blocklets pruned for one distributable split.
    // Keys are always null; values are the pruned ExtendedBlocklets.
    return new RecordReader<Void, ExtendedBlocklet>() {

        // Iterator over the blocklets produced by pruning this split's datamap.
        private Iterator<ExtendedBlocklet> blockletIterator;

        // Blocklet positioned by the most recent successful nextKeyValue() call.
        private ExtendedBlocklet currBlocklet;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            DataMapDistributableWrapper wrapper = (DataMapDistributableWrapper) inputSplit;
            // Resolve the datamap for this split and prune it with the split's filter.
            TableDataMap dataMap = DataMapStoreManager.getInstance().getDataMap(table, wrapper.getDistributable().getDataMapSchema());
            List<ExtendedBlocklet> prunedBlocklets = dataMap.prune(wrapper.getDistributable(), dataMapExprWrapper.getFilterResolverIntf(wrapper.getUniqueId()), partitions);
            // Tag each blocklet with the id of the datamap that produced it.
            for (ExtendedBlocklet pruned : prunedBlocklets) {
                pruned.setDataMapUniqueId(wrapper.getUniqueId());
            }
            blockletIterator = prunedBlocklets.iterator();
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            if (!blockletIterator.hasNext()) {
                return false;
            }
            currBlocklet = blockletIterator.next();
            return true;
        }

        @Override
        public Void getCurrentKey() throws IOException, InterruptedException {
            // Keys carry no information for this reader.
            return null;
        }

        @Override
        public ExtendedBlocklet getCurrentValue() throws IOException, InterruptedException {
            return currBlocklet;
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            // Progress is not tracked for this reader.
            return 0;
        }

        @Override
        public void close() throws IOException {
            // No resources to release.
        }
    };
}
Also used : DataMapDistributableWrapper(org.apache.carbondata.core.datamap.dev.expr.DataMapDistributableWrapper) TableDataMap(org.apache.carbondata.core.datamap.TableDataMap) RecordReader(org.apache.hadoop.mapreduce.RecordReader) Iterator(java.util.Iterator) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) InputSplit(org.apache.hadoop.mapreduce.InputSplit) ExtendedBlocklet(org.apache.carbondata.core.indexstore.ExtendedBlocklet)

Aggregations

TableDataMap (org.apache.carbondata.core.datamap.TableDataMap)3 ExtendedBlocklet (org.apache.carbondata.core.indexstore.ExtendedBlocklet)2 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 Segment (org.apache.carbondata.core.datamap.Segment)1 DataMapFactory (org.apache.carbondata.core.datamap.dev.DataMapFactory)1 DataMapDistributableWrapper (org.apache.carbondata.core.datamap.dev.expr.DataMapDistributableWrapper)1 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)1 SegmentUpdateDetails (org.apache.carbondata.core.mutate.SegmentUpdateDetails)1 BlockMappingVO (org.apache.carbondata.core.mutate.data.BlockMappingVO)1 LoadMetadataDetails (org.apache.carbondata.core.statusmanager.LoadMetadataDetails)1 SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager)1 SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager)1 InputSplit (org.apache.hadoop.mapreduce.InputSplit)1 RecordReader (org.apache.hadoop.mapreduce.RecordReader)1 TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext)1