Search in sources:

Example 1 with BlockletIndexInputSplit

Use of org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit in project carbondata by apache.

From the class BlockletIndexInputFormat, the method createRecordReader:

@Override
public RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
    return new RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema>() {

        private BlockletIndexWrapper wrapper = null;

        private TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = null;

        private TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper;

        Cache<TableBlockIndexUniqueIdentifierWrapper, BlockletIndexWrapper> cache = CacheProvider.getInstance().createCache(CacheType.DRIVER_BLOCKLET_INDEX);

        private Iterator<TableBlockIndexUniqueIdentifier> iterator;

        // Cache to avoid multiple times listing of files
        private Map<String, Map<String, BlockMetaInfo>> segInfoCache = new HashMap<>();

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
            BlockletIndexInputSplit segmentDistributable = (BlockletIndexInputSplit) inputSplit;
            TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier = segmentDistributable.getTableBlockIndexUniqueIdentifier();
            Segment segment = Segment.toSegment(tableSegmentUniqueIdentifier.getSegmentId(), readCommittedScope);
            iterator = BlockletIndexUtil.getTableBlockUniqueIdentifiers(segment).iterator();
        }

        @Override
        public boolean nextKeyValue() {
            if (iterator.hasNext()) {
                tableBlockIndexUniqueIdentifier = iterator.next();
                tableBlockIndexUniqueIdentifierWrapper = new TableBlockIndexUniqueIdentifierWrapper(tableBlockIndexUniqueIdentifier, table, false, true, true);
                wrapper = ((BlockletIndexStore) cache).get(tableBlockIndexUniqueIdentifierWrapper, segInfoCache);
                return true;
            }
            return false;
        }

        @Override
        public TableBlockIndexUniqueIdentifier getCurrentKey() {
            return tableBlockIndexUniqueIdentifier;
        }

        @Override
        public BlockletIndexDetailsWithSchema getCurrentValue() {
            return new BlockletIndexDetailsWithSchema(wrapper, table.getTableInfo().isSchemaModified());
        }

        @Override
        public float getProgress() {
            return 0;
        }

        @Override
        public void close() {
            if (null != tableBlockIndexUniqueIdentifierWrapper) {
                if (null != wrapper && null != wrapper.getIndexes() && !wrapper.getIndexes().isEmpty()) {
                    String segmentId = tableBlockIndexUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier().getSegmentId();
                    // as segmentId will be same for all the indexes and segmentProperties cache is
                    // maintained at segment level so it need to be called only once for clearing
                    SegmentPropertiesAndSchemaHolder.getInstance().invalidate(segmentId, wrapper.getIndexes().get(0).getSegmentPropertiesWrapper(), tableBlockIndexUniqueIdentifierWrapper.isAddTableBlockToUnsafeAndLRUCache());
                }
            }
        }
    };
}
Also used: RecordReader (org.apache.hadoop.mapreduce.RecordReader), TableBlockIndexUniqueIdentifier (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier), TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext), Segment (org.apache.carbondata.core.index.Segment), TableBlockIndexUniqueIdentifierWrapper (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper), BlockletIndexInputSplit (org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit), Iterator (java.util.Iterator), HashMap (java.util.HashMap), Map (java.util.Map), IndexInputSplit (org.apache.carbondata.core.index.IndexInputSplit), InputSplit (org.apache.hadoop.mapreduce.InputSplit), BlockletIndexWrapper (org.apache.carbondata.core.indexstore.BlockletIndexWrapper), Cache (org.apache.carbondata.core.cache.Cache), BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo)
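For illustration, a minimal sketch of how a caller might drive the record reader created above. The helper name loadIndexesForSplit and the format, split and context parameters are assumptions for this example, not part of the CarbonData source; the surrounding index-loading job would normally supply them.

// Hypothetical helper: consumes the reader returned by createRecordReader above.
void loadIndexesForSplit(BlockletIndexInputFormat format, InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> reader = format.createRecordReader(split, context);
    try {
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            TableBlockIndexUniqueIdentifier key = reader.getCurrentKey();
            BlockletIndexDetailsWithSchema value = reader.getCurrentValue();
            // each value wraps the blocklet indexes loaded for one index file of the segment;
            // hand the key/value pair to whatever consumes the loaded index, e.g. a cache
        }
    } finally {
        // close() invalidates the segment-level SegmentPropertiesAndSchemaHolder entry when required
        reader.close();
    }
}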

Example 2 with BlockletIndexInputSplit

Use of org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit in project carbondata by apache.

From the class BlockletIndexInputFormat, the method getSplits:

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    IndexFactory indexFactory = IndexStoreManager.getInstance().getDefaultIndex(table).getIndexFactory();
    CacheableIndex factory = (CacheableIndex) indexFactory;
    List<IndexInputSplit> validDistributables = factory.getAllUncached(validSegments, indexExprWrapper);
    if (!validSegments.isEmpty()) {
        this.readCommittedScope = validSegments.get(0).getReadCommittedScope();
    }
    CarbonBlockLoaderHelper instance = CarbonBlockLoaderHelper.getInstance();
    int distributableSize = validDistributables.size();
    List<InputSplit> inputSplits = new ArrayList<>(distributableSize);
    keys = new HashSet<>();
    Iterator<IndexInputSplit> iterator = validDistributables.iterator();
    while (iterator.hasNext()) {
        BlockletIndexInputSplit next = (BlockletIndexInputSplit) iterator.next();
        String key = next.getSegmentPath();
        if (instance.checkAlreadySubmittedBlock(table.getAbsoluteTableIdentifier(), key)) {
            inputSplits.add(next);
            keys.add(key);
        }
    }
    int sizeOfDistToBeLoaded = inputSplits.size();
    LOGGER.info("Submitted blocks " + sizeOfDistToBeLoaded + ", " + distributableSize + " . Rest already considered for load in other job.");
    return inputSplits;
}
Also used: IndexInputSplit (org.apache.carbondata.core.index.IndexInputSplit), BlockletIndexInputSplit (org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit), ArrayList (java.util.ArrayList), CacheableIndex (org.apache.carbondata.core.index.dev.CacheableIndex), IndexFactory (org.apache.carbondata.core.index.dev.IndexFactory), InputSplit (org.apache.hadoop.mapreduce.InputSplit)
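Putting the two examples together, a hedged sketch of how a driver might first ask the format for the uncached splits and then load each one through the record reader from Example 1. The method name loadUncachedIndexes and the job and context parameters are assumptions for this example; loadIndexesForSplit refers to the hypothetical helper sketched after Example 1.

// Hypothetical driver loop: getSplits selects only the segments whose blocklet
// indexes are not yet cached, and each split is then loaded via the record reader.
void loadUncachedIndexes(BlockletIndexInputFormat format, JobContext job, TaskAttemptContext context) throws IOException, InterruptedException {
    List<InputSplit> splits = format.getSplits(job);
    for (InputSplit split : splits) {
        // each split is a BlockletIndexInputSplit pointing at one segment's index files
        loadIndexesForSplit(format, split, context);
    }
}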

Aggregations

IndexInputSplit (org.apache.carbondata.core.index.IndexInputSplit): 2
BlockletIndexInputSplit (org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit): 2
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 2
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Iterator (java.util.Iterator): 1
Map (java.util.Map): 1
Cache (org.apache.carbondata.core.cache.Cache): 1
Segment (org.apache.carbondata.core.index.Segment): 1
CacheableIndex (org.apache.carbondata.core.index.dev.CacheableIndex): 1
IndexFactory (org.apache.carbondata.core.index.dev.IndexFactory): 1
BlockMetaInfo (org.apache.carbondata.core.indexstore.BlockMetaInfo): 1
BlockletIndexWrapper (org.apache.carbondata.core.indexstore.BlockletIndexWrapper): 1
TableBlockIndexUniqueIdentifier (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier): 1
TableBlockIndexUniqueIdentifierWrapper (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper): 1
RecordReader (org.apache.hadoop.mapreduce.RecordReader): 1
TaskAttemptContext (org.apache.hadoop.mapreduce.TaskAttemptContext): 1