Examples with IndexInputSplit - org.apache.carbondata.core.index.IndexInputSplit

Example 1 with IndexInputSplit

Use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project.

From the class BloomCoarseGrainIndexFactory, method toDistributable.

/**
 * Builds the bloom index input splits for a single segment.
 *
 * <p>The segment's shard paths are read from {@code segmentMap}; when no entry exists they are
 * obtained through {@code getAllShardPaths} and stored in the map. A split is produced for each
 * shard path that ends with the merge-shard name or whose file name is contained in the
 * segment's filtered index shard names.
 *
 * @param segment the segment whose shards are converted into splits
 * @return the splits for the selected shards; empty when no shard is selected
 */
@Override
public List<IndexInputSplit> toDistributable(Segment segment) {
    Set<String> shardPaths = segmentMap.get(segment.getSegmentNo());
    if (null == shardPaths) {
        // nothing stored for this segment yet: resolve the shard paths and remember them
        shardPaths = getAllShardPaths(getCarbonTable().getTablePath(), segment.getSegmentNo(), indexName);
        segmentMap.put(segment.getSegmentNo(), shardPaths);
    }
    Set<String> filteredShards = segment.getFilteredIndexShardNames();
    List<IndexInputSplit> splits = new ArrayList<>();
    for (String shardPath : shardPaths) {
        // for merge shard, shard pruning delay to be done before pruning blocklet,
        // so a merge shard is always kept here
        boolean mergeShard = shardPath.endsWith(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME);
        if (!mergeShard && !filteredShards.contains(new File(shardPath).getName())) {
            continue;
        }
        IndexInputSplit split = new BloomIndexInputSplit(shardPath, filteredShards);
        split.setSegment(segment);
        split.setIndexSchema(getIndexSchema());
        splits.add(split);
    }
    return splits;
}

Also used : IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File)

Example 2 with IndexInputSplit

Use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project.

From the class IndexExprWrapperImpl, method toDistributable.

/**
 * Converts the given segments into wrapped index input splits.
 *
 * <p>Delegates to {@code index.toDistributable(segments)} and pairs every returned split with
 * this wrapper's {@code uniqueId}.
 *
 * @param segments the segments to create splits for
 * @return one {@link IndexInputSplitWrapper} per split returned by the index, in the same order
 */
@Override
public List<IndexInputSplitWrapper> toDistributable(List<Segment> segments) {
    List<IndexInputSplit> splits = index.toDistributable(segments);
    List<IndexInputSplitWrapper> wrapped = new ArrayList<>(splits.size());
    for (IndexInputSplit split : splits) {
        IndexInputSplitWrapper wrapper = new IndexInputSplitWrapper(uniqueId, split);
        wrapped.add(wrapper);
    }
    return wrapped;
}

Also used : IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList)

Example 3 with IndexInputSplit

Use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project.

From the class TestBlockletIndexFactory, method getValidDistributables.

/**
 * Verifies that {@code getAllUncached} returns exactly one split when it is handed two
 * blocklet index splits of the same segment while
 * {@code getTableBlockIndexUniqueIdentifiers} is mocked to return two identifiers.
 *
 * <p>The final check is an explicit comparison that throws {@link AssertionError} rather than
 * the {@code assert} statement, because {@code assert} is skipped when the JVM runs without
 * {@code -ea} and the test would then pass without verifying anything.
 *
 * @throws IOException if {@code getAllUncached} fails
 */
@Test
public void getValidDistributables() throws IOException {
    BlockletIndexInputSplit blockletIndexInputSplit = new BlockletIndexInputSplit("/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756709.carbonindex");
    Segment segment = new Segment("0", null, new TableStatusReadCommittedScope(carbonTable.getAbsoluteTableIdentifier(), new Configuration(false)));
    blockletIndexInputSplit.setSegment(segment);
    BlockletIndexInputSplit indexInputSplit = new BlockletIndexInputSplit("/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756701.carbonindex");
    indexInputSplit.setSegment(segment);
    List<IndexInputSplit> indexInputSplits = new ArrayList<>(2);
    indexInputSplits.add(blockletIndexInputSplit);
    indexInputSplits.add(indexInputSplit);
    // Replace the identifier lookup of the factory so the test does not depend on real index files.
    new MockUp<BlockletIndexFactory>() {

        @Mock
        Set<TableBlockIndexUniqueIdentifier> getTableBlockIndexUniqueIdentifiers(Segment segment) {
            TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier1 = new TableBlockIndexUniqueIdentifier("/opt/store/default/carbon_table/Fact/Part0/Segment_0", "0_batchno0-0-1521012756701.carbonindex", null, "0");
            Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = new HashSet<>(3);
            tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier);
            tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier1);
            return tableBlockIndexUniqueIdentifiers;
        }
    };
    List<IndexInputSplit> validDistributables = blockletIndexFactory.getAllUncached(indexInputSplits);
    // Always-on check: unlike the assert statement this runs regardless of the -ea JVM flag.
    int uncachedCount = validDistributables.size();
    if (uncachedCount != 1) {
        throw new AssertionError("expected exactly 1 uncached split but got " + uncachedCount);
    }
}

Also used : Configuration(org.apache.hadoop.conf.Configuration) IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) TableStatusReadCommittedScope(org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope) MockUp(mockit.MockUp) Segment(org.apache.carbondata.core.index.Segment) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with IndexInputSplit

Use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project.

From the class BlockletIndexInputFormat, method getSplits.

/**
 * Computes the input splits for the index entries that are not cached yet.
 *
 * <p>The default index factory of {@code table} is asked for all uncached splits of
 * {@code validSegments}. When {@code validSegments} is not empty, {@code readCommittedScope} is
 * taken from its first segment. A split is added to the result, and its segment path recorded
 * in {@code keys}, only when {@code checkAlreadySubmittedBlock} returns {@code true} for it.
 *
 * @param job the job context; not read by this method
 * @return the splits selected for loading
 * @throws IOException if the uncached splits cannot be determined
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    CacheableIndex cacheableFactory = (CacheableIndex) IndexStoreManager.getInstance().getDefaultIndex(table).getIndexFactory();
    List<IndexInputSplit> uncachedSplits = cacheableFactory.getAllUncached(validSegments, indexExprWrapper);
    if (!validSegments.isEmpty()) {
        this.readCommittedScope = validSegments.get(0).getReadCommittedScope();
    }
    CarbonBlockLoaderHelper loaderHelper = CarbonBlockLoaderHelper.getInstance();
    int distributableSize = uncachedSplits.size();
    List<InputSplit> inputSplits = new ArrayList<>(distributableSize);
    keys = new HashSet<>();
    for (IndexInputSplit candidate : uncachedSplits) {
        BlockletIndexInputSplit blockletSplit = (BlockletIndexInputSplit) candidate;
        String segmentPath = blockletSplit.getSegmentPath();
        // skip every split for which the loader helper does not return true
        if (!loaderHelper.checkAlreadySubmittedBlock(table.getAbsoluteTableIdentifier(), segmentPath)) {
            continue;
        }
        inputSplits.add(blockletSplit);
        keys.add(segmentPath);
    }
    LOGGER.info("Submitted blocks " + inputSplits.size() + ", " + distributableSize + " . Rest already considered for load in other job.");
    return inputSplits;
}

Also used : IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) BlockletIndexInputSplit(org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit) ArrayList(java.util.ArrayList) CacheableIndex(org.apache.carbondata.core.index.dev.CacheableIndex) IndexFactory(org.apache.carbondata.core.index.dev.IndexFactory) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Example 5 with IndexInputSplit

Use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project.

From the class BlockletIndexFactory, method getAllUncached.

/**
 * Selects the splits whose block index is not present in the cache.
 *
 * <p>For every split, the identifiers of its segment are narrowed down to a single identifier
 * through {@code BlockletIndexUtil.filterIdentifiersBasedOnDistributable}. When the cache holds
 * no entry for that identifier, the identifier is set on the split and the split is returned.
 *
 * @param distributableList the candidate splits; each element is cast to
 *     {@code BlockletIndexInputSplit}
 * @return the splits that have no cache entry, in the order they were given
 * @throws IOException declared by the overridden method
 */
@Override
public List<IndexInputSplit> getAllUncached(List<IndexInputSplit> distributableList) throws IOException {
    List<IndexInputSplit> uncached = new ArrayList<>(distributableList.size());
    for (IndexInputSplit candidate : distributableList) {
        Set<TableBlockIndexUniqueIdentifier> segmentIdentifiers = getTableBlockIndexUniqueIdentifiers(candidate.getSegment());
        BlockletIndexInputSplit blockletSplit = (BlockletIndexInputSplit) candidate;
        // narrow the segment's identifiers down to the one matching this split
        TableBlockIndexUniqueIdentifier matchingIdentifier = BlockletIndexUtil.filterIdentifiersBasedOnDistributable(segmentIdentifiers, blockletSplit);
        TableBlockIndexUniqueIdentifierWrapper cacheKey = new TableBlockIndexUniqueIdentifierWrapper(matchingIdentifier, this.getCarbonTable());
        if (cache.getIfPresent(cacheKey) != null) {
            // already cached: nothing to load for this split
            continue;
        }
        blockletSplit.setTableBlockIndexUniqueIdentifier(matchingIdentifier);
        uncached.add(candidate);
    }
    return uncached;
}

Also used : TableBlockIndexUniqueIdentifierWrapper(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper) IndexInputSplit(org.apache.carbondata.core.index.IndexInputSplit) ArrayList(java.util.ArrayList) TableBlockIndexUniqueIdentifier(org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier) Segment(org.apache.carbondata.core.index.Segment)

Aggregations

ArrayList (java.util.ArrayList): 7, IndexInputSplit (org.apache.carbondata.core.index.IndexInputSplit): 7, Segment (org.apache.carbondata.core.index.Segment): 3, TableBlockIndexUniqueIdentifier (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifier): 3, CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 2, TableBlockIndexUniqueIdentifierWrapper (org.apache.carbondata.core.indexstore.TableBlockIndexUniqueIdentifierWrapper): 2, File (java.io.File): 1, HashSet (java.util.HashSet): 1, MockUp (mockit.MockUp): 1, CacheableIndex (org.apache.carbondata.core.index.dev.CacheableIndex): 1, IndexFactory (org.apache.carbondata.core.index.dev.IndexFactory): 1, IndexInputSplitWrapper (org.apache.carbondata.core.index.dev.expr.IndexInputSplitWrapper): 1, BlockletIndexWrapper (org.apache.carbondata.core.indexstore.BlockletIndexWrapper): 1, BlockletIndexInputSplit (org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit): 1, TableStatusReadCommittedScope (org.apache.carbondata.core.readcommitter.TableStatusReadCommittedScope): 1, Configuration (org.apache.hadoop.conf.Configuration): 1, InputSplit (org.apache.hadoop.mapreduce.InputSplit): 1, Test (org.junit.Test): 1