Example use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project: the toDistributable method of the BloomCoarseGrainIndexFactory class.
@Override
public List<IndexInputSplit> toDistributable(Segment segment) {
  // Resolve (and cache in segmentMap) the bloom shard paths for this segment,
  // then emit one input split per shard that survives the shard-name filter.
  Set<String> shards = segmentMap.get(segment.getSegmentNo());
  if (shards == null) {
    shards = getAllShardPaths(getCarbonTable().getTablePath(), segment.getSegmentNo(), indexName);
    segmentMap.put(segment.getSegmentNo(), shards);
  }
  Set<String> requestedShardNames = segment.getFilteredIndexShardNames();
  List<IndexInputSplit> splits = new ArrayList<>();
  for (String shard : shards) {
    // A merged shard is always included: its shard pruning is deferred until
    // blocklet pruning. A plain shard is included only when explicitly requested.
    boolean isMergeShard = shard.endsWith(BloomIndexFileStore.MERGE_BLOOM_INDEX_SHARD_NAME);
    if (isMergeShard || requestedShardNames.contains(new File(shard).getName())) {
      IndexInputSplit split = new BloomIndexInputSplit(shard, requestedShardNames);
      split.setSegment(segment);
      split.setIndexSchema(getIndexSchema());
      splits.add(split);
    }
  }
  return splits;
}
Example use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project: the toDistributable method of the IndexExprWrapperImpl class.
@Override
public List<IndexInputSplitWrapper> toDistributable(List<Segment> segments) {
  // Delegate to the wrapped index, then tag every resulting split with this
  // wrapper's unique id.
  List<IndexInputSplit> splits = index.toDistributable(segments);
  List<IndexInputSplitWrapper> wrapped = new ArrayList<>(splits.size());
  for (IndexInputSplit split : splits) {
    wrapped.add(new IndexInputSplitWrapper(uniqueId, split));
  }
  return wrapped;
}
Example use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project: the getValidDistributables test method of the TestBlockletIndexFactory class.
@Test
public void getValidDistributables() throws IOException {
// Verifies that getAllUncached() filters the supplied splits down to those
// whose index file matches an identifier known for the segment.
// Two splits over the same segment: ...709.carbonindex and ...701.carbonindex.
BlockletIndexInputSplit blockletIndexInputSplit = new BlockletIndexInputSplit("/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756709.carbonindex");
Segment segment = new Segment("0", null, new TableStatusReadCommittedScope(carbonTable.getAbsoluteTableIdentifier(), new Configuration(false)));
blockletIndexInputSplit.setSegment(segment);
BlockletIndexInputSplit indexInputSplit = new BlockletIndexInputSplit("/opt/store/default/carbon_table/Fact/Part0/Segment_0/0_batchno0-0-1521012756701.carbonindex");
indexInputSplit.setSegment(segment);
List<IndexInputSplit> indexInputSplits = new ArrayList<>(2);
indexInputSplits.add(blockletIndexInputSplit);
indexInputSplits.add(indexInputSplit);
// JMockit: stub the factory's identifier lookup so the segment appears to
// contain only the ...701 index file (plus the shared fixture identifier,
// presumably declared on the test class - defined outside this snippet).
new MockUp<BlockletIndexFactory>() {
@Mock
Set<TableBlockIndexUniqueIdentifier> getTableBlockIndexUniqueIdentifiers(Segment segment) {
TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier1 = new TableBlockIndexUniqueIdentifier("/opt/store/default/carbon_table/Fact/Part0/Segment_0", "0_batchno0-0-1521012756701.carbonindex", null, "0");
Set<TableBlockIndexUniqueIdentifier> tableBlockIndexUniqueIdentifiers = new HashSet<>(3);
tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier);
tableBlockIndexUniqueIdentifiers.add(tableBlockIndexUniqueIdentifier1);
return tableBlockIndexUniqueIdentifiers;
}
};
// Only the ...701 split matches a known identifier, so exactly one split
// should be reported as not yet cached.
List<IndexInputSplit> validDistributables = blockletIndexFactory.getAllUncached(indexInputSplits);
assert 1 == validDistributables.size();
}
Example use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project: the getSplits method of the BlockletIndexInputFormat class.
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  // Ask the table's default index which distributables are not yet cached.
  IndexFactory defaultIndexFactory = IndexStoreManager.getInstance().getDefaultIndex(table).getIndexFactory();
  CacheableIndex cacheableIndex = (CacheableIndex) defaultIndexFactory;
  List<IndexInputSplit> uncached = cacheableIndex.getAllUncached(validSegments, indexExprWrapper);
  if (!validSegments.isEmpty()) {
    this.readCommittedScope = validSegments.get(0).getReadCommittedScope();
  }
  CarbonBlockLoaderHelper loaderHelper = CarbonBlockLoaderHelper.getInstance();
  int uncachedCount = uncached.size();
  List<InputSplit> splits = new ArrayList<>(uncachedCount);
  keys = new HashSet<>();
  for (IndexInputSplit distributable : uncached) {
    BlockletIndexInputSplit blockletSplit = (BlockletIndexInputSplit) distributable;
    String segmentPath = blockletSplit.getSegmentPath();
    // Submit a block only if no other concurrent job has already claimed it.
    if (loaderHelper.checkAlreadySubmittedBlock(table.getAbsoluteTableIdentifier(), segmentPath)) {
      splits.add(blockletSplit);
      keys.add(segmentPath);
    }
  }
  int submittedCount = splits.size();
  LOGGER.info("Submitted blocks " + submittedCount + ", " + uncachedCount + " . Rest already considered for load in other job.");
  return splits;
}
Example use of org.apache.carbondata.core.index.IndexInputSplit in the Apache CarbonData project: the getAllUncached method of the BlockletIndexFactory class.
@Override
public List<IndexInputSplit> getAllUncached(List<IndexInputSplit> distributableList) throws IOException {
  // Collect the distributables whose matching index file is absent from the cache.
  List<IndexInputSplit> uncached = new ArrayList<>(distributableList.size());
  for (IndexInputSplit candidate : distributableList) {
    Segment segment = candidate.getSegment();
    Set<TableBlockIndexUniqueIdentifier> identifiers = getTableBlockIndexUniqueIdentifiers(segment);
    // Narrow the segment's identifiers down to the one this split refers to.
    BlockletIndexInputSplit blockletSplit = (BlockletIndexInputSplit) candidate;
    TableBlockIndexUniqueIdentifier matchingIdentifier = BlockletIndexUtil.filterIdentifiersBasedOnDistributable(identifiers, blockletSplit);
    TableBlockIndexUniqueIdentifierWrapper cacheKey = new TableBlockIndexUniqueIdentifierWrapper(matchingIdentifier, this.getCarbonTable());
    if (null == cache.getIfPresent(cacheKey)) {
      blockletSplit.setTableBlockIndexUniqueIdentifier(matchingIdentifier);
      uncached.add(candidate);
    }
  }
  return uncached;
}
Aggregations