Use of org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit in project carbondata by apache.
The createRecordReader method of the class BlockletIndexInputFormat. It returns an anonymous RecordReader that casts the incoming split to BlockletIndexInputSplit, iterates over the segment's block index identifiers, and loads each blocklet index through the driver-side index cache.
@Override
public RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> createRecordReader(
    InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
  return new RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema>() {

    private BlockletIndexWrapper wrapper = null;

    private TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = null;

    private TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper;

    Cache<TableBlockIndexUniqueIdentifierWrapper, BlockletIndexWrapper> cache =
        CacheProvider.getInstance().createCache(CacheType.DRIVER_BLOCKLET_INDEX);

    private Iterator<TableBlockIndexUniqueIdentifier> iterator;

    // Cache to avoid listing the same segment's files multiple times
    private Map<String, Map<String, BlockMetaInfo>> segInfoCache = new HashMap<>();

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
      BlockletIndexInputSplit segmentDistributable = (BlockletIndexInputSplit) inputSplit;
      TableBlockIndexUniqueIdentifier tableSegmentUniqueIdentifier =
          segmentDistributable.getTableBlockIndexUniqueIdentifier();
      Segment segment =
          Segment.toSegment(tableSegmentUniqueIdentifier.getSegmentId(), readCommittedScope);
      iterator = BlockletIndexUtil.getTableBlockUniqueIdentifiers(segment).iterator();
    }

    @Override
    public boolean nextKeyValue() {
      if (iterator.hasNext()) {
        TableBlockIndexUniqueIdentifier tableBlockIndexUniqueIdentifier = iterator.next();
        this.tableBlockIndexUniqueIdentifier = tableBlockIndexUniqueIdentifier;
        TableBlockIndexUniqueIdentifierWrapper tableBlockIndexUniqueIdentifierWrapper =
            new TableBlockIndexUniqueIdentifierWrapper(tableBlockIndexUniqueIdentifier, table,
                false, true, true);
        this.tableBlockIndexUniqueIdentifierWrapper = tableBlockIndexUniqueIdentifierWrapper;
        // load the blocklet index for this identifier, or fetch it from the LRU cache
        wrapper = ((BlockletIndexStore) cache)
            .get(tableBlockIndexUniqueIdentifierWrapper, segInfoCache);
        return true;
      }
      return false;
    }

    @Override
    public TableBlockIndexUniqueIdentifier getCurrentKey() {
      return tableBlockIndexUniqueIdentifier;
    }

    @Override
    public BlockletIndexDetailsWithSchema getCurrentValue() {
      return new BlockletIndexDetailsWithSchema(wrapper, table.getTableInfo().isSchemaModified());
    }

    @Override
    public float getProgress() {
      return 0;
    }

    @Override
    public void close() {
      if (null != tableBlockIndexUniqueIdentifierWrapper) {
        if (null != wrapper && null != wrapper.getIndexes() && !wrapper.getIndexes().isEmpty()) {
          String segmentId =
              tableBlockIndexUniqueIdentifierWrapper.getTableBlockIndexUniqueIdentifier()
                  .getSegmentId();
          // as segmentId is the same for all the indexes and the segmentProperties cache is
          // maintained at segment level, it needs to be invalidated only once
          SegmentPropertiesAndSchemaHolder.getInstance().invalidate(segmentId,
              wrapper.getIndexes().get(0).getSegmentPropertiesWrapper(),
              tableBlockIndexUniqueIdentifierWrapper.isAddTableBlockToUnsafeAndLRUCache());
        }
      }
    }
  };
}
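For context, a minimal sketch of how this reader could be driven; it assumes the split and task context are supplied by the surrounding MapReduce-style job, and process(...) is a hypothetical placeholder, not a CarbonData API:

// Hypothetical driver loop for the RecordReader above; only APIs shown in the
// excerpt are used, everything else is assumed for illustration.
void readIndexSplit(BlockletIndexInputFormat format, BlockletIndexInputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  RecordReader<TableBlockIndexUniqueIdentifier, BlockletIndexDetailsWithSchema> reader =
      format.createRecordReader(split, context);
  try {
    reader.initialize(split, context);
    while (reader.nextKeyValue()) {
      TableBlockIndexUniqueIdentifier key = reader.getCurrentKey();
      BlockletIndexDetailsWithSchema value = reader.getCurrentValue();
      process(key, value); // placeholder for the consuming logic
    }
  } finally {
    // close() invalidates the segment-level SegmentPropertiesAndSchemaHolder entry
    reader.close();
  }
}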
Use of org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexInputSplit in project carbondata by apache.
The getSplits method of the class BlockletIndexInputFormat. It gathers the index splits whose blocklet indexes are not yet cached and returns only those not already submitted for loading by a concurrent job.
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  IndexFactory indexFactory =
      IndexStoreManager.getInstance().getDefaultIndex(table).getIndexFactory();
  CacheableIndex factory = (CacheableIndex) indexFactory;
  // collect only the index splits whose blocklet indexes are not yet cached
  List<IndexInputSplit> validDistributables =
      factory.getAllUncached(validSegments, indexExprWrapper);
  if (!validSegments.isEmpty()) {
    this.readCommittedScope = validSegments.get(0).getReadCommittedScope();
  }
  CarbonBlockLoaderHelper instance = CarbonBlockLoaderHelper.getInstance();
  int distributableSize = validDistributables.size();
  List<InputSplit> inputSplits = new ArrayList<>(distributableSize);
  keys = new HashSet<>();
  Iterator<IndexInputSplit> iterator = validDistributables.iterator();
  while (iterator.hasNext()) {
    BlockletIndexInputSplit next = (BlockletIndexInputSplit) iterator.next();
    String key = next.getSegmentPath();
    // add only the blocks that no other job has already submitted for loading
    if (instance.checkAlreadySubmittedBlock(table.getAbsoluteTableIdentifier(), key)) {
      inputSplits.add(next);
      keys.add(key);
    }
  }
  int sizeOfDistToBeLoaded = inputSplits.size();
  LOGGER.info("Submitted " + sizeOfDistToBeLoaded + " blocks out of " + distributableSize
      + ". The rest are already considered for load by another job.");
  return inputSplits;
}
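For completeness, a hedged sketch of what consuming getSplits might look like on the driver side; the distributeIndexLoad method and the println are illustrative assumptions, not part of CarbonData:

// Hypothetical driver snippet: in CarbonData the returned splits would be
// distributed across executors, each feeding the record reader shown earlier.
void distributeIndexLoad(BlockletIndexInputFormat format, JobContext jobContext)
    throws IOException {
  List<InputSplit> splits = format.getSplits(jobContext);
  for (InputSplit split : splits) {
    // every returned split is a BlockletIndexInputSplit identifying one
    // segment's uncached block index
    BlockletIndexInputSplit indexSplit = (BlockletIndexInputSplit) split;
    System.out.println("Index split for segment path: " + indexSplit.getSegmentPath());
  }
}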