Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
The class CarbonInputFormat, method getSplits.
/**
 * {@inheritDoc}
 * Configurations FileInputFormat.INPUT_DIR and CarbonInputFormat.INPUT_SEGMENT_NUMBERS
 * are used to get the table path to read.
 *
 * @return input splits for the blocks that match the filter
 * @throws IOException
 */
private List<InputSplit> getSplits(JobContext job, FilterResolverIntf filterResolver,
    BitSet matchedPartitions, CacheClient cacheClient) throws IOException {
  List<InputSplit> result = new LinkedList<InputSplit>();
  FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
  AbsoluteTableIdentifier absoluteTableIdentifier =
      getCarbonTable(job.getConfiguration()).getAbsoluteTableIdentifier();
  SegmentUpdateStatusManager updateStatusManager =
      new SegmentUpdateStatusManager(absoluteTableIdentifier);
  // for each segment, fetch the blocks matching the filter in the driver BTree
  for (String segmentNo : getSegmentsToAccess(job)) {
    List<DataRefNode> dataRefNodes =
        getDataBlocksOfSegment(job, filterExpressionProcessor, absoluteTableIdentifier,
            filterResolver, matchedPartitions, segmentNo, cacheClient, updateStatusManager);
    for (DataRefNode dataRefNode : dataRefNodes) {
      BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
      TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
      // skip blocks invalidated by updates or deletes on this segment
      if (CarbonUtil.isInvalidTableBlock(tableBlockInfo,
          updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()),
          updateStatusManager)) {
        continue;
      }
      result.add(new CarbonInputSplit(segmentNo, new Path(tableBlockInfo.getFilePath()),
          tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(),
          tableBlockInfo.getLocations(), tableBlockInfo.getBlockletInfos().getNoOfBlockLets(),
          tableBlockInfo.getVersion()));
    }
  }
  return result;
}
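For orientation, here is a minimal driver-side sketch of obtaining these splits through the public getSplits(JobContext) overload that this private method backs. It is not from the CarbonData sources: the input path is a placeholder, and selecting segments via CarbonInputFormat.INPUT_SEGMENT_NUMBERS is omitted because the helper that sets it differs across CarbonData versions.

// A minimal sketch, assuming a placeholder table path; real jobs also need the
// segment list and table metadata configured before getSplits will succeed.
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.carbondata.hadoop.CarbonInputFormat;

public class ListCarbonSplits {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // FileInputFormat.INPUT_DIR: the table path to read (placeholder path)
    FileInputFormat.addInputPath(job, new Path("/store/default/t1"));
    List<InputSplit> splits = new CarbonInputFormat<Object>().getSplits(job);
    for (InputSplit split : splits) {
      System.out.println(split);  // each split wraps one TableBlockInfo-backed block
    }
  }
}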
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
The class CarbonInputFormat, method getSegmentAbstractIndexs.
/**
 * Returns an index for each task file.
 *
 * @param job
 * @param absoluteTableIdentifier
 * @param segmentId
 * @return map of task bucket holder to its loaded index
 * @throws IOException
 */
private Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> getSegmentAbstractIndexs(
    JobContext job, AbsoluteTableIdentifier absoluteTableIdentifier, String segmentId,
    CacheClient cacheClient, SegmentUpdateStatusManager updateStatusManager) throws IOException {
  Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> segmentIndexMap = null;
  SegmentTaskIndexWrapper segmentTaskIndexWrapper = null;
  boolean isSegmentUpdated = false;
  Set<SegmentTaskIndexStore.TaskBucketHolder> taskKeys = null;
  TableSegmentUniqueIdentifier tableSegmentUniqueIdentifier =
      new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segmentId);
  segmentTaskIndexWrapper =
      cacheClient.getSegmentAccessClient().getIfPresent(tableSegmentUniqueIdentifier);
  UpdateVO updateDetails = updateStatusManager.getInvalidTimestampRange(segmentId);
  if (null != segmentTaskIndexWrapper) {
    segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
    if (isSegmentUpdate(segmentTaskIndexWrapper, updateDetails)) {
      taskKeys = segmentIndexMap.keySet();
      isSegmentUpdated = true;
    }
  }
  // if the segment tree is not loaded, load it
  if (segmentIndexMap == null || isSegmentUpdated) {
    // if the segment is updated, only the updated blocks' TableBlockInfo instances have to be
    // retrieved; they are then filtered against taskKeys, and if a block's task is already
    // present, it is not added again since its btree is already loaded.
    Set<SegmentTaskIndexStore.TaskBucketHolder> validTaskKeys =
        new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
    List<TableBlockInfo> tableBlockInfoList =
        getTableBlockInfo(job, tableSegmentUniqueIdentifier, taskKeys,
            updateStatusManager.getInvalidTimestampRange(segmentId), updateStatusManager,
            segmentId, validTaskKeys);
    if (!tableBlockInfoList.isEmpty()) {
      Map<String, List<TableBlockInfo>> segmentToTableBlocksInfos = new HashMap<>();
      segmentToTableBlocksInfos.put(segmentId, tableBlockInfoList);
      // get the btree blocks for the given segment
      tableSegmentUniqueIdentifier.setSegmentToTableBlocksInfos(segmentToTableBlocksInfos);
      tableSegmentUniqueIdentifier.setIsSegmentUpdated(isSegmentUpdated);
      segmentTaskIndexWrapper =
          cacheClient.getSegmentAccessClient().get(tableSegmentUniqueIdentifier);
      segmentIndexMap = segmentTaskIndexWrapper.getTaskIdToTableSegmentMap();
    }
    if (null != taskKeys) {
      // drop entries whose task keys are no longer valid after the update
      Map<SegmentTaskIndexStore.TaskBucketHolder, AbstractIndex> finalMap =
          new HashMap<>(validTaskKeys.size());
      for (SegmentTaskIndexStore.TaskBucketHolder key : validTaskKeys) {
        finalMap.put(key, segmentIndexMap.get(key));
      }
      segmentIndexMap = finalMap;
    }
  }
  return segmentIndexMap;
}
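The control flow above is easy to lose among the CarbonData-specific types, so the following self-contained sketch (all names hypothetical) restates it: look the segment up in a cache, reload when the entry is missing or the segment was updated, and when it was merely stale, keep only the task keys that are still valid after the reload.

// Hypothetical restatement of the cache-then-refresh pattern in getSegmentAbstractIndexs.
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class SegmentIndexCacheSketch {
  private final Map<String, Map<String, Object>> cache = new HashMap<>();

  Map<String, Object> lookup(String segmentId, boolean segmentUpdated) {
    Map<String, Object> indexMap = cache.get(segmentId);
    Set<String> staleTaskKeys = null;
    if (indexMap != null && segmentUpdated) {
      // remember what was cached so unchanged tasks are not reloaded
      staleTaskKeys = indexMap.keySet();
    }
    if (indexMap == null || segmentUpdated) {
      Set<String> validTaskKeys = new HashSet<>();
      indexMap = loadIndex(segmentId, staleTaskKeys, validTaskKeys);
      if (staleTaskKeys != null) {
        // keep only the entries whose task keys survived the update
        Map<String, Object> filtered = new HashMap<>(validTaskKeys.size());
        for (String key : validTaskKeys) {
          filtered.put(key, indexMap.get(key));
        }
        indexMap = filtered;
      }
    }
    return indexMap;
  }

  private Map<String, Object> loadIndex(String segmentId, Set<String> staleTaskKeys,
      Set<String> validTaskKeys) {
    // placeholder for the real btree load, which would also fill validTaskKeys
    Map<String, Object> loaded = new HashMap<>();
    cache.put(segmentId, loaded);
    return loaded;
  }
}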
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
The class CarbonRecordReader, method initialize.
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // The input split can contain a single HDFS block or multiple blocks, so first get all the
  // blocks and then set them in the query model.
  List<CarbonInputSplit> splitList;
  if (inputSplit instanceof CarbonInputSplit) {
    splitList = new ArrayList<>(1);
    splitList.add((CarbonInputSplit) inputSplit);
  } else if (inputSplit instanceof CarbonMultiBlockSplit) {
    // contains multiple blocks; this is an optimization for concurrent queries
    CarbonMultiBlockSplit multiBlockSplit = (CarbonMultiBlockSplit) inputSplit;
    splitList = multiBlockSplit.getAllSplits();
  } else {
    throw new RuntimeException("unsupported input split type: " + inputSplit);
  }
  List<TableBlockInfo> tableBlockInfoList = CarbonInputSplit.createBlocks(splitList);
  queryModel.setTableBlockInfos(tableBlockInfoList);
  readSupport.initialize(queryModel.getProjectionColumns(),
      queryModel.getAbsoluteTableIdentifier());
  try {
    carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
  } catch (QueryExecutionException e) {
    throw new InterruptedException(e.getMessage());
  }
}
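For context, this is how an initialized reader is typically driven. The sketch below is hypothetical: it assumes the caller supplies a configured CarbonInputFormat, one of its splits, and a TaskAttemptContext (in tests, a TaskAttemptContextImpl), which the framework would normally provide.

// Sketch only: drive a Hadoop RecordReader by hand, the way the framework
// (or a test harness) would after the initialize(...) call shown above.
import java.io.IOException;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.carbondata.hadoop.CarbonInputFormat;

public class ReadLoopSketch {
  static void readAll(CarbonInputFormat<Object> format, InputSplit split,
      TaskAttemptContext context) throws IOException, InterruptedException {
    RecordReader<Void, Object> reader = format.createRecordReader(split, context);
    reader.initialize(split, context);  // runs the method shown above
    try {
      while (reader.nextKeyValue()) {
        Object row = reader.getCurrentValue();  // one row, converted by the read support
        System.out.println(row);
      }
    } finally {
      reader.close();
    }
  }
}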
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
The class InMemoryBTreeIndex, method getTableBlockInfo.
/**
 * Below method will be used to get the table block info.
 *
 * @param job job context
 * @return list of table blocks
 * @throws IOException
 */
private List<TableBlockInfo> getTableBlockInfo(JobContext job) throws IOException {
  List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
  // identify table blocks from all file locations of the given segment
  for (InputSplit inputSplit : segment.getAllSplits(job)) {
    CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
    BlockletInfos blockletInfos = new BlockletInfos(carbonInputSplit.getNumberOfBlocklets(), 0,
        carbonInputSplit.getNumberOfBlocklets());
    tableBlockInfoList.add(
        new TableBlockInfo(carbonInputSplit.getPath().toString(), carbonInputSplit.getStart(),
            segment.getId(), carbonInputSplit.getLocations(), carbonInputSplit.getLength(),
            blockletInfos, carbonInputSplit.getVersion()));
  }
  return tableBlockInfoList;
}
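The BlockletInfos constructed above describes which blocklets of a block to scan. A small illustrative sketch of the argument order as it is used here; the blocklet count of 8 is made up, and the comments reflect how the arguments are used above rather than the declared parameter names.

// Illustrative only: scan all blocklets of a block, starting from the first.
import org.apache.carbondata.core.datastore.block.BlockletInfos;

public class BlockletInfosSketch {
  public static void main(String[] args) {
    BlockletInfos blockletInfos = new BlockletInfos(
        8,   // total number of blocklets in the block
        0,   // blocklet number to start scanning from
        8);  // number of blocklets to scan, here all of them
    System.out.println(blockletInfos.getNoOfBlockLets());
  }
}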
Use of org.apache.carbondata.core.datastore.block.TableBlockInfo in project carbondata by apache.
The class AbstractDataFileFooterConverter, method getIndexInfo.
/**
 * Below method will be used to get the index info from the index file.
 *
 * @param filePath file path of the index file
 * @param tableBlockInfoList list of table block info
 * @return list of index info
 * @throws IOException problem while reading the index file
 */
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList)
    throws IOException {
  CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
  List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
  try {
    // open the reader
    indexReader.openThriftReader(filePath);
    // get the index header
    org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
    List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
    List<org.apache.carbondata.format.ColumnSchema> table_columns =
        readIndexHeader.getTable_columns();
    for (int i = 0; i < table_columns.size(); i++) {
      columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
    }
    // get the segment info
    SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
    BlockletIndex blockletIndex = null;
    int counter = 0;
    int index = 0;
    DataFileFooter dataFileFooter = null;
    // read the block info from the file
    while (indexReader.hasNext()) {
      BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
      blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
      dataFileFooter = new DataFileFooter();
      TableBlockInfo tableBlockInfo = tableBlockInfoList.get(index);
      // match the block index entry to the block info by part number
      if (Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(tableBlockInfo.getFilePath()))
          == counter++) {
        tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
        tableBlockInfo.setVersion(
            ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
        int blockletSize = getBlockletSize(readBlockIndexInfo);
        tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
        dataFileFooter.setBlockletIndex(blockletIndex);
        dataFileFooter.setColumnInTable(columnSchemaList);
        dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
        dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
        dataFileFooter.setSegmentInfo(segmentInfo);
        dataFileFooters.add(dataFileFooter);
        if (++index == tableBlockInfoList.size()) {
          break;
        }
      }
    }
  } finally {
    indexReader.closeThriftReader();
  }
  return dataFileFooters;
}
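A hedged usage sketch: DataFileFooterConverter in org.apache.carbondata.core.util is a concrete subclass of AbstractDataFileFooterConverter, the index-file path below is a placeholder, and tableBlockInfoList stands for the block list built from the splits, as in the earlier examples.

// Sketch: read the per-block footers out of a segment's .carbonindex file.
import java.io.IOException;
import java.util.List;
import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.util.DataFileFooterConverter;

public class IndexInfoSketch {
  static void printRowCounts(List<TableBlockInfo> tableBlockInfoList) throws IOException {
    DataFileFooterConverter converter = new DataFileFooterConverter();
    List<DataFileFooter> footers = converter.getIndexInfo(
        "/store/default/t1/Fact/Part0/Segment_0/0.carbonindex", tableBlockInfoList);
    for (DataFileFooter footer : footers) {
      System.out.println(footer.getNumberOfRows());  // rows recorded in each block footer
    }
  }
}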