Use of org.apache.carbondata.core.scan.filter.TableProvider in project carbondata by Apache.
The class AbstractQueryExecutor, method initQuery.
/**
* Below method will be used to fill the executor properties based on the query
* model: it parses the query model, extracts the details and fills them into
* the query properties.
*
* @param queryModel
*/
protected void initQuery(QueryModel queryModel) throws IOException {
StandardLogService.setThreadName(StandardLogService.getPartitionID(queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName()), queryModel.getQueryId());
LOGGER.info("Query will be executed on table: " + queryModel.getAbsoluteTableIdentifier().getCarbonTableIdentifier().getTableName());
// add executor service for query execution
queryProperties.executorService = Executors.newCachedThreadPool();
// Initializing statistics list to record the query statistics
// using copy-on-write to handle concurrent scenarios
queryProperties.queryStatisticsRecorder = CarbonTimeStatisticsFactory.createExecutorRecorder(queryModel.getQueryId());
queryModel.setStatisticsRecorder(queryProperties.queryStatisticsRecorder);
QueryStatistic queryStatistic = new QueryStatistic();
// sort the block info so the blocks will be loaded in sorted order,
// which is required for query execution
Collections.sort(queryModel.getTableBlockInfos());
if (queryModel.getTableBlockInfos().get(0).getDetailInfo() != null) {
List<AbstractIndex> indexList = new ArrayList<>();
Map<String, List<TableBlockInfo>> listMap = new LinkedHashMap<>();
for (TableBlockInfo blockInfo : queryModel.getTableBlockInfos()) {
List<TableBlockInfo> tableBlockInfos = listMap.get(blockInfo.getFilePath());
if (tableBlockInfos == null) {
tableBlockInfos = new ArrayList<>();
listMap.put(blockInfo.getFilePath(), tableBlockInfos);
}
BlockletDetailInfo blockletDetailInfo = blockInfo.getDetailInfo();
// read the blocklet information from the block file if it is not already present
if (blockletDetailInfo.getBlockletInfo() == null) {
readAndFillBlockletInfo(blockInfo, tableBlockInfos, blockletDetailInfo);
} else {
tableBlockInfos.add(blockInfo);
}
}
for (List<TableBlockInfo> tableBlockInfos : listMap.values()) {
indexList.add(new IndexWrapper(tableBlockInfos));
}
queryProperties.dataBlocks = indexList;
} else {
// get the table blocks
CacheProvider cacheProvider = CacheProvider.getInstance();
BlockIndexStore<TableBlockUniqueIdentifier, AbstractIndex> cache = (BlockIndexStore) cacheProvider.createCache(CacheType.EXECUTOR_BTREE);
// remove the invalid table blocks, i.e. blocks which have been deleted or compacted
cache.removeTableBlocks(queryModel.getInvalidSegmentIds(), queryModel.getAbsoluteTableIdentifier());
List<TableBlockUniqueIdentifier> tableBlockUniqueIdentifiers = prepareTableBlockUniqueIdentifier(queryModel.getTableBlockInfos(), queryModel.getAbsoluteTableIdentifier());
cache.removeTableBlocksIfHorizontalCompactionDone(queryModel);
queryProperties.dataBlocks = cache.getAll(tableBlockUniqueIdentifiers);
}
queryStatistic.addStatistics(QueryStatisticsConstants.LOAD_BLOCKS_EXECUTOR, System.currentTimeMillis());
queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
// calculating the total number of aggregated columns
int measureCount = queryModel.getProjectionMeasures().size();
int currentIndex = 0;
DataType[] dataTypes = new DataType[measureCount];
for (ProjectionMeasure carbonMeasure : queryModel.getProjectionMeasures()) {
// adding the data type and aggregation type of every measure; this
// can be used to select the aggregator
dataTypes[currentIndex] = carbonMeasure.getMeasure().getDataType();
currentIndex++;
}
queryProperties.measureDataTypes = dataTypes;
// aggregation will be executed in the following order:
// 1. aggregate dimension expression
// 2. expression
// 3. query measure
// so calculate the start index of the expressions
// and the start index of the measure columns
queryProperties.filterMeasures = new HashSet<>();
queryProperties.complexFilterDimension = new HashSet<>();
QueryUtil.getAllFilterDimensions(queryModel.getFilterExpressionResolverTree(), queryProperties.complexFilterDimension, queryProperties.filterMeasures);
CarbonTable carbonTable = queryModel.getTable();
TableProvider tableProvider = new SingleTableProvider(carbonTable);
queryStatistic = new QueryStatistic();
// mapping from each dictionary column's unique column id to its dictionary,
// which will be used to get the column's actual data
queryProperties.columnToDictionaryMapping = QueryUtil.getDimensionDictionaryDetail(queryModel.getProjectionDimensions(), queryProperties.complexFilterDimension, queryModel.getAbsoluteTableIdentifier(), tableProvider);
queryStatistic.addStatistics(QueryStatisticsConstants.LOAD_DICTIONARY, System.currentTimeMillis());
queryProperties.queryStatisticsRecorder.recordStatistics(queryStatistic);
queryModel.setColumnToDictionaryMapping(queryProperties.columnToDictionaryMapping);
}
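Every call site on this page follows the same pattern when it needs a TableProvider: wrap the CarbonTable in a SingleTableProvider and pass it to resolveFilter together with the raw filter expression. Below is a minimal sketch of that recurring pattern, pulled out into a hypothetical helper; the class and method names are illustrative, and the import package locations follow the usual CarbonData layout but may differ by version.

import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.scan.expression.Expression;
// SingleTableProvider is assumed to live next to TableProvider in core.scan.filter
import org.apache.carbondata.core.scan.filter.SingleTableProvider;
import org.apache.carbondata.core.scan.filter.TableProvider;
import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;

public final class FilterResolutionSketch {

  // Hypothetical helper mirroring what initQuery and the input formats do:
  // mark the filter columns on the table, wrap the table in a
  // SingleTableProvider, then resolve the expression into the
  // FilterResolverIntf tree consumed by the executor.
  static FilterResolverIntf resolve(CarbonTable carbonTable, Expression filter) {
    // null flag arrays: no per-column dimension/measure tracking is requested
    carbonTable.processFilterExpression(filter, null, null);
    TableProvider tableProvider = new SingleTableProvider(carbonTable);
    return carbonTable.resolveFilter(filter, tableProvider);
  }
}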
Use of org.apache.carbondata.core.scan.filter.TableProvider in project carbondata by Apache.
The class CarbonFileInputFormat, method getSplits.
/**
* {@inheritDoc}
* The FileInputFormat.INPUT_DIR configuration
* is used to get the table path to read.
*
* @param job
* @return List<InputSplit> list of CarbonInputSplit
* @throws IOException
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
if (null == carbonTable) {
throw new IOException("Missing/Corrupt schema file for table.");
}
if (getValidateSegmentsToAccess(job.getConfiguration())) {
// get all valid segments and set them into the configuration
// check for externalTable segment (Segment_null)
// process and resolve the expression
Expression filter = getFilterPredicates(job.getConfiguration());
TableProvider tableProvider = new SingleTableProvider(carbonTable);
// this will be null in case of corrupt schema file.
PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
carbonTable.processFilterExpression(filter, null, null);
FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), "null");
FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
if (FileFactory.isFileExist(segmentDir, fileType)) {
// if external table segments are found, add them to the list
List<Segment> externalTableSegments = new ArrayList<Segment>();
Segment seg = new Segment("null", null);
externalTableSegments.add(seg);
Map<String, String> indexFiles = new SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir);
if (indexFiles.size() == 0) {
throw new RuntimeException("Index file not present to read the carbondata file");
}
// do block filtering and get split
List<InputSplit> splits = getSplits(job, filterInterface, externalTableSegments, null, partitionInfo, null);
return splits;
}
}
return null;
}
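For context, a caller drives this method through the normal Hadoop MapReduce job setup; the table path is read from FileInputFormat.INPUT_DIR, as the javadoc states. A minimal sketch follows, assuming CarbonFileInputFormat lives in org.apache.carbondata.hadoop.api and using an illustrative table path; as the code above shows, getSplits may return null when no external-table segment directory exists.

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// the package of CarbonFileInputFormat is an assumption; it may differ by version
import org.apache.carbondata.hadoop.api.CarbonFileInputFormat;

public class GetSplitsSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // FileInputFormat.INPUT_DIR is what getSplits reads to locate the table
    FileInputFormat.setInputPaths(job, new Path("/tmp/carbon/external_table"));
    // raw type used because the generic parameter is not relevant to split planning
    CarbonFileInputFormat format = new CarbonFileInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    System.out.println("splits: " + (splits == null ? 0 : splits.size()));
  }
}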
Use of org.apache.carbondata.core.scan.filter.TableProvider in project carbondata by Apache.
The class CarbonInputFormat, method createQueryModel.
public QueryModel createQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
Configuration configuration = taskAttemptContext.getConfiguration();
CarbonTable carbonTable = getOrCreateCarbonTable(configuration);
TableProvider tableProvider = new SingleTableProvider(carbonTable);
// the query plan includes the projection columns
String projectionString = getColumnProjection(configuration);
String[] projectionColumnNames = null;
if (projectionString != null) {
projectionColumnNames = projectionString.split(",");
}
QueryModel queryModel = carbonTable.createQueryWithProjection(projectionColumnNames, getDataTypeConverter(configuration));
// set the filter to the query model in order to filter blocklet before scan
Expression filter = getFilterPredicates(configuration);
boolean[] isFilterDimensions = new boolean[carbonTable.getDimensionOrdinalMax()];
// getAllMeasures returns list of visible and invisible columns
boolean[] isFilterMeasures = new boolean[carbonTable.getAllMeasures().size()];
carbonTable.processFilterExpression(filter, isFilterDimensions, isFilterMeasures);
queryModel.setIsFilterDimensions(isFilterDimensions);
queryModel.setIsFilterMeasures(isFilterMeasures);
FilterResolverIntf filterIntf = carbonTable.resolveFilter(filter, tableProvider);
queryModel.setFilterExpressionResolverTree(filterIntf);
// update the file level index store if there are invalid segments
if (inputSplit instanceof CarbonMultiBlockSplit) {
CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
if (invalidSegments.size() > 0) {
queryModel.setInvalidSegmentIds(invalidSegments);
}
List<UpdateVO> invalidTimestampRangeList = split.getAllSplits().get(0).getInvalidTimestampRange();
if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
}
}
return queryModel;
}
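createQueryModel is typically invoked from a record reader once the framework hands it a split. The sketch below shows such a call site; only createQueryModel itself is taken from the snippet above, while the reader skeleton, field names, and import package locations are assumptions.

import java.io.IOException;

import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import org.apache.carbondata.core.scan.model.QueryModel;
// the package of CarbonInputFormat is an assumption; it may differ by version
import org.apache.carbondata.hadoop.api.CarbonInputFormat;

// Hypothetical reader skeleton: the remaining RecordReader methods are left abstract.
public abstract class QueryModelReaderSketch<K, V> extends RecordReader<K, V> {

  private final CarbonInputFormat format;
  protected QueryModel queryModel;

  protected QueryModelReaderSketch(CarbonInputFormat format) {
    this.format = format;
  }

  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    // builds the projection, resolves the filter via SingleTableProvider and
    // records any invalid segments carried by a CarbonMultiBlockSplit
    this.queryModel = format.createQueryModel(split, context);
  }
}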
Use of org.apache.carbondata.core.scan.filter.TableProvider in project carbondata by Apache.
The class CarbonTableInputFormat, method getSplits.
/**
* {@inheritDoc}
* The FileInputFormat.INPUT_DIR configuration
* is used to get the table path to read.
*
* @param job
* @return List<InputSplit> list of CarbonInputSplit
* @throws IOException
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
LoadMetadataDetails[] loadMetadataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath.getTableStatusFilePath(identifier.getTablePath()));
CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
if (null == carbonTable) {
throw new IOException("Missing/Corrupt schema file for table.");
}
SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(carbonTable, loadMetadataDetails);
List<Segment> invalidSegments = new ArrayList<>();
List<UpdateVO> invalidTimestampsList = new ArrayList<>();
List<Segment> streamSegments = null;
// get all valid segments and set them into the configuration
SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments(loadMetadataDetails);
if (getValidateSegmentsToAccess(job.getConfiguration())) {
List<Segment> validSegments = segments.getValidSegments();
streamSegments = segments.getStreamSegments();
streamSegments = getFilteredSegment(job, streamSegments, true);
if (validSegments.size() == 0) {
return getSplitsOfStreaming(job, identifier, streamSegments);
}
List<Segment> filteredSegmentToAccess = getFilteredSegment(job, segments.getValidSegments(), true);
if (filteredSegmentToAccess.size() == 0) {
return getSplitsOfStreaming(job, identifier, streamSegments);
} else {
setSegmentsToAccess(job.getConfiguration(), filteredSegmentToAccess);
}
// remove entries in the segment index if there are invalid segments
invalidSegments.addAll(segments.getInvalidSegments());
for (Segment invalidSegmentId : invalidSegments) {
invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId.getSegmentNo()));
}
if (invalidSegments.size() > 0) {
DataMapStoreManager.getInstance().clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), invalidSegments);
}
}
ArrayList<Segment> validAndInProgressSegments = new ArrayList<>(segments.getValidSegments());
// Also add the in-progress segments to the filter, because an aggregate table load
// reads data from the in-progress table.
validAndInProgressSegments.addAll(segments.getListOfInProgressSegments());
// get updated filtered list
List<Segment> filteredSegmentToAccess = getFilteredSegment(job, new ArrayList<>(validAndInProgressSegments), false);
// Clean updated segments from memory when an update has happened on those segments
List<Segment> toBeCleanedSegments = new ArrayList<>();
for (SegmentUpdateDetails segmentUpdateDetail : updateStatusManager.getUpdateStatusDetails()) {
boolean refreshNeeded = DataMapStoreManager.getInstance().getTableSegmentRefresher(carbonTable).isRefreshNeeded(segmentUpdateDetail.getSegmentName(), updateStatusManager);
if (refreshNeeded) {
toBeCleanedSegments.add(new Segment(segmentUpdateDetail.getSegmentName(), null));
}
}
// Clean segments if refresh is needed
for (Segment segment : filteredSegmentToAccess) {
if (DataMapStoreManager.getInstance().getTableSegmentRefresher(carbonTable).isRefreshNeeded(segment.getSegmentNo())) {
toBeCleanedSegments.add(segment);
}
}
if (toBeCleanedSegments.size() > 0) {
DataMapStoreManager.getInstance().clearInvalidSegments(getOrCreateCarbonTable(job.getConfiguration()), toBeCleanedSegments);
}
// process and resolve the expression
Expression filter = getFilterPredicates(job.getConfiguration());
TableProvider tableProvider = new SingleTableProvider(carbonTable);
// this will be null in case of corrupt schema file.
PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
carbonTable.processFilterExpression(filter, null, null);
// prune partitions for filter query on partition table
BitSet matchedPartitions = null;
if (partitionInfo != null && partitionInfo.getPartitionType() != PartitionType.NATIVE_HIVE) {
matchedPartitions = setMatchedPartitions(null, filter, partitionInfo, null);
if (matchedPartitions != null) {
if (matchedPartitions.cardinality() == 0) {
return new ArrayList<InputSplit>();
} else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
matchedPartitions = null;
}
}
}
FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
// do block filtering and get split
List<InputSplit> splits = getSplits(job, filterInterface, filteredSegmentToAccess, matchedPartitions, partitionInfo, null, updateStatusManager);
// pass the invalid segments to the task side in order to remove the index entries on the task side
if (invalidSegments.size() > 0) {
for (InputSplit split : splits) {
((org.apache.carbondata.hadoop.CarbonInputSplit) split).setInvalidSegments(invalidSegments);
((org.apache.carbondata.hadoop.CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
}
}
// add all splits of streaming
List<InputSplit> splitsOfStreaming = getSplitsOfStreaming(job, identifier, streamSegments);
if (!splitsOfStreaming.isEmpty()) {
splits.addAll(splitsOfStreaming);
}
return splits;
}
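One detail in the partition pruning above is worth spelling out: an empty matchedPartitions BitSet short-circuits the query with zero splits, while a BitSet that matches every partition is replaced by null so that no pruning filter is pushed down at all. A small self-contained illustration of that contract, using a hypothetical helper over plain java.util.BitSet:

import java.util.BitSet;

public final class PartitionPruneSketch {

  // Mirrors the decision in getSplits: null means "scan all partitions",
  // an empty BitSet means "nothing can match, return no splits".
  static BitSet normalize(BitSet matchedPartitions, int numPartitions) {
    if (matchedPartitions == null) {
      return null; // no pruning information available
    }
    if (matchedPartitions.cardinality() == numPartitions) {
      return null; // every partition matches, pruning is a no-op
    }
    return matchedPartitions; // possibly empty; the caller then returns an empty split list
  }

  public static void main(String[] args) {
    BitSet some = new BitSet(4);
    some.set(1);
    some.set(3);
    System.out.println(normalize(some, 4)); // {1, 3} -> prune to two partitions
    BitSet all = new BitSet(4);
    all.set(0, 4);
    System.out.println(normalize(all, 4)); // null -> scan everything
  }
}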
Use of org.apache.carbondata.core.scan.filter.TableProvider in project carbondata by Apache.
The class CarbonTableInputFormat, method getSplitsOfOneSegment.
/**
* Read data in one segment. Used for the ALTER TABLE partition statement.
* @param job
* @param targetSegment
* @param oldPartitionIdList the old partition ids before partitionInfo was changed
* @return the list of input splits for the target segment
*/
public List<InputSplit> getSplitsOfOneSegment(JobContext job, String targetSegment, List<Integer> oldPartitionIdList, PartitionInfo partitionInfo) {
List<Segment> invalidSegments = new ArrayList<>();
List<UpdateVO> invalidTimestampsList = new ArrayList<>();
List<Segment> segmentList = new ArrayList<>();
segmentList.add(new Segment(targetSegment, null));
setSegmentsToAccess(job.getConfiguration(), segmentList);
try {
// process and resolve the expression
Expression filter = getFilterPredicates(job.getConfiguration());
CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
// this will be null in case of corrupt schema file.
if (null == carbonTable) {
throw new IOException("Missing/Corrupt schema file for table.");
}
carbonTable.processFilterExpression(filter, null, null);
TableProvider tableProvider = new SingleTableProvider(carbonTable);
// prune partitions for filter query on partition table
String partitionIds = job.getConfiguration().get(ALTER_PARTITION_ID);
// matchedPartitions records partitionIndex, not partitionId
BitSet matchedPartitions = null;
if (partitionInfo != null) {
matchedPartitions = setMatchedPartitions(partitionIds, filter, partitionInfo, oldPartitionIdList);
if (matchedPartitions != null) {
if (matchedPartitions.cardinality() == 0) {
return new ArrayList<InputSplit>();
} else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
matchedPartitions = null;
}
}
}
FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
// do block filtering and get split
List<InputSplit> splits = getSplits(job, filterInterface, segmentList, matchedPartitions, partitionInfo, oldPartitionIdList, new SegmentUpdateStatusManager(carbonTable));
// pass the invalid segments to the task side in order to remove the index entries on the task side
if (invalidSegments.size() > 0) {
for (InputSplit split : splits) {
((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
}
}
return splits;
} catch (IOException e) {
throw new RuntimeException("Can't get splits of the target segment ", e);
}
}
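A caller-side sketch for the alter-partition flow is below. The getSplitsOfOneSegment signature is taken from the snippet above; the segment number, old partition ids, table path, and the CarbonTableInputFormat package are illustrative assumptions, and passing a null PartitionInfo simply skips partition pruning, as the method allows.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// the package of CarbonTableInputFormat is an assumption; it may differ by version
import org.apache.carbondata.hadoop.api.CarbonTableInputFormat;

public class AlterPartitionSplitsSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // the table path below is illustrative; the real alter-partition flow also sets
    // the ALTER_PARTITION_ID configuration read inside getSplitsOfOneSegment
    FileInputFormat.setInputPaths(job, new Path("/tmp/carbon/store/default/t1"));
    CarbonTableInputFormat format = new CarbonTableInputFormat();
    List<Integer> oldPartitionIds = Arrays.asList(0, 1, 2);
    // segment "2" is a placeholder; null PartitionInfo skips partition pruning
    List<InputSplit> splits = format.getSplitsOfOneSegment(job, "2", oldPartitionIds, null);
    System.out.println("splits for segment 2: " + splits.size());
  }
}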