Use of org.apache.carbondata.hadoop.CarbonMultiBlockSplit in project carbondata by apache.
From the class CarbonStreamInputFormatTest, the method buildInputSplit:
private InputSplit buildInputSplit() throws IOException {
  CarbonInputSplit carbonInputSplit = new CarbonInputSplit();
  List<CarbonInputSplit> splitList = new ArrayList<>();
  splitList.add(carbonInputSplit);
  return new CarbonMultiBlockSplit(splitList, new String[] { "localhost" }, FileFormat.ROW_V1);
}
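Since buildInputSplit wraps exactly one CarbonInputSplit, a test can check the wrapper's accessors directly. Below is a minimal JUnit 4 sketch (imports omitted, as in the snippets on this page); the accessors getAllSplits(), getLocations(), and getFileFormat() are assumptions based on the 1.x-era CarbonMultiBlockSplit API, so verify them against your version.

@Test
public void testBuildInputSplit() throws IOException, InterruptedException {
  CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) buildInputSplit();
  // the wrapper should carry exactly the one CarbonInputSplit added in buildInputSplit
  Assert.assertEquals(1, split.getAllSplits().size());
  // preferred locations come from the String[] passed to the constructor
  Assert.assertArrayEquals(new String[] { "localhost" }, split.getLocations());
  // ROW_V1 marks the split as streaming-segment data rather than columnar
  Assert.assertEquals(FileFormat.ROW_V1, split.getFileFormat());
}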
Use of org.apache.carbondata.hadoop.CarbonMultiBlockSplit in project carbondata by apache.
From the class CarbonInputFormat, the method createQueryModel:
public QueryModel createQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException {
  Configuration configuration = taskAttemptContext.getConfiguration();
  CarbonTable carbonTable = getOrCreateCarbonTable(configuration);
  TableProvider tableProvider = new SingleTableProvider(carbonTable);
  // the query plan includes the projection columns
  String projectionString = getColumnProjection(configuration);
  String[] projectionColumnNames = null;
  if (projectionString != null) {
    projectionColumnNames = projectionString.split(",");
  }
  QueryModel queryModel = carbonTable.createQueryWithProjection(
      projectionColumnNames, getDataTypeConverter(configuration));
  // set the filter on the query model in order to prune blocklets before the scan
  Expression filter = getFilterPredicates(configuration);
  boolean[] isFilterDimensions = new boolean[carbonTable.getDimensionOrdinalMax()];
  // getAllMeasures returns the list of both visible and invisible columns
  boolean[] isFilterMeasures = new boolean[carbonTable.getAllMeasures().size()];
  carbonTable.processFilterExpression(filter, isFilterDimensions, isFilterMeasures);
  queryModel.setIsFilterDimensions(isFilterDimensions);
  queryModel.setIsFilterMeasures(isFilterMeasures);
  FilterResolverIntf filterIntf = carbonTable.resolveFilter(filter, tableProvider);
  queryModel.setFilterExpressionResolverTree(filterIntf);
  // update the file-level index store if there are invalid segments
  if (inputSplit instanceof CarbonMultiBlockSplit) {
    CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
    List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
    if (invalidSegments.size() > 0) {
      queryModel.setInvalidSegmentIds(invalidSegments);
    }
    List<UpdateVO> invalidTimestampRangeList =
        split.getAllSplits().get(0).getInvalidTimestampRange();
    if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
      queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
    }
  }
  return queryModel;
}
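In practice a caller fills in the Configuration that createQueryModel reads back out. The sketch below is a hypothetical driver (imports omitted, as in the snippets on this page): the static helpers setColumnProjection and setFilterPredicates, the expression classes, and the column names "name" and "salary" are assumptions based on the CarbonData 1.x API, not anything fixed by the snippet above.

private QueryModel buildQueryModel(InputSplit split, TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  // the projection is stored as a comma-separated string, matching the split(",") above
  CarbonProjection projection = new CarbonProjection();
  projection.addColumn("name");    // hypothetical column names
  projection.addColumn("salary");
  CarbonTableInputFormat.setColumnProjection(conf, projection);
  // the filter expression later consumed by processFilterExpression and resolveFilter
  Expression filter = new EqualToExpression(
      new ColumnExpression("name", DataTypes.STRING),
      new LiteralExpression("bob", DataTypes.STRING));
  CarbonTableInputFormat.setFilterPredicates(conf, filter);
  return new CarbonTableInputFormat<Object>().createQueryModel(split, context);
}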
Use of org.apache.carbondata.hadoop.CarbonMultiBlockSplit in project carbondata by apache.
From the class CarbonStreamRecordReader, the method initialize:
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  // input
  if (split instanceof CarbonInputSplit) {
    fileSplit = (CarbonInputSplit) split;
  } else if (split instanceof CarbonMultiBlockSplit) {
    fileSplit = ((CarbonMultiBlockSplit) split).getAllSplits().get(0);
  } else {
    fileSplit = (FileSplit) split;
  }
  // metadata
  hadoopConf = context.getConfiguration();
  if (model == null) {
    CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
    model = format.createQueryModel(split, context);
  }
  carbonTable = model.getTable();
  List<CarbonDimension> dimensions =
      carbonTable.getDimensionByTableName(carbonTable.getTableName());
  dimensionCount = dimensions.size();
  List<CarbonMeasure> measures =
      carbonTable.getMeasureByTableName(carbonTable.getTableName());
  measureCount = measures.size();
  List<CarbonColumn> carbonColumnList =
      carbonTable.getStreamStorageOrderColumn(carbonTable.getTableName());
  storageColumns = carbonColumnList.toArray(new CarbonColumn[carbonColumnList.size()]);
  isNoDictColumn = CarbonDataProcessorUtil.getNoDictionaryMapping(storageColumns);
  directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) {
      directDictionaryGenerators[i] = DirectDictionaryKeyGeneratorFactory
          .getDirectDictionaryGenerator(storageColumns[i].getDataType());
    }
  }
  // measures follow the dimensions in storage order
  measureDataTypes = new DataType[measureCount];
  for (int i = 0; i < measureCount; i++) {
    measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType();
  }
  // decode data
  allNonNull = new BitSet(storageColumns.length);
  projection = model.getProjectionColumns();
  isRequired = new boolean[storageColumns.length];
  boolean[] isFilterDimensions = model.getIsFilterDimensions();
  boolean[] isFilterMeasures = model.getIsFilterMeasures();
  isFilterRequired = new boolean[storageColumns.length];
  filterMap = new int[storageColumns.length];
  // map each filter column from its table ordinal to its storage-order position
  for (int i = 0; i < storageColumns.length; i++) {
    if (storageColumns[i].isDimension()) {
      if (isFilterDimensions[storageColumns[i].getOrdinal()]) {
        isRequired[i] = true;
        isFilterRequired[i] = true;
        filterMap[i] = storageColumns[i].getOrdinal();
      }
    } else {
      if (isFilterMeasures[storageColumns[i].getOrdinal()]) {
        isRequired[i] = true;
        isFilterRequired[i] = true;
        filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal();
      }
    }
  }
  // map each projection column to its index in the output row
  isProjectionRequired = new boolean[storageColumns.length];
  projectionMap = new int[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    for (int j = 0; j < projection.length; j++) {
      if (storageColumns[i].getColName().equals(projection[j].getColName())) {
        isRequired[i] = true;
        isProjectionRequired[i] = true;
        projectionMap[i] = j;
        break;
      }
    }
  }
  // initialize filter
  if (null != model.getFilterExpressionResolverTree()) {
    initializeFilter();
  } else if (projection.length == 0) {
    skipScanData = true;
  }
}
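Once initialize has mapped the filter and projection columns to storage order, the reader is driven by the standard Hadoop RecordReader loop. A minimal sketch, assuming the Void/Object key-value typing of the 1.x CarbonStreamRecordReader and a split and context like those above:

CarbonStreamRecordReader reader = new CarbonStreamRecordReader();
reader.initialize(multiBlockSplit, context);   // e.g. the split from buildInputSplit above
try {
  while (reader.nextKeyValue()) {
    Object row = reader.getCurrentValue();     // one decoded row, in projection order
    // consume row ...
  }
} finally {
  reader.close();
}

Note the last branch of initialize: when there is no filter and the projection is empty, skipScanData is set, which lets the reader avoid decoding column data entirely (e.g. for count-style queries).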