
Example 1 with CarbonMultiBlockSplit

Use of org.apache.carbondata.hadoop.CarbonMultiBlockSplit in project carbondata by apache.

From the class CarbonStreamInputFormatTest, the method buildInputSplit:

private InputSplit buildInputSplit() throws IOException {
    // wrap a single (empty) CarbonInputSplit in a multi-block split that
    // claims "localhost" as its preferred host and uses the streaming row format
    CarbonInputSplit carbonInputSplit = new CarbonInputSplit();
    List<CarbonInputSplit> splitList = new ArrayList<>();
    splitList.add(carbonInputSplit);
    return new CarbonMultiBlockSplit(splitList, new String[] { "localhost" }, FileFormat.ROW_V1);
}
Also used: java.util.ArrayList, org.apache.carbondata.hadoop.CarbonInputSplit, org.apache.carbondata.hadoop.CarbonMultiBlockSplit
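
A minimal sketch of how the returned split might be inspected, using only the CarbonMultiBlockSplit API already visible on this page (getAllSplits() plus the getLocations() method inherited from Hadoop's InputSplit); the method name inspectSplit is illustrative, not part of the test:

private void inspectSplit() throws IOException, InterruptedException {
    CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) buildInputSplit();
    // getAllSplits() exposes the wrapped CarbonInputSplit list that Examples 2 and 3 below rely on
    List<CarbonInputSplit> wrapped = split.getAllSplits();
    System.out.println("wrapped splits: " + wrapped.size()); // 1
    // the hosts passed to the constructor come back via the Hadoop InputSplit contract
    System.out.println("preferred host: " + split.getLocations()[0]); // "localhost"
}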

Example 2 with CarbonMultiBlockSplit

Use of org.apache.carbondata.hadoop.CarbonMultiBlockSplit in project carbondata by apache.

From the class CarbonInputFormat, the method createQueryModel:

public QueryModel createQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    CarbonTable carbonTable = getOrCreateCarbonTable(configuration);
    TableProvider tableProvider = new SingleTableProvider(carbonTable);
    // the query plan includes the projection columns
    String projectionString = getColumnProjection(configuration);
    String[] projectionColumnNames = null;
    if (projectionString != null) {
        projectionColumnNames = projectionString.split(",");
    }
    QueryModel queryModel = carbonTable.createQueryWithProjection(projectionColumnNames, getDataTypeConverter(configuration));
    // set the filter on the query model so that blocklets can be pruned before the scan
    Expression filter = getFilterPredicates(configuration);
    boolean[] isFilterDimensions = new boolean[carbonTable.getDimensionOrdinalMax()];
    // getAllMeasures() returns both visible and invisible columns
    boolean[] isFilterMeasures = new boolean[carbonTable.getAllMeasures().size()];
    carbonTable.processFilterExpression(filter, isFilterDimensions, isFilterMeasures);
    queryModel.setIsFilterDimensions(isFilterDimensions);
    queryModel.setIsFilterMeasures(isFilterMeasures);
    FilterResolverIntf filterIntf = carbonTable.resolveFilter(filter, tableProvider);
    queryModel.setFilterExpressionResolverTree(filterIntf);
    // update the file-level index store if there are invalid segments
    if (inputSplit instanceof CarbonMultiBlockSplit) {
        CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
        List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
        if (!invalidSegments.isEmpty()) {
            queryModel.setInvalidSegmentIds(invalidSegments);
        }
        List<UpdateVO> invalidTimestampRangeList = split.getAllSplits().get(0).getInvalidTimestampRange();
        if (invalidTimestampRangeList != null && !invalidTimestampRangeList.isEmpty()) {
            queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
        }
    }
    return queryModel;
}
Also used: org.apache.hadoop.conf.Configuration, org.apache.carbondata.core.metadata.schema.table.CarbonTable, org.apache.carbondata.core.mutate.UpdateVO, org.apache.carbondata.core.scan.expression.Expression, org.apache.carbondata.core.scan.filter.SingleTableProvider, org.apache.carbondata.core.scan.filter.TableProvider, org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf, org.apache.carbondata.core.scan.model.QueryModel, org.apache.carbondata.hadoop.CarbonMultiBlockSplit
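
Example 3 below shows the typical caller of createQueryModel: a RecordReader building its query model during initialization. Condensed to just that call pattern (field and variable names illustrative), it looks like this:

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // createQueryModel() reads the projection and filter back out of the task
    // configuration, so the reader only needs to pass the split and context through
    CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
    QueryModel model = format.createQueryModel(split, context);
    // ... hand the model to the scan and decode machinery ...
}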

Example 3 with CarbonMultiBlockSplit

Use of org.apache.carbondata.hadoop.CarbonMultiBlockSplit in project carbondata by apache.

From the class CarbonStreamRecordReader, the method initialize:

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // input: normalize whichever split type arrives down to a single FileSplit
    if (split instanceof CarbonInputSplit) {
        fileSplit = (CarbonInputSplit) split;
    } else if (split instanceof CarbonMultiBlockSplit) {
        fileSplit = ((CarbonMultiBlockSplit) split).getAllSplits().get(0);
    } else {
        fileSplit = (FileSplit) split;
    }
    // metadata: lazily build the query model from the task configuration
    hadoopConf = context.getConfiguration();
    if (model == null) {
        CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
        model = format.createQueryModel(split, context);
    }
    carbonTable = model.getTable();
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getTableName());
    dimensionCount = dimensions.size();
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getTableName());
    measureCount = measures.size();
    List<CarbonColumn> carbonColumnList = carbonTable.getStreamStorageOrderColumn(carbonTable.getTableName());
    storageColumns = carbonColumnList.toArray(new CarbonColumn[carbonColumnList.size()]);
    isNoDictColumn = CarbonDataProcessorUtil.getNoDictionaryMapping(storageColumns);
    directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            directDictionaryGenerators[i] = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(storageColumns[i].getDataType());
        }
    }
    measureDataTypes = new DataType[measureCount];
    for (int i = 0; i < measureCount; i++) {
        measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType();
    }
    // decode data: mark which storage columns the filter and projection actually need
    allNonNull = new BitSet(storageColumns.length);
    projection = model.getProjectionColumns();
    isRequired = new boolean[storageColumns.length];
    boolean[] isFilterDimensions = model.getIsFilterDimensions();
    boolean[] isFilterMeasures = model.getIsFilterMeasures();
    isFilterRequired = new boolean[storageColumns.length];
    filterMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        if (storageColumns[i].isDimension()) {
            if (isFilterDimensions[storageColumns[i].getOrdinal()]) {
                isRequired[i] = true;
                isFilterRequired[i] = true;
                filterMap[i] = storageColumns[i].getOrdinal();
            }
        } else {
            if (isFilterMeasures[storageColumns[i].getOrdinal()]) {
                isRequired[i] = true;
                isFilterRequired[i] = true;
                filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal();
            }
        }
    }
    isProjectionRequired = new boolean[storageColumns.length];
    projectionMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        for (int j = 0; j < projection.length; j++) {
            if (storageColumns[i].getColName().equals(projection[j].getColName())) {
                isRequired[i] = true;
                isProjectionRequired[i] = true;
                projectionMap[i] = j;
                break;
            }
        }
    }
    // initialize the filter if present; with no filter and an empty projection there is nothing to scan
    if (null != model.getFilterExpressionResolverTree()) {
        initializeFilter();
    } else if (projection.length == 0) {
        skipScanData = true;
    }
}
Also used: java.util.BitSet, org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn, org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension, org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure, org.apache.carbondata.hadoop.CarbonInputSplit, org.apache.carbondata.hadoop.CarbonMultiBlockSplit, org.apache.carbondata.hadoop.api.CarbonTableInputFormat, org.apache.hadoop.mapreduce.lib.input.FileSplit
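
Putting the three examples together, a hypothetical driver (not taken from the project) could feed the split from Example 1 through the reader above. The no-arg CarbonStreamRecordReader constructor is an assumption here; nextKeyValue(), getCurrentValue(), and close() are the standard org.apache.hadoop.mapreduce.RecordReader contract:

// hypothetical wiring, not from the project sources
InputSplit split = buildInputSplit(); // Example 1
CarbonStreamRecordReader reader = new CarbonStreamRecordReader(); // assumed no-arg constructor
reader.initialize(split, taskAttemptContext); // Example 3; the context is supplied by the framework
while (reader.nextKeyValue()) {
    Object row = reader.getCurrentValue();
    // ... consume the decoded row ...
}
reader.close();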

Aggregations

CarbonMultiBlockSplit (org.apache.carbondata.hadoop.CarbonMultiBlockSplit): 3 uses
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 2 uses
ArrayList (java.util.ArrayList): 1 use
BitSet (java.util.BitSet): 1 use
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 1 use
CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn): 1 use
CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension): 1 use
CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure): 1 use
UpdateVO (org.apache.carbondata.core.mutate.UpdateVO): 1 use
Expression (org.apache.carbondata.core.scan.expression.Expression): 1 use
SingleTableProvider (org.apache.carbondata.core.scan.filter.SingleTableProvider): 1 use
TableProvider (org.apache.carbondata.core.scan.filter.TableProvider): 1 use
FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf): 1 use
QueryModel (org.apache.carbondata.core.scan.model.QueryModel): 1 use
CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat): 1 use
Configuration (org.apache.hadoop.conf.Configuration): 1 use
FileSplit (org.apache.hadoop.mapreduce.lib.input.FileSplit): 1 use