Example 1 with FilterResolverIntf

Use of org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf in project carbondata by Apache.

The class FilterExpressionProcessor, method getFilterResolvertree.

/**
   * API that returns a filter resolver instance, which is used by filter
   * executors to evaluate or execute the filters.
   *
   * @param expressionTree  filter expression tree from which the resolver
   *                        tree is built.
   * @param tableIdentifier identifier of the table the filter applies to.
   * @return the resolved FilterResolverIntf instance.
   */
private FilterResolverIntf getFilterResolvertree(Expression expressionTree, AbsoluteTableIdentifier tableIdentifier) throws FilterUnsupportedException, IOException {
    FilterResolverIntf filterEvaluatorTree = createFilterResolverTree(expressionTree, tableIdentifier);
    traverseAndResolveTree(filterEvaluatorTree, tableIdentifier);
    return filterEvaluatorTree;
}
Also used : FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
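
Since getFilterResolvertree is private, external callers reach it through the processor's public entry point. A minimal, hedged sketch, assuming FilterExpressionProcessor exposes a public getFilterResolver(Expression, AbsoluteTableIdentifier) that delegates to the method above (consistent with the resolveFilter usages in the later examples):

private FilterResolverIntf buildResolver(Expression filterExpression, AbsoluteTableIdentifier identifier) throws FilterUnsupportedException, IOException {
    // assumption: the processor's public API delegates to getFilterResolvertree,
    // which creates the resolver tree and then traverses it to resolve each node
    FilterExpressionProcessor processor = new FilterExpressionProcessor();
    return processor.getFilterResolver(filterExpression, identifier);
}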

Example 2 with FilterResolverIntf

Use of org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf in project carbondata by Apache.

The class CarbonInputFormat, method getSplits.

/**
   * {@inheritDoc}
   * The configuration FileInputFormat.INPUT_DIR is used to get the
   * table path to read.
   *
   * @param job the job context carrying the configuration.
   * @return List<InputSplit> list of CarbonInputSplit.
   * @throws IOException
   */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CacheClient cacheClient = new CacheClient(identifier.getStorePath());
    try {
        List<String> invalidSegments = new ArrayList<>();
        List<UpdateVO> invalidTimestampsList = new ArrayList<>();
        // get all valid segments and set them into the configuration
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
            setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
            if (segments.getValidSegments().size() == 0) {
                return new ArrayList<>(0);
            }
            // remove entry in the segment index if there are invalid segments
            invalidSegments.addAll(segments.getInvalidSegments());
            for (String invalidSegmentId : invalidSegments) {
                invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
            }
            if (invalidSegments.size() > 0) {
                List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
                for (String segId : invalidSegments) {
                    invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
                }
                cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
            }
        }
        // process and resolve the expression
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
        // this will be null in case of corrupt schema file.
        if (null == carbonTable) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
        // prune partitions for filter query on partition table
        BitSet matchedPartitions = null;
        if (null != filter) {
            PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
            if (null != partitionInfo) {
                Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
                matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
                if (matchedPartitions.cardinality() == 0) {
                    // no partition is required
                    return new ArrayList<InputSplit>();
                }
                if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
                    // all partitions are required, no need to prune partitions
                    matchedPartitions = null;
                }
            }
        }
        FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
        // do block filtering and get the splits
        List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
        // pass the invalid segments to the task side so their index entries can be removed there
        if (invalidSegments.size() > 0) {
            for (InputSplit split : splits) {
                ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
                ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
            }
        }
        return splits;
    } finally {
        // close the cache client to clear LRU cache memory
        cacheClient.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
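
For context, a hedged driver-side sketch of invoking getSplits: per the Javadoc above, the table path is read from FileInputFormat.INPUT_DIR, so a caller points the job at the table location before asking for splits. The path is illustrative, and serializing a filter into the configuration is a separate step this excerpt does not show:

Job job = Job.getInstance(new Configuration());
// CarbonInputFormat reads the table path from FileInputFormat.INPUT_DIR
FileInputFormat.setInputPaths(job, new Path("/store/default/sales"));
CarbonInputFormat<Object> carbonInputFormat = new CarbonInputFormat<>();
List<InputSplit> splits = carbonInputFormat.getSplits(job);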

Example 3 with FilterResolverIntf

Use of org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf in project carbondata by Apache.

The class CarbonInputFormat, method getQueryModel.

public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    CarbonTable carbonTable = getCarbonTable(configuration);
    // getting the table absoluteTableIdentifier from the carbonTable
    // to avoid unnecessary deserialization
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
    QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
    // set the filter to the query model in order to filter blocklet before scan
    Expression filter = getFilterPredicates(configuration);
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    queryModel.setFilterExpressionResolverTree(filterIntf);
    // update the file level index store if there are invalid segments
    if (inputSplit instanceof CarbonMultiBlockSplit) {
        CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
        List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
        if (invalidSegments.size() > 0) {
            queryModel.setInvalidSegmentIds(invalidSegments);
        }
        List<UpdateVO> invalidTimestampRangeList = split.getAllSplits().get(0).getInvalidTimestampRange();
        if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
            queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
        }
    }
    return queryModel;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) Configuration(org.apache.hadoop.conf.Configuration) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
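
Task-side, getQueryModel is typically called while a RecordReader initializes, using the split and context handed over by the MapReduce framework. A minimal sketch; the surrounding reader class and its carbonInputFormat field are hypothetical:

@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException {
    // build the query model (projection plus resolved filter) for this split
    QueryModel queryModel = carbonInputFormat.getQueryModel(inputSplit, context);
    // ... pass the model on to the scan/executor layer (not shown)
}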

Example 4 with FilterResolverIntf

Use of org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf in project carbondata by Apache.

The class CarbonTableReader, method getInputSplits2.

public List<CarbonLocalInputSplit> getInputSplits2(CarbonTableCacheModel tableCacheModel, Expression filters) throws Exception {
    // need to apply the filters to the segments
    FilterExpressionProcessor filterExpressionProcessor = new FilterExpressionProcessor();
    AbsoluteTableIdentifier absoluteTableIdentifier = tableCacheModel.carbonTable.getAbsoluteTableIdentifier();
    CacheClient cacheClient = new CacheClient(absoluteTableIdentifier.getStorePath());
    List<String> invalidSegments = new ArrayList<>();
    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    // get all valid segments and store them in the table cache model
    SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(absoluteTableIdentifier);
    SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
    SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
    tableCacheModel.segments = segments.getValidSegments().toArray(new String[0]);
    if (segments.getValidSegments().size() == 0) {
        return new ArrayList<>(0);
    }
    // remove entry in the segment index if there are invalid segments
    invalidSegments.addAll(segments.getInvalidSegments());
    for (String invalidSegmentId : invalidSegments) {
        invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
    }
    if (invalidSegments.size() > 0) {
        List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
        for (String segId : invalidSegments) {
            invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(absoluteTableIdentifier, segId));
        }
        cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
    }
    // resolve the filter to apply to the segments
    CarbonInputFormatUtil.processFilterExpression(filters, tableCacheModel.carbonTable);
    FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filters, tableCacheModel.carbonTable.getAbsoluteTableIdentifier());
    List<CarbonLocalInputSplit> result = new ArrayList<>();
    // for each segment, fetch the blocks matching the filter from the driver BTree
    for (String segmentNo : tableCacheModel.segments) {
        try {
            List<DataRefNode> dataRefNodes = getDataBlocksOfSegment(filterExpressionProcessor, absoluteTableIdentifier, tableCacheModel.carbonTablePath, filterInterface, segmentNo, cacheClient, updateStatusManager);
            for (DataRefNode dataRefNode : dataRefNodes) {
                BlockBTreeLeafNode leafNode = (BlockBTreeLeafNode) dataRefNode;
                TableBlockInfo tableBlockInfo = leafNode.getTableBlockInfo();
                if (CarbonUtil.isInvalidTableBlock(tableBlockInfo, updateStatusManager.getInvalidTimestampRange(tableBlockInfo.getSegmentId()), updateStatusManager)) {
                    continue;
                }
                result.add(new CarbonLocalInputSplit(segmentNo, tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset(), tableBlockInfo.getBlockLength(), Arrays.asList(tableBlockInfo.getLocations()), tableBlockInfo.getBlockletInfos().getNoOfBlockLets(), tableBlockInfo.getVersion().number()));
            }
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
    cacheClient.close();
    return result;
}
Also used : CacheClient(org.apache.carbondata.hadoop.CacheClient) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) KeyGenException(org.apache.carbondata.core.keygenerator.KeyGenException) IndexBuilderException(org.apache.carbondata.core.datastore.exception.IndexBuilderException) IOException(java.io.IOException) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf) BlockBTreeLeafNode(org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode)
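
The filters argument is an ordinary carbondata Expression tree. A hedged sketch of building a simple equality predicate to pass in, assuming the standard expression classes ColumnExpression, LiteralExpression and EqualToExpression (exact constructors and the DataType enum may differ across versions); reader and tableCacheModel are assumed to come from the Presto integration setup, which is not shown:

// build the predicate "country = 'US'" as an Expression tree
Expression column = new ColumnExpression("country", DataType.STRING);
Expression literal = new LiteralExpression("US", DataType.STRING);
Expression filters = new EqualToExpression(column, literal);
List<CarbonLocalInputSplit> splits = reader.getInputSplits2(tableCacheModel, filters);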

Example 5 with FilterResolverIntf

Use of org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf in project carbondata by Apache.

The class MapredCarbonInputFormat, method getQueryModel.

public QueryModel getQueryModel(Configuration configuration) throws IOException {
    CarbonTable carbonTable = getCarbonTable(configuration);
    // getting the table absoluteTableIdentifier from the carbonTable
    // to avoid unnecessary deserialization
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    if (projection == null) {
        projection = configuration.get("hive.io.file.readcolumn.names");
    }
    CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
    QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
    // set the filter to the query model in order to filter blocklet before scan
    Expression filter = getFilterPredicates(configuration);
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    queryModel.setFilterExpressionResolverTree(filterIntf);
    return queryModel;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)
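
On the Hive path the projection falls back to hive.io.file.readcolumn.names, which Hive populates with the columns a query actually reads. A minimal sketch of driving the model from that property; the configuration is assumed to already carry the table and filter settings, which this excerpt does not show:

Configuration configuration = new Configuration();
// Hive sets this to the columns read by the query; getQueryModel
// falls back to it when no Carbon projection is configured
configuration.set("hive.io.file.readcolumn.names", "id,name");
MapredCarbonInputFormat inputFormat = new MapredCarbonInputFormat();
QueryModel queryModel = inputFormat.getQueryModel(configuration);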

Aggregations

FilterResolverIntf (org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf): 6
AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier): 4
Expression (org.apache.carbondata.core.scan.expression.Expression): 4
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 3
UpdateVO (org.apache.carbondata.core.mutate.UpdateVO): 3
IOException (java.io.IOException): 2
FilterExpressionProcessor (org.apache.carbondata.core.scan.filter.FilterExpressionProcessor): 2
CarbonQueryPlan (org.apache.carbondata.core.scan.model.CarbonQueryPlan): 2
QueryModel (org.apache.carbondata.core.scan.model.QueryModel): 2
SegmentStatusManager (org.apache.carbondata.core.statusmanager.SegmentStatusManager): 2
SegmentUpdateStatusManager (org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager): 2
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 2
LinkedList (java.util.LinkedList): 1
TableSegmentUniqueIdentifier (org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier): 1
IndexBuilderException (org.apache.carbondata.core.datastore.exception.IndexBuilderException): 1
BlockBTreeLeafNode (org.apache.carbondata.core.datastore.impl.btree.BlockBTreeLeafNode): 1
KeyGenException (org.apache.carbondata.core.keygenerator.KeyGenException): 1
PartitionInfo (org.apache.carbondata.core.metadata.schema.PartitionInfo): 1
Partitioner (org.apache.carbondata.core.scan.partition.Partitioner): 1
CacheClient (org.apache.carbondata.hadoop.CacheClient): 1