Search in sources :

Example 11 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

the class CarbonInputFormat method getQueryModel.

public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
    Configuration configuration = taskAttemptContext.getConfiguration();
    CarbonTable carbonTable = getCarbonTable(configuration);
    // getting the table absoluteTableIdentifier from the carbonTable
    // to avoid unnecessary deserialization
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    CarbonQueryPlan queryPlan = CarbonInputFormatUtil.createQueryPlan(carbonTable, projection);
    QueryModel queryModel = QueryModel.createModel(identifier, queryPlan, carbonTable);
    // set the filter to the query model in order to filter blocklet before scan
    Expression filter = getFilterPredicates(configuration);
    CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    FilterResolverIntf filterIntf = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    queryModel.setFilterExpressionResolverTree(filterIntf);
    // update the file level index store if there are invalid segment
    if (inputSplit instanceof CarbonMultiBlockSplit) {
        CarbonMultiBlockSplit split = (CarbonMultiBlockSplit) inputSplit;
        List<String> invalidSegments = split.getAllSplits().get(0).getInvalidSegments();
        if (invalidSegments.size() > 0) {
            queryModel.setInvalidSegmentIds(invalidSegments);
        }
        List<UpdateVO> invalidTimestampRangeList = split.getAllSplits().get(0).getInvalidTimestampRange();
        if ((null != invalidTimestampRangeList) && (invalidTimestampRangeList.size() > 0)) {
            queryModel.setInvalidBlockForSegmentId(invalidTimestampRangeList);
        }
    }
    return queryModel;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonQueryPlan(org.apache.carbondata.core.scan.model.CarbonQueryPlan) Configuration(org.apache.hadoop.conf.Configuration) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) QueryModel(org.apache.carbondata.core.scan.model.QueryModel) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Example 12 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

the class CarbonInputFormat_FT method testGetFilteredSplits.

@Test
public void testGetFilteredSplits() throws Exception {
    CarbonInputFormat carbonInputFormat = new CarbonInputFormat();
    JobConf jobConf = new JobConf(new Configuration());
    Job job = Job.getInstance(jobConf);
    FileInputFormat.addInputPath(job, new Path("/opt/carbonstore/db/table1"));
    job.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "1,2");
    Expression expression = new EqualToExpression(new ColumnExpression("c1", DataType.STRING), new LiteralExpression("a", DataType.STRING));
    CarbonInputFormat.setFilterPredicates(job.getConfiguration(), expression);
    List splits = carbonInputFormat.getSplits(job);
    Assert.assertTrue(splits != null);
    Assert.assertTrue(!splits.isEmpty());
}
Also used : Path(org.apache.hadoop.fs.Path) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) Configuration(org.apache.hadoop.conf.Configuration) Expression(org.apache.carbondata.core.scan.expression.Expression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) CarbonInputFormat(org.apache.carbondata.hadoop.CarbonInputFormat) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) List(java.util.List) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Example 13 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

the class CarbondataRecordSetProvider method fillFilter2QueryModel.

// Build filter for QueryModel
private void fillFilter2QueryModel(QueryModel queryModel, TupleDomain<ColumnHandle> originalConstraint, CarbonTable carbonTable) {
    //queryModel.setFilterExpressionResolverTree(new FilterResolverIntf());
    //Build Predicate Expression
    ImmutableList.Builder<Expression> filters = ImmutableList.builder();
    Domain domain = null;
    for (ColumnHandle c : originalConstraint.getDomains().get().keySet()) {
        // Build ColumnExpresstion for Expresstion(Carbondata)
        CarbondataColumnHandle cdch = (CarbondataColumnHandle) c;
        Type type = cdch.getColumnType();
        DataType coltype = Spi2CarbondataTypeMapper(cdch);
        Expression colExpression = new ColumnExpression(cdch.getColumnName(), coltype);
        domain = originalConstraint.getDomains().get().get(c);
        checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
        if (domain.getValues().isNone()) {
        }
        if (domain.getValues().isAll()) {
        }
        List<Object> singleValues = new ArrayList<>();
        List<Expression> rangeFilter = new ArrayList<>();
        for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
            // Already checked
            checkState(!range.isAll());
            if (range.isSingleValue()) {
                singleValues.add(range.getLow().getValue());
            } else {
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    Object value = ConvertDataByType(range.getLow().getValue(), type);
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            if (type == TimestampType.TIMESTAMP) {
                            //todo not now
                            } else {
                                GreaterThanExpression greater = new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype));
                                rangeFilter.add(greater);
                            }
                            break;
                        case EXACTLY:
                            GreaterThanEqualToExpression greater = new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
                            rangeFilter.add(greater);
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    Object value = ConvertDataByType(range.getHigh().getValue(), type);
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            LessThanEqualToExpression less = new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
                            rangeFilter.add(less);
                            break;
                        case BELOW:
                            LessThanExpression less2 = new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
                            rangeFilter.add(less2);
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                }
            }
        }
        if (singleValues.size() == 1) {
            Expression ex = null;
            if (coltype.equals(DataType.STRING)) {
                ex = new EqualToExpression(colExpression, new LiteralExpression(((Slice) singleValues.get(0)).toStringUtf8(), coltype));
            } else if (coltype.equals(DataType.TIMESTAMP) || coltype.equals(DataType.DATE)) {
                Long value = (Long) singleValues.get(0) * 1000;
                ex = new EqualToExpression(colExpression, new LiteralExpression(value, coltype));
            } else
                ex = new EqualToExpression(colExpression, new LiteralExpression(singleValues.get(0), coltype));
            filters.add(ex);
        } else if (singleValues.size() > 1) {
            ListExpression candidates = null;
            List<Expression> exs = singleValues.stream().map((a) -> {
                return new LiteralExpression(ConvertDataByType(a, type), coltype);
            }).collect(Collectors.toList());
            candidates = new ListExpression(exs);
            if (candidates != null)
                filters.add(new InExpression(colExpression, candidates));
        } else if (rangeFilter.size() > 0) {
            if (rangeFilter.size() > 1) {
                Expression finalFilters = new OrExpression(rangeFilter.get(0), rangeFilter.get(1));
                if (rangeFilter.size() > 2) {
                    for (int i = 2; i < rangeFilter.size(); i++) {
                        filters.add(new AndExpression(finalFilters, rangeFilter.get(i)));
                    }
                }
            } else if (rangeFilter.size() == 1)
                filters.add(rangeFilter.get(0));
        }
    }
    Expression finalFilters;
    List<Expression> tmp = filters.build();
    if (tmp.size() > 1) {
        finalFilters = new AndExpression(tmp.get(0), tmp.get(1));
        if (tmp.size() > 2) {
            for (int i = 2; i < tmp.size(); i++) {
                finalFilters = new AndExpression(finalFilters, tmp.get(i));
            }
        }
    } else if (tmp.size() == 1)
        finalFilters = tmp.get(0);
    else
        return;
    // todo set into QueryModel
    CarbonInputFormatUtil.processFilterExpression(finalFilters, carbonTable);
    queryModel.setFilterExpressionResolverTree(CarbonInputFormatUtil.resolveFilter(finalFilters, queryModel.getAbsoluteTableIdentifier()));
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) DataType(org.apache.carbondata.core.metadata.datatype.DataType) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) ColumnHandle(com.facebook.presto.spi.ColumnHandle) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) Range(com.facebook.presto.spi.predicate.Range) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain)

Example 14 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

the class CarbondataSplitManager method getSplits.

public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout) {
    CarbondataTableLayoutHandle layoutHandle = (CarbondataTableLayoutHandle) layout;
    CarbondataTableHandle tableHandle = layoutHandle.getTable();
    SchemaTableName key = tableHandle.getSchemaTableName();
    // Packaging presto-TupleDomain into CarbondataColumnConstraint, to decouple from presto-spi Module
    List<CarbondataColumnConstraint> rebuildConstraints = getColumnConstraints(layoutHandle.getConstraint());
    CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(key);
    Expression filters = parseFilterExpression(layoutHandle.getConstraint(), cache.carbonTable);
    if (cache != null) {
        try {
            List<CarbonLocalInputSplit> splits = carbonTableReader.getInputSplits2(cache, filters);
            ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
            for (CarbonLocalInputSplit split : splits) {
                cSplits.add(new CarbondataSplit(connectorId, tableHandle.getSchemaTableName(), layoutHandle.getConstraint(), split, rebuildConstraints));
            }
            return new FixedSplitSource(cSplits.build());
        } catch (Exception ex) {
            System.out.println(ex.toString());
        }
    }
    return null;
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) CarbonLocalInputSplit(org.apache.carbondata.presto.impl.CarbonLocalInputSplit) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel)

Example 15 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

the class CarbondataSplitManager method parseFilterExpression.

/**
   * Convert presto-TupleDomain predication into Carbon scan express condition
   * @param originalConstraint  presto-TupleDomain
   * @param carbonTable
   * @return
   */
public Expression parseFilterExpression(TupleDomain<ColumnHandle> originalConstraint, CarbonTable carbonTable) {
    ImmutableList.Builder<Expression> filters = ImmutableList.builder();
    Domain domain = null;
    for (ColumnHandle c : originalConstraint.getDomains().get().keySet()) {
        CarbondataColumnHandle cdch = (CarbondataColumnHandle) c;
        Type type = cdch.getColumnType();
        List<CarbonColumn> ccols = carbonTable.getCreateOrderColumn(carbonTable.getFactTableName());
        Optional<CarbonColumn> target = ccols.stream().filter(a -> a.getColName().equals(cdch.getColumnName())).findFirst();
        if (target.get() == null)
            return null;
        DataType coltype = target.get().getDataType();
        ColumnExpression colExpression = new ColumnExpression(cdch.getColumnName(), target.get().getDataType());
        //colExpression.setColIndex(cs.getSchemaOrdinal());
        colExpression.setDimension(target.get().isDimension());
        colExpression.setDimension(carbonTable.getDimensionByName(carbonTable.getFactTableName(), cdch.getColumnName()));
        colExpression.setCarbonColumn(target.get());
        domain = originalConstraint.getDomains().get().get(c);
        checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
        if (domain.getValues().isNone()) {
        }
        if (domain.getValues().isAll()) {
        }
        List<Object> singleValues = new ArrayList<>();
        List<Expression> rangeFilter = new ArrayList<>();
        for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
            // Already checked
            checkState(!range.isAll());
            if (range.isSingleValue()) {
                singleValues.add(range.getLow().getValue());
            } else {
                List<String> rangeConjuncts = new ArrayList<>();
                if (!range.getLow().isLowerUnbounded()) {
                    Object value = ConvertDataByType(range.getLow().getValue(), type);
                    switch(range.getLow().getBound()) {
                        case ABOVE:
                            if (type == TimestampType.TIMESTAMP) {
                            //todo not now
                            } else {
                                GreaterThanExpression greater = new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype));
                                rangeFilter.add(greater);
                            }
                            break;
                        case EXACTLY:
                            GreaterThanEqualToExpression greater = new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
                            rangeFilter.add(greater);
                            break;
                        case BELOW:
                            throw new IllegalArgumentException("Low marker should never use BELOW bound");
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
                    }
                }
                if (!range.getHigh().isUpperUnbounded()) {
                    Object value = ConvertDataByType(range.getHigh().getValue(), type);
                    switch(range.getHigh().getBound()) {
                        case ABOVE:
                            throw new IllegalArgumentException("High marker should never use ABOVE bound");
                        case EXACTLY:
                            LessThanEqualToExpression less = new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
                            rangeFilter.add(less);
                            break;
                        case BELOW:
                            LessThanExpression less2 = new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
                            rangeFilter.add(less2);
                            break;
                        default:
                            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
                    }
                }
            }
        }
        if (singleValues.size() == 1) {
            Expression ex = null;
            if (coltype.equals(DataType.STRING)) {
                ex = new EqualToExpression(colExpression, new LiteralExpression(((Slice) singleValues.get(0)).toStringUtf8(), coltype));
            } else
                ex = new EqualToExpression(colExpression, new LiteralExpression(singleValues.get(0), coltype));
            filters.add(ex);
        } else if (singleValues.size() > 1) {
            ListExpression candidates = null;
            List<Expression> exs = singleValues.stream().map((a) -> {
                return new LiteralExpression(ConvertDataByType(a, type), coltype);
            }).collect(Collectors.toList());
            candidates = new ListExpression(exs);
            if (candidates != null)
                filters.add(new InExpression(colExpression, candidates));
        } else if (rangeFilter.size() > 0) {
            if (rangeFilter.size() > 1) {
                Expression finalFilters = new OrExpression(rangeFilter.get(0), rangeFilter.get(1));
                if (rangeFilter.size() > 2) {
                    for (int i = 2; i < rangeFilter.size(); i++) {
                        filters.add(new AndExpression(finalFilters, rangeFilter.get(i)));
                    }
                }
            } else if (//only have one value
            rangeFilter.size() == 1)
                filters.add(rangeFilter.get(0));
        }
    }
    Expression finalFilters;
    List<Expression> tmp = filters.build();
    if (tmp.size() > 1) {
        finalFilters = new AndExpression(tmp.get(0), tmp.get(1));
        if (tmp.size() > 2) {
            for (int i = 2; i < tmp.size(); i++) {
                finalFilters = new AndExpression(finalFilters, tmp.get(i));
            }
        }
    } else if (tmp.size() == 1)
        finalFilters = tmp.get(0);
    else
        //no filter
        return null;
    return finalFilters;
}
Also used : Slice(io.airlift.slice.Slice) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) CarbonLocalInputSplit(org.apache.carbondata.presto.impl.CarbonLocalInputSplit) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) com.facebook.presto.spi.type(com.facebook.presto.spi.type) ArrayList(java.util.ArrayList) Inject(javax.inject.Inject) CarbonTableReader(org.apache.carbondata.presto.impl.CarbonTableReader) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Objects.requireNonNull(java.util.Objects.requireNonNull) CarbonTableCacheModel(org.apache.carbondata.presto.impl.CarbonTableCacheModel) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) Expression(org.apache.carbondata.core.scan.expression.Expression) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ConnectorSplitManager(com.facebook.presto.spi.connector.ConnectorSplitManager) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) Range(com.facebook.presto.spi.predicate.Range) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) Collectors(java.util.stream.Collectors) Preconditions.checkState(com.google.common.base.Preconditions.checkState) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) Domain(com.facebook.presto.spi.predicate.Domain) org.apache.carbondata.core.scan.expression.conditional(org.apache.carbondata.core.scan.expression.conditional) List(java.util.List) Optional(java.util.Optional) com.facebook.presto.spi(com.facebook.presto.spi) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ImmutableList(com.google.common.collect.ImmutableList) ArrayList(java.util.ArrayList) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) DataType(org.apache.carbondata.core.metadata.datatype.DataType) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) Range(com.facebook.presto.spi.predicate.Range) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain)

Aggregations

Expression (org.apache.carbondata.core.scan.expression.Expression)31 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)25 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)24 Test (org.junit.Test)18 ListExpression (org.apache.carbondata.core.scan.expression.conditional.ListExpression)10 AbstractDictionaryCacheTest (org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)7 OrExpression (org.apache.carbondata.core.scan.expression.logical.OrExpression)7 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)6 ArrayList (java.util.ArrayList)5 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)5 RangeExpression (org.apache.carbondata.core.scan.expression.logical.RangeExpression)5 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)4 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)4 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)4 GreaterThanEqualToExpression (org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression)4 GreaterThanExpression (org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression)4 LessThanEqualToExpression (org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression)4 NotEqualsExpression (org.apache.carbondata.core.scan.expression.conditional.NotEqualsExpression)4 TrueExpression (org.apache.carbondata.core.scan.expression.logical.TrueExpression)4