use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class CarbonInputFormat method getQueryModel.
/**
 * Assembles the {@link QueryModel} for the given split.
 *
 * <p>The model is created from the table and projection found in the task configuration, the
 * configured filter predicate is processed, resolved and attached to it, and, for a
 * {@link CarbonMultiBlockSplit}, the invalid segment ids and invalid timestamp ranges reported
 * by the first contained split are copied onto the model.
 *
 * @param inputSplit the split about to be read
 * @param taskAttemptContext context supplying the job configuration
 * @return the populated query model
 * @throws IOException declared by the signature; may be raised by the helpers invoked here
 */
public QueryModel getQueryModel(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
  Configuration conf = taskAttemptContext.getConfiguration();
  CarbonTable table = getCarbonTable(conf);
  // take the identifier straight from the table to avoid unnecessary deserialization
  AbsoluteTableIdentifier tableIdentifier = table.getAbsoluteTableIdentifier();

  // the query plan carries the projection columns
  CarbonQueryPlan plan = CarbonInputFormatUtil.createQueryPlan(table, getColumnProjection(conf));
  QueryModel model = QueryModel.createModel(tableIdentifier, plan, table);

  // attach the filter so blocklets can be filtered before the scan
  Expression predicate = getFilterPredicates(conf);
  CarbonInputFormatUtil.processFilterExpression(predicate, table);
  model.setFilterExpressionResolverTree(
      CarbonInputFormatUtil.resolveFilter(predicate, tableIdentifier));

  // only multi-block splits carry invalid segment information
  if (!(inputSplit instanceof CarbonMultiBlockSplit)) {
    return model;
  }

  CarbonMultiBlockSplit multiBlockSplit = (CarbonMultiBlockSplit) inputSplit;
  List<String> staleSegments = multiBlockSplit.getAllSplits().get(0).getInvalidSegments();
  if (!staleSegments.isEmpty()) {
    model.setInvalidSegmentIds(staleSegments);
  }
  List<UpdateVO> staleRanges = multiBlockSplit.getAllSplits().get(0).getInvalidTimestampRange();
  if (staleRanges != null && !staleRanges.isEmpty()) {
    model.setInvalidBlockForSegmentId(staleRanges);
  }
  return model;
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class CarbonInputFormat_FT method testGetFilteredSplits.
/**
 * Configures a job with an input path, the segment numbers "1,2" and an equality filter on
 * column {@code c1}, then checks that {@code getSplits} returns a non-null, non-empty list.
 */
@Test
public void testGetFilteredSplits() throws Exception {
  Job job = Job.getInstance(new JobConf(new Configuration()));
  FileInputFormat.addInputPath(job, new Path("/opt/carbonstore/db/table1"));
  job.getConfiguration().set(CarbonInputFormat.INPUT_SEGMENT_NUMBERS, "1,2");

  // filter under test: c1 = 'a'
  ColumnExpression column = new ColumnExpression("c1", DataType.STRING);
  LiteralExpression literal = new LiteralExpression("a", DataType.STRING);
  Expression equalsFilter = new EqualToExpression(column, literal);
  CarbonInputFormat.setFilterPredicates(job.getConfiguration(), equalsFilter);

  List<?> splits = new CarbonInputFormat().getSplits(job);
  Assert.assertNotNull(splits);
  Assert.assertFalse(splits.isEmpty());
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class CarbondataRecordSetProvider method fillFilter2QueryModel.
/**
 * Builds a Carbon filter expression from the Presto constraint and installs it on the
 * query model.
 *
 * <p>For every constrained column the domain's ranges are translated as follows: the lower and
 * upper bound of one range are combined with AND, and the individual ranges together with the
 * single-value matches (an equality, or an IN list when there are several) are combined with OR.
 * The per-column expressions are finally combined with AND. Columns whose domain cannot be
 * expressed (value set is none/all, or a range ends up without any usable bound) contribute no
 * filter, so the pushed-down predicate is never stricter than the constraint's value ranges.
 *
 * <p>If no column yields an expression the query model is left untouched.
 *
 * @param queryModel model that receives the resolved filter tree
 * @param originalConstraint Presto constraint to translate
 * @param carbonTable table the filter is processed against
 */
private void fillFilter2QueryModel(QueryModel queryModel, TupleDomain<ColumnHandle> originalConstraint, CarbonTable carbonTable) {
  // Without a domain map there is no per-column predicate that could be translated.
  if (!originalConstraint.getDomains().isPresent()) {
    return;
  }
  ImmutableList.Builder<Expression> filters = ImmutableList.builder();
  for (ColumnHandle c : originalConstraint.getDomains().get().keySet()) {
    // Build the Carbon column expression for this Presto column
    CarbondataColumnHandle cdch = (CarbondataColumnHandle) c;
    Type type = cdch.getColumnType();
    DataType coltype = Spi2CarbondataTypeMapper(cdch);
    Expression colExpression = new ColumnExpression(cdch.getColumnName(), coltype);
    Domain domain = originalConstraint.getDomains().get().get(c);
    checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
    // "none" has no ranges to translate and "all" would trip the checkState below;
    // neither restricts the column's values, so no filter is produced for them.
    if (domain.getValues().isNone() || domain.getValues().isAll()) {
      continue;
    }

    List<Object> singleValues = new ArrayList<>();
    List<Expression> rangeFilter = new ArrayList<>();
    // set when a range could not be turned into any bound expression
    boolean hasInexpressibleRange = false;
    for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
      // Already checked
      checkState(!range.isAll());
      if (range.isSingleValue()) {
        singleValues.add(range.getLow().getValue());
        continue;
      }
      Expression lower = null;
      if (!range.getLow().isLowerUnbounded()) {
        Object value = ConvertDataByType(range.getLow().getValue(), type);
        switch (range.getLow().getBound()) {
          case ABOVE:
            if (type == TimestampType.TIMESTAMP) {
              //todo not now
            } else {
              lower = new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype));
            }
            break;
          case EXACTLY:
            lower = new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
            break;
          case BELOW:
            throw new IllegalArgumentException("Low marker should never use BELOW bound");
          default:
            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
        }
      }
      Expression upper = null;
      if (!range.getHigh().isUpperUnbounded()) {
        Object value = ConvertDataByType(range.getHigh().getValue(), type);
        switch (range.getHigh().getBound()) {
          case ABOVE:
            throw new IllegalArgumentException("High marker should never use ABOVE bound");
          case EXACTLY:
            upper = new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
            break;
          case BELOW:
            upper = new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
            break;
          default:
            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
        }
      }
      // Both bounds of one range must hold at the same time.
      if (lower != null && upper != null) {
        rangeFilter.add(new AndExpression(lower, upper));
      } else if (lower != null) {
        rangeFilter.add(lower);
      } else if (upper != null) {
        rangeFilter.add(upper);
      } else {
        hasInexpressibleRange = true;
      }
    }
    // The alternatives are OR-ed; if one of them cannot be expressed, pushing only the
    // others would wrongly exclude rows, so skip the column altogether.
    if (hasInexpressibleRange) {
      continue;
    }

    List<Expression> alternatives = new ArrayList<>();
    if (singleValues.size() == 1) {
      Expression ex;
      if (coltype.equals(DataType.STRING)) {
        ex = new EqualToExpression(colExpression, new LiteralExpression(((Slice) singleValues.get(0)).toStringUtf8(), coltype));
      } else if (coltype.equals(DataType.TIMESTAMP) || coltype.equals(DataType.DATE)) {
        Long value = (Long) singleValues.get(0) * 1000;
        ex = new EqualToExpression(colExpression, new LiteralExpression(value, coltype));
      } else {
        ex = new EqualToExpression(colExpression, new LiteralExpression(singleValues.get(0), coltype));
      }
      alternatives.add(ex);
    } else if (singleValues.size() > 1) {
      List<Expression> exs = singleValues.stream()
          .map(a -> new LiteralExpression(ConvertDataByType(a, type), coltype))
          .collect(Collectors.toList());
      alternatives.add(new InExpression(colExpression, new ListExpression(exs)));
    }
    alternatives.addAll(rangeFilter);
    if (alternatives.isEmpty()) {
      continue;
    }
    Expression columnFilter = alternatives.get(0);
    for (int i = 1; i < alternatives.size(); i++) {
      columnFilter = new OrExpression(columnFilter, alternatives.get(i));
    }
    filters.add(columnFilter);
  }

  List<Expression> tmp = filters.build();
  if (tmp.isEmpty()) {
    return;
  }
  // every column's predicate has to hold
  Expression finalFilters = tmp.get(0);
  for (int i = 1; i < tmp.size(); i++) {
    finalFilters = new AndExpression(finalFilters, tmp.get(i));
  }
  CarbonInputFormatUtil.processFilterExpression(finalFilters, carbonTable);
  queryModel.setFilterExpressionResolverTree(CarbonInputFormatUtil.resolveFilter(finalFilters, queryModel.getAbsoluteTableIdentifier()));
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class CarbondataSplitManager method getSplits.
/**
 * Produces the split source for the table referenced by the given layout.
 *
 * <p>The layout's constraint is repackaged as {@code CarbondataColumnConstraint}s and also
 * translated into a Carbon filter expression, which is handed to the table reader when the
 * input splits are computed.
 *
 * @param transactionHandle not used by this implementation
 * @param session not used by this implementation
 * @param layout expected to be a {@link CarbondataTableLayoutHandle}
 * @return a fixed split source with one split per Carbon input split, or {@code null} when no
 *         cache entry exists for the table
 * @throws RuntimeException wrapping any failure raised while the splits are computed
 */
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableLayoutHandle layout) {
  CarbondataTableLayoutHandle layoutHandle = (CarbondataTableLayoutHandle) layout;
  CarbondataTableHandle tableHandle = layoutHandle.getTable();
  SchemaTableName key = tableHandle.getSchemaTableName();
  // Packaging presto-TupleDomain into CarbondataColumnConstraint, to decouple from presto-spi Module
  List<CarbondataColumnConstraint> rebuildConstraints = getColumnConstraints(layoutHandle.getConstraint());
  CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(key);
  if (cache == null) {
    return null;
  }
  // The cache must be checked before its table is read; the original dereferenced it first.
  Expression filters = parseFilterExpression(layoutHandle.getConstraint(), cache.carbonTable);
  try {
    List<CarbonLocalInputSplit> splits = carbonTableReader.getInputSplits2(cache, filters);
    ImmutableList.Builder<ConnectorSplit> cSplits = ImmutableList.builder();
    for (CarbonLocalInputSplit split : splits) {
      cSplits.add(new CarbondataSplit(connectorId, tableHandle.getSchemaTableName(), layoutHandle.getConstraint(), split, rebuildConstraints));
    }
    return new FixedSplitSource(cSplits.build());
  } catch (Exception ex) {
    // Surface the failure with its cause instead of printing it and returning null.
    throw new RuntimeException(ex.getMessage(), ex);
  }
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class CarbondataSplitManager method parseFilterExpression.
/**
 * Converts a Presto TupleDomain predicate into a Carbon scan filter expression.
 *
 * <p>For every constrained column the domain's ranges are translated as follows: the lower and
 * upper bound of one range are combined with AND, and the individual ranges together with the
 * single-value matches (an equality, or an IN list when there are several) are combined with OR.
 * The per-column expressions are finally combined with AND. Columns whose domain cannot be
 * expressed (value set is none/all, or a range ends up without any usable bound) contribute no
 * filter, so the result is never stricter than the constraint's value ranges.
 *
 * @param originalConstraint presto-TupleDomain
 * @param carbonTable table whose columns the constraint refers to
 * @return the combined filter expression, or {@code null} when the constraint carries no domain
 *         map, a constrained column is not found in the table, or no column yields a filter
 */
public Expression parseFilterExpression(TupleDomain<ColumnHandle> originalConstraint, CarbonTable carbonTable) {
  // Without a domain map there is no per-column predicate that could be translated.
  if (!originalConstraint.getDomains().isPresent()) {
    return null;
  }
  ImmutableList.Builder<Expression> filters = ImmutableList.builder();
  for (ColumnHandle c : originalConstraint.getDomains().get().keySet()) {
    CarbondataColumnHandle cdch = (CarbondataColumnHandle) c;
    Type type = cdch.getColumnType();
    List<CarbonColumn> ccols = carbonTable.getCreateOrderColumn(carbonTable.getFactTableName());
    Optional<CarbonColumn> target = ccols.stream().filter(a -> a.getColName().equals(cdch.getColumnName())).findFirst();
    // Optional.get() never returns null; it throws on an empty Optional, so test presence.
    if (!target.isPresent()) {
      return null;
    }
    CarbonColumn carbonColumn = target.get();
    DataType coltype = carbonColumn.getDataType();
    ColumnExpression colExpression = new ColumnExpression(cdch.getColumnName(), carbonColumn.getDataType());
    //colExpression.setColIndex(cs.getSchemaOrdinal());
    colExpression.setDimension(carbonColumn.isDimension());
    colExpression.setDimension(carbonTable.getDimensionByName(carbonTable.getFactTableName(), cdch.getColumnName()));
    colExpression.setCarbonColumn(carbonColumn);
    Domain domain = originalConstraint.getDomains().get().get(c);
    checkArgument(domain.getType().isOrderable(), "Domain type must be orderable");
    // "none" has no ranges to translate and "all" would trip the checkState below;
    // neither restricts the column's values, so no filter is produced for them.
    if (domain.getValues().isNone() || domain.getValues().isAll()) {
      continue;
    }

    List<Object> singleValues = new ArrayList<>();
    List<Expression> rangeFilter = new ArrayList<>();
    // set when a range could not be turned into any bound expression
    boolean hasInexpressibleRange = false;
    for (Range range : domain.getValues().getRanges().getOrderedRanges()) {
      // Already checked
      checkState(!range.isAll());
      if (range.isSingleValue()) {
        singleValues.add(range.getLow().getValue());
        continue;
      }
      Expression lower = null;
      if (!range.getLow().isLowerUnbounded()) {
        Object value = ConvertDataByType(range.getLow().getValue(), type);
        switch (range.getLow().getBound()) {
          case ABOVE:
            if (type == TimestampType.TIMESTAMP) {
              //todo not now
            } else {
              lower = new GreaterThanExpression(colExpression, new LiteralExpression(value, coltype));
            }
            break;
          case EXACTLY:
            lower = new GreaterThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
            break;
          case BELOW:
            throw new IllegalArgumentException("Low marker should never use BELOW bound");
          default:
            throw new AssertionError("Unhandled bound: " + range.getLow().getBound());
        }
      }
      Expression upper = null;
      if (!range.getHigh().isUpperUnbounded()) {
        Object value = ConvertDataByType(range.getHigh().getValue(), type);
        switch (range.getHigh().getBound()) {
          case ABOVE:
            throw new IllegalArgumentException("High marker should never use ABOVE bound");
          case EXACTLY:
            upper = new LessThanEqualToExpression(colExpression, new LiteralExpression(value, coltype));
            break;
          case BELOW:
            upper = new LessThanExpression(colExpression, new LiteralExpression(value, coltype));
            break;
          default:
            throw new AssertionError("Unhandled bound: " + range.getHigh().getBound());
        }
      }
      // Both bounds of one range must hold at the same time.
      if (lower != null && upper != null) {
        rangeFilter.add(new AndExpression(lower, upper));
      } else if (lower != null) {
        rangeFilter.add(lower);
      } else if (upper != null) {
        rangeFilter.add(upper);
      } else {
        hasInexpressibleRange = true;
      }
    }
    // The alternatives are OR-ed; if one of them cannot be expressed, pushing only the
    // others would wrongly exclude rows, so skip the column altogether.
    if (hasInexpressibleRange) {
      continue;
    }

    List<Expression> alternatives = new ArrayList<>();
    if (singleValues.size() == 1) {
      Expression ex;
      if (coltype.equals(DataType.STRING)) {
        ex = new EqualToExpression(colExpression, new LiteralExpression(((Slice) singleValues.get(0)).toStringUtf8(), coltype));
      } else {
        ex = new EqualToExpression(colExpression, new LiteralExpression(singleValues.get(0), coltype));
      }
      alternatives.add(ex);
    } else if (singleValues.size() > 1) {
      List<Expression> exs = singleValues.stream()
          .map(a -> new LiteralExpression(ConvertDataByType(a, type), coltype))
          .collect(Collectors.toList());
      alternatives.add(new InExpression(colExpression, new ListExpression(exs)));
    }
    alternatives.addAll(rangeFilter);
    if (alternatives.isEmpty()) {
      continue;
    }
    Expression columnFilter = alternatives.get(0);
    for (int i = 1; i < alternatives.size(); i++) {
      columnFilter = new OrExpression(columnFilter, alternatives.get(i));
    }
    filters.add(columnFilter);
  }

  List<Expression> tmp = filters.build();
  if (tmp.isEmpty()) {
    //no filter
    return null;
  }
  // every column's predicate has to hold
  Expression finalFilters = tmp.get(0);
  for (int i = 1; i < tmp.size(); i++) {
    finalFilters = new AndExpression(finalFilters, tmp.get(i));
  }
  return finalFilters;
}
Aggregations