use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class FilterUtilTest method testgetFilterListForRS.
@Test
public void testgetFilterListForRS() throws Exception {
Expression expression = new ColumnExpression("IMEI", DataType.STRING);
ColumnExpression columnExpression = new ColumnExpression("IMEI", DataType.STRING);
String defaultValues = CarbonCommonConstants.MEMBER_DEFAULT_VAL;
int defaultSurrogate = 1;
int ordinal = 1;
final CarbonColumn carbonColumn = new CarbonColumn(columnSchema, ordinal, -1);
new MockUp<ColumnExpression>() {
@Mock
public CarbonColumn getCarbonColumn() {
return carbonColumn;
}
};
new MockUp<RowImpl>() {
@Mock
public Object getVal(int index) {
return "test";
}
};
assertTrue(FilterUtil.getFilterListForRS(expression, columnExpression, defaultValues, defaultSurrogate) instanceof DimColumnFilterInfo);
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class FilterUtilTest method testCheckIfDataTypeNotTimeStamp.
@Test
public void testCheckIfDataTypeNotTimeStamp() {
Expression expression = new ColumnExpression("test", DataType.STRING);
boolean result = FilterUtil.checkIfDataTypeNotTimeStamp(expression);
assertFalse(result);
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class RangeFilterProcessorTest method createFilterTree_flavor2.
@Test
public void createFilterTree_flavor2() {
// Build 3rd BTree a >= '11' or a > '12' or a <= '20' or a <= '15'
Expression inputFilter;
boolean result = false;
ColumnSchema empColumnSchema = new ColumnSchema();
empColumnSchema.setColumnName("a");
empColumnSchema.setColumnUniqueId("a");
empColumnSchema.setDimensionColumn(true);
empColumnSchema.setEncodingList(Arrays.asList(Encoding.DICTIONARY));
empColumnSchema.setDataType(DataType.STRING);
CarbonDimension empDimension = new CarbonDimension(empColumnSchema, 0, 0, 0, 0, 0);
ColumnExpression cola1 = new ColumnExpression("a", DataType.STRING);
cola1.setDimension(true);
cola1.setDimension(empDimension);
ColumnExpression cola2 = new ColumnExpression("a", DataType.STRING);
cola2.setDimension(true);
cola2.setDimension(empDimension);
ColumnExpression cola3 = new ColumnExpression("a", DataType.STRING);
cola3.setDimension(true);
cola3.setDimension(empDimension);
ColumnExpression cola4 = new ColumnExpression("a", DataType.STRING);
cola4.setDimension(true);
cola4.setDimension(empDimension);
Expression lessThan1 = new LessThanEqualToExpression(cola1, new LiteralExpression("15", DataType.STRING));
Expression lessThan2 = new LessThanEqualToExpression(cola2, new LiteralExpression("20", DataType.STRING));
Expression greaterThan1 = new GreaterThanExpression(cola3, new LiteralExpression("12", DataType.STRING));
Expression greaterThan2 = new GreaterThanEqualToExpression(cola4, new LiteralExpression("11", DataType.STRING));
Expression Or1 = new OrExpression(new NotEqualsExpression(null, null), greaterThan2);
Expression Or2 = new OrExpression(Or1, greaterThan1);
Expression Or3 = new OrExpression(Or2, lessThan2);
inputFilter = new OrExpression(Or3, lessThan1);
// Build The output
ColumnExpression colb1 = new ColumnExpression("a", DataType.STRING);
cola1.setDimension(true);
cola1.setDimension(empDimension);
ColumnExpression colb2 = new ColumnExpression("a", DataType.STRING);
cola2.setDimension(true);
cola2.setDimension(empDimension);
ColumnExpression colb3 = new ColumnExpression("a", DataType.STRING);
cola3.setDimension(true);
cola3.setDimension(empDimension);
ColumnExpression colb4 = new ColumnExpression("a", DataType.STRING);
cola4.setDimension(true);
cola4.setDimension(empDimension);
Expression lessThanb1 = new LessThanEqualToExpression(colb1, new LiteralExpression("15", DataType.STRING));
Expression lessThanb2 = new LessThanEqualToExpression(colb2, new LiteralExpression("20", DataType.STRING));
Expression greaterThanb1 = new GreaterThanExpression(colb3, new LiteralExpression("12", DataType.STRING));
Expression greaterThanb2 = new GreaterThanEqualToExpression(colb4, new LiteralExpression("11", DataType.STRING));
Expression Orb1 = new OrExpression(new NotEqualsExpression(null, null), greaterThanb2);
Expression Orb2 = new OrExpression(Orb1, greaterThanb1);
Expression Orb3 = new OrExpression(Orb2, lessThanb2);
FilterOptimizer rangeFilterOptimizer = new RangeFilterOptmizer(new FilterOptimizerBasic(), inputFilter);
rangeFilterOptimizer.optimizeFilter();
result = checkBothTrees(inputFilter, new OrExpression(Orb3, lessThanb1));
// no change
Assert.assertTrue(result);
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class RangeFilterProcessorTest method createFilterTree_noChange.
@Test
public void createFilterTree_noChange() {
// Build 2nd Tree no change a < 5 and a > 20
Expression inputFilter;
boolean result = false;
ColumnExpression cola = new ColumnExpression("a", DataType.STRING);
cola.setDimension(true);
ColumnSchema empColumnSchema = new ColumnSchema();
empColumnSchema.setColumnName("empNameCol");
empColumnSchema.setColumnUniqueId("empNameCol");
empColumnSchema.setDimensionColumn(true);
empColumnSchema.setEncodingList(Arrays.asList(Encoding.DICTIONARY));
empColumnSchema.setDataType(DataType.STRING);
CarbonDimension empDimension = new CarbonDimension(empColumnSchema, 0, 0, 0, 0, 0);
cola.setDimension(empDimension);
Expression greaterThan = new GreaterThanEqualToExpression(cola, new LiteralExpression("20", DataType.STRING));
ColumnExpression colb = new ColumnExpression("a", DataType.STRING);
colb.setDimension(true);
colb.setDimension(empDimension);
Expression lessThan = new LessThanEqualToExpression(colb, new LiteralExpression("05", DataType.STRING));
inputFilter = new AndExpression(greaterThan, lessThan);
Expression output = new AndExpression(new GreaterThanEqualToExpression(new ColumnExpression("a", DataType.STRING), new LiteralExpression("20", DataType.STRING)), new LessThanEqualToExpression(new ColumnExpression("a", DataType.STRING), new LiteralExpression("05", DataType.STRING)));
FilterOptimizer rangeFilterOptimizer = new RangeFilterOptmizer(new FilterOptimizerBasic(), inputFilter);
rangeFilterOptimizer.optimizeFilter();
result = checkBothTrees(inputFilter, output);
// no change
Assert.assertTrue(result);
}
use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.
the class CarbonInputFormat method getSplits.
/**
* {@inheritDoc}
* Configurations FileInputFormat.INPUT_DIR
* are used to get table path to read.
*
* @param job
* @return List<InputSplit> list of CarbonInputSplit
* @throws IOException
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
CacheClient cacheClient = new CacheClient(identifier.getStorePath());
try {
List<String> invalidSegments = new ArrayList<>();
List<UpdateVO> invalidTimestampsList = new ArrayList<>();
// get all valid segments and set them into the configuration
if (getSegmentsToAccess(job).length == 0) {
SegmentStatusManager segmentStatusManager = new SegmentStatusManager(identifier);
SegmentStatusManager.ValidAndInvalidSegmentsInfo segments = segmentStatusManager.getValidAndInvalidSegments();
SegmentUpdateStatusManager updateStatusManager = new SegmentUpdateStatusManager(identifier);
setSegmentsToAccess(job.getConfiguration(), segments.getValidSegments());
if (segments.getValidSegments().size() == 0) {
return new ArrayList<>(0);
}
// remove entry in the segment index if there are invalid segments
invalidSegments.addAll(segments.getInvalidSegments());
for (String invalidSegmentId : invalidSegments) {
invalidTimestampsList.add(updateStatusManager.getInvalidTimestampRange(invalidSegmentId));
}
if (invalidSegments.size() > 0) {
List<TableSegmentUniqueIdentifier> invalidSegmentsIds = new ArrayList<>(invalidSegments.size());
for (String segId : invalidSegments) {
invalidSegmentsIds.add(new TableSegmentUniqueIdentifier(identifier, segId));
}
cacheClient.getSegmentAccessClient().invalidateAll(invalidSegmentsIds);
}
}
// process and resolve the expression
Expression filter = getFilterPredicates(job.getConfiguration());
CarbonTable carbonTable = getCarbonTable(job.getConfiguration());
// this will be null in case of corrupt schema file.
if (null == carbonTable) {
throw new IOException("Missing/Corrupt schema file for table.");
}
CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
// prune partitions for filter query on partition table
BitSet matchedPartitions = null;
if (null != filter) {
PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getFactTableName());
if (null != partitionInfo) {
Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
matchedPartitions = new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
if (matchedPartitions.cardinality() == 0) {
// no partition is required
return new ArrayList<InputSplit>();
}
if (matchedPartitions.cardinality() == partitioner.numPartitions()) {
// all partitions are required, no need to prune partitions
matchedPartitions = null;
}
}
}
FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
// do block filtering and get split
List<InputSplit> splits = getSplits(job, filterInterface, matchedPartitions, cacheClient);
// pass the invalid segment to task side in order to remove index entry in task side
if (invalidSegments.size() > 0) {
for (InputSplit split : splits) {
((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
}
}
return splits;
} finally {
// close the cache cache client to clear LRU cache memory
cacheClient.close();
}
}
Aggregations