Search in sources :

Example 6 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

In the class FilterUtilTest, method testgetFilterListForRS.

/**
 * Asserts that FilterUtil.getFilterListForRS yields a DimColumnFilterInfo when
 * ColumnExpression.getCarbonColumn() and RowImpl.getVal(int) are mocked.
 */
@Test
public void testgetFilterListForRS() throws Exception {
    Expression filterExpression = new ColumnExpression("IMEI", DataType.STRING);
    ColumnExpression imeiColumn = new ColumnExpression("IMEI", DataType.STRING);
    // Column handed back by the mocked ColumnExpression (ordinal 1, schema ordinal -1).
    final CarbonColumn mockedColumn = new CarbonColumn(columnSchema, 1, -1);
    new MockUp<ColumnExpression>() {

        @Mock
        public CarbonColumn getCarbonColumn() {
            return mockedColumn;
        }
    };
    // Every row lookup is stubbed to return the constant "test".
    new MockUp<RowImpl>() {

        @Mock
        public Object getVal(int index) {
            return "test";
        }
    };
    Object filterInfo = FilterUtil.getFilterListForRS(
        filterExpression, imeiColumn, CarbonCommonConstants.MEMBER_DEFAULT_VAL, 1);
    assertTrue(filterInfo instanceof DimColumnFilterInfo);
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) ListExpression(org.apache.carbondata.core.scan.expression.conditional.ListExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) MockUp(mockit.MockUp) Test(org.junit.Test) AbstractDictionaryCacheTest(org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)

Example 7 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

In the class FilterUtilTest, method testCheckIfDataTypeNotTimeStamp.

/**
 * Asserts that FilterUtil.checkIfDataTypeNotTimeStamp returns false for a
 * STRING-typed column expression.
 */
@Test
public void testCheckIfDataTypeNotTimeStamp() {
    Expression stringColumn = new ColumnExpression("test", DataType.STRING);
    assertFalse(FilterUtil.checkIfDataTypeNotTimeStamp(stringColumn));
}
Also used : ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) ListExpression(org.apache.carbondata.core.scan.expression.conditional.ListExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) Test(org.junit.Test) AbstractDictionaryCacheTest(org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)

Example 8 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

In the class RangeFilterProcessorTest, method createFilterTree_flavor2.

/**
 * Builds an OR-only filter tree over column "a", runs the range filter optimizer on it,
 * and asserts that the input tree matches an independently built expected tree of the
 * same shape (per checkBothTrees).
 */
@Test
public void createFilterTree_flavor2() {
    // Input tree:
    // ((((NotEquals(null, null) or a >= '11') or a > '12') or a <= '20') or a <= '15')
    ColumnSchema empColumnSchema = new ColumnSchema();
    empColumnSchema.setColumnName("a");
    empColumnSchema.setColumnUniqueId("a");
    empColumnSchema.setDimensionColumn(true);
    empColumnSchema.setEncodingList(Arrays.asList(Encoding.DICTIONARY));
    empColumnSchema.setDataType(DataType.STRING);
    CarbonDimension empDimension = new CarbonDimension(empColumnSchema, 0, 0, 0, 0, 0);
    ColumnExpression cola1 = new ColumnExpression("a", DataType.STRING);
    cola1.setDimension(true);
    cola1.setDimension(empDimension);
    ColumnExpression cola2 = new ColumnExpression("a", DataType.STRING);
    cola2.setDimension(true);
    cola2.setDimension(empDimension);
    ColumnExpression cola3 = new ColumnExpression("a", DataType.STRING);
    cola3.setDimension(true);
    cola3.setDimension(empDimension);
    ColumnExpression cola4 = new ColumnExpression("a", DataType.STRING);
    cola4.setDimension(true);
    cola4.setDimension(empDimension);
    Expression lessThan1 = new LessThanEqualToExpression(cola1, new LiteralExpression("15", DataType.STRING));
    Expression lessThan2 = new LessThanEqualToExpression(cola2, new LiteralExpression("20", DataType.STRING));
    Expression greaterThan1 = new GreaterThanExpression(cola3, new LiteralExpression("12", DataType.STRING));
    Expression greaterThan2 = new GreaterThanEqualToExpression(cola4, new LiteralExpression("11", DataType.STRING));
    Expression or1 = new OrExpression(new NotEqualsExpression(null, null), greaterThan2);
    Expression or2 = new OrExpression(or1, greaterThan1);
    Expression or3 = new OrExpression(or2, lessThan2);
    Expression inputFilter = new OrExpression(or3, lessThan1);

    // Expected tree: same shape, built from its own column expressions.
    // The dimension setup targets colb1..colb4; the original re-configured cola1..cola4
    // here (copy-paste slip), leaving the expected tree's columns unconfigured.
    ColumnExpression colb1 = new ColumnExpression("a", DataType.STRING);
    colb1.setDimension(true);
    colb1.setDimension(empDimension);
    ColumnExpression colb2 = new ColumnExpression("a", DataType.STRING);
    colb2.setDimension(true);
    colb2.setDimension(empDimension);
    ColumnExpression colb3 = new ColumnExpression("a", DataType.STRING);
    colb3.setDimension(true);
    colb3.setDimension(empDimension);
    ColumnExpression colb4 = new ColumnExpression("a", DataType.STRING);
    colb4.setDimension(true);
    colb4.setDimension(empDimension);
    Expression lessThanb1 = new LessThanEqualToExpression(colb1, new LiteralExpression("15", DataType.STRING));
    Expression lessThanb2 = new LessThanEqualToExpression(colb2, new LiteralExpression("20", DataType.STRING));
    Expression greaterThanb1 = new GreaterThanExpression(colb3, new LiteralExpression("12", DataType.STRING));
    Expression greaterThanb2 = new GreaterThanEqualToExpression(colb4, new LiteralExpression("11", DataType.STRING));
    Expression orb1 = new OrExpression(new NotEqualsExpression(null, null), greaterThanb2);
    Expression orb2 = new OrExpression(orb1, greaterThanb1);
    Expression orb3 = new OrExpression(orb2, lessThanb2);
    Expression expectedFilter = new OrExpression(orb3, lessThanb1);

    FilterOptimizer rangeFilterOptimizer = new RangeFilterOptmizer(new FilterOptimizerBasic(), inputFilter);
    rangeFilterOptimizer.optimizeFilter();
    // The test expects no change: the input tree must still match the expected tree.
    boolean result = checkBothTrees(inputFilter, expectedFilter);
    Assert.assertTrue(result);
}
Also used : FilterOptimizer(org.apache.carbondata.core.scan.filter.intf.FilterOptimizer) NotEqualsExpression(org.apache.carbondata.core.scan.expression.conditional.NotEqualsExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) LessThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) GreaterThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression) LessThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) GreaterThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) TrueExpression(org.apache.carbondata.core.scan.expression.logical.TrueExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) NotEqualsExpression(org.apache.carbondata.core.scan.expression.conditional.NotEqualsExpression) RangeExpression(org.apache.carbondata.core.scan.expression.logical.RangeExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) RangeFilterOptmizer(org.apache.carbondata.core.scan.filter.optimizer.RangeFilterOptmizer) FilterOptimizerBasic(org.apache.carbondata.core.scan.filter.intf.FilterOptimizerBasic) Test(org.junit.Test)

Example 9 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

In the class RangeFilterProcessorTest, method createFilterTree_noChange.

/**
 * Builds the filter a >= '20' and a <= '05', runs the range filter optimizer on it,
 * and asserts that the input tree still matches a freshly built tree of the same shape.
 */
@Test
public void createFilterTree_noChange() {
    // Dictionary-encoded string dimension shared by both input column expressions.
    ColumnSchema schema = new ColumnSchema();
    schema.setColumnName("empNameCol");
    schema.setColumnUniqueId("empNameCol");
    schema.setDimensionColumn(true);
    schema.setEncodingList(Arrays.asList(Encoding.DICTIONARY));
    schema.setDataType(DataType.STRING);
    CarbonDimension dimension = new CarbonDimension(schema, 0, 0, 0, 0, 0);

    // Input tree: a >= '20' and a <= '05'
    ColumnExpression lowerBoundColumn = new ColumnExpression("a", DataType.STRING);
    lowerBoundColumn.setDimension(true);
    lowerBoundColumn.setDimension(dimension);
    Expression lowerBound =
        new GreaterThanEqualToExpression(lowerBoundColumn, new LiteralExpression("20", DataType.STRING));
    ColumnExpression upperBoundColumn = new ColumnExpression("a", DataType.STRING);
    upperBoundColumn.setDimension(true);
    upperBoundColumn.setDimension(dimension);
    Expression upperBound =
        new LessThanEqualToExpression(upperBoundColumn, new LiteralExpression("05", DataType.STRING));
    Expression inputFilter = new AndExpression(lowerBound, upperBound);

    // Expected tree: same comparisons over plain column expressions.
    Expression expectedLower = new GreaterThanEqualToExpression(
        new ColumnExpression("a", DataType.STRING), new LiteralExpression("20", DataType.STRING));
    Expression expectedUpper = new LessThanEqualToExpression(
        new ColumnExpression("a", DataType.STRING), new LiteralExpression("05", DataType.STRING));
    Expression expected = new AndExpression(expectedLower, expectedUpper);

    FilterOptimizer optimizer = new RangeFilterOptmizer(new FilterOptimizerBasic(), inputFilter);
    optimizer.optimizeFilter();
    // The test expects no change: the input tree must still match the expected tree.
    Assert.assertTrue(checkBothTrees(inputFilter, expected));
}
Also used : AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) FilterOptimizer(org.apache.carbondata.core.scan.filter.intf.FilterOptimizer) LessThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) GreaterThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) TrueExpression(org.apache.carbondata.core.scan.expression.logical.TrueExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) NotEqualsExpression(org.apache.carbondata.core.scan.expression.conditional.NotEqualsExpression) RangeExpression(org.apache.carbondata.core.scan.expression.logical.RangeExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) RangeFilterOptmizer(org.apache.carbondata.core.scan.filter.optimizer.RangeFilterOptmizer) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) LessThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression) FilterOptimizerBasic(org.apache.carbondata.core.scan.filter.intf.FilterOptimizerBasic) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) GreaterThanEqualToExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression) Test(org.junit.Test)

Example 10 with Expression

use of org.apache.carbondata.core.scan.expression.Expression in project carbondata by apache.

In the class CarbonInputFormat, method getSplits.

/**
 * {@inheritDoc}
 * The FileInputFormat.INPUT_DIR configuration is used to get the table path to read.
 *
 * @param job job context whose configuration describes the table, segments and filter
 * @return list of CarbonInputSplit
 * @throws IOException when getCarbonTable returns null (missing/corrupt schema file)
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    AbsoluteTableIdentifier tableIdentifier = getAbsoluteTableIdentifier(job.getConfiguration());
    CacheClient client = new CacheClient(tableIdentifier.getStorePath());
    try {
        List<String> staleSegments = new ArrayList<>();
        List<UpdateVO> staleTimestampRanges = new ArrayList<>();
        // If getSegmentsToAccess returns nothing, fetch the valid segments and set them
        // into the configuration.
        if (getSegmentsToAccess(job).length == 0) {
            SegmentStatusManager statusManager = new SegmentStatusManager(tableIdentifier);
            SegmentStatusManager.ValidAndInvalidSegmentsInfo segmentsInfo =
                statusManager.getValidAndInvalidSegments();
            SegmentUpdateStatusManager updateManager = new SegmentUpdateStatusManager(tableIdentifier);
            setSegmentsToAccess(job.getConfiguration(), segmentsInfo.getValidSegments());
            if (segmentsInfo.getValidSegments().size() == 0) {
                return new ArrayList<>(0);
            }
            // Collect the invalid segments and their invalid timestamp ranges.
            staleSegments.addAll(segmentsInfo.getInvalidSegments());
            for (String staleId : staleSegments) {
                staleTimestampRanges.add(updateManager.getInvalidTimestampRange(staleId));
            }
            // Remove the entries for invalid segments from the segment index.
            if (!staleSegments.isEmpty()) {
                List<TableSegmentUniqueIdentifier> staleIdentifiers = new ArrayList<>(staleSegments.size());
                for (String staleId : staleSegments) {
                    staleIdentifiers.add(new TableSegmentUniqueIdentifier(tableIdentifier, staleId));
                }
                client.getSegmentAccessClient().invalidateAll(staleIdentifiers);
            }
        }
        // Process and resolve the filter expression.
        Expression filter = getFilterPredicates(job.getConfiguration());
        CarbonTable table = getCarbonTable(job.getConfiguration());
        // The table is null in case of a corrupt schema file.
        if (table == null) {
            throw new IOException("Missing/Corrupt schema file for table.");
        }
        CarbonInputFormatUtil.processFilterExpression(filter, table);
        // Prune partitions for a filter query on a partition table.
        BitSet matchedPartitions = null;
        if (filter != null) {
            PartitionInfo partitionInfo = table.getPartitionInfo(table.getFactTableName());
            if (partitionInfo != null) {
                Partitioner partitioner = PartitionUtil.getPartitioner(partitionInfo);
                matchedPartitions =
                    new FilterExpressionProcessor().getFilteredPartitions(filter, partitionInfo, partitioner);
                int matchedCount = matchedPartitions.cardinality();
                if (matchedCount == 0) {
                    // No partition is required.
                    return new ArrayList<>();
                }
                if (matchedCount == partitioner.numPartitions()) {
                    // All partitions are required, so there is nothing to prune.
                    matchedPartitions = null;
                }
            }
        }
        FilterResolverIntf resolvedFilter = CarbonInputFormatUtil.resolveFilter(filter, tableIdentifier);
        // Do block filtering and get the splits.
        List<InputSplit> splits = getSplits(job, resolvedFilter, matchedPartitions, client);
        // Pass the invalid segments to the task side so it can remove its index entries.
        if (!staleSegments.isEmpty()) {
            for (InputSplit split : splits) {
                CarbonInputSplit carbonSplit = (CarbonInputSplit) split;
                carbonSplit.setInvalidSegments(staleSegments);
                carbonSplit.setInvalidTimestampRange(staleTimestampRanges);
            }
        }
        return splits;
    } finally {
        // Close the cache client to clear LRU cache memory.
        client.close();
    }
}
Also used : SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) IOException(java.io.IOException) UpdateVO(org.apache.carbondata.core.mutate.UpdateVO) TableSegmentUniqueIdentifier(org.apache.carbondata.core.datastore.TableSegmentUniqueIdentifier) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) FilterExpressionProcessor(org.apache.carbondata.core.scan.filter.FilterExpressionProcessor) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) Expression(org.apache.carbondata.core.scan.expression.Expression) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Partitioner(org.apache.carbondata.core.scan.partition.Partitioner) FilterResolverIntf(org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf)

Aggregations

Expression (org.apache.carbondata.core.scan.expression.Expression)31 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)25 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)24 Test (org.junit.Test)18 ListExpression (org.apache.carbondata.core.scan.expression.conditional.ListExpression)10 AbstractDictionaryCacheTest (org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)7 OrExpression (org.apache.carbondata.core.scan.expression.logical.OrExpression)7 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)6 ArrayList (java.util.ArrayList)5 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)5 RangeExpression (org.apache.carbondata.core.scan.expression.logical.RangeExpression)5 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)4 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)4 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)4 GreaterThanEqualToExpression (org.apache.carbondata.core.scan.expression.conditional.GreaterThanEqualToExpression)4 GreaterThanExpression (org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression)4 LessThanEqualToExpression (org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression)4 NotEqualsExpression (org.apache.carbondata.core.scan.expression.conditional.NotEqualsExpression)4 TrueExpression (org.apache.carbondata.core.scan.expression.logical.TrueExpression)4