
Example 6 with IndexFilter

Use of org.apache.carbondata.core.index.IndexFilter in project carbondata by apache.

In the class Hive2CarbonExpressionTest, the method testEqualOrLessThanEqualsHiveFilter:

@Test
public void testEqualOrLessThanEqualsHiveFilter() throws IOException {
    // Hive expression tree for the predicate: id <= 1000
    ExprNodeDesc column1 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "id", null, false);
    List<ExprNodeDesc> children1 = Lists.newArrayList();
    ExprNodeDesc constant1 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, "1000");
    children1.add(column1);
    children1.add(constant1);
    ExprNodeGenericFuncDesc node1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFOPEqualOrLessThan(), children1);
    Configuration configuration = new Configuration();
    // Convert the Hive expression into a CarbonData expression and push it down as an IndexFilter
    CarbonInputFormat.setFilterPredicates(configuration, new IndexFilter(table, Hive2CarbonExpression.convertExprHive2Carbon(node1)));
    final Job job = new Job(new JobConf(configuration));
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    List<InputSplit> list = format.getSplits(job);
    // One split is expected to survive pruning for id <= 1000
    Assert.assertEquals(1, list.size());
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) GenericUDFOPEqualOrLessThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan) Configuration(org.apache.hadoop.conf.Configuration) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) CarbonFileInputFormat(org.apache.carbondata.hadoop.api.CarbonFileInputFormat) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
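
For comparison, the same predicate can be expressed directly with CarbonData's own expression classes instead of converting a Hive ExprNodeGenericFuncDesc. A minimal sketch, assuming the same table and configuration objects as in the test above and that the id column is an INT; the helper method name is hypothetical:

import org.apache.carbondata.core.index.IndexFilter;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.Expression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.LessThanEqualToExpression;
import org.apache.carbondata.hadoop.api.CarbonInputFormat;
import org.apache.hadoop.conf.Configuration;

// Hypothetical helper: CarbonData-native form of the Hive predicate id <= 1000
private IndexFilter pushDownLessThanEqual(CarbonTable table, Configuration configuration) {
    Expression lessThanEqual = new LessThanEqualToExpression(
        new ColumnExpression("id", DataTypes.INT),
        new LiteralExpression(1000, DataTypes.INT));
    IndexFilter filter = new IndexFilter(table, lessThanEqual);
    // Register the filter on the job configuration, exactly as the test above does
    CarbonInputFormat.setFilterPredicates(configuration, filter);
    return filter;
}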

Example 7 with IndexFilter

Use of org.apache.carbondata.core.index.IndexFilter in project carbondata by apache.

In the class Hive2CarbonExpressionTest, the method testGreaterThanHiveFilter:

@Test
public void testGreaterThanHiveFilter() throws IOException {
    // Hive expression tree for the predicate: id > 1001
    ExprNodeDesc column1 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "id", null, false);
    List<ExprNodeDesc> children1 = Lists.newArrayList();
    ExprNodeDesc constant1 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, "1001");
    children1.add(column1);
    children1.add(constant1);
    ExprNodeGenericFuncDesc node1 = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo, new GenericUDFOPGreaterThan(), children1);
    Configuration configuration = new Configuration();
    CarbonInputFormat.setFilterPredicates(configuration, new IndexFilter(table, Hive2CarbonExpression.convertExprHive2Carbon(node1)));
    final Job job = new Job(new JobConf(configuration));
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    List<InputSplit> list = format.getSplits(job);
    // Every split is expected to be pruned away for id > 1001
    Assert.assertEquals(0, list.size());
}
Also used : GenericUDFOPGreaterThan(org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan) ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) Configuration(org.apache.hadoop.conf.Configuration) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) CarbonFileInputFormat(org.apache.carbondata.hadoop.api.CarbonFileInputFormat) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)
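
Conditional expressions can also be combined before being wrapped in an IndexFilter, so that a single pushed-down filter covers both bounds of a range. A hedged sketch using the same imports as the previous sketch plus GreaterThanExpression and AndExpression (both from org.apache.carbondata.core.scan.expression); the helper name and the bounds are illustrative:

// Hypothetical helper: push down the range id > 0 AND id <= 1000 as one IndexFilter
private void setRangeFilter(CarbonTable table, Configuration configuration) {
    Expression lower = new GreaterThanExpression(
        new ColumnExpression("id", DataTypes.INT),
        new LiteralExpression(0, DataTypes.INT));
    Expression upper = new LessThanEqualToExpression(
        new ColumnExpression("id", DataTypes.INT),
        new LiteralExpression(1000, DataTypes.INT));
    // AndExpression combines both conditions into a single filter tree
    CarbonInputFormat.setFilterPredicates(configuration, new IndexFilter(table, new AndExpression(lower, upper)));
}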

Example 8 with IndexFilter

Use of org.apache.carbondata.core.index.IndexFilter in project carbondata by apache.

In the class CarbonReaderBuilder, the method prepareFileInputFormat:

private CarbonFileInputFormat prepareFileInputFormat(Job job, boolean enableBlockletDistribution, boolean disableLoadBlockIndex) throws IOException {
    if (inputSplit != null && inputSplit instanceof CarbonInputSplit) {
        tablePath = ((CarbonInputSplit) inputSplit).getSegment().getReadCommittedScope().getFilePath();
        tableName = "UnknownTable" + UUID.randomUUID();
    }
    if (null == this.fileLists && null == tablePath) {
        throw new IllegalArgumentException("Please set table path first.");
    }
    // infer schema
    CarbonTable table;
    if (null != this.fileLists) {
        if (fileLists.size() < 1) {
            throw new IllegalArgumentException("fileLists must have one file in list as least!");
        }
        // Infer the table path as the longest common directory prefix of all listed files
        String commonString = String.valueOf(fileLists.get(0));
        for (int i = 1; i < fileLists.size(); i++) {
            commonString = commonString.substring(0, StringUtils.indexOfDifference(commonString, String.valueOf(fileLists.get(i))));
        }
        int index = commonString.lastIndexOf("/");
        commonString = commonString.substring(0, index);
        table = CarbonTable.buildTable(commonString, tableName, hadoopConf);
    } else {
        table = CarbonTable.buildTable(tablePath, tableName, hadoopConf);
    }
    if (enableBlockletDistribution) {
        // set cache level to blocklet level
        Map<String, String> tableProperties = table.getTableInfo().getFactTable().getTableProperties();
        tableProperties.put(CarbonCommonConstants.CACHE_LEVEL, "BLOCKLET");
        table.getTableInfo().getFactTable().setTableProperties(tableProperties);
    }
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), new IndexFilter(table, filterExpression, true));
    }
    if (null != this.fileLists) {
        format.setFileLists(this.fileLists);
    }
    if (projectionColumns != null) {
        // set the user projection
        int len = projectionColumns.length;
        for (int i = 0; i < len; i++) {
            if (projectionColumns[i].contains(".")) {
                throw new UnsupportedOperationException("Complex child columns projection NOT supported through CarbonReader");
            }
        }
        format.setColumnProjection(job.getConfiguration(), projectionColumns);
    }
    if ((disableLoadBlockIndex) && (filterExpression == null)) {
        job.getConfiguration().set("filter_blocks", "false");
    }
    return format;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonFileInputFormat(org.apache.carbondata.hadoop.api.CarbonFileInputFormat) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) IndexFilter(org.apache.carbondata.core.index.IndexFilter)
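
prepareFileInputFormat is called internally when a reader is built through the CarbonData SDK. A caller-side sketch of that path, assuming a store previously written by the SDK at a hypothetical /tmp/carbon_store location with id and name columns; the table name and helper method name are likewise illustrative, and the filter reuses the expression classes shown earlier:

import java.io.IOException;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
import org.apache.carbondata.sdk.file.CarbonReader;

// Hypothetical reader over an SDK-written store assumed to exist at /tmp/carbon_store
private void readWithSdk() throws IOException, InterruptedException {
    CarbonReader reader = CarbonReader.builder("/tmp/carbon_store", "_temp")
        // only simple columns may be projected; complex child columns are rejected above
        .projection(new String[]{"id", "name"})
        // the expression is wrapped in an IndexFilter by prepareFileInputFormat
        .filter(new EqualToExpression(
            new ColumnExpression("id", DataTypes.INT),
            new LiteralExpression(1000, DataTypes.INT)))
        .build();
    while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        // process the projected values in row
    }
    reader.close();
}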

Example 9 with IndexFilter

Use of org.apache.carbondata.core.index.IndexFilter in project carbondata by apache.

In the class LocalCarbonStore, the method scan:

@Override
public Iterator<CarbonRow> scan(AbsoluteTableIdentifier tableIdentifier, String[] projectColumns, Expression filter) throws IOException {
    Objects.requireNonNull(tableIdentifier);
    Objects.requireNonNull(projectColumns);
    CarbonTable table = getTable(tableIdentifier.getTablePath());
    if (table.isStreamingSink() || table.isHivePartitionTable()) {
        throw new UnsupportedOperationException("streaming and partition table is not supported");
    }
    // TODO: use InputFormat to prune data and read data
    final CarbonTableInputFormat format = new CarbonTableInputFormat();
    final Job job = new Job(new Configuration());
    CarbonInputFormat.setTableInfo(job.getConfiguration(), table.getTableInfo());
    CarbonInputFormat.setTablePath(job.getConfiguration(), table.getTablePath());
    CarbonInputFormat.setTableName(job.getConfiguration(), table.getTableName());
    CarbonInputFormat.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    CarbonInputFormat.setCarbonReadSupport(job.getConfiguration(), CarbonRowReadSupport.class);
    CarbonInputFormat.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectColumns));
    if (filter != null) {
        CarbonInputFormat.setFilterPredicates(job.getConfiguration(), new IndexFilter(table, filter));
    }
    final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, Object>> readers = new ArrayList<>(splits.size());
    List<CarbonRow> rows = new ArrayList<>();
    try {
        for (InputSplit split : splits) {
            TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader reader = format.createRecordReader(split, attempt);
            reader.initialize(split, attempt);
            readers.add(reader);
        }
        for (RecordReader<Void, Object> reader : readers) {
            while (reader.nextKeyValue()) {
                rows.add((CarbonRow) reader.getCurrentValue());
            }
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
    } catch (InterruptedException e) {
        throw new IOException(e);
    } finally {
        for (RecordReader<Void, Object> reader : readers) {
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error(e.getMessage(), e);
            }
        }
    }
    return rows.iterator();
}
Also used : JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) Configuration(org.apache.hadoop.conf.Configuration) CarbonRow(org.apache.carbondata.core.datastore.row.CarbonRow) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) RecordReader(org.apache.hadoop.mapreduce.RecordReader) ArrayList(java.util.ArrayList) IOException(java.io.IOException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonProjection(org.apache.carbondata.hadoop.CarbonProjection) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat) IndexFilter(org.apache.carbondata.core.index.IndexFilter) Job(org.apache.hadoop.mapreduce.Job) InputSplit(org.apache.hadoop.mapreduce.InputSplit) JobID(org.apache.hadoop.mapreduce.JobID)
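
A caller-side sketch of this scan method, assuming a table already written at a hypothetical path with id and name columns. The helper method name, the identifier values, and the assumption that LocalCarbonStore lives in org.apache.carbondata.store with a no-argument constructor are all illustrative; the filter argument may also be null, in which case no IndexFilter is pushed down:

import java.io.IOException;
import java.util.Iterator;
import org.apache.carbondata.core.datastore.row.CarbonRow;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression;
import org.apache.carbondata.store.LocalCarbonStore;

// Hypothetical caller; table path, database and table name are illustrative
private void scanLocalStore() throws IOException {
    LocalCarbonStore store = new LocalCarbonStore();
    AbsoluteTableIdentifier identifier =
        AbsoluteTableIdentifier.from("/tmp/carbon_store/default/t1", "default", "t1");
    Iterator<CarbonRow> rows = store.scan(
        identifier,
        new String[]{"id", "name"},
        new GreaterThanExpression(
            new ColumnExpression("id", DataTypes.INT),
            new LiteralExpression(100, DataTypes.INT)));
    while (rows.hasNext()) {
        CarbonRow row = rows.next();
        // row.getData() holds the projected column values in projection order
    }
}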

Example 10 with IndexFilter

Use of org.apache.carbondata.core.index.IndexFilter in project carbondata by apache.

In the class AbstractQueryExecutor, the method createFilterExpression:

private void createFilterExpression(QueryModel queryModel, SegmentProperties properties) {
    if (queryModel.getIndexFilter() != null) {
        if (!queryModel.getIndexFilter().isResolvedOnSegment(properties)) {
            // Re-resolve the filter against this segment's schema before execution
            IndexFilter expression = new IndexFilter(properties, queryModel.getTable(), queryModel.getIndexFilter().getExpression());
            queryModel.setIndexFilter(expression);
        }
    }
}
Also used : IndexFilter(org.apache.carbondata.core.index.IndexFilter)
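
The two IndexFilter constructors seen across these examples serve different stages: the table-plus-expression form is used when a filter is first pushed down, while the form taking SegmentProperties re-resolves the same expression against one segment's schema, which matters when segments were written with different column layouts (for example after a schema change). A condensed sketch contrasting both, assuming a hypothetical helper method; the constructor signatures and the isResolvedOnSegment/getExpression calls are taken from the snippets above:

import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.index.IndexFilter;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.scan.expression.Expression;

// Hypothetical helper contrasting the table-level and segment-scoped constructor forms
private IndexFilter resolveForSegment(CarbonTable table, Expression predicate, SegmentProperties segment) {
    // Table-level form: used when the filter is first pushed down (see the earlier examples)
    IndexFilter tableLevel = new IndexFilter(table, predicate);
    if (tableLevel.isResolvedOnSegment(segment)) {
        return tableLevel;
    }
    // Segment-scoped form: re-resolves the same expression against one segment's schema
    return new IndexFilter(segment, table, tableLevel.getExpression());
}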

Aggregations

IndexFilter (org.apache.carbondata.core.index.IndexFilter): 27
Configuration (org.apache.hadoop.conf.Configuration): 16
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 16
JobConf (org.apache.hadoop.mapred.JobConf): 15
Job (org.apache.hadoop.mapreduce.Job): 15
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 12
Test (org.junit.Test): 12
CarbonFileInputFormat (org.apache.carbondata.hadoop.api.CarbonFileInputFormat): 11
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 11
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 11
IOException (java.io.IOException): 9
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 9
ArrayList (java.util.ArrayList): 8
List (java.util.List): 5
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 5
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 5
CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat): 5
HashMap (java.util.HashMap): 4
CarbonTablePath (org.apache.carbondata.core.util.path.CarbonTablePath): 4
Map (java.util.Map): 3