
Example 1 with CarbonProjection

Use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by Apache.

From class CarbonReaderBuilder, method build().

public <T> CarbonReader<T> build() throws IOException, InterruptedException {
    // Resolve table metadata from the store path ("_temp" is a placeholder database name).
    CarbonTable table = CarbonTable.buildFromTablePath("_temp", tablePath);
    final CarbonFileInputFormat format = new CarbonFileInputFormat();
    final Job job = new Job(new Configuration());
    // Push the table identity into the Hadoop configuration read by the input format.
    format.setTableInfo(job.getConfiguration(), table.getTableInfo());
    format.setTablePath(job.getConfiguration(), table.getTablePath());
    format.setTableName(job.getConfiguration(), table.getTableName());
    format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
    // Optional filter and column projection are pushed down the same way.
    if (filterExpression != null) {
        format.setFilterPredicates(job.getConfiguration(), filterExpression);
    }
    if (projectionColumns != null) {
        format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns));
    }
    // Create one initialized RecordReader per input split and hand them all to the CarbonReader.
    final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
    List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
    for (InputSplit split : splits) {
        TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader reader = format.createRecordReader(split, attempt);
        reader.initialize(split, attempt);
        readers.add(reader);
    }
    return new CarbonReader<>(readers);
}
Also used: JobContextImpl (org.apache.hadoop.mapreduce.task.JobContextImpl), Configuration (org.apache.hadoop.conf.Configuration), TaskAttemptID (org.apache.hadoop.mapreduce.TaskAttemptID), RecordReader (org.apache.hadoop.mapreduce.RecordReader), ArrayList (java.util.ArrayList), CarbonFileInputFormat (org.apache.carbondata.hadoop.api.CarbonFileInputFormat), CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable), CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection), TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl), Job (org.apache.hadoop.mapreduce.Job), InputSplit (org.apache.hadoop.mapreduce.InputSplit), JobID (org.apache.hadoop.mapreduce.JobID)
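
For context, each reader created in build() follows the standard Hadoop mapreduce RecordReader contract, so a caller holding one of them can drain it with the usual loop. The sketch below is illustrative only and is not taken from the CarbonData sources; the println stands in for whatever the application does with a row.

// Minimal sketch: consume one RecordReader produced by build() via the
// standard Hadoop contract (nextKeyValue / getCurrentValue / close).
static <T> void drain(RecordReader<Void, T> reader) throws IOException, InterruptedException {
    try {
        while (reader.nextKeyValue()) {
            T row = reader.getCurrentValue();
            System.out.println(row);  // placeholder for application logic
        }
    } finally {
        reader.close();
    }
}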

Example 2 with CarbonProjection

Use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by Apache.

From class CarbondataRecordSetProvider, method getRecordSet().

@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List<? extends ColumnHandle> columns) {
    CarbondataSplit carbondataSplit = checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
    checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");
    CarbonProjection carbonProjection = new CarbonProjection();
    // Convert all column handles and collect the projected column names
    ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
    for (ColumnHandle handle : columns) {
        handles.add(checkType(handle, CarbondataColumnHandle.class, "handle"));
        carbonProjection.addColumn(((CarbondataColumnHandle) handle).getColumnName());
    }
    CarbonTableCacheModel tableCacheModel = carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
    checkNotNull(tableCacheModel, "tableCacheModel should not be null");
    checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
    checkNotNull(tableCacheModel.carbonTable.getTableInfo(), "tableCacheModel.tableInfo should not be null");
    // Build Query Model
    CarbonTable targetTable = tableCacheModel.carbonTable;
    QueryModel queryModel;
    TaskAttemptContextImpl hadoopAttemptContext;
    try {
        Configuration conf = new Configuration();
        conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
        String carbonTablePath = targetTable.getAbsoluteTableIdentifier().getTablePath();
        conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
        JobConf jobConf = new JobConf(conf);
        CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable, PrestoFilterUtil.parseFilterExpression(carbondataSplit.getConstraints()), carbonProjection);
        hadoopAttemptContext = new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
        CarbonInputSplit carbonInputSplit = CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
        queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
        queryModel.setVectorReader(true);
    } catch (IOException e) {
        throw new RuntimeException("Unable to get the Query Model ", e);
    }
    return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel, hadoopAttemptContext);
}
Also used: ColumnHandle (com.facebook.presto.spi.ColumnHandle), Configuration (org.apache.hadoop.conf.Configuration), ImmutableList (com.google.common.collect.ImmutableList), TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID), CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit), IOException (java.io.IOException), QueryModel (org.apache.carbondata.core.scan.model.QueryModel), CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable), CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection), TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl), CarbonTableCacheModel (org.apache.carbondata.presto.impl.CarbonTableCacheModel), CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat), JobConf (org.apache.hadoop.mapred.JobConf)
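
The createInputFormat helper invoked above is not part of this snippet. Assuming it only instantiates the format and wires the filter and projection into the Hadoop configuration, using the same setters that Example 1 calls on CarbonFileInputFormat, a sketch could look like the following; the actual carbondata-presto method may differ.

// Hedged sketch of the unshown createInputFormat helper (an assumption, not the connector code).
private CarbonTableInputFormat createInputFormat(Configuration conf, CarbonTable carbonTable,
        Expression filterExpression, CarbonProjection projection) throws IOException {
    CarbonTableInputFormat format = new CarbonTableInputFormat();
    // Same configuration setters as in Example 1, assumed to exist on CarbonTableInputFormat as well.
    format.setTableInfo(conf, carbonTable.getTableInfo());
    if (filterExpression != null) {
        format.setFilterPredicates(conf, filterExpression);
    }
    if (projection != null) {
        format.setColumnProjection(conf, projection);
    }
    return format;
}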

Example 3 with CarbonProjection

Use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by Apache.

From class CarbonInputMapperTest, method testInputFormatMapperReadAllRowsAndColumns().

@Test
public void testInputFormatMapperReadAllRowsAndColumns() throws Exception {
    try {
        String outPath = "target/output";
        CarbonProjection carbonProjection = new CarbonProjection();
        carbonProjection.addColumn("ID");
        carbonProjection.addColumn("date");
        carbonProjection.addColumn("country");
        carbonProjection.addColumn("name");
        carbonProjection.addColumn("phonetype");
        carbonProjection.addColumn("serialname");
        carbonProjection.addColumn("salary");
        runJob(outPath, carbonProjection, null);
        Assert.assertEquals("Count lines are not matching", 1000, countTheLines(outPath));
        Assert.assertEquals("Column count are not matching", 7, countTheColumns(outPath));
    } catch (Exception e) {
        Assert.assertTrue("failed", false);
        e.printStackTrace();
        throw e;
    }
}
Also used: CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection), IOException (java.io.IOException), Test (org.junit.Test)
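
runJob is a private helper of CarbonInputMapperTest and is not reproduced on this page. As a rough sketch of what such a driver usually looks like, assuming a map-only Hadoop job that reads through the Carbon input format with the projection and filter pushed into the configuration (as in Example 1) and writes text output, one might write the following; the input path and the mapper wiring are placeholders, not the real test code.

// Hedged sketch of a runJob-style driver; store path and mapper are assumptions.
private void runJob(String outPath, CarbonProjection projection, Expression filter) throws Exception {
    Job job = Job.getInstance(new Configuration());
    job.setJarByClass(CarbonInputMapperTest.class);
    job.setInputFormatClass(CarbonTableInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Map-only job; a mapper that writes each projected row as one text line would be set here.
    job.setNumReduceTasks(0);
    CarbonTableInputFormat format = new CarbonTableInputFormat();
    // Push the projection and filter into the configuration, mirroring Example 1.
    if (projection != null) {
        format.setColumnProjection(job.getConfiguration(), projection);
    }
    if (filter != null) {
        format.setFilterPredicates(job.getConfiguration(), filter);
    }
    FileInputFormat.addInputPath(job, new Path("/path/to/carbon/store"));  // hypothetical store location
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    job.waitForCompletion(true);
}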

Example 4 with CarbonProjection

Use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by Apache.

From class CarbonInputMapperTest, method testInputFormatMapperReadAllRowsAndFewColumnsWithFilter().

@Test
public void testInputFormatMapperReadAllRowsAndFewColumnsWithFilter() throws Exception {
    try {
        String outPath = "target/output3";
        CarbonProjection carbonProjection = new CarbonProjection();
        carbonProjection.addColumn("ID");
        carbonProjection.addColumn("country");
        carbonProjection.addColumn("salary");
        Expression expression = new EqualToExpression(new ColumnExpression("country", DataType.STRING), new LiteralExpression("france", DataType.STRING));
        runJob(outPath, carbonProjection, expression);
        Assert.assertEquals("Count lines are not matching", 101, countTheLines(outPath));
        Assert.assertEquals("Column count are not matching", 3, countTheColumns(outPath));
    } catch (Exception e) {
        Assert.assertTrue("failed", false);
    }
}
Also used: CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection), Expression (org.apache.carbondata.core.scan.expression.Expression), ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression), LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression), EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression), IOException (java.io.IOException), Test (org.junit.Test)
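
Richer filters can be composed from the same expression classes. Below is a short sketch that combines two equality predicates, assuming carbondata's logical AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression) is available alongside the conditional expressions used above; the second column value is a placeholder.

// Hedged sketch: push down a conjunction of two equality predicates (AndExpression is assumed).
Expression countryIsFrance = new EqualToExpression(
    new ColumnExpression("country", DataType.STRING),
    new LiteralExpression("france", DataType.STRING));
Expression nameMatches = new EqualToExpression(
    new ColumnExpression("name", DataType.STRING),
    new LiteralExpression("aaa1", DataType.STRING));  // placeholder literal
Expression combined = new AndExpression(countryIsFrance, nameMatches);
runJob("target/output4", carbonProjection, combined);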

Example 5 with CarbonProjection

Use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by Apache.

From class CarbonInputMapperTest, method testInputFormatMapperReadAllRowsAndFewColumns().

@Test
public void testInputFormatMapperReadAllRowsAndFewColumns() throws Exception {
    try {
        String outPath = "target/output2";
        CarbonProjection carbonProjection = new CarbonProjection();
        carbonProjection.addColumn("ID");
        carbonProjection.addColumn("country");
        carbonProjection.addColumn("salary");
        runJob(outPath, carbonProjection, null);
        Assert.assertEquals("Count lines are not matching", 1000, countTheLines(outPath));
        Assert.assertEquals("Column count are not matching", 3, countTheColumns(outPath));
    } catch (Exception e) {
        e.printStackTrace();
        Assert.assertTrue("failed", false);
    }
}
Also used: CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection), IOException (java.io.IOException), Test (org.junit.Test)

Aggregations

CarbonProjection (org.apache.carbondata.hadoop.CarbonProjection): 8
IOException (java.io.IOException): 7
Test (org.junit.Test): 6
CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable): 2
ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression): 2
Expression (org.apache.carbondata.core.scan.expression.Expression): 2
LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression): 2
EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
ColumnHandle (com.facebook.presto.spi.ColumnHandle): 1
ImmutableList (com.google.common.collect.ImmutableList): 1
ArrayList (java.util.ArrayList): 1
QueryModel (org.apache.carbondata.core.scan.model.QueryModel): 1
CarbonInputSplit (org.apache.carbondata.hadoop.CarbonInputSplit): 1
CarbonFileInputFormat (org.apache.carbondata.hadoop.api.CarbonFileInputFormat): 1
CarbonTableInputFormat (org.apache.carbondata.hadoop.api.CarbonTableInputFormat): 1
CarbonTableCacheModel (org.apache.carbondata.presto.impl.CarbonTableCacheModel): 1
JobConf (org.apache.hadoop.mapred.JobConf): 1
TaskAttemptContextImpl (org.apache.hadoop.mapred.TaskAttemptContextImpl): 1
TaskAttemptID (org.apache.hadoop.mapred.TaskAttemptID): 1