use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by apache.
From the class CarbonReaderBuilder, method build().
/**
 * Builds a {@link CarbonReader} over every input split found under {@code tablePath}.
 * One record reader is created and initialized per split; all readers are handed to
 * the returned CarbonReader.
 *
 * @param <T> record type produced by the underlying readers
 * @return a CarbonReader wrapping one initialized reader per split
 * @throws IOException          if table metadata or splits cannot be read
 * @throws InterruptedException if reader initialization is interrupted
 */
public <T> CarbonReader<T> build() throws IOException, InterruptedException {
// "_temp" is a placeholder database name; the table is resolved purely from tablePath.
CarbonTable table = CarbonTable.buildFromTablePath("_temp", tablePath);
final CarbonFileInputFormat format = new CarbonFileInputFormat();
// Job.getInstance replaces the deprecated Job(Configuration) constructor.
final Job job = Job.getInstance(new Configuration());
format.setTableInfo(job.getConfiguration(), table.getTableInfo());
format.setTablePath(job.getConfiguration(), table.getTablePath());
format.setTableName(job.getConfiguration(), table.getTableName());
format.setDatabaseName(job.getConfiguration(), table.getDatabaseName());
if (filterExpression != null) {
format.setFilterPredicates(job.getConfiguration(), filterExpression);
}
if (projectionColumns != null) {
format.setColumnProjection(job.getConfiguration(), new CarbonProjection(projectionColumns));
}
final List<InputSplit> splits = format.getSplits(new JobContextImpl(job.getConfiguration(), new JobID()));
List<RecordReader<Void, T>> readers = new ArrayList<>(splits.size());
for (InputSplit split : splits) {
TaskAttemptContextImpl attempt = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
// createRecordReader returns a raw RecordReader; the cast is safe for this format.
@SuppressWarnings("unchecked")
RecordReader<Void, T> reader = format.createRecordReader(split, attempt);
try {
reader.initialize(split, attempt);
readers.add(reader);
} catch (IOException | InterruptedException e) {
// Close the failing reader and any readers opened so far so we do not
// leak resources when initialization fails partway through the splits.
reader.close();
for (RecordReader<Void, T> opened : readers) {
opened.close();
}
throw e;
}
}
return new CarbonReader<>(readers);
}
use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by apache.
From the class CarbondataRecordSetProvider, method getRecordSet().
/**
 * Builds a {@link RecordSet} for one Carbondata split: validates the split and column
 * handles, assembles a column projection, looks up the cached table metadata, and
 * constructs the Carbon query model used to scan the split.
 *
 * @throws RuntimeException wrapping any IOException raised while building the query model
 */
@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List<? extends ColumnHandle> columns) {
CarbondataSplit carbondataSplit = checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");
CarbonProjection carbonProjection = new CarbonProjection();
// Convert all column handles; checkType already returns the typed handle, so keep
// that reference instead of re-casting (the original cast the same handle twice).
ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
for (ColumnHandle handle : columns) {
CarbondataColumnHandle carbondataColumnHandle = checkType(handle, CarbondataColumnHandle.class, "handle");
handles.add(carbondataColumnHandle);
carbonProjection.addColumn(carbondataColumnHandle.getColumnName());
}
CarbonTableCacheModel tableCacheModel = carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
checkNotNull(tableCacheModel, "tableCacheModel should not be null");
checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
checkNotNull(tableCacheModel.carbonTable.getTableInfo(), "tableCacheModel.tableInfo should not be null");
// Build Query Model
CarbonTable targetTable = tableCacheModel.carbonTable;
QueryModel queryModel;
TaskAttemptContextImpl hadoopAttemptContext;
try {
Configuration conf = new Configuration();
// Empty segment list means no explicit segment restriction for this scan.
conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
String carbonTablePath = targetTable.getAbsoluteTableIdentifier().getTablePath();
conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
JobConf jobConf = new JobConf(conf);
CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable, PrestoFilterUtil.parseFilterExpression(carbondataSplit.getConstraints()), carbonProjection);
hadoopAttemptContext = new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
CarbonInputSplit carbonInputSplit = CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
// Vector reader enables batch (columnar) reads for Presto's page-oriented engine.
queryModel.setVectorReader(true);
} catch (IOException e) {
throw new RuntimeException("Unable to get the Query Model ", e);
}
return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel, hadoopAttemptContext);
}
use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by apache.
From the class CarbonInputMapperTest, method testInputFormatMapperReadAllRowsAndColumns().
/**
 * Reads all 1000 rows with a full 7-column projection and verifies both the row
 * count and the column count of the mapper output.
 */
@Test
public void testInputFormatMapperReadAllRowsAndColumns() throws Exception {
try {
String outPath = "target/output";
CarbonProjection carbonProjection = new CarbonProjection();
carbonProjection.addColumn("ID");
carbonProjection.addColumn("date");
carbonProjection.addColumn("country");
carbonProjection.addColumn("name");
carbonProjection.addColumn("phonetype");
carbonProjection.addColumn("serialname");
carbonProjection.addColumn("salary");
runJob(outPath, carbonProjection, null);
Assert.assertEquals("Count lines are not matching", 1000, countTheLines(outPath));
Assert.assertEquals("Column count are not matching", 7, countTheColumns(outPath));
} catch (Exception e) {
// Log the cause BEFORE failing: the original called assertTrue(false) first,
// which threw immediately and made printStackTrace() and the rethrow unreachable.
e.printStackTrace();
Assert.fail("failed: " + e.getMessage());
}
}
use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by apache.
From the class CarbonInputMapperTest, method testInputFormatMapperReadAllRowsAndFewColumnsWithFilter().
/**
 * Reads a 3-column projection with an equality filter (country = "france") and
 * verifies the filtered row count (101) and the projected column count.
 */
@Test
public void testInputFormatMapperReadAllRowsAndFewColumnsWithFilter() throws Exception {
try {
String outPath = "target/output3";
CarbonProjection carbonProjection = new CarbonProjection();
carbonProjection.addColumn("ID");
carbonProjection.addColumn("country");
carbonProjection.addColumn("salary");
Expression expression = new EqualToExpression(new ColumnExpression("country", DataType.STRING), new LiteralExpression("france", DataType.STRING));
runJob(outPath, carbonProjection, expression);
Assert.assertEquals("Count lines are not matching", 101, countTheLines(outPath));
Assert.assertEquals("Column count are not matching", 3, countTheColumns(outPath));
} catch (Exception e) {
// The original swallowed the exception entirely (assertTrue(false) with no
// trace), hiding the real failure cause. Surface it before failing.
e.printStackTrace();
Assert.fail("failed: " + e.getMessage());
}
}
use of org.apache.carbondata.hadoop.CarbonProjection in project carbondata by apache.
From the class CarbonInputMapperTest, method testInputFormatMapperReadAllRowsAndFewColumns().
/**
 * Reads all 1000 rows with a reduced 3-column projection and verifies both the
 * row count and the projected column count of the mapper output.
 */
@Test
public void testInputFormatMapperReadAllRowsAndFewColumns() throws Exception {
try {
String outPath = "target/output2";
CarbonProjection carbonProjection = new CarbonProjection();
carbonProjection.addColumn("ID");
carbonProjection.addColumn("country");
carbonProjection.addColumn("salary");
runJob(outPath, carbonProjection, null);
Assert.assertEquals("Count lines are not matching", 1000, countTheLines(outPath));
Assert.assertEquals("Column count are not matching", 3, countTheColumns(outPath));
} catch (Exception e) {
e.printStackTrace();
// Assert.fail with the message is clearer than assertTrue("failed", false)
// and carries the actual failure cause into the test report.
Assert.fail("failed: " + e.getMessage());
}
}
Aggregations