Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.
The class CarbonInputFormatUtil, method createCarbonTableInputFormat.
public static <V> CarbonTableInputFormat<V> createCarbonTableInputFormat(
    AbsoluteTableIdentifier identifier, List<String> partitionId, Job job) throws IOException {
  CarbonTableInputFormat<V> carbonTableInputFormat = new CarbonTableInputFormat<>();
  CarbonTableInputFormat.setPartitionIdList(job.getConfiguration(), partitionId);
  CarbonTableInputFormat.setDatabaseName(
      job.getConfiguration(), identifier.getCarbonTableIdentifier().getDatabaseName());
  CarbonTableInputFormat.setTableName(
      job.getConfiguration(), identifier.getCarbonTableIdentifier().getTableName());
  FileInputFormat.addInputPath(job, new Path(identifier.getTablePath()));
  return carbonTableInputFormat;
}
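A minimal caller-side sketch of how this factory might be used; the table path, database name, table name, and partition list below are hypothetical placeholders, and AbsoluteTableIdentifier.from(tablePath, dbName, tableName) is assumed to be available in this CarbonData version:

Job job = Job.getInstance(new Configuration());
// hypothetical table location and identifiers, for illustration only
AbsoluteTableIdentifier identifier =
    AbsoluteTableIdentifier.from("/tmp/store/default/sample_table", "default", "sample_table");
CarbonTableInputFormat<Object> format = CarbonInputFormatUtil.createCarbonTableInputFormat(
    identifier, Collections.singletonList("0"), job);
// the job configuration now carries the database/table names, partition list,
// and input path, so splits can be requested directly
List<InputSplit> splits = format.getSplits(job);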
Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.
The class CarbonInputFormatUtil, method createCarbonInputFormat.
public static <V> CarbonTableInputFormat<V> createCarbonInputFormat(
    AbsoluteTableIdentifier identifier, Job job) throws IOException {
  CarbonTableInputFormat<V> carbonInputFormat = new CarbonTableInputFormat<>();
  CarbonTableInputFormat.setDatabaseName(
      job.getConfiguration(), identifier.getCarbonTableIdentifier().getDatabaseName());
  CarbonTableInputFormat.setTableName(
      job.getConfiguration(), identifier.getCarbonTableIdentifier().getTableName());
  FileInputFormat.addInputPath(job, new Path(identifier.getTablePath()));
  return carbonInputFormat;
}
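The only difference from createCarbonTableInputFormat above is the missing setPartitionIdList call, making this the variant for reads that do not restrict partitions. Note that both factories do all their work through static setters on the job's Configuration; the returned CarbonTableInputFormat instance itself carries no state.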
Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.
The class CarbondataRecordSetProvider, method getRecordSet.
@Override
public RecordSet getRecordSet(ConnectorTransactionHandle transactionHandle, ConnectorSession session,
    ConnectorSplit split, List<? extends ColumnHandle> columns) {
  CarbondataSplit carbondataSplit =
      checkType(split, CarbondataSplit.class, "split is not class CarbondataSplit");
  checkArgument(carbondataSplit.getConnectorId().equals(connectorId), "split is not for this connector");
  CarbonProjection carbonProjection = new CarbonProjection();
  // Convert all column handles
  ImmutableList.Builder<CarbondataColumnHandle> handles = ImmutableList.builder();
  for (ColumnHandle handle : columns) {
    handles.add(checkType(handle, CarbondataColumnHandle.class, "handle"));
    carbonProjection.addColumn(((CarbondataColumnHandle) handle).getColumnName());
  }
  CarbonTableCacheModel tableCacheModel =
      carbonTableReader.getCarbonCache(carbondataSplit.getSchemaTableName());
  checkNotNull(tableCacheModel, "tableCacheModel should not be null");
  checkNotNull(tableCacheModel.carbonTable, "tableCacheModel.carbonTable should not be null");
  checkNotNull(tableCacheModel.carbonTable.getTableInfo(), "tableCacheModel.tableInfo should not be null");
  // Build the query model
  CarbonTable targetTable = tableCacheModel.carbonTable;
  QueryModel queryModel;
  TaskAttemptContextImpl hadoopAttemptContext;
  try {
    Configuration conf = new Configuration();
    conf.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
    String carbonTablePath = targetTable.getAbsoluteTableIdentifier().getTablePath();
    conf.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
    JobConf jobConf = new JobConf(conf);
    CarbonTableInputFormat carbonTableInputFormat = createInputFormat(jobConf, tableCacheModel.carbonTable,
        PrestoFilterUtil.parseFilterExpression(carbondataSplit.getConstraints()), carbonProjection);
    hadoopAttemptContext =
        new TaskAttemptContextImpl(jobConf, new TaskAttemptID("", 1, TaskType.MAP, 0, 0));
    CarbonInputSplit carbonInputSplit =
        CarbonLocalInputSplit.convertSplit(carbondataSplit.getLocalInputSplit());
    queryModel = carbonTableInputFormat.createQueryModel(carbonInputSplit, hadoopAttemptContext);
    queryModel.setVectorReader(true);
  } catch (IOException e) {
    throw new RuntimeException("Unable to get the Query Model ", e);
  }
  return new CarbondataRecordSet(targetTable, session, carbondataSplit, handles.build(), queryModel,
      hadoopAttemptContext);
}
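createInputFormat here is a private helper of the provider that is not part of this snippet. A plausible sketch of its shape, assuming the standard CarbonTableInputFormat setters setFilterPredicates (filter pushdown) and setColumnProjection (column pruning); the actual connector code may differ:

private CarbonTableInputFormat<Object> createInputFormat(Configuration conf, CarbonTable carbonTable,
    Expression filterExpression, CarbonProjection projection) {
  AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
  CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
  CarbonTableInputFormat.setDatabaseName(conf,
      identifier.getCarbonTableIdentifier().getDatabaseName());
  CarbonTableInputFormat.setTableName(conf,
      identifier.getCarbonTableIdentifier().getTableName());
  CarbonTableInputFormat.setFilterPredicates(conf, filterExpression);  // push the Presto constraint down
  CarbonTableInputFormat.setColumnProjection(conf, projection);        // prune to the requested columns
  return format;
}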
Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.
The class CarbonStreamRecordReader, method initialize.
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
  // input
  if (split instanceof CarbonInputSplit) {
    fileSplit = (CarbonInputSplit) split;
  } else if (split instanceof CarbonMultiBlockSplit) {
    fileSplit = ((CarbonMultiBlockSplit) split).getAllSplits().get(0);
  } else {
    fileSplit = (FileSplit) split;
  }
  // metadata
  hadoopConf = context.getConfiguration();
  if (model == null) {
    CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
    model = format.createQueryModel(split, context);
  }
  carbonTable = model.getTable();
  List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getTableName());
  dimensionCount = dimensions.size();
  List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getTableName());
  measureCount = measures.size();
  List<CarbonColumn> carbonColumnList = carbonTable.getStreamStorageOrderColumn(carbonTable.getTableName());
  storageColumns = carbonColumnList.toArray(new CarbonColumn[carbonColumnList.size()]);
  isNoDictColumn = CarbonDataProcessorUtil.getNoDictionaryMapping(storageColumns);
  directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) {
      directDictionaryGenerators[i] =
          DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(storageColumns[i].getDataType());
    }
  }
  measureDataTypes = new DataType[measureCount];
  for (int i = 0; i < measureCount; i++) {
    measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType();
  }
  // decode data
  allNonNull = new BitSet(storageColumns.length);
  projection = model.getProjectionColumns();
  isRequired = new boolean[storageColumns.length];
  boolean[] isFilterDimensions = model.getIsFilterDimensions();
  boolean[] isFilterMeasures = model.getIsFilterMeasures();
  isFilterRequired = new boolean[storageColumns.length];
  filterMap = new int[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    if (storageColumns[i].isDimension()) {
      if (isFilterDimensions[storageColumns[i].getOrdinal()]) {
        isRequired[i] = true;
        isFilterRequired[i] = true;
        filterMap[i] = storageColumns[i].getOrdinal();
      }
    } else {
      if (isFilterMeasures[storageColumns[i].getOrdinal()]) {
        isRequired[i] = true;
        isFilterRequired[i] = true;
        // measure ordinals are offset past the dimension ordinal range
        filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal();
      }
    }
  }
  isProjectionRequired = new boolean[storageColumns.length];
  projectionMap = new int[storageColumns.length];
  for (int i = 0; i < storageColumns.length; i++) {
    for (int j = 0; j < projection.length; j++) {
      if (storageColumns[i].getColName().equals(projection[j].getColName())) {
        isRequired[i] = true;
        isProjectionRequired[i] = true;
        projectionMap[i] = j;
        break;
      }
    }
  }
  // initialize filter
  if (null != model.getFilterExpressionResolverTree()) {
    initializeFilter();
  } else if (projection.length == 0) {
    skipScanData = true;
  }
}
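Once initialize has run, the reader is driven through the usual Hadoop record-reader protocol. A minimal consumption sketch, assuming split and context come from the framework and the value type is Object:

CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
RecordReader<Void, Object> reader = format.createRecordReader(split, context);
reader.initialize(split, context);
while (reader.nextKeyValue()) {
  // one decoded row per call; with queryModel.setVectorReader(true) the
  // value may instead be a columnar batch
  Object row = reader.getCurrentValue();
}
reader.close();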
Use of org.apache.carbondata.hadoop.api.CarbonTableInputFormat in project carbondata by apache.
The class CarbonTableInputFormatTest, method testGetSplits.
@Test
public void testGetSplits() throws Exception {
  CarbonTableInputFormat<Object> carbonInputFormat = new CarbonTableInputFormat<>();
  JobConf jobConf = new JobConf(new Configuration());
  Job job = Job.getInstance(jobConf);
  job.getConfiguration().set("query.id", UUID.randomUUID().toString());
  String tblPath = StoreCreator.getAbsoluteTableIdentifier().getTablePath();
  FileInputFormat.addInputPath(job, new Path(tblPath));
  CarbonTableInputFormat.setDatabaseName(job.getConfiguration(),
      StoreCreator.getAbsoluteTableIdentifier().getDatabaseName());
  CarbonTableInputFormat.setTableName(job.getConfiguration(),
      StoreCreator.getAbsoluteTableIdentifier().getTableName());
  // list files to get the carbondata file
  String segmentPath = CarbonTablePath.getSegmentPath(
      StoreCreator.getAbsoluteTableIdentifier().getTablePath(), "0");
  File segmentDir = new File(segmentPath);
  if (segmentDir.exists() && segmentDir.isDirectory()) {
    File[] files = segmentDir.listFiles(new FileFilter() {
      @Override
      public boolean accept(File pathname) {
        return pathname.getName().endsWith("carbondata");
      }
    });
    if (files != null && files.length > 0) {
      job.getConfiguration().set(CarbonTableInputFormat.INPUT_FILES, files[0].getName());
    }
  }
  List<InputSplit> splits = carbonInputFormat.getSplits(job);
  Assert.assertTrue(splits != null && splits.size() == 1);
}
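Setting CarbonTableInputFormat.INPUT_FILES to a single carbondata file is what makes the assertion deterministic: split computation is restricted to that one file, so exactly one split is expected.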