use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class CarbonTableReader method getInputSplits.
/**
 * Get the carbon multi-block input splits for a table.
 *
 * <p>The method populates {@code config} with the table path, database name, table name,
 * query ids and table info, asks a {@code CarbonTableInputFormat} for its splits, converts
 * every split to a {@code CarbonLocalInputSplit} and finally groups them into
 * {@code CarbonLocalMultiBlockSplit}s. Splits are grouped by segment id + file path; for
 * the {@code ROW_V1} file format the start offset is appended to the grouping key as well.
 *
 * @param tableCacheModel cached table
 * @param filters carbonData filters
 * @param filteredPartitions matched partitionSpec for the filter
 * @param config hadoop conf; note that this method writes several properties into it
 * @return list of multi-block splits; empty when the input format returns no splits
 * @throws IOException declared for the configuration calls made before the split
 *     computation; an {@code IOException} raised while creating the input format or
 *     computing the splits is rethrown wrapped in a {@code RuntimeException}
 */
public List<CarbonLocalMultiBlockSplit> getInputSplits(CarbonTableCacheModel tableCacheModel, Expression filters, List<PartitionSpec> filteredPartitions, Configuration config) throws IOException {
  List<CarbonLocalInputSplit> result = new ArrayList<>();
  List<CarbonLocalMultiBlockSplit> multiBlockSplitList = new ArrayList<>();
  CarbonTable carbonTable = tableCacheModel.getCarbonTable();
  TableInfo tableInfo = carbonTable.getTableInfo();

  // Describe the table and the running query to the input format through the configuration.
  config.set("presto.cli.query.id", prestoQueryId);
  config.set(CarbonTableInputFormat.INPUT_SEGMENT_NUMBERS, "");
  String carbonTablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
  config.set(CarbonTableInputFormat.INPUT_DIR, carbonTablePath);
  config.set(CarbonTableInputFormat.DATABASE_NAME, carbonTable.getDatabaseName());
  config.set(CarbonTableInputFormat.TABLE_NAME, carbonTable.getTableName());
  config.set("query.id", queryId);
  CarbonInputFormat.setTransactionalTable(config, carbonTable.isTransactionalTable());
  CarbonInputFormat.setTableInfo(config, tableInfo);
  if (CarbonProperties.getInstance().isCoarseGrainSecondaryIndex(tableInfo.getDatabaseName(), tableInfo.getFactTable().getTableName(), "true")) {
    CarbonInputFormat.checkAndSetSecondaryIndexPruning(tableInfo, filters, config);
  }

  JobConf jobConf = new JobConf(config);
  try {
    // NOTE(review): this writes to `config` after `jobConf` has already been created from
    // it, so it presumably only affects the caller's configuration object - confirm intent.
    CarbonTableInputFormat.setTableInfo(config, tableInfo);
    CarbonTableInputFormat<Object> carbonTableInputFormat = createInputFormat(jobConf, carbonTable.getAbsoluteTableIdentifier(), new IndexFilter(carbonTable, filters, true), filteredPartitions);
    Job job = Job.getInstance(jobConf);
    List<InputSplit> splits = carbonTableInputFormat.getSplits(job);
    Gson gson = new Gson();
    if (splits != null && !splits.isEmpty()) {
      for (InputSplit inputSplit : splits) {
        CarbonInputSplit carbonInputSplit = (CarbonInputSplit) inputSplit;
        result.add(new CarbonLocalInputSplit(
            carbonInputSplit.getSegmentId(),
            carbonInputSplit.getPath().toString(),
            carbonInputSplit.getStart(),
            carbonInputSplit.getLength(),
            Arrays.asList(carbonInputSplit.getLocations()),
            carbonInputSplit.getNumberOfBlocklets(),
            carbonInputSplit.getVersion().number(),
            carbonInputSplit.getDeleteDeltaFiles(),
            carbonInputSplit.getBlockletId(),
            gson.toJson(carbonInputSplit.getDetailInfo()),
            carbonInputSplit.getFileFormat().ordinal()));
      }

      // Use block distribution: one group per segment + file (plus start offset for ROW_V1).
      List<List<CarbonLocalInputSplit>> inputSplits = new ArrayList<>(result.stream().collect(Collectors.groupingBy(carbonInput -> {
        if (FileFormat.ROW_V1.equals(carbonInput.getFileFormat())) {
          return carbonInput.getSegmentId().concat(carbonInput.getPath()).concat(String.valueOf(carbonInput.getStart()));
        }
        return carbonInput.getSegmentId().concat(carbonInput.getPath());
      })).values());

      // TODO : try to optimize the below logic as it may slow down for huge splits
      for (List<CarbonLocalInputSplit> group : inputSplits) {
        // The locations of a multi-block split are the distinct locations of its members.
        String[] locations = group.stream()
            .flatMap(split -> Arrays.stream(getLocations(split)))
            .distinct()
            .toArray(String[]::new);
        multiBlockSplitList.add(new CarbonLocalMultiBlockSplit(group, locations));
      }
      // Routine diagnostic, not a failure: log at INFO instead of ERROR.
      LOGGER.info("Size of MultiblockList " + multiBlockSplitList.size());
    }
  } catch (IOException e) {
    // Kept as an unchecked rethrow so existing callers see the same exception type.
    throw new RuntimeException(e);
  }
  return multiBlockSplitList;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class IndexProvider method createIndexFactory.
/**
 * Creates the index factory for this provider's index schema.
 *
 * <p>The provider name of the schema is first treated as a fully qualified class name:
 * the class is loaded and its first public constructor is invoked with the main table and
 * the index schema. If no such class exists, the provider name is treated as a short name
 * and resolved through {@code IndexRegistry}. Any other failure during the reflective
 * creation is rethrown as a {@code MetadataProcessException}.
 *
 * @return the created index factory
 * @throws MalformedIndexCommandException declared by this method; presumably raised by the
 *     short-name lookup - confirm against {@code IndexRegistry}
 */
private IndexFactory<? extends Index> createIndexFactory() throws MalformedIndexCommandException {
  CarbonTable table = getMainTable();
  IndexSchema schema = getIndexSchema();
  try {
    // First attempt: the provider name is a class name.
    Class<?> providerClass = Class.forName(schema.getProviderName());
    Object instance = providerClass.getConstructors()[0].newInstance(table, schema);
    return (IndexFactory<? extends Index>) instance;
  } catch (ClassNotFoundException notAClassName) {
    // Second attempt: the provider name is a registered short name.
    return IndexRegistry.getIndexFactoryByShortName(table, schema);
  } catch (Throwable failure) {
    throw new MetadataProcessException("failed to create IndexClassProvider '" + schema.getProviderName() + "'", failure);
  }
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class PrestoFilterUtil method getPartitionFilters.
/**
 * Return partition filters using domain constraints.
 *
 * <p>Every column of the constraint whose name equals the name of one of the table's
 * partition columns contributes the filters produced by {@code createPartitionFilters}.
 * When the constraint carries no domain map, no filter can be derived and an empty list
 * is returned.
 *
 * @param carbonTable table whose partition columns are matched against the constraint
 * @param originalConstraint domain constraints keyed by column handle
 * @return partition filter expressions; empty if no constrained column is a partition column
 */
public static List<String> getPartitionFilters(CarbonTable carbonTable, TupleDomain<HiveColumnHandle> originalConstraint) {
  List<ColumnSchema> columnSchemas = carbonTable.getPartitionInfo().getColumnSchemaList();
  List<String> filter = new ArrayList<>();
  // getDomains() is an Optional; calling get() on an absent value would throw
  // NoSuchElementException, so bail out with no filters instead.
  if (!originalConstraint.getDomains().isPresent()) {
    return filter;
  }
  for (HiveColumnHandle columnHandle : originalConstraint.getDomains().get().keySet()) {
    boolean isPartitionColumn = columnSchemas.stream()
        .anyMatch(columnSchema -> columnHandle.getName().equals(columnSchema.getColumnName()));
    if (isPartitionColumn) {
      filter.addAll(createPartitionFilters(originalConstraint, columnHandle));
    }
  }
  return filter;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class MetaCachedCarbonStore method getTable.
/**
 * Returns the {@code CarbonTable} stored at the given table path, using the cache.
 *
 * <p>On a cache miss the table info is obtained in one of two ways: if the schema file for
 * the path exists it is read and converted from the thrift format to the wrapper format,
 * and the table path is set on the result; otherwise the schema is inferred through
 * {@code SchemaReader.inferSchema}. The table built from that info is cached under
 * {@code path} before being returned.
 *
 * @param path table path, also used as the cache key
 * @return the cached or newly built table
 * @throws IOException declared by this method; presumably from the file-system and schema
 *     reading calls
 */
CarbonTable getTable(String path) throws IOException {
  if (cache.containsKey(path)) {
    return cache.get(path);
  }
  String schemaFile = CarbonTablePath.getSchemaFilePath(path);
  TableInfo wrapperInfo;
  if (FileFactory.isFileExist(schemaFile)) {
    // Schema file present: read the thrift table info and convert it to the wrapper form.
    org.apache.carbondata.format.TableInfo thriftInfo =
        CarbonUtil.readSchemaFile(CarbonTablePath.getSchemaFilePath(path));
    SchemaConverter converter = new ThriftWrapperSchemaConverterImpl();
    wrapperInfo = converter.fromExternalToWrapperTableInfo(thriftInfo, "", "", "");
    wrapperInfo.setTablePath(path);
  } else {
    // No schema file: infer the schema for this path.
    wrapperInfo = SchemaReader.inferSchema(AbsoluteTableIdentifier.from(path), false);
  }
  CarbonTable loaded = CarbonTable.buildFromTableInfo(wrapperInfo);
  cache.put(path, loaded);
  return loaded;
}
use of org.apache.carbondata.core.metadata.schema.table.CarbonTable in project carbondata by apache.
the class SegmentFileStore method writeSegmentFileForOthers.
/**
 * Writes a segment file describing the data files of the given segment.
 *
 * <p>The data files are taken from {@code partitionDataFiles} when that list is non-empty;
 * otherwise the segment folder is listed, skipping the {@code _SUCCESS} marker and
 * {@code .crc} files. When at least one data file is found, a segment file is written into
 * the table's segment-files location (the folder is created if it does not exist). The
 * recorded folder is the partition location (with its partitions) when partition data
 * files were supplied, and the segment path otherwise.
 *
 * @param carbonTable table the segment belongs to
 * @param segment segment to describe
 * @param partitionSpec partition whose location and partitions are recorded; only read
 *     when {@code partitionDataFiles} is non-empty
 * @param partitionDataFiles data files of the partition, or an empty list to list the
 *     segment folder instead
 * @return {@code true} if a segment file was written, {@code false} if no data files were
 *     found
 * @throws IOException declared by this method; presumably from the file-system calls
 */
public static boolean writeSegmentFileForOthers(CarbonTable carbonTable, Segment segment, PartitionSpec partitionSpec, List<FileStatus> partitionDataFiles) throws IOException {
  String tablePath = carbonTable.getTablePath();
  boolean hasPartitionFiles = !partitionDataFiles.isEmpty();

  CarbonFile[] dataFiles;
  if (hasPartitionFiles) {
    dataFiles = partitionDataFiles.stream()
        .map(status -> FileFactory.getCarbonFile(status.getPath().toString()))
        .toArray(CarbonFile[]::new);
  } else {
    CarbonFile segmentFolder = FileFactory.getCarbonFile(segment.getSegmentPath());
    dataFiles = segmentFolder.listFiles(candidate ->
        !(candidate.getName().equals("_SUCCESS") || candidate.getName().endsWith(".crc")));
  }

  // Nothing to describe: no segment file is written.
  if (dataFiles == null || dataFiles.length == 0) {
    return false;
  }

  SegmentFile segmentFile = new SegmentFile();
  segmentFile.setOptions(segment.getOptions());

  FolderDetails details = new FolderDetails();
  details.setStatus(SegmentStatus.SUCCESS.getMessage());
  details.setRelative(false);
  if (hasPartitionFiles) {
    details.setPartitions(partitionSpec.getPartitions());
    segmentFile.addPath(partitionSpec.getLocation().toString(), details);
  } else {
    segmentFile.addPath(segment.getSegmentPath(), details);
  }
  for (int i = 0; i < dataFiles.length; i++) {
    details.getFiles().add(dataFiles[i].getName());
  }

  String segmentFileFolder = CarbonTablePath.getSegmentFilesLocation(tablePath);
  CarbonFile segmentFilesDir = FileFactory.getCarbonFile(segmentFileFolder);
  if (!segmentFilesDir.exists()) {
    segmentFilesDir.mkdirs();
  }
  // write segment info to new file.
  String segmentFilePath = segmentFileFolder + File.separator + segment.getSegmentFileName();
  writeSegmentFile(segmentFile, segmentFilePath);
  return true;
}
Aggregations