Use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.
The class CarbonFileInputFormat, method getSplits.
/**
 * {@inheritDoc}
 * The configuration FileInputFormat.INPUT_DIR is used to get the table path to read.
 *
 * @param job
 * @return List<InputSplit> list of CarbonInputSplit
 * @throws IOException
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
  CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
  if (null == carbonTable) {
    throw new IOException("Missing/Corrupt schema file for table.");
  }
  if (getValidateSegmentsToAccess(job.getConfiguration())) {
    // process and resolve the filter expression
    Expression filter = getFilterPredicates(job.getConfiguration());
    TableProvider tableProvider = new SingleTableProvider(carbonTable);
    // this will be null in case of corrupt schema file.
    PartitionInfo partitionInfo = carbonTable.getPartitionInfo(carbonTable.getTableName());
    carbonTable.processFilterExpression(filter, null, null);
    FilterResolverIntf filterInterface = carbonTable.resolveFilter(filter, tableProvider);
    // check for the external table segment (Segment_null)
    String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), "null");
    FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
    if (FileFactory.isFileExist(segmentDir, fileType)) {
      // if the external table segment is found, add it to the list
      List<Segment> externalTableSegments = new ArrayList<Segment>();
      Segment seg = new Segment("null", null);
      externalTableSegments.add(seg);
      Map<String, String> indexFiles =
          new SegmentIndexFileStore().getIndexFilesFromSegment(segmentDir);
      if (indexFiles.size() == 0) {
        throw new RuntimeException("Index file not present to read the carbondata file");
      }
      // do block filtering and get the splits
      List<InputSplit> splits =
          getSplits(job, filterInterface, externalTableSegments, null, partitionInfo, null);
      return splits;
    }
  }
  return null;
}
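Below is a rough sketch of how a caller might drive this method through the Hadoop MapReduce API. The input path and the raw use of Job here are illustrative assumptions, not taken from the carbondata sources.

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

// Point FileInputFormat.INPUT_DIR at the table folder and ask the format for splits.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
FileInputFormat.addInputPath(job, new Path("/tmp/external_carbon_table")); // hypothetical path
CarbonFileInputFormat<Object> format = new CarbonFileInputFormat<>();
List<InputSplit> splits = format.getSplits(job); // one CarbonInputSplit per selected block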
Use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.
The class CarbonIndexFileMergeWriter, method writeMergeIndexFileBasedOnSegmentFolder.
private String writeMergeIndexFileBasedOnSegmentFolder(List<String> indexFileNamesTobeAdded,
    boolean readFileFooterFromCarbonDataFile, String segmentPath,
    CarbonFile[] indexFiles) throws IOException {
  SegmentIndexFileStore fileStore = new SegmentIndexFileStore();
  if (readFileFooterFromCarbonDataFile) {
    // this case is used during upgrade: the old store does not keep the blocklet
    // info in the index file, so the blocklet info has to be read from the file
    // footer in the carbondata file
    fileStore.readAllIndexAndFillBolckletInfo(segmentPath);
  } else {
    fileStore.readAllIIndexOfSegment(segmentPath);
  }
  Map<String, byte[]> indexMap = fileStore.getCarbonIndexMap();
  writeMergeIndexFile(indexFileNamesTobeAdded, segmentPath, indexMap);
  // the individual index files are now merged into one file, so delete them
  for (CarbonFile indexFile : indexFiles) {
    indexFile.delete();
  }
  return null;
}
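As a minimal sketch of the read path this merge relies on, using only the SegmentIndexFileStore calls shown above (the segment path is a hypothetical placeholder):

import java.util.Map;
import org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore;

// Read every .carbonindex file under a segment folder into memory.
String segmentPath = "/tmp/store/db/tbl/Fact/Part0/Segment_0"; // hypothetical path
SegmentIndexFileStore fileStore = new SegmentIndexFileStore();
fileStore.readAllIIndexOfSegment(segmentPath);
Map<String, byte[]> indexMap = fileStore.getCarbonIndexMap(); // index file name -> serialized index content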
Use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.
The class SegmentFileStore, method getSchemaFiles.
/**
 * Reads all index files and gets the schema stored in each index file.
 * @throws IOException
 */
public static Map<String, List<ColumnSchema>> getSchemaFiles(SegmentFile segmentFile,
    String tablePath) throws IOException {
  Map<String, List<ColumnSchema>> schemaMap = new HashMap<>();
  if (segmentFile == null) {
    return schemaMap;
  }
  SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
  indexFileStore.readAllIIndexOfSegment(segmentFile, tablePath, SegmentStatus.SUCCESS, true);
  Map<String, byte[]> carbonIndexMap = indexFileStore.getCarbonIndexMapWithFullPath();
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  for (Map.Entry<String, byte[]> entry : carbonIndexMap.entrySet()) {
    List<DataFileFooter> indexInfo =
        fileFooterConverter.getIndexInfo(entry.getKey(), entry.getValue());
    if (indexInfo.size() > 0) {
      // all footers in one index file share the same schema, so the first is enough
      schemaMap.put(entry.getKey(), indexInfo.get(0).getColumnInTable());
    }
  }
  return schemaMap;
}
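A hypothetical caller might use the returned map to inspect which columns each index file records. Here segmentFile is assumed to have been obtained elsewhere, and the table path is a placeholder.

import java.util.List;
import java.util.Map;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;

// Print how many column schemas each index file carries.
Map<String, List<ColumnSchema>> schemaMap =
    SegmentFileStore.getSchemaFiles(segmentFile, "/tmp/store/db/tbl"); // placeholder path
for (Map.Entry<String, List<ColumnSchema>> entry : schemaMap.entrySet()) {
  System.out.println(entry.getKey() + " -> " + entry.getValue().size() + " columns");
}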
Use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.
The class SegmentFileStore, method readIndexFiles.
/**
 * Reads the index files that match the given segment status. When ignoreStatus
 * is true, all index files are read regardless of status.
 * @param status
 * @param ignoreStatus
 * @throws IOException
 */
private void readIndexFiles(SegmentStatus status, boolean ignoreStatus) throws IOException {
  if (indexFilesMap != null) {
    return;
  }
  SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
  indexFilesMap = new HashMap<>();
  indexFileStore.readAllIIndexOfSegment(this.segmentFile, tablePath, status, ignoreStatus);
  Map<String, byte[]> carbonIndexMap = indexFileStore.getCarbonIndexMapWithFullPath();
  DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter();
  for (Map.Entry<String, byte[]> entry : carbonIndexMap.entrySet()) {
    List<DataFileFooter> indexInfo =
        fileFooterConverter.getIndexInfo(entry.getKey(), entry.getValue());
    // collect the data file path of every block described by this index file
    List<String> blocks = new ArrayList<>();
    for (DataFileFooter footer : indexInfo) {
      blocks.add(footer.getBlockInfo().getTableBlockInfo().getFilePath());
    }
    indexFilesMap.put(entry.getKey(), blocks);
  }
}
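For reference, the map built above pairs each index file with the data files it covers. A minimal illustrative walk over a map of that shape (names and paths are hypothetical):

import java.util.List;
import java.util.Map;

// indexFilesMap: index file path -> paths of the carbondata blocks it describes
for (Map.Entry<String, List<String>> entry : indexFilesMap.entrySet()) {
  System.out.println("index file: " + entry.getKey());
  for (String dataFilePath : entry.getValue()) {
    System.out.println("  covers: " + dataFilePath);
  }
}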
Use of org.apache.carbondata.core.indexstore.blockletindex.SegmentIndexFileStore in project carbondata by apache.
The class BlockletDataMapIndexStore, method getAll.
@Override
public List<BlockletDataMap> getAll(
    List<TableBlockIndexUniqueIdentifier> tableSegmentUniqueIdentifiers) throws IOException {
  List<BlockletDataMap> blockletDataMaps = new ArrayList<>(tableSegmentUniqueIdentifiers.size());
  List<TableBlockIndexUniqueIdentifier> missedIdentifiers = new ArrayList<>();
  // get the datamap for each index file from the cache; remember the misses
  try {
    for (TableBlockIndexUniqueIdentifier identifier : tableSegmentUniqueIdentifiers) {
      BlockletDataMap ifPresent = getIfPresent(identifier);
      if (ifPresent != null) {
        blockletDataMaps.add(ifPresent);
      } else {
        missedIdentifiers.add(identifier);
      }
    }
    // load the missed datamaps from the index files, sharing one file store
    if (missedIdentifiers.size() > 0) {
      SegmentIndexFileStore indexFileStore = new SegmentIndexFileStore();
      Set<String> filesRead = new HashSet<>();
      for (TableBlockIndexUniqueIdentifier identifier : missedIdentifiers) {
        Map<String, BlockMetaInfo> blockMetaInfoMap =
            getBlockMetaInfoMap(identifier, indexFileStore, filesRead);
        blockletDataMaps.add(loadAndGetDataMap(identifier, indexFileStore, blockMetaInfoMap));
      }
    }
  } catch (Throwable e) {
    // on any failure, release the datamaps loaded so far before rethrowing
    for (BlockletDataMap dataMap : blockletDataMaps) {
      dataMap.clear();
    }
    throw new IOException("Problem in loading segment blocks.", e);
  }
  return blockletDataMaps;
}
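The method follows a common cache-then-load pattern: serve what the cache already holds, then batch-load only the misses. A minimal standalone sketch of that pattern, with all names illustrative rather than carbondata APIs:

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

// Generic get-or-load: return cached values, load and cache the misses.
static <K, V> List<V> getAll(List<K> keys, Map<K, V> cache, Function<K, V> loader) {
  List<V> result = new ArrayList<>(keys.size());
  List<K> misses = new ArrayList<>();
  for (K key : keys) {
    V cached = cache.get(key);
    if (cached != null) {
      result.add(cached);
    } else {
      misses.add(key);
    }
  }
  for (K key : misses) {
    V loaded = loader.apply(key); // e.g. read the index file and build the datamap
    cache.put(key, loaded);
    result.add(loaded);
  }
  return result;
}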