Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class BlockletIndexUtil, method createCarbonDataFileBlockMetaInfoMapping.
/**
 * Creates a file-name to BlockMetaInfo mapping for the carbondata files in a segment.
 * This reduces the number of NameNode calls, since a single location-aware listing call
 * fetches up to 1000 entries at a time.
 *
 * @param segmentFilePath path of the segment folder to list
 * @return mapping from carbondata file path to its BlockMetaInfo (locations and size)
 * @throws IOException if the segment folder cannot be listed
 */
public static Map<String, BlockMetaInfo> createCarbonDataFileBlockMetaInfoMapping(
    String segmentFilePath, Configuration configuration) throws IOException {
  Map<String, BlockMetaInfo> fileNameToMetaInfoMapping = new TreeMap<>();
  CarbonFile carbonFile = FileFactory.getCarbonFile(segmentFilePath, configuration);
  if (carbonFile instanceof AbstractDFSCarbonFile && !(carbonFile instanceof S3CarbonFile)) {
    PathFilter pathFilter = new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return CarbonTablePath.isCarbonDataFile(path.getName());
      }
    };
    CarbonFile[] carbonFiles = carbonFile.locationAwareListFiles(pathFilter);
    for (CarbonFile file : carbonFiles) {
      String[] location = file.getLocations();
      long len = file.getSize();
      BlockMetaInfo blockMetaInfo = new BlockMetaInfo(location, len);
      fileNameToMetaInfoMapping.put(file.getPath(), blockMetaInfo);
    }
  }
  return fileNameToMetaInfoMapping;
}
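A minimal caller sketch, not part of the project source: the segment path, the carbondata file name, and the default Hadoop Configuration below are placeholders chosen for illustration. It shows how the returned map can replace per-file NameNode lookups.
// Hypothetical usage with placeholder values.
Configuration configuration = new Configuration();
String segmentFilePath = "/store/default/my_table/Fact/Part0/Segment_0";  // placeholder path
Map<String, BlockMetaInfo> metaInfoMap =
    BlockletIndexUtil.createCarbonDataFileBlockMetaInfoMapping(segmentFilePath, configuration);
// look up locations and size without an extra NameNode call; the map is empty when the
// location is not an HDFS-backed (non-S3) store
String carbonDataFilePath = segmentFilePath + "/part-0-0.carbondata";  // hypothetical file name
BlockMetaInfo metaInfo = metaInfoMap.get(carbonDataFilePath);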
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class CarbonTestUtil, method getDimRawChunk.
/**
 * Reads the dimension raw column chunks for the given block index from the first
 * carbondata file under the store path. Tests use the returned chunks to verify
 * whether a local dictionary was generated for all the blocklets.
 *
 * @param storePath  table store path containing the carbondata files
 * @param blockindex index of the dimension block to read
 * @return dimensionRawColumnChunks
 */
public static ArrayList<DimensionRawColumnChunk> getDimRawChunk(String storePath,
    Integer blockindex) throws IOException {
  CarbonFile[] dataFiles = FileFactory.getCarbonFile(storePath).listFiles(new CarbonFileFilter() {
    @Override
    public boolean accept(CarbonFile file) {
      return file.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
    }
  });
  return read(dataFiles[0].getAbsolutePath(), blockindex);
}
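A short test-style sketch of how this helper might be called; the store path and block index are placeholders, not values from the original.
// Hypothetical test usage with a placeholder store path and block index 0.
ArrayList<DimensionRawColumnChunk> dimRawChunks =
    CarbonTestUtil.getDimRawChunk("/store/default/local_dict_table", 0);
// a test would then inspect each chunk's blocklets for local-dictionary presence;
// the exact assertions depend on the table's local dictionary configuration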
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class StageInputCollector, method createInputSplits.
/**
* Read stage files and create input splits from them
*/
public static List<InputSplit> createInputSplits(ExecutorService executorService,
    List<CarbonFile> stageFiles) throws ExecutionException, InterruptedException {
  Objects.requireNonNull(executorService);
  Objects.requireNonNull(stageFiles);
  long startTime = System.currentTimeMillis();
  List<InputSplit> output = Collections.synchronizedList(new ArrayList<>());
  Gson gson = new Gson();
  // read each stage file and create input splits from it;
  // the files are read through a thread pool to increase parallelism
  List<Future<Boolean>> futures = stageFiles.stream()
      .map(stageFile -> executorService.submit(() -> {
        String filePath = stageFile.getAbsolutePath();
        InputStreamReader reader = null;
        try {
          reader = new InputStreamReader(FileFactory.getDataInputStream(filePath));
          StageInput stageInput = gson.fromJson(reader, StageInput.class);
          output.addAll(stageInput.createSplits());
          return true;
        } catch (IOException e) {
          LOGGER.error("failed to read stage file " + filePath);
          return false;
        } finally {
          IOUtils.closeQuietly(reader);
        }
      }))
      .collect(Collectors.toList());
  for (Future<Boolean> future : futures) {
    future.get();
  }
  LOGGER.info("read stage files taken " + (System.currentTimeMillis() - startTime) + "ms");
  return output;
}
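A hedged sketch of driving this method with a thread pool. The pool size is an arbitrary choice for illustration, and the stageFiles list is assumed to have been collected beforehand (for example by collectStageFiles, shown next).
// Hypothetical usage: manage the executor's lifecycle around the call.
ExecutorService executorService = Executors.newFixedThreadPool(4);  // arbitrary pool size
try {
  List<InputSplit> splits = StageInputCollector.createInputSplits(executorService, stageFiles);
  // hand the splits to the load/query planner
} finally {
  executorService.shutdown();
}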
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class StageInputCollector, method collectStageFiles.
/**
 * Collect all stage files and their matching success files.
 * A stage file without a corresponding success file will not be collected.
 */
public static void collectStageFiles(CarbonTable table, Configuration hadoopConf,
    List<CarbonFile> stageInputList, List<CarbonFile> successFileList) {
  Objects.requireNonNull(table);
  Objects.requireNonNull(hadoopConf);
  Objects.requireNonNull(stageInputList);
  Objects.requireNonNull(successFileList);
  CarbonFile dir = FileFactory.getCarbonFile(table.getStagePath(), hadoopConf);
  if (dir.exists()) {
    // list the stage folder and collect every stage file that has a corresponding
    // success file, which means the stage file has been committed
    CarbonFile[] allFiles = dir.listFiles();
    Map<String, CarbonFile> map = new HashMap<>();
    Arrays.stream(allFiles)
        .filter(file -> file.getName().endsWith(SUCCESS_FILE_SUFFIX))
        .forEach(file -> map.put(file.getName().substring(0, file.getName().indexOf(".")), file));
    Arrays.stream(allFiles)
        .filter(file -> !file.getName().endsWith(SUCCESS_FILE_SUFFIX))
        .filter(file -> map.containsKey(file.getName()))
        .forEach(carbonFile -> {
          stageInputList.add(carbonFile);
          successFileList.add(map.get(carbonFile.getName()));
        });
  }
}
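A sketch of the collection step on its own; the carbonTable and hadoopConf variables are assumed to exist in the caller.
// Hypothetical usage: detect whether there are committed stage files to load.
List<CarbonFile> stageInputList = new ArrayList<>();
List<CarbonFile> successFileList = new ArrayList<>();
StageInputCollector.collectStageFiles(carbonTable, hadoopConf, stageInputList, successFileList);
if (!stageInputList.isEmpty()) {
  // stageInputList now holds only committed stage files (each has a matching success file)
  // and can be passed to createInputSplits shown above
}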
Use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
The class SegmentUpdateStatusManager, method getDeleteDeltaInvalidFilesList.
/**
 * Returns delete delta files of the given block. When needCompleteList is true, the complete
 * list of the block's delta files is returned; otherwise only the invalid ones, i.e. files
 * whose timestamp lies outside the block's delta start/end window (or, for aborted files,
 * after the end timestamp).
 *
 * @param block            update details of the block whose delta files are requested
 * @param needCompleteList whether to return the complete list of delete delta files
 * @return matching delete delta files of the block
 */
public CarbonFile[] getDeleteDeltaInvalidFilesList(final SegmentUpdateDetails block,
    final boolean needCompleteList, CarbonFile[] allSegmentFiles, boolean isAbortedFile) {
  final long deltaStartTimestamp =
      getStartTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
  final long deltaEndTimestamp =
      getEndTimeOfDeltaFile(CarbonCommonConstants.DELETE_DELTA_FILE_EXT, block);
  Set<CarbonFile> files = new HashSet<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
  for (CarbonFile eachFile : allSegmentFiles) {
    String fileName = eachFile.getName();
    if (fileName.endsWith(CarbonCommonConstants.DELETE_DELTA_FILE_EXT)) {
      String blkName = CarbonTablePath.DataFileUtil.getBlockNameFromDeleteDeltaFile(fileName);
      // complete list of delta files of that block is returned.
      if (needCompleteList && block.getBlockName().equalsIgnoreCase(blkName)) {
        files.add(eachFile);
      }
      // invalid delete delta files only will be returned.
      long timestamp = CarbonUpdateUtil.getTimeStampAsLong(
          CarbonTablePath.DataFileUtil.getTimeStampFromDeleteDeltaFile(fileName));
      if (block.getBlockName().equalsIgnoreCase(blkName)) {
        if (isAbortedFile) {
          if (timestamp > deltaEndTimestamp) {
            files.add(eachFile);
          }
        } else if (timestamp < deltaStartTimestamp || timestamp > deltaEndTimestamp) {
          files.add(eachFile);
        }
      }
    }
  }
  return files.toArray(new CarbonFile[files.size()]);
}
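A hedged usage sketch: it assumes an existing SegmentUpdateStatusManager instance (updateStatusManager), a SegmentUpdateDetails entry for the block (blockDetails), and a segment directory path (segmentDirPath); all three names are placeholders introduced for illustration.
// Hypothetical usage: list the segment's files once and reuse the array for both calls.
CarbonFile[] allSegmentFiles = FileFactory.getCarbonFile(segmentDirPath).listFiles();
// delete delta files of the block whose timestamps fall outside the [start, end] window
CarbonFile[] invalidDeltaFiles =
    updateStatusManager.getDeleteDeltaInvalidFilesList(blockDetails, false, allSegmentFiles, false);
// delta files written after the block's end timestamp, i.e. left behind by an aborted operation
CarbonFile[] abortedDeltaFiles =
    updateStatusManager.getDeleteDeltaInvalidFilesList(blockDetails, false, allSegmentFiles, true);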