use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class StreamSegment method recoverFileIfRequired.
/**
 * Check the health of the stream data file and try to recover the data file after a task fault.
 * This method will be invoked in the following scenario:
 * 1. at the beginning of the data file writing task
 */
public static void recoverFileIfRequired(String segmentDir, String fileName, String indexName)
    throws IOException {
  FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
  String filePath = segmentDir + File.separator + fileName;
  CarbonFile file = FileFactory.getCarbonFile(filePath, fileType);
  String indexPath = segmentDir + File.separator + indexName;
  CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
  if (file.exists() && index.exists()) {
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    try {
      indexReader.openThriftReader(indexPath);
      while (indexReader.hasNext()) {
        BlockIndex blockIndex = indexReader.readBlockIndexInfo();
        if (blockIndex.getFile_name().equals(fileName)) {
          if (blockIndex.getFile_size() == 0) {
            // the index recorded no committed data for this file, so discard it
            file.delete();
          } else if (blockIndex.getFile_size() < file.getSize()) {
            // the data file is longer than the committed size, so cut off the uncommitted tail
            FileFactory.truncateFile(filePath, fileType, blockIndex.getFile_size());
          }
        }
      }
    } finally {
      indexReader.closeThriftReader();
    }
  }
}
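A minimal usage sketch for the method above, assuming a streaming write task that recovers the segment's data file before appending to it. The CarbonTablePath helpers mirror calls that appear in the getSplitsOfStreaming example further down; the data file name and the identifier/segment variables are placeholders for illustration, not CarbonData's actual naming rules.
// Sketch only: recover the stream data file before a new write task appends to it.
String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
String dataFileName = "part-0-0.carbondata"; // assumed file name, for illustration only
StreamSegment.recoverFileIfRequired(segmentDir, dataFileName, indexName);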
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class ManageDictionaryAndBTree method deleteDictionaryFileAndCache.
/**
 * This method will delete the dictionary files for the given column ID and
 * clear the dictionary cache
 *
 * @param columnSchema
 * @param identifier
 */
public static void deleteDictionaryFileAndCache(final ColumnSchema columnSchema,
    AbsoluteTableIdentifier identifier) {
  String metadataDirectoryPath = CarbonTablePath.getMetadataPath(identifier.getTablePath());
  CarbonFile metadataDir =
      FileFactory.getCarbonFile(metadataDirectoryPath, FileFactory.getFileType(metadataDirectoryPath));
  if (metadataDir.exists()) {
    // sort index files are created with the dictionary size appended to them, so all
    // files starting with the given column ID need to be listed
    CarbonFile[] listFiles = metadataDir.listFiles(new CarbonFileFilter() {
      @Override
      public boolean accept(CarbonFile path) {
        if (path.getName().startsWith(columnSchema.getColumnUniqueId())) {
          return true;
        }
        return false;
      }
    });
    for (CarbonFile file : listFiles) {
      // the try-catch sits inside the loop so that one failed deletion does not stop
      // the remaining files from being deleted
      try {
        FileFactory.deleteFile(file.getCanonicalPath(), FileFactory.getFileType(file.getCanonicalPath()));
      } catch (IOException e) {
        LOGGER.error("Failed to delete dictionary or sortIndex file for column "
            + columnSchema.getColumnName() + " with column ID " + columnSchema.getColumnUniqueId());
      }
    }
  }
  // remove dictionary cache
  removeDictionaryColumnFromCache(identifier, columnSchema.getColumnUniqueId());
}
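A hedged sketch of how this cleanup might be driven when columns are dropped from a table. The getAbsoluteTableIdentifier accessor and the droppedColumns collection are assumptions for illustration; only deleteDictionaryFileAndCache itself comes from the example above.
// Sketch only: remove dictionary files and cached entries for dropped columns.
AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier(); // assumed accessor
for (ColumnSchema column : droppedColumns) { // assumed collection of dropped column schemas
  ManageDictionaryAndBTree.deleteDictionaryFileAndCache(column, identifier);
}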
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class CarbonTableInputFormat method getSplitsOfStreaming.
/**
 * Use the file list in the .carbonindex file to get the splits of a streaming segment.
 */
public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier,
    List<Segment> streamSegments) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  if (streamSegments != null && !streamSegments.isEmpty()) {
    numStreamSegments = streamSegments.size();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    for (Segment segment : streamSegments) {
      String segmentDir = CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
      FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
      if (FileFactory.isFileExist(segmentDir, fileType)) {
        String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
        String indexPath = segmentDir + File.separator + indexName;
        CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
        // index file exists
        if (index.exists()) {
          // data file exists
          CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
          try {
            // map block index
            indexReader.openThriftReader(indexPath);
            while (indexReader.hasNext()) {
              BlockIndex blockIndex = indexReader.readBlockIndexInfo();
              String filePath = segmentDir + File.separator + blockIndex.getFile_name();
              Path path = new Path(filePath);
              long length = blockIndex.getFile_size();
              if (length != 0) {
                BlockLocation[] blkLocations;
                FileSystem fs = FileFactory.getFileSystem(path);
                FileStatus file = fs.getFileStatus(path);
                blkLocations = fs.getFileBlockLocations(path, 0, length);
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > 1.1) {
                  int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                  splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining, splitSize,
                      blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts(),
                      FileFormat.ROW_V1));
                  bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                  int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                  splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining, bytesRemaining,
                      blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts(),
                      FileFormat.ROW_V1));
                }
              } else {
                // Create empty hosts array for zero length files
                splits.add(makeSplit(segment.getSegmentNo(), path, 0, length, new String[0], FileFormat.ROW_V1));
              }
            }
          } finally {
            indexReader.closeThriftReader();
          }
        }
      }
    }
  }
  return splits;
}
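The split boundaries above follow Hadoop's FileInputFormat convention, where computeSplitSize resolves to max(minSize, min(maxSize, blockSize)) and the 1.1 factor lets the last split run up to 10% over the target size. A small worked sketch, assuming a 128 MB block size and default min/max split settings:
// Worked example (assumed defaults): split a 300 MB stream data file.
long blockSize = 128L * 1024 * 1024;   // assumed HDFS block size
long minSize = 1L;                     // getFormatMinSplitSize() default
long maxSize = Long.MAX_VALUE;         // no max split size configured
long splitSize = Math.max(minSize, Math.min(maxSize, blockSize)); // 128 MB
// With length = 300 MB the loop emits two 128 MB splits, then a final 44 MB split,
// because 44 MB / 128 MB falls below the 1.1 threshold.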
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class MinMaxIndexDataMap method getCarbonMinMaxIndexFiles.
private CarbonFile[] getCarbonMinMaxIndexFiles(String filePath, String segmentId) {
  String path = filePath.substring(0, filePath.lastIndexOf("/") + 1);
  CarbonFile carbonFile = FileFactory.getCarbonFile(path);
  return carbonFile.listFiles(new CarbonFileFilter() {
    @Override
    public boolean accept(CarbonFile file) {
      return file.getName().endsWith(".minmaxindex");
    }
  });
}
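A minimal sketch of how such a listing is typically consumed by the datamap, assuming a hypothetical caller in the same class; readMinMaxIndexFile is an invented helper name used only to show the iteration.
// Sketch only: read every .minmaxindex file found next to the data file.
CarbonFile[] indexFiles = getCarbonMinMaxIndexFiles(filePath, segmentId);
for (CarbonFile indexFile : indexFiles) {
  readMinMaxIndexFile(indexFile.getCanonicalPath()); // assumed helper for illustration
}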
use of org.apache.carbondata.core.datastore.filesystem.CarbonFile in project carbondata by apache.
the class FileFactory method truncateFile.
/**
* this method will truncate the file to the new size.
* @param path
* @param fileType
* @param newSize
* @throws IOException
*/
public static void truncateFile(String path, FileType fileType, long newSize) throws IOException {
  path = path.replace("\\", "/");
  FileChannel fileChannel = null;
  switch (fileType) {
    case LOCAL:
      path = getUpdatedFilePath(path, fileType);
      fileChannel = new FileOutputStream(path, true).getChannel();
      try {
        fileChannel.truncate(newSize);
      } finally {
        if (fileChannel != null) {
          fileChannel.close();
        }
      }
      return;
    case HDFS:
    case ALLUXIO:
    case VIEWFS:
    case S3:
      // FileSystem.truncate() was added in Hadoop 2.7; on older versions fall back to
      // CarbonFile.truncate(), so the method is looked up and invoked via reflection.
      try {
        Path pt = new Path(path);
        FileSystem fs = pt.getFileSystem(configuration);
        Method truncateMethod = fs.getClass().getDeclaredMethod("truncate",
            new Class[] { Path.class, long.class });
        truncateMethod.invoke(fs, new Object[] { pt, newSize });
      } catch (NoSuchMethodException e) {
        LOGGER.error("The Hadoop version is below 2.7 and FileSystem has no 'truncate' method;"
            + " falling back to 'CarbonFile.truncate'.");
        CarbonFile carbonFile = FileFactory.getCarbonFile(path, fileType);
        carbonFile.truncate(path, newSize);
      } catch (Exception e) {
        LOGGER.error("Exception occurred while truncating the file: " + e.getMessage());
      }
      return;
    default:
      fileChannel = new FileOutputStream(path, true).getChannel();
      try {
        fileChannel.truncate(newSize);
      } finally {
        if (fileChannel != null) {
          fileChannel.close();
        }
      }
      return;
  }
}
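A minimal sketch mirroring the recovery path in the first example: shrink a stream data file back to the size recorded in its .carbonindex entry. The filePath and blockIndex variables are assumed to come from the surrounding recovery logic.
// Sketch only: truncate the uncommitted tail of a stream data file.
FileFactory.FileType fileType = FileFactory.getFileType(filePath);
long committedSize = blockIndex.getFile_size(); // size recorded at the last successful commit
if (committedSize < FileFactory.getCarbonFile(filePath, fileType).getSize()) {
  FileFactory.truncateFile(filePath, fileType, committedSize);
}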