use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.
the class StreamSegment method size.
/**
 * Calculate the size of the segment by accumulating the data file sizes
 * recorded in the index file.
 */
public static long size(String segmentDir) throws IOException {
  long size = 0;
  FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
  if (FileFactory.isFileExist(segmentDir, fileType)) {
    String indexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir);
    CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
    if (index.exists()) {
      CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
      try {
        indexReader.openThriftReader(indexPath);
        while (indexReader.hasNext()) {
          BlockIndex blockIndex = indexReader.readBlockIndexInfo();
          size += blockIndex.getFile_size();
        }
      } finally {
        indexReader.closeThriftReader();
      }
    }
  }
  return size;
}
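A minimal caller sketch for this method. It assumes StreamSegment is imported from the carbondata streaming module, and the segment directory is a placeholder path; both are assumptions for illustration, not part of the snippet above.

import java.io.IOException;
import org.apache.carbondata.streaming.segment.StreamSegment;

public class SegmentSizeExample {
  public static void main(String[] args) throws IOException {
    // hypothetical segment directory; point this at a real streaming segment
    String segmentDir = "/warehouse/db/sample_table/Fact/Part0/Segment_0";
    long bytes = StreamSegment.size(segmentDir);
    System.out.println("streaming segment size: " + bytes + " bytes");
  }
}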
use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.
the class StreamSegment method recoverSegmentIfRequired.
/**
 * Check the health of a stream segment and try to recover it from a job fault.
 * This method is invoked in the following scenarios:
 * 1. at the beginning of streaming (StreamSinkFactory.getStreamSegmentId)
 * 2. after a job has failed (CarbonAppendableStreamSink.writeDataFileJob)
 */
public static void recoverSegmentIfRequired(String segmentDir) throws IOException {
  FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
  if (FileFactory.isFileExist(segmentDir, fileType)) {
    String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
    String indexPath = segmentDir + File.separator + indexName;
    CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
    CarbonFile[] files = listDataFiles(segmentDir, fileType);
    if (index.exists()) {
      // index file exists: reconcile each data file against the index
      if (files.length > 0) {
        CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
        try {
          // map each data file name to the size recorded in the index
          indexReader.openThriftReader(indexPath);
          Map<String, Long> tableSizeMap = new HashMap<>();
          while (indexReader.hasNext()) {
            BlockIndex blockIndex = indexReader.readBlockIndexInfo();
            tableSizeMap.put(blockIndex.getFile_name(), blockIndex.getFile_size());
          }
          // recover each file: delete unindexed files, truncate oversized ones
          for (CarbonFile file : files) {
            Long size = tableSizeMap.get(file.getName());
            if (null == size || size == 0) {
              file.delete();
            } else if (size < file.getSize()) {
              FileFactory.truncateFile(file.getCanonicalPath(), fileType, size);
            }
          }
        } finally {
          indexReader.closeThriftReader();
        }
      }
    } else {
      // no index file: none of the data files were committed, drop them all
      if (files.length > 0) {
        for (CarbonFile file : files) {
          file.delete();
        }
      }
    }
  }
}
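A hedged usage sketch: a driver runs the recovery before reusing a segment left behind by a failed job. The package and directory below are assumptions for illustration.

import java.io.IOException;
import org.apache.carbondata.streaming.segment.StreamSegment;

public class SegmentRecoveryExample {
  public static void main(String[] args) throws IOException {
    // hypothetical segment directory of an interrupted streaming job
    String segmentDir = "/warehouse/db/sample_table/Fact/Part0/Segment_1";
    // deletes data files absent from the index and truncates files that
    // grew past the size recorded in the index
    StreamSegment.recoverSegmentIfRequired(segmentDir);
  }
}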
use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.
the class StreamSegment method recoverFileIfRequired.
/**
 * Check the health of a stream data file and try to recover it from a task fault.
 * This method is invoked in the following scenario:
 * 1. at the beginning of a data file writing task
 */
public static void recoverFileIfRequired(String segmentDir, String fileName, String indexName)
    throws IOException {
  FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
  String filePath = segmentDir + File.separator + fileName;
  CarbonFile file = FileFactory.getCarbonFile(filePath, fileType);
  String indexPath = segmentDir + File.separator + indexName;
  CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
  if (file.exists() && index.exists()) {
    CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
    try {
      indexReader.openThriftReader(indexPath);
      while (indexReader.hasNext()) {
        BlockIndex blockIndex = indexReader.readBlockIndexInfo();
        if (blockIndex.getFile_name().equals(fileName)) {
          if (blockIndex.getFile_size() == 0) {
            file.delete();
          } else if (blockIndex.getFile_size() < file.getSize()) {
            FileFactory.truncateFile(filePath, fileType, blockIndex.getFile_size());
          }
        }
      }
    } finally {
      indexReader.closeThriftReader();
    }
  }
}
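A sketch of how a writer task might invoke this before appending. The segment directory and data file name are hypothetical; the index file name is obtained the same way the snippet above does, and the StreamSegment import path is an assumption.

import java.io.IOException;
import org.apache.carbondata.core.util.path.CarbonTablePath;
import org.apache.carbondata.streaming.segment.StreamSegment;

public class FileRecoveryExample {
  public static void main(String[] args) throws IOException {
    // hypothetical paths; a real task would pass its own data file name
    String segmentDir = "/warehouse/db/sample_table/Fact/Part0/Segment_1";
    String fileName = "part-0-0.carbondata";
    StreamSegment.recoverFileIfRequired(
        segmentDir, fileName, CarbonTablePath.getCarbonStreamIndexFileName());
  }
}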
use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.
the class CarbonTableInputFormat method getSplitsOfStreaming.
/**
 * Use the file list in the .carbonindex file to get the splits of a streaming segment.
 */
public List<InputSplit> getSplitsOfStreaming(JobContext job, AbsoluteTableIdentifier identifier,
    List<Segment> streamSegments) throws IOException {
  List<InputSplit> splits = new ArrayList<InputSplit>();
  if (streamSegments != null && !streamSegments.isEmpty()) {
    numStreamSegments = streamSegments.size();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);
    for (Segment segment : streamSegments) {
      String segmentDir =
          CarbonTablePath.getSegmentPath(identifier.getTablePath(), segment.getSegmentNo());
      FileFactory.FileType fileType = FileFactory.getFileType(segmentDir);
      if (FileFactory.isFileExist(segmentDir, fileType)) {
        String indexName = CarbonTablePath.getCarbonStreamIndexFileName();
        String indexPath = segmentDir + File.separator + indexName;
        CarbonFile index = FileFactory.getCarbonFile(indexPath, fileType);
        // index file exists
        if (index.exists()) {
          CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
          try {
            // read the block index entry for each committed data file
            indexReader.openThriftReader(indexPath);
            while (indexReader.hasNext()) {
              BlockIndex blockIndex = indexReader.readBlockIndexInfo();
              String filePath = segmentDir + File.separator + blockIndex.getFile_name();
              Path path = new Path(filePath);
              long length = blockIndex.getFile_size();
              if (length != 0) {
                BlockLocation[] blkLocations;
                FileSystem fs = FileFactory.getFileSystem(path);
                FileStatus file = fs.getFileStatus(path);
                blkLocations = fs.getFileBlockLocations(path, 0, length);
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);
                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > 1.1) {
                  int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                  splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining,
                      splitSize, blkLocations[blkIndex].getHosts(),
                      blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                  bytesRemaining -= splitSize;
                }
                if (bytesRemaining != 0) {
                  int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                  splits.add(makeSplit(segment.getSegmentNo(), path, length - bytesRemaining,
                      bytesRemaining, blkLocations[blkIndex].getHosts(),
                      blkLocations[blkIndex].getCachedHosts(), FileFormat.ROW_V1));
                }
              } else {
                // create an empty hosts array for zero-length files
                splits.add(makeSplit(segment.getSegmentNo(), path, 0, length, new String[0],
                    FileFormat.ROW_V1));
              }
            }
          } finally {
            indexReader.closeThriftReader();
          }
        }
      }
    }
  }
  return splits;
}
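A sketch of a driver calling this method. The import paths, the AbsoluteTableIdentifier.from and Segment.toSegment factory calls, and the table path, database name, table name, and segment number are all assumptions tied to the CarbonData version shown, not guaranteed API.

import java.util.Collections;
import java.util.List;
import org.apache.carbondata.core.datamap.Segment;
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
import org.apache.carbondata.hadoop.api.CarbonTableInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;

public class StreamingSplitsExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // hypothetical table identifier: from(tablePath, dbName, tableName)
    AbsoluteTableIdentifier identifier =
        AbsoluteTableIdentifier.from("/warehouse/db/sample_table", "db", "sample_table");
    // streaming segment 0 of the table
    List<Segment> streamSegments = Collections.singletonList(Segment.toSegment("0"));
    CarbonTableInputFormat<Object> format = new CarbonTableInputFormat<>();
    List<InputSplit> splits = format.getSplitsOfStreaming(job, identifier, streamSegments);
    System.out.println("streaming splits: " + splits.size());
  }
}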
use of org.apache.carbondata.core.reader.CarbonIndexFileReader in project carbondata by apache.
the class AbstractDataFileFooterConverter method getIndexInfo.
/**
 * Get the index info from an index file.
 *
 * @param filePath file path of the index file
 * @param fileData optional in-memory content of the index file; when null,
 *                 the file is read from filePath
 * @return list of index info
 * @throws IOException problem while reading the index file
 */
public List<DataFileFooter> getIndexInfo(String filePath, byte[] fileData) throws IOException {
  CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
  List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
  String parentPath = filePath.substring(0, filePath.lastIndexOf("/"));
  try {
    // open the reader, either over the in-memory bytes or the file itself
    if (fileData != null) {
      indexReader.openThriftReader(fileData);
    } else {
      indexReader.openThriftReader(filePath);
    }
    // read the index header and convert the thrift column schemas
    org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
    List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
    List<org.apache.carbondata.format.ColumnSchema> table_columns =
        readIndexHeader.getTable_columns();
    for (int i = 0; i < table_columns.size(); i++) {
      columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
    }
    // get the segment info
    SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
    BlockletIndex blockletIndex = null;
    DataFileFooter dataFileFooter = null;
    // read the block info from the file, one entry per data file block
    while (indexReader.hasNext()) {
      BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
      blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
      dataFileFooter = new DataFileFooter();
      TableBlockInfo tableBlockInfo =
          getTableBlockInfo(readBlockIndexInfo, readIndexHeader, parentPath);
      dataFileFooter.setBlockletIndex(blockletIndex);
      dataFileFooter.setColumnInTable(columnSchemaList);
      dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
      dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
      dataFileFooter.setSegmentInfo(segmentInfo);
      dataFileFooter.setVersionId(tableBlockInfo.getVersion());
      // in case of an old schema, the timestamp will not be present in the index header
      if (readIndexHeader.isSetSchema_time_stamp()) {
        dataFileFooter.setSchemaUpdatedTimeStamp(readIndexHeader.getSchema_time_stamp());
      }
      if (readBlockIndexInfo.isSetBlocklet_info()) {
        List<BlockletInfo> blockletInfoList = new ArrayList<BlockletInfo>();
        BlockletInfo blockletInfo = new DataFileFooterConverterV3().getBlockletInfo(
            readBlockIndexInfo.getBlocklet_info(),
            CarbonUtil.getNumberOfDimensionColumns(columnSchemaList));
        blockletInfo.setBlockletIndex(blockletIndex);
        blockletInfoList.add(blockletInfo);
        dataFileFooter.setBlockletList(blockletInfoList);
      }
      dataFileFooters.add(dataFileFooter);
    }
  } finally {
    indexReader.closeThriftReader();
  }
  return dataFileFooters;
}
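A brief usage sketch, reading footers through the concrete DataFileFooterConverter subclass of AbstractDataFileFooterConverter. The index file path is a placeholder; passing null for fileData makes the reader open the file itself, as the branch at the top of the method shows.

import java.io.IOException;
import java.util.List;
import org.apache.carbondata.core.metadata.blocklet.DataFileFooter;
import org.apache.carbondata.core.util.DataFileFooterConverter;

public class IndexInfoExample {
  public static void main(String[] args) throws IOException {
    // hypothetical index file path inside a segment directory
    String indexPath = "/warehouse/db/sample_table/Fact/Part0/Segment_0/0.carbonindex";
    // null fileData => the reader opens the thrift stream from the path
    List<DataFileFooter> footers = new DataFileFooterConverter().getIndexInfo(indexPath, null);
    for (DataFileFooter footer : footers) {
      System.out.println("block rows: " + footer.getNumberOfRows());
    }
  }
}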