Example usage of org.apache.carbondata.core.reader.CarbonIndexFileReader in the Apache CarbonData project: class StreamPruner, method listAllStreamFiles.
// TODO optimize and move the code to StreamSegment , but it's in the streaming module.
/**
 * Collects the stream files recorded in each segment's stream index file.
 * Also updates {@code totalFileNums} with the number of files found.
 *
 * @param segments segments whose stream index files should be scanned
 * @param withMinMax when true, attach the min/max index (if present) to each stream file
 * @return one StreamFile per index entry across all given segments
 * @throws IOException if a stream index file cannot be read
 */
private List<StreamFile> listAllStreamFiles(List<Segment> segments, boolean withMinMax) throws IOException {
    List<StreamFile> collectedFiles = new ArrayList<>();
    String tablePath = carbonTable.getAbsoluteTableIdentifier().getTablePath();
    for (Segment segment : segments) {
        String segmentDir = CarbonTablePath.getSegmentPath(tablePath, segment.getSegmentNo());
        String streamIndexPath = CarbonTablePath.getCarbonStreamIndexFilePath(segmentDir);
        // segments without a stream index file contribute nothing
        if (!FileFactory.isFileExist(streamIndexPath)) {
            continue;
        }
        CarbonIndexFileReader reader = new CarbonIndexFileReader();
        reader.openThriftReader(streamIndexPath);
        try {
            while (reader.hasNext()) {
                BlockIndex entry = reader.readBlockIndexInfo();
                StreamFile streamFile = new StreamFile(segment.getSegmentNo(),
                        segmentDir + File.separator + entry.getFile_name(), entry.getFile_size());
                collectedFiles.add(streamFile);
                // attach min/max only when requested and actually present in the index entry
                if (withMinMax && entry.getBlock_index() != null
                        && entry.getBlock_index().getMin_max_index() != null) {
                    streamFile.setMinMaxIndex(CarbonMetadataUtil
                            .convertExternalMinMaxIndex(entry.getBlock_index().getMin_max_index()));
                }
            }
        } finally {
            reader.closeThriftReader();
        }
    }
    totalFileNums = collectedFiles.size();
    return collectedFiles;
}
Example usage of org.apache.carbondata.core.reader.CarbonIndexFileReader in the Apache CarbonData project: class AbstractDataFileFooterConverter, method getIndexInfo.
/**
 * Reads every block entry from an index file — either from the given path or
 * from an in-memory copy of its bytes — and converts each entry into a
 * {@link DataFileFooter}.
 *
 * @param filePath path of the index file (also used to derive the parent directory)
 * @param fileData raw bytes of the index file; when null the file is read from filePath
 * @param isTransactionalTable when false, column unique ids are rewritten for
 *        non-transactional tables
 * @return one DataFileFooter per block entry in the index file
 * @throws IOException if the index file cannot be read
 */
public List<DataFileFooter> getIndexInfo(String filePath, byte[] fileData, boolean isTransactionalTable) throws IOException {
    CarbonIndexFileReader reader = new CarbonIndexFileReader(configuration);
    List<DataFileFooter> footers = new ArrayList<DataFileFooter>();
    // normalize Windows separators so the parent directory can be cut at "/"
    String normalizedPath = filePath.replace("\\", "/");
    String parentPath = normalizedPath.substring(0, normalizedPath.lastIndexOf("/"));
    try {
        // prefer the in-memory copy when one was supplied
        if (fileData == null) {
            reader.openThriftReader(filePath);
        } else {
            reader.openThriftReader(fileData);
        }
        org.apache.carbondata.format.IndexHeader header = reader.readIndexHeader();
        List<ColumnSchema> columns = convertColumnSchemaList(header.getTable_columns());
        if (!isTransactionalTable) {
            QueryUtil.updateColumnUniqueIdForNonTransactionTable(columns);
        }
        while (reader.hasNext()) {
            BlockIndex entry = reader.readBlockIndexInfo();
            BlockletIndex blockletIndex = getBlockletIndex(entry.getBlock_index());
            TableBlockInfo blockInfo = getTableBlockInfo(entry, header, parentPath);
            DataFileFooter footer = new DataFileFooter();
            footer.setBlockletIndex(blockletIndex);
            footer.setColumnInTable(columns);
            footer.setNumberOfRows(entry.getNum_rows());
            footer.setBlockInfo(blockInfo);
            footer.setVersionId(blockInfo.getVersion());
            // index files written with an old schema carry no schema timestamp
            if (header.isSetSchema_time_stamp()) {
                footer.setSchemaUpdatedTimeStamp(header.getSchema_time_stamp());
            }
            // merged/compacted index files embed the blocklet info directly
            if (entry.isSetBlocklet_info()) {
                BlockletInfo blockletInfo = new DataFileFooterConverterV3(configuration)
                        .getBlockletInfo(entry.getBlocklet_info(),
                                CarbonUtil.getNumberOfDimensionColumns(columns));
                blockletInfo.setBlockletIndex(blockletIndex);
                List<BlockletInfo> blockletInfoList = new ArrayList<BlockletInfo>();
                blockletInfoList.add(blockletInfo);
                footer.setBlockletList(blockletInfoList);
            }
            footers.add(footer);
        }
    } finally {
        reader.closeThriftReader();
    }
    return footers;
}
Example usage of org.apache.carbondata.core.reader.CarbonIndexFileReader in the Apache CarbonData project: class AbstractDataFileFooterConverter, method getIndexInfo (overload taking a list of TableBlockInfo).
/**
 * Below method will be used to get the index info from index file.
 * Each index entry whose file part number matches its position in the file is
 * paired with the corresponding entry of {@code tableBlockInfoList} and turned
 * into a {@link DataFileFooter}; reading stops once every given block info is matched.
 *
 * @param filePath file path of the index file
 * @param tableBlockInfoList table block index
 * @return list of index info
 * @throws IOException problem while reading the index file
 */
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList) throws IOException {
CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
try {
// open the reader
indexReader.openThriftReader(filePath);
// get the index header
org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
List<ColumnSchema> columnSchemaList = convertColumnSchemaList(readIndexHeader.getTable_columns());
// get the segment info
BlockletIndex blockletIndex = null;
// counter tracks the position of the current entry within the index file;
// index tracks how many entries of tableBlockInfoList have been consumed
int counter = 0;
int index = 0;
DataFileFooter dataFileFooter = null;
// read the block info from file
while (indexReader.hasNext()) {
BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
dataFileFooter = new DataFileFooter();
TableBlockInfo tableBlockInfo = tableBlockInfoList.get(index);
// only accept the entry when the part number embedded in the data file name
// matches the entry's ordinal position in the index file; counter advances
// regardless so positions stay aligned with the file. NOTE(review): assumes
// tableBlockInfoList is ordered by part number — confirm with callers.
if (Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(tableBlockInfo.getFilePath())) == counter++) {
tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
tableBlockInfo.setVersion(ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
dataFileFooter.setBlockletIndex(blockletIndex);
dataFileFooter.setColumnInTable(columnSchemaList);
dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
dataFileFooter.setBlockInfo(tableBlockInfo);
// sort flag is only present in newer index files
if (readIndexHeader.isSetIs_sort()) {
dataFileFooter.setSorted(readIndexHeader.isIs_sort());
} else {
// V3 files without the flag have unknown sort state
if (tableBlockInfo.getVersion() == ColumnarFormatVersion.V3) {
dataFileFooter.setSorted(null);
}
}
dataFileFooters.add(dataFileFooter);
// stop once every requested block info has been matched
if (++index == tableBlockInfoList.size()) {
break;
}
}
}
} finally {
indexReader.closeThriftReader();
}
return dataFileFooters;
}
Example usage of org.apache.carbondata.core.reader.CarbonIndexFileReader in the Apache CarbonData project: class SegmentIndexFileStore, method readIndexAndFillBlockletInfo.
/**
 * This method will read the index information from carbon index file.
 * If the index file already embeds blocklet info, the file is read as-is;
 * otherwise blocklet info is recovered from the carbondata file footers and a
 * new thrift-serialized index (one entry per blocklet) is cached in
 * {@code carbonIndexMap} keyed by the index file name.
 *
 * @param indexFile index file to read and, if necessary, enrich with blocklet info
 * @throws IOException if the index file or the referenced data file footers cannot be read
 */
private void readIndexAndFillBlockletInfo(CarbonFile indexFile) throws IOException {
// flag to take decision whether carbondata file footer reading is required.
// If the index file does not contain the file footer then carbondata file footer
// read is required else not required
boolean isCarbonDataFileFooterReadRequired = true;
List<BlockletInfo> blockletInfoList = null;
List<BlockIndex> blockIndexThrift = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
try {
indexReader.openThriftReader(indexFile.getCanonicalPath());
// get the index header
org.apache.carbondata.format.IndexHeader indexHeader = indexReader.readIndexHeader();
DataFileFooterConverter fileFooterConverter = new DataFileFooterConverter(FileFactory.getConfiguration());
String filePath = FileFactory.getUpdatedFilePath(indexFile.getCanonicalPath());
String parentPath = filePath.substring(0, filePath.lastIndexOf(CarbonCommonConstants.FILE_SEPARATOR));
while (indexReader.hasNext()) {
BlockIndex blockIndex = indexReader.readBlockIndexInfo();
if (blockIndex.isSetBlocklet_info()) {
// this case will come in case segment index compaction property is set to false from the
// application and alter table segment index compaction is run manually. In that case
// blocklet info will be present in the index but read carbon data file footer property
// will be true
isCarbonDataFileFooterReadRequired = false;
break;
} else {
// recover per-blocklet info from the carbondata file footer
TableBlockInfo blockInfo = fileFooterConverter.getTableBlockInfo(blockIndex, indexHeader, parentPath);
blockletInfoList = getBlockletInfoFromIndexInfo(blockInfo);
}
// the same entry with different blocklet info need to be repeated
// (one thrift BlockIndex per blocklet of the original block)
for (BlockletInfo info : blockletInfoList) {
BlockIndex blockIndexReplica = blockIndex.deepCopy();
BlockletInfo blockletInfo = info;
blockIndexReplica.setBlock_index(CarbonMetadataUtil.getBlockletIndex(blockletInfo.getBlockletIndex()));
blockIndexReplica.setBlocklet_info(CarbonMetadataUtil.getBlockletInfo3(blockletInfo));
blockIndexThrift.add(blockIndexReplica);
}
}
// read complete file at once
if (!isCarbonDataFileFooterReadRequired) {
// blocklet info was already embedded: cache the raw file contents
readIndexFile(indexFile);
} else {
// serialize header + expanded entries into one byte[] and cache it;
// total size is accumulated first so the buffer can be allocated exactly
int totalSize = 0;
List<byte[]> blockIndexByteArrayList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
byte[] indexHeaderBytes = CarbonUtil.getByteArray(indexHeader);
totalSize += indexHeaderBytes.length;
blockIndexByteArrayList.add(indexHeaderBytes);
for (BlockIndex blockIndex : blockIndexThrift) {
byte[] indexInfoBytes = CarbonUtil.getByteArray(blockIndex);
totalSize += indexInfoBytes.length;
blockIndexByteArrayList.add(indexInfoBytes);
}
ByteBuffer byteBuffer = ByteBuffer.allocate(totalSize);
for (byte[] blockIndexBytes : blockIndexByteArrayList) {
byteBuffer.put(blockIndexBytes);
}
carbonIndexMap.put(indexFile.getName(), byteBuffer.array());
}
} finally {
indexReader.closeThriftReader();
}
}
Example usage of org.apache.carbondata.core.reader.CarbonIndexFileReader in the Apache CarbonData project: class CarbonUtil, method inferSchemaFromIndexFile.
/**
 * Infers a thrift TableInfo (schema) from the header of a carbon index file.
 * Only the column schemas are real; the remaining table metadata is filled
 * with dummy values.
 *
 * @param indexFilePath path of the index file to read the schema from
 * @param tableName name stamped on the reconstructed (dummy) table schema
 * @return thrift TableInfo holding the inferred fact-table schema
 * @throws IOException if the index file cannot be read
 */
public static org.apache.carbondata.format.TableInfo inferSchemaFromIndexFile(String indexFilePath, String tableName) throws IOException {
    CarbonIndexFileReader reader = new CarbonIndexFileReader();
    try {
        reader.openThriftReader(indexFilePath);
        org.apache.carbondata.format.IndexHeader header = reader.readIndexHeader();
        List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
        for (org.apache.carbondata.format.ColumnSchema thriftColumn : header.getTable_columns()) {
            columnSchemaList.add(thriftColumnSchemaToWrapperColumnSchema(thriftColumn));
        }
        // only columnSchema is the valid entry, reset all dummy entries.
        TableSchema tableSchema = getDummyTableSchema(tableName, columnSchemaList);
        org.apache.carbondata.format.TableSchema thriftFactTable =
                new ThriftWrapperSchemaConverterImpl().fromWrapperToExternalTableSchema(tableSchema);
        return new org.apache.carbondata.format.TableInfo(thriftFactTable,
                new ArrayList<org.apache.carbondata.format.TableSchema>());
    } finally {
        reader.closeThriftReader();
    }
}
Aggregations