use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.
the class CarbonMetadataUtilTest method testGetBlockIndexInfo.
@Test
public void testGetBlockIndexInfo() throws Exception {
byte[] startKey = { 1, 2, 3, 4, 5 };
byte[] endKey = { 9, 3, 5, 5, 5 };
byte[] byteArr = { 1, 2, 3, 4, 5 };
List<ByteBuffer> minList = new ArrayList<>();
minList.add(ByteBuffer.wrap(byteArr));
byte[] byteArr1 = { 9, 9, 8, 6, 7 };
List<ByteBuffer> maxList = new ArrayList<>();
maxList.add(ByteBuffer.wrap(byteArr1));
org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex blockletMinMaxIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletMinMaxIndex(minList, maxList);
org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex blockletBTreeIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletBTreeIndex(startKey, endKey);
org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex blockletIndex = new org.apache.carbondata.core.metadata.blocklet.index.BlockletIndex(blockletBTreeIndex, blockletMinMaxIndex);
BlockIndexInfo blockIndexInfo = new BlockIndexInfo(1, "file", 1, blockletIndex);
List<BlockIndexInfo> blockIndexInfoList = new ArrayList<>();
blockIndexInfoList.add(blockIndexInfo);
List<BlockIndex> result = getBlockIndexInfo(blockIndexInfoList);
String expected = "file";
assertEquals(result.get(0).file_name, expected);
}
use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.
the class AbstractDataFileFooterConverter method getIndexInfo.
/**
* Below method will be used to get the index info from index file
*
* @param filePath file path of the index file
* @return list of index info
* @throws IOException problem while reading the index file
*/
public List<DataFileFooter> getIndexInfo(String filePath, byte[] fileData) throws IOException {
CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
String parentPath = filePath.substring(0, filePath.lastIndexOf("/"));
try {
// open the reader
if (fileData != null) {
indexReader.openThriftReader(fileData);
} else {
indexReader.openThriftReader(filePath);
}
// get the index header
org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
List<org.apache.carbondata.format.ColumnSchema> table_columns = readIndexHeader.getTable_columns();
for (int i = 0; i < table_columns.size(); i++) {
columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
}
// get the segment info
SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
BlockletIndex blockletIndex = null;
DataFileFooter dataFileFooter = null;
// read the block info from file
while (indexReader.hasNext()) {
BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
dataFileFooter = new DataFileFooter();
TableBlockInfo tableBlockInfo = getTableBlockInfo(readBlockIndexInfo, readIndexHeader, parentPath);
dataFileFooter.setBlockletIndex(blockletIndex);
dataFileFooter.setColumnInTable(columnSchemaList);
dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
dataFileFooter.setSegmentInfo(segmentInfo);
dataFileFooter.setVersionId(tableBlockInfo.getVersion());
// In case of old schema time stamp will not be found in the index header
if (readIndexHeader.isSetSchema_time_stamp()) {
dataFileFooter.setSchemaUpdatedTimeStamp(readIndexHeader.getSchema_time_stamp());
}
if (readBlockIndexInfo.isSetBlocklet_info()) {
List<BlockletInfo> blockletInfoList = new ArrayList<BlockletInfo>();
BlockletInfo blockletInfo = new DataFileFooterConverterV3().getBlockletInfo(readBlockIndexInfo.getBlocklet_info(), CarbonUtil.getNumberOfDimensionColumns(columnSchemaList));
blockletInfo.setBlockletIndex(blockletIndex);
blockletInfoList.add(blockletInfo);
dataFileFooter.setBlockletList(blockletInfoList);
}
dataFileFooters.add(dataFileFooter);
}
} finally {
indexReader.closeThriftReader();
}
return dataFileFooters;
}
use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.
the class CarbonMetadataUtil method getBlockIndexInfo.
/**
* Below method will be used to get the block index info thrift object for
* each block present in the segment
*
* @param blockIndexInfoList block index info list
* @return list of block index
*/
public static List<BlockIndex> getBlockIndexInfo(List<BlockIndexInfo> blockIndexInfoList) {
List<BlockIndex> thriftBlockIndexList = new ArrayList<BlockIndex>();
BlockIndex blockIndex = null;
// below code to create block index info object for each block
for (BlockIndexInfo blockIndexInfo : blockIndexInfoList) {
blockIndex = new BlockIndex();
blockIndex.setNum_rows(blockIndexInfo.getNumberOfRows());
blockIndex.setOffset(blockIndexInfo.getOffset());
blockIndex.setFile_name(blockIndexInfo.getFileName());
blockIndex.setBlock_index(getBlockletIndex(blockIndexInfo.getBlockletIndex()));
if (blockIndexInfo.getBlockletInfo() != null) {
blockIndex.setBlocklet_info(getBlocletInfo3(blockIndexInfo.getBlockletInfo()));
}
thriftBlockIndexList.add(blockIndex);
}
return thriftBlockIndexList;
}
use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.
the class AbstractFactDataWriter method writeIndexFile.
/**
* Below method will be used to write the idex file
*
* @throws IOException throws io exception if any problem while writing
* @throws CarbonDataWriterException data writing
*/
protected void writeIndexFile() throws IOException, CarbonDataWriterException {
// get the header
IndexHeader indexHeader = CarbonMetadataUtil.getIndexHeader(localCardinality, thriftColumnSchemaList, model.getBucketId(), model.getSchemaUpdatedTimeStamp());
// get the block index info thrift
List<BlockIndex> blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList);
String indexFileName;
if (enableDirectlyWriteData2Hdfs) {
String rawFileName = model.getCarbonDataDirectoryPath() + File.separator + CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp());
indexFileName = FileFactory.getUpdatedFilePath(rawFileName, FileFactory.FileType.HDFS);
} else {
// randomly choose a temp location for index file
String[] tempLocations = model.getStoreLocation();
String chosenTempLocation = tempLocations[new Random().nextInt(tempLocations.length)];
LOGGER.info("Randomly choose index file location: " + chosenTempLocation);
indexFileName = chosenTempLocation + File.separator + CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp());
}
CarbonIndexFileWriter writer = new CarbonIndexFileWriter();
// open file
writer.openThriftWriter(indexFileName);
// write the header first
writer.writeThrift(indexHeader);
// write the indexes
for (BlockIndex blockIndex : blockIndexThrift) {
writer.writeThrift(blockIndex);
}
writer.close();
if (enableDirectlyWriteData2Hdfs) {
executorServiceSubmitList.add(executorService.submit(new CompleteHdfsBackendThread(indexFileName, FileFactory.FileType.HDFS)));
} else {
CarbonUtil.copyCarbonDataFileToCarbonStorePath(indexFileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes);
}
}
use of org.apache.carbondata.format.BlockIndex in project carbondata by apache.
the class AbstractDataFileFooterConverter method getIndexInfo.
/**
* Below method will be used to get the index info from index file
*
* @param filePath file path of the index file
* @param tableBlockInfoList table block index
* @return list of index info
* @throws IOException problem while reading the index file
*/
public List<DataFileFooter> getIndexInfo(String filePath, List<TableBlockInfo> tableBlockInfoList) throws IOException {
CarbonIndexFileReader indexReader = new CarbonIndexFileReader();
List<DataFileFooter> dataFileFooters = new ArrayList<DataFileFooter>();
try {
// open the reader
indexReader.openThriftReader(filePath);
// get the index header
org.apache.carbondata.format.IndexHeader readIndexHeader = indexReader.readIndexHeader();
List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
List<org.apache.carbondata.format.ColumnSchema> table_columns = readIndexHeader.getTable_columns();
for (int i = 0; i < table_columns.size(); i++) {
columnSchemaList.add(thriftColumnSchmeaToWrapperColumnSchema(table_columns.get(i)));
}
// get the segment info
SegmentInfo segmentInfo = getSegmentInfo(readIndexHeader.getSegment_info());
BlockletIndex blockletIndex = null;
int counter = 0;
int index = 0;
DataFileFooter dataFileFooter = null;
// read the block info from file
while (indexReader.hasNext()) {
BlockIndex readBlockIndexInfo = indexReader.readBlockIndexInfo();
blockletIndex = getBlockletIndex(readBlockIndexInfo.getBlock_index());
dataFileFooter = new DataFileFooter();
TableBlockInfo tableBlockInfo = tableBlockInfoList.get(index);
if (Integer.parseInt(CarbonTablePath.DataFileUtil.getPartNo(tableBlockInfo.getFilePath())) == counter++) {
tableBlockInfo.setBlockOffset(readBlockIndexInfo.getOffset());
tableBlockInfo.setVersion(ColumnarFormatVersion.valueOf((short) readIndexHeader.getVersion()));
int blockletSize = getBlockletSize(readBlockIndexInfo);
tableBlockInfo.getBlockletInfos().setNoOfBlockLets(blockletSize);
dataFileFooter.setBlockletIndex(blockletIndex);
dataFileFooter.setColumnInTable(columnSchemaList);
dataFileFooter.setNumberOfRows(readBlockIndexInfo.getNum_rows());
dataFileFooter.setBlockInfo(new BlockInfo(tableBlockInfo));
dataFileFooter.setSegmentInfo(segmentInfo);
dataFileFooters.add(dataFileFooter);
if (++index == tableBlockInfoList.size()) {
break;
}
}
}
} finally {
indexReader.closeThriftReader();
}
return dataFileFooters;
}
Aggregations