use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.
Example from the class CarbonMetadataUtil, method getIndexHeader.
/**
 * Builds the thrift {@link IndexHeader} that is written at the start of a
 * carbon index file.
 *
 * @param columnCardinality cardinality of each column
 * @param columnSchemaList  columns present in the table
 * @param bucketNumber      bucket id to record in the header
 * @return populated index header
 */
public static IndexHeader getIndexHeader(int[] columnCardinality, List<ColumnSchema> columnSchemaList, int bucketNumber) {
// segment info carries the column count and per-column cardinalities
SegmentInfo info = new SegmentInfo();
info.setNum_cols(columnSchemaList.size());
info.setColumn_cardinalities(CarbonUtil.convertToIntegerList(columnCardinality));
IndexHeader header = new IndexHeader();
// record the writer format version so readers pick the matching decoder
header.setVersion(CarbonProperties.getInstance().getFormatVersion().number());
header.setSegment_info(info);
header.setTable_columns(columnSchemaList);
header.setBucket_id(bucketNumber);
return header;
}
use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.
Example from the class CarbonMetadataUtilTest, method testGetIndexHeader.
@Test
public void testGetIndexHeader() {
// hand-build the header we expect getIndexHeader(...) to produce
int[] cardinalities = { 1, 2, 3, 4 };
SegmentInfo expectedSegmentInfo = new SegmentInfo();
expectedSegmentInfo.setNum_cols(0);
expectedSegmentInfo.setColumn_cardinalities(CarbonUtil.convertToIntegerList(cardinalities));
IndexHeader expected = new IndexHeader();
expected.setVersion(3);
expected.setSegment_info(expectedSegmentInfo);
expected.setTable_columns(columnSchemaList);
expected.setBucket_id(0);
expected.setSchema_time_stamp(0L);
// exercise the production code and compare against the expectation
IndexHeader actual = getIndexHeader(cardinalities, columnSchemaList, 0, 0L);
assertEquals(expected, actual);
}
use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.
Example from the class CarbonMetadataUtil, method getIndexHeader (with schema timestamp).
/**
 * Builds the thrift {@link IndexHeader} for an index file, stamping it with
 * the table's current schema timestamp.
 *
 * @param columnCardinality cardinality of each column
 * @param columnSchemaList  columns present in the table
 * @param bucketNumber      bucket id to record in the header
 * @param schemaTimeStamp   current timestamp of the table schema
 * @return populated index header
 */
public static IndexHeader getIndexHeader(int[] columnCardinality, List<ColumnSchema> columnSchemaList, int bucketNumber, long schemaTimeStamp) {
IndexHeader header = new IndexHeader();
// record the writer format version so readers pick the matching decoder
ColumnarFormatVersion formatVersion = CarbonProperties.getInstance().getFormatVersion();
header.setVersion(formatVersion.number());
// segment info carries the column count and per-column cardinalities
SegmentInfo info = new SegmentInfo();
info.setNum_cols(columnSchemaList.size());
info.setColumn_cardinalities(CarbonUtil.convertToIntegerList(columnCardinality));
header.setSegment_info(info);
header.setTable_columns(columnSchemaList);
header.setBucket_id(bucketNumber);
// the schema timestamp is used later to decide whether a block was
// written before a schema restructure (alter table)
header.setSchema_time_stamp(schemaTimeStamp);
return header;
}
use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.
Example from the class AbstractFactDataWriter, method writeIndexFile.
/**
 * Writes the carbon index file for the data written so far: an
 * {@link IndexHeader} followed by one {@link BlockIndex} entry per block.
 * Depending on configuration the file is either written directly to HDFS
 * (and completed asynchronously) or written to a randomly chosen local temp
 * location and then copied to the carbon store path.
 *
 * @throws IOException throws io exception if any problem while writing
 * @throws CarbonDataWriterException data writing
 */
protected void writeIndexFile() throws IOException, CarbonDataWriterException {
// build the header from schema, cardinalities, bucket id and schema timestamp
IndexHeader indexHeader = CarbonMetadataUtil.getIndexHeader(localCardinality, thriftColumnSchemaList, model.getBucketId(), model.getSchemaUpdatedTimeStamp());
// convert the collected block index info into its thrift representation
List<BlockIndex> blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList);
String indexFileName;
if (enableDirectlyWriteData2Hdfs) {
String rawFileName = model.getCarbonDataDirectoryPath() + File.separator + CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp());
indexFileName = FileFactory.getUpdatedFilePath(rawFileName, FileFactory.FileType.HDFS);
} else {
// randomly choose a temp location for the index file to spread I/O across
// the configured disks; ThreadLocalRandom avoids allocating a new Random
// instance on every call
String[] tempLocations = model.getStoreLocation();
String chosenTempLocation = tempLocations[java.util.concurrent.ThreadLocalRandom.current().nextInt(tempLocations.length)];
LOGGER.info("Randomly choose index file location: " + chosenTempLocation);
indexFileName = chosenTempLocation + File.separator + CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp());
}
CarbonIndexFileWriter writer = new CarbonIndexFileWriter();
// open the thrift writer on the target file
writer.openThriftWriter(indexFileName);
// write the header first, then one entry per block
writer.writeThrift(indexHeader);
for (BlockIndex blockIndex : blockIndexThrift) {
writer.writeThrift(blockIndex);
}
writer.close();
if (enableDirectlyWriteData2Hdfs) {
// direct-to-HDFS mode: finish the backend write asynchronously
executorServiceSubmitList.add(executorService.submit(new CompleteHdfsBackendThread(indexFileName, FileFactory.FileType.HDFS)));
} else {
// local-temp mode: copy the finished index file to the store path
CarbonUtil.copyCarbonDataFileToCarbonStorePath(indexFileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes);
}
}
use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.
Example from the class DataFileFooterConverterTest, method testGetIndexInfo.
@Test
public void testGetIndexInfo() throws Exception {
// Verifies DataFileFooterConverter.getIndexInfo(): the index file reader is
// mocked to return one header and one block index, and the test checks that
// the blocklet b-tree start key survives the conversion to DataFileFooter.
DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
final ThriftReader thriftReader = new ThriftReader("file");
// encodings applied to every column schema used in this fixture
List<Encoding> encoders = new ArrayList<>();
encoders.add(Encoding.INVERTED_INDEX);
encoders.add(Encoding.BIT_PACKED);
encoders.add(Encoding.DELTA);
encoders.add(Encoding.DICTIONARY);
encoders.add(Encoding.DIRECT_DICTIONARY);
encoders.add(Encoding.RLE);
// one column per supported data type, all sharing the same encoders
ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
final List<ColumnSchema> columnSchemas = new ArrayList<>();
columnSchemas.add(columnSchema);
columnSchemas.add(columnSchema1);
columnSchemas.add(columnSchema2);
columnSchemas.add(columnSchema3);
columnSchemas.add(columnSchema4);
columnSchemas.add(columnSchema5);
columnSchemas.add(columnSchema6);
columnSchemas.add(columnSchema7);
// block index fixture: b-tree start/end keys plus min/max values
final BlockIndex blockIndex = new BlockIndex();
blockIndex.setBlock_index(new org.apache.carbondata.format.BlockletIndex());
org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
blockletBTreeIndex.setStart_key("1".getBytes());
blockletBTreeIndex.setEnd_key("3".getBytes());
blockletIndex1.setB_tree_index(blockletBTreeIndex);
BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
blockletIndex1.setMin_max_index(blockletMinMaxIndex);
// note: this replaces the empty BlockletIndex set a few lines above
blockIndex.setBlock_index(blockletIndex1);
List<Integer> column_cardinalities = new ArrayList<>();
column_cardinalities.add(new Integer("1"));
final org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
// mock the index file reader: hasNext() yields true exactly once so the
// converter reads a single (header, blockIndex) pair
new MockUp<CarbonIndexFileReader>() {
boolean mockedHasNextStatus = true;
@SuppressWarnings("unused")
@Mock
public boolean hasNext() throws IOException {
boolean temp = mockedHasNextStatus;
mockedHasNextStatus = false;
return temp;
}
@SuppressWarnings("unused")
@Mock
public void openThriftReader(String filePath) throws IOException {
thriftReader.open();
}
@SuppressWarnings("unused")
@Mock
public IndexHeader readIndexHeader() throws IOException {
return new IndexHeader(1, columnSchemas, segmentInfo1);
}
@SuppressWarnings("unused")
@Mock
public BlockIndex readBlockIndexInfo() throws IOException {
return blockIndex;
}
@SuppressWarnings("unused")
@Mock
public void closeThriftReader() {
thriftReader.close();
}
};
// ensure the header exposes the fixture columns regardless of constructor state
new MockUp<IndexHeader>() {
@SuppressWarnings("unused")
@Mock
public List<ColumnSchema> getTable_columns() {
return columnSchemas;
}
};
// stub file access so the converter reads from an in-memory stream
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
new MockUp<FileFactory>() {
@SuppressWarnings("unused")
@Mock
public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
return dataInputStream;
}
};
String[] arr = { "a", "b", "c" };
String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
tableBlockInfoList.add(tableBlockInfo);
String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
// exercise the converter against the mocked index file
List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
// the b-tree start key set in the fixture ("1") must come back unchanged
byte[] exp = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
byte[] res = "1".getBytes();
for (int i = 0; i < exp.length; i++) {
assertEquals(exp[i], res[i]);
}
}
Aggregations