Search in sources :

Example 1 with IndexHeader

use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.

the class CarbonMetadataUtil method getIndexHeader.

/**
 * Assembles the thrift index header from the table's column details.
 *
 * @param columnCardinality cardinality of each column
 * @param columnSchemaList  list of columns present in the table
 * @param bucketNumber      bucket id stored in the header
 * @return the populated index header object
 */
public static IndexHeader getIndexHeader(int[] columnCardinality, List<ColumnSchema> columnSchemaList, int bucketNumber) {
    // segment details: number of columns followed by the per-column cardinality
    SegmentInfo segmentDetails = new SegmentInfo();
    segmentDetails.setNum_cols(columnSchemaList.size());
    segmentDetails.setColumn_cardinalities(CarbonUtil.convertToIntegerList(columnCardinality));

    // the header carries the format version currently configured in CarbonProperties
    IndexHeader header = new IndexHeader();
    header.setVersion(CarbonProperties.getInstance().getFormatVersion().number());

    // attach bucket id, column schemas and the segment details built above
    header.setBucket_id(bucketNumber);
    header.setTable_columns(columnSchemaList);
    header.setSegment_info(segmentDetails);
    return header;
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) SegmentInfo(org.apache.carbondata.format.SegmentInfo) ColumnarFormatVersion(org.apache.carbondata.core.metadata.ColumnarFormatVersion)

Example 2 with IndexHeader

use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.

the class CarbonMetadataUtilTest method testGetIndexHeader.

/**
 * Checks that getIndexHeader returns a header equal to one assembled by hand
 * from the same cardinalities, column schema list, bucket id and schema time stamp.
 */
@Test
public void testGetIndexHeader() {
    final int[] cardinalities = { 1, 2, 3, 4 };

    // hand-built segment info that the helper is expected to reproduce
    SegmentInfo expectedSegment = new SegmentInfo();
    expectedSegment.setNum_cols(0);
    expectedSegment.setColumn_cardinalities(CarbonUtil.convertToIntegerList(cardinalities));

    // hand-built header: version 3, bucket 0, schema time stamp 0
    IndexHeader expectedHeader = new IndexHeader();
    expectedHeader.setVersion(3);
    expectedHeader.setSegment_info(expectedSegment);
    expectedHeader.setTable_columns(columnSchemaList);
    expectedHeader.setBucket_id(0);
    expectedHeader.setSchema_time_stamp(0L);

    IndexHeader actualHeader = getIndexHeader(cardinalities, columnSchemaList, 0, 0L);

    assertEquals(expectedHeader, actualHeader);
}
Also used : CarbonMetadataUtil.getIndexHeader(org.apache.carbondata.core.util.CarbonMetadataUtil.getIndexHeader) IndexHeader(org.apache.carbondata.format.IndexHeader) SegmentInfo(org.apache.carbondata.format.SegmentInfo) Test(org.junit.Test)

Example 3 with IndexHeader

use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.

the class CarbonMetadataUtil method getIndexHeader.

/**
 * Creates the thrift index header describing the table columns, the bucket
 * and the schema time stamp.
 *
 * @param columnCardinality cardinality of each column
 * @param columnSchemaList  list of columns present in the table
 * @param bucketNumber      bucket id stored in the header
 * @param schemaTimeStamp   current time stamp of the schema
 * @return the populated index header object
 */
public static IndexHeader getIndexHeader(int[] columnCardinality, List<ColumnSchema> columnSchemaList, int bucketNumber, long schemaTimeStamp) {
    // describe the segment: column count first, then the cardinalities
    final SegmentInfo segInfo = new SegmentInfo();
    segInfo.setNum_cols(columnSchemaList.size());
    segInfo.setColumn_cardinalities(CarbonUtil.convertToIntegerList(columnCardinality));

    // format version comes from the carbon properties
    final ColumnarFormatVersion formatVersion = CarbonProperties.getInstance().getFormatVersion();

    final IndexHeader result = new IndexHeader();
    result.setVersion(formatVersion.number());
    result.setSegment_info(segInfo);
    result.setTable_columns(columnSchemaList);
    result.setBucket_id(bucketNumber);
    // the schema time stamp is used for deciding the restructured block
    result.setSchema_time_stamp(schemaTimeStamp);
    return result;
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) SegmentInfo(org.apache.carbondata.format.SegmentInfo) ColumnarFormatVersion(org.apache.carbondata.core.metadata.ColumnarFormatVersion)

Example 4 with IndexHeader

use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.

the class AbstractFactDataWriter method writeIndexFile.

/**
 * Writes the index file: the index header first, followed by one entry per
 * block index. Depending on {@code enableDirectlyWriteData2Hdfs} the file is
 * either written to its HDFS path and completed by a background task, or
 * written to a randomly chosen temp location and then copied to the carbon
 * data directory.
 *
 * The writer is closed even when writing fails, so the underlying stream is
 * not leaked on error.
 *
 * @throws IOException               throws io exception if any problem while writing
 * @throws CarbonDataWriterException data writing
 */
protected void writeIndexFile() throws IOException, CarbonDataWriterException {
    // get the header
    IndexHeader indexHeader = CarbonMetadataUtil.getIndexHeader(localCardinality, thriftColumnSchemaList, model.getBucketId(), model.getSchemaUpdatedTimeStamp());
    // get the block index info thrift
    List<BlockIndex> blockIndexThrift = CarbonMetadataUtil.getBlockIndexInfo(blockIndexInfoList);
    // the bare file name is the same for both targets; only the parent location differs
    String indexFileBaseName = CarbonTablePath.getCarbonIndexFileName(model.getCarbonDataFileAttributes().getTaskId(), model.getBucketId(), model.getTaskExtension(), "" + model.getCarbonDataFileAttributes().getFactTimeStamp());
    String indexFileName;
    if (enableDirectlyWriteData2Hdfs) {
        String rawFileName = model.getCarbonDataDirectoryPath() + File.separator + indexFileBaseName;
        indexFileName = FileFactory.getUpdatedFilePath(rawFileName, FileFactory.FileType.HDFS);
    } else {
        // randomly choose a temp location for index file
        String[] tempLocations = model.getStoreLocation();
        String chosenTempLocation = tempLocations[new Random().nextInt(tempLocations.length)];
        LOGGER.info("Randomly choose index file location: " + chosenTempLocation);
        indexFileName = chosenTempLocation + File.separator + indexFileBaseName;
    }
    CarbonIndexFileWriter writer = new CarbonIndexFileWriter();
    // open file
    writer.openThriftWriter(indexFileName);
    try {
        // write the header first
        writer.writeThrift(indexHeader);
        // write the indexes
        for (BlockIndex blockIndex : blockIndexThrift) {
            writer.writeThrift(blockIndex);
        }
    } finally {
        // always release the writer, also when one of the writes above failed
        writer.close();
    }
    if (enableDirectlyWriteData2Hdfs) {
        executorServiceSubmitList.add(executorService.submit(new CompleteHdfsBackendThread(indexFileName, FileFactory.FileType.HDFS)));
    } else {
        CarbonUtil.copyCarbonDataFileToCarbonStorePath(indexFileName, model.getCarbonDataDirectoryPath(), fileSizeInBytes);
    }
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) CarbonIndexFileWriter(org.apache.carbondata.core.writer.CarbonIndexFileWriter) Random(java.util.Random) BlockIndex(org.apache.carbondata.format.BlockIndex)

Example 5 with IndexHeader

use of org.apache.carbondata.format.IndexHeader in project carbondata by apache.

the class DataFileFooterConverterTest method testGetIndexInfo.

/**
 * Calls DataFileFooterConverter.getIndexInfo with the index file reader, IndexHeader
 * and FileFactory replaced by JMockit fakes, then compares the B-tree start key of the
 * first returned footer against the bytes of "1" (the start key set on the faked block
 * index below).
 */
@Test
public void testGetIndexInfo() throws Exception {
    DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
    final ThriftReader thriftReader = new ThriftReader("file");
    // encoding list shared by every column schema created below
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    // eight column schemas that differ only in their data type
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    final List<ColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
    // block index handed out by the faked reader
    final BlockIndex blockIndex = new BlockIndex();
    // NOTE(review): this empty blocklet index is replaced by blockletIndex1 further down
    blockIndex.setBlock_index(new org.apache.carbondata.format.BlockletIndex());
    org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
    // B-tree index with start key "1" and end key "3"
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    blockletBTreeIndex.setStart_key("1".getBytes());
    blockletBTreeIndex.setEnd_key("3".getBytes());
    blockletIndex1.setB_tree_index(blockletBTreeIndex);
    // min/max index with a single one-byte value each (max = 2, min = 1)
    // NOTE(review): relative put() advances the buffer position and the buffers are not
    // flipped, so they have no remaining bytes — confirm the consumer reads the backing array
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
    blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
    blockletIndex1.setMin_max_index(blockletMinMaxIndex);
    blockIndex.setBlock_index(blockletIndex1);
    List<Integer> column_cardinalities = new ArrayList<>();
    column_cardinalities.add(new Integer("1"));
    // presumably (num_cols, column_cardinalities) — verify against the thrift definition
    final org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
    // fake the index file reader so no real index file is needed
    new MockUp<CarbonIndexFileReader>() {

        // lets hasNext() report true exactly once
        boolean mockedHasNextStatus = true;

        @SuppressWarnings("unused")
        @Mock
        public boolean hasNext() throws IOException {
            // return the current flag and flip it, so the second call returns false
            boolean temp = mockedHasNextStatus;
            mockedHasNextStatus = false;
            return temp;
        }

        @SuppressWarnings("unused")
        @Mock
        public void openThriftReader(String filePath) throws IOException {
            // the requested path is ignored; the shared thrift reader is opened instead
            thriftReader.open();
        }

        @SuppressWarnings("unused")
        @Mock
        public IndexHeader readIndexHeader() throws IOException {
            // header built from the column schemas and segment info prepared above
            return new IndexHeader(1, columnSchemas, segmentInfo1);
        }

        @SuppressWarnings("unused")
        @Mock
        public BlockIndex readBlockIndexInfo() throws IOException {
            return blockIndex;
        }

        @SuppressWarnings("unused")
        @Mock
        public void closeThriftReader() {
            thriftReader.close();
        }
    };
    // fake IndexHeader so getTable_columns() always returns the prepared column schemas
    new MockUp<IndexHeader>() {

        @SuppressWarnings("unused")
        @Mock
        public List<ColumnSchema> getTable_columns() {
            return columnSchemas;
        }
    };
    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
    final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
    // fake FileFactory so the in-memory stream above is returned for any path
    new MockUp<FileFactory>() {

        @SuppressWarnings("unused")
        @Mock
        public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
            return dataInputStream;
        }
    };
    String[] arr = { "a", "b", "c" };
    String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
    // single table block info passed to the converter together with the index file name
    TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
    tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    tableBlockInfoList.add(tableBlockInfo);
    String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
    List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
    // NOTE(review): despite the names, exp holds the value produced by the converter and
    // res the expected bytes; the loop is bounded by exp.length only, so an empty start
    // key would pass without any comparison — consider also asserting the lengths
    byte[] exp = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
    byte[] res = "1".getBytes();
    for (int i = 0; i < exp.length; i++) {
        assertEquals(exp[i], res[i]);
    }
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.format.ColumnSchema) MockUp(mockit.MockUp) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) Encoding(org.apache.carbondata.format.Encoding) DataInputStream(java.io.DataInputStream) BlockIndex(org.apache.carbondata.format.BlockIndex) ByteArrayInputStream(java.io.ByteArrayInputStream) SegmentInfo(org.apache.carbondata.core.metadata.blocklet.SegmentInfo) Test(org.junit.Test)

Aggregations

IndexHeader (org.apache.carbondata.format.IndexHeader)5 SegmentInfo (org.apache.carbondata.format.SegmentInfo)3 ColumnarFormatVersion (org.apache.carbondata.core.metadata.ColumnarFormatVersion)2 BlockIndex (org.apache.carbondata.format.BlockIndex)2 Test (org.junit.Test)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 ArrayList (java.util.ArrayList)1 Random (java.util.Random)1 MockUp (mockit.MockUp)1 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)1 DataFileFooter (org.apache.carbondata.core.metadata.blocklet.DataFileFooter)1 SegmentInfo (org.apache.carbondata.core.metadata.blocklet.SegmentInfo)1 ThriftReader (org.apache.carbondata.core.reader.ThriftReader)1 CarbonMetadataUtil.getIndexHeader (org.apache.carbondata.core.util.CarbonMetadataUtil.getIndexHeader)1 CarbonIndexFileWriter (org.apache.carbondata.core.writer.CarbonIndexFileWriter)1 BlockletBTreeIndex (org.apache.carbondata.format.BlockletBTreeIndex)1 BlockletMinMaxIndex (org.apache.carbondata.format.BlockletMinMaxIndex)1 ColumnSchema (org.apache.carbondata.format.ColumnSchema)1 Encoding (org.apache.carbondata.format.Encoding)1