Search in sources :

Example 6 with ThriftReader

use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by apache.

the class SegmentIndexFileStore method getIndexFilesFromMergeFile.

/**
 * Lists the names of all index files recorded inside the given merge file.
 *
 * @param mergeFile the merge index file to read
 * @return the file names stored in the merge file's header
 * @throws IOException if the merge file cannot be opened or its header cannot be read
 */
public List<String> getIndexFilesFromMergeFile(String mergeFile) throws IOException {
    ThriftReader thriftReader = new ThriftReader(mergeFile);
    thriftReader.open();
    try {
        MergedBlockIndexHeader indexHeader = readMergeBlockIndexHeader(thriftReader);
        return indexHeader.getFile_names();
    } finally {
        // Release the reader even when reading the header fails, so the stream is not leaked.
        thriftReader.close();
    }
}
Also used : ThriftReader(org.apache.carbondata.core.reader.ThriftReader) MergedBlockIndexHeader(org.apache.carbondata.format.MergedBlockIndexHeader)

Example 7 with ThriftReader

use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by apache.

the class DataFileFooterConverterTest method testGetIndexInfo.

/**
 * Exercises {@code DataFileFooterConverter.getIndexInfo} with the index file reader,
 * the index header and the file factory replaced by mocks, then checks the B-tree
 * start key of the first returned footer against the bytes of {@code "1"}.
 *
 * @throws Exception if the conversion under test throws
 */
@Test
public void testGetIndexInfo() throws Exception {
    DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
    // Only used from inside the mocked open/close methods below.
    final ThriftReader thriftReader = new ThriftReader("file");
    // Every column schema below shares this single list of encodings.
    List<Encoding> encoders = new ArrayList<>();
    encoders.add(Encoding.INVERTED_INDEX);
    encoders.add(Encoding.BIT_PACKED);
    encoders.add(Encoding.DELTA);
    encoders.add(Encoding.DICTIONARY);
    encoders.add(Encoding.DIRECT_DICTIONARY);
    encoders.add(Encoding.RLE);
    // One column schema per data type; all other constructor arguments are identical.
    ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
    ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
    ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
    ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
    ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
    ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
    final List<ColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(columnSchema);
    columnSchemas.add(columnSchema1);
    columnSchemas.add(columnSchema2);
    columnSchemas.add(columnSchema3);
    columnSchemas.add(columnSchema4);
    columnSchemas.add(columnSchema5);
    columnSchemas.add(columnSchema6);
    columnSchemas.add(columnSchema7);
    // Block index fixture that the mocked readBlockIndexInfo() returns.
    final BlockIndex blockIndex = new BlockIndex();
    // NOTE(review): this empty BlockletIndex is replaced by blockletIndex1 a few lines below.
    blockIndex.setBlock_index(new org.apache.carbondata.format.BlockletIndex());
    org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
    BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
    // Start key "1" is the value the final assertion compares against.
    blockletBTreeIndex.setStart_key("1".getBytes());
    blockletBTreeIndex.setEnd_key("3".getBytes());
    blockletIndex1.setB_tree_index(blockletBTreeIndex);
    BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
    // NOTE(review): the relative put() advances each buffer's position to its limit,
    // so these buffers have no remaining bytes — confirm this is intended.
    blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
    blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
    blockletIndex1.setMin_max_index(blockletMinMaxIndex);
    blockIndex.setBlock_index(blockletIndex1);
    List<Integer> column_cardinalities = new ArrayList<>();
    column_cardinalities.add(new Integer("1"));
    final org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
    // Mock the index file reader so it returns the fixtures built above.
    new MockUp<CarbonIndexFileReader>() {

        // Makes hasNext() report true on the first call only.
        boolean mockedHasNextStatus = true;

        @SuppressWarnings("unused")
        @Mock
        public boolean hasNext() throws IOException {
            boolean temp = mockedHasNextStatus;
            mockedHasNextStatus = false;
            return temp;
        }

        // Ignores filePath and opens the local thriftReader instead.
        @SuppressWarnings("unused")
        @Mock
        public void openThriftReader(String filePath) throws IOException {
            thriftReader.open();
        }

        @SuppressWarnings("unused")
        @Mock
        public IndexHeader readIndexHeader() throws IOException {
            return new IndexHeader(1, columnSchemas, segmentInfo1);
        }

        @SuppressWarnings("unused")
        @Mock
        public BlockIndex readBlockIndexInfo() throws IOException {
            return blockIndex;
        }

        @SuppressWarnings("unused")
        @Mock
        public void closeThriftReader() {
            thriftReader.close();
        }
    };
    // Mock the index header so getTable_columns() returns the prepared column schemas.
    new MockUp<IndexHeader>() {

        @SuppressWarnings("unused")
        @Mock
        public List<ColumnSchema> getTable_columns() {
            return columnSchemas;
        }
    };
    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
    final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
    // Mock the file factory so every request returns the in-memory stream above.
    new MockUp<FileFactory>() {

        @SuppressWarnings("unused")
        @Mock
        public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
            return dataInputStream;
        }
    };
    String[] arr = { "a", "b", "c" };
    String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
    TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
    tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
    List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
    tableBlockInfoList.add(tableBlockInfo);
    String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
    // Call under test.
    List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
    // NOTE(review): the names are swapped — 'exp' holds the converter's actual start key
    // and 'res' holds the expected bytes. The loop is bounded by exp.length, so an empty
    // start key would pass without comparing anything.
    byte[] exp = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
    byte[] res = "1".getBytes();
    for (int i = 0; i < exp.length; i++) {
        assertEquals(exp[i], res[i]);
    }
}
Also used : IndexHeader(org.apache.carbondata.format.IndexHeader) TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.format.ColumnSchema) MockUp(mockit.MockUp) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) DataFileFooter(org.apache.carbondata.core.metadata.blocklet.DataFileFooter) BlockletMinMaxIndex(org.apache.carbondata.format.BlockletMinMaxIndex) BlockletBTreeIndex(org.apache.carbondata.format.BlockletBTreeIndex) Encoding(org.apache.carbondata.format.Encoding) DataInputStream(java.io.DataInputStream) BlockIndex(org.apache.carbondata.format.BlockIndex) ByteArrayInputStream(java.io.ByteArrayInputStream) SegmentInfo(org.apache.carbondata.core.metadata.blocklet.SegmentInfo) Test(org.junit.Test)

Example 8 with ThriftReader

use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by apache.

the class CarbonDictionarySortIndexReaderImpl method openThriftReader.

/**
 * Creates the thrift reader for the dictionary sort index file and opens it.
 * The reader is configured to produce {@code ColumnSortInfo} objects.
 *
 * @throws IOException if an I/O error occurs
 */
private void openThriftReader() throws IOException {
    // Factory handed to the reader for the thrift object type it should create.
    ThriftReader.TBaseCreator sortInfoCreator = new ThriftReader.TBaseCreator() {

        @Override
        public TBase create() {
            return new ColumnSortInfo();
        }
    };
    this.dictionarySortIndexThriftReader = new ThriftReader(this.sortIndexFilePath, sortInfoCreator);
    this.dictionarySortIndexThriftReader.open();
}
Also used : ThriftReader(org.apache.carbondata.core.reader.ThriftReader) ColumnSortInfo(org.apache.carbondata.format.ColumnSortInfo)

Example 9 with ThriftReader

use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by apache.

the class CarbonTableReader method parseCarbonMetadata.

/**
 * Read the metadata of the given table and cache it in this.carbonCache (CarbonTableReader cache).
 * If a valid cache entry already exists for the table, its CarbonTable is returned directly.
 *
 * @param table name of the given table.
 * @return the CarbonTable instance which contains all the needed metadata for a table.
 * @throws RuntimeException wrapping any exception raised while reading or converting the schema.
 */
private CarbonTable parseCarbonMetadata(SchemaTableName table) {
    try {
        CarbonTableCacheModel cache = carbonCache.get().get(table);
        if (cache == null) {
            cache = new CarbonTableCacheModel();
        }
        if (cache.isValid()) {
            return cache.carbonTable;
        }
        // If table is not previously cached, then:
        // Step 1: get store path of the table and cache it.
        // create table identifier. the table id is randomly generated.
        CarbonTableIdentifier carbonTableIdentifier = new CarbonTableIdentifier(table.getSchemaName(), table.getTableName(), UUID.randomUUID().toString());
        String storePath = config.getStorePath();
        String tablePath = storePath + "/" + carbonTableIdentifier.getDatabaseName() + "/" + carbonTableIdentifier.getTableName();
        // Step 2: read the metadata (tableInfo) of the table.
        ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {

            // TBase is used to read and write thrift objects.
            // TableInfo is a kind of TBase used to read and write table information.
            // TableInfo is generated by thrift, see schema.thrift under format/src/main/thrift for details.
            @Override
            public TBase create() {
                return new org.apache.carbondata.format.TableInfo();
            }
        };
        ThriftReader thriftReader = new ThriftReader(CarbonTablePath.getSchemaFilePath(tablePath), createTBase);
        thriftReader.open();
        org.apache.carbondata.format.TableInfo tableInfo;
        try {
            tableInfo = (org.apache.carbondata.format.TableInfo) thriftReader.read();
        } finally {
            // Close the reader even if read() fails, so the schema file stream is not leaked.
            thriftReader.close();
        }
        // Step 3: convert format level TableInfo to code level TableInfo
        SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
        // wrapperTableInfo is the code level information of a table in carbondata core, different from the Thrift TableInfo.
        TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(tableInfo, table.getSchemaName(), table.getTableName(), tablePath);
        // Step 4: Load metadata info into CarbonMetadata
        CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
        cache.carbonTable = CarbonMetadata.getInstance().getCarbonTable(table.getSchemaName(), table.getTableName());
        // cache the table
        carbonCache.get().put(table, cache);
        return cache.carbonTable;
    } catch (Exception ex) {
        // Callers see every failure as an unchecked exception with the original cause preserved.
        throw new RuntimeException(ex);
    }
}
Also used : IOException(java.io.IOException) TableNotFoundException(com.facebook.presto.spi.TableNotFoundException) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) ThriftReader(org.apache.carbondata.core.reader.ThriftReader) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl)

Aggregations

ThriftReader (org.apache.carbondata.core.reader.ThriftReader)9 TableInfo (org.apache.carbondata.core.metadata.schema.table.TableInfo)4 ThriftWrapperSchemaConverterImpl (org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl)3 IOException (java.io.IOException)2 MockUp (mockit.MockUp)2 SchemaConverter (org.apache.carbondata.core.metadata.converter.SchemaConverter)2 TBaseCreator (org.apache.carbondata.core.reader.ThriftReader.TBaseCreator)2 MergedBlockIndexHeader (org.apache.carbondata.format.MergedBlockIndexHeader)2 TException (org.apache.thrift.TException)2 Test (org.junit.Test)2 TableNotFoundException (com.facebook.presto.spi.TableNotFoundException)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 DataInputStream (java.io.DataInputStream)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo)1 CarbonDataWriterException (org.apache.carbondata.core.datastore.exception.CarbonDataWriterException)1 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)1 InvalidConfigurationException (org.apache.carbondata.core.exception.InvalidConfigurationException)1 CarbonTableIdentifier (org.apache.carbondata.core.metadata.CarbonTableIdentifier)1