Use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by Apache.
Example from the class SegmentIndexFileStore, method getIndexFilesFromMergeFile.
/**
 * Lists all the index files recorded inside a merged index file.
 *
 * @param mergeFile path of the merge index file to read
 * @return names of the index files stored in the merge file's header
 * @throws IOException if the merge file cannot be opened or its header cannot be read
 */
public List<String> getIndexFilesFromMergeFile(String mergeFile) throws IOException {
  ThriftReader thriftReader = new ThriftReader(mergeFile);
  thriftReader.open();
  try {
    MergedBlockIndexHeader indexHeader = readMergeBlockIndexHeader(thriftReader);
    return indexHeader.getFile_names();
  } finally {
    // Close in finally so the underlying stream is released even when
    // reading the header fails; the original leaked the reader on error.
    thriftReader.close();
  }
}
Use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by Apache.
Example from the class DataFileFooterConverterTest, method testGetIndexInfo.
@Test
public void testGetIndexInfo() throws Exception {
// Converter under test: parses a carbonindex file into DataFileFooter objects.
DataFileFooterConverter dataFileFooterConverter = new DataFileFooterConverter();
// Shared reader; the CarbonIndexFileReader mock below delegates open/close to it.
final ThriftReader thriftReader = new ThriftReader("file");
// One encoding list reused by every column schema in this fixture.
List<Encoding> encoders = new ArrayList<>();
encoders.add(Encoding.INVERTED_INDEX);
encoders.add(Encoding.BIT_PACKED);
encoders.add(Encoding.DELTA);
encoders.add(Encoding.DICTIONARY);
encoders.add(Encoding.DIRECT_DICTIONARY);
encoders.add(Encoding.RLE);
// Build one column schema per supported data type so the converter's
// type-handling paths are all exercised.
ColumnSchema columnSchema = new ColumnSchema(DataType.INT, "column", "3", true, encoders, true);
ColumnSchema columnSchema1 = new ColumnSchema(DataType.ARRAY, "column", "3", true, encoders, true);
ColumnSchema columnSchema2 = new ColumnSchema(DataType.DECIMAL, "column", "3", true, encoders, true);
ColumnSchema columnSchema3 = new ColumnSchema(DataType.DOUBLE, "column", "3", true, encoders, true);
ColumnSchema columnSchema4 = new ColumnSchema(DataType.LONG, "column", "3", true, encoders, true);
ColumnSchema columnSchema5 = new ColumnSchema(DataType.SHORT, "column", "3", true, encoders, true);
ColumnSchema columnSchema6 = new ColumnSchema(DataType.STRUCT, "column", "3", true, encoders, true);
ColumnSchema columnSchema7 = new ColumnSchema(DataType.STRING, "column", "3", true, encoders, true);
final List<ColumnSchema> columnSchemas = new ArrayList<>();
columnSchemas.add(columnSchema);
columnSchemas.add(columnSchema1);
columnSchemas.add(columnSchema2);
columnSchemas.add(columnSchema3);
columnSchemas.add(columnSchema4);
columnSchemas.add(columnSchema5);
columnSchemas.add(columnSchema6);
columnSchemas.add(columnSchema7);
// Fixture BlockIndex: the b-tree start key "1" set here is what the
// assertion at the end of the test reads back.
final BlockIndex blockIndex = new BlockIndex();
blockIndex.setBlock_index(new org.apache.carbondata.format.BlockletIndex());
org.apache.carbondata.format.BlockletIndex blockletIndex1 = new org.apache.carbondata.format.BlockletIndex();
BlockletBTreeIndex blockletBTreeIndex = new BlockletBTreeIndex();
blockletBTreeIndex.setStart_key("1".getBytes());
blockletBTreeIndex.setEnd_key("3".getBytes());
blockletIndex1.setB_tree_index(blockletBTreeIndex);
BlockletMinMaxIndex blockletMinMaxIndex = new BlockletMinMaxIndex();
blockletMinMaxIndex.setMax_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 2)));
blockletMinMaxIndex.setMin_values(Arrays.asList(ByteBuffer.allocate(1).put((byte) 1)));
blockletIndex1.setMin_max_index(blockletMinMaxIndex);
// NOTE(review): this second setBlock_index overwrites the empty BlockletIndex
// set two statements above — presumably intentional, the first set looks redundant.
blockIndex.setBlock_index(blockletIndex1);
List<Integer> column_cardinalities = new ArrayList<>();
column_cardinalities.add(new Integer("1"));
final org.apache.carbondata.format.SegmentInfo segmentInfo1 = new org.apache.carbondata.format.SegmentInfo(3, column_cardinalities);
// Mock CarbonIndexFileReader so getIndexInfo never touches the filesystem:
// hasNext() yields exactly one block, and the header/block readers return
// the fixtures built above.
new MockUp<CarbonIndexFileReader>() {
// One-shot flag so the converter's read loop runs exactly once.
boolean mockedHasNextStatus = true;
@SuppressWarnings("unused")
@Mock
public boolean hasNext() throws IOException {
boolean temp = mockedHasNextStatus;
mockedHasNextStatus = false;
return temp;
}
@SuppressWarnings("unused")
@Mock
public void openThriftReader(String filePath) throws IOException {
thriftReader.open();
}
@SuppressWarnings("unused")
@Mock
public IndexHeader readIndexHeader() throws IOException {
return new IndexHeader(1, columnSchemas, segmentInfo1);
}
@SuppressWarnings("unused")
@Mock
public BlockIndex readBlockIndexInfo() throws IOException {
return blockIndex;
}
@SuppressWarnings("unused")
@Mock
public void closeThriftReader() {
thriftReader.close();
}
};
// Force the header to expose the fixture columns regardless of constructor state.
new MockUp<IndexHeader>() {
@SuppressWarnings("unused")
@Mock
public List<ColumnSchema> getTable_columns() {
return columnSchemas;
}
};
// Any file opened through FileFactory yields this in-memory stream.
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream("1".getBytes());
final DataInputStream dataInputStream = new DataInputStream(byteArrayInputStream);
new MockUp<FileFactory>() {
@SuppressWarnings("unused")
@Mock
public DataInputStream getDataInputStream(String path, FileFactory.FileType fileType, int bufferSize) {
return dataInputStream;
}
};
String[] arr = { "a", "b", "c" };
String fileName = "/part-0-0_batchno0-0-1495074251740.carbondata";
TableBlockInfo tableBlockInfo = new TableBlockInfo(fileName, 3, "id", arr, 3, ColumnarFormatVersion.V1, null);
tableBlockInfo.getBlockletInfos().setNoOfBlockLets(3);
List<TableBlockInfo> tableBlockInfoList = new ArrayList<>();
tableBlockInfoList.add(tableBlockInfo);
String idxFileName = "0_batchno0-0-1495074251740.carbonindex";
List<DataFileFooter> dataFileFooterList = dataFileFooterConverter.getIndexInfo(idxFileName, tableBlockInfoList);
// The footer's b-tree start key must round-trip the "1" bytes set on the fixture.
byte[] exp = dataFileFooterList.get(0).getBlockletIndex().getBtreeIndex().getStartKey();
byte[] res = "1".getBytes();
for (int i = 0; i < exp.length; i++) {
assertEquals(exp[i], res[i]);
}
}
Use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by Apache.
Example from the class CarbonDictionarySortIndexReaderImpl, method openThriftReader.
/**
 * Opens the dictionary sort index file stream for reading.
 *
 * @throws IOException in case any I/O error occurs while opening the stream
 */
private void openThriftReader() throws IOException {
  // Creator that tells the reader which thrift object to deserialize into.
  ThriftReader.TBaseCreator sortInfoCreator = new ThriftReader.TBaseCreator() {
    @Override
    public TBase create() {
      return new ColumnSortInfo();
    }
  };
  this.dictionarySortIndexThriftReader = new ThriftReader(this.sortIndexFilePath, sortInfoCreator);
  this.dictionarySortIndexThriftReader.open();
}
Use of org.apache.carbondata.core.reader.ThriftReader in project carbondata by Apache.
Example from the class CarbonTableReader, method parseCarbonMetadata.
/**
 * Reads the metadata of the given table and caches it in this.carbonCache
 * (the CarbonTableReader cache).
 *
 * @param table schema and name of the table to read
 * @return the CarbonTable instance which contains all the needed metadata for the table
 * @throws RuntimeException wrapping any failure while reading or converting the schema
 */
private CarbonTable parseCarbonMetadata(SchemaTableName table) {
  CarbonTable result = null;
  try {
    CarbonTableCacheModel cache = carbonCache.get().get(table);
    if (cache == null) {
      cache = new CarbonTableCacheModel();
    }
    if (cache.isValid()) {
      return cache.carbonTable;
    }
    // If table is not previously cached, then:
    // Step 1: get store path of the table and cache it.
    // Create the table identifier; the table id is randomly generated.
    CarbonTableIdentifier carbonTableIdentifier = new CarbonTableIdentifier(table.getSchemaName(), table.getTableName(), UUID.randomUUID().toString());
    String storePath = config.getStorePath();
    String tablePath = storePath + "/" + carbonTableIdentifier.getDatabaseName() + "/" + carbonTableIdentifier.getTableName();
    // Step 2: read the metadata (tableInfo) of the table.
    ThriftReader.TBaseCreator createTBase = new ThriftReader.TBaseCreator() {
      // TBase is used to read and write thrift objects.
      // TableInfo is a kind of TBase used to read and write table information.
      // TableInfo is generated by thrift; see schema.thrift under format/src/main/thrift for details.
      public TBase create() {
        return new org.apache.carbondata.format.TableInfo();
      }
    };
    ThriftReader thriftReader = new ThriftReader(CarbonTablePath.getSchemaFilePath(tablePath), createTBase);
    thriftReader.open();
    org.apache.carbondata.format.TableInfo tableInfo;
    try {
      tableInfo = (org.apache.carbondata.format.TableInfo) thriftReader.read();
    } finally {
      // Close in finally so the schema-file stream is not leaked when read() throws;
      // the original only closed on the success path.
      thriftReader.close();
    }
    // Step 3: convert format-level TableInfo to code-level TableInfo.
    SchemaConverter schemaConverter = new ThriftWrapperSchemaConverterImpl();
    // wrapperTableInfo is the code-level table information in carbondata core,
    // distinct from the thrift-generated TableInfo read above.
    TableInfo wrapperTableInfo = schemaConverter.fromExternalToWrapperTableInfo(tableInfo, table.getSchemaName(), table.getTableName(), tablePath);
    // Step 4: load metadata info into CarbonMetadata.
    CarbonMetadata.getInstance().loadTableMetadata(wrapperTableInfo);
    cache.carbonTable = CarbonMetadata.getInstance().getCarbonTable(table.getSchemaName(), table.getTableName());
    // Cache the table for subsequent lookups.
    carbonCache.get().put(table, cache);
    result = cache.carbonTable;
  } catch (Exception ex) {
    // Boundary method: wrap any checked failure, preserving the cause.
    throw new RuntimeException(ex);
  }
  return result;
}
Aggregations