use of org.apache.carbondata.core.reader.CarbonHeaderReader in project carbondata by apache.
the class CarbonStreamRecordReader method getSyncMarker.
/**
 * Reads the header of the file at {@code filePath} and returns the sync marker stored in it.
 *
 * @param filePath path of the file whose header is read
 * @return the sync marker bytes reported by the file header
 * @throws IOException if reading the header fails
 */
private byte[] getSyncMarker(String filePath) throws IOException {
  return new CarbonHeaderReader(filePath).readHeader().getSync_marker();
}
use of org.apache.carbondata.core.reader.CarbonHeaderReader in project carbondata by apache.
the class CarbonUtil method inferSchema.
/**
 * Produces the thrift {@code TableInfo} for a table.
 *
 * <p>When {@code schemaExists} is {@code true}, {@code carbonDataFilePath} is read directly as a
 * thrift-serialized {@code TableInfo}. Otherwise the schema is inferred from the header of the
 * first file found under {@code carbonDataFilePath + "/Fact/Part0/Segment_null"}: the header's
 * column schemas are converted to wrapper column schemas, a new table schema with a random table
 * id and a single schema-evolution entry (stamped with the current time) is built, and that
 * schema is converted back to its thrift form.
 *
 * @param carbonDataFilePath schema file path when {@code schemaExists} is {@code true};
 *                           otherwise the table location that is searched for a data file
 * @param absoluteTableIdentifier supplies the table name for an inferred schema; only used when
 *                                {@code schemaExists} is {@code false}
 * @param schemaExists whether a schema file already exists at {@code carbonDataFilePath}
 * @return the table info that was read or inferred
 * @throws IOException if no data file is present in the table location, or if reading fails
 */
public static org.apache.carbondata.format.TableInfo inferSchema(String carbonDataFilePath, AbsoluteTableIdentifier absoluteTableIdentifier, boolean schemaExists) throws IOException {
  if (schemaExists) {
    TBaseCreator createTBase = new ThriftReader.TBaseCreator() {
      public org.apache.thrift.TBase<org.apache.carbondata.format.TableInfo, org.apache.carbondata.format.TableInfo._Fields> create() {
        return new org.apache.carbondata.format.TableInfo();
      }
    };
    ThriftReader thriftReader = new ThriftReader(carbonDataFilePath, createTBase);
    thriftReader.open();
    try {
      return (org.apache.carbondata.format.TableInfo) thriftReader.read();
    } finally {
      // Release the reader even when read() fails.
      thriftReader.close();
    }
  }

  List<String> filePaths = getFilePathExternalFilePath(carbonDataFilePath + "/Fact/Part0/Segment_null");
  if (filePaths == null || filePaths.isEmpty()) {
    // Fail fast instead of handing a null path to the header reader.
    LOGGER.error("CarbonData file is not present in the table location");
    throw new IOException("CarbonData file is not present in the table location: " + carbonDataFilePath);
  }
  String firstFilePath = filePaths.get(0);

  CarbonHeaderReader carbonHeaderReader = new CarbonHeaderReader(firstFilePath);
  FileHeader fileHeader = carbonHeaderReader.readHeader();
  List<ColumnSchema> columnSchemaList = new ArrayList<ColumnSchema>();
  for (org.apache.carbondata.format.ColumnSchema thriftColumn : fileHeader.getColumn_schema()) {
    ColumnSchema col = thriftColumnSchmeaToWrapperColumnSchema(thriftColumn);
    // Each inferred column references itself.
    col.setColumnReferenceId(col.getColumnUniqueId());
    columnSchemaList.add(col);
  }

  TableSchema tableSchema = new TableSchema();
  tableSchema.setTableName(absoluteTableIdentifier.getTableName());
  tableSchema.setBucketingInfo(null);
  tableSchema.setSchemaEvalution(null);
  tableSchema.setTableId(UUID.randomUUID().toString());
  tableSchema.setListOfColumns(columnSchemaList);

  ThriftWrapperSchemaConverterImpl thriftWrapperSchemaConverter = new ThriftWrapperSchemaConverterImpl();
  SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry();
  schemaEvolutionEntry.setTimeStamp(System.currentTimeMillis());
  SchemaEvolution schemaEvol = new SchemaEvolution();
  List<SchemaEvolutionEntry> schEntryList = new ArrayList<>();
  schEntryList.add(schemaEvolutionEntry);
  schemaEvol.setSchemaEvolutionEntryList(schEntryList);
  tableSchema.setSchemaEvalution(schemaEvol);

  org.apache.carbondata.format.TableSchema thriftFactTable = thriftWrapperSchemaConverter.fromWrapperToExternalTableSchema(tableSchema);
  org.apache.carbondata.format.TableInfo tableInfo = new org.apache.carbondata.format.TableInfo(thriftFactTable, new ArrayList<org.apache.carbondata.format.TableSchema>());
  tableInfo.setDataMapSchemas(null);
  return tableInfo;
}
use of org.apache.carbondata.core.reader.CarbonHeaderReader in project carbondata by apache.
the class DataFileFooterConverterV3 method readDataFileFooter.
/**
 * Converts the thrift file metadata of a version 3 data file into the wrapper
 * {@link DataFileFooter}.
 *
 * <p>Steps:
 * <ol>
 *   <li>read the file header of the block's file;</li>
 *   <li>read the version 3 footer using the block offset of {@code tableBlockInfo};</li>
 *   <li>copy version, row count, segment info and schema timestamp into the wrapper;</li>
 *   <li>convert the header's column schemas to wrapper column schemas;</li>
 *   <li>convert the footer's blocklet indexes and blocklet infos, pairing them by position.</li>
 * </ol>
 *
 * @param tableBlockInfo
 *          table block info providing the file path and block offset
 * @return data file footer
 * @throws IOException if reading the header or the footer fails
 */
@Override
public DataFileFooter readDataFileFooter(TableBlockInfo tableBlockInfo) throws IOException {
  DataFileFooter result = new DataFileFooter();

  FileHeader header = new CarbonHeaderReader(tableBlockInfo.getFilePath()).readHeader();
  CarbonFooterReaderV3 footerReader = new CarbonFooterReaderV3(tableBlockInfo.getFilePath(), tableBlockInfo.getBlockOffset());
  FileFooter3 thriftFooter = footerReader.readFooterVersion3();

  result.setVersionId(ColumnarFormatVersion.valueOf((short) header.getVersion()));
  result.setNumberOfRows(thriftFooter.getNum_rows());
  result.setSegmentInfo(getSegmentInfo(thriftFooter.getSegment_info()));
  result.setSchemaUpdatedTimeStamp(header.getTime_stamp());

  // Column schemas come from the file header, not from the footer.
  List<ColumnSchema> wrapperColumns = new ArrayList<ColumnSchema>();
  for (org.apache.carbondata.format.ColumnSchema thriftColumn : header.getColumn_schema()) {
    wrapperColumns.add(thriftColumnSchmeaToWrapperColumnSchema(thriftColumn));
  }
  result.setColumnInTable(wrapperColumns);

  List<org.apache.carbondata.format.BlockletIndex> thriftIndexes = thriftFooter.getBlocklet_index_list();
  List<BlockletIndex> wrapperIndexes = new ArrayList<BlockletIndex>();
  for (org.apache.carbondata.format.BlockletIndex thriftIndex : thriftIndexes) {
    wrapperIndexes.add(getBlockletIndex(thriftIndex));
  }

  // The i-th blocklet info is paired with the i-th blocklet index.
  List<org.apache.carbondata.format.BlockletInfo3> thriftInfos = thriftFooter.getBlocklet_info_list3();
  List<BlockletInfo> wrapperInfos = new ArrayList<BlockletInfo>();
  int infoCount = thriftInfos.size();
  for (int position = 0; position < infoCount; position++) {
    BlockletInfo info = getBlockletInfo(thriftInfos.get(position), CarbonUtil.getNumberOfDimensionColumns(wrapperColumns));
    info.setBlockletIndex(wrapperIndexes.get(position));
    wrapperInfos.add(info);
  }
  result.setBlockletList(wrapperInfos);
  result.setBlockletIndex(getBlockletIndexForDataFileFooter(wrapperIndexes));
  return result;
}
use of org.apache.carbondata.core.reader.CarbonHeaderReader in project carbondata by apache.
the class DataFileFooterConverterV3 method getSchema.
/**
 * Reads the header of the block's file and returns its column schemas converted to wrapper
 * column schemas, in header order.
 *
 * @param tableBlockInfo table block info providing the file path
 * @return the wrapper column schemas of the file
 * @throws IOException if reading the header fails
 */
@Override
public List<ColumnSchema> getSchema(TableBlockInfo tableBlockInfo) throws IOException {
  FileHeader header = new CarbonHeaderReader(tableBlockInfo.getFilePath()).readHeader();
  List<ColumnSchema> wrapperColumns = new ArrayList<ColumnSchema>();
  for (org.apache.carbondata.format.ColumnSchema thriftColumn : header.getColumn_schema()) {
    wrapperColumns.add(thriftColumnSchmeaToWrapperColumnSchema(thriftColumn));
  }
  return wrapperColumns;
}
use of org.apache.carbondata.core.reader.CarbonHeaderReader in project carbondata by apache.
the class CarbonTable method buildFromDataFile.
/**
 * Builds a {@link CarbonTable} from the column schemas stored in the header of a data file.
 *
 * <p>Every header column is converted to a wrapper column schema and added to the table schema
 * by name and data type. The resulting table is placed in the "default" database.
 *
 * @param tableName name given to the built table schema
 * @param tablePath table path recorded in the table info
 * @param filePath  path of the data file whose header is read
 * @return the table built from the resulting table info
 * @throws IOException if reading the file header fails
 */
public static CarbonTable buildFromDataFile(String tableName, String tablePath, String filePath) throws IOException {
  FileHeader header = new CarbonHeaderReader(filePath).readHeader();
  TableSchemaBuilder schemaBuilder = TableSchema.builder();
  ThriftWrapperSchemaConverterImpl converter = new ThriftWrapperSchemaConverterImpl();
  for (org.apache.carbondata.format.ColumnSchema thriftColumn : header.getColumn_schema()) {
    ColumnSchema wrapperColumn = converter.fromExternalToWrapperColumnSchema(thriftColumn);
    StructField field = new StructField(wrapperColumn.getColumnName(), wrapperColumn.getDataType());
    schemaBuilder.addColumn(field, false);
  }
  TableSchema factTable = schemaBuilder.tableName(tableName).build();

  String databaseName = "default";
  TableInfo info = new TableInfo();
  info.setFactTable(factTable);
  info.setTablePath(tablePath);
  info.setDatabaseName(databaseName);
  info.setTableUniqueName(CarbonTable.buildUniqueName(databaseName, factTable.getTableName()));
  return buildFromTableInfo(info);
}
Aggregations