Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
Class BlockletIndexUtil, method getMinMaxFlagValuesForColumnsToBeCached.
/**
 * Builds the min/max flag array restricted to the columns configured to be
 * cached. When no cache-column list is configured the full flag array is
 * returned unchanged.
 *
 * @param segmentProperties segment properties used to resolve each column's ordinal
 * @param minMaxCacheColumns columns whose min/max values are cached; may be null
 * @param minMaxFlag per-column min/max validity flags in schema order
 * @return flag values for the cached columns only, or the input array when no
 *         cache columns are configured
 */
public static boolean[] getMinMaxFlagValuesForColumnsToBeCached(SegmentProperties segmentProperties, List<CarbonColumn> minMaxCacheColumns, boolean[] minMaxFlag) {
// no cache-column restriction configured: every column's flag applies
if (null == minMaxCacheColumns) {
return minMaxFlag;
}
boolean[] cachedColumnFlags = new boolean[minMaxCacheColumns.size()];
for (int idx = 0; idx < cachedColumnFlags.length; idx++) {
// map the cached column back to its ordinal in the full flag array
int ordinal = getColumnOrdinal(segmentProperties, minMaxCacheColumns.get(idx));
cachedColumnFlags[idx] = minMaxFlag[ordinal];
}
return cachedColumnFlags;
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
Class CarbonHiveSerDe, method inferSchema.
/**
 * Infers the table schema for an external table when Hive supplied no column
 * metadata, populating the given name and type lists in place.
 *
 * Resolution order: read the schema file at the table location if it exists,
 * otherwise infer the schema from the data files themselves. Any failure is
 * logged and swallowed, leaving the lists empty (best-effort inference).
 *
 * @param tbl table properties from the Hive metastore
 * @param columnNames output list to receive inferred column names
 * @param columnTypes output list to receive the matching Hive type infos
 */
private void inferSchema(Properties tbl, List<String> columnNames, List<TypeInfo> columnTypes) {
// only infer when Hive provided no column metadata at all
if (columnNames.size() == 0 && columnTypes.size() == 0) {
String external = tbl.getProperty("EXTERNAL");
String location = CarbonUtil.checkAndAppendFileSystemURIScheme(tbl.getProperty(hive_metastoreConstants.META_TABLE_LOCATION));
// inference only applies to external tables with a known location
if (external != null && "TRUE".equals(external) && location != null) {
// META_TABLE_NAME is "<database>.<table>"; anything else is unsupported
String[] names = tbl.getProperty(hive_metastoreConstants.META_TABLE_NAME).split("\\.");
if (names.length == 2) {
AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier.from(location, names[0], names[1]);
String schemaPath = CarbonTablePath.getSchemaFilePath(identifier.getTablePath());
try {
TableInfo tableInfo = null;
if (!FileFactory.isFileExist(schemaPath)) {
// no schema file: infer the schema from the data files
tableInfo = SchemaReader.inferSchema(identifier, false);
} else {
// schema file present: read it directly
tableInfo = SchemaReader.getTableInfo(identifier);
}
if (tableInfo != null) {
CarbonTable carbonTable = CarbonTable.buildFromTableInfo(tableInfo);
List<CarbonColumn> columns = carbonTable.getCreateOrderColumn();
for (CarbonColumn column : columns) {
columnNames.add(column.getColName());
columnTypes.add(HiveDataTypeUtils.convertCarbonDataTypeToHive(column));
}
}
} catch (Exception ex) {
// Fix: pass the throwable so the stack trace is preserved; logging
// only getMessage() (which may be null) made failures undiagnosable.
LOGGER.warn("Failed to infer schema: " + ex.getMessage(), ex);
}
}
}
}
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
Class MapredCarbonInputFormat, method getProjection.
/**
 * Returns the comma-separated projection column list for the CarbonData query.
 *
 * The projection is taken from the explicit column projection in the
 * configuration, falling back to Hive's "hive.io.file.readcolumn.names".
 * Columns Hive asks for that do not exist in the table are silently dropped.
 * When no projection is configured, all table columns (in create order) are
 * returned.
 *
 * @param configuration job configuration holding the projection settings
 * @param carbonTable table whose columns validate and default the projection
 * @return comma-separated column names; empty string when nothing matches
 */
private String getProjection(Configuration configuration, CarbonTable carbonTable) {
// query plan includes projection column
String projection = getColumnProjection(configuration);
if (projection == null) {
projection = configuration.get("hive.io.file.readcolumn.names");
}
List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn();
List<String> carbonColumnNames = new ArrayList<>(carbonColumns.size());
List<String> allColumns = new ArrayList<>(carbonColumns.size());
for (CarbonColumn column : carbonColumns) {
// lower-cased names for case-insensitive matching against Hive's list
carbonColumnNames.add(column.getColName().toLowerCase());
allColumns.add(column.getColName());
}
if (null != projection && !projection.equals("")) {
String[] columnNames = projection.split(",");
List<String> projectionColumns = new ArrayList<>(columnNames.length);
// verify that the columns parsed by Hive exist in the table
for (String col : columnNames) {
// show columns command will return these data
if (carbonColumnNames.contains(col.toLowerCase())) {
projectionColumns.add(col);
}
}
// Fix: the original substring(0, lastIndexOf(",")) threw
// StringIndexOutOfBoundsException when no projected column matched
// (lastIndexOf returned -1). String.join handles the empty case safely.
return String.join(",", projectionColumns);
}
return String.join(",", allColumns);
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
Class CarbonFactDataHandlerModel, method getCarbonFactDataHandlerModel.
/**
 * This method will create a model object for carbon fact data handler
 *
 * @param loadModel load model carrying segment, compressor and metric settings
 * @param carbonTable table whose dimensions/measures define the schema
 * @param segmentProperties properties of the segment being written
 * @param tableName name set on the resulting model
 * @param tempStoreLocation temporary store paths used during the write step
 * @param carbonDataDirectoryPath output directory; created here if absent
 * @return fully configured CarbonFactDataHandlerModel ready for the write step
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String[] tempStoreLocation, String carbonDataDirectoryPath) {
// for dynamic page size in write step if varchar columns exist
List<CarbonDimension> allDimensions = carbonTable.getVisibleDimensions();
// sized for no-dictionary + complex dimensions; filled below by skipping
// DATE dimensions (DATE is the dictionary-encoded case here)
CarbonColumn[] noDicAndComplexColumns = new CarbonColumn[segmentProperties.getNumberOfNoDictionaryDimension() + segmentProperties.getComplexDimensions().size()];
int noDicAndComp = 0;
List<DataType> noDictDataTypesList = new ArrayList<>();
for (CarbonDimension dim : allDimensions) {
if (dim.getDataType() != DataTypes.DATE) {
noDicAndComplexColumns[noDicAndComp++] = new CarbonColumn(dim.getColumnSchema(), dim.getOrdinal(), dim.getSchemaOrdinal());
noDictDataTypesList.add(dim.getDataType());
}
}
CarbonFactDataHandlerModel carbonFactDataHandlerModel = new CarbonFactDataHandlerModel();
carbonFactDataHandlerModel.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
carbonFactDataHandlerModel.setDatabaseName(loadModel.getDatabaseName());
carbonFactDataHandlerModel.setTableName(tableName);
carbonFactDataHandlerModel.setStoreLocation(tempStoreLocation);
carbonFactDataHandlerModel.setSegmentProperties(segmentProperties);
carbonFactDataHandlerModel.setSegmentId(loadModel.getSegmentId());
// column schemas in wrapper (dimensions-then-measures) order
List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(carbonTable.getVisibleDimensions(), carbonTable.getVisibleMeasures());
carbonFactDataHandlerModel.setWrapperColumnSchema(wrapperColumnSchema);
carbonFactDataHandlerModel.setComplexIndexMap(convertComplexDimensionToComplexIndexMap(segmentProperties, loadModel.getSerializationNullFormat()));
DataType[] measureDataTypes = new DataType[segmentProperties.getMeasures().size()];
int i = 0;
for (CarbonMeasure msr : segmentProperties.getMeasures()) {
measureDataTypes[i++] = msr.getDataType();
}
carbonFactDataHandlerModel.setMeasureDataType(measureDataTypes);
carbonFactDataHandlerModel.setNoDictAndComplexColumns(noDicAndComplexColumns);
carbonFactDataHandlerModel.setNoDictDataTypesList(noDictDataTypesList);
// ensure the target directory exists before any writer is registered
CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath);
carbonFactDataHandlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
carbonFactDataHandlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
carbonFactDataHandlerModel.setColumnCompressor(loadModel.getColumnCompressor());
carbonFactDataHandlerModel.tableSpec = new TableSpec(carbonTable, false);
// register index writers for this segment/shard so indexes are built
// alongside the fact data
IndexWriterListener listener = new IndexWriterListener();
listener.registerAllWriter(carbonTable, loadModel.getSegmentId(), CarbonTablePath.getShardName(CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(loadModel.getTaskNo()), carbonFactDataHandlerModel.getBucketId(), carbonFactDataHandlerModel.getTaskExtension(), String.valueOf(loadModel.getFactTimeStamp()), loadModel.getSegmentId()), segmentProperties);
carbonFactDataHandlerModel.indexWriterlistener = listener;
// NOTE(review): called after the setters above — presumably core count
// depends on the configured model state; confirm before reordering.
carbonFactDataHandlerModel.initNumberOfCores();
carbonFactDataHandlerModel.setColumnLocalDictGenMap(CarbonUtil.getLocalDictionaryModel(carbonTable));
carbonFactDataHandlerModel.sortScope = carbonTable.getSortScope();
carbonFactDataHandlerModel.setMetrics(loadModel.getMetrics());
return carbonFactDataHandlerModel;
}
Use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in the Apache CarbonData project.
Class CarbonDataProcessorUtil, method getColumnIdxBasedOnSchemaInRowAsDataFieldOrder.
/**
 * If the dimension is added in older version 1.1, by default it will be sort column, So during
 * initial sorting, carbonrow will be in order where added sort column is at the beginning, But
 * before final merger of sort, the data should be in schema order
 * (org.apache.carbondata.processing.sort.SchemaBasedRowUpdater updates the carbonRow in schema
 * order), so This method helps to find the index of no dictionary/ dictionary sort column in
 * the carbonrow data.
 *
 * @param dataFields data fields in the order they appear in the carbon row
 * @return map with keys "columnIdxBasedOnSchemaInRow" (row indices of primitive
 *         sort columns), "noDictSortIdxBasedOnSchemaInRow" and
 *         "dictSortIdxBasedOnSchemaInRow" (sort-column positions within the
 *         no-dictionary / dictionary dimension groups respectively)
 */
public static Map<String, int[]> getColumnIdxBasedOnSchemaInRowAsDataFieldOrder(DataField[] dataFields) {
List<Integer> noDicSortColMap = new ArrayList<>();
int counter = 0;
// get no-dict / dict sort column schema
List<CarbonColumn> noDictSortColumns = new ArrayList<>();
List<CarbonColumn> dictSortColumns = new ArrayList<>();
List<Integer> noDictSortColIdx = new ArrayList<>();
List<Integer> dictSortColIdx = new ArrayList<>();
for (DataField dataField : dataFields) {
if (!dataField.getColumn().isInvisible() && dataField.getColumn().isDimension()) {
// split dimensions into dictionary vs no-dictionary groups
if (dataField.getColumn().getColumnSchema().hasEncoding(Encoding.DICTIONARY) || dataField.getColumn().getColumnSchema().getDataType() == DataTypes.DATE) {
dictSortColumns.add(dataField.getColumn());
} else {
noDictSortColumns.add(dataField.getColumn());
}
// DATE dimensions do not occupy a slot in the carbon row counter
if (dataField.getColumn().getColumnSchema().getDataType() == DataTypes.DATE) {
continue;
}
if (dataField.getColumn().getColumnSchema().isSortColumn() && DataTypeUtil.isPrimitiveColumn(dataField.getColumn().getColumnSchema().getDataType())) {
noDicSortColMap.add(counter);
}
counter++;
}
}
// add no-Dict sort column index
for (int i = 0; i < noDictSortColumns.size(); i++) {
if (noDictSortColumns.get(i).getColumnSchema().isSortColumn()) {
noDictSortColIdx.add(i);
}
}
// add dict sort column index
for (int i = 0; i < dictSortColumns.size(); i++) {
if (dictSortColumns.get(i).getColumnSchema().isSortColumn()) {
dictSortColIdx.add(i);
}
}
// Fix: the three identical Integer[]->int[] copy loops are collapsed into
// one helper, removing the duplicated boxing/unboxing boilerplate.
Map<String, int[]> dictOrNoSortInfoMap = new HashMap<>();
dictOrNoSortInfoMap.put("columnIdxBasedOnSchemaInRow", toPrimitiveIntArray(noDicSortColMap));
dictOrNoSortInfoMap.put("noDictSortIdxBasedOnSchemaInRow", toPrimitiveIntArray(noDictSortColIdx));
dictOrNoSortInfoMap.put("dictSortIdxBasedOnSchemaInRow", toPrimitiveIntArray(dictSortColIdx));
return dictOrNoSortInfoMap;
}

/**
 * Converts a boxed integer list to a primitive int array, preserving order.
 */
private static int[] toPrimitiveIntArray(List<Integer> values) {
int[] result = new int[values.size()];
for (int i = 0; i < result.length; i++) {
result[i] = values.get(i);
}
return result;
}
Aggregations