Search in sources :

Example 41 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class BlockletIndexUtil method getMinMaxFlagValuesForColumnsToBeCached.

/**
 * Picks the min/max flags that belong to the columns selected for caching.
 *
 * @param segmentProperties passed through to {@code getColumnOrdinal} to resolve each column
 * @param minMaxCacheColumns columns to be cached; when {@code null} the input flags are returned
 *                           unchanged
 * @param minMaxFlag flags indexed by the ordinal that {@code getColumnOrdinal} yields
 * @return {@code minMaxFlag} itself when {@code minMaxCacheColumns} is {@code null}, otherwise a
 *         new array with one flag per entry of {@code minMaxCacheColumns}, in list order
 */
public static boolean[] getMinMaxFlagValuesForColumnsToBeCached(SegmentProperties segmentProperties, List<CarbonColumn> minMaxCacheColumns, boolean[] minMaxFlag) {
    // no explicit cache-column list: hand back the caller's array as-is
    if (minMaxCacheColumns == null) {
        return minMaxFlag;
    }
    boolean[] selectedFlags = new boolean[minMaxCacheColumns.size()];
    int position = 0;
    for (CarbonColumn cacheColumn : minMaxCacheColumns) {
        int ordinal = getColumnOrdinal(segmentProperties, cacheColumn);
        selectedFlags[position] = minMaxFlag[ordinal];
        position++;
    }
    return selectedFlags;
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)

Example 42 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class CarbonHiveSerDe method inferSchema.

/**
 * Fills {@code columnNames} and {@code columnTypes} from the carbon table schema when Hive
 * supplied neither. This is best effort: any failure while reading the schema is logged as a
 * warning and the lists are left as they were.
 *
 * <p>Nothing is done unless all of the following hold: both lists are empty, the table property
 * {@code EXTERNAL} equals {@code "TRUE"}, a table location is available, and the table name
 * property has the form {@code database.table}.
 *
 * @param tbl table properties handed over by Hive
 * @param columnNames output list that receives the column names in create order
 * @param columnTypes output list that receives the matching Hive type for each column
 */
private void inferSchema(Properties tbl, List<String> columnNames, List<TypeInfo> columnTypes) {
    // Hive already knows (part of) the schema: nothing to infer
    if (!columnNames.isEmpty() || !columnTypes.isEmpty()) {
        return;
    }
    String external = tbl.getProperty("EXTERNAL");
    String location = CarbonUtil.checkAndAppendFileSystemURIScheme(tbl.getProperty(hive_metastoreConstants.META_TABLE_LOCATION));
    // "TRUE".equals(...) is already null-safe, no separate null check needed
    if (!"TRUE".equals(external) || location == null) {
        return;
    }
    String qualifiedName = tbl.getProperty(hive_metastoreConstants.META_TABLE_NAME);
    if (qualifiedName == null) {
        // without a table name no identifier can be built; skip instead of failing with an NPE
        return;
    }
    String[] names = qualifiedName.split("\\.");
    if (names.length != 2) {
        return;
    }
    AbsoluteTableIdentifier identifier = AbsoluteTableIdentifier.from(location, names[0], names[1]);
    String schemaPath = CarbonTablePath.getSchemaFilePath(identifier.getTablePath());
    try {
        TableInfo tableInfo;
        if (FileFactory.isFileExist(schemaPath)) {
            tableInfo = SchemaReader.getTableInfo(identifier);
        } else {
            // no schema file at the expected path: let SchemaReader infer one
            tableInfo = SchemaReader.inferSchema(identifier, false);
        }
        if (tableInfo == null) {
            return;
        }
        CarbonTable carbonTable = CarbonTable.buildFromTableInfo(tableInfo);
        for (CarbonColumn column : carbonTable.getCreateOrderColumn()) {
            columnNames.add(column.getColName());
            columnTypes.add(HiveDataTypeUtils.convertCarbonDataTypeToHive(column));
        }
    } catch (Exception ex) {
        // pass the exception itself so the stack trace and cause reach the log
        LOGGER.warn("Failed to infer schema: " + ex.getMessage(), ex);
    }
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 43 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class MapredCarbonInputFormat method getProjection.

/**
 * Return the Projection for the CarbonQuery.
 *
 * <p>The requested projection is taken from {@code getColumnProjection(configuration)} and, when
 * that is {@code null}, from the {@code hive.io.file.readcolumn.names} setting. Requested names
 * that do not match a table column (case-insensitively) are dropped. When no projection is
 * requested at all, every table column is returned in create order.
 *
 * @param configuration job configuration carrying the projection
 * @param carbonTable table whose create-order columns are used for validation
 * @return comma separated column names without a trailing comma; an empty string when no
 *         requested name matches a table column or the table has no columns
 */
private String getProjection(Configuration configuration, CarbonTable carbonTable) {
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    if (projection == null) {
        projection = configuration.get("hive.io.file.readcolumn.names");
    }
    List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn();
    List<String> carbonColumnNames = new ArrayList<>();
    StringBuilder allColumns = new StringBuilder();
    for (CarbonColumn column : carbonColumns) {
        carbonColumnNames.add(column.getColName().toLowerCase());
        allColumns.append(column.getColName()).append(",");
    }
    StringBuilder selected;
    if (null != projection && !projection.equals("")) {
        selected = new StringBuilder();
        // keep only the names parsed by Hive that actually exist in the table
        for (String col : projection.split(",")) {
            if (carbonColumnNames.contains(col.toLowerCase())) {
                selected.append(col).append(",");
            }
        }
    } else {
        selected = allColumns;
    }
    // An empty builder used to reach substring(0, -1) and throw
    // StringIndexOutOfBoundsException; return an empty projection instead.
    if (selected.length() == 0) {
        return "";
    }
    // every appended name is followed by ",", so the last character is the trailing comma
    return selected.substring(0, selected.length() - 1);
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList)

Example 44 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class CarbonFactDataHandlerModel method getCarbonFactDataHandlerModel.

/**
 * Builds and populates a {@link CarbonFactDataHandlerModel} for a data load.
 *
 * <p>Besides copying values from the arguments into the model, this method creates the folder
 * {@code carbonDataDirectoryPath} (via {@code CarbonUtil.checkAndCreateFolderWithPermission})
 * and registers index writers on a new {@code IndexWriterListener}.
 *
 * @param loadModel source of database name, segment id, task number, fact timestamp,
 *                  serialization null format, column compressor and metrics
 * @param carbonTable table whose visible dimensions and measures, block size, sort scope and
 *                    last-updated time are read
 * @param segmentProperties segment properties stored on the model; also used to size the
 *                          no-dictionary/complex column array and to read measure data types
 * @param tableName table name stored on the model
 * @param tempStoreLocation store locations stored on the model
 * @param carbonDataDirectoryPath directory that is created if needed and stored on the model
 * @return the populated model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String[] tempStoreLocation, String carbonDataDirectoryPath) {
    // Collect every visible dimension that is not of DATE type, together with its data type.
    // The array is sized from segmentProperties, so it must be able to hold all of them.
    List<CarbonDimension> visibleDimensions = carbonTable.getVisibleDimensions();
    int capacity = segmentProperties.getNumberOfNoDictionaryDimension() + segmentProperties.getComplexDimensions().size();
    CarbonColumn[] nonDateColumns = new CarbonColumn[capacity];
    int nextSlot = 0;
    List<DataType> nonDateDataTypes = new ArrayList<>();
    for (CarbonDimension dimension : visibleDimensions) {
        if (dimension.getDataType() == DataTypes.DATE) {
            continue;
        }
        nonDateColumns[nextSlot] = new CarbonColumn(dimension.getColumnSchema(), dimension.getOrdinal(), dimension.getSchemaOrdinal());
        nextSlot++;
        nonDateDataTypes.add(dimension.getDataType());
    }

    CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
    model.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    model.setDatabaseName(loadModel.getDatabaseName());
    model.setTableName(tableName);
    model.setStoreLocation(tempStoreLocation);
    model.setSegmentProperties(segmentProperties);
    model.setSegmentId(loadModel.getSegmentId());

    List<ColumnSchema> visibleColumnSchemas = CarbonUtil.getColumnSchemaList(
        carbonTable.getVisibleDimensions(), carbonTable.getVisibleMeasures());
    model.setWrapperColumnSchema(visibleColumnSchemas);
    model.setComplexIndexMap(
        convertComplexDimensionToComplexIndexMap(segmentProperties, loadModel.getSerializationNullFormat()));

    // one data type per measure, in the order segmentProperties lists them
    DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
    int measureIdx = 0;
    for (CarbonMeasure measure : segmentProperties.getMeasures()) {
        measureTypes[measureIdx] = measure.getDataType();
        measureIdx++;
    }
    model.setMeasureDataType(measureTypes);
    model.setNoDictAndComplexColumns(nonDateColumns);
    model.setNoDictDataTypesList(nonDateDataTypes);

    // make sure the target directory exists before its path is recorded on the model
    CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath);
    model.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
    model.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    model.setColumnCompressor(loadModel.getColumnCompressor());
    model.tableSpec = new TableSpec(carbonTable, false);

    IndexWriterListener writerListener = new IndexWriterListener();
    writerListener.registerAllWriter(
        carbonTable,
        loadModel.getSegmentId(),
        CarbonTablePath.getShardName(
            CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(loadModel.getTaskNo()),
            model.getBucketId(),
            model.getTaskExtension(),
            String.valueOf(loadModel.getFactTimeStamp()),
            loadModel.getSegmentId()),
        segmentProperties);
    model.indexWriterlistener = writerListener;

    model.initNumberOfCores();
    model.setColumnLocalDictGenMap(CarbonUtil.getLocalDictionaryModel(carbonTable));
    model.sortScope = carbonTable.getSortScope();
    model.setMetrics(loadModel.getMetrics());
    return model;
}
Also used : TableSpec(org.apache.carbondata.core.datastore.TableSpec) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) DataType(org.apache.carbondata.core.metadata.datatype.DataType) IndexWriterListener(org.apache.carbondata.processing.index.IndexWriterListener)

Example 45 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class CarbonDataProcessorUtil method getColumnIdxBasedOnSchemaInRowAsDataFieldOrder.

/**
 * Computes the positions of sort columns among the visible dimensions of {@code dataFields}.
 *
 * <p>A dimension added by older version 1.1 is a sort column by default, so during initial
 * sorting the carbon row holds the added sort column at the beginning, while the final sort
 * merge needs the data in schema order (see
 * org.apache.carbondata.processing.sort.SchemaBasedRowUpdater). The indexes returned here locate
 * the no-dictionary / dictionary sort columns in the carbon row.
 *
 * <p>The returned map has three entries:
 * <ul>
 *   <li>{@code columnIdxBasedOnSchemaInRow}: positions, counted over visible non-DATE
 *       dimensions, of those that are sort columns with a primitive data type</li>
 *   <li>{@code noDictSortIdxBasedOnSchemaInRow}: positions of sort columns within the visible
 *       dimensions that are neither DICTIONARY encoded nor of DATE type</li>
 *   <li>{@code dictSortIdxBasedOnSchemaInRow}: positions of sort columns within the visible
 *       dimensions that are DICTIONARY encoded or of DATE type</li>
 * </ul>
 *
 * @param dataFields fields in data-field order; invisible columns and non-dimensions are ignored
 * @return map from the three keys above to their index arrays
 */
public static Map<String, int[]> getColumnIdxBasedOnSchemaInRowAsDataFieldOrder(DataField[] dataFields) {
    List<Integer> primitiveSortPositions = new ArrayList<>();
    List<CarbonColumn> noDictColumns = new ArrayList<>();
    List<CarbonColumn> dictColumns = new ArrayList<>();
    // running position over visible dimensions, DATE columns excluded
    int position = 0;
    for (DataField field : dataFields) {
        CarbonColumn column = field.getColumn();
        if (column.isInvisible() || !column.isDimension()) {
            continue;
        }
        boolean isDate = column.getColumnSchema().getDataType() == DataTypes.DATE;
        if (isDate || column.getColumnSchema().hasEncoding(Encoding.DICTIONARY)) {
            dictColumns.add(column);
        } else {
            noDictColumns.add(column);
        }
        // DATE columns are classified above but do not take part in the running position
        if (isDate) {
            continue;
        }
        boolean isPrimitiveSortColumn = column.getColumnSchema().isSortColumn()
            && DataTypeUtil.isPrimitiveColumn(column.getColumnSchema().getDataType());
        if (isPrimitiveSortColumn) {
            primitiveSortPositions.add(position);
        }
        position++;
    }

    Map<String, int[]> sortInfo = new HashMap<>();
    sortInfo.put("columnIdxBasedOnSchemaInRow", toPrimitiveIndexArray(primitiveSortPositions));
    sortInfo.put("noDictSortIdxBasedOnSchemaInRow", toPrimitiveIndexArray(indexesOfSortColumns(noDictColumns)));
    sortInfo.put("dictSortIdxBasedOnSchemaInRow", toPrimitiveIndexArray(indexesOfSortColumns(dictColumns)));
    return sortInfo;
}

/** Returns, in ascending order, the list positions of the columns flagged as sort columns. */
private static List<Integer> indexesOfSortColumns(List<CarbonColumn> columns) {
    List<Integer> indexes = new ArrayList<>();
    for (int i = 0; i < columns.size(); i++) {
        if (columns.get(i).getColumnSchema().isSortColumn()) {
            indexes.add(i);
        }
    }
    return indexes;
}

/** Copies a list of boxed indexes into a primitive array, keeping the order. */
private static int[] toPrimitiveIndexArray(List<Integer> indexes) {
    int[] result = new int[indexes.size()];
    for (int i = 0; i < result.length; i++) {
        result[i] = indexes.get(i);
    }
    return result;
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) DataField(org.apache.carbondata.processing.loading.DataField) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) ArrayList(java.util.ArrayList)

Aggregations

CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)45 ArrayList (java.util.ArrayList)20 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)14 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)11 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)6 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)6 DataType (org.apache.carbondata.core.metadata.datatype.DataType)5 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)5 Expression (org.apache.carbondata.core.scan.expression.Expression)5 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)5 InExpression (org.apache.carbondata.core.scan.expression.conditional.InExpression)4 TrueExpression (org.apache.carbondata.core.scan.expression.logical.TrueExpression)4 DataField (org.apache.carbondata.processing.loading.DataField)4 HashMap (java.util.HashMap)3 MockUp (mockit.MockUp)3 AbstractDictionaryCacheTest (org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)3 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)3 Test (org.junit.Test)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2