Search in sources :

Example 21 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

The following example is taken from the class StoreCreator, method writeDictionary.

/**
 * Scans the CSV fact file, collects the distinct values of every dimension
 * column, and writes a dictionary plus its sort index for each dimension.
 *
 * @param factFilePath path to the comma-separated fact file; the first line is
 *                     the header and is skipped
 * @param table        table whose dimension dictionaries are generated
 * @throws Exception if reading the fact file or writing a dictionary fails
 */
private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getTableName());
    // One distinct-value set per dimension; the fact file's leading columns are
    // assumed to line up with the dimension list (same assumption as before).
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    // try-with-resources closes the reader even if a read/parse step throws
    // (the original leaked the reader on any exception).
    try (BufferedReader reader = new BufferedReader(new FileReader(factFilePath))) {
        // skip the header line
        reader.readLine();
        String line = reader.readLine();
        while (line != null) {
            String[] data = line.split(",");
            for (int i = 0; i < set.length; i++) {
                set[i].add(data[i]);
            }
            line = reader.readLine();
        }
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY);
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier =
            new DictionaryColumnUniqueIdentifier(table.getAbsoluteTableIdentifier(), columnIdentifier,
                columnIdentifier.getDataType());
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            for (String value : set[i]) {
                writer.write(value);
            }
        } finally {
            // close() must precede commit() for CarbonDictionaryWriter; the
            // finally block guarantees the writer is released on failure too.
            writer.close();
        }
        writer.commit();
        // BUG FIX: the original referenced an undefined variable 'identifier';
        // the table's AbsoluteTableIdentifier is the value the cache key needs.
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(
            table.getAbsoluteTableIdentifier(), columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        // No newly added distinct values at this point — the sort info is built
        // purely from the dictionary that was just written.
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo =
            preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter =
            new CarbonDictionarySortIndexWriterImpl(dictionaryColumnUniqueIdentifier);
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 22 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

The following example is taken from the class CarbonStreamRecordReader, method initialize.

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // input
    // Resolve the concrete split: a multi-block split is reduced to its first
    // block; anything else is treated as a plain file split.
    if (split instanceof CarbonInputSplit) {
        fileSplit = (CarbonInputSplit) split;
    } else if (split instanceof CarbonMultiBlockSplit) {
        fileSplit = ((CarbonMultiBlockSplit) split).getAllSplits().get(0);
    } else {
        fileSplit = (FileSplit) split;
    }
    // metadata
    hadoopConf = context.getConfiguration();
    if (model == null) {
        // No externally supplied query model: derive one from the input format.
        CarbonTableInputFormat format = new CarbonTableInputFormat<Object>();
        model = format.createQueryModel(split, context);
    }
    carbonTable = model.getTable();
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getTableName());
    dimensionCount = dimensions.size();
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getTableName());
    measureCount = measures.size();
    // Stream storage order places dimensions before measures — the
    // measureDataTypes loop below relies on this ordering.
    List<CarbonColumn> carbonColumnList = carbonTable.getStreamStorageOrderColumn(carbonTable.getTableName());
    storageColumns = carbonColumnList.toArray(new CarbonColumn[carbonColumnList.size()]);
    isNoDictColumn = CarbonDataProcessorUtil.getNoDictionaryMapping(storageColumns);
    // A direct-dictionary generator is created only for columns that carry
    // the DIRECT_DICTIONARY encoding; other slots stay null.
    directDictionaryGenerators = new DirectDictionaryGenerator[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        if (storageColumns[i].hasEncoding(Encoding.DIRECT_DICTIONARY)) {
            directDictionaryGenerators[i] = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(storageColumns[i].getDataType());
        }
    }
    measureDataTypes = new DataType[measureCount];
    for (int i = 0; i < measureCount; i++) {
        // Measures follow the dimensions in the storage-order column array.
        measureDataTypes[i] = storageColumns[dimensionCount + i].getDataType();
    }
    // decode data
    allNonNull = new BitSet(storageColumns.length);
    projection = model.getProjectionColumns();
    // For each storage column, record whether it is needed for filtering and
    // where its value goes in the filter row; measure positions are offset
    // past the table's maximum dimension ordinal.
    isRequired = new boolean[storageColumns.length];
    boolean[] isFiltlerDimensions = model.getIsFilterDimensions();
    boolean[] isFiltlerMeasures = model.getIsFilterMeasures();
    isFilterRequired = new boolean[storageColumns.length];
    filterMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        if (storageColumns[i].isDimension()) {
            if (isFiltlerDimensions[storageColumns[i].getOrdinal()]) {
                isRequired[i] = true;
                isFilterRequired[i] = true;
                filterMap[i] = storageColumns[i].getOrdinal();
            }
        } else {
            if (isFiltlerMeasures[storageColumns[i].getOrdinal()]) {
                isRequired[i] = true;
                isFilterRequired[i] = true;
                filterMap[i] = carbonTable.getDimensionOrdinalMax() + storageColumns[i].getOrdinal();
            }
        }
    }
    // Map each storage column (matched by column name) onto its index in the
    // projection, and mark it as required if it appears there.
    isProjectionRequired = new boolean[storageColumns.length];
    projectionMap = new int[storageColumns.length];
    for (int i = 0; i < storageColumns.length; i++) {
        for (int j = 0; j < projection.length; j++) {
            if (storageColumns[i].getColName().equals(projection[j].getColName())) {
                isRequired[i] = true;
                isProjectionRequired[i] = true;
                projectionMap[i] = j;
                break;
            }
        }
    }
    // initialize filter
    if (null != model.getFilterExpressionResolverTree()) {
        initializeFilter();
    } else if (projection.length == 0) {
        // No filter and an empty projection: scanning the data can be skipped.
        skipScanData = true;
    }
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) BitSet(java.util.BitSet) CarbonInputSplit(org.apache.carbondata.hadoop.CarbonInputSplit) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) CarbonMultiBlockSplit(org.apache.carbondata.hadoop.CarbonMultiBlockSplit) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat)

Example 23 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

The following example is taken from the class CarbonLoadModelBuilder, method build.

/**
 * Builds a {@code CarbonLoadModel} for data loading.
 *
 * @param options load options supplied by user input
 * @return a fully initialised {@code CarbonLoadModel} instance
 * @throws InvalidLoadOptionException if an option value (e.g. the dictionary
 *         server port) cannot be parsed
 * @throws IOException if building the underlying model fails
 */
public CarbonLoadModel build(Map<String, String> options) throws InvalidLoadOptionException, IOException {
    Map<String, String> mergedOptions = LoadOption.fillOptionWithDefaultValue(options);
    if (!options.containsKey("fileheader")) {
        // No header supplied by the user: derive it from the table's
        // create-order columns.
        List<CarbonColumn> createOrderColumns = table.getCreateOrderColumn(table.getTableName());
        String[] headerColumns = new String[createOrderColumns.size()];
        int idx = 0;
        for (CarbonColumn column : createOrderColumns) {
            headerColumns[idx++] = column.getColName();
        }
        mergedOptions.put("fileheader", Strings.mkString(headerColumns, ","));
    }
    CarbonLoadModel loadModel = new CarbonLoadModel();
    // 'fileheader' is guaranteed to be present at this point, so the hadoop
    // configuration may safely be null.
    build(options, mergedOptions, loadModel, null);
    // Apply default formats and the one-pass dictionary-server settings.
    loadModel.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
    loadModel.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
    loadModel.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, "onepass", "false")));
    loadModel.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", null));
    try {
        loadModel.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, "dictport", "-1")));
    } catch (NumberFormatException e) {
        throw new InvalidLoadOptionException(e.getMessage());
    }
    return loadModel;
}
Also used : InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)

Example 24 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

The following example is taken from the class MapredCarbonInputFormat, method getProjection.

/**
 * Returns the projection (a comma-separated column list) for the CarbonQuery.
 * The explicit column projection from the configuration wins; otherwise the
 * column list Hive pushes down via {@code hive.io.file.readcolumn.names} is
 * used; if neither is set, all table columns are projected.
 *
 * @param configuration job configuration carrying the projection properties
 * @param carbonTable   table whose columns validate the requested projection
 * @param tableName     name of the table
 * @return comma-separated projection column list (never null)
 */
private String getProjection(Configuration configuration, CarbonTable carbonTable, String tableName) {
    // query plan includes projection column
    String projection = getColumnProjection(configuration);
    if (projection == null) {
        projection = configuration.get("hive.io.file.readcolumn.names");
    }
    if (projection == null) {
        // BUG FIX: both sources can be absent (Configuration.get returns null);
        // the original then dereferenced null. Treat as "project all columns".
        projection = "";
    }
    List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(tableName);
    List<String> carbonColumnNames = new ArrayList<>(carbonColumns.size());
    StringBuilder allColumns = new StringBuilder();
    StringBuilder projectionColumns = new StringBuilder();
    for (CarbonColumn column : carbonColumns) {
        carbonColumnNames.add(column.getColName().toLowerCase());
        allColumns.append(column.getColName()).append(',');
    }
    if (!projection.equals("")) {
        String[] columnNames = projection.split(",");
        // verify that the columns parsed by Hive exist in the table
        for (String col : columnNames) {
            // show columns command will return these data
            if (carbonColumnNames.contains(col.toLowerCase())) {
                projectionColumns.append(col).append(',');
            }
        }
        // BUG FIX: if no requested column matched, the original called
        // substring(0, -1) and threw StringIndexOutOfBoundsException.
        return projectionColumns.length() > 0
            ? projectionColumns.substring(0, projectionColumns.length() - 1)
            : "";
    }
    // Empty projection request: return every column (trailing comma stripped).
    return allColumns.length() > 0 ? allColumns.substring(0, allColumns.length() - 1) : "";
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList)

Example 25 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

The following example is taken from the class CarbondataMetadata, method getTableMetadata.

/**
 * Builds the connector-level table metadata (column names and SPI types)
 * for the given schema-qualified table.
 *
 * @param schemaTableName schema and table identifying the carbon table
 * @return the connector table metadata describing all columns
 * @throws SchemaNotFoundException if the schema is not known to this connector
 */
private ConnectorTableMetadata getTableMetadata(SchemaTableName schemaTableName) {
    if (!listSchemaNamesInternal().contains(schemaTableName.getSchemaName())) {
        throw new SchemaNotFoundException(schemaTableName.getSchemaName());
    }
    CarbonTable carbonTable = carbonTableReader.getTable(schemaTableName);
    List<CarbonColumn> carbonColumns = carbonTable.getCreateOrderColumn(schemaTableName.getTableName());
    // Presized ArrayList instead of LinkedList: the list is only appended to
    // and then iterated, so contiguous storage is the better fit.
    List<ColumnMetadata> columnsMetaList = new ArrayList<>(carbonColumns.size());
    for (CarbonColumn col : carbonColumns) {
        // show columns command will return these data
        // Map the carbon column's data type onto the SPI type.
        Type columnType = carbonDataType2SpiMapper(col.getColumnSchema());
        columnsMetaList.add(new ColumnMetadata(col.getColumnSchema().getColumnName(), columnType));
    }
    // carbondata connector's table metadata
    return new ConnectorTableMetadata(schemaTableName, columnsMetaList);
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) DataType(org.apache.carbondata.core.metadata.datatype.DataType) Types.checkType(org.apache.carbondata.presto.Types.checkType) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)

Aggregations

CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)45 ArrayList (java.util.ArrayList)20 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)14 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)11 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)6 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)6 DataType (org.apache.carbondata.core.metadata.datatype.DataType)5 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)5 Expression (org.apache.carbondata.core.scan.expression.Expression)5 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)5 InExpression (org.apache.carbondata.core.scan.expression.conditional.InExpression)4 TrueExpression (org.apache.carbondata.core.scan.expression.logical.TrueExpression)4 DataField (org.apache.carbondata.processing.loading.DataField)4 HashMap (java.util.HashMap)3 MockUp (mockit.MockUp)3 AbstractDictionaryCacheTest (org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)3 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)3 Test (org.junit.Test)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2