Search in sources :

Example 16 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class IndexWriterListener method onPageAdded.

/**
 * Pick corresponding column pages and add to all registered index
 *
 * @param pageId     sequence number of page, start from 0
 * @param tablePage  page data
 */
public void onPageAdded(int blockletId, int pageId, TablePage tablePage) throws IOException {
    Set<Map.Entry<List<CarbonColumn>, List<IndexWriter>>> entries = registry.entrySet();
    for (Map.Entry<List<CarbonColumn>, List<IndexWriter>> entry : entries) {
        List<CarbonColumn> indexedColumns = entry.getKey();
        ColumnPage[] pages = new ColumnPage[indexedColumns.size()];
        for (int i = 0; i < indexedColumns.size(); i++) {
            pages[i] = tablePage.getColumnPage(indexedColumns.get(i).getColName());
        }
        List<IndexWriter> writers = entry.getValue();
        int pageSize = pages[0].getPageSize();
        for (IndexWriter writer : writers) {
            writer.onPageAdded(blockletId, pageId, pageSize, pages);
        }
    }
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) IndexWriter(org.apache.carbondata.core.index.dev.IndexWriter) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage) ArrayList(java.util.ArrayList) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map)

Example 17 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class IndexWriterListener method register.

/**
 * Register a IndexWriter
 */
private void register(IndexFactory factory, String segmentId, String taskNo, SegmentProperties segmentProperties) {
    assert (factory != null);
    assert (segmentId != null);
    IndexMeta meta = factory.getMeta();
    if (meta == null) {
        // if index does not have meta, no need to register
        return;
    }
    List<CarbonColumn> columns = factory.getMeta().getIndexedColumns();
    List<IndexWriter> writers = registry.get(columns);
    IndexWriter writer = null;
    try {
        writer = factory.createWriter(new Segment(segmentId), taskNo, segmentProperties);
    } catch (IOException e) {
        LOG.error("Failed to create IndexWriter: " + e.getMessage(), e);
        throw new IndexWriterException(e);
    }
    if (writers != null) {
        writers.add(writer);
    } else {
        writers = new ArrayList<>();
        writers.add(writer);
        registry.put(columns, writers);
    }
    LOG.info("IndexWriter " + writer + " added");
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) IndexWriter(org.apache.carbondata.core.index.dev.IndexWriter) IndexMeta(org.apache.carbondata.core.index.IndexMeta) IOException(java.io.IOException) Segment(org.apache.carbondata.core.index.Segment)

Example 18 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class SparkDataTypeConverterImpl method convertCarbonSchemaToSparkSchema.

/**
 * convert from CarbonColumn array to Spark's StructField array
 */
@Override
public Object[] convertCarbonSchemaToSparkSchema(CarbonColumn[] carbonColumns) {
    StructField[] fields = new StructField[carbonColumns.length];
    for (int i = 0; i < carbonColumns.length; i++) {
        CarbonColumn carbonColumn = carbonColumns[i];
        if (carbonColumn.isDimension()) {
            if (carbonColumn.hasEncoding(Encoding.DIRECT_DICTIONARY)) {
                DirectDictionaryGenerator generator = DirectDictionaryKeyGeneratorFactory.getDirectDictionaryGenerator(carbonColumn.getDataType());
                fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(generator.getReturnType()), true, null);
            } else if (!carbonColumn.hasEncoding(Encoding.DICTIONARY)) {
                fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(carbonColumn.getDataType()), true, null);
            } else if (carbonColumn.isComplex()) {
                fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(carbonColumn.getDataType()), true, null);
            } else {
                fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(org.apache.carbondata.core.metadata.datatype.DataTypes.INT), true, null);
            }
        } else if (carbonColumn.isMeasure()) {
            DataType dataType = carbonColumn.getDataType();
            if (dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BOOLEAN || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.SHORT || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.INT || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.LONG || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.BINARY || dataType == org.apache.carbondata.core.metadata.datatype.DataTypes.VARCHAR) {
                fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(dataType), true, null);
            } else if (org.apache.carbondata.core.metadata.datatype.DataTypes.isDecimal(dataType)) {
                CarbonMeasure measure = (CarbonMeasure) carbonColumn;
                fields[i] = new StructField(carbonColumn.getColName(), new DecimalType(measure.getPrecision(), measure.getScale()), true, null);
            } else {
                fields[i] = new StructField(carbonColumn.getColName(), convertCarbonToSparkDataType(org.apache.carbondata.core.metadata.datatype.DataTypes.DOUBLE), true, null);
            }
        }
    }
    return fields;
}
Also used : StructField(org.apache.spark.sql.types.StructField) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) DataType(org.apache.carbondata.core.metadata.datatype.DataType) DecimalType(org.apache.spark.sql.types.DecimalType) DirectDictionaryGenerator(org.apache.carbondata.core.keygenerator.directdictionary.DirectDictionaryGenerator)

Example 19 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class StoreCreator method writeDictionary.

private static void writeDictionary(String factFilePath, CarbonTable table) throws Exception {
    BufferedReader reader = new BufferedReader(new FileReader(factFilePath));
    String header = reader.readLine();
    String[] split = header.split(",");
    List<CarbonColumn> allCols = new ArrayList<CarbonColumn>();
    List<CarbonDimension> dims = table.getDimensionByTableName(table.getFactTableName());
    allCols.addAll(dims);
    List<CarbonMeasure> msrs = table.getMeasureByTableName(table.getFactTableName());
    allCols.addAll(msrs);
    Set<String>[] set = new HashSet[dims.size()];
    for (int i = 0; i < set.length; i++) {
        set[i] = new HashSet<String>();
    }
    String line = reader.readLine();
    while (line != null) {
        String[] data = line.split(",");
        for (int i = 0; i < set.length; i++) {
            set[i].add(data[i]);
        }
        line = reader.readLine();
    }
    Cache dictCache = CacheProvider.getInstance().createCache(CacheType.REVERSE_DICTIONARY, absoluteTableIdentifier.getStorePath());
    for (int i = 0; i < set.length; i++) {
        ColumnIdentifier columnIdentifier = new ColumnIdentifier(dims.get(i).getColumnId(), null, null);
        CarbonDictionaryWriter writer = new CarbonDictionaryWriterImpl(absoluteTableIdentifier.getStorePath(), absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier);
        for (String value : set[i]) {
            writer.write(value);
        }
        writer.close();
        writer.commit();
        Dictionary dict = (Dictionary) dictCache.get(new DictionaryColumnUniqueIdentifier(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, dims.get(i).getDataType()));
        CarbonDictionarySortInfoPreparator preparator = new CarbonDictionarySortInfoPreparator();
        List<String> newDistinctValues = new ArrayList<String>();
        CarbonDictionarySortInfo dictionarySortInfo = preparator.getDictionarySortInfo(newDistinctValues, dict, dims.get(i).getDataType());
        CarbonDictionarySortIndexWriter carbonDictionaryWriter = new CarbonDictionarySortIndexWriterImpl(absoluteTableIdentifier.getCarbonTableIdentifier(), columnIdentifier, absoluteTableIdentifier.getStorePath());
        try {
            carbonDictionaryWriter.writeSortIndex(dictionarySortInfo.getSortIndex());
            carbonDictionaryWriter.writeInvertedSortIndex(dictionarySortInfo.getSortIndexInverted());
        } finally {
            carbonDictionaryWriter.close();
        }
    }
    reader.close();
}
Also used : Dictionary(org.apache.carbondata.core.cache.dictionary.Dictionary) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) Set(java.util.Set) HashSet(java.util.HashSet) CarbonDictionaryWriterImpl(org.apache.carbondata.core.writer.CarbonDictionaryWriterImpl) ArrayList(java.util.ArrayList) CarbonDictionarySortIndexWriterImpl(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriterImpl) FileReader(java.io.FileReader) HashSet(java.util.HashSet) CarbonDictionarySortInfoPreparator(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfoPreparator) CarbonDictionarySortInfo(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortInfo) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonDictionarySortIndexWriter(org.apache.carbondata.core.writer.sortindex.CarbonDictionarySortIndexWriter) DictionaryColumnUniqueIdentifier(org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) BufferedReader(java.io.BufferedReader) ColumnIdentifier(org.apache.carbondata.core.metadata.ColumnIdentifier) CarbonDictionaryWriter(org.apache.carbondata.core.writer.CarbonDictionaryWriter) Cache(org.apache.carbondata.core.cache.Cache)

Example 20 with CarbonColumn

use of org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn in project carbondata by apache.

the class DataLoadProcessBuilder method createConfiguration.

private CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel, String storeLocation) throws Exception {
    if (!new File(storeLocation).mkdirs()) {
        LOGGER.error("Error while creating the temp store path: " + storeLocation);
    }
    CarbonDataLoadConfiguration configuration = new CarbonDataLoadConfiguration();
    String databaseName = loadModel.getDatabaseName();
    String tableName = loadModel.getTableName();
    String tempLocationKey = CarbonDataProcessorUtil.getTempStoreLocationKey(databaseName, tableName, loadModel.getTaskNo(), false);
    CarbonProperties.getInstance().addProperty(tempLocationKey, storeLocation);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.STORE_LOCATION_HDFS, loadModel.getStorePath());
    CarbonTable carbonTable = loadModel.getCarbonDataLoadSchema().getCarbonTable();
    AbsoluteTableIdentifier identifier = carbonTable.getAbsoluteTableIdentifier();
    configuration.setTableIdentifier(identifier);
    configuration.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    configuration.setHeader(loadModel.getCsvHeaderColumns());
    configuration.setPartitionId(loadModel.getPartitionId());
    configuration.setSegmentId(loadModel.getSegmentId());
    configuration.setTaskNo(loadModel.getTaskNo());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS, new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
    configuration.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT, loadModel.getSerializationNullFormat().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP, loadModel.getFactTimeStamp());
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE, loadModel.getBadRecordsLoggerEnable().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION, loadModel.getBadRecordsAction().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD, loadModel.getIsEmptyDataBadRecord().split(",")[1]);
    configuration.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH, loadModel.getFactFilePath());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE, loadModel.getSortScope());
    configuration.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB, loadModel.getBatchSortSizeInMb());
    CarbonMetadata.getInstance().addCarbonTable(carbonTable);
    List<CarbonDimension> dimensions = carbonTable.getDimensionByTableName(carbonTable.getFactTableName());
    List<CarbonMeasure> measures = carbonTable.getMeasureByTableName(carbonTable.getFactTableName());
    Map<String, String> dateFormatMap = CarbonDataProcessorUtil.getDateFormatMap(loadModel.getDateFormat());
    List<DataField> dataFields = new ArrayList<>();
    List<DataField> complexDataFields = new ArrayList<>();
    // And then add complex data types and measures.
    for (CarbonColumn column : dimensions) {
        DataField dataField = new DataField(column);
        dataField.setDateFormat(dateFormatMap.get(column.getColName()));
        if (column.isComplex()) {
            complexDataFields.add(dataField);
        } else {
            dataFields.add(dataField);
        }
    }
    dataFields.addAll(complexDataFields);
    for (CarbonColumn column : measures) {
        // This dummy measure is added when no measure was present. We no need to load it.
        if (!(column.getColName().equals("default_dummy_measure"))) {
            dataFields.add(new DataField(column));
        }
    }
    configuration.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
    configuration.setBucketingInfo(carbonTable.getBucketingInfo(carbonTable.getFactTableName()));
    // configuration for one pass load: dictionary server info
    configuration.setUseOnePass(loadModel.getUseOnePass());
    configuration.setDictionaryServerHost(loadModel.getDictionaryServerHost());
    configuration.setDictionaryServerPort(loadModel.getDictionaryServerPort());
    configuration.setPreFetch(loadModel.isPreFetch());
    configuration.setNumberOfSortColumns(carbonTable.getNumberOfSortColumns());
    configuration.setNumberOfNoDictSortColumns(carbonTable.getNumberOfNoDictSortColumns());
    return configuration;
}
Also used : CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) File(java.io.File)

Aggregations

CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)45 ArrayList (java.util.ArrayList)20 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)14 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)11 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)6 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)6 DataType (org.apache.carbondata.core.metadata.datatype.DataType)5 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)5 Expression (org.apache.carbondata.core.scan.expression.Expression)5 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)5 InExpression (org.apache.carbondata.core.scan.expression.conditional.InExpression)4 TrueExpression (org.apache.carbondata.core.scan.expression.logical.TrueExpression)4 DataField (org.apache.carbondata.processing.loading.DataField)4 HashMap (java.util.HashMap)3 MockUp (mockit.MockUp)3 AbstractDictionaryCacheTest (org.apache.carbondata.core.cache.dictionary.AbstractDictionaryCacheTest)3 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)3 Test (org.junit.Test)3 BufferedReader (java.io.BufferedReader)2 FileReader (java.io.FileReader)2