Search in sources:

Example 1 with TableSpec

use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by apache.

the class DataLoadProcessBuilder method createConfiguration.

/**
 * Builds the {@link CarbonDataLoadConfiguration} for the load described by {@code loadModel}.
 *
 * <p>The table is taken from the model's data load schema and is also registered with
 * {@link CarbonMetadata}. Load options are copied from the model into the configuration.
 * Several of them (serialization null format, bad-record logger enable/action, empty-data
 * flag) are stored as the second element of the model value split on {@code ","}; a value
 * without a second element therefore fails with {@link ArrayIndexOutOfBoundsException}.
 *
 * <p>Data fields are ordered as: non-complex dimensions, then complex dimensions, then
 * measures (the placeholder measure named {@code default_dummy_measure} is left out).
 *
 * @param loadModel model describing the load to configure
 * @return the populated configuration, including a {@link TableSpec} for the table
 */
public static CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel) {
    CarbonDataLoadConfiguration config = new CarbonDataLoadConfiguration();
    CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();

    // Table identity and basic load coordinates.
    AbsoluteTableIdentifier tableIdentifier = table.getAbsoluteTableIdentifier();
    config.setTableIdentifier(tableIdentifier);
    config.setSchemaUpdatedTimeStamp(table.getTableLastUpdatedTime());
    config.setHeader(loadModel.getCsvHeaderColumns());
    config.setSegmentId(loadModel.getSegmentId());
    config.setTaskNo(loadModel.getTaskNo());

    // Load options; the split(",")[1] values keep only the part after the first comma.
    config.setDataLoadProperty(
        DataLoadProcessorConstants.COMPLEX_DELIMITERS,
        new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
    config.setDataLoadProperty(
        DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT,
        loadModel.getSerializationNullFormat().split(",")[1]);
    config.setDataLoadProperty(
        DataLoadProcessorConstants.FACT_TIME_STAMP,
        loadModel.getFactTimeStamp());
    config.setDataLoadProperty(
        DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE,
        loadModel.getBadRecordsLoggerEnable().split(",")[1]);
    config.setDataLoadProperty(
        DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION,
        loadModel.getBadRecordsAction().split(",")[1]);
    config.setDataLoadProperty(
        DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD,
        loadModel.getIsEmptyDataBadRecord().split(",")[1]);
    config.setDataLoadProperty(
        DataLoadProcessorConstants.SKIP_EMPTY_LINE,
        loadModel.getSkipEmptyLine());
    config.setDataLoadProperty(
        DataLoadProcessorConstants.FACT_FILE_PATH,
        loadModel.getFactFilePath());
    config.setDataLoadProperty(
        CarbonCommonConstants.LOAD_SORT_SCOPE,
        loadModel.getSortScope());
    config.setDataLoadProperty(
        CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
        loadModel.getBatchSortSizeInMb());
    config.setDataLoadProperty(
        CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS,
        loadModel.getGlobalSortPartitions());
    config.setDataLoadProperty(
        CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH,
        loadModel.getBadRecordsLocation());

    CarbonMetadata.getInstance().addCarbonTable(table);

    List<CarbonDimension> tableDimensions = table.getDimensionByTableName(table.getTableName());
    List<CarbonMeasure> tableMeasures = table.getMeasureByTableName(table.getTableName());
    List<DataField> orderedFields = new ArrayList<>();
    List<DataField> complexFields = new ArrayList<>();

    // Non-complex dimensions go first; complex dimensions are held back and appended after them.
    for (CarbonColumn dimension : tableDimensions) {
        DataField field = new DataField(dimension);
        if (dimension.getDataType() == DataTypes.DATE) {
            field.setDateFormat(loadModel.getDateFormat());
        } else if (dimension.getDataType() == DataTypes.TIMESTAMP) {
            field.setTimestampFormat(loadModel.getTimestampformat());
        }
        if (!dimension.isComplex()) {
            orderedFields.add(field);
        } else {
            complexFields.add(field);
        }
    }
    orderedFields.addAll(complexFields);

    // Measures come last.
    for (CarbonColumn measure : tableMeasures) {
        // The dummy measure only exists when the table has no real measure; it is not loaded.
        if (measure.getColName().equals("default_dummy_measure")) {
            continue;
        }
        orderedFields.add(new DataField(measure));
    }
    config.setDataFields(orderedFields.toArray(new DataField[orderedFields.size()]));
    config.setBucketingInfo(table.getBucketingInfo(table.getTableName()));

    // One-pass load settings: dictionary server details.
    config.setUseOnePass(loadModel.getUseOnePass());
    config.setDictionaryServerHost(loadModel.getDictionaryServerHost());
    config.setDictionaryServerPort(loadModel.getDictionaryServerPort());
    config.setDictionaryServerSecretKey(loadModel.getDictionaryServerSecretKey());
    config.setDictionaryEncryptServerSecure(loadModel.getDictionaryEncryptServerSecure());
    config.setDictionaryServiceProvider(loadModel.getDictionaryServiceProvider());

    config.setPreFetch(loadModel.isPreFetch());
    config.setNumberOfSortColumns(table.getNumberOfSortColumns());
    config.setNumberOfNoDictSortColumns(table.getNumberOfNoDictSortColumns());
    config.setDataWritePath(loadModel.getDataWritePath());
    setSortColumnInfo(table, loadModel, config);

    // Hive partition tables are pinned to a single writing core (threads per partition).
    if (table.isHivePartitionTable()) {
        config.setWritingCoresCount((short) 1);
    }

    config.setTableSpec(new TableSpec(table));
    return config;
}
Also used : CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) TableSpec(org.apache.carbondata.core.datastore.TableSpec) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) ArrayList(java.util.ArrayList) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)

Example 2 with TableSpec

use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by apache.

the class CarbonFactDataHandlerModel method getCarbonFactDataHandlerModel.

/**
 * Creates and populates the model object used by the carbon fact data handler.
 *
 * <p>Note that the {@link TableSpec} and the data map writers are built from the table held
 * by {@code loadModel}'s data load schema, not from the {@code carbonTable} argument.
 *
 * @param loadModel               load model supplying the database name, segment id and the
 *                                table used for the table spec and data map writers
 * @param carbonTable             table supplying the schema timestamp, dimensions, measures
 *                                and block size
 * @param segmentProperties       segment properties supplying measures, dimensions and
 *                                dimension cardinalities
 * @param tableName               table name stored on the model and used to look up the
 *                                dimensions and measures of {@code carbonTable}
 * @param tempStoreLocation       temporary store locations; stored on the model, and one
 *                                randomly chosen entry is handed to the data map writers
 * @param carbonDataDirectoryPath directory for carbon data; checked/created via
 *                                {@code CarbonUtil.checkAndCreateFolderWithPermission}
 * @return the populated handler model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String[] tempStoreLocation, String carbonDataDirectoryPath) {
    CarbonFactDataHandlerModel handlerModel = new CarbonFactDataHandlerModel();

    // Identity, counts and cardinalities.
    handlerModel.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
    handlerModel.setDatabaseName(loadModel.getDatabaseName());
    handlerModel.setTableName(tableName);
    handlerModel.setMeasureCount(segmentProperties.getMeasures().size());
    handlerModel.setStoreLocation(tempStoreLocation);
    handlerModel.setDimLens(segmentProperties.getDimColumnsCardinality());
    handlerModel.setSegmentProperties(segmentProperties);
    handlerModel.setNoDictionaryCount(segmentProperties.getNumberOfNoDictionaryDimension());
    // Dimension count excludes the no-dictionary dimensions recorded just above.
    handlerModel.setDimensionCount(
        segmentProperties.getDimensions().size() - handlerModel.getNoDictionaryCount());

    List<ColumnSchema> columnSchemas = CarbonUtil.getColumnSchemaList(
        carbonTable.getDimensionByTableName(tableName),
        carbonTable.getMeasureByTableName(tableName));
    handlerModel.setWrapperColumnSchema(columnSchemas);

    // Cardinality for all columns, including no-dictionary columns.
    int[] columnCardinality = CarbonUtil.getFormattedCardinality(
        segmentProperties.getDimColumnsCardinality(), columnSchemas);
    handlerModel.setColCardinality(columnCardinality);

    // TO-DO Need to handle complex types here: the map is created empty and never filled.
    Map<Integer, GenericDataType> complexTypeByIndex =
        new HashMap<>(segmentProperties.getComplexDimensions().size());
    handlerModel.setComplexIndexMap(complexTypeByIndex);

    // Measure data types, in the order the segment properties list the measures.
    DataType[] measureTypes = new DataType[segmentProperties.getMeasures().size()];
    int measurePos = 0;
    for (CarbonMeasure measure : segmentProperties.getMeasures()) {
        measureTypes[measurePos] = measure.getDataType();
        measurePos++;
    }
    handlerModel.setMeasureDataType(measureTypes);

    CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath);
    handlerModel.setCarbonDataDirectoryPath(carbonDataDirectoryPath);

    // One inverted-index flag per dimension of the table.
    List<CarbonDimension> tableDimensions = carbonTable.getDimensionByTableName(tableName);
    boolean[] invertedIndexFlags = new boolean[tableDimensions.size()];
    int flagPos = 0;
    for (CarbonDimension tableDimension : tableDimensions) {
        invertedIndexFlags[flagPos] = tableDimension.isUseInvertedIndex();
        flagPos++;
    }
    handlerModel.setIsUseInvertedIndex(invertedIndexFlags);

    handlerModel.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
    handlerModel.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
    handlerModel.tableSpec = new TableSpec(loadModel.getCarbonDataLoadSchema().getCarbonTable());

    // Register data map writers against one randomly picked temp store location.
    DataMapWriterListener writerListener = new DataMapWriterListener();
    writerListener.registerAllWriter(
        loadModel.getCarbonDataLoadSchema().getCarbonTable(),
        loadModel.getSegmentId(),
        tempStoreLocation[new Random().nextInt(tempStoreLocation.length)]);
    handlerModel.dataMapWriterlistener = writerListener;
    return handlerModel;
}
Also used : TableSpec(org.apache.carbondata.core.datastore.TableSpec) HashMap(java.util.HashMap) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonDimension(org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension) GenericDataType(org.apache.carbondata.processing.datatypes.GenericDataType) CarbonMeasure(org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure) DataMapWriterListener(org.apache.carbondata.processing.datamap.DataMapWriterListener) Random(java.util.Random) DataType(org.apache.carbondata.core.metadata.datatype.DataType)

Example 3 with TableSpec

use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by apache.

the class TablePage method getColumnPage.

/**
 * Returns the column page of the column with the given name.
 *
 * <p>Dimensions are searched first, then measures; names are compared case-insensitively.
 * Only dictionary (global or direct) and plain-value dimensions are considered; dimensions
 * of any other column type (e.g. complex) are skipped.
 *
 * @param columnName name of the column whose page is wanted
 * @return the matching dimension or measure page
 * @throws IllegalArgumentException if no considered dimension or measure has that name
 */
public ColumnPage getColumnPage(String columnName) {
    TableSpec tableSpec = model.getTableSpec();

    // Each cursor points at the last page consumed from its page array (-1 = none yet).
    int dictCursor = -1;
    int noDictCursor = -1;

    int dimensionCount = tableSpec.getNumDimensions();
    for (int dim = 0; dim < dimensionCount; dim++) {
        ColumnType columnType = tableSpec.getDimensionSpec(dim).getColumnType();
        boolean dictionaryEncoded =
            columnType == ColumnType.GLOBAL_DICTIONARY || columnType == ColumnType.DIRECT_DICTIONARY;
        if (!dictionaryEncoded && columnType != ColumnType.PLAIN_VALUE) {
            // do not support datamap on complex column
            continue;
        }

        // The cursor must advance for every supported dimension, matching name or not,
        // so that it stays aligned with the dimension's position in its page array.
        ColumnPage candidate;
        if (dictionaryEncoded) {
            dictCursor++;
            candidate = dictDimensionPages[dictCursor];
        } else {
            noDictCursor++;
            candidate = noDictDimensionPages[noDictCursor];
        }

        if (tableSpec.getDimensionSpec(dim).getFieldName().equalsIgnoreCase(columnName)) {
            return candidate;
        }
    }

    int measureCount = tableSpec.getNumMeasures();
    for (int msr = 0; msr < measureCount; msr++) {
        if (tableSpec.getMeasureSpec(msr).getFieldName().equalsIgnoreCase(columnName)) {
            return measurePages[msr];
        }
    }

    throw new IllegalArgumentException("DataMap: must have '" + columnName + "' column in schema");
}
Also used : TableSpec(org.apache.carbondata.core.datastore.TableSpec) ColumnType(org.apache.carbondata.core.datastore.ColumnType) EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) ComplexColumnPage(org.apache.carbondata.core.datastore.page.ComplexColumnPage) ColumnPage(org.apache.carbondata.core.datastore.page.ColumnPage)

Example 4 with TableSpec

use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by apache.

the class TablePage method encodeAndCompressDimensions.

/**
 * Encodes every dimension page of this table page and returns the encoded pages.
 *
 * <p>Dictionary (global/direct) and plain-value dimensions are encoded with an encoder
 * obtained from {@code encodingFactory}; complex dimensions are encoded through
 * {@code ColumnPageEncoder.encodeComplexColumn}, which may yield several pages each.
 * In the returned array all dictionary and plain-value pages come first, in dimension
 * order, followed by all pages produced for complex dimensions.
 *
 * @return the encoded dimension pages
 * @throws IllegalArgumentException if a dimension has an unsupported column type
 */
private EncodedColumnPage[] encodeAndCompressDimensions() throws KeyGenException, IOException, MemoryException {
    List<EncodedColumnPage> primitivePages = new ArrayList<>();
    List<EncodedColumnPage> complexPages = new ArrayList<>();
    TableSpec tableSpec = model.getTableSpec();

    // Next unread position in each of the three per-kind page arrays.
    int dictCursor = 0;
    int noDictCursor = 0;
    int complexCursor = 0;

    int dimensionCount = tableSpec.getNumDimensions();
    for (int dim = 0; dim < dimensionCount; dim++) {
        TableSpec.DimensionSpec dimensionSpec = tableSpec.getDimensionSpec(dim);
        ColumnPageEncoder encoder;
        switch (dimensionSpec.getColumnType()) {
            case GLOBAL_DICTIONARY:
            case DIRECT_DICTIONARY: {
                encoder = encodingFactory.createEncoder(dimensionSpec, dictDimensionPages[dictCursor]);
                primitivePages.add(encoder.encode(dictDimensionPages[dictCursor]));
                dictCursor++;
                break;
            }
            case PLAIN_VALUE: {
                encoder = encodingFactory.createEncoder(dimensionSpec, noDictDimensionPages[noDictCursor]);
                primitivePages.add(encoder.encode(noDictDimensionPages[noDictCursor]));
                noDictCursor++;
                break;
            }
            case COMPLEX: {
                EncodedColumnPage[] pagesOfComplexColumn =
                    ColumnPageEncoder.encodeComplexColumn(complexDimensionPages[complexCursor]);
                complexCursor++;
                for (EncodedColumnPage complexPage : pagesOfComplexColumn) {
                    complexPages.add(complexPage);
                }
                break;
            }
            default:
                throw new IllegalArgumentException("unsupported dimension type:" + dimensionSpec.getColumnType());
        }
    }

    // Complex pages are appended after all dictionary / plain-value pages.
    primitivePages.addAll(complexPages);
    return primitivePages.toArray(new EncodedColumnPage[primitivePages.size()]);
}
Also used : ColumnPageEncoder(org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder) TableSpec(org.apache.carbondata.core.datastore.TableSpec) EncodedColumnPage(org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage) ArrayList(java.util.ArrayList)

Aggregations

TableSpec (org.apache.carbondata.core.datastore.TableSpec)4 ArrayList (java.util.ArrayList)2 EncodedColumnPage (org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage)2 CarbonDimension (org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension)2 CarbonMeasure (org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure)2 HashMap (java.util.HashMap)1 Random (java.util.Random)1 ColumnType (org.apache.carbondata.core.datastore.ColumnType)1 ColumnPage (org.apache.carbondata.core.datastore.page.ColumnPage)1 ComplexColumnPage (org.apache.carbondata.core.datastore.page.ComplexColumnPage)1 ColumnPageEncoder (org.apache.carbondata.core.datastore.page.encoding.ColumnPageEncoder)1 AbsoluteTableIdentifier (org.apache.carbondata.core.metadata.AbsoluteTableIdentifier)1 DataType (org.apache.carbondata.core.metadata.datatype.DataType)1 CarbonTable (org.apache.carbondata.core.metadata.schema.table.CarbonTable)1 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)1 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)1 DataMapWriterListener (org.apache.carbondata.processing.datamap.DataMapWriterListener)1 GenericDataType (org.apache.carbondata.processing.datatypes.GenericDataType)1