Use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by Apache:
the class DataLoadProcessBuilder, method createConfiguration.
/**
 * Builds the {@link CarbonDataLoadConfiguration} used by the data-load pipeline from the
 * supplied load model: table identity, load options, per-column {@link DataField}s
 * (dimensions first, then complex columns, then measures), bucketing, one-pass dictionary
 * server settings and sort configuration.
 *
 * @param loadModel source of table schema and load options
 * @return fully populated load configuration for this segment/task
 */
public static CarbonDataLoadConfiguration createConfiguration(CarbonLoadModel loadModel) {
  CarbonDataLoadConfiguration config = new CarbonDataLoadConfiguration();
  CarbonTable table = loadModel.getCarbonDataLoadSchema().getCarbonTable();
  config.setTableIdentifier(table.getAbsoluteTableIdentifier());
  config.setSchemaUpdatedTimeStamp(table.getTableLastUpdatedTime());
  config.setHeader(loadModel.getCsvHeaderColumns());
  config.setSegmentId(loadModel.getSegmentId());
  config.setTaskNo(loadModel.getTaskNo());
  config.setDataLoadProperty(DataLoadProcessorConstants.COMPLEX_DELIMITERS,
      new String[] { loadModel.getComplexDelimiterLevel1(), loadModel.getComplexDelimiterLevel2() });
  // NOTE(review): several options are stored as "key,value" pairs in the load model;
  // split(",")[1] extracts the value part.
  config.setDataLoadProperty(DataLoadProcessorConstants.SERIALIZATION_NULL_FORMAT,
      loadModel.getSerializationNullFormat().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.FACT_TIME_STAMP,
      loadModel.getFactTimeStamp());
  config.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ENABLE,
      loadModel.getBadRecordsLoggerEnable().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.BAD_RECORDS_LOGGER_ACTION,
      loadModel.getBadRecordsAction().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD,
      loadModel.getIsEmptyDataBadRecord().split(",")[1]);
  config.setDataLoadProperty(DataLoadProcessorConstants.SKIP_EMPTY_LINE,
      loadModel.getSkipEmptyLine());
  config.setDataLoadProperty(DataLoadProcessorConstants.FACT_FILE_PATH,
      loadModel.getFactFilePath());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_SORT_SCOPE,
      loadModel.getSortScope());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_BATCH_SORT_SIZE_INMB,
      loadModel.getBatchSortSizeInMb());
  config.setDataLoadProperty(CarbonCommonConstants.LOAD_GLOBAL_SORT_PARTITIONS,
      loadModel.getGlobalSortPartitions());
  config.setDataLoadProperty(CarbonLoadOptionConstants.CARBON_OPTIONS_BAD_RECORD_PATH,
      loadModel.getBadRecordsLocation());
  CarbonMetadata.getInstance().addCarbonTable(table);
  List<DataField> dataFields = new ArrayList<>();
  List<DataField> complexFields = new ArrayList<>();
  // Primitive dimensions come first; complex dimensions are collected separately and
  // appended after them, followed by the measures.
  for (CarbonDimension dimension : table.getDimensionByTableName(table.getTableName())) {
    DataField field = new DataField(dimension);
    if (dimension.getDataType() == DataTypes.DATE) {
      field.setDateFormat(loadModel.getDateFormat());
    } else if (dimension.getDataType() == DataTypes.TIMESTAMP) {
      field.setTimestampFormat(loadModel.getTimestampformat());
    }
    if (dimension.isComplex()) {
      complexFields.add(field);
    } else {
      dataFields.add(field);
    }
  }
  dataFields.addAll(complexFields);
  for (CarbonMeasure measure : table.getMeasureByTableName(table.getTableName())) {
    // A dummy measure is injected when the table has no real measures; skip loading it.
    if (!measure.getColName().equals("default_dummy_measure")) {
      dataFields.add(new DataField(measure));
    }
  }
  config.setDataFields(dataFields.toArray(new DataField[dataFields.size()]));
  config.setBucketingInfo(table.getBucketingInfo(table.getTableName()));
  // One-pass load: dictionary server connection details.
  config.setUseOnePass(loadModel.getUseOnePass());
  config.setDictionaryServerHost(loadModel.getDictionaryServerHost());
  config.setDictionaryServerPort(loadModel.getDictionaryServerPort());
  config.setDictionaryServerSecretKey(loadModel.getDictionaryServerSecretKey());
  config.setDictionaryEncryptServerSecure(loadModel.getDictionaryEncryptServerSecure());
  config.setDictionaryServiceProvider(loadModel.getDictionaryServiceProvider());
  config.setPreFetch(loadModel.isPreFetch());
  config.setNumberOfSortColumns(table.getNumberOfSortColumns());
  config.setNumberOfNoDictSortColumns(table.getNumberOfNoDictSortColumns());
  config.setDataWritePath(loadModel.getDataWritePath());
  setSortColumnInfo(table, loadModel, config);
  // Hive partition tables write with a single core per partition.
  if (table.isHivePartitionTable()) {
    config.setWritingCoresCount((short) 1);
  }
  config.setTableSpec(new TableSpec(table));
  return config;
}
Use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by Apache:
the class CarbonFactDataHandlerModel, method getCarbonFactDataHandlerModel.
/**
 * Creates a model object for the carbon fact data handler, populated from the load model,
 * table schema and segment properties: measure/dimension counts, cardinalities, temp store
 * locations, inverted-index flags, and the {@link TableSpec} and data-map writer listener.
 *
 * @param loadModel the current load model
 * @param carbonTable table being loaded
 * @param segmentProperties properties (dimensions, measures, cardinality) of the segment
 * @param tableName name of the table (used for schema lookups)
 * @param tempStoreLocation candidate temporary store directories
 * @param carbonDataDirectoryPath target directory for carbondata files (created if absent)
 * @return populated fact data handler model
 */
public static CarbonFactDataHandlerModel getCarbonFactDataHandlerModel(CarbonLoadModel loadModel, CarbonTable carbonTable, SegmentProperties segmentProperties, String tableName, String[] tempStoreLocation, String carbonDataDirectoryPath) {
  CarbonFactDataHandlerModel model = new CarbonFactDataHandlerModel();
  model.setSchemaUpdatedTimeStamp(carbonTable.getTableLastUpdatedTime());
  model.setDatabaseName(loadModel.getDatabaseName());
  model.setTableName(tableName);
  model.setMeasureCount(segmentProperties.getMeasures().size());
  model.setStoreLocation(tempStoreLocation);
  model.setDimLens(segmentProperties.getDimColumnsCardinality());
  model.setSegmentProperties(segmentProperties);
  model.setNoDictionaryCount(segmentProperties.getNumberOfNoDictionaryDimension());
  // Dimension count here excludes the no-dictionary dimensions.
  model.setDimensionCount(segmentProperties.getDimensions().size() - model.getNoDictionaryCount());
  List<ColumnSchema> wrapperColumnSchema = CarbonUtil.getColumnSchemaList(carbonTable.getDimensionByTableName(tableName), carbonTable.getMeasureByTableName(tableName));
  model.setWrapperColumnSchema(wrapperColumnSchema);
  // Cardinality for all columns, including no-dictionary columns.
  model.setColCardinality(CarbonUtil.getFormattedCardinality(segmentProperties.getDimColumnsCardinality(), wrapperColumnSchema));
  // TODO: complex types still need handling here; the map is created empty for now.
  Map<Integer, GenericDataType> complexIndexMap = new HashMap<Integer, GenericDataType>(segmentProperties.getComplexDimensions().size());
  model.setComplexIndexMap(complexIndexMap);
  DataType[] measureDataTypes = new DataType[segmentProperties.getMeasures().size()];
  int measureIdx = 0;
  for (CarbonMeasure measure : segmentProperties.getMeasures()) {
    measureDataTypes[measureIdx++] = measure.getDataType();
  }
  model.setMeasureDataType(measureDataTypes);
  CarbonUtil.checkAndCreateFolderWithPermission(carbonDataDirectoryPath);
  model.setCarbonDataDirectoryPath(carbonDataDirectoryPath);
  List<CarbonDimension> tableDimensions = carbonTable.getDimensionByTableName(tableName);
  boolean[] invertedIndexFlags = new boolean[tableDimensions.size()];
  int dimIdx = 0;
  for (CarbonDimension dimension : tableDimensions) {
    invertedIndexFlags[dimIdx++] = dimension.isUseInvertedIndex();
  }
  model.setIsUseInvertedIndex(invertedIndexFlags);
  model.setPrimitiveDimLens(segmentProperties.getDimColumnsCardinality());
  model.setBlockSizeInMB(carbonTable.getBlockSizeInMB());
  model.tableSpec = new TableSpec(loadModel.getCarbonDataLoadSchema().getCarbonTable());
  DataMapWriterListener listener = new DataMapWriterListener();
  // Register data-map writers against a randomly chosen temp store location.
  listener.registerAllWriter(loadModel.getCarbonDataLoadSchema().getCarbonTable(), loadModel.getSegmentId(), tempStoreLocation[new Random().nextInt(tempStoreLocation.length)]);
  model.dataMapWriterlistener = listener;
  return model;
}
Use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by Apache:
the class TablePage, method getColumnPage.
/**
 * Returns the column page for the column with the given name.
 * Dictionary-encoded dimensions, plain-value dimensions and measures are each backed by
 * their own page arrays, so the matching page is located by walking the table spec and
 * tracking a per-kind index. Complex columns are skipped (no datamap support).
 *
 * @param columnName column to look up (matched case-insensitively)
 * @throws IllegalArgumentException if no non-complex column with that name exists
 */
public ColumnPage getColumnPage(String columnName) {
  TableSpec spec = model.getTableSpec();
  int dictIdx = -1;
  int noDictIdx = -1;
  int numDimensions = spec.getNumDimensions();
  for (int dim = 0; dim < numDimensions; dim++) {
    ColumnPage candidate;
    switch (spec.getDimensionSpec(dim).getColumnType()) {
      case GLOBAL_DICTIONARY:
      case DIRECT_DICTIONARY:
        candidate = dictDimensionPages[++dictIdx];
        break;
      case PLAIN_VALUE:
        candidate = noDictDimensionPages[++noDictIdx];
        break;
      default:
        // datamap on complex columns is not supported
        continue;
    }
    if (spec.getDimensionSpec(dim).getFieldName().equalsIgnoreCase(columnName)) {
      return candidate;
    }
  }
  int numMeasures = spec.getNumMeasures();
  for (int msr = 0; msr < numMeasures; msr++) {
    if (spec.getMeasureSpec(msr).getFieldName().equalsIgnoreCase(columnName)) {
      return measurePages[msr];
    }
  }
  throw new IllegalArgumentException("DataMap: must have '" + columnName + "' column in schema");
}
Use of org.apache.carbondata.core.datastore.TableSpec in project carbondata by Apache:
the class TablePage, method encodeAndCompressDimensions.
/**
 * Encodes and compresses every dimension page of this table page.
 * Primitive dimensions (dictionary and plain-value) are encoded in schema order first;
 * encoded complex-dimension pages are appended after them, preserving the on-disk layout.
 *
 * @return encoded pages: primitive dimensions followed by complex dimensions
 * @throws IllegalArgumentException on an unrecognized dimension column type
 */
private EncodedColumnPage[] encodeAndCompressDimensions() throws KeyGenException, IOException, MemoryException {
  TableSpec tableSpec = model.getTableSpec();
  List<EncodedColumnPage> primitivePages = new ArrayList<>();
  List<EncodedColumnPage> complexPages = new ArrayList<>();
  int dictIdx = 0;
  int noDictIdx = 0;
  int complexIdx = 0;
  int dimCount = tableSpec.getNumDimensions();
  for (int i = 0; i < dimCount; i++) {
    TableSpec.DimensionSpec dimSpec = tableSpec.getDimensionSpec(i);
    ColumnType columnType = dimSpec.getColumnType();
    if (columnType == ColumnType.GLOBAL_DICTIONARY || columnType == ColumnType.DIRECT_DICTIONARY) {
      ColumnPageEncoder encoder = encodingFactory.createEncoder(dimSpec, dictDimensionPages[dictIdx]);
      primitivePages.add(encoder.encode(dictDimensionPages[dictIdx++]));
    } else if (columnType == ColumnType.PLAIN_VALUE) {
      ColumnPageEncoder encoder = encodingFactory.createEncoder(dimSpec, noDictDimensionPages[noDictIdx]);
      primitivePages.add(encoder.encode(noDictDimensionPages[noDictIdx++]));
    } else if (columnType == ColumnType.COMPLEX) {
      // Complex columns are encoded into multiple pages and kept aside until the end.
      EncodedColumnPage[] encodedPages = ColumnPageEncoder.encodeComplexColumn(complexDimensionPages[complexIdx++]);
      complexPages.addAll(Arrays.asList(encodedPages));
    } else {
      throw new IllegalArgumentException("unsupported dimension type:" + columnType);
    }
  }
  primitivePages.addAll(complexPages);
  return primitivePages.toArray(new EncodedColumnPage[primitivePages.size()]);
}
Aggregations