Use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.
The class CarbonLoadModelBuilder, method validateAndSetColumnCompressor.
private void validateAndSetColumnCompressor(CarbonLoadModel carbonLoadModel)
    throws InvalidLoadOptionException {
  try {
    String columnCompressor = carbonLoadModel.getColumnCompressor();
    if (StringUtils.isBlank(columnCompressor)) {
      columnCompressor = CarbonProperties.getInstance().getProperty(
          CarbonCommonConstants.COMPRESSOR, CarbonCommonConstants.DEFAULT_COMPRESSOR);
    }
    // check and load compressor
    CompressorFactory.getInstance().getCompressor(columnCompressor);
    carbonLoadModel.setColumnCompressor(columnCompressor);
  } catch (Exception e) {
    LOGGER.error(e.getMessage(), e);
    throw new InvalidLoadOptionException("Failed to load the compressor");
  }
}
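A minimal standalone sketch of the same fallback-and-validate logic, useful for checking a compressor name before a load is submitted. The class and method names here are illustrative; the CarbonProperties, CarbonCommonConstants, and CompressorFactory calls are the same ones used in the method above.

import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.datastore.compression.CompressorFactory;
import org.apache.carbondata.core.util.CarbonProperties;

public final class CompressorOptionCheck {
  // Mirrors the builder's logic: a blank compressor name falls back to the
  // carbon.column.compressor property (snappy by default); unknown names are rejected.
  static String resolveCompressor(String requested) throws InvalidLoadOptionException {
    String name = (requested == null || requested.trim().isEmpty())
        ? CarbonProperties.getInstance().getProperty(
            CarbonCommonConstants.COMPRESSOR, CarbonCommonConstants.DEFAULT_COMPRESSOR)
        : requested;
    try {
      // throws for unsupported compressor names, as in the method above
      CompressorFactory.getInstance().getCompressor(name);
    } catch (Exception e) {
      throw new InvalidLoadOptionException("Failed to load the compressor");
    }
    return name;
  }

  public static void main(String[] args) throws InvalidLoadOptionException {
    System.out.println(resolveCompressor(null));    // falls back to the configured default
    System.out.println(resolveCompressor("zstd"));  // accepted only if that compressor is available
  }
}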
Use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.
The class CarbonLoadModelBuilder, method build.
/**
 * build CarbonLoadModel for data loading
 * @param options Load options from user input
 * @param optionsFinal Load options populated with default values for optional options
 * @param carbonLoadModel The output load model
 * @param hadoopConf needed to read the CSV header if 'fileheader' is not set in the
 *                   user-provided load options
 * @param partitions map of partition name to path
 * @param isDataFrame true if building a load for a DataFrame
 */
public void build(Map<String, String> options, Map<String, String> optionsFinal,
    CarbonLoadModel carbonLoadModel, Configuration hadoopConf,
    Map<String, String> partitions, boolean isDataFrame)
    throws InvalidLoadOptionException, IOException {
  carbonLoadModel.setTableName(table.getTableName());
  carbonLoadModel.setDatabaseName(table.getDatabaseName());
  carbonLoadModel.setTablePath(table.getTablePath());
  carbonLoadModel.setTableName(table.getTableName());
  carbonLoadModel.setCarbonTransactionalTable(table.isTransactionalTable());
  CarbonDataLoadSchema dataLoadSchema = new CarbonDataLoadSchema(table);
  // Need to fill dimension relation
  carbonLoadModel.setCarbonDataLoadSchema(dataLoadSchema);
  String sort_scope = optionsFinal.get("sort_scope");
  String bad_records_logger_enable = optionsFinal.get("bad_records_logger_enable");
  String bad_records_action = optionsFinal.get("bad_records_action");
  String bad_record_path = optionsFinal.get("bad_record_path");
  String global_sort_partitions = optionsFinal.get("global_sort_partitions");
  String timestampformat = optionsFinal.get("timestampformat");
  String dateFormat = optionsFinal.get("dateformat");
  String delimiter = optionsFinal.get("delimiter");
  String complex_delimiter_level1 = optionsFinal.get("complex_delimiter_level_1");
  String complex_delimiter_level2 = optionsFinal.get("complex_delimiter_level_2");
  String complex_delimiter_level3 = optionsFinal.get("complex_delimiter_level_3");
  String complex_delimiter_level4 = optionsFinal.get("complex_delimiter_level_4");
  validateDateTimeFormat(timestampformat, "TimestampFormat");
  validateDateTimeFormat(dateFormat, "DateFormat");
  if (Boolean.parseBoolean(bad_records_logger_enable) ||
      LoggerAction.REDIRECT.name().equalsIgnoreCase(bad_records_action)) {
    if (!StringUtils.isEmpty(bad_record_path)) {
      bad_record_path = CarbonUtil.checkAndAppendHDFSUrl(bad_record_path);
    } else {
      throw new InvalidLoadOptionException(
          "Cannot redirect bad records as bad record location is not provided.");
    }
  }
  carbonLoadModel.setBadRecordsLocation(bad_record_path);
  validateGlobalSortPartitions(global_sort_partitions);
  carbonLoadModel.setEscapeChar(checkDefaultValue(optionsFinal.get("escapechar"), "\\"));
  carbonLoadModel.setQuoteChar(
      CarbonUtil.unescapeChar(checkDefaultValue(optionsFinal.get("quotechar"), "\"")));
  carbonLoadModel.setCommentChar(checkDefaultValue(optionsFinal.get("commentchar"), "#"));
  String lineSeparator = CarbonUtil.unescapeChar(options.get("line_separator"));
  if (lineSeparator != null) {
    carbonLoadModel.setLineSeparator(lineSeparator);
  }
  // if there isn't file header in csv file and load sql doesn't provide FILEHEADER option,
  // we should use table schema to generate file header.
  String fileHeader = optionsFinal.get("fileheader");
  String headerOption = optionsFinal.get("header");
  if (StringUtils.isNotEmpty(headerOption)) {
    if (!headerOption.equalsIgnoreCase("true") && !headerOption.equalsIgnoreCase("false")) {
      throw new InvalidLoadOptionException("'header' option should be either 'true' or 'false'.");
    }
    // whether the csv file has file header, the default value is true
    if (Boolean.valueOf(headerOption)) {
      if (!StringUtils.isEmpty(fileHeader)) {
        throw new InvalidLoadOptionException(
            "When 'header' option is true, 'fileheader' option is not required.");
      }
    } else {
      if (StringUtils.isEmpty(fileHeader)) {
        List<CarbonColumn> columns = table.getCreateOrderColumn();
        List<String> columnNames = new ArrayList<>();
        List<String> partitionColumns = new ArrayList<>();
        for (int i = 0; i < columns.size(); i++) {
          columnNames.add(columns.get(i).getColName());
        }
        columnNames.addAll(partitionColumns);
        fileHeader = Strings.mkString(columnNames.toArray(new String[columnNames.size()]), ",");
      }
    }
  }
  String binaryDecoder = options.get("binary_decoder");
  carbonLoadModel.setBinaryDecoder(binaryDecoder);
  carbonLoadModel.setTimestampFormat(timestampformat);
  carbonLoadModel.setDateFormat(dateFormat);
  carbonLoadModel.setDefaultTimestampFormat(CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT,
      CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT));
  carbonLoadModel.setDefaultDateFormat(CarbonProperties.getInstance().getProperty(
      CarbonCommonConstants.CARBON_DATE_FORMAT,
      CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT));
  carbonLoadModel.setSerializationNullFormat(
      TableOptionConstant.SERIALIZATION_NULL_FORMAT.getName() + "," +
      optionsFinal.get("serialization_null_format"));
  carbonLoadModel.setBadRecordsLoggerEnable(
      TableOptionConstant.BAD_RECORDS_LOGGER_ENABLE.getName() + "," + bad_records_logger_enable);
  carbonLoadModel.setBadRecordsAction(
      TableOptionConstant.BAD_RECORDS_ACTION.getName() + "," + bad_records_action.toUpperCase());
  carbonLoadModel.setIsEmptyDataBadRecord(
      DataLoadProcessorConstants.IS_EMPTY_DATA_BAD_RECORD + "," +
      optionsFinal.get("is_empty_data_bad_record"));
  carbonLoadModel.setSkipEmptyLine(optionsFinal.get("skip_empty_line"));
  carbonLoadModel.setSortScope(sort_scope);
  if (global_sort_partitions == null) {
    global_sort_partitions = table.getGlobalSortPartitions();
  }
  carbonLoadModel.setGlobalSortPartitions(global_sort_partitions);
  if (delimiter.equalsIgnoreCase(complex_delimiter_level1) ||
      complex_delimiter_level1.equalsIgnoreCase(complex_delimiter_level2) ||
      delimiter.equalsIgnoreCase(complex_delimiter_level2) ||
      delimiter.equalsIgnoreCase(complex_delimiter_level3)) {
    throw new InvalidLoadOptionException("Field Delimiter and Complex types delimiter are same");
  } else {
    carbonLoadModel.setComplexDelimiter(complex_delimiter_level1);
    carbonLoadModel.setComplexDelimiter(complex_delimiter_level2);
    carbonLoadModel.setComplexDelimiter(complex_delimiter_level3);
    carbonLoadModel.setComplexDelimiter(complex_delimiter_level4);
  }
  carbonLoadModel.setCsvDelimiter(CarbonUtil.unescapeChar(delimiter));
  carbonLoadModel.setCsvHeader(fileHeader);
  List<String> ignoreColumns = new ArrayList<>();
  if (!isDataFrame) {
    for (Map.Entry<String, String> partition : partitions.entrySet()) {
      if (partition.getValue() != null) {
        ignoreColumns.add(partition.getKey());
      }
    }
  }
  carbonLoadModel.setCsvHeaderColumns(
      LoadOption.getCsvHeaderColumns(carbonLoadModel, hadoopConf, ignoreColumns));
  int validatedMaxColumns =
      validateMaxColumns(carbonLoadModel.getCsvHeaderColumns(), optionsFinal.get("maxcolumns"));
  carbonLoadModel.setMaxColumns(String.valueOf(validatedMaxColumns));
  if (carbonLoadModel.isCarbonTransactionalTable()) {
    carbonLoadModel.readAndSetLoadMetadataDetails();
  }
  carbonLoadModel.setSortColumnsBoundsStr(optionsFinal.get("sort_column_bounds"));
  carbonLoadModel.setLoadMinSize(optionsFinal.get(CarbonCommonConstants.CARBON_LOAD_MIN_SIZE_INMB));
  validateAndSetLoadMinSize(carbonLoadModel);
  validateAndSetColumnCompressor(carbonLoadModel);
  validateAndSetBinaryDecoder(carbonLoadModel);
  validateRangeColumn(optionsFinal, carbonLoadModel);
  carbonLoadModel.setMetrics(new DataLoadMetrics());
}
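A hedged sketch of how this build method is typically driven from caller code. It assumes the CarbonLoadModelBuilder(CarbonTable) constructor and LoadOption.fillOptionWithDefaultValue(...) used elsewhere in the project to populate optionsFinal; the option values and column names are illustrative only, not a definitive invocation.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;
import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.processing.loading.model.CarbonLoadModel;
import org.apache.carbondata.processing.loading.model.CarbonLoadModelBuilder;
import org.apache.carbondata.processing.loading.model.LoadOption;
import org.apache.hadoop.conf.Configuration;

public final class LoadModelExample {
  // Sketch: build a CarbonLoadModel for a plain CSV load. 'id,name,salary' is a
  // hypothetical schema; fillOptionWithDefaultValue is assumed to supply defaults
  // for every option not given explicitly, as it does elsewhere in the project.
  static CarbonLoadModel buildModel(CarbonTable table, Configuration hadoopConf)
      throws InvalidLoadOptionException, IOException {
    Map<String, String> options = new HashMap<>();
    options.put("delimiter", ",");
    options.put("header", "false");
    options.put("fileheader", "id,name,salary");  // hypothetical column list
    Map<String, String> optionsFinal = LoadOption.fillOptionWithDefaultValue(options);

    CarbonLoadModel model = new CarbonLoadModel();
    new CarbonLoadModelBuilder(table).build(
        options, optionsFinal, model, hadoopConf,
        new HashMap<String, String>(),  // partition name -> path map (none here)
        false);                         // not building for a DataFrame load
    return model;
  }
}

Because 'header' is false and 'fileheader' is supplied, the builder uses the given column list directly instead of reading the header from the CSV fact files.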
Use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.
The class CarbonLoadModelBuilder, method validateRangeColumn.
private void validateRangeColumn(Map<String, String> optionsFinal,
    CarbonLoadModel carbonLoadModel) throws InvalidLoadOptionException {
  String scaleFactor = optionsFinal.get("scale_factor");
  if (scaleFactor != null) {
    try {
      int scale = Integer.parseInt(scaleFactor);
      if (scale < 1 || scale > 300) {
        throw new InvalidLoadOptionException(
            "Invalid scale_factor option, the range of scale_factor should be [1, 300]");
      }
      carbonLoadModel.setScaleFactor(scale);
    } catch (NumberFormatException ex) {
      throw new InvalidLoadOptionException(
          "Invalid scale_factor option, scale_factor should be a integer");
    }
  }
}
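The same bound can be checked before a load is submitted; below is a minimal sketch that mirrors the [1, 300] rule, with the class name and sample values purely illustrative.

import org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException;

public final class ScaleFactorCheck {
  // Mirrors validateRangeColumn above: scale_factor, when supplied, must parse as an
  // integer in [1, 300]; anything else is reported as an invalid load option.
  static int parseScaleFactor(String scaleFactor) throws InvalidLoadOptionException {
    try {
      int scale = Integer.parseInt(scaleFactor);
      if (scale < 1 || scale > 300) {
        throw new InvalidLoadOptionException(
            "Invalid scale_factor option, the range of scale_factor should be [1, 300]");
      }
      return scale;
    } catch (NumberFormatException ex) {
      throw new InvalidLoadOptionException(
          "Invalid scale_factor option, scale_factor should be an integer");
    }
  }

  public static void main(String[] args) throws InvalidLoadOptionException {
    System.out.println(parseScaleFactor("10"));  // accepted
    parseScaleFactor("500");                     // out of range -> InvalidLoadOptionException
  }
}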