Example use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in the Apache CarbonData project: class AvroCarbonWriterTest, method WriteAvroComplexDataAndRead.
/**
 * Writes rows of nested-array Avro data described by the given schema,
 * rolling the writer (close + rebuild) every {@code numOfWrite} rows, then
 * reads everything back and verifies the row count and the array lengths.
 *
 * @param mySchema Avro schema (JSON string) describing the records to write
 * @throws IOException on write or read failure
 * @throws InvalidLoadOptionException if the writer rejects the load options
 * @throws InterruptedException if the read is interrupted
 */
private void WriteAvroComplexDataAndRead(String mySchema) throws IOException, InvalidLoadOptionException, InterruptedException {
// conversion to GenericData.Record
Schema nn = new Schema.Parser().parse(mySchema);
try {
CarbonWriter writer = CarbonWriter.builder().outputPath(path).withAvroInput(mySchema).writtenBy("AvroCarbonWriterTest").build();
int numOfRows = 100000 / 100;
int numOfWrite = 20000;
int arrayLength = 300;
for (int i = 0; i < numOfRows; i++) {
// StringBuilder instead of StringBuffer: these buffers are confined to this
// thread, so the unsynchronized variant is the idiomatic (and faster) choice.
StringBuilder aa1 = new StringBuilder();
StringBuilder bb1 = new StringBuilder();
StringBuilder cc1 = new StringBuilder();
aa1.append("[0.1234567,0.2,-0.3,0.4]");
bb1.append("[0.2123456]");
cc1.append("[0.3123456]");
for (int j = 1; j < arrayLength; j++) {
// array elements whose digits encode the row index i and element index j
aa1.append(",[1" + i + "" + j + ".1234567,1" + i + "" + j + ".2,-1" + i + "" + j + ".3,1" + i + "" + j + ".4]");
bb1.append(",[2" + i + "" + j + ".2123456,-2" + i + "" + j + ".2]");
cc1.append(",[3" + i + "" + j + ".3123456]");
}
String json = "{\"fileName\":\"bob\", \"id\":10, " + " \"aa1\" : [" + aa1 + "], " + "\"bb1\" : [" + bb1 + "], " + "\"cc1\" : [" + cc1 + "]}";
writer.write(json);
// roll to a fresh writer periodically to exercise multi-file output
if (i > 0 && i % numOfWrite == 0) {
writer.close();
writer = CarbonWriter.builder().outputPath(path).withAvroInput(mySchema).writtenBy("AvroCarbonWriterTest").build();
}
}
writer.close();
// project every field of the schema on read-back
String[] projection = new String[nn.getFields().size()];
for (int j = 0; j < nn.getFields().size(); j++) {
projection[j] = nn.getFields().get(j).name();
}
CarbonReader carbonReader = CarbonReader.builder().projection(projection).withFolder(path).build();
int sum = 0;
while (carbonReader.hasNext()) {
sum++;
Object[] row = (Object[]) carbonReader.readNextRow();
Assert.assertTrue(row.length == 5);
Object[] aa1 = (Object[]) row[2];
Assert.assertTrue(aa1.length == arrayLength);
Object[] aa2 = (Object[]) aa1[1];
Assert.assertTrue(aa2.length == 4 || aa2.length == 2 || aa2.length == 1);
}
// fix resource leak: the reader was never closed in the original
carbonReader.close();
Assert.assertTrue(sum == numOfRows);
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
Example use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in the Apache CarbonData project: class AvroCarbonWriterTest, method testExceptionForDuplicateColumns.
/**
 * Verifies that building a writer with a schema containing two columns with
 * the same name fails, and that the failure message mentions the duplicate.
 */
@Test
public void testExceptionForDuplicateColumns() throws IOException, InvalidLoadOptionException {
Field[] field = new Field[2];
field[0] = new Field("name", DataTypes.STRING);
field[1] = new Field("name", DataTypes.STRING);
CarbonWriterBuilder writer = CarbonWriter.builder().uniqueIdentifier(System.currentTimeMillis()).outputPath(path);
try {
writer.withCsvInput(new org.apache.carbondata.sdk.file.Schema(field)).writtenBy("AvroCarbonWriterTest").build();
Assert.fail();
} catch (Exception e) {
// Use Assert.assertTrue instead of the 'assert' keyword: 'assert' is a
// no-op unless the JVM runs with -ea, so the message check could silently
// pass even when the message is wrong.
Assert.assertTrue(e.getMessage().contains("Duplicate column name found in table schema"));
}
FileUtils.deleteDirectory(new File(path));
}
Example use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in the Apache CarbonData project: class CarbonLoadModelBuilder, method validateMaxColumns.
/**
 * Determines how many columns the CSV parser should be configured to handle,
 * validating any user-supplied 'maxcolumns' option against the header width
 * and the hard parsing threshold.
 *
 * @param csvHeaders the parsed CSV header column names
 * @param maxColumns user-supplied 'maxcolumns' option value, may be null
 * @return the number of columns to configure the parser with
 * @throws InvalidLoadOptionException if the header width or 'maxcolumns'
 *         violates the constraints described below
 */
private int validateMaxColumns(String[] csvHeaders, String maxColumns) throws InvalidLoadOptionException {
/*
User configures both csvheadercolumns and maxcolumns:
if csvheadercolumns >= maxcolumns, give error
if maxcolumns > threshold, give error
User configures csvheadercolumns only:
if csvheadercolumns >= maxcolumns(default) then maxcolumns = csvheadercolumns + 1
if csvheadercolumns >= threshold, give error
User configures nothing:
if csvheadercolumns >= maxcolumns(default) then maxcolumns = csvheadercolumns + 1
if csvheadercolumns >= threshold, give error
*/
int columnCountInSchema = csvHeaders.length;
// no initializer needed: every branch below assigns a value
int maxNumberOfColumnsForParsing;
Integer maxColumnsInt = getMaxColumnValue(maxColumns);
if (maxColumnsInt != null) {
if (columnCountInSchema >= maxColumnsInt) {
throw new InvalidLoadOptionException("csv headers should be less than the max columns " + maxColumnsInt);
} else if (maxColumnsInt > CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING) {
throw new InvalidLoadOptionException("max columns cannot be greater than the threshold value: " + CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING);
} else {
maxNumberOfColumnsForParsing = maxColumnsInt;
}
} else if (columnCountInSchema >= CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING) {
// fixed typo in the user-facing message: "threashold" -> "threshold"
throw new InvalidLoadOptionException("csv header columns should be less than max threshold: " + CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING);
} else if (columnCountInSchema >= CSVInputFormat.DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING) {
// leave one column of headroom above the header width
maxNumberOfColumnsForParsing = columnCountInSchema + 1;
} else {
maxNumberOfColumnsForParsing = CSVInputFormat.DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING;
}
return maxNumberOfColumnsForParsing;
}
Example use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in the Apache CarbonData project: class CSVCarbonWriterTest, method writeFilesAndVerify.
/**
* Invoke CarbonWriter API to write carbon files and assert the file is rewritten
* @param rows number of rows to write
* @param schema schema of the file
* @param path local write path
* @param sortColumns sort columns
* @param persistSchema true if want to persist schema file
* @param blockletSize blockletSize in the file, -1 for default size
* @param blockSize blockSize in the file, -1 for default size
*/
/**
 * Invoke CarbonWriter API to write carbon files and assert that data files
 * were written to the segment folder.
 * @param rows number of rows to write
 * @param schema schema of the file
 * @param path local write path
 * @param sortColumns sort columns, or null for none
 * @param persistSchema true if want to persist schema file
 * @param blockletSize blockletSize in the file, -1 for default size
 * @param blockSize blockSize in the file, -1 for default size
 */
private void writeFilesAndVerify(int rows, Schema schema, String path, String[] sortColumns, boolean persistSchema, int blockletSize, int blockSize) {
try {
CarbonWriterBuilder builder = CarbonWriter.builder().withSchema(schema).outputPath(path);
if (sortColumns != null) {
builder = builder.sortBy(sortColumns);
}
if (persistSchema) {
builder = builder.persistSchemaFile(true);
}
if (blockletSize != -1) {
builder = builder.withBlockletSize(blockletSize);
}
if (blockSize != -1) {
builder = builder.withBlockSize(blockSize);
}
CarbonWriter writer = builder.buildWriterForCSVInput();
for (int i = 0; i < rows; i++) {
writer.write(new String[] { "robot" + (i % 10), String.valueOf(i), String.valueOf((double) i / 2) });
}
writer.close();
} catch (IOException | InvalidLoadOptionException e) {
// multi-catch: both exception types were handled identically before
e.printStackTrace();
Assert.fail(e.getMessage());
}
// verify that at least one fact (.carbondata) file exists in the segment folder
File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null"));
Assert.assertTrue(segmentFolder.exists());
File[] dataFiles = segmentFolder.listFiles(new FileFilter() {
@Override
public boolean accept(File pathname) {
return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
}
});
Assert.assertNotNull(dataFiles);
Assert.assertTrue(dataFiles.length > 0);
}
Example use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in the Apache CarbonData project: class CarbonLoadModelBuilder, method build.
/**
* build CarbonLoadModel for data loading
* @param options Load options from user input
* @return a new CarbonLoadModel instance
*/
/**
 * Builds a CarbonLoadModel for data loading.
 *
 * @param options load options from user input
 * @return a new CarbonLoadModel instance
 * @throws InvalidLoadOptionException if an option value (e.g. 'dictport') is invalid
 * @throws IOException if reading table metadata fails
 */
public CarbonLoadModel build(Map<String, String> options) throws InvalidLoadOptionException, IOException {
Map<String, String> optionsFinal = LoadOption.fillOptionWithDefaultValue(options);
if (!options.containsKey("fileheader")) {
// No header supplied by the user: derive it from the table's create-order columns.
List<CarbonColumn> createOrderColumns = table.getCreateOrderColumn(table.getTableName());
String[] header = new String[createOrderColumns.size()];
for (int i = 0; i < header.length; i++) {
header[i] = createOrderColumns.get(i).getColName();
}
optionsFinal.put("fileheader", Strings.mkString(header, ","));
}
CarbonLoadModel loadModel = new CarbonLoadModel();
// 'fileheader' is guaranteed to be set above, so the hadoopConf argument can be null
build(options, optionsFinal, loadModel, null);
// set default values
loadModel.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
loadModel.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
loadModel.setUseOnePass(Boolean.parseBoolean(Maps.getOrDefault(options, "onepass", "false")));
loadModel.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", null));
try {
loadModel.setDictionaryServerPort(Integer.parseInt(Maps.getOrDefault(options, "dictport", "-1")));
} catch (NumberFormatException e) {
throw new InvalidLoadOptionException(e.getMessage());
}
return loadModel;
}
Aggregations