Search in sources :

Example 6 with InvalidLoadOptionException

use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.

the class AvroCarbonWriterTest method WriteAvroComplexDataAndRead.

/**
 * Writes rows of nested-array JSON through an Avro-input {@code CarbonWriter} and reads them
 * back with a {@code CarbonReader}, checking the row count and the shape of each row.
 *
 * <p>Every row carries three array-of-array columns ({@code aa1}, {@code bb1}, {@code cc1}),
 * each with {@code arrayLength} inner arrays. On read, each row must expose 5 top-level
 * values, {@code row[2]} must contain {@code arrayLength} elements, and its second element
 * must itself be an array of length 4, 2 or 1.
 *
 * @param mySchema Avro schema (JSON text) used both to build the writer and to derive the
 *                 read projection from its field names
 * @throws IOException if writing, reading or closing fails
 * @throws InvalidLoadOptionException if the writer cannot be built from the given options
 * @throws InterruptedException if reading is interrupted
 */
private void WriteAvroComplexDataAndRead(String mySchema) throws IOException, InvalidLoadOptionException, InterruptedException {
    // conversion to GenericData.Record
    Schema nn = new Schema.Parser().parse(mySchema);
    CarbonWriter writer = CarbonWriter.builder().outputPath(path).withAvroInput(mySchema).writtenBy("AvroCarbonWriterTest").build();
    int numOfRows = 100000 / 100;
    int numOfWrite = 20000;
    int arrayLength = 300;
    for (int i = 0; i < numOfRows; i++) {
        // StringBuilder: these buffers never leave this thread, so StringBuffer's locking is wasted.
        StringBuilder aa1 = new StringBuilder("[0.1234567,0.2,-0.3,0.4]");
        StringBuilder bb1 = new StringBuilder("[0.2123456]");
        StringBuilder cc1 = new StringBuilder("[0.3123456]");
        for (int j = 1; j < arrayLength; j++) {
            // Row and element indices concatenated as text, e.g. i=3, j=7 -> "37".
            String id = String.valueOf(i) + j;
            aa1.append(",[1" + id + ".1234567,1" + id + ".2,-1" + id + ".3,1" + id + ".4]");
            bb1.append(",[2" + id + ".2123456,-2" + id + ".2]");
            cc1.append(",[3" + id + ".3123456]");
        }
        String json = "{\"fileName\":\"bob\", \"id\":10, " + "   \"aa1\" : [" + aa1 + "], " + "\"bb1\" : [" + bb1 + "], " + "\"cc1\" : [" + cc1 + "]}";
        writer.write(json);
        // Roll over to a fresh writer every numOfWrite rows. With the current constants
        // (numOfRows < numOfWrite) this branch is never taken.
        if (i > 0 && i % numOfWrite == 0) {
            writer.close();
            writer = CarbonWriter.builder().outputPath(path).withAvroInput(mySchema).writtenBy("AvroCarbonWriterTest").build();
        }
    }
    writer.close();

    // Project every field declared in the Avro schema, in declaration order.
    String[] projection = new String[nn.getFields().size()];
    for (int j = 0; j < nn.getFields().size(); j++) {
        projection[j] = nn.getFields().get(j).name();
    }

    CarbonReader carbonReader = CarbonReader.builder().projection(projection).withFolder(path).build();
    try {
        int sum = 0;
        while (carbonReader.hasNext()) {
            sum++;
            Object[] row = (Object[]) carbonReader.readNextRow();
            Assert.assertEquals(5, row.length);
            Object[] aa1 = (Object[]) row[2];
            Assert.assertEquals(arrayLength, aa1.length);
            Object[] aa2 = (Object[]) aa1[1];
            Assert.assertTrue(aa2.length == 4 || aa2.length == 2 || aa2.length == 1);
        }
        Assert.assertEquals(numOfRows, sum);
    } finally {
        // Release the reader even when an assertion above fails.
        carbonReader.close();
    }
}
Also used : Schema(org.apache.avro.Schema) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)

Example 7 with InvalidLoadOptionException

use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.

the class AvroCarbonWriterTest method testExceptionForDuplicateColumns.

/**
 * Building a CSV writer from a schema that declares the column {@code name} twice must fail
 * with a message containing "Duplicate column name found in table schema".
 */
@Test
public void testExceptionForDuplicateColumns() throws IOException, InvalidLoadOptionException {
    Field[] field = new Field[2];
    field[0] = new Field("name", DataTypes.STRING);
    field[1] = new Field("name", DataTypes.STRING);
    CarbonWriterBuilder writer = CarbonWriter.builder().uniqueIdentifier(System.currentTimeMillis()).outputPath(path);
    try {
        writer.withCsvInput(new org.apache.carbondata.sdk.file.Schema(field)).writtenBy("AvroCarbonWriterTest").build();
        // Assert.fail throws AssertionError, which the catch (Exception) below does not intercept.
        Assert.fail("expected the build to be rejected because of the duplicate column name");
    } catch (Exception e) {
        // Use a JUnit assertion rather than the `assert` keyword: the keyword is skipped
        // entirely when the JVM runs without -ea, which would make this test vacuous.
        String message = e.getMessage();
        Assert.assertTrue("unexpected exception message: " + message,
            message != null && message.contains("Duplicate column name found in table schema"));
    } finally {
        // Clean up the output directory whether or not the check above passed.
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) File(java.io.File) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Test(org.junit.Test)

Example 8 with InvalidLoadOptionException

use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.

the class CarbonLoadModelBuilder method validateMaxColumns.

/**
 * Decides how many columns the CSV parser should be prepared to handle.
 *
 * <p>When {@code getMaxColumnValue(maxColumns)} yields a value (presumably: the user
 * configured max columns), the header count must be strictly below that value and the value
 * must not exceed {@code CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING};
 * the value is then returned as is.
 *
 * <p>When it yields {@code null}, the header count must be strictly below the threshold.
 * The result is the default limit, or header count + 1 if the header count has reached
 * the default.
 *
 * @param csvHeaders CSV header column names; only the array length is used
 * @param maxColumns raw max-columns option, interpreted by {@code getMaxColumnValue}
 * @return the column limit to use for parsing
 * @throws InvalidLoadOptionException if one of the limits described above is violated
 */
private int validateMaxColumns(String[] csvHeaders, String maxColumns) throws InvalidLoadOptionException {
    final int headerCount = csvHeaders.length;
    final Integer configuredMax = getMaxColumnValue(maxColumns);

    if (configuredMax == null) {
        // No explicit limit: only the hard threshold can reject the load.
        if (headerCount >= CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING) {
            throw new InvalidLoadOptionException("csv header columns should be less than max threashold: " + CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING);
        }
        // Grow past the default by one slot when the header already fills it.
        return headerCount >= CSVInputFormat.DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING
            ? headerCount + 1
            : CSVInputFormat.DEFAULT_MAX_NUMBER_OF_COLUMNS_FOR_PARSING;
    }

    // Explicit limit: check it against the header first, then against the threshold.
    if (headerCount >= configuredMax) {
        throw new InvalidLoadOptionException("csv headers should be less than the max columns " + configuredMax);
    }
    if (configuredMax > CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING) {
        throw new InvalidLoadOptionException("max columns cannot be greater than the threshold value: " + CSVInputFormat.THRESHOLD_MAX_NUMBER_OF_COLUMNS_FOR_PARSING);
    }
    return configuredMax;
}
Also used : InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)

Example 9 with InvalidLoadOptionException

use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.

the class CSVCarbonWriterTest method writeFilesAndVerify.

/**
 * Writes {@code rows} CSV records through the CarbonWriter API, then asserts that the
 * segment folder exists and holds at least one file whose name ends with
 * {@code CarbonCommonConstants.FACT_FILE_EXT}. Any IOException or
 * InvalidLoadOptionException raised while writing fails the test.
 *
 * @param rows number of rows to write
 * @param schema schema of the file
 * @param path local write path
 * @param sortColumns sort columns, or null to leave sorting unconfigured
 * @param persistSchema true to request that the schema file be persisted
 * @param blockletSize blocklet size for the file, -1 to keep the builder's default
 * @param blockSize block size for the file, -1 to keep the builder's default
 */
private void writeFilesAndVerify(int rows, Schema schema, String path, String[] sortColumns, boolean persistSchema, int blockletSize, int blockSize) {
    try {
        CarbonWriterBuilder writerBuilder = CarbonWriter.builder().withSchema(schema).outputPath(path);
        // Apply only the options the caller actually asked for.
        if (null != sortColumns) {
            writerBuilder = writerBuilder.sortBy(sortColumns);
        }
        if (persistSchema) {
            writerBuilder = writerBuilder.persistSchemaFile(true);
        }
        if (-1 != blockletSize) {
            writerBuilder = writerBuilder.withBlockletSize(blockletSize);
        }
        if (-1 != blockSize) {
            writerBuilder = writerBuilder.withBlockSize(blockSize);
        }

        CarbonWriter carbonWriter = writerBuilder.buildWriterForCSVInput();
        int rowIndex = 0;
        while (rowIndex < rows) {
            // Three columns: a name cycling through robot0..robot9, the index, and half the index.
            String[] record = new String[] {
                "robot" + (rowIndex % 10),
                String.valueOf(rowIndex),
                String.valueOf((double) rowIndex / 2)
            };
            carbonWriter.write(record);
            rowIndex++;
        }
        carbonWriter.close();
    } catch (IOException | InvalidLoadOptionException failure) {
        failure.printStackTrace();
        Assert.fail(failure.getMessage());
    }

    File segmentFolder = new File(CarbonTablePath.getSegmentPath(path, "null"));
    Assert.assertTrue(segmentFolder.exists());

    // Keep only the files carrying the fact-file extension.
    FileFilter factFilesOnly = new FileFilter() {

        @Override
        public boolean accept(File candidate) {
            String fileName = candidate.getName();
            return fileName.endsWith(CarbonCommonConstants.FACT_FILE_EXT);
        }
    };
    File[] dataFiles = segmentFolder.listFiles(factFilesOnly);
    Assert.assertNotNull(dataFiles);
    Assert.assertTrue(dataFiles.length > 0);
}
Also used : InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) IOException(java.io.IOException) FileFilter(java.io.FileFilter) File(java.io.File)

Example 10 with InvalidLoadOptionException

use of org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException in project carbondata by apache.

the class CarbonLoadModelBuilder method build.

/**
 * Creates a CarbonLoadModel for data loading from user-supplied load options.
 *
 * <p>If the options carry no {@code fileheader} entry, one is derived from the table's
 * create-order columns. After delegating to the four-argument {@code build}, the model
 * receives the default timestamp/date formats plus the {@code onepass}, {@code dicthost}
 * and {@code dictport} settings read from {@code options}.
 *
 * @param options Load options from user input
 * @return a new CarbonLoadModel instance
 * @throws InvalidLoadOptionException if {@code dictport} is not a parsable integer
 *         (other causes may come from the delegated build)
 * @throws IOException declared for the delegated build
 */
public CarbonLoadModel build(Map<String, String> options) throws InvalidLoadOptionException, IOException {
    Map<String, String> optionsFinal = LoadOption.fillOptionWithDefaultValue(options);

    boolean headerSupplied = options.containsKey("fileheader");
    if (!headerSupplied) {
        // Fall back to the table's own columns, in creation order, as the file header.
        List<CarbonColumn> createOrderColumns = table.getCreateOrderColumn(table.getTableName());
        String[] columnNames = new String[createOrderColumns.size()];
        int position = 0;
        for (CarbonColumn column : createOrderColumns) {
            columnNames[position] = column.getColName();
            position++;
        }
        String header = Strings.mkString(columnNames, ",");
        optionsFinal.put("fileheader", header);
    }

    CarbonLoadModel loadModel = new CarbonLoadModel();
    // 'fileheader' is always present at this point, so hadoopConf may be passed as null
    build(options, optionsFinal, loadModel, null);

    // set default values
    loadModel.setTimestampformat(CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
    loadModel.setDateFormat(CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);

    boolean useOnePass = Boolean.parseBoolean(Maps.getOrDefault(options, "onepass", "false"));
    loadModel.setUseOnePass(useOnePass);
    loadModel.setDictionaryServerHost(Maps.getOrDefault(options, "dicthost", null));

    try {
        int dictionaryPort = Integer.parseInt(Maps.getOrDefault(options, "dictport", "-1"));
        loadModel.setDictionaryServerPort(dictionaryPort);
    } catch (NumberFormatException badPort) {
        // Report an unparsable port as an invalid load option.
        throw new InvalidLoadOptionException(badPort.getMessage());
    }
    return loadModel;
}
Also used : InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) CarbonColumn(org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)

Aggregations

InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)18 File (java.io.File)10 IOException (java.io.IOException)10 Field (org.apache.carbondata.core.metadata.datatype.Field)8 Test (org.junit.Test)8 StructField (org.apache.carbondata.core.metadata.datatype.StructField)6 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)4 HashMap (java.util.HashMap)3 BufferedInputStream (java.io.BufferedInputStream)2 FileFilter (java.io.FileFilter)2 FileInputStream (java.io.FileInputStream)2 Schema (org.apache.avro.Schema)2 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)2 CarbonColumn (org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn)2 DataLoadMetrics (org.apache.carbondata.core.util.DataLoadMetrics)2 DecoderException (org.apache.commons.codec.DecoderException)2 FilenameFilter (java.io.FilenameFilter)1 SQLException (java.sql.SQLException)1 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1