Search in sources :

Example 41 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the project carbondata by apache.

The class CarbonReaderTest, method testReadNextBatchRowWithVectorReader.

@Test
public void testReadNextBatchRowWithVectorReader() {
    // Writes 300 rows covering 11 primitive column types, then reads them back
    // through the vector reader in fixed-size batches and validates every cell.
    String path = "./carbondata";
    try {
        // Start from a clean output directory.
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[11];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        // The vector reader doesn't support complex data types, so an array
        // column is deliberately replaced by a plain float column here.
        // fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        fields[10] = new Field("floatField", DataTypes.FLOAT);
        Map<String, String> map = new HashMap<>();
        // Load option kept from the original array-column variant of this test;
        // harmless for the purely primitive schema above.
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
        for (int i = 0; i < 300; i++) {
            // One CSV row per schema column, in schema order.
            String[] row2 = new String[] { "robot" + (i % 10000), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "1.23" };
            writer.write(row2);
        }
        writer.close();
        // Read data
        int batchSize = 150;
        CarbonReader reader = CarbonReader.builder(path, "_temp").withBatch(batchSize).build();
        // i counts rows across all batches; data was written in row order with
        // i < 300, so "robot" + i matches the written "robot" + (i % 10000).
        int i = 0;
        while (reader.hasNext()) {
            Object[] batch = reader.readNextBatchRow();
            // The final batch may be smaller than batchSize, never larger.
            Assert.assertTrue(batch.length <= batchSize);
            for (int j = 0; j < batch.length; j++) {
                Object[] data = (Object[]) batch[j];
                // NOTE(review): the column indices below (0,1,3,4,5,6,7,9,10) do
                // not follow schema order — they appear to reflect Carbon's
                // internal reordering (dimension/sort columns first). Confirm
                // against RowUtil and the reader's projection layout.
                assert (RowUtil.getString(data, 0).equals("robot" + i));
                assertEquals(RowUtil.getString(data, 1), "2019-03-02");
                assert (RowUtil.getVarchar(data, 3).equals("varchar"));
                assertEquals(RowUtil.getShort(data, 4), i);
                assertEquals(RowUtil.getInt(data, 5), i);
                assertEquals(RowUtil.getLong(data, 6), Long.MAX_VALUE - i);
                assertEquals(RowUtil.getDouble(data, 7), ((double) i) / 2);
                // Written "12.345" is rounded to scale 2 ("12.35") by the
                // decimal(8,2) column type.
                assert (RowUtil.getDecimal(data, 9).equals("12.35"));
                assertEquals(RowUtil.getFloat(data, 10), (float) 1.23);
                i++;
            }
            System.out.println("batch is " + i);
        }
        reader.close();
    } catch (Throwable e) {
        // Fail the test with the underlying message rather than erroring out.
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        try {
            // Best-effort cleanup of the output directory.
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)

Example 42 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the project carbondata by apache.

The class CarbonReaderTest, method testWriteAndReadFiles.

@Test
public void testWriteAndReadFiles() throws IOException, InterruptedException {
    // Write 200 rows with a two-column (name STRING, age INT) schema, then
    // verify that two independently built readers over the same path each
    // return all 200 rows in order.
    String outputPath = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(outputPath));
    // Drop any cached index entries for this path left behind by other tests.
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(outputPath), false);
    Field[] columns = new Field[2];
    columns[0] = new Field("name", DataTypes.STRING);
    columns[1] = new Field("age", DataTypes.INT);
    TestUtil.writeFilesAndVerify(200, new Schema(columns), outputPath);
    // First pass over the written data.
    CarbonReader firstReader = CarbonReader.builder(outputPath, "_temp").projection(new String[] { "name", "age" }).build();
    int rowCount = 0;
    for (; firstReader.hasNext(); rowCount++) {
        Object[] row = (Object[]) firstReader.readNextRow();
        Assert.assertEquals(("robot" + (rowCount % 10)), row[0]);
        Assert.assertEquals(rowCount, row[1]);
    }
    Assert.assertEquals(rowCount, 200);
    // Second pass: a fresh reader must see exactly the same data again.
    CarbonReader secondReader = CarbonReader.builder(outputPath, "_temp").projection(new String[] { "name", "age" }).build();
    rowCount = 0;
    for (; secondReader.hasNext(); rowCount++) {
        Object[] row = (Object[]) secondReader.readNextRow();
        Assert.assertEquals(("robot" + (rowCount % 10)), row[0]);
        Assert.assertEquals(rowCount, row[1]);
    }
    Assert.assertEquals(rowCount, 200);
    secondReader.close();
    firstReader.close();
    FileUtils.deleteDirectory(new File(outputPath));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)

Example 43 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the project carbondata by apache.

The class CarbonReaderTest, method testReadWithFilterOfnonTransactionalwithsubfolders.

@Test
public void testReadWithFilterOfnonTransactionalwithsubfolders() throws IOException, InterruptedException {
    // Write the same 200-row data set into three nested sub-folders, then read
    // the parent folder with an equal-to filter and confirm that matches are
    // collected from all three locations.
    String subPath1 = "./testWriteFiles/1/" + System.nanoTime();
    String subPath2 = "./testWriteFiles/2/" + System.nanoTime();
    String subPath3 = "./testWriteFiles/3/" + System.nanoTime();
    FileUtils.deleteDirectory(new File("./testWriteFiles"));
    Field[] columns = new Field[2];
    columns[0] = new Field("name", DataTypes.STRING);
    columns[1] = new Field("age", DataTypes.INT);
    TestUtil.writeFilesAndVerify(200, new Schema(columns), subPath1);
    TestUtil.writeFilesAndVerify(200, new Schema(columns), subPath2);
    TestUtil.writeFilesAndVerify(200, new Schema(columns), subPath3);
    // Filter: name == 'robot1'.
    EqualToExpression nameEqualsRobot1 = new EqualToExpression(new ColumnExpression("name", DataTypes.STRING), new LiteralExpression("robot1", DataTypes.STRING));
    CarbonReader reader = CarbonReader.builder("./testWriteFiles", "_temp").projection(new String[] { "name", "age" }).filter(nameEqualsRobot1).build();
    int matchCount = 0;
    while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        // Default sort column is applied for dimensions. So, need  to validate accordingly
        assert ("robot1".equals(row[0]));
        matchCount++;
    }
    // 20 matching rows per sub-folder times three sub-folders.
    Assert.assertEquals(matchCount, 60);
    reader.close();
    FileUtils.deleteDirectory(new File("./testWriteFiles"));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FilterUtil.prepareEqualToExpression(org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) File(java.io.File) Test(org.junit.Test)

Example 44 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the project carbondata by apache.

The class CarbonReaderTest, method testValidateBadRecordsLoggerEnableWithImproperValue.

@Test
public void testValidateBadRecordsLoggerEnableWithImproperValue() throws IOException {
    // Building a writer with an invalid value ("FLSE") for the load option
    // "bad_records_logger_enable" must fail fast with IllegalArgumentException
    // whose message names both the bad value and the offending key.
    String path = "./testValidateBadRecordsLoggerEnableValue";
    Field[] fields = new Field[2];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
    Schema schema = new Schema(fields);
    // Parameterized map instead of the raw type: avoids the unchecked-call
    // warning and matches the String-to-String contract of withLoadOptions.
    Map<String, String> map = new HashMap<>();
    map.put("bad_records_logger_enable", "FLSE");
    try {
        CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(schema).enableLocalDictionary(false).writtenBy("CarbonReaderTest").build();
        // Reaching here means the invalid option value was silently accepted.
        Assert.fail();
    } catch (IllegalArgumentException e) {
        Assert.assertTrue(e.getMessage().contains("Invalid value FLSE for key bad_records_logger_enable"));
    } catch (Exception e) {
        // Any other failure mode is unexpected; surface its message.
        Assert.fail(e.getMessage());
    } finally {
        // Always clean up on-disk artifacts, even when the builder failed.
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) HashMap(java.util.HashMap) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Test(org.junit.Test)

Example 45 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the project carbondata by apache.

The class CarbonReaderTest, method testGetSplits.

@Test
public void testGetSplits() throws IOException, InterruptedException {
    // Write one large carbon file (1,000,000 rows) and compare the split
    // counts reported at blocklet granularity versus block granularity.
    String outputPath = "./testWriteFiles/" + System.nanoTime();
    FileUtils.deleteDirectory(new File(outputPath));
    Field[] columns = new Field[2];
    columns[0] = new Field("name", DataTypes.STRING);
    columns[1] = new Field("age", DataTypes.INT);
    TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(columns), outputPath, null, 1, 100);
    // Blocklet-level splits: the single carbon file holds 3 blocklets.
    InputSplit[] blockletSplits = CarbonReader.builder(outputPath).getSplits(true);
    Assert.assertEquals(blockletSplits.length, 3);
    // Block-level splits: exactly one carbon file was created.
    InputSplit[] blockSplits = CarbonReader.builder(outputPath).getSplits(false);
    Assert.assertEquals(blockSplits.length, 1);
    FileUtils.deleteDirectory(new File(outputPath));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6