Search in sources:

Example 86 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.

In class CarbonReaderTest, method testWriteAndReadJson.

/**
 * Writes {@code numRows} copies of a single JSON record through a JSON-input writer, checks that
 * exactly one file ending with {@code CarbonCommonConstants.FACT_FILE_EXT} was produced, then
 * reads every row back and verifies both projected columns.
 *
 * <p>The output directory is removed in a {@code finally} block so that a failing assertion does
 * not leave stale files behind for later runs.
 *
 * @throws IOException if the output directory cannot be deleted or reading fails
 * @throws InterruptedException if reading is interrupted
 */
@Test
public void testWriteAndReadJson() throws IOException, InterruptedException {
    int numRows = 100;
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    try {
        String json = "{\"name\":\"bob\", \"age\":10}";
        Schema schema = new Schema(new Field[] { new Field("name", "string"), new Field("age", "int") });
        try {
            CarbonWriter writer = CarbonWriter.builder().outputPath(path).withJsonInput(schema).writtenBy("AvroCarbonWriterTest").build();
            for (int i = 0; i < numRows; i++) {
                writer.write(json);
            }
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
        File[] dataFiles = new File(path).listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
            }
        });
        Assert.assertNotNull(dataFiles);
        Assert.assertEquals(1, dataFiles.length);
        // read it and verify
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age" }).build();
        try {
            int rowsRead = 0;
            while (reader.hasNext()) {
                Object[] row = (Object[]) reader.readNextRow();
                Assert.assertEquals("bob", row[0]);
                Assert.assertEquals(10, row[1]);
                rowsRead++;
            }
            // expected value first, so a failure message reads correctly
            Assert.assertEquals(numRows, rowsRead);
        } finally {
            // close the reader even when a row assertion fails
            reader.close();
        }
    } finally {
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FileFilter(java.io.FileFilter) File(java.io.File) Test(org.junit.Test)

Example 87 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.

In class CarbonReaderTest, method testReadNextRowWithRowUtil.

/**
 * Writes ten CSV rows covering twelve differently typed columns (including an array column that
 * uses {@code #} as its level-1 complex delimiter), reads the schema back from a generated
 * {@code carbonindex} file, and verifies the values through the typed {@code RowUtil} getters.
 *
 * <p>Checks use JUnit assertions rather than the Java {@code assert} keyword so that they are
 * evaluated even when the JVM runs without {@code -ea}. The number of rows read is verified as
 * well, so an empty result can no longer pass.
 */
@Test
public void testReadNextRowWithRowUtil() {
    String path = "./carbondata";
    int numRows = 10;
    try {
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[12];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        fields[11] = new Field("floatField", DataTypes.FLOAT);
        Map<String, String> map = new HashMap<>();
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
        for (int i = 0; i < numRows; i++) {
            String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon", "1.23" };
            writer.write(row2);
        }
        writer.close();
        File[] dataFiles = new File(path).listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                if (name == null) {
                    return false;
                }
                return name.endsWith("carbonindex");
            }
        });
        if (dataFiles == null || dataFiles.length < 1) {
            throw new RuntimeException("Carbon index file not exists.");
        }
        Schema schema = CarbonSchemaReader.readSchema(dataFiles[0].getAbsolutePath()).asOriginOrder();
        // Transform the schema: project only fields whose name has no '.' in it
        Field[] schemaFields = schema.getFields();
        int count = 0;
        for (Field schemaField : schemaFields) {
            if (!schemaField.getFieldName().contains(".")) {
                count++;
            }
        }
        String[] strings = new String[count];
        int index = 0;
        for (Field schemaField : schemaFields) {
            if (!schemaField.getFieldName().contains(".")) {
                strings[index] = schemaField.getFieldName();
                index++;
            }
        }
        // Read data
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(strings).build();
        try {
            int i = 0;
            while (reader.hasNext()) {
                Object[] data = (Object[]) reader.readNextRow();
                Assert.assertEquals("robot" + i, RowUtil.getString(data, 0));
                assertEquals(i, RowUtil.getShort(data, 1));
                assertEquals(i, RowUtil.getInt(data, 2));
                assertEquals(Long.MAX_VALUE - i, RowUtil.getLong(data, 3));
                assertEquals(((double) i) / 2, RowUtil.getDouble(data, 4));
                Assert.assertTrue(RowUtil.getBoolean(data, 5));
                assertEquals("2019-03-02", RowUtil.getString(data, 6));
                // written as "12.345" into a decimal(8, 2) column
                Assert.assertEquals("12.35", RowUtil.getDecimal(data, 8));
                Assert.assertEquals("varchar", RowUtil.getVarchar(data, 9));
                Object[] arr = RowUtil.getArray(data, 10);
                Assert.assertEquals("Hello", arr[0]);
                Assert.assertEquals("World", arr[1]);
                Assert.assertEquals("From", arr[2]);
                Assert.assertEquals("Carbon", arr[3]);
                assertEquals((float) 1.23, RowUtil.getFloat(data, 11));
                i++;
            }
            // without this check a reader that returns no rows would pass silently
            Assert.assertEquals(numRows, i);
        } finally {
            // close the reader even when a row assertion fails
            reader.close();
        }
    } catch (Exception e) {
        // Only exceptions are converted to failures here; assertion errors propagate unchanged so
        // their expected/actual details are not lost.
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
    }
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FilenameFilter(java.io.FilenameFilter) File(java.io.File) Test(org.junit.Test)

Example 88 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.

In class CarbonReaderTest, method testReadingDateAndTimestampColumnInArrayOfStruct.

/**
 * Writes 15 CSV rows whose fourth column is an array of structs containing a date and a
 * timestamp member, then reads the rows back and verifies that the struct members come back as
 * the same strings that were written.
 *
 * <p>In the input, {@code \001} separates array elements and {@code \002} separates struct
 * members, as can be seen from the row literal below.
 *
 * @throws IOException if the output directory cannot be deleted or the schema cannot be read
 */
@Test
public void testReadingDateAndTimestampColumnInArrayOfStruct() throws IOException {
    String path = "./testWriteFilesArrayStruct";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[4];
    fields[0] = new Field("id", DataTypes.STRING);
    fields[1] = new Field("source", DataTypes.STRING);
    fields[2] = new Field("usage", DataTypes.STRING);
    List<StructField> structFieldsList = new ArrayList<>();
    structFieldsList.add(new StructField("name", DataTypes.STRING));
    structFieldsList.add(new StructField("type", DataTypes.STRING));
    structFieldsList.add(new StructField("creation-date", DataTypes.DATE));
    structFieldsList.add(new StructField("creation-timestamp", DataTypes.TIMESTAMP));
    StructField structTypeByList = new StructField("annotation", DataTypes.createStructType(structFieldsList), structFieldsList);
    List<StructField> list = new ArrayList<>();
    list.add(structTypeByList);
    Field arrayType = new Field("annotations", "array", list);
    fields[3] = arrayType;
    CarbonReader reader = null;
    try {
        try {
            CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)).writtenBy("complexTest").build();
            for (int i = 0; i < 15; i++) {
                String[] row = new String[] { "robot" + i, String.valueOf(i), i + "." + i, "sunflowers" + (i % 10) + "\002" + "modelarts/image_classification" + "\002" + "2019-03-30" + "\002" + "2019-03-30 17:22:31" + "\001" + "roses" + (i % 10) + "\002" + "modelarts/image_classification" + "\002" + "2019-03-30" + "\002" + "2019-03-30 17:22:31" };
                writer.write(row);
            }
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
        Schema schema = CarbonSchemaReader.readSchema(path).asOriginOrder();
        Assert.assertEquals(4, schema.getFieldsLength());
        reader = CarbonReader.builder(path).projection(new String[] { "id", "source", "usage", "annotations" }).build();
        int i = 0;
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            Assert.assertEquals(4, row.length);
            Assert.assertEquals("robot" + i, row[0]);
            int value = Integer.parseInt((String) row[1]);
            float value2 = Float.parseFloat((String) row[2]);
            // Both bounds must hold; the previous '||' made these checks always true.
            Assert.assertTrue(value > -1 && value < 15);
            Assert.assertTrue(value2 > -1 && value2 < 15);
            Object[] annotations = (Object[]) row[3];
            for (int j = 0; j < annotations.length; j++) {
                Object[] annotation = (Object[]) annotations[j];
                String name = (String) annotation[0];
                Assert.assertTrue(name.contains("sunflowers") || name.contains("roses"));
                Assert.assertTrue(((String) annotation[1]).contains("modelarts/image_classification"));
                Assert.assertEquals("2019-03-30", annotation[2]);
                Assert.assertEquals("2019-03-30 17:22:31", annotation[3]);
            }
            i++;
        }
        Assert.assertEquals(15, i);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and fail: swallowing this would let the test pass without
        // having verified anything.
        Thread.currentThread().interrupt();
        Assert.fail("Interrupted while reading rows: " + e.getMessage());
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }
}
Also used : ArrayList(java.util.ArrayList) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)

Example 89 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.

In class CarbonReaderTest, method testValidateQuoteCharWithProperValue.

/**
 * Verifies that building a CSV writer with the load option {@code quotechar} set to {@code #}
 * succeeds; any exception thrown while building fails the test.
 *
 * @throws IOException if the output directory cannot be deleted during cleanup
 */
@Test
public void testValidateQuoteCharWithProperValue() throws IOException {
    String path = "./testValidateQuoteCharWithProperValue";
    Field[] fields = new Field[2];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
    Schema schema = new Schema(fields);
    Map<String, String> map = new HashMap<>();
    map.put("quotechar", "#");
    try {
        // NOTE(review): the built writer is discarded without being closed, as in the original
        // test; confirm whether build() holds resources that need releasing.
        CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(schema).enableLocalDictionary(false).writtenBy("CarbonReaderTest").build();
    } catch (Exception e) {
        // Covers IllegalArgumentException too; always report the message with the failure.
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) HashMap(java.util.HashMap) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Test(org.junit.Test)

Example 90 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in the Apache CarbonData project.

In class CarbonReaderTest, method testReadWithFilterEqualSet.

/**
 * Writes 200 rows through {@code TestUtil.writeFilesAndVerify} and reads them back with five
 * different filters (value sets on each of the three columns, a single equality, and an OR of
 * three equalities), verifying both the content of each returned row and the number of rows.
 *
 * <p>The output directory is removed in a {@code finally} block so that a failing assertion does
 * not leave stale files behind.
 *
 * @throws IOException if the output directory cannot be deleted or reading fails
 * @throws InterruptedException if reading is interrupted
 */
@Test
public void testReadWithFilterEqualSet() throws IOException, InterruptedException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    try {
        Field[] fields = new Field[3];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);
        fields[2] = new Field("doubleField", DataTypes.DOUBLE);
        TestUtil.writeFilesAndVerify(200, new Schema(fields), path);

        // name IN (robot7, robot1)
        List<Object> values = new ArrayList<>();
        values.add("robot7");
        values.add("robot1");
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(prepareEqualToExpressionSet("name", "String", values)).build();
        Assert.assertEquals(40, countVerifiedRobotRows(reader, "robot7", "robot1"));

        // age IN (1, 7)
        List<Object> values2 = new ArrayList<>();
        values2.add(1);
        values2.add(7);
        CarbonReader reader2 = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(prepareEqualToExpressionSet("age", "int", values2)).build();
        Assert.assertEquals(2, countVerifiedRobotRows(reader2, "robot7", "robot1"));

        // doubleField IN (0.5, 3.5)
        List<Object> values3 = new ArrayList<>();
        values3.add(0.5);
        values3.add(3.5);
        CarbonReader reader3 = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(prepareEqualToExpressionSet("doubleField", "double", values3)).build();
        Assert.assertEquals(2, countVerifiedRobotRows(reader3, "robot7", "robot1"));

        // name = robot7
        CarbonReader reader4 = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(prepareEqualToExpression("name", "string", "robot7")).build();
        Assert.assertEquals(20, countVerifiedRobotRows(reader4, "robot7"));

        // name = robot1 OR name = robot7 OR age = 2
        List<Expression> expressions = new ArrayList<>();
        expressions.add(prepareEqualToExpression("name", "String", "robot1"));
        expressions.add(prepareEqualToExpression("name", "String", "robot7"));
        expressions.add(prepareEqualToExpression("age", "int", "2"));
        CarbonReader reader5 = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(prepareOrExpression(expressions)).build();
        Assert.assertEquals(41, countVerifiedRobotRows(reader5, "robot7", "robot1", "robot2"));
    } finally {
        FileUtils.deleteDirectory(new File(path));
    }
}

/**
 * Drains {@code reader}, verifies every row, closes the reader, and returns the row count.
 *
 * <p>A row is matched against {@code allowedNames} in the given order using {@code contains};
 * a row matching none of them fails the test. For the matched name {@code robotN} the row's age
 * must satisfy {@code age % 10 == N}, and the fractional part of {@code doubleField} must be
 * {@code 0.5} when {@code N} is odd and {@code 0} when it is even. For the names used by this
 * test (robot7, robot1, robot2) that is exactly the set of values the test expects.
 *
 * @param reader reader to drain; always closed before returning
 * @param allowedNames names of the form {@code robotN} (single trailing digit) that rows may carry
 * @return number of rows read
 * @throws IOException if reading or closing fails
 * @throws InterruptedException if reading is interrupted
 */
private static int countVerifiedRobotRows(CarbonReader reader, String... allowedNames) throws IOException, InterruptedException {
    try {
        int rowCount = 0;
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            String name = (String) row[0];
            String matched = null;
            for (String candidate : allowedNames) {
                if (name.contains(candidate)) {
                    matched = candidate;
                    break;
                }
            }
            if (matched == null) {
                Assert.fail("Unexpected row name: " + name);
            }
            // the trailing digit of "robotN" determines the expected age and fraction
            int digit = Character.digit(matched.charAt(matched.length() - 1), 10);
            double expectedFraction = (digit % 2 == 0) ? 0.0 : 0.5;
            Assert.assertEquals(digit, (int) (row[1]) % 10);
            Assert.assertEquals(expectedFraction, (double) (row[2]) % 1, 0.0);
            rowCount++;
        }
        return rowCount;
    } finally {
        // close the reader even when a row assertion fails
        reader.close();
    }
}
Also used : ArrayList(java.util.ArrayList) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FilterUtil.prepareEqualToExpression(org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) FilterUtil.prepareOrExpression(org.apache.carbondata.core.scan.filter.FilterUtil.prepareOrExpression) Expression(org.apache.carbondata.core.scan.expression.Expression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) NotInExpression(org.apache.carbondata.core.scan.expression.conditional.NotInExpression) OrExpression(org.apache.carbondata.core.scan.expression.logical.OrExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) InExpression(org.apache.carbondata.core.scan.expression.conditional.InExpression) LessThanExpression(org.apache.carbondata.core.scan.expression.conditional.LessThanExpression) NotEqualsExpression(org.apache.carbondata.core.scan.expression.conditional.NotEqualsExpression) File(java.io.File) Test(org.junit.Test)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6