Search in sources :

Example 26 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From the class CSVCarbonWriterTest, method testByteDataType:

/**
 * Writes 15 CSV rows with one STRING column and one BYTE column, then infers the
 * schema from the written output and verifies that both data types are reported.
 *
 * @throws IOException if the output directory cannot be deleted
 */
@Test
public void testByteDataType() throws IOException {
    String path = "./testWriteFiles";
    // Start from a clean directory so stale output cannot affect schema inference.
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[2];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("byteField", DataTypes.BYTE);
    try {
        CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
        CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
        for (int i = 0; i < 15; i++) {
            String[] row = new String[] { "robot" + (i % 10), "" + i };
            writer.write(row);
        }
        writer.close();
        TableInfo tableInfo = SchemaReader.inferSchema(AbsoluteTableIdentifier.from(path, "", ""), false);
        List<String> dataTypes = new ArrayList<>();
        for (ColumnSchema columnSchema : tableInfo.getFactTable().getListOfColumns()) {
            dataTypes.add(columnSchema.getDataType().toString());
        }
        // JUnit assertions rather than the `assert` keyword: the keyword is skipped
        // entirely when the JVM runs without -ea, which would make this test vacuous.
        Assert.assertTrue(dataTypes.contains("STRING"));
        Assert.assertTrue(dataTypes.contains("BYTE"));
    } catch (Exception e) {
        e.printStackTrace();
        // Surface the cause in the test report instead of failing with no message.
        Assert.fail(e.getMessage());
    } finally {
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ArrayList(java.util.ArrayList) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 27 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From the class CSVCarbonWriterTest, method testWritingAndReadingArrayOfFloatAndByte:

/**
 * Writes 15 CSV rows into a schema made of two array columns, one with FLOAT elements
 * and one with BYTE elements, and fails if the writer throws.
 *
 * <p>NOTE(review): despite the method name, nothing is read back here; the test only
 * verifies that writing completes without an exception.
 *
 * @throws IOException if the output directory cannot be deleted
 */
@Test
public void testWritingAndReadingArrayOfFloatAndByte() throws IOException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    StructField[] fields = new StructField[1];
    fields[0] = new StructField("floatField", DataTypes.FLOAT);
    Field structType1 = new Field("floatarray", "array", Arrays.asList(fields));
    StructField[] fields2 = new StructField[1];
    fields2[0] = new StructField("byteField", DataTypes.BYTE);
    Field structType2 = new Field("bytearray", "array", Arrays.asList(fields2));
    try {
        CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
        CarbonWriter writer = builder.withCsvInput(new Schema(new Field[] { structType1, structType2 })).writtenBy("CSVCarbonWriterTest").build();
        for (int i = 0; i < 15; i++) {
            // Each column value is three elements joined by the \001 control character
            // (presumably the array element delimiter expected by the CSV writer).
            String[] row = new String[] { "1.0\0012.0\0013.0", "1\0012\0013" };
            writer.write(row);
        }
        writer.close();
    } catch (Exception e) {
        e.printStackTrace();
        // Surface the cause in the test report instead of failing with no message.
        Assert.fail(e.getMessage());
    } finally {
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Test(org.junit.Test)

Example 28 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From the class CSVCarbonWriterTest, method testWritingAndReadingArrayStruct:

/**
 * Writes 15 CSV rows whose fourth column is an array of four-field structs, then
 * verifies the schema read back from the output and the content of every row
 * returned by a {@code CarbonReader}.
 *
 * @throws IOException if closing the reader or deleting the output directory fails
 */
@Test
public void testWritingAndReadingArrayStruct() throws IOException {
    String path = "./testWriteFilesArrayStruct";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[4];
    fields[0] = new Field("id", DataTypes.STRING);
    fields[1] = new Field("source", DataTypes.STRING);
    fields[2] = new Field("usage", DataTypes.STRING);
    List<StructField> structFieldsList = new ArrayList<>();
    structFieldsList.add(new StructField("name", DataTypes.STRING));
    structFieldsList.add(new StructField("type", DataTypes.STRING));
    structFieldsList.add(new StructField("creation-time", DataTypes.STRING));
    structFieldsList.add(new StructField("property", DataTypes.STRING));
    StructField structTypeByList = new StructField("annotation", DataTypes.createStructType(structFieldsList), structFieldsList);
    List<StructField> list = new ArrayList<>();
    list.add(structTypeByList);
    Field arrayType = new Field("annotations", "array", list);
    fields[3] = arrayType;

    CarbonReader reader = null;
    try {
        // --- Write phase ---
        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 15; i++) {
                // The annotations column holds two structs; struct members are joined by
                // \002 and the two structs by \001 (presumably the complex-type delimiters).
                String[] row = new String[] { "robot" + (i % 10), String.valueOf(i), i + "." + i, "sunflowers" + (i % 10) + "\002" + "modelarts/image_classification" + "\002" + "2019-03-30 17:22:31" + "\002" + "{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}" + "\001" + "roses" + (i % 10) + "\002" + "modelarts/image_classification" + "\002" + "2019-03-30 17:22:32" + "\002" + "{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}" };
                writer.write(row);
            }
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }

        // --- Schema verification ---
        Schema schema = CarbonSchemaReader.readSchema(path).asOriginOrder();
        Assert.assertEquals(4, schema.getFieldsLength());
        Field[] fields1 = schema.getFields();
        boolean foundArrayColumn = false;
        for (int i = 0; i < fields1.length; i++) {
            if (DataTypes.isArrayType(fields1[i].getDataType())) {
                ArrayType arrayType1 = (ArrayType) fields1[i].getDataType();
                Assert.assertTrue("annotations.annotation".equalsIgnoreCase(arrayType1.getElementName()));
                Assert.assertTrue(DataTypes.isStructType(fields1[i].getChildren().get(0).getDataType()));
                Assert.assertEquals(4, (((StructType) fields1[i].getChildren().get(0).getDataType()).getFields()).size());
                foundArrayColumn = true;
            }
        }
        Assert.assertTrue(foundArrayColumn);

        // --- Read phase ---
        reader = CarbonReader.builder(path).projection(new String[] { "id", "source", "usage", "annotations" }).build();
        int rowCount = 0;
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            Assert.assertEquals(4, row.length);
            Assert.assertTrue(((String) row[0]).contains("robot"));
            int value = Integer.parseInt((String) row[1]);
            float value2 = Float.parseFloat((String) row[2]);
            // Both bounds must hold; the previous `||` made these checks always true.
            Assert.assertTrue(value > -1 && value < 15);
            Assert.assertTrue(value2 > -1 && value2 < 15);
            Object[] annotations = (Object[]) row[3];
            for (int j = 0; j < annotations.length; j++) {
                Object[] annotation = (Object[]) annotations[j];
                Assert.assertTrue(((String) annotation[0]).contains("sunflowers") || ((String) annotation[0]).contains("roses"));
                Assert.assertTrue(((String) annotation[1]).contains("modelarts/image_classification"));
                Assert.assertTrue(((String) annotation[2]).contains("2019-03-30 17:22:3"));
                Assert.assertTrue(((String) annotation[3]).contains("{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}"));
                // Same checks through the readObjects helper defined elsewhere in this class.
                Object[] annotation1 = readObjects(annotations, j);
                Assert.assertTrue(((String) annotation1[0]).contains("sunflowers") || ((String) annotation1[0]).contains("roses"));
                Assert.assertTrue(((String) annotation1[1]).contains("modelarts/image_classification"));
                Assert.assertTrue(((String) annotation1[2]).contains("2019-03-30 17:22:3"));
                Assert.assertTrue(((String) annotation1[3]).contains("{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}"));
            }
            rowCount++;
        }
        Assert.assertEquals(15, rowCount);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and fail: swallowing the exception would let the
        // test pass without having verified the data.
        Thread.currentThread().interrupt();
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        // Close the reader and remove the output even when an assertion fails.
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }
}
Also used : StructType(org.apache.carbondata.core.metadata.datatype.StructType) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ArrayList(java.util.ArrayList) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) ArrayType(org.apache.carbondata.core.metadata.datatype.ArrayType) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 29 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From the class CSVCarbonWriterTest, method testAllPrimitiveDataType:

/**
 * Writes 100 rows covering nine primitive column types (string, int, short, long,
 * double, boolean, date, timestamp, decimal) and verifies that at least one file with
 * the {@code CarbonCommonConstants.FACT_FILE_EXT} extension was produced.
 *
 * @throws IOException if the output directory cannot be deleted
 */
@Test
public void testAllPrimitiveDataType() throws IOException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[9];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("intField", DataTypes.INT);
    fields[2] = new Field("shortField", DataTypes.SHORT);
    fields[3] = new Field("longField", DataTypes.LONG);
    fields[4] = new Field("doubleField", DataTypes.DOUBLE);
    fields[5] = new Field("boolField", DataTypes.BOOLEAN);
    fields[6] = new Field("dateField", DataTypes.DATE);
    fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
    fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
    try {
        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 100; i++) {
                Object[] row = new Object[] { "robot" + (i % 10), i, i, (Long.MAX_VALUE - i), ((double) i / 2), true, "2019-03-02", "2019-02-12 03:03:34", "1.234567" };
                writer.write(row);
            }
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
        File[] dataFiles = new File(path).listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
            }
        });
        Assert.assertNotNull(dataFiles);
        Assert.assertTrue(dataFiles.length > 0);
    } finally {
        // Clean up even when the write or an assertion fails, so leftover files cannot
        // leak into other tests that use the same output path.
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FileFilter(java.io.FileFilter) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 30 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From the class CSVCarbonWriterTest, method test2Blocklet:

/**
 * Writes one million rows with a (name STRING, age INT) schema through
 * {@code TestUtil.writeFilesAndVerify}.
 *
 * <p>NOTE(review): the trailing arguments {@code null, 1, 100} are presumably sort
 * columns, block size and blocklet size; confirm against {@code TestUtil}.
 *
 * @throws IOException if the output directory cannot be deleted
 */
@Test
public void test2Blocklet() throws IOException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    try {
        TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, 1, 100);
        // TODO: implement reader to verify the number of blocklet in the file
    } finally {
        // Clean up even when verification fails, so leftover files cannot leak into
        // other tests that use the same output path.
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field): 140, File (java.io.File): 111, Test (org.junit.Test): 111, StructField (org.apache.carbondata.core.metadata.datatype.StructField): 104, IOException (java.io.IOException): 55, InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException): 39, ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema): 27, HashMap (java.util.HashMap): 26, ArrayList (java.util.ArrayList): 23, CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 22, ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression): 21, LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression): 21, EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression): 13, Map (java.util.Map): 12, FilenameFilter (java.io.FilenameFilter): 11, FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression): 10, BufferedInputStream (java.io.BufferedInputStream): 9, FileInputStream (java.io.FileInputStream): 9, AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression): 9, FileFilter (java.io.FileFilter): 6