Search in sources :

Example 31 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CSVCarbonWriterTest method testTaskNo.

/**
 * Writes two CSV rows with the builder's task number set to 5 and checks that
 * the task id parsed from the name of the first generated data file is "5".
 *
 * @throws IOException if the output directory cannot be deleted
 */
@Test
public void testTaskNo() throws IOException {
    // TODO: write all data type and read by CarbonRecordReader to verify the content
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[2];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("intField", DataTypes.INT);
    try {
        CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
        CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
        for (int i = 0; i < 2; i++) {
            String[] row = new String[] { "robot" + (i % 10), String.valueOf(i) };
            writer.write(row);
        }
        writer.close();
        // Only files carrying the fact-file extension are considered.
        File[] dataFiles = new File(path).listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
            }
        });
        Assert.assertNotNull(dataFiles);
        Assert.assertTrue(dataFiles.length > 0);
        String taskNo = CarbonTablePath.DataFileUtil.getTaskNo(dataFiles[0].getName());
        String taskID = CarbonTablePath.DataFileUtil.getTaskIdFromTaskNo(taskNo);
        // Expected value goes first so a failure reports the values the right way round.
        Assert.assertEquals("Task Id is not matched", "5", taskID);
    } catch (Exception e) {
        // Keep the original exception as the cause instead of printing it and
        // failing with only its message.
        throw new AssertionError("testTaskNo failed: " + e.getMessage(), e);
    } finally {
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FileFilter(java.io.FileFilter) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 32 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CSVCarbonWriterTest method testWritingAndReadingArrayString.

/**
 * Writes 15 CSV rows whose fourth column ("annotations") is an array of
 * strings, then verifies the schema read back from the output path and the
 * rows read back through CarbonReader.
 *
 * @throws IOException if reading, closing the reader or deleting the output
 *         directory fails
 */
@Test
public void testWritingAndReadingArrayString() throws IOException {
    String path = "./testWriteFilesArrayString";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[4];
    fields[0] = new Field("id", DataTypes.STRING);
    fields[1] = new Field("source", DataTypes.STRING);
    fields[2] = new Field("usage", DataTypes.STRING);
    StructField[] stringFields = new StructField[1];
    stringFields[0] = new StructField("stringField", DataTypes.STRING);
    Field arrayType = new Field("annotations", "array", Arrays.asList(stringFields));
    fields[3] = arrayType;
    CarbonReader reader = null;
    try {
        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().taskNo(5).outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CSVCarbonWriterTest").build();
            for (int i = 0; i < 15; i++) {
                String[] row = new String[] { "robot" + (i % 10), String.valueOf(i), i + "." + i, "sunflowers" + (i % 10) + "\002" + "modelarts/image_classification" + "\002" + "2019-03-30 17:22:31" + "\002" + "{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}" + "\001" + "roses" + (i % 10) + "\002" + "modelarts/image_classification" + "\002" + "2019-03-30 17:22:32" + "\002" + "{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}" };
                writer.write(row);
            }
            writer.close();
        } catch (Exception e) {
            // Fail with the cause attached rather than an empty failure.
            throw new AssertionError("Writing the test data failed: " + e.getMessage(), e);
        }

        // Verify the schema that was persisted with the data.
        Schema schema = CarbonSchemaReader.readSchema(path).asOriginOrder();
        Assert.assertEquals(4, schema.getFieldsLength());
        Field[] fields1 = schema.getFields();
        boolean arrayFieldSeen = false;
        for (int i = 0; i < fields1.length; i++) {
            if (DataTypes.isArrayType(fields1[i].getDataType())) {
                ArrayType arrayType1 = (ArrayType) fields1[i].getDataType();
                Assert.assertTrue("annotations.stringField".equalsIgnoreCase(arrayType1.getElementName()));
                Assert.assertTrue(DataTypes.STRING.equals(fields1[i].getChildren().get(0).getDataType()));
                arrayFieldSeen = true;
            }
        }
        Assert.assertTrue("No array field found in the schema", arrayFieldSeen);

        // Read again
        String firstAnnotation = "\u0002modelarts/image_classification\u00022019-03-30 17:22:31\u0002{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}";
        String secondAnnotation = "\u0002modelarts/image_classification\u00022019-03-30 17:22:32\u0002{\"@modelarts:start_index\":0,\"@modelarts:end_index\":5}";
        reader = CarbonReader.builder(path).projection(new String[] { "id", "source", "usage", "annotations" }).build();
        int rowCount = 0;
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            Assert.assertEquals(4, row.length);
            Assert.assertTrue(((String) row[0]).contains("robot"));
            int value = Integer.parseInt((String) row[1]);
            float value2 = Float.parseFloat((String) row[2]);
            // Both bounds must hold; the previous "||" made these checks always true.
            Assert.assertTrue("source out of range: " + value, value > -1 && value < 15);
            Assert.assertTrue("usage out of range: " + value2, value2 > -1 && value2 < 15);
            Object[] annotations = (Object[]) row[3];
            for (int j = 0; j < annotations.length; j++) {
                String annotation = (String) annotations[j];
                Assert.assertTrue("Unexpected annotation: " + annotation, annotation.contains(firstAnnotation) || annotation.contains(secondAnnotation));
            }
            rowCount++;
        }
        Assert.assertEquals(15, rowCount);
    } catch (InterruptedException e) {
        // Restore the interrupt flag and fail: silently returning here would
        // let the test pass without having verified anything.
        Thread.currentThread().interrupt();
        throw new AssertionError("Interrupted while reading the written data", e);
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } finally {
            FileUtils.deleteDirectory(new File(path));
        }
    }
}
Also used : ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) ArrayType(org.apache.carbondata.core.metadata.datatype.ArrayType) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 33 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CSVCarbonWriterTest method testWriteFiles.

/**
 * Writes files for a two-column (name, age) schema through
 * TestUtil.writeFilesAndVerify and removes the output directory afterwards.
 *
 * @throws IOException if the output directory cannot be deleted
 */
@Test
public void testWriteFiles() throws IOException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    try {
        TestUtil.writeFilesAndVerify(new Schema(fields), path);
    } finally {
        // Clean up even when verification fails, so the directory does not
        // leak into other tests using the same path.
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 34 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class ImageTest method testWriteWithByteArrayDataType.

/**
 * Writes rows containing the same image in three BINARY columns plus two
 * BINARY columns fed with "YWJj" (once as a String, once as a byte[]) using
 * the "base64" binary decoder load option, then reads the rows back and
 * compares the binary content with the original image. Each image read back
 * is also saved under the output path for manual comparison.
 */
@Test
public void testWriteWithByteArrayDataType() throws IOException, InvalidLoadOptionException, InterruptedException {
    String imagePath = "./src/test/resources/image/carbondatalogo.jpg";
    int num = 1;
    int rows = 10;
    String path = "./target/binary";
    try {
        FileUtils.deleteDirectory(new File(path));
    } catch (IOException e) {
        e.printStackTrace();
    }
    Field[] fields = new Field[7];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    fields[2] = new Field("image1", DataTypes.BINARY);
    fields[3] = new Field("image2", DataTypes.BINARY);
    fields[4] = new Field("image3", DataTypes.BINARY);
    fields[5] = new Field("decodeString", DataTypes.BINARY);
    fields[6] = new Field("decodeByte", DataTypes.BINARY);
    String[] projection = new String[] { "name", "age", "image1", "image2", "image3", "decodeString", "decodeByte" };

    // The same image goes into every row, so load it once. The loop advances
    // the offset so a short read cannot overwrite bytes already read, and the
    // stream is closed even if reading fails.
    byte[] originBinary = new byte[(int) new File(imagePath).length()];
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(imagePath))) {
        int offset = 0;
        while (offset < originBinary.length) {
            int count = bis.read(originBinary, offset, originBinary.length - offset);
            if (count < 0) {
                throw new IOException("Unexpected end of file while reading " + imagePath);
            }
            offset += count;
        }
    }

    // write image data
    for (int j = 0; j < num; j++) {
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)).writtenBy("SDKS3Example").withPageSizeInMb(1).withLoadOption("binary_decoder", "base64").build();
        for (int i = 0; i < rows; i++) {
            writer.write(new Object[] { "robot" + (i % 10), i, originBinary, originBinary, originBinary, "YWJj", "YWJj".getBytes() });
        }
        writer.close();
    }

    CarbonReader reader = CarbonReader.builder(path, "_temp").projection(projection).build();
    try {
        System.out.println("\nData:");
        int i = 0;
        while (i < 20 && reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            byte[] outputBinary = (byte[]) row[2];
            byte[] outputBinary2 = (byte[]) row[3];
            byte[] outputBinary3 = (byte[]) row[4];
            String stringValue = new String((byte[]) row[5]);
            String byteValue = new String((byte[]) row[6]);
            // when input is string, it will be decoded by base64.
            Assert.assertEquals("abc", stringValue);
            // when input is byte[], it will be not decoded by base64.
            Assert.assertEquals("YWJj", byteValue);
            System.out.println(row[0] + " " + row[1] + " image1 size:" + outputBinary.length + " image2 size:" + outputBinary2.length + " image3 size:" + outputBinary3.length + "\t" + stringValue + "\t" + byteValue);
            Assert.assertEquals(originBinary.length, outputBinary.length);
            for (int k = 0; k < 3; k++) {
                byte[] originBinaryTemp = (byte[]) row[2 + k];
                // validate output binary data and origin binary data
                Assert.assertEquals(originBinaryTemp.length, outputBinary.length);
                for (int j = 0; j < originBinaryTemp.length; j++) {
                    if (originBinaryTemp[j] != outputBinary[j]) {
                        Assert.fail("image" + (k + 1) + " differs from image1 at byte " + j + " in row " + i);
                    }
                    if (originBinary[j] != outputBinary[j]) {
                        Assert.fail("image1 differs from the original image at byte " + j + " in row " + i);
                    }
                }
                // save image, user can compare the save image and original image
                String destString = path + "/image" + k + "_" + i + ".jpg";
                try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString))) {
                    bos.write(originBinaryTemp);
                }
            }
            i++;
        }
        System.out.println("\nFinished");
    } finally {
        reader.close();
    }
}
Also used : IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) BufferedInputStream(java.io.BufferedInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) Test(org.junit.Test)

Example 35 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class ImageTest method testWriteNonBase64WithBase64Decoder.

/**
 * Writes rows whose "decodeString" column holds "^YWJj" (not valid base64)
 * while the "base64" binary decoder load option is set, and expects closing
 * the writer to fail with a message saying that the data is not base64.
 */
@Test
public void testWriteNonBase64WithBase64Decoder() throws IOException, InvalidLoadOptionException, InterruptedException {
    String imagePath = "./src/test/resources/image/carbondatalogo.jpg";
    int num = 1;
    int rows = 10;
    String path = "./target/binary";
    try {
        FileUtils.deleteDirectory(new File(path));
    } catch (IOException e) {
        e.printStackTrace();
    }
    Field[] fields = new Field[7];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    fields[2] = new Field("image1", DataTypes.BINARY);
    fields[3] = new Field("image2", DataTypes.BINARY);
    fields[4] = new Field("image3", DataTypes.BINARY);
    fields[5] = new Field("decodeString", DataTypes.BINARY);
    fields[6] = new Field("decodeByte", DataTypes.BINARY);

    // The same image goes into every row, so load it once. The loop advances
    // the offset so a short read cannot overwrite bytes already read, and the
    // stream is closed even if reading fails.
    byte[] originBinary = new byte[(int) new File(imagePath).length()];
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(imagePath))) {
        int offset = 0;
        while (offset < originBinary.length) {
            int count = bis.read(originBinary, offset, originBinary.length - offset);
            if (count < 0) {
                throw new IOException("Unexpected end of file while reading " + imagePath);
            }
            offset += count;
        }
    }

    // write image data together with the invalid base64 value
    for (int j = 0; j < num; j++) {
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withCsvInput(new Schema(fields)).writtenBy("SDKS3Example").withPageSizeInMb(1).withLoadOption("binary_decoder", "base64").build();
        for (int i = 0; i < rows; i++) {
            writer.write(new Object[] { "robot" + (i % 10), i, originBinary, originBinary, originBinary, "^YWJj", "^YWJj".getBytes() });
        }
        try {
            writer.close();
            // AssertionError is not an Exception, so it is not caught below.
            Assert.fail("Closing the writer should fail because the data is not base64");
        } catch (Exception e) {
            String message = e.getMessage();
            // Guard against a null message so the failure is an assertion, not an NPE.
            Assert.assertTrue("Unexpected failure: " + e, message != null && message.contains("Binary decoder is base64, but data is not base64"));
        }
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) BufferedInputStream(java.io.BufferedInputStream) IOException(java.io.IOException) File(java.io.File) FileInputStream(java.io.FileInputStream) DecoderException(org.apache.commons.codec.DecoderException) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Test(org.junit.Test)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6