Search in sources :

Example 91 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testWriteAndReadFilesWithReaderBuildFail.

/**
 * Writes two independent single-row stores, checks that building a reader on the first
 * store with a projection naming a column that was never written ("c3") fails, and then
 * checks that a reader on the second store can still be built and iterated.
 *
 * @throws IOException if writing, reading or directory cleanup fails
 * @throws InterruptedException declared because building the second reader may throw it
 */
@Test
public void testWriteAndReadFilesWithReaderBuildFail() throws IOException, InterruptedException {
    String path1 = "./testWriteFiles";
    String path2 = "./testWriteFiles2";
    FileUtils.deleteDirectory(new File(path1));
    FileUtils.deleteDirectory(new File(path2));
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path1), false);
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path2), false);
    try {
        Field[] fields = new Field[] { new Field("c1", "string"), new Field("c2", "int") };
        Schema schema = new Schema(fields);
        CarbonWriterBuilder builder = CarbonWriter.builder();
        CarbonWriter carbonWriter = null;
        try {
            carbonWriter = builder.outputPath(path1).uniqueIdentifier(12345).withCsvInput(schema).writtenBy("CarbonReaderTest").build();
        } catch (InvalidLoadOptionException e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
        carbonWriter.write(new String[] { "MNO", "100" });
        carbonWriter.close();

        Field[] fields1 = new Field[] { new Field("p1", "string"), new Field("p2", "int") };
        Schema schema1 = new Schema(fields1);
        CarbonWriterBuilder builder1 = CarbonWriter.builder();
        CarbonWriter carbonWriter1 = null;
        try {
            carbonWriter1 = builder1.outputPath(path2).uniqueIdentifier(12345).withCsvInput(schema1).writtenBy("CarbonReaderTest").build();
        } catch (InvalidLoadOptionException e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
        carbonWriter1.write(new String[] { "PQR", "200" });
        carbonWriter1.close();

        // Only the build call sits inside the try: an exception thrown while closing an
        // unexpectedly created reader must not be mistaken for the expected build failure.
        CarbonReader invalidReader = null;
        try {
            invalidReader = CarbonReader.builder(path1, "_temp").projection(new String[] { "c1", "c3" }).build();
        } catch (Exception e) {
            System.out.println("Success");
        }
        if (invalidReader != null) {
            invalidReader.close();
            Assert.fail("Building a reader with a projection on the unknown column c3 should have failed");
        }

        CarbonReader reader1 = CarbonReader.builder(path2, "_temp1").projection(new String[] { "p1", "p2" }).build();
        try {
            while (reader1.hasNext()) {
                Object[] row1 = (Object[]) reader1.readNextRow();
                System.out.println(row1[0]);
                System.out.println(row1[1]);
            }
        } finally {
            reader1.close();
        }
    } finally {
        // Remove both stores even when the test fails so no files are left on disk.
        FileUtils.deleteDirectory(new File(path1));
        FileUtils.deleteDirectory(new File(path2));
    }
}
Also used : InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Test(org.junit.Test)

Example 92 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testReadWithFilterOfNonTransactionalSimple.

/**
 * Writes 200 rows through {@code TestUtil.writeFilesAndVerify} and reads them back with an
 * equality filter {@code name = 'robot1'}, expecting exactly 20 matching rows.
 *
 * @throws IOException if writing, reading or directory cleanup fails
 * @throws InterruptedException declared because building the reader may throw it
 */
@Test
public void testReadWithFilterOfNonTransactionalSimple() throws IOException, InterruptedException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path), false);
    String path1 = path + "/0testdir";
    String path2 = path + "/testdir";
    // NOTE(review): second delete looks redundant with the one above; kept as in the
    // original in case clearing the index cache touches the directory - confirm.
    FileUtils.deleteDirectory(new File(path));
    try {
        // Create two extra sub-directories inside the store before writing.
        FileFactory.getCarbonFile(path1);
        FileFactory.mkdirs(path1);
        FileFactory.getCarbonFile(path2);
        FileFactory.mkdirs(path2);
        Field[] fields = new Field[2];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);
        TestUtil.writeFilesAndVerify(200, new Schema(fields), path);
        ColumnExpression columnExpression = new ColumnExpression("name", DataTypes.STRING);
        EqualToExpression equalToExpression = new EqualToExpression(columnExpression, new LiteralExpression("robot1", DataTypes.STRING));
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age" }).filter(equalToExpression).build();
        try {
            int matchedRows = 0;
            while (reader.hasNext()) {
                Object[] row = (Object[]) reader.readNextRow();
                // Default sort column is applied for dimensions. So, need to validate accordingly.
                // A JUnit assertion is used instead of the assert keyword so the check
                // also runs when the JVM is started without -ea.
                Assert.assertEquals("robot1", row[0]);
                matchedRows++;
            }
            Assert.assertEquals(20, matchedRows);
        } finally {
            reader.close();
        }
    } finally {
        // Remove the store even when the test fails.
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FilterUtil.prepareEqualToExpression(org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) File(java.io.File) Test(org.junit.Test)

Example 93 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testTimeStampAndBadRecord.

/**
 * Writes 200 rows, half of which carry the date-only value "2018-05-12" in the timestamp
 * column, with the bad-record action set to REDIRECT and the timestamp format set to
 * "yyyy-MM-dd hh:mm:ss". The test then expects to read back exactly the 100 rows whose
 * timestamp value is "2019-02-12 03:03:34".
 *
 * <p>The three overridden properties are restored in a {@code finally} block so that a
 * failure here does not change the configuration seen by code that runs afterwards.
 *
 * @throws IOException if reading or directory cleanup fails
 * @throws InterruptedException declared because building the reader may throw it
 */
@Test
public void testTimeStampAndBadRecord() throws IOException, InterruptedException {
    // Remember the current values so they can be restored afterwards.
    String timestampFormat = carbonProperties.getProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT);
    String badRecordAction = carbonProperties.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION_DEFAULT);
    String badRecordLoc = carbonProperties.getProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, CarbonCommonConstants.CARBON_BADRECORDS_LOC_DEFAULT_VAL);
    String rootPath = new File(this.getClass().getResource("/").getPath() + "../../").getCanonicalPath();
    String storeLocation = rootPath + "/target/";
    String path = "./testWriteFiles";
    carbonProperties.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, storeLocation).addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd hh:mm:ss").addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "REDIRECT");
    try {
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[9];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("intField", DataTypes.INT);
        fields[2] = new Field("shortField", DataTypes.SHORT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        try {
            CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path);
            CarbonWriter writer = builder.withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
            for (int i = 0; i < 100; i++) {
                // Timestamp column holds a date-only value here.
                String[] row = new String[] { "robot" + (i % 10), String.valueOf(i), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2018-05-12", "2018-05-12", "12.345" };
                writer.write(row);
                // Timestamp column holds a full date-time value here.
                String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345" };
                writer.write(row2);
            }
            writer.close();
        } catch (Exception e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
        File folder = new File(path);
        Assert.assertTrue(folder.exists());
        File[] dataFiles = folder.listFiles(new FileFilter() {

            @Override
            public boolean accept(File pathname) {
                return pathname.getName().endsWith(CarbonCommonConstants.FACT_FILE_EXT);
            }
        });
        Assert.assertNotNull(dataFiles);
        Assert.assertTrue(dataFiles.length > 0);
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "stringField", "shortField", "intField", "longField", "doubleField", "boolField", "dateField", "timeField", "decimalField" }).build();
        try {
            int rowCount = 0;
            while (reader.hasNext()) {
                Object[] row = (Object[]) reader.readNextRow();
                // Projection order differs from the schema order: intField is at index 2.
                int id = (int) row[2];
                Assert.assertEquals("robot" + (id % 10), row[0]);
                Assert.assertEquals(Short.parseShort(String.valueOf(id)), row[1]);
                Assert.assertEquals(Long.MAX_VALUE - id, row[3]);
                Assert.assertEquals((double) id / 2, row[4]);
                Assert.assertTrue((boolean) row[5]);
                Assert.assertEquals("2019-03-02", row[6]);
                Assert.assertEquals("2019-02-12 03:03:34", row[7]);
                rowCount++;
            }
            Assert.assertEquals(100, rowCount);
        } finally {
            reader.close();
        }
    } finally {
        // Restore the properties first so they are reset even if directory removal throws.
        carbonProperties.addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, timestampFormat);
        carbonProperties.addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, badRecordAction);
        carbonProperties.addProperty(CarbonCommonConstants.CARBON_BADRECORDS_LOC, badRecordLoc);
        FileUtils.deleteDirectory(new File(path));
    }
}
Also used : IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FileFilter(java.io.FileFilter) File(java.io.File) Test(org.junit.Test)

Example 94 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testWriteWithDifferentDataType.

/**
 * Writes ten rows covering thirteen data types (including varchar, array, float and
 * binary) and reads them back through the row record reader, checking each value with the
 * {@code RowUtil} accessors.
 *
 * <p>All checks use JUnit assertions rather than the {@code assert} keyword so they also
 * run when the JVM is started without {@code -ea}.
 */
@Test
public void testWriteWithDifferentDataType() {
    String path = "./carbondata";
    try {
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[13];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        fields[11] = new Field("floatField", DataTypes.FLOAT);
        fields[12] = new Field("binaryField", DataTypes.BINARY);
        Map<String, String> map = new HashMap<>();
        // '#' separates the elements of the array column in the input row below.
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
        byte[] value = "Binary".getBytes();
        for (int i = 0; i < 10; i++) {
            Object[] row2 = new Object[] { "robot" + (i % 10), i % 10000, i, (Long.MAX_VALUE - i), ((double) i / 2), (true), "2019-03-02", "2019-02-12 03:03:34", 12.345, "varchar", "Hello#World#From#Carbon", 1.23, value };
            writer.write(row2);
        }
        writer.close();
        // Read data
        CarbonReader reader = CarbonReader.builder(path, "_temp").withRowRecordReader().build();
        try {
            int rowIndex = 0;
            while (reader.hasNext()) {
                Object[] data = (Object[]) reader.readNextRow();
                // The column positions read below differ from the schema order above.
                Assert.assertEquals("robot" + rowIndex, RowUtil.getString(data, 0));
                assertEquals("2019-03-02", RowUtil.getString(data, 1));
                Assert.assertEquals(new String(value), new String(RowUtil.getBinary(data, 3)));
                Assert.assertEquals("varchar", RowUtil.getVarchar(data, 4));
                Object[] arr = RowUtil.getArray(data, 5);
                Assert.assertEquals("Hello", arr[0]);
                Assert.assertEquals("World", arr[1]);
                Assert.assertEquals("From", arr[2]);
                Assert.assertEquals("Carbon", arr[3]);
                assertEquals(rowIndex, RowUtil.getShort(data, 6));
                assertEquals(rowIndex, RowUtil.getInt(data, 7));
                assertEquals(Long.MAX_VALUE - rowIndex, RowUtil.getLong(data, 8));
                assertEquals(((double) rowIndex) / 2, RowUtil.getDouble(data, 9));
                Assert.assertTrue(RowUtil.getBoolean(data, 10));
                // 12.345 is written; the test expects "12.35" back from the decimal(8, 2) column.
                Assert.assertEquals("12.35", RowUtil.getDecimal(data, 11));
                assertEquals((float) 1.23, RowUtil.getFloat(data, 12));
                rowIndex++;
            }
            Assert.assertEquals(10, rowIndex);
        } finally {
            reader.close();
        }
    } catch (AssertionError e) {
        // Assertion failures already carry the useful message and stack trace.
        throw e;
    } catch (Throwable e) {
        // Keep the original throwable as the cause instead of reducing it to its message.
        throw new AssertionError(e.getMessage(), e);
    } finally {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
    }
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)

Example 95 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class ImageTest method binaryToCarbonWithHWD.

/**
 * Writes every image returned by {@code listFiles(sourceImageFolder, sufImage)} to a carbon
 * store together with its height, width, depth and the content of its annotation file,
 * then reads up to 20 rows back and saves each stored binary to
 * {@code preDestPath + rowIndex + sufImage} so it can be compared with the original.
 *
 * @param sourceImageFolder folder passed to {@code listFiles} to find the images
 * @param outputPath        carbon store location to write to and read from
 * @param preDestPath       path prefix for the images saved while reading back
 * @param sufAnnotation     suffix that replaces {@code sufImage} to locate the annotation file
 * @param sufImage          image file suffix; also appended to the saved image names
 * @param numToWrite        number of images written before the writer is closed and rebuilt
 * @throws Exception if an image cannot be decoded or any read or write fails
 */
public void binaryToCarbonWithHWD(String sourceImageFolder, String outputPath, String preDestPath, String sufAnnotation, final String sufImage, int numToWrite) throws Exception {
    Field[] fields = new Field[7];
    fields[0] = new Field("height", DataTypes.INT);
    fields[1] = new Field("width", DataTypes.INT);
    fields[2] = new Field("depth", DataTypes.INT);
    fields[3] = new Field("binaryName", DataTypes.STRING);
    fields[4] = new Field("binary", DataTypes.BINARY);
    fields[5] = new Field("labelName", DataTypes.STRING);
    fields[6] = new Field("labelContent", DataTypes.STRING);
    // read and write image data
    Object[] files = listFiles(sourceImageFolder, sufImage).toArray();
    int index = 0;
    CarbonWriter writer = buildImageWriter(outputPath, fields);
    for (int i = 0; i < files.length; i++) {
        // Close the current writer and start a new one every numToWrite images
        // (this also happens before the very first image, as in the original flow).
        if (0 == index % numToWrite) {
            writer.close();
            writer = buildImageWriter(outputPath, fields);
        }
        index++;
        File file = new File((String) files[i]);
        String imagePath = file.getCanonicalPath();
        System.out.println(imagePath);
        BufferedImage bufferedImage;
        int depth;
        try {
            bufferedImage = ImageIO.read(file);
            if (null == bufferedImage) {
                // ImageIO.read returns null when no registered reader accepts the file.
                throw new IOException("No ImageIO reader accepted " + imagePath);
            }
            depth = depthOf(bufferedImage);
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println(i);
            bufferedImage = readWithFallbackReaders(file);
            depth = depthOf(bufferedImage);
        }
        // Read whole files in one call: sizing a buffer with available() and looping
        // until read() returns -1 never terminates for an empty file.
        byte[] originBinary = java.nio.file.Files.readAllBytes(file.toPath());
        // Strip the literal suffix; String.split would interpret it as a regex
        // (e.g. the '.' in ".jpg" matches any character).
        String basePath = imagePath.endsWith(sufImage) ? imagePath.substring(0, imagePath.length() - sufImage.length()) : imagePath.split(sufImage)[0];
        String txtFileName = basePath + sufAnnotation;
        String txtValue = new String(java.nio.file.Files.readAllBytes(new File(txtFileName).toPath()), "UTF-8");
        // write data
        writer.write(new Object[] { bufferedImage.getHeight(), bufferedImage.getWidth(), depth, imagePath, originBinary, txtFileName, txtValue.replace("\n", "") });
    }
    writer.close();

    CarbonReader reader = CarbonReader.builder(outputPath).build();
    try {
        System.out.println("\nData:");
        int i = 0;
        while (i < 20 && reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            byte[] outputBinary = (byte[]) row[1];
            System.out.println(row[2] + " " + row[3] + " " + row[4] + " " + row[5] + " image size:" + outputBinary.length + " " + row[0]);
            // save image, user can compare the save image and original image
            String destString = preDestPath + i + sufImage;
            BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destString));
            try {
                bos.write(outputBinary);
            } finally {
                bos.close();
            }
            i++;
        }
        System.out.println("\nFinished");
    } finally {
        reader.close();
    }
}

/** Builds a writer for the image schema with the block and page sizes used by this test. */
private CarbonWriter buildImageWriter(String outputPath, Field[] fields) throws Exception {
    return CarbonWriter.builder().outputPath(outputPath).withCsvInput(new Schema(fields)).withBlockSize(256).writtenBy("SDKS3Example").withPageSizeInMb(1).build();
}

/** Returns 1 for a gray color space, 4 when the color model has alpha, otherwise 3. */
private static int depthOf(BufferedImage image) {
    if (image.getColorModel().getColorSpace().getType() == ColorSpace.TYPE_GRAY) {
        return 1;
    }
    return image.getColorModel().hasAlpha() ? 4 : 3;
}

/**
 * Tries each ImageIO reader that accepts the file, preferring a byte-gray destination type
 * when the reader offers one, and returns the first image that is decoded. Throws the last
 * reader exception, or an IOException when there was none, if no reader produces an image.
 */
private static BufferedImage readWithFallbackReaders(File file) throws Exception {
    ImageInputStream stream = new FileImageInputStream(new File(file.getCanonicalPath()));
    try {
        Iterator<ImageReader> iter = ImageIO.getImageReaders(stream);
        Exception lastException = null;
        while (iter.hasNext()) {
            ImageReader reader = iter.next();
            try {
                ImageReadParam param = reader.getDefaultReadParam();
                reader.setInput(stream, true, true);
                Iterator<ImageTypeSpecifier> imageTypes = reader.getImageTypes(0);
                while (imageTypes.hasNext()) {
                    ImageTypeSpecifier imageTypeSpecifier = imageTypes.next();
                    System.out.println(imageTypeSpecifier.getColorModel().getColorSpace().getType());
                    if (imageTypeSpecifier.getBufferedImageType() == BufferedImage.TYPE_BYTE_GRAY) {
                        param.setDestinationType(imageTypeSpecifier);
                        break;
                    }
                }
                BufferedImage image = reader.read(0, param);
                if (null != image) {
                    return image;
                }
            } catch (Exception e2) {
                lastException = e2;
            } finally {
                reader.dispose();
            }
        }
        // No reader produced an image.
        if (null != lastException) {
            throw lastException;
        }
        throw new IOException("Unable to decode image " + file.getCanonicalPath());
    } finally {
        stream.close();
    }
}
Also used : FileImageInputStream(javax.imageio.stream.FileImageInputStream) ImageInputStream(javax.imageio.stream.ImageInputStream) FileInputStream(java.io.FileInputStream) BufferedImage(java.awt.image.BufferedImage) DecoderException(org.apache.commons.codec.DecoderException) IOException(java.io.IOException) InvalidLoadOptionException(org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException) ImageTypeSpecifier(javax.imageio.ImageTypeSpecifier) FileImageInputStream(javax.imageio.stream.FileImageInputStream) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) ImageReadParam(javax.imageio.ImageReadParam) BufferedInputStream(java.io.BufferedInputStream) FileOutputStream(java.io.FileOutputStream) Iterator(java.util.Iterator) ImageReader(javax.imageio.ImageReader) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6