Search in sources :

Example 46 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testReadWithZeroBatchSize.

/**
 * Reads back a two-column (name, age) store with a row record reader configured with a
 * batch size of 0 and checks that all 10 expected rows are returned with the expected values.
 *
 * @throws Exception if writing, reading or cleaning up the test files fails
 */
@Test
public void testReadWithZeroBatchSize() throws Exception {
    String path = "./testWriteFiles";
    // Start from a clean directory and a clean index cache for this path.
    FileUtils.deleteDirectory(new File(path));
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path), false);
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    TestUtil.writeFilesAndVerify(10, new Schema(fields), path);
    CarbonReader reader = CarbonReader.builder(path).withRowRecordReader().withBatch(0).build();
    int i = 0;
    try {
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            Assert.assertEquals(("robot" + (i % 10)), row[0]);
            Assert.assertEquals(i, row[1]);
            i++;
        }
    } finally {
        // Release the reader even when one of the row assertions fails.
        reader.close();
    }
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals(10, i);
    FileUtils.deleteDirectory(new File(path));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)

Example 47 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testReadWithFilterOfNonTransactionalGreaterThan.

/**
 * Reads a three-column store through the filter
 * {@code doubleField > 13.5 AND name = 'robot7'} and checks every returned row against
 * the filter, expecting 17 matching rows in total.
 *
 * @throws IOException if writing, reading or cleaning up the test files fails
 * @throws InterruptedException if building or reading from the reader is interrupted
 */
@Test
public void testReadWithFilterOfNonTransactionalGreaterThan() throws IOException, InterruptedException {
    String path = "./testWriteFiles";
    // Start from a clean directory and a clean index cache for this path.
    FileUtils.deleteDirectory(new File(path));
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path), false);
    Field[] fields = new Field[3];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    fields[2] = new Field("doubleField", DataTypes.DOUBLE);
    TestUtil.writeFilesAndVerify(200, new Schema(fields), path);
    ColumnExpression columnExpression = new ColumnExpression("doubleField", DataTypes.DOUBLE);
    GreaterThanExpression greaterThanExpression = new GreaterThanExpression(columnExpression, new LiteralExpression("13.5", DataTypes.DOUBLE));
    ColumnExpression columnExpression2 = new ColumnExpression("name", DataTypes.STRING);
    EqualToExpression equalToExpression2 = new EqualToExpression(columnExpression2, new LiteralExpression("robot7", DataTypes.STRING));
    AndExpression andExpression = new AndExpression(greaterThanExpression, equalToExpression2);
    CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(andExpression).build();
    int i = 0;
    try {
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            // JUnit assertions are used instead of the `assert` keyword so the checks
            // run regardless of whether the JVM was started with -ea.
            Assert.assertTrue(((String) row[0]).contains("robot7"));
            Assert.assertEquals(7, ((int) row[1]) % 10);
            Assert.assertTrue((double) row[2] > 13.5);
            i++;
        }
    } finally {
        // Release the reader even when one of the row assertions fails.
        reader.close();
    }
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals(17, i);
    FileUtils.deleteDirectory(new File(path));
}
Also used : FilterUtil.prepareEqualToExpression(org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) GreaterThanExpression(org.apache.carbondata.core.scan.expression.conditional.GreaterThanExpression) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) File(java.io.File) Test(org.junit.Test)

Example 48 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testReadBlocklet.

/**
 * Writes one million rows, obtains the input splits from the reader builder, and reads each
 * split with its own reader, expecting 3 splits and 1,000,000 rows in total.
 *
 * @throws IOException if writing, reading or cleaning up the test files fails
 * @throws InterruptedException if building or reading from a reader is interrupted
 */
@Test
public void testReadBlocklet() throws IOException, InterruptedException {
    // A unique sub-directory per run keeps this large data set apart from other tests' files.
    String path = "./testWriteFiles/" + System.nanoTime();
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    TestUtil.writeFilesAndVerify(1000 * 1000, new Schema(fields), path, null, 1, 100);
    InputSplit[] splits = CarbonReader.builder(path).getSplits(true);
    // check for 3 blocklet count (as only one carbon file will be created)
    Assert.assertEquals(3, splits.length);
    int totalCount = 0;
    for (InputSplit split : splits) {
        CarbonReader reader = CarbonReader.builder(split).build();
        try {
            while (reader.hasNext()) {
                // Only the number of rows matters here; the row content is not inspected.
                reader.readNextRow();
                totalCount++;
            }
        } finally {
            // Release the per-split reader even when reading fails part-way.
            reader.close();
        }
    }
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals(1000000, totalCount);
    FileUtils.deleteDirectory(new File(path));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) InputSplit(org.apache.hadoop.mapreduce.InputSplit) Test(org.junit.Test)

Example 49 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testReadWithFilterOfNonTransactionalAnd.

/**
 * Reads a three-column store through the filter
 * {@code doubleField = 3.5 AND name = 'robot7'} and checks that exactly one row comes back,
 * with name containing "robot7", age 7 and doubleField 3.5.
 *
 * @throws IOException if writing, reading or cleaning up the test files fails
 * @throws InterruptedException if building or reading from the reader is interrupted
 */
@Test
public void testReadWithFilterOfNonTransactionalAnd() throws IOException, InterruptedException {
    String path = "./testWriteFiles";
    // Start from a clean directory and a clean index cache for this path.
    FileUtils.deleteDirectory(new File(path));
    IndexStoreManager.getInstance().clearIndexCache(AbsoluteTableIdentifier.from(path), false);
    Field[] fields = new Field[3];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    fields[2] = new Field("doubleField", DataTypes.DOUBLE);
    TestUtil.writeFilesAndVerify(200, new Schema(fields), path);
    ColumnExpression columnExpression = new ColumnExpression("doubleField", DataTypes.DOUBLE);
    EqualToExpression equalToExpression = new EqualToExpression(columnExpression, new LiteralExpression("3.5", DataTypes.DOUBLE));
    ColumnExpression columnExpression2 = new ColumnExpression("name", DataTypes.STRING);
    EqualToExpression equalToExpression2 = new EqualToExpression(columnExpression2, new LiteralExpression("robot7", DataTypes.STRING));
    AndExpression andExpression = new AndExpression(equalToExpression, equalToExpression2);
    CarbonReader reader = CarbonReader.builder(path, "_temp").projection(new String[] { "name", "age", "doubleField" }).filter(andExpression).build();
    int i = 0;
    try {
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            // JUnit assertions are used instead of the `assert` keyword so the checks
            // run regardless of whether the JVM was started with -ea.
            Assert.assertTrue(((String) row[0]).contains("robot7"));
            Assert.assertEquals(7, (int) row[1]);
            // Delta of 0.0 keeps the comparison exact, as in the original `==` check.
            Assert.assertEquals(3.5, (double) row[2], 0.0);
            i++;
        }
    } finally {
        // Release the reader even when one of the row assertions fails.
        reader.close();
    }
    // JUnit convention: expected value first, actual value second.
    Assert.assertEquals(1, i);
    FileUtils.deleteDirectory(new File(path));
}
Also used : FilterUtil.prepareEqualToExpression(org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression) EqualToExpression(org.apache.carbondata.core.scan.expression.conditional.EqualToExpression) LiteralExpression(org.apache.carbondata.core.scan.expression.LiteralExpression) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) AndExpression(org.apache.carbondata.core.scan.expression.logical.AndExpression) ColumnExpression(org.apache.carbondata.core.scan.expression.ColumnExpression) File(java.io.File) Test(org.junit.Test)

Example 50 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderTest method testReadSchemaFromDataFileArrayString.

/**
 * Writes ten CSV-style rows covering eleven column types (including an array of strings
 * split on '#'), reads the schema back from the first generated "carbondata" file, projects
 * every field of that schema, and checks selected column values of each row.
 */
@Test
public void testReadSchemaFromDataFileArrayString() {
    String path = "./testWriteFiles";
    try {
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[11];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        // '#' separates the elements of the array column in the CSV input below.
        Map<String, String> map = new HashMap<>();
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
        for (int i = 0; i < 10; i++) {
            String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon" };
            writer.write(row2);
        }
        writer.close();
        File[] dataFiles = new File(path).listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                return name != null && name.endsWith("carbondata");
            }
        });
        if (dataFiles == null || dataFiles.length < 1) {
            throw new RuntimeException("Carbon data file not exists.");
        }
        Schema schema = CarbonSchemaReader.readSchema(dataFiles[0].getAbsolutePath()).asOriginOrder();
        // Transform the schema into the list of field names used as the projection
        String[] strings = new String[schema.getFields().length];
        for (int i = 0; i < schema.getFields().length; i++) {
            strings[i] = (schema.getFields())[i].getFieldName();
        }
        // Read data
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(strings).build();
        try {
            int i = 0;
            while (reader.hasNext()) {
                Object[] row = (Object[]) reader.readNextRow();
                // JUnit assertions are used instead of the `assert` keyword so the checks
                // run regardless of whether the JVM was started with -ea.
                Assert.assertEquals("robot" + i, row[0]);
                Assert.assertEquals(i, row[2]);
                Assert.assertEquals("2019-03-02", row[6]);
                Object[] arr = (Object[]) row[10];
                Assert.assertEquals("Hello", arr[0]);
                Assert.assertEquals("Carbon", arr[3]);
                i++;
            }
        } finally {
            // Release the reader even when one of the row assertions fails.
            reader.close();
        }
        FileUtils.deleteDirectory(new File(path));
    } catch (Exception e) {
        // Fail the test while keeping the original exception as the cause, so the full
        // stack trace shows up in the test report. Assertion failures are Errors and
        // propagate unchanged.
        throw new AssertionError("Unexpected exception: " + e.getMessage(), e);
    }
}
Also used : HashMap(java.util.HashMap) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) FilenameFilter(java.io.FilenameFilter) File(java.io.File) Test(org.junit.Test)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6