
Example 1 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From class HiveCarbonUtil, method getTableInfo:

private static TableInfo getTableInfo(String tableName, String databaseName, String location, String sortColumnsString, String[] columns, String[] columnTypes, List<String> partitionColumns) throws SQLException {
    TableInfo tableInfo = new TableInfo();
    TableSchemaBuilder builder = new TableSchemaBuilder();
    builder.tableName(tableName);
    List<String> sortColumns = new ArrayList<>();
    if (sortColumnsString != null) {
        sortColumns = Arrays.asList(sortColumnsString.toLowerCase().split("\\,"));
    }
    PartitionInfo partitionInfo = null;
    AtomicInteger integer = new AtomicInteger();
    List<StructField> partitionStructFields = new ArrayList<>();
    // Convert each Hive column to its Carbon counterpart; partition columns are
    // collected separately so they can be appended after the regular columns.
    for (int i = 0; i < columns.length; i++) {
        DataType dataType = DataTypeUtil.convertHiveTypeToCarbon(columnTypes[i]);
        Field field = new Field(columns[i].toLowerCase(), dataType);
        if (partitionColumns.contains(columns[i])) {
            partitionStructFields.add(new StructField(columns[i].toLowerCase(), dataType, field.getChildren()));
        } else {
            builder.addColumn(new StructField(columns[i].toLowerCase(), dataType, field.getChildren()), integer, sortColumns.contains(columns[i]), false);
        }
    }
    // Partition columns are added last, so their ordinals follow the regular columns.
    if (!partitionStructFields.isEmpty()) {
        List<ColumnSchema> partitionColumnSchemas = new ArrayList<>();
        for (StructField partitionStructField : partitionStructFields) {
            partitionColumnSchemas.add(builder.addColumn(partitionStructField, integer, sortColumns.contains(partitionStructField.getFieldName()), false));
        }
        partitionInfo = new PartitionInfo(partitionColumnSchemas, PartitionType.NATIVE_HIVE);
    }
    TableSchema tableSchema = builder.build();
    SchemaEvolution schemaEvol = new SchemaEvolution();
    List<SchemaEvolutionEntry> schemaEvolutionEntry = new ArrayList<>();
    schemaEvolutionEntry.add(new SchemaEvolutionEntry());
    schemaEvol.setSchemaEvolutionEntryList(schemaEvolutionEntry);
    tableSchema.setSchemaEvolution(schemaEvol);
    tableSchema.setPartitionInfo(partitionInfo);
    tableInfo.setDatabaseName(databaseName);
    tableInfo.setTablePath(location);
    tableInfo.setFactTable(tableSchema);
    tableInfo.setTableUniqueName(databaseName + "_" + tableName);
    return tableInfo;
}
Also used : TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ArrayList(java.util.ArrayList) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) SchemaEvolution(org.apache.carbondata.core.metadata.schema.SchemaEvolution) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) SchemaEvolutionEntry(org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DataType(org.apache.carbondata.core.metadata.datatype.DataType) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo)
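For reference, a condensed sketch of the same Field / StructField / TableSchemaBuilder flow outside the Hive wrapper; the class name SchemaSketch and the two literal columns are illustrative only, while the builder calls are the ones used above:

import java.util.concurrent.atomic.AtomicInteger;

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.metadata.datatype.Field;
import org.apache.carbondata.core.metadata.datatype.StructField;
import org.apache.carbondata.core.metadata.schema.table.TableSchema;
import org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder;

public class SchemaSketch {

    // Builds a two-column schema the same way getTableInfo does, minus the
    // partition and schema-evolution handling.
    static TableSchema buildTwoColumnSchema() {
        TableSchemaBuilder builder = new TableSchemaBuilder();
        builder.tableName("sample");
        // Shared ordinal counter, as in the method above.
        AtomicInteger ordinal = new AtomicInteger();
        // Field carries the (possibly nested) children that the StructField needs.
        Field name = new Field("name", DataTypes.STRING);
        Field age = new Field("age", DataTypes.INT);
        // "name" is registered as a sort column, "age" as a plain column.
        builder.addColumn(new StructField("name", DataTypes.STRING, name.getChildren()), ordinal, true, false);
        builder.addColumn(new StructField("age", DataTypes.INT, age.getChildren()), ordinal, false, false);
        return builder.build();
    }
}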

Example 2 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From class CarbonReaderTest, method testReadFilesWithProjectAllColumns:

@Test
public void testReadFilesWithProjectAllColumns() throws IOException, InterruptedException {
    String path = "./testWriteFiles";
    FileUtils.deleteDirectory(new File(path));
    Field[] fields = new Field[2];
    fields[0] = new Field("name", DataTypes.STRING);
    fields[1] = new Field("age", DataTypes.INT);
    TestUtil.writeFilesAndVerify(100, new Schema(fields), path);
    CarbonReader reader = CarbonReader.builder(path, "_temp").build();
    int i = 0;
    while (reader.hasNext()) {
        Object[] row = (Object[]) reader.readNextRow();
        Assert.assertEquals(("robot" + (i % 10)), row[0]);
        Assert.assertEquals(i, row[1]);
        i++;
    }
    Assert.assertEquals(i, 100);
    reader.close();
    FileUtils.deleteDirectory(new File(path));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)
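The same reader can also be narrowed to specific columns; a minimal sketch, assuming the SDK classes under org.apache.carbondata.sdk.file and a projection(String[]) method on the reader builder in the version used here (path and column names mirror the test above):

import java.io.IOException;

import org.apache.carbondata.sdk.file.CarbonReader;

public class ProjectionReadSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        String path = "./testWriteFiles";
        // Read back only the "name" and "age" columns, in that order.
        CarbonReader reader = CarbonReader.builder(path, "_temp")
                .projection(new String[] { "name", "age" })
                .build();
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            System.out.println(row[0] + ", " + row[1]);
        }
        reader.close();
    }
}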

Example 3 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From class CarbonReaderTest, method testReadNextBatchRow:

@Test
public void testReadNextBatchRow() {
    String path = "./carbondata";
    try {
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[12];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        fields[11] = new Field("floatField", DataTypes.FLOAT);
        Map<String, String> map = new HashMap<>();
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
        for (int i = 0; i < 300; i++) {
            String[] row2 = new String[] { "robot" + (i % 10000), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon", "1.23" };
            writer.write(row2);
        }
        writer.close();
        // Read data in batches of 150 rows; the columns come back with the string, date,
        // timestamp, varchar and array columns first, hence the assertion indices below.
        int batchSize = 150;
        CarbonReader reader = CarbonReader.builder(path, "_temp").withBatch(batchSize).build();
        int i = 0;
        while (reader.hasNext()) {
            Object[] batch = reader.readNextBatchRow();
            Assert.assertTrue(batch.length <= batchSize);
            for (int j = 0; j < batch.length; j++) {
                Object[] data = (Object[]) batch[j];
                assert (RowUtil.getString(data, 0).equals("robot" + i));
                assertEquals(RowUtil.getString(data, 1), "2019-03-02");
                assert (RowUtil.getVarchar(data, 3).equals("varchar"));
                Object[] arr = RowUtil.getArray(data, 4);
                assert (arr[0].equals("Hello"));
                assert (arr[1].equals("World"));
                assert (arr[2].equals("From"));
                assert (arr[3].equals("Carbon"));
                assertEquals(RowUtil.getShort(data, 5), i);
                assertEquals(RowUtil.getInt(data, 6), i);
                assertEquals(RowUtil.getLong(data, 7), Long.MAX_VALUE - i);
                assertEquals(RowUtil.getDouble(data, 8), ((double) i) / 2);
                assert (RowUtil.getBoolean(data, 9));
                assert (RowUtil.getDecimal(data, 10).equals("12.35"));
                assertEquals(RowUtil.getFloat(data, 11), (float) 1.23);
                i++;
            }
            System.out.println("batch is " + i);
        }
        reader.close();
    } catch (Throwable e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)
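Stripped of the assertions, the batch-read loop above reduces to the following sketch (again assuming the SDK classes under org.apache.carbondata.sdk.file; the helper name countRows is illustrative):

import org.apache.carbondata.sdk.file.CarbonReader;

public class BatchReadSketch {

    // Counts rows by reading batches of up to batchSize rows at a time.
    static long countRows(String path, int batchSize) throws Exception {
        CarbonReader reader = CarbonReader.builder(path, "_temp")
                .withBatch(batchSize)
                .build();
        long count = 0;
        while (reader.hasNext()) {
            // Each batch holds up to batchSize entries; each entry is itself an Object[] row.
            Object[] batch = reader.readNextBatchRow();
            count += batch.length;
        }
        reader.close();
        return count;
    }
}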

Example 4 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From class CarbonReaderTest, method testVectorReader:

@Test
public void testVectorReader() {
    String path = "./testWriteFiles";
    try {
        FileUtils.deleteDirectory(new File(path));
        Field[] fields = new Field[12];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("byteField", DataTypes.BYTE);
        fields[11] = new Field("floatField", DataTypes.FLOAT);
        Map<String, String> map = new HashMap<>();
        map.put("complex_delimiter_level_1", "#");
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOptions(map).withCsvInput(new Schema(fields)).writtenBy("CarbonReaderTest").build();
        for (int i = 0; i < 10; i++) {
            String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", String.valueOf(i), "1.23" };
            writer.write(row2);
        }
        writer.close();
        // Read data; without an explicit projection the reader returns the string, date,
        // timestamp and varchar columns before the numeric columns, which is why the
        // assertion indices below differ from the write-schema order.
        CarbonReader reader = CarbonReader.builder(path, "_temp").build();
        int i = 0;
        while (reader.hasNext()) {
            Object[] data = (Object[]) reader.readNextRow();
            assert (RowUtil.getString(data, 0).equals("robot" + i));
            assertEquals(RowUtil.getShort(data, 4), i);
            assertEquals(RowUtil.getInt(data, 5), i);
            assert (RowUtil.getLong(data, 6) == Long.MAX_VALUE - i);
            assertEquals(RowUtil.getDouble(data, 7), ((double) i) / 2);
            assert (RowUtil.getBoolean(data, 8));
            assertEquals(RowUtil.getString(data, 1), "2019-03-02");
            assert (RowUtil.getDecimal(data, 9).equals("12.35"));
            assert (RowUtil.getString(data, 3).equals("varchar"));
            assertEquals(RowUtil.getByte(data, 10), new Byte(String.valueOf(i)));
            assertEquals(RowUtil.getFloat(data, 11), new Float("1.23"));
            i++;
        }
        assert (i == 10);
        reader.close();
    } catch (Throwable e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    } finally {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
            Assert.fail(e.getMessage());
        }
    }
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) File(java.io.File) Test(org.junit.Test)

Example 5 with Field

Use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by Apache.

From class CarbonReaderTest, method testSdkWriteWhenArrayOfStringIsEmpty:

@Test
public void testSdkWriteWhenArrayOfStringIsEmpty() throws IOException, InvalidLoadOptionException {
    // Temporarily force the bad-records action to FAIL; the previous value is restored
    // after the write so other tests are unaffected.
    String badRecordAction = CarbonProperties.getInstance().getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION);
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, "FAIL");
    String path = "./testSdkWriteWhenArrayOfStringIsEmpty";
    String[] rec = { "aaa", "bbb", "aaa@cdf.com", "", "", "mmm", "" };
    Field[] fields = new Field[7];
    fields[0] = new Field("stringField", DataTypes.STRING);
    fields[1] = new Field("varcharField", DataTypes.VARCHAR);
    fields[2] = new Field("stringField1", DataTypes.STRING);
    fields[3] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
    fields[4] = new Field("arrayField1", DataTypes.createArrayType(DataTypes.STRING));
    fields[5] = new Field("arrayField2", DataTypes.createArrayType(DataTypes.STRING));
    fields[6] = new Field("varcharField1", DataTypes.VARCHAR);
    Schema schema = new Schema(fields);
    Map<String, String> map = new HashMap<>();
    map.put("complex_delimiter_level_1", "#");
    map.put("bad_records_logger_enable", "TRUE");
    map.put("bad_record_path", path + "/badrec");
    CarbonWriterBuilder builder = CarbonWriter.builder().outputPath(path);
    builder.withLoadOptions(map).withCsvInput(schema).enableLocalDictionary(false).writtenBy("CarbonReaderTest");
    CarbonWriter writer = builder.build();
    writer.write(rec);
    writer.close();
    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, badRecordAction);
    FileUtils.deleteDirectory(new File(path));
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) Test(org.junit.Test)
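The test restores the previous bad-records action only on the success path; a small sketch of the same toggle wrapped in try/finally, built only from the CarbonProperties calls shown above (the helper name withBadRecordsAction is illustrative):

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.util.CarbonProperties;

public class BadRecordsActionToggle {

    // Temporarily switches CARBON_BAD_RECORDS_ACTION and always restores the previous
    // value, even if the guarded block throws.
    static void withBadRecordsAction(String action, Runnable body) {
        CarbonProperties properties = CarbonProperties.getInstance();
        String previous = properties.getProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION);
        properties.addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, action);
        try {
            body.run();
        } finally {
            if (previous != null) {
                properties.addProperty(CarbonCommonConstants.CARBON_BAD_RECORDS_ACTION, previous);
            }
        }
    }
}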

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field): 140
File (java.io.File): 111
Test (org.junit.Test): 111
StructField (org.apache.carbondata.core.metadata.datatype.StructField): 104
IOException (java.io.IOException): 55
InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException): 39
ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema): 27
HashMap (java.util.HashMap): 26
ArrayList (java.util.ArrayList): 23
CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile): 22
ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression): 21
LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression): 21
EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression): 13
Map (java.util.Map): 12
FilenameFilter (java.io.FilenameFilter): 11
FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression): 10
BufferedInputStream (java.io.BufferedInputStream): 9
FileInputStream (java.io.FileInputStream): 9
AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression): 9
FileFilter (java.io.FileFilter): 6
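Several classes in the aggregation above (ColumnExpression, LiteralExpression, EqualToExpression) appear in filtered reads. A minimal sketch of that pattern, assuming a filter(Expression) method on the reader builder and a string column named name in the data at path:

import java.io.IOException;

import org.apache.carbondata.core.metadata.datatype.DataTypes;
import org.apache.carbondata.core.scan.expression.ColumnExpression;
import org.apache.carbondata.core.scan.expression.LiteralExpression;
import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
import org.apache.carbondata.sdk.file.CarbonReader;

public class FilteredReadSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        String path = "./testWriteFiles";
        // Equivalent to the predicate: name = 'robot1'.
        EqualToExpression filter = new EqualToExpression(
                new ColumnExpression("name", DataTypes.STRING),
                new LiteralExpression("robot1", DataTypes.STRING));
        // filter(Expression) is assumed to be available on the reader builder.
        CarbonReader reader = CarbonReader.builder(path, "_temp")
                .filter(filter)
                .build();
        while (reader.hasNext()) {
            Object[] row = (Object[]) reader.readNextRow();
            System.out.println(row[0]);
        }
        reader.close();
    }
}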