Search in sources :

Example 76 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonReaderExample method main.

/**
 * Writes ten sample rows (eleven columns, the last one an array of strings) to
 * {@code ./testWriteFiles} with {@link CarbonWriter}, then reads them back twice with
 * {@link CarbonReader}: once with an explicit projection built from the schema stored in the
 * carbonindex file, and once with no projection. Every row read is printed to standard output.
 * The output directory is deleted before the run and again in the {@code finally} block.
 *
 * @param args command line arguments; not used
 */
public static void main(String[] args) {
    String path = "./testWriteFiles";
    try {
        FileUtils.deleteDirectory(new File(path));
        CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT).addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT);
        Field[] fields = new Field[11];
        fields[0] = new Field("stringField", DataTypes.STRING);
        fields[1] = new Field("shortField", DataTypes.SHORT);
        fields[2] = new Field("intField", DataTypes.INT);
        fields[3] = new Field("longField", DataTypes.LONG);
        fields[4] = new Field("doubleField", DataTypes.DOUBLE);
        fields[5] = new Field("boolField", DataTypes.BOOLEAN);
        fields[6] = new Field("dateField", DataTypes.DATE);
        fields[7] = new Field("timeField", DataTypes.TIMESTAMP);
        fields[8] = new Field("decimalField", DataTypes.createDecimalType(8, 2));
        fields[9] = new Field("varcharField", DataTypes.VARCHAR);
        fields[10] = new Field("arrayField", DataTypes.createArrayType(DataTypes.STRING));
        // "#" separates the elements of the array column in the CSV-style input rows below.
        CarbonWriter writer = CarbonWriter.builder().outputPath(path).withLoadOption("complex_delimiter_level_1", "#").withCsvInput(new Schema(fields)).writtenBy("CarbonReaderExample").build();
        try {
            for (int i = 0; i < 10; i++) {
                String[] row2 = new String[] { "robot" + (i % 10), String.valueOf(i % 10000), String.valueOf(i), String.valueOf(Long.MAX_VALUE - i), String.valueOf((double) i / 2), String.valueOf(true), "2019-03-02", "2019-02-12 03:03:34", "12.345", "varchar", "Hello#World#From#Carbon" };
                writer.write(row2);
            }
        } finally {
            // Close the writer even when a write fails so it is never left open.
            writer.close();
        }
        File[] dataFiles = new File(path).listFiles(new FilenameFilter() {

            @Override
            public boolean accept(File dir, String name) {
                if (name == null) {
                    return false;
                }
                return name.endsWith("carbonindex");
            }
        });
        if (dataFiles == null || dataFiles.length < 1) {
            throw new RuntimeException("Carbon index file not exists.");
        }
        Schema schema = CarbonSchemaReader.readSchema(dataFiles[0].getAbsolutePath()).asOriginOrder();
        // Transform the schema into the list of column names used as the projection
        String[] strings = new String[schema.getFields().length];
        for (int i = 0; i < schema.getFields().length; i++) {
            strings[i] = (schema.getFields())[i].getFieldName();
        }
        // Read data with an explicit projection
        CarbonReader reader = CarbonReader.builder(path, "_temp").projection(strings).build();
        int i = 0;
        try {
            System.out.println("\nData:");
            while (reader.hasNext()) {
                Object[] row = (Object[]) reader.readNextRow();
                System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t", i, row[0], row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9]));
                Object[] arr = (Object[]) row[10];
                for (int j = 0; j < arr.length; j++) {
                    System.out.print(arr[j] + " ");
                }
                assert (arr[0].equals("Hello"));
                assert (arr[3].equals("Carbon"));
                System.out.println();
                i++;
            }
        } finally {
            // A failed read or assertion must not leak the reader.
            reader.close();
        }
        // Read data again without a projection; here the array column is read from index 4
        CarbonReader reader2 = CarbonReader.builder(path, "_temp").build();
        try {
            System.out.println("\nData:");
            i = 0;
            while (reader2.hasNext()) {
                Object[] row = (Object[]) reader2.readNextRow();
                System.out.print(String.format("%s\t%s\t%s\t%s\t%s\t", i, row[0], row[1], row[2], row[3]));
                Object[] arr = (Object[]) row[4];
                for (int j = 0; j < arr.length; j++) {
                    System.out.print(arr[j] + " ");
                }
                System.out.println(String.format("\t%s\t%s\t%s\t%s\t%s\t%s\t", row[5], row[6], row[7], row[8], row[9], row[10]));
                i++;
            }
        } finally {
            reader2.close();
        }
    } catch (Throwable e) {
        e.printStackTrace();
        // Print the message before asserting: with assertions enabled the assert below throws,
        // so anything placed after it would never run.
        System.out.println(e.getMessage());
        assert (false);
    } finally {
        try {
            FileUtils.deleteDirectory(new File(path));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
Also used : Schema(org.apache.carbondata.sdk.file.Schema) CarbonWriter(org.apache.carbondata.sdk.file.CarbonWriter) IOException(java.io.IOException) Field(org.apache.carbondata.core.metadata.datatype.Field) FilenameFilter(java.io.FilenameFilter) CarbonReader(org.apache.carbondata.sdk.file.CarbonReader) File(java.io.File)

Example 77 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonCliTest method before.

/**
 * Test fixture: clears {@code path}, then runs the same write-and-verify step twice against it
 * using a two-column schema ({@code name} STRING, {@code age} INT).
 *
 * @throws IOException if the directory cannot be deleted or the write step fails with one
 */
@Before
public void before() throws IOException {
    FileUtils.deleteDirectory(new File(path));
    Field[] schemaFields = { new Field("name", DataTypes.STRING), new Field("age", DataTypes.INT) };
    // Two identical passes over the same output path, each with its own Schema instance.
    for (int pass = 0; pass < 2; pass++) {
        TestUtil.writeFilesAndVerify(5000000, new Schema(schemaFields), path, new String[] { "name", "age" }, 3, 8);
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Before(org.junit.Before)

Example 78 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonCliTest method testBinary.

/**
 * Writes data containing a BINARY column to {@code pathBinary}, runs the CLI {@code summary}
 * command twice (without and with the {@code -s} flag) and checks that the output of the second
 * run mentions both {@code binaryfield} and {@code BINARY}.
 *
 * @throws IOException if the test directory cannot be deleted or an I/O step fails
 * @throws InvalidLoadOptionException propagated from building the binary test data
 */
@Test
public void testBinary() throws IOException, InvalidLoadOptionException {
    FileUtils.deleteDirectory(new File(pathBinary));
    try {
        Field[] fields = new Field[3];
        fields[0] = new Field("name", DataTypes.STRING);
        fields[1] = new Field("age", DataTypes.INT);
        fields[2] = new Field("binaryField", DataTypes.BINARY);
        buildBinaryData(5000000, new Schema(fields), pathBinary, new String[] { "name" }, 3, 8);
        // First run: only verifies that the plain summary command completes; its output is unused.
        String[] args = { "-cmd", "summary", "-p", pathBinary };
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        PrintStream stream = new PrintStream(out);
        CarbonCli.run(args, stream);
        // Second run: summary with "-s"; its output is what the assertion inspects.
        String[] args2 = { "-cmd", "summary", "-p", pathBinary, "-s" };
        out = new ByteArrayOutputStream();
        stream = new PrintStream(out);
        CarbonCli.run(args2, stream);
        String output = new String(out.toByteArray());
        Assert.assertTrue(output.contains("binaryfield") && output.contains("BINARY"));
    } finally {
        // Clean up even when the assertion or an earlier step fails, so no test data is left behind.
        FileUtils.deleteDirectory(new File(pathBinary));
    }
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) PrintStream(java.io.PrintStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) File(java.io.File) Test(org.junit.Test)

Example 79 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonWriterBuilder method buildTableSchema.

/**
 * Validates the requested sort and inverted-index columns against {@code fields} and adds every
 * non-null field to {@code tableSchemaBuilder}.
 *
 * <p>Fields that have children are added as ARRAY, STRUCT or MAP columns and are never flagged
 * as sort columns. All other fields are added as plain columns; when such a field is a sort
 * column, its resulting {@link ColumnSchema} is stored in {@code sortColumnsSchemaList} at the
 * position the column has in {@code sortColumnsList}.
 *
 * @param fields schema fields; {@code null} entries are skipped
 * @param tableSchemaBuilder builder that receives the columns
 * @param sortColumnsList names of the sort columns
 * @param sortColumnsSchemaList output array filled with the column schemas of the sort columns
 * @param invertedIdxColumnsList names of the inverted index columns
 * @throws RuntimeException if a sort or inverted-index column is not in the schema, if a field
 *         name occurs twice, or if a sort column has an unsupported data type
 */
private void buildTableSchema(Field[] fields, TableSchemaBuilder tableSchemaBuilder, List<String> sortColumnsList, ColumnSchema[] sortColumnsSchemaList, List<String> invertedIdxColumnsList) {
    Set<String> uniqueFields = new HashSet<>();
    // a counter which will be used in case of complex array type. This valIndex will be assigned
    // to child of complex array type in the order val1, val2 so that each array type child is
    // differentiated to any level
    AtomicInteger valIndex = new AtomicInteger(0);
    // Check if any of the columns specified in sort columns are missing from schema.
    ensureColumnsExistInSchema(fields, sortColumnsList, "sort columns");
    // Check if any of the columns specified in inverted index are missing from schema.
    ensureColumnsExistInSchema(fields, invertedIdxColumnsList, "inverted index columns");
    for (Field field : fields) {
        if (null == field) {
            continue;
        }
        if (!uniqueFields.add(field.getFieldName())) {
            throw new RuntimeException("Duplicate column " + field.getFieldName() + " found in table schema");
        }
        // Both lookups are exact (case-sensitive) matches; -1 means "not listed".
        int isSortColumn = sortColumnsList.indexOf(field.getFieldName());
        int isInvertedIdxColumn = invertedIdxColumnsList.indexOf(field.getFieldName());
        if (isSortColumn > -1) {
            // unsupported types for ("array", "struct", "double", "float", "decimal")
            if (field.getDataType() == DataTypes.DOUBLE || field.getDataType() == DataTypes.FLOAT || DataTypes.isDecimal(field.getDataType()) || field.getDataType().isComplexType() || field.getDataType() == DataTypes.VARCHAR || field.getDataType() == DataTypes.BINARY) {
                String errorMsg = "sort columns not supported for array, struct, map, double, float, decimal, " + "varchar, binary";
                throw new RuntimeException(errorMsg);
            }
        }
        if (field.getChildren() != null && field.getChildren().size() > 0) {
            // NOTE: a field with children whose type name is none of ARRAY/STRUCT/MAP is not added.
            if (field.getDataType().getName().equalsIgnoreCase("ARRAY")) {
                // The array element type and name are taken from the first child.
                DataType complexType = DataTypes.createArrayType(field.getChildren().get(0).getDataType(), field.getChildren().get(0).getFieldName());
                tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), complexType, field.getChildren()), valIndex, false, isInvertedIdxColumn > -1);
            } else if (field.getDataType().getName().equalsIgnoreCase("STRUCT")) {
                // Loop through the inner columns and for a StructData
                List<StructField> structFieldsArray = new ArrayList<StructField>(field.getChildren().size());
                for (StructField childFld : field.getChildren()) {
                    structFieldsArray.add(new StructField(childFld.getFieldName(), childFld.getDataType()));
                }
                DataType complexType = DataTypes.createStructType(structFieldsArray);
                tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), complexType, field.getChildren()), valIndex, false, isInvertedIdxColumn > -1);
            } else if (field.getDataType().getName().equalsIgnoreCase("MAP")) {
                // Key type comes from the declared MapType, value type from the first child.
                DataType mapType = DataTypes.createMapType(((MapType) field.getDataType()).getKeyType(), field.getChildren().get(0).getDataType());
                tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), mapType, field.getChildren()), valIndex, false, isInvertedIdxColumn > -1);
            }
        } else {
            ColumnSchema columnSchema = tableSchemaBuilder.addColumn(new StructField(field.getFieldName(), field.getDataType()), valIndex, isSortColumn > -1, isInvertedIdxColumn > -1);
            if (isSortColumn > -1) {
                columnSchema.setSortColumn(true);
                sortColumnsSchemaList[isSortColumn] = columnSchema;
            }
        }
    }
}

/**
 * Verifies that every name in {@code columns} matches (ignoring case) the name of some non-null
 * entry of {@code fields}.
 *
 * @param fields schema fields; {@code null} entries are ignored
 * @param columns column names that must be present in the schema
 * @param optionName option the names came from, used only in the error message
 * @throws RuntimeException if a column name has no matching field
 */
private static void ensureColumnsExistInSchema(Field[] fields, List<String> columns, String optionName) {
    for (String column : columns) {
        boolean exists = false;
        for (Field field : fields) {
            if (null != field && field.getFieldName().equalsIgnoreCase(column)) {
                exists = true;
                break;
            }
        }
        if (!exists) {
            throw new RuntimeException("column: " + column + " specified in " + optionName + " does not exist in schema");
        }
    }
}
Also used : ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField) StructField(org.apache.carbondata.core.metadata.datatype.StructField) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) DataType(org.apache.carbondata.core.metadata.datatype.DataType) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet)

Example 80 with Field

use of org.apache.carbondata.core.metadata.datatype.Field in project carbondata by apache.

the class CarbonWriterBuilder method setCsvHeader.

/**
 * Sets the CSV header on {@code model} from the names of the non-null schema fields: once as a
 * comma-separated string and once as an array of column names.
 *
 * <p>When the schema has no non-null fields the header is the empty string and the column array
 * is empty. The array holds exactly one entry per non-null field, with no trailing {@code null}
 * slots for skipped fields.
 *
 * @param model load model that receives the header string and header columns
 */
private void setCsvHeader(CarbonLoadModel model) {
    Field[] fields = schema.getFields();
    StringBuilder builder = new StringBuilder();
    String[] columns = new String[fields.length];
    int count = 0;
    for (Field field : fields) {
        if (null != field) {
            // Separator goes between names, so there is no trailing comma to strip afterwards
            // (stripping failed with StringIndexOutOfBoundsException on an empty header).
            if (count > 0) {
                builder.append(",");
            }
            builder.append(field.getFieldName());
            columns[count++] = field.getFieldName();
        }
    }
    if (count < columns.length) {
        // Null fields were skipped: drop the unused trailing slots.
        columns = java.util.Arrays.copyOf(columns, count);
    }
    model.setCsvHeader(builder.toString());
    model.setCsvHeaderColumns(columns);
}
Also used : Field(org.apache.carbondata.core.metadata.datatype.Field) StructField(org.apache.carbondata.core.metadata.datatype.StructField)

Aggregations

Field (org.apache.carbondata.core.metadata.datatype.Field)140 File (java.io.File)111 Test (org.junit.Test)111 StructField (org.apache.carbondata.core.metadata.datatype.StructField)104 IOException (java.io.IOException)55 InvalidLoadOptionException (org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException)39 ColumnSchema (org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema)27 HashMap (java.util.HashMap)26 ArrayList (java.util.ArrayList)23 CarbonFile (org.apache.carbondata.core.datastore.filesystem.CarbonFile)22 ColumnExpression (org.apache.carbondata.core.scan.expression.ColumnExpression)21 LiteralExpression (org.apache.carbondata.core.scan.expression.LiteralExpression)21 EqualToExpression (org.apache.carbondata.core.scan.expression.conditional.EqualToExpression)13 Map (java.util.Map)12 FilenameFilter (java.io.FilenameFilter)11 FilterUtil.prepareEqualToExpression (org.apache.carbondata.core.scan.filter.FilterUtil.prepareEqualToExpression)10 BufferedInputStream (java.io.BufferedInputStream)9 FileInputStream (java.io.FileInputStream)9 AndExpression (org.apache.carbondata.core.scan.expression.logical.AndExpression)9 FileFilter (java.io.FileFilter)6