Search in sources :

Example 81 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project dkpro-lab by dkpro.

The method getCsvReader of the class FlexTable.

/**
 * Creates a {@link StreamReader} that populates this table from CSV data.
 *
 * <p>The first CSV record is treated as the header row. In every following record the first
 * column is used as the row id and each remaining column is converted with the data class's
 * static {@code valueOf(String)} method and stored under the matching header name.
 *
 * @return a reader that parses UTF-8 encoded CSV and adds one table row per data record
 */
public StreamReader getCsvReader() {
    return new StreamReader() {

        /**
         * Reads CSV records from the stream and adds them to the enclosing table.
         *
         * @param aStream the UTF-8 encoded CSV input; it is closed when reading finishes or fails
         * @throws IOException if reading fails, a data record has fewer columns than the header,
         *         the data class has no {@code valueOf(String)} method, or a value cannot be
         *         converted
         */
        @Override
        public void read(InputStream aStream) throws IOException {
            CSVReader reader = null;
            try {
                reader = new CSVReader(new InputStreamReader(aStream, "UTF-8"));
                String[] headers = reader.readNext();
                Method converter = FlexTable.this.dataClass.getMethod("valueOf", String.class);
                String[] data;
                while ((data = reader.readNext()) != null) {
                    // Fail with a descriptive message instead of an opaque index error.
                    if (data.length < headers.length) {
                        throw new IOException("CSV record with id [" + data[0] + "] has " + data.length + " column(s) but the header declares " + headers.length);
                    }
                    Map<String, V> row = new LinkedHashMap<String, V>();
                    // Column 0 holds the row id, so value columns start at index 1.
                    for (int i = 1; i < headers.length; i++) {
                        @SuppressWarnings("unchecked") V value = (V) converter.invoke(null, data[i]);
                        row.put(headers[i], value);
                    }
                    addRow(data[0], row);
                }
                // On the success path a failure to close is still reported to the caller.
                reader.close();
            } catch (IOException e) {
                throw e;
            } catch (NoSuchMethodException e) {
                // Keep the original exception as the cause so the stack trace is not lost.
                throw new IOException("Data class " + FlexTable.this.dataClass.getName() + " does not have a " + "public static Object valueOf(String) method - unable unmarshall the " + "data.", e);
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                // Release the reader on failure paths too; after a successful close this is a
                // no-op, and a secondary close error must not mask the primary exception.
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException ignored) {
                        // Intentionally ignored: see comment above.
                    }
                }
            }
        }
    };
}
Also used : InputStreamReader(java.io.InputStreamReader) CSVReader(au.com.bytecode.opencsv.CSVReader) InputStream(java.io.InputStream) Method(java.lang.reflect.Method) IOException(java.io.IOException) LinkedHashMap(java.util.LinkedHashMap) StreamReader(org.dkpro.lab.storage.StreamReader)

Example 82 with CSVReader

use of au.com.bytecode.opencsv.CSVReader in project parquet-mr by apache.

The method inferSchemaInternal of the class AvroCSV.

/**
 * Infers an Avro record schema from the leading lines of a CSV stream.
 *
 * <p>Column names come from the first CSV line when {@code props.useHeader} is set, otherwise
 * from {@code props.header} when it is non-null, otherwise they are generated as
 * {@code field_<index>}. Up to {@code DEFAULT_INFER_LINES} data lines are then inspected to pick
 * a type for each column and to record whether a null or empty value was seen.
 *
 * @param name the name of the record schema to build
 * @param incoming the CSV input to sample
 * @param props CSV settings, including how the header is obtained
 * @param requiredFields names of fields that must not be nullable
 * @param makeNullable whether non-required fields are made nullable regardless of the sample
 * @return the inferred record schema
 * @throws IOException if reading or parsing the CSV input fails
 * @throws NullPointerException if there is no data line to infer from
 * @throws RuntimeException if a header is null, blank or not a valid Avro name, or if a required
 *         field had a null value in the sample
 */
private static Schema inferSchemaInternal(String name, InputStream incoming, CSVProperties props, Set<String> requiredFields, boolean makeNullable) throws IOException {
    CSVReader reader = newReader(incoming, props);

    // Only synthesize column names when neither header source is configured.
    final boolean generateHeader = !props.useHeader && props.header == null;

    String[] header = null;
    if (props.useHeader) {
        // the header is the first line of the input
        header = reader.readNext();
    } else if (props.header != null) {
        header = newParser(props).parseLine(props.header);
    }

    // whichever way the header was obtained, the next line is the first data line
    String[] line = reader.readNext();
    Objects.requireNonNull(line, "No content to infer schema");

    if (generateHeader) {
        // name the columns after their position in the first data line
        header = new String[line.length];
        for (int col = 0; col < line.length; col++) {
            header[col] = "field_" + col;
        }
    }

    final int columnCount = header.length;
    Schema.Type[] types = new Schema.Type[columnCount];
    String[] values = new String[columnCount];
    boolean[] nullable = new boolean[columnCount];
    boolean[] empty = new boolean[columnCount];

    int processed = 0;
    while (processed < DEFAULT_INFER_LINES && line != null) {
        for (int col = 0; col < columnCount; col++) {
            if (col >= line.length) {
                // a missing trailing value counts as null
                nullable[col] = true;
                continue;
            }
            String cell = line[col];
            if (types[col] == null) {
                Schema.Type inferred = inferFieldType(cell);
                types[col] = inferred;
                if (inferred != null) {
                    // remember which value determined the type
                    values[col] = cell;
                }
            }
            if (cell == null) {
                nullable[col] = true;
            } else if (cell.isEmpty()) {
                empty[col] = true;
            }
        }
        line = reader.readNext();
        processed++;
    }

    SchemaBuilder.FieldAssembler<Schema> fieldAssembler = SchemaBuilder.record(name).fields();
    // a column may still have no type here; schema(...) is handed the null type as-is
    for (int col = 0; col < columnCount; col++) {
        String rawName = header[col];
        if (rawName == null) {
            throw new RuntimeException("Bad header for field " + col + ": null");
        }
        String fieldName = rawName.trim();
        if (fieldName.isEmpty()) {
            throw new RuntimeException("Bad header for field " + col + ": \"" + fieldName + "\"");
        }
        if (!isAvroCompatibleName(fieldName)) {
            throw new RuntimeException("Bad header for field, should start with a character or _ and can contain only alphanumerics and _ " + col + ": \"" + fieldName + "\"");
        }

        // an empty value only counts as null for non-string columns
        boolean emptyMeansNull = empty[col] && types[col] != Schema.Type.STRING;
        boolean foundNull = nullable[col] || emptyMeansNull;

        if (!requiredFields.contains(fieldName)) {
            boolean optional = makeNullable || foundNull;
            SchemaBuilder.GenericDefault<Schema> defaultBuilder = fieldAssembler
                    .name(fieldName)
                    .doc("Type inferred from '" + sample(values[col]) + "'")
                    .type(schema(types[col], optional));
            fieldAssembler = optional ? defaultBuilder.withDefault(null) : defaultBuilder.noDefault();
        } else {
            if (foundNull) {
                throw new RuntimeException("Found null value for required field: " + fieldName + " (" + types[col] + ")");
            }
            fieldAssembler = fieldAssembler
                    .name(fieldName)
                    .doc("Type inferred from '" + sample(values[col]) + "'")
                    .type(schema(types[col], false))
                    .noDefault();
        }
    }
    return fieldAssembler.endRecord();
}
Also used : CSVReader(au.com.bytecode.opencsv.CSVReader) Schema(org.apache.avro.Schema) SchemaBuilder(org.apache.avro.SchemaBuilder)

Aggregations

CSVReader (au.com.bytecode.opencsv.CSVReader)82 IOException (java.io.IOException)29 InputStreamReader (java.io.InputStreamReader)27 ArrayList (java.util.ArrayList)16 FileReader (java.io.FileReader)11 StringReader (java.io.StringReader)11 HashMap (java.util.HashMap)9 BufferedReader (java.io.BufferedReader)8 InputStream (java.io.InputStream)6 File (java.io.File)5 Reader (java.io.Reader)5 HttpClient (org.apache.commons.httpclient.HttpClient)5 GetMethod (org.apache.commons.httpclient.methods.GetMethod)5 Test (org.junit.Test)5 DBException (org.jkiss.dbeaver.DBException)4 Query (au.org.ala.spatial.util.Query)3 TransformationExample (eu.esdihumboldt.cst.test.TransformationExample)3 Date (java.util.Date)3 LinkedHashMap (java.util.LinkedHashMap)3 JSONArray (org.json.simple.JSONArray)3