Use of au.com.bytecode.opencsv.CSVReader in project dkpro-lab by dkpro.
Class FlexTable, method getCsvReader.
/**
 * Creates a {@link StreamReader} that populates this table from CSV data.
 * <p>
 * The stream is decoded as UTF-8. The first record is the header row: its first cell is
 * skipped and the remaining cells are used as column names. In every following record the
 * first cell is passed to {@code addRow} as the row identifier and each remaining cell is
 * converted by reflectively invoking the static {@code valueOf(String)} method of the
 * table's data class.
 *
 * @return a reader that adds one row to this table per CSV data record
 */
public StreamReader getCsvReader() {
    return new StreamReader() {
        /**
         * Parses the CSV content of the given stream into rows of the enclosing table.
         *
         * @param aStream the CSV input; it is closed together with the CSV reader
         * @throws IOException if reading fails, if a record has fewer cells than the header,
         *         if the data class has no usable {@code valueOf(String)} method, or if a
         *         cell cannot be converted
         */
        @Override
        public void read(InputStream aStream) throws IOException {
            CSVReader reader = new CSVReader(new InputStreamReader(aStream, "UTF-8"));
            try {
                String[] headers = reader.readNext();
                Method converter = FlexTable.this.dataClass.getMethod("valueOf", String.class);
                String[] data;
                while ((data = reader.readNext()) != null) {
                    // Fail with a clear message instead of an ArrayIndexOutOfBoundsException
                    // when a record is shorter than the header row.
                    if (data.length < headers.length) {
                        throw new IOException("Malformed CSV record: expected " + headers.length
                                + " cells but found " + data.length);
                    }
                    Map<String, V> row = new LinkedHashMap<String, V>();
                    // Column 0 holds the row identifier, so values start at index 1.
                    for (int i = 1; i < headers.length; i++) {
                        @SuppressWarnings("unchecked") V value = (V) converter.invoke(null, data[i]);
                        row.put(headers[i], value);
                    }
                    addRow(data[0], row);
                }
            } catch (IOException e) {
                throw e;
            } catch (NoSuchMethodException e) {
                // Keep the original exception as the cause so the failure stays traceable.
                throw new IOException("Data class " + FlexTable.this.dataClass.getName() + " does not have a " + "public static Object valueOf(String) method - unable unmarshall the " + "data.", e);
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                // Release the reader on failure as well as on success.
                reader.close();
            }
        }
    };
}
Use of au.com.bytecode.opencsv.CSVReader in project parquet-mr by apache.
Class AvroCSV, method inferSchemaInternal.
/**
 * Infers an Avro record schema from the leading rows of a CSV stream.
 * <p>
 * Column names come from the first CSV record when {@code props.useHeader} is set, otherwise
 * from {@code props.header} when it is non-null, otherwise they are generated as
 * {@code field_0}, {@code field_1}, ... from the width of the first record. Up to
 * {@code DEFAULT_INFER_LINES} data records are then inspected to pick a type per column and
 * to note which columns contained missing or empty values.
 *
 * @param name           name of the resulting record schema
 * @param incoming       CSV input; not closed by this method
 * @param props          CSV settings, including how the header is obtained
 * @param requiredFields names of fields that must not contain null values
 * @param makeNullable   when true, every non-required field is made nullable with a null default
 * @return the inferred record schema
 * @throws IOException          if reading or parsing the CSV data fails
 * @throws NullPointerException if there is no data record to infer from
 * @throws RuntimeException     if a header name is null, blank or not Avro compatible, or if a
 *                              required field was seen with a null value
 */
private static Schema inferSchemaInternal(String name, InputStream incoming, CSVProperties props, Set<String> requiredFields, boolean makeNullable) throws IOException {
    final CSVReader csv = newReader(incoming, props);

    final String[] columns;
    String[] row;
    if (props.useHeader) {
        // first record is the header, second is the first data record
        columns = csv.readNext();
        row = Objects.requireNonNull(csv.readNext(), "No content to infer schema");
    } else if (props.header != null) {
        columns = newParser(props).parseLine(props.header);
        row = Objects.requireNonNull(csv.readNext(), "No content to infer schema");
    } else {
        // no header available: synthesize one from the width of the first record
        row = Objects.requireNonNull(csv.readNext(), "No content to infer schema");
        columns = new String[row.length];
        for (int c = 0; c < columns.length; c++) {
            columns[c] = "field_" + c;
        }
    }

    final int width = columns.length;
    final Schema.Type[] types = new Schema.Type[width];
    final String[] samples = new String[width];
    final boolean[] sawMissing = new boolean[width];
    final boolean[] sawEmpty = new boolean[width];

    for (int seen = 0; seen < DEFAULT_INFER_LINES && row != null; seen++) {
        for (int c = 0; c < width; c++) {
            if (c >= row.length) {
                // a record shorter than the header yields null for the trailing columns
                sawMissing[c] = true;
                continue;
            }
            final String cell = row[c];
            if (types[c] == null) {
                final Schema.Type guess = inferFieldType(cell);
                if (guess != null) {
                    types[c] = guess;
                    // remember the value the type was inferred from
                    samples[c] = cell;
                }
            }
            if (cell == null) {
                sawMissing[c] = true;
            } else if (cell.isEmpty()) {
                sawEmpty[c] = true;
            }
        }
        row = csv.readNext();
    }

    SchemaBuilder.FieldAssembler<Schema> fields = SchemaBuilder.record(name).fields();
    // A column's type may still be null here; it is handed to schema(...) unchanged.
    // NOTE(review): the original comment says the helper falls back to a nullable string - confirm.
    for (int c = 0; c < width; c++) {
        if (columns[c] == null) {
            throw new RuntimeException("Bad header for field " + c + ": null");
        }
        final String fieldName = columns[c].trim();
        if (fieldName.isEmpty()) {
            throw new RuntimeException("Bad header for field " + c + ": \"" + fieldName + "\"");
        }
        if (!isAvroCompatibleName(fieldName)) {
            throw new RuntimeException("Bad header for field, should start with a character or _ and can contain only alphanumerics and _ " + c + ": \"" + fieldName + "\"");
        }

        // an empty string only counts as null for non-string columns
        final boolean sawNull = sawMissing[c] || (sawEmpty[c] && types[c] != Schema.Type.STRING);
        final boolean required = requiredFields.contains(fieldName);
        if (required && sawNull) {
            throw new RuntimeException("Found null value for required field: " + fieldName + " (" + types[c] + ")");
        }

        // required fields are never nullable; others are when requested or when a null was seen
        final boolean asNullable = !required && (makeNullable || sawNull);
        final SchemaBuilder.GenericDefault<Schema> pending = fields.name(fieldName).doc("Type inferred from '" + sample(samples[c]) + "'").type(schema(types[c], asNullable));
        if (asNullable) {
            fields = pending.withDefault(null);
        } else {
            fields = pending.noDefault();
        }
    }
    return fields.endRecord();
}
Aggregations