Use of com.thinkbiganalytics.discovery.model.DefaultFileSchema in project kylo by Teradata: class CSVFileSchemaParser, method populateSchema.
private DefaultFileSchema populateSchema(CSVParser parser) {
    DefaultFileSchema fileSchema = new DefaultFileSchema();
    int i = 0;
    ArrayList<Field> fields = new ArrayList<>();
    for (CSVRecord record : parser) {
        // Only sample the first 10 records
        if (i > 9) {
            break;
        }
        int size = record.size();
        for (int j = 0; j < size; j++) {
            DefaultField field = null;
            if (i == 0) {
                // The first record seeds the field list: use its values as names
                // when a header row is present, otherwise generate Col_N names
                field = new DefaultField();
                if (headerRow) {
                    field.setName(record.get(j));
                } else {
                    field.setName("Col_" + (j + 1));
                }
                fields.add(field);
            } else {
                // Subsequent records contribute one sample value per field
                try {
                    field = (DefaultField) fields.get(j);
                    field.getSampleValues().add(StringUtils.defaultString(record.get(j), ""));
                } catch (IndexOutOfBoundsException e) {
                    LOG.warn("Sample file has potential sparse column problem at row {} field {}", i + 1, j + 1);
                }
            }
        }
        i++;
    }
    fileSchema.setFields(fields);
    return fileSchema;
}
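A minimal standalone sketch of the traversal pattern above, using Apache Commons CSV directly. The sample string and class name are illustrative, not from Kylo: it only demonstrates how the first record seeds the field list while up to nine further records contribute sample values.

import java.io.Reader;
import java.io.StringReader;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

public class PopulateSchemaSketch {
    public static void main(String[] args) throws Exception {
        String sample = "name,age\nalice,30\nbob,41\n";
        try (Reader reader = new StringReader(sample);
             CSVParser parser = CSVFormat.DEFAULT.parse(reader)) {
            int i = 0;
            for (CSVRecord record : parser) {
                if (i > 9) {
                    break; // same 10-record sampling cap as populateSchema
                }
                if (i == 0) {
                    // First record: would become field names (or Col_N fallbacks)
                    System.out.println("fields:  " + record);
                } else {
                    // Later records: would become per-field sample values
                    System.out.println("samples: " + record);
                }
                i++;
            }
        }
    }
}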
Use of com.thinkbiganalytics.discovery.model.DefaultFileSchema in project kylo by Teradata: class CSVFileSchemaParser, method parse.
@Override
public Schema parse(InputStream is, Charset charset, TableSchemaType target) throws IOException {
    Validate.notNull(target, "target must not be null");
    Validate.notNull(is, "stream must not be null");
    Validate.notNull(charset, "charset must not be null");
    validate();

    // Parse the file
    String sampleData = ParserHelper.extractSampleLines(is, charset, numRowsToSample);
    Validate.notEmpty(sampleData, "No data in file");
    CSVFormat format = createCSVFormat(sampleData);
    try (Reader reader = new StringReader(sampleData)) {
        CSVParser parser = format.parse(reader);
        DefaultFileSchema fileSchema = populateSchema(parser);
        fileSchema.setCharset(charset.name());

        // Convert to target schema with proper derived types
        Schema targetSchema = convertToTarget(target, fileSchema);
        return targetSchema;
    }
}
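A hedged usage sketch for calling parse on a local file. The file path, the no-arg CSVFileSchemaParser constructor, the TableSchemaType.HIVE constant, and the getFields()/getName() accessors are assumptions not shown in this excerpt; Kylo imports (com.thinkbiganalytics.discovery.*) are omitted for brevity.

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class ParseUsageSketch {
    public static void main(String[] args) throws Exception {
        // Assumes a default constructor with the parser's usual defaults
        CSVFileSchemaParser parser = new CSVFileSchemaParser();
        try (InputStream is = Files.newInputStream(Paths.get("/tmp/sample.csv"))) {
            // TableSchemaType.HIVE is an assumed target constant
            Schema schema = parser.parse(is, StandardCharsets.UTF_8, TableSchemaType.HIVE);
            // Assumes Schema exposes its fields and Field exposes its name
            schema.getFields().forEach(f -> System.out.println(f.getName()));
        }
    }
}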