Use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project hazelcast (by hazelcast):
class CsvInputFormat, method createRecordReader.
/**
 * Creates a {@link RecordReader} that deserializes each CSV line of the given
 * file split either into a {@code String[]} (with an optional field projection
 * driven by {@code CSV_INPUT_FORMAT_FIELD_LIST_PREFIX} keys) or into a bean of
 * the class named by {@code CSV_INPUT_FORMAT_BEAN_CLASS}.
 *
 * @param split   the input split to read; expected to be a {@link FileSplit}
 * @param context task context supplying the Hadoop {@link Configuration}
 * @return a record reader producing {@link NullWritable} keys and deserialized row values
 */
@Override
public RecordReader<NullWritable, Object> createRecordReader(InputSplit split, TaskAttemptContext context) {
    return new RecordReader<NullWritable, Object>() {

        private Object current;
        private MappingIterator<Object> iterator;
        // Identity unless a field list is configured for the String[] mode.
        private Function<Object, Object> projection = identity();

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
            FileSplit fileSplit = (FileSplit) split;
            // Fix: the configuration was previously fetched twice into two
            // separate variables (conf/configuration); a single lookup suffices.
            Configuration conf = context.getConfiguration();
            String className = conf.get(CSV_INPUT_FORMAT_BEAN_CLASS);
            Class<?> formatClazz = className == null ? null : ReflectionUtils.loadClass(className);
            Path file = fileSplit.getPath();
            FileSystem fs = file.getFileSystem(conf);
            FSDataInputStream in = fs.open(file);
            try {
                if (formatClazz == String[].class) {
                    ObjectReader reader = new CsvMapper()
                            .enable(Feature.WRAP_AS_ARRAY)
                            .readerFor(String[].class)
                            .with(CsvSchema.emptySchema().withSkipFirstDataRow(false));
                    iterator = reader.readValues((InputStream) in);
                    // The first row is required: it is consumed as the header.
                    if (!iterator.hasNext()) {
                        throw new JetException("Header row missing in " + split);
                    }
                    String[] header = (String[]) iterator.next();
                    // Collect the configured output fields: indexed keys 0..k until a gap.
                    List<String> fieldNames = new ArrayList<>();
                    String field;
                    for (int i = 0; (field = conf.get(CSV_INPUT_FORMAT_FIELD_LIST_PREFIX + i)) != null; i++) {
                        fieldNames.add(field);
                    }
                    projection = (Function) createFieldProjection(header, fieldNames);
                } else {
                    iterator = new CsvMapper()
                            .readerFor(formatClazz)
                            .withoutFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
                            .with(CsvSchema.emptySchema().withHeader())
                            .readValues((InputStream) in);
                }
            } catch (IOException | RuntimeException e) {
                // Fix: don't leak the opened stream when initialization fails
                // before the iterator (whose close() releases it) is usable.
                in.close();
                throw e;
            }
        }

        @Override
        public boolean nextKeyValue() {
            if (!iterator.hasNext()) {
                return false;
            }
            current = projection.apply(iterator.next());
            return true;
        }

        @Override
        public NullWritable getCurrentKey() {
            return NullWritable.get();
        }

        @Override
        public Object getCurrentValue() {
            return current;
        }

        @Override
        public float getProgress() {
            // Progress reporting is not implemented for this reader.
            return 0;
        }

        @Override
        public void close() throws IOException {
            // Closing the iterator also closes the underlying input stream.
            iterator.close();
        }
    };
}
Use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project hazelcast (by hazelcast):
class CsvReadFileFnProvider, method createReadFileFn.
/**
 * Builds a serializable function that opens the given file and streams its CSV
 * rows, either as {@code String[]} (optionally projected to the configured
 * field names) or as beans of the format's declared class.
 *
 * @param format the CSV file format; must be a {@link CsvFileFormat}
 * @param <T>    the deserialized row type
 * @return a function mapping a file {@link Path} to a lazy {@link Stream} of rows;
 *         callers must close the stream so the underlying file is released
 */
@SuppressWarnings("unchecked")
@Nonnull
@Override
public <T> FunctionEx<Path, Stream<T>> createReadFileFn(@Nonnull FileFormat<T> format) {
    CsvFileFormat<T> csvFileFormat = (CsvFileFormat<T>) format;
    // Format is not Serializable; capture only the bean class in the lambda.
    Class<?> formatClazz = csvFileFormat.clazz();
    return path -> {
        FileInputStream fis = new FileInputStream(path.toFile());
        try {
            MappingIterator<T> iterator;
            Function<T, T> projection = identity();
            if (formatClazz == String[].class) {
                ObjectReader reader = new CsvMapper()
                        .enable(Feature.WRAP_AS_ARRAY)
                        .readerFor(String[].class)
                        .with(CsvSchema.emptySchema().withSkipFirstDataRow(false));
                iterator = reader.readValues(fis);
                // The first row is required: it is consumed as the header.
                if (!iterator.hasNext()) {
                    throw new JetException("Header row missing in " + path);
                }
                String[] header = (String[]) iterator.next();
                List<String> fieldNames = csvFileFormat.fieldNames();
                if (fieldNames != null) {
                    projection = (Function<T, T>) createFieldProjection(header, fieldNames);
                }
            } else {
                iterator = new CsvMapper()
                        .readerFor(formatClazz)
                        .withoutFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
                        .with(CsvSchema.emptySchema().withHeader())
                        .readValues(fis);
            }
            // The returned stream owns the file handle from here on.
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, ORDERED), false)
                    .map(projection)
                    .onClose(() -> uncheckRun(fis::close));
        } catch (IOException | RuntimeException e) {
            // Fix: previously the stream leaked when the header was missing or
            // readValues failed, because onClose was only attached on success.
            fis.close();
            throw e;
        }
    };
}
Use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project xm-ms-entity (by xm-online):
class EntityToCsvConverterUtils, method toCsv.
/**
 * Serializes the given object to CSV bytes using the supplied schema.
 *
 * @param o      the object to serialize to CSV; may be {@code null}
 * @param schema the CSV schema describing columns and formatting
 * @return the CSV file contents as a byte array; empty when {@code o} is {@code null}
 * @throws IllegalStateException if Jackson fails to write the CSV output
 */
public static byte[] toCsv(Object o, CsvSchema schema) {
    if (o == null) {
        log.warn("Passed empty object for serialize, therefore return empty byte array which represents csv file");
        return new byte[0];
    }
    ObjectWriter writer = createDefaultCsvMapper().writer(schema);
    try {
        return writer.writeValueAsBytes(o);
    } catch (JsonProcessingException e) {
        throw new IllegalStateException("Exception while writing data to csv file", e);
    }
}
Use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project xm-ms-entity (by xm-online):
class EntityToCsvConverterUtils, method createDefaultCsvMapper.
/**
 * Builds the CSV mapper used for serialization: unknown fields are ignored on
 * write and java.time types are supported via the {@link JavaTimeModule}.
 *
 * @return a configured {@link CsvMapper} instance
 */
private static CsvMapper createDefaultCsvMapper() {
    CsvMapper csvMapper = new CsvMapper();
    csvMapper.configure(JsonGenerator.Feature.IGNORE_UNKNOWN, true);
    csvMapper.registerModule(new JavaTimeModule());
    return csvMapper;
}
Use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project snow-owl (by b2ihealthcare):
class SnomedRf2ImportRequest, method read.
/**
 * Reads an RF2 release archive: scans every {@code .txt} entry whose name
 * matches the configured release type, parses it as tab-separated values
 * ({@code \r\n} line endings, no quote character), and feeds each file to
 * {@code readFile} before flushing all effective-time slices.
 *
 * @param rf2Archive the RF2 zip archive to import
 * @param slices     accumulator for per-effective-time content slices
 * @param reporter   collector for validation issues found while reading
 * @throws SnowowlRuntimeException if the archive cannot be read
 */
private void read(File rf2Archive, Rf2EffectiveTimeSlices slices, Rf2ValidationIssueReporter reporter) {
    final CsvMapper tabMapper = new CsvMapper();
    tabMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY);
    // RF2 files are TSV with CRLF line terminators and no quoting.
    final CsvSchema tabSchema = CsvSchema.emptySchema()
            .withoutQuoteChar()
            .withColumnSeparator('\t')
            .withLineSeparator("\r\n");
    final ObjectReader lineReader = tabMapper.readerFor(String[].class).with(tabSchema);
    final Stopwatch stopwatch = Stopwatch.createStarted();
    try (final ZipFile zip = new ZipFile(rf2Archive)) {
        for (final ZipEntry entry : Collections.list(zip.entries())) {
            final String fileName = Paths.get(entry.getName()).getFileName().toString().toLowerCase();
            // Skip anything that is not a .txt file of the requested release type.
            if (!fileName.endsWith(TXT_EXT) || !fileName.contains(releaseType.toString().toLowerCase())) {
                continue;
            }
            stopwatch.reset().start();
            try (final InputStream in = zip.getInputStream(entry)) {
                readFile(entry, in, lineReader, slices, reporter);
            }
            log.info("{} - {}", entry.getName(), stopwatch);
        }
    } catch (IOException e) {
        throw new SnowowlRuntimeException(e);
    }
    slices.flushAll();
}
Aggregations