Search in sources :

Example 6 with CsvMapper

use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project hazelcast by hazelcast.

the class CsvInputFormat method createRecordReader.

/**
 * Creates a record reader that parses the CSV file behind a {@link FileSplit}.
 *
 * <p>The target type is read from the {@code CSV_INPUT_FORMAT_BEAN_CLASS} configuration entry.
 * When it is {@code String[]}, the first row is consumed as the header and each following row is
 * passed through a projection built from the header and the field names configured under
 * {@code CSV_INPUT_FORMAT_FIELD_LIST_PREFIX + i}. Otherwise rows are bound to the configured class
 * using the header row, with unknown properties ignored.
 *
 * @param split the split to read (the reader works on the split passed to {@code initialize})
 * @param context the task attempt context
 * @return a reader producing {@link NullWritable} keys and the parsed rows as values
 */
@Override
public RecordReader<NullWritable, Object> createRecordReader(InputSplit split, TaskAttemptContext context) {
    return new RecordReader<NullWritable, Object>() {

        private Object current;

        private MappingIterator<Object> iterator;

        private Function<Object, Object> projection = identity();

        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
            FileSplit fileSplit = (FileSplit) split;
            Configuration configuration = context.getConfiguration();
            String className = configuration.get(CSV_INPUT_FORMAT_BEAN_CLASS);
            Class<?> formatClazz = className == null ? null : ReflectionUtils.loadClass(className);
            Path file = fileSplit.getPath();
            FileSystem fs = file.getFileSystem(configuration);
            FSDataInputStream in = fs.open(file);
            try {
                if (formatClazz == String[].class) {
                    ObjectReader reader = new CsvMapper().enable(Feature.WRAP_AS_ARRAY).readerFor(String[].class).with(CsvSchema.emptySchema().withSkipFirstDataRow(false));
                    iterator = reader.readValues((InputStream) in);
                    if (!iterator.hasNext()) {
                        throw new JetException("Header row missing in " + split);
                    }
                    // The first row is the header; it is consumed here and never emitted as a record.
                    String[] header = (String[]) iterator.next();
                    List<String> fieldNames = new ArrayList<>();
                    String field;
                    // Field names are stored under consecutive indexed keys; stop at the first gap.
                    for (int i = 0; (field = configuration.get(CSV_INPUT_FORMAT_FIELD_LIST_PREFIX + i)) != null; i++) {
                        fieldNames.add(field);
                    }
                    projection = (Function) createFieldProjection(header, fieldNames);
                } else {
                    iterator = new CsvMapper().readerFor(formatClazz).withoutFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES).with(CsvSchema.emptySchema().withHeader()).readValues((InputStream) in);
                }
            } catch (IOException | RuntimeException e) {
                // Initialization failed after the file was opened: release the stream here,
                // because nothing else holds a reference to it yet.
                try {
                    in.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }

        @Override
        public boolean nextKeyValue() {
            if (!iterator.hasNext()) {
                return false;
            }
            current = projection.apply(iterator.next());
            return true;
        }

        @Override
        public NullWritable getCurrentKey() {
            return NullWritable.get();
        }

        @Override
        public Object getCurrentValue() {
            return current;
        }

        @Override
        public float getProgress() {
            // Progress is not tracked; a constant 0 is reported.
            return 0;
        }

        @Override
        public void close() throws IOException {
            // The iterator is null when initialize() was never called or failed early.
            if (iterator != null) {
                iterator.close();
            }
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.hadoop.mapreduce.RecordReader) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JetException(com.hazelcast.jet.JetException) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) Function(java.util.function.Function) MappingIterator(com.fasterxml.jackson.databind.MappingIterator) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Example 7 with CsvMapper

use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project hazelcast by hazelcast.

the class CsvReadFileFnProvider method createReadFileFn.

/**
 * Creates a function that opens a CSV file and exposes its rows as a stream.
 *
 * <p>When the format's class is {@code String[]}, the first row is consumed as the header and,
 * if the format supplies field names, each row is passed through a projection built from the
 * header and those names. Otherwise rows are bound to the format's class using the header row,
 * with unknown properties ignored.
 *
 * <p>The returned stream closes the underlying file in its {@code onClose} handler. If setting up
 * the stream fails, the file is closed before the exception is rethrown.
 *
 * @param format the file format; it is cast to {@link CsvFileFormat}
 * @return a function mapping a path to a stream of parsed rows
 */
@SuppressWarnings("unchecked")
@Nonnull
@Override
public <T> FunctionEx<Path, Stream<T>> createReadFileFn(@Nonnull FileFormat<T> format) {
    CsvFileFormat<T> csvFileFormat = (CsvFileFormat<T>) format;
    // Format is not Serializable, so read what the lambda needs here instead of capturing it
    Class<?> formatClazz = csvFileFormat.clazz();
    List<String> fieldNames = csvFileFormat.fieldNames();
    return path -> {
        FileInputStream fis = new FileInputStream(path.toFile());
        try {
            MappingIterator<T> iterator;
            Function<T, T> projection = identity();
            if (formatClazz == String[].class) {
                ObjectReader reader = new CsvMapper().enable(Feature.WRAP_AS_ARRAY).readerFor(String[].class).with(CsvSchema.emptySchema().withSkipFirstDataRow(false));
                iterator = reader.readValues(fis);
                if (!iterator.hasNext()) {
                    throw new JetException("Header row missing in " + path);
                }
                // The first row is the header; it is consumed here and never emitted.
                String[] header = (String[]) iterator.next();
                if (fieldNames != null) {
                    projection = (Function<T, T>) createFieldProjection(header, fieldNames);
                }
            } else {
                iterator = new CsvMapper().readerFor(formatClazz).withoutFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES).with(CsvSchema.emptySchema().withHeader()).readValues(fis);
            }
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, ORDERED), false).map(projection).onClose(() -> uncheckRun(fis::close));
        } catch (Exception e) {
            // No stream was returned, so no onClose handler will ever run: close the file here.
            try {
                fis.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    };
}
Also used : FunctionEx(com.hazelcast.function.FunctionEx) Util.uncheckRun(com.hazelcast.jet.impl.util.Util.uncheckRun) Spliterators(java.util.Spliterators) MappingIterator(com.fasterxml.jackson.databind.MappingIterator) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) CsvSchema(com.fasterxml.jackson.dataformat.csv.CsvSchema) ORDERED(java.util.Spliterator.ORDERED) FileInputStream(java.io.FileInputStream) Function(java.util.function.Function) DeserializationFeature(com.fasterxml.jackson.databind.DeserializationFeature) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) JetException(com.hazelcast.jet.JetException) CsvFileFormat(com.hazelcast.jet.pipeline.file.CsvFileFormat) Feature(com.fasterxml.jackson.dataformat.csv.CsvParser.Feature) FileFormat(com.hazelcast.jet.pipeline.file.FileFormat) List(java.util.List) Stream(java.util.stream.Stream) Util.createFieldProjection(com.hazelcast.jet.impl.util.Util.createFieldProjection) ReadFileFnProvider(com.hazelcast.jet.pipeline.file.impl.ReadFileFnProvider) Function.identity(java.util.function.Function.identity) StreamSupport(java.util.stream.StreamSupport) Nonnull(javax.annotation.Nonnull) Path(java.nio.file.Path) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) JetException(com.hazelcast.jet.JetException) FileInputStream(java.io.FileInputStream) Function(java.util.function.Function) MappingIterator(com.fasterxml.jackson.databind.MappingIterator) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) List(java.util.List) CsvFileFormat(com.hazelcast.jet.pipeline.file.CsvFileFormat) Nonnull(javax.annotation.Nonnull)

Example 8 with CsvMapper

use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project xm-ms-entity by xm-online.

the class EntityToCsvConverterUtils method toCsv.

/**
 * Serializes the given object to CSV using the supplied schema.
 *
 * @param o the object to serialize; when {@code null}, a warning is logged and nothing is written
 * @param schema the CSV schema handed to the writer
 * @return the CSV content as bytes, or an empty array when {@code o} is {@code null}
 * @throws IllegalStateException if the object cannot be serialized
 */
public static byte[] toCsv(Object o, CsvSchema schema) {
    if (o != null) {
        ObjectWriter writer = createDefaultCsvMapper().writer(schema);
        try {
            return writer.writeValueAsBytes(o);
        } catch (JsonProcessingException cause) {
            throw new IllegalStateException("Exception while writing data to csv file", cause);
        }
    }
    log.warn("Passed empty object for serialize, therefore return empty byte array which represents csv file");
    return new byte[0];
}
Also used : CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) ObjectWriter(com.fasterxml.jackson.databind.ObjectWriter) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)

Example 9 with CsvMapper

use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project xm-ms-entity by xm-online.

the class EntityToCsvConverterUtils method createDefaultCsvMapper.

/**
 * Builds the CSV mapper used for serialization: the {@code IGNORE_UNKNOWN} generator feature is
 * switched on and the {@link JavaTimeModule} is registered.
 *
 * @return a newly created, configured mapper
 */
private static CsvMapper createDefaultCsvMapper() {
    final CsvMapper csvMapper = new CsvMapper();
    csvMapper.enable(JsonGenerator.Feature.IGNORE_UNKNOWN);
    csvMapper.registerModule(new JavaTimeModule());
    return csvMapper;
}
Also used : CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) JavaTimeModule(com.fasterxml.jackson.datatype.jsr310.JavaTimeModule)

Example 10 with CsvMapper

use of org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper in project snow-owl by b2ihealthcare.

the class SnomedRf2ImportRequest method read.

/**
 * Reads the matching text files of an RF2 archive into the given slices.
 *
 * <p>An entry is processed when its lower-cased simple file name ends with {@code TXT_EXT} and
 * contains the lower-cased name of {@code releaseType}. Lower-casing uses the root locale so the
 * match does not depend on the JVM default locale. The time spent on each processed entry is
 * logged, and all slices are flushed once the archive has been read.
 *
 * @param rf2Archive the zip archive to read
 * @param slices the slices passed to {@code readFile} and flushed at the end
 * @param reporter the validation issue reporter passed to {@code readFile}
 * @throws SnowowlRuntimeException if reading the archive fails with an {@link IOException}
 */
private void read(File rf2Archive, Rf2EffectiveTimeSlices slices, Rf2ValidationIssueReporter reporter) {
    final CsvMapper csvMapper = new CsvMapper();
    csvMapper.enable(CsvParser.Feature.WRAP_AS_ARRAY);
    // Tab-separated rows, no quoting, CRLF line endings
    final CsvSchema schema = CsvSchema.emptySchema().withoutQuoteChar().withColumnSeparator('\t').withLineSeparator("\r\n");
    final ObjectReader oReader = csvMapper.readerFor(String[].class).with(schema);
    final Stopwatch w = Stopwatch.createStarted();
    try (final ZipFile zip = new ZipFile(rf2Archive)) {
        for (ZipEntry entry : Collections.list(zip.entries())) {
            final String fileName = Paths.get(entry.getName()).getFileName().toString().toLowerCase(java.util.Locale.ROOT);
            if (!fileName.endsWith(TXT_EXT)
                    || !fileName.contains(releaseType.toString().toLowerCase(java.util.Locale.ROOT))) {
                continue;
            }
            // Restart the stopwatch so the logged time covers this entry only.
            w.reset().start();
            try (final InputStream in = zip.getInputStream(entry)) {
                readFile(entry, in, oReader, slices, reporter);
            }
            log.info("{} - {}", entry.getName(), w);
        }
    } catch (IOException e) {
        throw new SnowowlRuntimeException(e);
    }
    slices.flushAll();
}
Also used : CsvSchema(com.fasterxml.jackson.dataformat.csv.CsvSchema) ZipFile(java.util.zip.ZipFile) InputStream(java.io.InputStream) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) ZipEntry(java.util.zip.ZipEntry) Stopwatch(com.google.common.base.Stopwatch) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) IOException(java.io.IOException) SnowowlRuntimeException(com.b2international.snowowl.core.api.SnowowlRuntimeException)

Aggregations

CsvMapper (com.fasterxml.jackson.dataformat.csv.CsvMapper)21 CsvSchema (com.fasterxml.jackson.dataformat.csv.CsvSchema)15 IOException (java.io.IOException)10 ArrayList (java.util.ArrayList)7 ObjectWriter (com.fasterxml.jackson.databind.ObjectWriter)5 InputStream (java.io.InputStream)5 List (java.util.List)5 CsvMapper (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper)5 ObjectReader (com.fasterxml.jackson.databind.ObjectReader)4 File (java.io.File)4 HashMap (java.util.HashMap)4 Map (java.util.Map)4 Converter (org.apache.flink.formats.common.Converter)4 CsvSchema (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema)4 FileUploadException (uk.ac.ebi.spot.goci.curation.exception.FileUploadException)4 OutputStream (java.io.OutputStream)3 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)2 MappingIterator (com.fasterxml.jackson.databind.MappingIterator)2 JetException (com.hazelcast.jet.JetException)2 FileOutputStream (java.io.FileOutputStream)2