Search in sources:

Example 6 with CsvMapper

use of com.fasterxml.jackson.dataformat.csv.CsvMapper in project hazelcast by hazelcast.

the class CsvInputFormat method createRecordReader.

/**
 * Creates a record reader that parses a CSV file split with Jackson's {@code CsvMapper}.
 *
 * <p>The key is always {@link NullWritable}; the value is either a projected {@code String[]}
 * row (when the configured bean class is {@code String[]}) or an instance of the configured
 * bean class.
 *
 * @param split   the split handed over by the framework (the reader uses the one passed to
 *                {@code initialize})
 * @param context the task attempt context (the reader uses the one passed to {@code initialize})
 * @return a new, not yet initialized record reader
 */
@Override
public RecordReader<NullWritable, Object> createRecordReader(InputSplit split, TaskAttemptContext context) {
    return new RecordReader<NullWritable, Object>() {

        // Value produced by the most recent successful nextKeyValue() call.
        private Object current;

        // Stays null until initialize() completes successfully.
        private MappingIterator<Object> iterator;

        private Function<Object, Object> projection = identity();

        /**
         * Opens the file behind the split and prepares the CSV iterator.
         *
         * @throws IOException  if the file cannot be opened or parsed
         * @throws JetException if the {@code String[]} format is requested and the file has no header row
         */
        @SuppressWarnings({ "unchecked", "rawtypes" })
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
            FileSplit fileSplit = (FileSplit) split;
            Configuration configuration = context.getConfiguration();
            String className = configuration.get(CSV_INPUT_FORMAT_BEAN_CLASS);
            Class<?> formatClazz = className == null ? null : ReflectionUtils.loadClass(className);
            Path file = fileSplit.getPath();
            FileSystem fs = file.getFileSystem(configuration);
            FSDataInputStream in = fs.open(file);
            try {
                // Build into locals and publish to the fields only on success, so that a
                // failed initialize() leaves the reader in its pristine (closeable) state.
                MappingIterator<Object> rows;
                Function<Object, Object> rowProjection = identity();
                if (formatClazz == String[].class) {
                    ObjectReader reader = new CsvMapper().enable(Feature.WRAP_AS_ARRAY).readerFor(String[].class).with(CsvSchema.emptySchema().withSkipFirstDataRow(false));
                    // The explicit cast selects the InputStream overload of readValues.
                    rows = reader.readValues((InputStream) in);
                    if (!rows.hasNext()) {
                        throw new JetException("Header row missing in " + split);
                    }
                    String[] header = (String[]) rows.next();
                    // Field names are stored under consecutive indexed keys; stop at the first gap.
                    List<String> fieldNames = new ArrayList<>();
                    String field;
                    for (int i = 0; (field = configuration.get(CSV_INPUT_FORMAT_FIELD_LIST_PREFIX + i)) != null; i++) {
                        fieldNames.add(field);
                    }
                    rowProjection = (Function) createFieldProjection(header, fieldNames);
                } else {
                    rows = new CsvMapper().readerFor(formatClazz).withoutFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES).with(CsvSchema.emptySchema().withHeader()).readValues((InputStream) in);
                }
                iterator = rows;
                projection = rowProjection;
            } catch (IOException | RuntimeException e) {
                // Nobody else owns the stream yet, so release it here instead of leaking it.
                try {
                    in.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }

        @Override
        public boolean nextKeyValue() {
            if (!iterator.hasNext()) {
                return false;
            }
            current = projection.apply(iterator.next());
            return true;
        }

        @Override
        public NullWritable getCurrentKey() {
            return NullWritable.get();
        }

        @Override
        public Object getCurrentValue() {
            return current;
        }

        @Override
        public float getProgress() {
            // Progress is not tracked; a constant 0 is reported.
            return 0;
        }

        @Override
        public void close() throws IOException {
            // initialize() may have failed or never been called; avoid an NPE that would
            // mask the original failure.
            if (iterator != null) {
                iterator.close();
            }
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputStream(java.io.InputStream) RecordReader(org.apache.hadoop.mapreduce.RecordReader) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) ArrayList(java.util.ArrayList) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) JetException(com.hazelcast.jet.JetException) FileSplit(org.apache.hadoop.mapreduce.lib.input.FileSplit) Function(java.util.function.Function) MappingIterator(com.fasterxml.jackson.databind.MappingIterator) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) InputSplit(org.apache.hadoop.mapreduce.InputSplit)

Example 7 with CsvMapper

use of com.fasterxml.jackson.dataformat.csv.CsvMapper in project hazelcast by hazelcast.

the class CsvReadFileFnProvider method createReadFileFn.

/**
 * Creates a function that opens a CSV file and exposes its rows as a stream.
 *
 * <p>When the format's class is {@code String[]}, the first row is consumed as the header and,
 * if the format declares field names, each row is passed through a field projection built
 * from that header. Otherwise rows are bound to the format's class using the header row as
 * schema, with unknown properties ignored.
 *
 * <p>The returned stream closes the underlying file in its {@code onClose} handler. If
 * anything fails before the stream is handed to the caller, the file is closed here and the
 * original exception is rethrown.
 *
 * @param format the file format; must be a {@code CsvFileFormat}
 * @return a function mapping a file path to a stream of parsed rows
 */
@SuppressWarnings("unchecked")
@Nonnull
@Override
public <T> FunctionEx<Path, Stream<T>> createReadFileFn(@Nonnull FileFormat<T> format) {
    CsvFileFormat<T> csvFileFormat = (CsvFileFormat<T>) format;
    // Format is not Serializable
    Class<?> formatClazz = csvFileFormat.clazz();
    return path -> {
        FileInputStream fis = new FileInputStream(path.toFile());
        try {
            MappingIterator<T> iterator;
            Function<T, T> projection = identity();
            if (formatClazz == String[].class) {
                ObjectReader reader = new CsvMapper().enable(Feature.WRAP_AS_ARRAY).readerFor(String[].class).with(CsvSchema.emptySchema().withSkipFirstDataRow(false));
                iterator = reader.readValues(fis);
                if (!iterator.hasNext()) {
                    throw new JetException("Header row missing in " + path);
                }
                String[] header = (String[]) iterator.next();
                List<String> fieldNames = csvFileFormat.fieldNames();
                if (fieldNames != null) {
                    projection = (Function<T, T>) createFieldProjection(header, fieldNames);
                }
            } else {
                iterator = new CsvMapper().readerFor(formatClazz).withoutFeatures(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES).with(CsvSchema.emptySchema().withHeader()).readValues(fis);
            }
            return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, ORDERED), false).map(projection).onClose(() -> uncheckRun(fis::close));
        } catch (Exception e) {
            // The onClose hook was never handed to a caller, so the file must be released here.
            try {
                fis.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    };
}
Also used : FunctionEx(com.hazelcast.function.FunctionEx) Util.uncheckRun(com.hazelcast.jet.impl.util.Util.uncheckRun) Spliterators(java.util.Spliterators) MappingIterator(com.fasterxml.jackson.databind.MappingIterator) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) CsvSchema(com.fasterxml.jackson.dataformat.csv.CsvSchema) ORDERED(java.util.Spliterator.ORDERED) FileInputStream(java.io.FileInputStream) Function(java.util.function.Function) DeserializationFeature(com.fasterxml.jackson.databind.DeserializationFeature) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) JetException(com.hazelcast.jet.JetException) CsvFileFormat(com.hazelcast.jet.pipeline.file.CsvFileFormat) Feature(com.fasterxml.jackson.dataformat.csv.CsvParser.Feature) FileFormat(com.hazelcast.jet.pipeline.file.FileFormat) List(java.util.List) Stream(java.util.stream.Stream) Util.createFieldProjection(com.hazelcast.jet.impl.util.Util.createFieldProjection) ReadFileFnProvider(com.hazelcast.jet.pipeline.file.impl.ReadFileFnProvider) Function.identity(java.util.function.Function.identity) StreamSupport(java.util.stream.StreamSupport) Nonnull(javax.annotation.Nonnull) Path(java.nio.file.Path) SuppressFBWarnings(edu.umd.cs.findbugs.annotations.SuppressFBWarnings) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) JetException(com.hazelcast.jet.JetException) FileInputStream(java.io.FileInputStream) Function(java.util.function.Function) MappingIterator(com.fasterxml.jackson.databind.MappingIterator) ObjectReader(com.fasterxml.jackson.databind.ObjectReader) List(java.util.List) CsvFileFormat(com.hazelcast.jet.pipeline.file.CsvFileFormat) Nonnull(javax.annotation.Nonnull)

Example 8 with CsvMapper

use of com.fasterxml.jackson.dataformat.csv.CsvMapper in project xm-ms-entity by xm-online.

the class EntityToCsvConverterUtils method toCsv.

/**
 * Serializes the given object to CSV using the supplied schema.
 *
 * @param o the object to serialize; when {@code null}, a warning is logged and an empty
 *          array is returned
 * @param schema the CSV schema applied by the writer
 * @return the CSV content as a byte array, or an empty array when {@code o} is {@code null}
 * @throws IllegalStateException if Jackson fails to serialize the object
 */
public static byte[] toCsv(Object o, CsvSchema schema) {
    if (o != null) {
        CsvMapper csvMapper = createDefaultCsvMapper();
        ObjectWriter schemaWriter = csvMapper.writer(schema);
        try {
            return schemaWriter.writeValueAsBytes(o);
        } catch (JsonProcessingException e) {
            throw new IllegalStateException("Exception while writing data to csv file", e);
        }
    }
    // Nothing to serialize: report it and hand back an empty CSV payload.
    log.warn("Passed empty object for serialize, therefore return empty byte array which represents csv file");
    return new byte[0];
}
Also used : CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) ObjectWriter(com.fasterxml.jackson.databind.ObjectWriter) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException)

Example 9 with CsvMapper

use of com.fasterxml.jackson.dataformat.csv.CsvMapper in project xm-ms-entity by xm-online.

the class EntityToCsvConverterUtils method createDefaultCsvMapper.

/**
 * Builds the mapper used for CSV export: a fresh {@code CsvMapper} with
 * {@code JsonGenerator.Feature.IGNORE_UNKNOWN} switched on and the
 * {@code JavaTimeModule} registered.
 *
 * @return a newly configured mapper instance
 */
private static CsvMapper createDefaultCsvMapper() {
    final CsvMapper csvMapper = new CsvMapper();
    final boolean ignoreUnknown = true;
    csvMapper.configure(JsonGenerator.Feature.IGNORE_UNKNOWN, ignoreUnknown);
    final JavaTimeModule javaTime = new JavaTimeModule();
    csvMapper.registerModule(javaTime);
    return csvMapper;
}
Also used : CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper) JavaTimeModule(com.fasterxml.jackson.datatype.jsr310.JavaTimeModule)

Example 10 with CsvMapper

use of com.fasterxml.jackson.dataformat.csv.CsvMapper in project registry by hortonworks.

the class TruckEventsCsvConverter method readTruckEventsFromCsv.

/**
 * Opens a lazy iterator over the truck events contained in a CSV stream.
 *
 * <p>The schema declares the twelve truck-event columns with their column types and is
 * marked as having a header row.
 *
 * @param csvStream the CSV input to read from
 * @return an iterator that binds each row to a {@code TruckEvent}
 * @throws IOException if the reader cannot start reading the stream
 */
private MappingIterator<TruckEvent> readTruckEventsFromCsv(InputStream csvStream) throws IOException {
    CsvSchema truckEventSchema = CsvSchema.builder()
            .addColumn("driverId", CsvSchema.ColumnType.NUMBER)
            .addColumn("truckId", CsvSchema.ColumnType.NUMBER)
            .addColumn("eventTime", CsvSchema.ColumnType.STRING)
            .addColumn("eventType", CsvSchema.ColumnType.STRING)
            .addColumn("longitude", CsvSchema.ColumnType.NUMBER)
            .addColumn("latitude", CsvSchema.ColumnType.NUMBER)
            .addColumn("eventKey", CsvSchema.ColumnType.STRING)
            .addColumn("correlationId", CsvSchema.ColumnType.NUMBER)
            .addColumn("driverName", CsvSchema.ColumnType.STRING)
            .addColumn("routeId", CsvSchema.ColumnType.NUMBER)
            .addColumn("routeName", CsvSchema.ColumnType.STRING)
            .addColumn("eventDate", CsvSchema.ColumnType.STRING)
            .build()
            .withHeader();
    return new CsvMapper()
            .readerFor(TruckEvent.class)
            .with(truckEventSchema)
            .readValues(csvStream);
}
Also used : CsvSchema(com.fasterxml.jackson.dataformat.csv.CsvSchema) CsvMapper(com.fasterxml.jackson.dataformat.csv.CsvMapper)

Aggregations

CsvMapper (com.fasterxml.jackson.dataformat.csv.CsvMapper)18 CsvSchema (com.fasterxml.jackson.dataformat.csv.CsvSchema)12 IOException (java.io.IOException)9 ArrayList (java.util.ArrayList)7 Map (java.util.Map)5 CsvMapper (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper)5 ObjectWriter (com.fasterxml.jackson.databind.ObjectWriter)4 InputStream (java.io.InputStream)4 HashMap (java.util.HashMap)4 List (java.util.List)4 Converter (org.apache.flink.formats.common.Converter)4 CsvSchema (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema)4 FileUploadException (uk.ac.ebi.spot.goci.curation.exception.FileUploadException)4 ObjectReader (com.fasterxml.jackson.databind.ObjectReader)3 File (java.io.File)3 JsonProcessingException (com.fasterxml.jackson.core.JsonProcessingException)2 MappingIterator (com.fasterxml.jackson.databind.MappingIterator)2 JetException (com.hazelcast.jet.JetException)2 FileOutputStream (java.io.FileOutputStream)2 OutputStreamWriter (java.io.OutputStreamWriter)2