Use of org.talend.daikon.avro.inferrer.JsonSchemaInferrer in project components by Talend.
From the class PythonRowDoFn, the method flatMap:
private void flatMap(IndexedRecord input, ProcessContext context) throws IOException {
    // Apply the user's Python function to the input record, serialized as JSON.
    PyObject outputList = pyFn.__call__(new PyString(input.toString()));
    if (outputList instanceof PyList) {
        PyList list = (PyList) outputList;
        for (Object output : list) {
            if (jsonGenericRecordConverter == null) {
                // Infer the Avro schema lazily, from the first output element.
                JsonSchemaInferrer jsonSchemaInferrer = new JsonSchemaInferrer(new ObjectMapper());
                Schema jsonSchema = jsonSchemaInferrer.inferSchema(output.toString());
                jsonGenericRecordConverter = new JsonGenericRecordConverter(jsonSchema);
            }
            GenericRecord outputRecord = jsonGenericRecordConverter.convertToAvro(output.toString());
            context.output(outputRecord);
        }
    }
}
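For context, pyFn above is a Python function object produced with Jython (hence the org.python.core types in the snippet). Below is a minimal standalone sketch of calling a Jython function the way flatMap does; the interpreter setup and the Python function body are illustrative, not taken from the Talend source:

import org.python.core.PyList;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.util.PythonInterpreter;

public class JythonFlatMapSketch {

    public static void main(String[] args) {
        PythonInterpreter interpreter = new PythonInterpreter();
        // Illustrative flatMap-style function: returns a list of JSON documents per input row.
        interpreter.exec("def userFunction(row):\n" + "    return [row, row]\n");
        PyObject pyFn = interpreter.get("userFunction");
        PyObject outputList = pyFn.__call__(new PyString("{\"id\": 1}"));
        if (outputList instanceof PyList) {
            for (Object output : (PyList) outputList) {
                // In PythonRowDoFn, each element would be converted to a GenericRecord here.
                System.out.println(output);
            }
        }
    }
}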
Use of org.talend.daikon.avro.inferrer.JsonSchemaInferrer in project components by Talend.
From the class PythonRowDoFn, the method map:
private void map(IndexedRecord input, ProcessContext context) throws IOException {
    PyObject output = pyFn.__call__(new PyString(input.toString()));
    if (jsonGenericRecordConverter == null) {
        JsonSchemaInferrer jsonSchemaInferrer = new JsonSchemaInferrer(new ObjectMapper());
        Schema jsonSchema = jsonSchemaInferrer.inferSchema(output.toString());
        jsonGenericRecordConverter = new JsonGenericRecordConverter(jsonSchema);
    }
    GenericRecord outputRecord = jsonGenericRecordConverter.convertToAvro(output.toString());
    context.output(outputRecord);
}
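Both map and flatMap cache the converter after the first element, so the inferred schema is fixed by the first record the Python function emits. The inference-plus-conversion pattern itself reduces to a self-contained sketch; the sample JSON and class name are illustrative, and the import assumes JsonGenericRecordConverter lives in org.talend.daikon.avro.converter:

import java.io.IOException;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.talend.daikon.avro.converter.JsonGenericRecordConverter;
import org.talend.daikon.avro.inferrer.JsonSchemaInferrer;

public class InferAndConvertSketch {

    public static void main(String[] args) throws IOException {
        String json = "{\"name\": \"Alice\", \"age\": 30}"; // illustrative input
        // Infer an Avro schema from the shape of the JSON document.
        Schema schema = new JsonSchemaInferrer(new ObjectMapper()).inferSchema(json);
        // Convert the same document into a GenericRecord of that schema.
        GenericRecord record = new JsonGenericRecordConverter(schema).convertToAvro(json);
        System.out.println(schema.toString(true));
        System.out.println(record);
    }
}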
Use of org.talend.daikon.avro.inferrer.JsonSchemaInferrer in project components by Talend.
From the class FixedDatasetRuntime, the method getValues:
public List<IndexedRecord> getValues(int limit) {
    List<IndexedRecord> values = new ArrayList<>();
    switch (properties.format.getValue()) {
        case CSV:
            try {
                CsvRecordToIndexedRecordConverter converter = new CsvRecordToIndexedRecordConverter(getSchema());
                for (CSVRecord r : CSVFormat.RFC4180
                        .withDelimiter(properties.getFieldDelimiter().charAt(0))
                        .withRecordSeparator(properties.getRecordDelimiter())
                        .parse(new StringReader(properties.values.getValue()))) {
                    values.add(converter.convertToAvro(r));
                }
            } catch (IOException e) {
                throw LocalIOErrorCode.createCannotParseSchema(e, properties.values.getValue());
            }
            break;
        case JSON:
            ObjectMapper mapper = new ObjectMapper();
            JsonSchemaInferrer jsonSchemaInferrer = new JsonSchemaInferrer(mapper);
            JsonGenericRecordConverter converter = null;
            JsonFactory jsonFactory = new JsonFactory();
            try (StringReader r = new StringReader(properties.values.getValue())) {
                Iterator<JsonNode> value = mapper.readValues(jsonFactory.createParser(r), JsonNode.class);
                int count = 0;
                while (value.hasNext() && count++ < limit) {
                    String json = value.next().toString();
                    if (converter == null) {
                        Schema jsonSchema = jsonSchemaInferrer.inferSchema(json);
                        converter = new JsonGenericRecordConverter(jsonSchema);
                    }
                    values.add(converter.convertToAvro(json));
                }
            } catch (IOException e) {
                throw LocalIOErrorCode.createCannotParseJson(e, properties.schema.getValue(), properties.values.getValue());
            }
            break;
        case AVRO:
            Schema schema = getSchema();
            if (isRandom()) {
                GeneratorFunction<IndexedRecord> gf = (GeneratorFunction<IndexedRecord>) GeneratorFunctions.of(getSchema());
                GeneratorFunction.GeneratorContext ctx = GeneratorFunction.GeneratorContext.of(0, 0L);
                for (int i = 0; i < limit; i++) {
                    ctx.setRowId(i);
                    values.add(gf.apply(ctx));
                }
            } else {
                try (ByteArrayInputStream bais = new ByteArrayInputStream(properties.values.getValue().trim().getBytes())) {
                    JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, bais);
                    DatumReader<IndexedRecord> reader = new GenericDatumReader<>(schema);
                    int count = 0;
                    while (count++ < limit) {
                        values.add(reader.read(null, decoder));
                    }
                } catch (EOFException e) {
                    // Indicates the end of the values.
                } catch (IOException e) {
                    throw LocalIOErrorCode.createCannotParseAvroJson(e, properties.schema.getValue(), properties.values.getValue());
                }
            }
            break;
    }
    return values;
}
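The AVRO branch above decodes JSON-encoded Avro records until the decoder runs out of input, using EOFException as the end-of-data signal. A minimal sketch of that read loop with an inline schema; the schema and sample values are illustrative:

import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonDecoder;

public class JsonDecoderSketch {

    public static void main(String[] args) throws IOException {
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"R\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]}");
        byte[] bytes = "{\"id\":1}{\"id\":2}".getBytes(StandardCharsets.UTF_8);
        try (ByteArrayInputStream bais = new ByteArrayInputStream(bytes)) {
            JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, bais);
            DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
            while (true) {
                System.out.println(reader.read(null, decoder));
            }
        } catch (EOFException e) {
            // As in getValues above: EOF simply marks the end of the encoded values.
        }
    }
}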
Use of org.talend.daikon.avro.inferrer.JsonSchemaInferrer in project components by Talend.
From the class ElasticsearchInputRuntime, the method expand:
@Override
public PCollection<IndexedRecord> expand(PBegin in) {
    ElasticsearchIO.Read esRead = ElasticsearchIO.read()
            .withConnectionConfiguration(createConnectionConf(properties.getDatasetProperties()));
    if (properties.query.getValue() != null) {
        esRead = esRead.withQuery(properties.query.getValue());
    }
    PCollection<String> readFromElasticsearch = in.apply(esRead);
    PCollection<IndexedRecord> elasticsearchDataAsAvro = readFromElasticsearch.apply("DocumentToIndexedRecord",
            ParDo.of(new DoFn<String, IndexedRecord>() {

                @ProcessElement
                public void processElement(ProcessContext c) {
                    if (jsonGenericRecordConverter == null) {
                        JsonSchemaInferrer jsonSchemaInferrer = new JsonSchemaInferrer(new ObjectMapper());
                        Schema jsonSchema = jsonSchemaInferrer.inferSchema(c.element().toString());
                        jsonGenericRecordConverter = new JsonGenericRecordConverter(jsonSchema);
                    }
                    GenericRecord outputRecord = jsonGenericRecordConverter.convertToAvro(c.element().toString());
                    c.output(outputRecord);
                }
            }));
    return elasticsearchDataAsAvro;
}
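Here createConnectionConf is the runtime's own helper for building the connection settings from the dataset properties. For comparison, a sketch of an equivalent read against the stock Beam ElasticsearchIO, assuming its ConnectionConfiguration API; the address, index, type, and query values are illustrative:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.elasticsearch.ElasticsearchIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

public class EsReadSketch {

    public static void main(String[] args) {
        Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());
        ElasticsearchIO.ConnectionConfiguration conn = ElasticsearchIO.ConnectionConfiguration.create(
                new String[] { "http://localhost:9200" }, "my-index", "my-type");
        // Each element of the resulting PCollection is one document as a JSON string,
        // ready for a DocumentToIndexedRecord conversion like the DoFn above.
        PCollection<String> docs = pipeline.apply(
                ElasticsearchIO.read().withConnectionConfiguration(conn).withQuery("{\"query\":{\"match_all\":{}}}"));
        pipeline.run().waitUntilFinish();
    }
}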