Use of io.openlineage.spark.agent.util.ScalaConversionUtils.asJavaOptional in the project OpenLineage by OpenLineage.
From the class KafkaRelationVisitor, the method createDatasetsFromOptions:
private static <D extends OpenLineage.Dataset> List<D> createDatasetsFromOptions(
    DatasetFactory<D> datasetFactory, Map<String, String> sourceOptions, StructType schema) {
  List<String> topics;
  Optional<String> servers = asJavaOptional(sourceOptions.get("kafka.bootstrap.servers"));
  // don't support subscribePattern, as it will report dataset nodes that don't exist
  topics = Stream.concat(
          // handle "subscribe" and "topic" here to handle single topic reads/writes
          Stream.of("subscribe", "topic")
              .map(it -> sourceOptions.get(it))
              .filter(it -> it.nonEmpty())
              .map(it -> it.get())
              .map(String.class::cast),
          // "assign" takes a JSON map from topic name to partitions, e.g.
          // {"topicA":[0,1],"topicB":[2,4]} - see
          // https://spark.apache.org/docs/3.1.2/structured-streaming-kafka-integration.html
          ScalaConversionUtils.asJavaOptional(sourceOptions.get("assign"))
              .map((String str) -> {
                try {
                  JsonNode jsonNode = new ObjectMapper().readTree(str);
                  long fieldCount = jsonNode.size();
                  // the topic names are the keys of the JSON object; SIZED and
                  // IMMUTABLE are flags, so they must be combined with | (bitwise or)
                  return StreamSupport.stream(
                      Spliterators.spliterator(
                          jsonNode.fieldNames(), fieldCount,
                          Spliterator.SIZED | Spliterator.IMMUTABLE),
                      false);
                } catch (IOException e) {
                  log.warn("Unable to find topics from Kafka source configuration {}", str, e);
                }
                return Stream.<String>empty();
              })
              .orElse(Stream.empty()))
      .collect(Collectors.toList());
  String server = servers
      .map(str -> {
        // prepend a scheme if none is present so java.net.URI can parse host and port
        if (!str.matches("\\w+://.*")) {
          return "PLAINTEXT://" + str;
        } else {
          return str;
        }
      })
      .map(str -> URI.create(str.split(",")[0]))
      .map(uri -> uri.getHost() + ":" + uri.getPort())
      .orElse("");
  String namespace = "kafka://" + server;
  return topics.stream()
      .map(topic -> datasetFactory.getDataset(topic, namespace, schema))
      .collect(Collectors.toList());
}
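For reference, asJavaOptional bridges Scala's Option (which scala.collection.immutable.Map.get returns) to Java's Optional. A minimal sketch of what such a conversion looks like, assuming the helper simply checks isDefined; the actual implementation in ScalaConversionUtils may differ:

import java.util.Optional;
import scala.Option;

final class OptionBridgeSketch {
  // Illustrative stand-in for ScalaConversionUtils.asJavaOptional, not the project's code.
  static <T> Optional<T> asJavaOptional(Option<T> option) {
    return option.isDefined() ? Optional.ofNullable(option.get()) : Optional.empty();
  }
}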
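The "PLAINTEXT://" prefix is what makes the host and port extraction work: without a scheme, java.net.URI treats everything before the first colon as the scheme and exposes no host or port at all. A small standalone demonstration, using hypothetical broker names:

import java.net.URI;

class BootstrapServerParsingDemo {
  public static void main(String[] args) {
    String servers = "broker1:9092,broker2:9092"; // hypothetical kafka.bootstrap.servers value
    // no scheme: "broker1" is parsed as the URI scheme, so host and port are absent
    URI bare = URI.create(servers.split(",")[0]);
    System.out.println(bare.getHost() + ":" + bare.getPort()); // prints null:-1
    // with a scheme, the authority parses cleanly
    URI prefixed = URI.create("PLAINTEXT://" + servers.split(",")[0]);
    System.out.println(prefixed.getHost() + ":" + prefixed.getPort()); // prints broker1:9092
  }
}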