Use of org.talend.sdk.component.runtime.input.Mapper in the Talend project component-runtime:
the read method of the ExecutionResource class.
/**
 * Reads input records from an instance of mapper and streams them back to the caller.
 * The number of returned records is enforced to be limited to 1000 (MAX_RECORDS).
 * The format is a JSON based format where each line is a JSON record.
 *
 * @param response the suspended asynchronous response used to stream the records back.
 * @param providers the JAX-RS providers context.
 * @param family the component family.
 * @param component the component name.
 * @param size the maximum number of records to read (defaults to 50, capped by MAX_RECORDS).
 * @param configuration the component configuration as key/values.
 */
@POST
@Deprecated
@Produces("talend/stream")
@Path("read/{family}/{component}")
public void read(@Suspended final AsyncResponse response, @Context final Providers providers, @PathParam("family") final String family, @PathParam("component") final String component, @QueryParam("size") @DefaultValue("50") final long size, final Map<String, String> configuration) {
    // Cap the client-requested size to the hard server-side limit.
    final long maxSize = Math.min(size, MAX_RECORDS);
    response.setTimeoutHandler(asyncResponse -> log.warn("Timeout on dataset retrieval"));
    response.setTimeout(appConfiguration.datasetRetrieverTimeout(), SECONDS);
    // The whole read is done off the request thread; the response is resumed asynchronously.
    executorService.submit(() -> {
        final Optional<Mapper> mapperOptional = manager.findMapper(family, component, getConfigComponentVersion(configuration), configuration);
        if (!mapperOptional.isPresent()) {
            // Unknown family/component pair: answer with a 400 carrying a typed error payload.
            response.resume(new WebApplicationException(Response.status(BAD_REQUEST).entity(new ErrorPayload(COMPONENT_MISSING, "Didn't find the input component")).build()));
            return;
        }
        final Mapper mapper = mapperOptional.get();
        mapper.start();
        try {
            final Input input = mapper.create();
            try {
                input.start();
                // NOTE(review): the StreamingOutput lambda is executed by the JAX-RS runtime
                // when the response entity is written, which may happen after the finally
                // blocks below already called input.stop()/mapper.stop() — confirm the
                // runtime consumes this entity synchronously within resume().
                response.resume((StreamingOutput) output -> {
                    Object data;
                    int current = 0;
                    // Stream at most maxSize records; stop early when the input is exhausted.
                    while (current++ < maxSize && (data = input.next()) != null) {
                        // Raw scalars cannot be serialized as standalone JSON records here,
                        // so they are boxed into a wrapper object first.
                        if (CharSequence.class.isInstance(data) || Number.class.isInstance(data) || Boolean.class.isInstance(data)) {
                            final PrimitiveWrapper wrapper = new PrimitiveWrapper();
                            wrapper.setValue(data);
                            data = wrapper;
                        }
                        inlineStreamingMapper.toJson(data, output);
                        // One JSON record per line ("json lines" style output).
                        output.write(EOL);
                    }
                });
            } finally {
                input.stop();
            }
        } finally {
            mapper.stop();
        }
    });
}
Use of org.talend.sdk.component.runtime.input.Mapper in the Talend project component-runtime:
the sourceCollector method of the ComponentExtensionTest class.
@Test
void sourceCollector() {
    // The values pushed into the source configuration and expected back from the collector.
    final List<String> expected = asList("a", "b");
    // Anonymous subclass with an instance initializer to populate the config field.
    final Source.Config config = new Source.Config() {
        {
            values = expected;
        }
    };
    final Mapper mapper = handler.createMapper(Source.class, config);
    // The mapper must emit exactly the configured values, in order.
    assertEquals(expected, handler.collectAsList(String.class, mapper));
}
Use of org.talend.sdk.component.runtime.input.Mapper in the Talend project component-runtime:
the run method of the BeamExecutor class.
/**
 * Translates the job graph held by the delegate into a Beam {@link Pipeline} and runs it.
 * <p>
 * Sources become {@code TalendIO.read} roots, processors become {@code TalendFn} transforms
 * (or {@code TalendIO.write} sinks when they have no outgoing edge), and multi-input
 * processors are joined through {@code CoGroupByKey}.
 */
@Override
public void run() {
    try {
        // Resolve every source component to its Mapper up front so a missing component fails fast.
        final Map<String, Mapper> mappers = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(Job.Component::isSource)
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findMapper(e.getNode().getFamily(), e.getNode().getComponent(),
                                e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No mapper found for: " + e.getNode()))));
        // Same eager resolution for all non-source components (processors and sinks).
        final Map<String, Processor> processors = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(component -> !component.isSource())
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findProcessor(e.getNode().getFamily(), e.getNode().getComponent(),
                                e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No processor found for: " + e.getNode()))));
        final Pipeline pipeline = Pipeline.create(createPipelineOptions());
        // Output PCollection of each already-translated component, keyed by component id.
        final Map<String, PCollection<JsonObject>> pCollections = new HashMap<>();
        delegate.getLevels().values().stream().flatMap(Collection::stream).forEach(component -> {
            if (component.isSource()) {
                final Mapper mapper = mappers.get(component.getId());
                pCollections.put(component.getId(),
                        pipeline.apply(toName("TalendIO", component), TalendIO.read(mapper))
                                .apply(toName("RecordNormalizer", component), RecordNormalizer.of(mapper.plugin())));
            } else {
                final Processor processor = processors.get(component.getId());
                // Incoming edges of this processor: one keyed PCollection per input branch.
                final List<Job.Edge> joins = getEdges(delegate.getEdges(), component, e -> e.getTo().getNode());
                final Map<String, PCollection<KV<String, JsonObject>>> inputs = joins.stream()
                        .collect(toMap(e -> e.getTo().getBranch(), e -> {
                            final PCollection<JsonObject> pc = pCollections.get(e.getFrom().getNode().getId());
                            // Keep only the records emitted on the upstream branch feeding this edge.
                            final PCollection<JsonObject> filteredInput = pc.apply(
                                    toName("RecordBranchFilter", component, e),
                                    RecordBranchFilter.of(processor.plugin(), e.getFrom().getBranch()));
                            final PCollection<JsonObject> mappedInput;
                            if (e.getFrom().getBranch().equals(e.getTo().getBranch())) {
                                mappedInput = filteredInput;
                            } else {
                                // Rename the branch when source and target branch names differ.
                                mappedInput = filteredInput.apply(
                                        toName("RecordBranchMapper", component, e),
                                        RecordBranchMapper.of(processor.plugin(), e.getFrom().getBranch(), e.getTo().getBranch()));
                            }
                            // Unwrap the branch envelope then key records for the co-group join.
                            return mappedInput
                                    .apply(toName("RecordBranchUnwrapper", component, e),
                                            RecordBranchUnwrapper.of(processor.plugin(), e.getTo().getBranch()))
                                    .apply(toName("AutoKVWrapper", component, e),
                                            AutoKVWrapper.of(processor.plugin(),
                                                    delegate.getKeyProvider(component.getId()),
                                                    component.getId(), e.getFrom().getBranch()));
                        }));
                // Fold all keyed inputs into a single KeyedPCollectionTuple for CoGroupByKey.
                KeyedPCollectionTuple<String> join = null;
                for (final Map.Entry<String, PCollection<KV<String, JsonObject>>> entry : inputs.entrySet()) {
                    final TupleTag<JsonObject> branch = new TupleTag<>(entry.getKey());
                    join = join == null ? KeyedPCollectionTuple.of(branch, entry.getValue())
                            : join.and(branch, entry.getValue());
                }
                final PCollection<JsonObject> preparedInput = join
                        .apply(toName("CoGroupByKey", component), CoGroupByKey.create())
                        .apply(toName("CoGroupByKeyResultMappingTransform", component),
                                new CoGroupByKeyResultMappingTransform<>(processor.plugin(), true));
                if (getEdges(delegate.getEdges(), component, e -> e.getFrom().getNode()).isEmpty()) {
                    // No outgoing edge: this processor is a sink.
                    final PTransform<PCollection<JsonObject>, PDone> write = TalendIO.write(processor);
                    preparedInput.apply(toName("Output", component), write);
                } else {
                    final PTransform<PCollection<JsonObject>, PCollection<JsonObject>> process = TalendFn.asFn(processor);
                    pCollections.put(component.getId(), preparedInput.apply(toName("Processor", component), process));
                }
            }
        });
        final PipelineResult result = pipeline.run();
        // waitUntilFinish() doesn't wait for the job to complete on the direct runner,
        // so poll the state afterwards until it leaves RUNNING.
        result.waitUntilFinish();
        while (PipelineResult.State.RUNNING.equals(result.getState())) {
            try {
                Thread.sleep(100L);
            } catch (final InterruptedException e) {
                // Restore the interrupt flag before converting to an unchecked abort.
                Thread.currentThread().interrupt();
                throw new IllegalStateException("the job was aborted", e);
            }
        }
    } finally {
        // Always release the per-component local sequences, even on failure.
        delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .map(Job.Component::getId)
                .forEach(JobImpl.LocalSequenceHolder::clean);
    }
}
Use of org.talend.sdk.component.runtime.input.Mapper in the Talend project component-runtime:
the guessInputComponentSchemaThroughResult method of the TaCoKitGuessSchema class.
/**
 * Guesses the input component schema by actually running the mapper and inspecting
 * the first record it produces.
 *
 * @return {@code true} when a schema could be derived from the first record,
 * {@code false} when the source produced no record at all.
 * @throws Exception when the component cannot be found or the first record is a
 * collection (unsupported shape).
 */
private boolean guessInputComponentSchemaThroughResult() throws Exception {
    final Mapper mapper = componentManager.findMapper(family, componentName, 1, configuration).orElseThrow(() -> new IllegalArgumentException("Can't find " + family + "#" + componentName));
    // Propagate a fresh job state when the mapper participates in job lifecycle tracking.
    if (JobStateAware.class.isInstance(mapper)) {
        JobStateAware.class.cast(mapper).setState(new JobStateAware.State());
    }
    Input input = null;
    try {
        mapper.start();
        // Chain all split mappers so a single Input iterates over every partition.
        final ChainedMapper chainedMapper = new ChainedMapper(mapper, mapper.split(mapper.assess()).iterator());
        chainedMapper.start();
        // NOTE(review): chainedMapper.start() is called but only the wrapped mapper is
        // stopped in the finally block — confirm ChainedMapper needs no dedicated stop().
        input = chainedMapper.create();
        input.start();
        // Only the very first record is used to derive the schema.
        Object rowObject = input.next();
        if (rowObject == null) {
            // Empty source: nothing to infer from.
            return false;
        }
        if (rowObject instanceof java.util.Map) {
            // Key/value record: infer column names and types from the map entries.
            return guessInputSchemaThroughResults(input, (java.util.Map) rowObject);
        } else if (rowObject instanceof java.util.Collection) {
            // NOTE(review): raw Exception type kept as-is — callers appear to rely on the
            // declared `throws Exception`; a more specific type would be preferable.
            throw new Exception("Can't guess schema from a Collection");
        } else {
            // POJO record: infer the schema from the record class via reflection.
            return guessSchemaThroughResultClass(rowObject.getClass());
        }
    } finally {
        // Best-effort cleanup: stop the input first, then the mapper, logging (not
        // rethrowing) failures so the guessed result is not lost to a teardown error.
        if (input != null) {
            try {
                input.stop();
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
        try {
            mapper.stop();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }
}
Use of org.talend.sdk.component.runtime.input.Mapper in the Talend project component-runtime:
the doDi method of the DIBatchSimulationTest class.
/**
 * Simulates the DI (Data Integration) runtime wiring: resolves the processor and mapper,
 * builds the input/output handlers around them and delegates the actual run/close cycle
 * to {@code doRun}/{@code doClose}.
 */
private void doDi(final ComponentManager manager, final Collection<Object> sourceData, final Collection<Object> processorData, final Optional<Processor> proc, final Optional<Mapper> mapper) {
    // Shared per-job state, mirroring the DI generated-code globalMap.
    final Map<String, Object> globalMap = new HashMap<>();
    try {
        final Processor resolvedProcessor = proc.orElseThrow(() -> new IllegalStateException("scanning failed"));
        JobStateAware.init(resolvedProcessor, globalMap);
        // Look up the Jsonb service registered for the processor's plugin.
        final ComponentManager.AllServices services = manager.findPlugin(resolvedProcessor.plugin()).get().get(ComponentManager.AllServices.class);
        final Jsonb jsonb = Jsonb.class.cast(services.getServices().get(Jsonb.class));
        // Wrap the processor so records are flushed in chunks of 100, as DI does.
        final AutoChunkProcessor chunkProcessor = new AutoChunkProcessor(100, resolvedProcessor);
        chunkProcessor.start();
        globalMap.put("processorProcessor", chunkProcessor);
        final InputsHandler inputsHandler = new InputsHandler(jsonb);
        inputsHandler.addConnection("FLOW", row1Struct.class);
        final OutputsHandler outputsHandler = new OutputsHandler(jsonb);
        final InputFactory inputFactory = inputsHandler.asInputFactory();
        final OutputFactory outputFactory = outputsHandler.asOutputFactory();
        final Mapper resolvedMapper = mapper.orElseThrow(() -> new IllegalStateException("scanning failed"));
        JobStateAware.init(resolvedMapper, globalMap);
        doRun(manager, sourceData, processorData, globalMap, chunkProcessor, inputsHandler, outputsHandler, inputFactory, outputFactory, resolvedMapper);
    } finally {
        // Mirror the DI teardown: close everything registered in the global map.
        doClose(globalMap);
    }
}
Aggregations