Search in sources :

Example 11 with Mapper

use of org.talend.sdk.component.runtime.input.Mapper in project component-runtime by Talend.

the class ExecutionResource method read.

/**
 * Read inputs from an instance of mapper. The number of returned records is enforced to be limited to 1000.
 * The format is a JSON based format where each line is a json record.
 *
 * The work is submitted to {@code executorService}; the suspended response is resumed from that task,
 * either with the record stream, with a {@code BAD_REQUEST} error when no mapper matches, or with the
 * failure raised while preparing the input.
 *
 * @param response the suspended asynchronous response to resume once the input is ready.
 * @param providers the JAX-RS providers injected by the container (not used by this method's body).
 * @param family the component family.
 * @param component the component name.
 * @param size the maximum number of records to read.
 * @param configuration the component configuration as key/values.
 */
@POST
@Deprecated
@Produces("talend/stream")
@Path("read/{family}/{component}")
public void read(@Suspended final AsyncResponse response, @Context final Providers providers, @PathParam("family") final String family, @PathParam("component") final String component, @QueryParam("size") @DefaultValue("50") final long size, final Map<String, String> configuration) {
    // never stream more than the server-side cap, whatever the caller asked for
    final long maxSize = Math.min(size, MAX_RECORDS);
    response.setTimeoutHandler(asyncResponse -> log.warn("Timeout on dataset retrieval"));
    response.setTimeout(appConfiguration.datasetRetrieverTimeout(), SECONDS);
    executorService.submit(() -> {
        try {
            final Optional<Mapper> mapperOptional = manager.findMapper(family, component, getConfigComponentVersion(configuration), configuration);
            if (!mapperOptional.isPresent()) {
                response.resume(new WebApplicationException(Response.status(BAD_REQUEST).entity(new ErrorPayload(COMPONENT_MISSING, "Didn't find the input component")).build()));
                return;
            }
            final Mapper mapper = mapperOptional.get();
            mapper.start();
            try {
                final Input input = mapper.create();
                try {
                    input.start();
                    // NOTE(review): depending on the JAX-RS runtime, this StreamingOutput may be written after
                    // resume() returns, i.e. after input.stop()/mapper.stop() below already ran - confirm
                    // against the container in use.
                    response.resume((StreamingOutput) output -> {
                        Object data;
                        int current = 0;
                        while (current++ < maxSize && (data = input.next()) != null) {
                            // bare scalars are wrapped before being serialized as a record
                            if (data instanceof CharSequence || data instanceof Number || data instanceof Boolean) {
                                final PrimitiveWrapper wrapper = new PrimitiveWrapper();
                                wrapper.setValue(data);
                                data = wrapper;
                            }
                            inlineStreamingMapper.toJson(data, output);
                            output.write(EOL);
                        }
                    });
                } finally {
                    input.stop();
                }
            } finally {
                mapper.stop();
            }
        } catch (final RuntimeException e) {
            // the Future returned by submit() is discarded: without this the failure would be lost and the
            // client would only get an answer once the timeout fires
            log.error(e.getMessage(), e);
            // no-op (returns false) when the response was already resumed
            response.resume(e);
        }
    });
}
Also used : PrimitiveWrapper(org.talend.sdk.component.server.front.model.execution.PrimitiveWrapper) Produces(javax.ws.rs.Produces) Path(javax.ws.rs.Path) BAD_FORMAT(org.talend.sdk.component.server.front.model.ErrorDictionary.BAD_FORMAT) PreDestroy(javax.annotation.PreDestroy) MediaType(javax.ws.rs.core.MediaType) JsonNumber(javax.json.JsonNumber) QueryParam(javax.ws.rs.QueryParam) Collectors.toMap(java.util.stream.Collectors.toMap) Consumes(javax.ws.rs.Consumes) Map(java.util.Map) DefaultValue(javax.ws.rs.DefaultValue) BAD_REQUEST(javax.ws.rs.core.Response.Status.BAD_REQUEST) JsonObject(javax.json.JsonObject) JsonbBuilder(javax.json.bind.JsonbBuilder) Context(javax.ws.rs.core.Context) Providers(javax.ws.rs.ext.Providers) AsyncResponse(javax.ws.rs.container.AsyncResponse) StreamingOutput(javax.ws.rs.core.StreamingOutput) Processor(org.talend.sdk.component.runtime.output.Processor) Suspended(javax.ws.rs.container.Suspended) StandardCharsets(java.nio.charset.StandardCharsets) ErrorPayload(org.talend.sdk.component.server.front.model.error.ErrorPayload) OutputEmitter(org.talend.sdk.component.api.processor.OutputEmitter) Branches(org.talend.sdk.component.runtime.output.Branches) Slf4j(lombok.extern.slf4j.Slf4j) Response(javax.ws.rs.core.Response) PostConstruct(javax.annotation.PostConstruct) Optional(java.util.Optional) WebApplicationException(javax.ws.rs.WebApplicationException) ApplicationScoped(javax.enterprise.context.ApplicationScoped) ACTION_ERROR(org.talend.sdk.component.server.front.model.ErrorDictionary.ACTION_ERROR) WriteStatistics(org.talend.sdk.component.server.front.model.execution.WriteStatistics) PathParam(javax.ws.rs.PathParam) OutputFactory(org.talend.sdk.component.runtime.output.OutputFactory) Inject(javax.inject.Inject) ComponentServerConfiguration(org.talend.sdk.component.server.configuration.ComponentServerConfiguration) Input(org.talend.sdk.component.runtime.input.Input) ExecutorService(java.util.concurrent.ExecutorService) POST(javax.ws.rs.POST) 
Optional.ofNullable(java.util.Optional.ofNullable) COMPONENT_MISSING(org.talend.sdk.component.server.front.model.ErrorDictionary.COMPONENT_MISSING) InputStreamReader(java.io.InputStreamReader) JsonString(javax.json.JsonString) Mapper(org.talend.sdk.component.runtime.input.Mapper) Jsonb(javax.json.bind.Jsonb) BufferedReader(java.io.BufferedReader) ComponentManager(org.talend.sdk.component.runtime.manager.ComponentManager) SECONDS(java.util.concurrent.TimeUnit.SECONDS) InputStream(java.io.InputStream) Mapper(org.talend.sdk.component.runtime.input.Mapper) PrimitiveWrapper(org.talend.sdk.component.server.front.model.execution.PrimitiveWrapper) ErrorPayload(org.talend.sdk.component.server.front.model.error.ErrorPayload) Input(org.talend.sdk.component.runtime.input.Input) WebApplicationException(javax.ws.rs.WebApplicationException) JsonNumber(javax.json.JsonNumber) JsonObject(javax.json.JsonObject) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Produces(javax.ws.rs.Produces)

Example 12 with Mapper

use of org.talend.sdk.component.runtime.input.Mapper in project component-runtime by Talend.

the class ComponentExtensionTest method sourceCollector.

@Test
void sourceCollector() {
    // configuration whose values are the records the source is expected to emit
    final Source.Config config = new Source.Config() {

        {
            values = asList("a", "b");
        }
    };
    final Mapper mapper = handler.createMapper(Source.class, config);
    // collecting the mapper output must give back the configured values, in order
    assertEquals(asList("a", "b"), handler.collectAsList(String.class, mapper));
}
Also used : Mapper(org.talend.sdk.component.runtime.input.Mapper) Source(org.talend.sdk.component.junit.component.Source) Test(org.junit.jupiter.api.Test)

Example 13 with Mapper

use of org.talend.sdk.component.runtime.input.Mapper in project component-runtime by Talend.

the class BeamExecutor method run.

/**
 * Builds a Beam pipeline from the delegate job and runs it.
 *
 * Source components are resolved to mappers and read through {@code TalendIO.read}; other components are
 * resolved to processors, fed with the co-grouped output of their incoming edges, and applied either as a
 * final write (no outgoing edge) or as an intermediate processing step. The local sequence holders of all
 * components are cleaned once the run ends, whether it succeeded or not.
 *
 * @throws IllegalStateException if a mapper or processor can't be found, if a non-source component has no
 * incoming edge, or if the thread is interrupted while waiting for the pipeline.
 */
@Override
public void run() {
    try {
        final Map<String, Mapper> mappers = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(Job.Component::isSource)
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findMapper(e.getNode().getFamily(), e.getNode().getComponent(), e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No mapper found for: " + e.getNode()))));
        final Map<String, Processor> processors = delegate.getLevels().values().stream()
                .flatMap(Collection::stream)
                .filter(component -> !component.isSource())
                .collect(toMap(Job.Component::getId, e -> delegate.getManager()
                        .findProcessor(e.getNode().getFamily(), e.getNode().getComponent(), e.getNode().getVersion(), e.getNode().getConfiguration())
                        .orElseThrow(() -> new IllegalStateException("No processor found for:" + e.getNode()))));
        final Pipeline pipeline = Pipeline.create(createPipelineOptions());
        // output collection of each component already added to the pipeline, by component id
        final Map<String, PCollection<JsonObject>> pCollections = new HashMap<>();
        delegate.getLevels().values().stream().flatMap(Collection::stream).forEach(component -> {
            if (component.isSource()) {
                final Mapper mapper = mappers.get(component.getId());
                pCollections.put(component.getId(), pipeline
                        .apply(toName("TalendIO", component), TalendIO.read(mapper))
                        .apply(toName("RecordNormalizer", component), RecordNormalizer.of(mapper.plugin())));
            } else {
                final Processor processor = processors.get(component.getId());
                // edges whose target is this component
                final List<Job.Edge> joins = getEdges(delegate.getEdges(), component, e -> e.getTo().getNode());
                // one keyed collection per target branch
                final Map<String, PCollection<KV<String, JsonObject>>> inputs = joins.stream().collect(toMap(e -> e.getTo().getBranch(), e -> {
                    final PCollection<JsonObject> pc = pCollections.get(e.getFrom().getNode().getId());
                    final PCollection<JsonObject> filteredInput = pc.apply(toName("RecordBranchFilter", component, e), RecordBranchFilter.of(processor.plugin(), e.getFrom().getBranch()));
                    final PCollection<JsonObject> mappedInput;
                    if (e.getFrom().getBranch().equals(e.getTo().getBranch())) {
                        mappedInput = filteredInput;
                    } else {
                        // rename the branch only when source and target branch names differ
                        mappedInput = filteredInput.apply(toName("RecordBranchMapper", component, e), RecordBranchMapper.of(processor.plugin(), e.getFrom().getBranch(), e.getTo().getBranch()));
                    }
                    return mappedInput
                            .apply(toName("RecordBranchUnwrapper", component, e), RecordBranchUnwrapper.of(processor.plugin(), e.getTo().getBranch()))
                            .apply(toName("AutoKVWrapper", component, e), AutoKVWrapper.of(processor.plugin(), delegate.getKeyProvider(component.getId()), component.getId(), e.getFrom().getBranch()));
                }));
                KeyedPCollectionTuple<String> join = null;
                for (final Map.Entry<String, PCollection<KV<String, JsonObject>>> entry : inputs.entrySet()) {
                    final TupleTag<JsonObject> branch = new TupleTag<>(entry.getKey());
                    join = join == null ? KeyedPCollectionTuple.of(branch, entry.getValue()) : join.and(branch, entry.getValue());
                }
                if (join == null) {
                    // a non-source component without any incoming edge can't be wired: fail explicitly
                    // instead of with a NullPointerException on the join below
                    throw new IllegalStateException("No input found for: " + component.getId());
                }
                final PCollection<JsonObject> preparedInput = join
                        .apply(toName("CoGroupByKey", component), CoGroupByKey.create())
                        .apply(toName("CoGroupByKeyResultMappingTransform", component), new CoGroupByKeyResultMappingTransform<>(processor.plugin(), true));
                if (getEdges(delegate.getEdges(), component, e -> e.getFrom().getNode()).isEmpty()) {
                    // no outgoing edge: the processor is applied as a final write
                    final PTransform<PCollection<JsonObject>, PDone> write = TalendIO.write(processor);
                    preparedInput.apply(toName("Output", component), write);
                } else {
                    final PTransform<PCollection<JsonObject>, PCollection<JsonObject>> process = TalendFn.asFn(processor);
                    pCollections.put(component.getId(), preparedInput.apply(toName("Processor", component), process));
                }
            }
        });
        final PipelineResult result = pipeline.run();
        // waitUntilFinish doesn't wait for the job to complete on the direct runner
        result.waitUntilFinish();
        while (PipelineResult.State.RUNNING.equals(result.getState())) {
            try {
                Thread.sleep(100L);
            } catch (final InterruptedException e) {
                // restore the interrupt flag so the callers can still observe the interruption
                Thread.currentThread().interrupt();
                throw new IllegalStateException("the job was aborted", e);
            }
        }
    } finally {
        delegate.getLevels().values().stream().flatMap(Collection::stream).map(Job.Component::getId).forEach(JobImpl.LocalSequenceHolder::clean);
    }
}
Also used : TalendIO(org.talend.sdk.component.runtime.beam.TalendIO) KV(org.apache.beam.sdk.values.KV) PipelineResult(org.apache.beam.sdk.PipelineResult) RecordBranchFilter(org.talend.sdk.component.runtime.beam.transform.RecordBranchFilter) HashMap(java.util.HashMap) PipelineOptionsFactory(org.apache.beam.sdk.options.PipelineOptionsFactory) Function(java.util.function.Function) PTransform(org.apache.beam.sdk.transforms.PTransform) RecordBranchMapper(org.talend.sdk.component.runtime.beam.transform.RecordBranchMapper) Collectors.toMap(java.util.stream.Collectors.toMap) TupleTag(org.apache.beam.sdk.values.TupleTag) Map(java.util.Map) RecordNormalizer(org.talend.sdk.component.runtime.beam.transform.RecordNormalizer) Pipeline(org.apache.beam.sdk.Pipeline) KeyedPCollectionTuple(org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple) PipelineOptions(org.apache.beam.sdk.options.PipelineOptions) JsonObject(javax.json.JsonObject) PDone(org.apache.beam.sdk.values.PDone) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) Processor(org.talend.sdk.component.runtime.output.Processor) RecordBranchUnwrapper(org.talend.sdk.component.runtime.beam.transform.RecordBranchUnwrapper) CoGroupByKey(org.apache.beam.sdk.transforms.join.CoGroupByKey) AutoKVWrapper(org.talend.sdk.component.runtime.beam.transform.AutoKVWrapper) Collectors.toList(java.util.stream.Collectors.toList) List(java.util.List) Mapper(org.talend.sdk.component.runtime.input.Mapper) CoGroupByKeyResultMappingTransform(org.talend.sdk.component.runtime.beam.transform.CoGroupByKeyResultMappingTransform) Job(org.talend.sdk.component.runtime.manager.chain.Job) JobImpl(org.talend.sdk.component.runtime.manager.chain.internal.JobImpl) AllArgsConstructor(lombok.AllArgsConstructor) TalendFn(org.talend.sdk.component.runtime.beam.TalendFn) Processor(org.talend.sdk.component.runtime.output.Processor) HashMap(java.util.HashMap) JsonObject(javax.json.JsonObject) 
TupleTag(org.apache.beam.sdk.values.TupleTag) RecordBranchMapper(org.talend.sdk.component.runtime.beam.transform.RecordBranchMapper) Mapper(org.talend.sdk.component.runtime.input.Mapper) Job(org.talend.sdk.component.runtime.manager.chain.Job) PipelineResult(org.apache.beam.sdk.PipelineResult) Pipeline(org.apache.beam.sdk.Pipeline) PCollection(org.apache.beam.sdk.values.PCollection) PDone(org.apache.beam.sdk.values.PDone) Collection(java.util.Collection) PCollection(org.apache.beam.sdk.values.PCollection) HashMap(java.util.HashMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map)

Example 14 with Mapper

use of org.talend.sdk.component.runtime.input.Mapper in project component-runtime by Talend.

the class TaCoKitGuessSchema method guessInputComponentSchemaThroughResult.

/**
 * Tries to guess the schema from the first record produced by the input component.
 *
 * @return {@code false} when the input yields no record, otherwise the outcome of the guess done from the
 * first record (map based) or from its class.
 * @throws Exception if the component can't be found or if the first record is a {@code Collection}.
 */
private boolean guessInputComponentSchemaThroughResult() throws Exception {
    final Mapper mapper = componentManager
            .findMapper(family, componentName, 1, configuration)
            .orElseThrow(() -> new IllegalArgumentException("Can't find " + family + "#" + componentName));
    if (mapper instanceof JobStateAware) {
        ((JobStateAware) mapper).setState(new JobStateAware.State());
    }
    Input input = null;
    try {
        mapper.start();
        final ChainedMapper chainedMapper = new ChainedMapper(mapper, mapper.split(mapper.assess()).iterator());
        chainedMapper.start();
        input = chainedMapper.create();
        input.start();

        // only the first record is inspected here
        final Object firstRecord = input.next();
        if (firstRecord == null) {
            return false;
        }
        if (firstRecord instanceof java.util.Map) {
            return guessInputSchemaThroughResults(input, (java.util.Map) firstRecord);
        }
        if (firstRecord instanceof java.util.Collection) {
            throw new Exception("Can't guess schema from a Collection");
        }
        return guessSchemaThroughResultClass(firstRecord.getClass());
    } finally {
        // stop the input first, then the mapper; a failure to stop is logged and doesn't prevent the next stop
        if (input != null) {
            try {
                input.stop();
            } catch (Exception e) {
                log.error(e.getMessage(), e);
            }
        }
        try {
            mapper.stop();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
    }
}
Also used : ChainedMapper(org.talend.sdk.component.runtime.manager.chain.ChainedMapper) Mapper(org.talend.sdk.component.runtime.input.Mapper) Input(org.talend.sdk.component.runtime.input.Input) JobStateAware(org.talend.sdk.component.runtime.di.JobStateAware) Collection(java.util.Collection) ChainedMapper(org.talend.sdk.component.runtime.manager.chain.ChainedMapper) JsonObject(javax.json.JsonObject) HashMap(java.util.HashMap) Collectors.toMap(java.util.stream.Collectors.toMap) Map(java.util.Map)

Example 15 with Mapper

use of org.talend.sdk.component.runtime.input.Mapper in project component-runtime by Talend.

the class DIBatchSimulationTest method doDi.

/**
 * Wires a processor and a mapper the way a DI job would (shared global map, chunked processor,
 * input/output handlers) and delegates the execution to {@code doRun}; {@code doClose} is always
 * called with the global map at the end.
 *
 * @throws IllegalStateException ("scanning failed") if the processor or the mapper is absent.
 */
private void doDi(final ComponentManager manager, final Collection<Object> sourceData, final Collection<Object> processorData, final Optional<Processor> proc, final Optional<Mapper> mapper) {
    final Map<String, Object> globalMap = new HashMap<>();
    try {
        // processor side
        final Processor processor = proc.orElseThrow(() -> new IllegalStateException("scanning failed"));
        JobStateAware.init(processor, globalMap);

        // Jsonb instance registered in the services of the processor's plugin
        final Object jsonbService = manager
                .findPlugin(processor.plugin())
                .get()
                .get(ComponentManager.AllServices.class)
                .getServices()
                .get(Jsonb.class);
        final Jsonb jsonb = (Jsonb) jsonbService;

        final AutoChunkProcessor chunkedProcessor = new AutoChunkProcessor(100, processor);
        chunkedProcessor.start();
        globalMap.put("processorProcessor", chunkedProcessor);

        final InputsHandler inputsHandler = new InputsHandler(jsonb);
        inputsHandler.addConnection("FLOW", row1Struct.class);
        final OutputsHandler outputsHandler = new OutputsHandler(jsonb);
        final InputFactory inputFactory = inputsHandler.asInputFactory();
        final OutputFactory outputFactory = outputsHandler.asOutputFactory();

        // mapper side
        final Mapper resolvedMapper = mapper.orElseThrow(() -> new IllegalStateException("scanning failed"));
        JobStateAware.init(resolvedMapper, globalMap);

        doRun(manager, sourceData, processorData, globalMap, chunkedProcessor, inputsHandler, outputsHandler, inputFactory, outputFactory, resolvedMapper);
    } finally {
        doClose(globalMap);
    }
}
Also used : InputFactory(org.talend.sdk.component.runtime.output.InputFactory) AutoChunkProcessor(org.talend.sdk.component.runtime.di.AutoChunkProcessor) Processor(org.talend.sdk.component.runtime.output.Processor) HashMap(java.util.HashMap) OutputsHandler(org.talend.sdk.component.runtime.di.OutputsHandler) ToString(lombok.ToString) PartitionMapper(org.talend.sdk.component.api.input.PartitionMapper) ChainedMapper(org.talend.sdk.component.runtime.manager.chain.ChainedMapper) Mapper(org.talend.sdk.component.runtime.input.Mapper) Jsonb(javax.json.bind.Jsonb) AutoChunkProcessor(org.talend.sdk.component.runtime.di.AutoChunkProcessor) JsonObject(javax.json.JsonObject) OutputFactory(org.talend.sdk.component.runtime.output.OutputFactory) InputsHandler(org.talend.sdk.component.runtime.di.InputsHandler)

Aggregations

Mapper (org.talend.sdk.component.runtime.input.Mapper)18 Input (org.talend.sdk.component.runtime.input.Input)9 JsonObject (javax.json.JsonObject)8 Test (org.junit.Test)6 PartitionMapper (org.talend.sdk.component.api.input.PartitionMapper)6 HashMap (java.util.HashMap)4 Map (java.util.Map)4 Jsonb (javax.json.bind.Jsonb)4 ChainedMapper (org.talend.sdk.component.runtime.manager.chain.ChainedMapper)4 Processor (org.talend.sdk.component.runtime.output.Processor)4 Collection (java.util.Collection)3 CountDownLatch (java.util.concurrent.CountDownLatch)3 Collectors.toMap (java.util.stream.Collectors.toMap)3 Test (org.junit.jupiter.api.Test)3 Source (org.talend.sdk.component.junit.component.Source)3 Annotation (java.lang.annotation.Annotation)2 Method (java.lang.reflect.Method)2 List (java.util.List)2 Optional (java.util.Optional)2 ExecutorService (java.util.concurrent.ExecutorService)2