Search in sources :

Example 26 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class LengthPrefixUnknownCoders method forParallelInstruction.

/**
 * Wraps unknown coders with a {@link LengthPrefixCoder} for the given {@link ParallelInstruction}.
 *
 * <p>All rewriting is applied to the object returned by {@code clone(input, ...)}; that object is
 * what gets returned. Depending on which field of the instruction is populated, the codec of the
 * read source, the codec of the write sink, the ParDo instruction, or the input element codec of
 * the partial group-by-key is replaced with its rewritten counterpart. Flatten instructions are
 * returned without further changes.
 *
 * @param input the instruction to rewrite
 * @param replaceWithByteArrayCoder forwarded as-is to {@code forCodec} and
 *     {@code forParDoInstruction}
 * @return the rewritten instruction
 * @throws RuntimeException if none of the handled instruction fields is set, or wrapping an
 *     {@link IOException} raised while rewriting
 */
@VisibleForTesting
static ParallelInstruction forParallelInstruction(ParallelInstruction input, boolean replaceWithByteArrayCoder) throws Exception {
    try {
        ParallelInstruction copy = clone(input, ParallelInstruction.class);

        if (copy.getRead() != null) {
            Source readSource = copy.getRead().getSource();
            readSource.setCodec(forCodec(readSource.getCodec(), replaceWithByteArrayCoder));
            return copy;
        }

        if (copy.getWrite() != null) {
            com.google.api.services.dataflow.model.Sink writeSink = copy.getWrite().getSink();
            writeSink.setCodec(forCodec(writeSink.getCodec(), replaceWithByteArrayCoder));
            return copy;
        }

        if (copy.getParDo() != null) {
            copy.setParDo(forParDoInstruction(copy.getParDo(), replaceWithByteArrayCoder));
            return copy;
        }

        if (copy.getPartialGroupByKey() != null) {
            PartialGroupByKeyInstruction partialGbk = copy.getPartialGroupByKey();
            partialGbk.setInputElementCodec(forCodec(partialGbk.getInputElementCodec(), replaceWithByteArrayCoder));
            return copy;
        }

        if (copy.getFlatten() != null) {
            // FlattenInstructions have no codecs to wrap.
            return copy;
        }

        throw new RuntimeException("Unknown parallel instruction: " + input);
    } catch (IOException e) {
        throw new RuntimeException(String.format("Failed to replace unknown coder with LengthPrefixCoder for : {%s}", input), e);
    }
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) PartialGroupByKeyInstruction(com.google.api.services.dataflow.model.PartialGroupByKeyInstruction) IOException(java.io.IOException) Source(com.google.api.services.dataflow.model.Source) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 27 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class ConcatReaderFactory method createSourceFromDictionary.

/**
 * Builds a cloud {@link Source} from its dictionary representation.
 *
 * <p>The {@code PropertyNames.SOURCE_SPEC} entry is always read and set as the spec. The encoding,
 * base specs, metadata (infinite flag and estimated size) and the "does not need splitting" flag
 * are all looked up with a {@code null} default and are only set on the result when present.
 *
 * @param dictionary the dictionary describing the source
 * @return a new {@link Source} populated from {@code dictionary}
 * @throws Exception if one of the lookup helpers fails (declared by the original signature)
 */
public static Source createSourceFromDictionary(Map<String, Object> dictionary) throws Exception {
    Source source = new Source();
    // Set spec
    CloudObject subSourceSpec = CloudObject.fromSpec(getObject(dictionary, PropertyNames.SOURCE_SPEC));
    source.setSpec(subSourceSpec);
    // Set encoding. The entry is optional, so check the raw lookup result for null *before*
    // converting it: this way a missing encoding is never handed to CloudObject.fromSpec.
    Map<String, Object> subSourceEncodingSpec = getObject(dictionary, PropertyNames.ENCODING, null);
    if (subSourceEncodingSpec != null) {
        source.setCodec(CloudObject.fromSpec(subSourceEncodingSpec));
    }
    // Set base specs
    List<Map<String, Object>> subSourceBaseSpecs = getListOfMaps(dictionary, WorkerPropertyNames.CONCAT_SOURCE_BASE_SPECS, null);
    if (subSourceBaseSpecs != null) {
        source.setBaseSpecs(subSourceBaseSpecs);
    }
    // Set metadata
    SourceMetadata metadata = new SourceMetadata();
    Boolean infinite = getBoolean(dictionary, PropertyNames.SOURCE_IS_INFINITE, null);
    if (infinite != null) {
        metadata.setInfinite(infinite);
    }
    Long estimatedSizeBytes = getLong(dictionary, PropertyNames.SOURCE_ESTIMATED_SIZE_BYTES, null);
    if (estimatedSizeBytes != null) {
        metadata.setEstimatedSizeBytes(estimatedSizeBytes);
    }
    // Attach the metadata object only when at least one of its fields was actually provided.
    if (estimatedSizeBytes != null || infinite != null) {
        source.setMetadata(metadata);
    }
    // Set doesNotNeedSplitting
    Boolean doesNotNeedSplitting = getBoolean(dictionary, PropertyNames.SOURCE_DOES_NOT_NEED_SPLITTING, null);
    if (doesNotNeedSplitting != null) {
        source.setDoesNotNeedSplitting(doesNotNeedSplitting);
    }
    return source;
}
Also used : CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) SourceMetadata(com.google.api.services.dataflow.model.SourceMetadata) Structs.getLong(org.apache.beam.runners.dataflow.util.Structs.getLong) Structs.getBoolean(org.apache.beam.runners.dataflow.util.Structs.getBoolean) Map(java.util.Map) Source(com.google.api.services.dataflow.model.Source)

Example 28 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class SourceTranslationUtils method dictionaryToCloudSource.

/**
 * Creates a cloud {@link Source} whose spec is the dictionary stored under
 * {@code PropertyNames.SOURCE_SPEC} in {@code params}.
 *
 * @param params the dictionary to read the source spec from
 * @return a new {@link Source} with only its spec set
 */
public static Source dictionaryToCloudSource(Map<String, Object> params) throws Exception {
    Source cloudSource = new Source();
    cloudSource.setSpec(getDictionary(params, PropertyNames.SOURCE_SPEC));
    // Only the spec is carried over. NOTE(review): the original comment here was truncated; it
    // stated that some other properties are not translated because they only make sense in
    // cloud Source objects produced by the user. Confirm which properties were meant.
    return cloudSource;
}
Also used : Source(com.google.api.services.dataflow.model.Source)

Example 29 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class IntrinsicMapTaskExecutorFactory method createReadOperation.

/**
 * Creates the {@link OperationNode} wrapping a {@link ReadOperation} for the read instruction
 * held by {@code node}.
 *
 * <p>The instruction's source is first passed through {@code CloudSourceUtils.flattenBaseSpecs};
 * its spec and codec are then converted to a {@link CloudObject} and a {@link Coder} respectively
 * and handed to {@code readerFactory} to obtain the reader.
 *
 * @param network the network used to look up the output receivers of {@code node}
 * @param node the node carrying the parallel instruction with the read to execute
 * @param options pipeline options forwarded to the reader factory
 * @param readerFactory factory used to create the {@link NativeReader}
 * @param executionContext execution context forwarded to the reader factory
 * @param operationContext operation context forwarded to the reader factory and the read operation
 * @return the operation node for the created read operation
 */
OperationNode createReadOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, ReaderFactory readerFactory, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
    Source flattenedSource = CloudSourceUtils.flattenBaseSpecs(node.getParallelInstruction().getRead().getSource());

    CloudObject spec = CloudObject.fromSpec(flattenedSource.getSpec());
    CloudObject codec = CloudObject.fromSpec(flattenedSource.getCodec());
    Coder<?> elementCoder = CloudObjects.coderFromCloudObject(codec);

    NativeReader<?> nativeReader = readerFactory.create(spec, elementCoder, options, executionContext, operationContext);
    OutputReceiver[] outputs = getOutputReceivers(network, node);

    return OperationNode.create(ReadOperation.create(nativeReader, outputs, operationContext));
}
Also used : ParallelInstruction(com.google.api.services.dataflow.model.ParallelInstruction) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject) OutputReceiver(org.apache.beam.runners.dataflow.worker.util.common.worker.OutputReceiver) ReadInstruction(com.google.api.services.dataflow.model.ReadInstruction) Source(com.google.api.services.dataflow.model.Source)

Example 30 with Source

use of com.google.api.services.dataflow.model.Source in project beam by apache.

the class IsmSideInputReader method createReadersFromSources.

/**
 * Creates one tracking {@link IsmReader} per file of every source listed in {@code sideInputInfo}.
 *
 * <p>The side input kind must be either {@code SINGLETON_KIND} (in which case exactly one source
 * is expected) or {@code COLLECTION_KIND}. For each source, the filename entry of its spec is
 * expanded via {@code Filepatterns.expandAtNFilepattern}; a reader is created for every resulting
 * file from a clone of the spec with the filename replaced, and wrapped in a
 * {@code SideInputTrackingIsmReader} sharing a single read counter.
 *
 * @param options pipeline options forwarded to the reader factory
 * @param sideInputInfo description of the side input, including its kind and sources
 * @param executionContext execution context forwarded to the reader factory and the read counter
 * @param sideInputIndex index of the side input, passed to the read counter
 * @return the readers, in source order and then file-expansion order
 * @throws Exception if the side input kind is neither singleton nor collection
 */
private List<IsmReader<?>> createReadersFromSources(PipelineOptions options, SideInputInfo sideInputInfo, DataflowExecutionContext executionContext, int sideInputIndex) throws Exception {
    String kind = getString(sideInputInfo.getKind(), PropertyNames.OBJECT_TYPE_NAME);
    boolean isSingleton = SINGLETON_KIND.equals(kind);
    // Reject anything that is neither a singleton nor a collection up front.
    if (!isSingleton && !COLLECTION_KIND.equals(kind)) {
        throw new Exception("unexpected kind of side input: " + kind);
    }
    if (isSingleton) {
        checkState(sideInputInfo.getSources().size() == 1, "expecting a singleton side input kind to have a single source");
    }

    SideInputReadCounter readCounter = new DataflowSideInputReadCounter(executionContext, operationContext, sideInputIndex);
    ImmutableList.Builder<IsmReader<?>> readers = ImmutableList.builder();

    for (Source cloudSource : sideInputInfo.getSources()) {
        // A source without a codec yields a null coder, which is passed on to the factory as-is.
        Coder<?> coder = cloudSource.getCodec() == null ? null : CloudObjects.coderFromCloudObject(CloudObject.fromSpec(cloudSource.getCodec()));
        CloudObject sourceSpec = CloudObject.fromSpec(cloudSource.getSpec());
        final String pattern = getString(sourceSpec, WorkerPropertyNames.FILENAME);

        for (String expandedFile : Filepatterns.expandAtNFilepattern(pattern)) {
            // Work on a clone so each reader gets its own spec with its own filename.
            CloudObject perFileSpec = sourceSpec.clone();
            addString(perFileSpec, WorkerPropertyNames.FILENAME, expandedFile);
            @SuppressWarnings("unchecked") NativeReader<?> created = readerFactory.create(perFileSpec, coder, options, executionContext, operationContext);
            checkState(created instanceof IsmReader, "%s only supports %s as a reader but was %s.", IsmSideInputReader.class.getSimpleName(), IsmReader.class.getSimpleName(), created.getClass().getSimpleName());
            IsmReader ismReader = (IsmReader) created;
            readers.add(new SideInputTrackingIsmReader<>(ismReader, readCounter));
        }
    }
    return readers.build();
}
Also used : ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) Structs.getString(org.apache.beam.runners.dataflow.util.Structs.getString) Structs.addString(org.apache.beam.runners.dataflow.util.Structs.addString) NoSuchElementException(java.util.NoSuchElementException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) Source(com.google.api.services.dataflow.model.Source) CloudObject(org.apache.beam.runners.dataflow.util.CloudObject)

Aggregations

Source (com.google.api.services.dataflow.model.Source)51 Test (org.junit.Test)31 ArrayList (java.util.ArrayList)20 WindowedValue (org.apache.beam.sdk.util.WindowedValue)18 CloudObject (org.apache.beam.runners.dataflow.util.CloudObject)16 Map (java.util.Map)15 Callable (java.util.concurrent.Callable)15 Future (java.util.concurrent.Future)15 HashMap (java.util.HashMap)13 ImmutableMap (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableMap)12 SortedMap (java.util.SortedMap)11 TreeMap (java.util.TreeMap)11 BoundedWindow (org.apache.beam.sdk.transforms.windowing.BoundedWindow)8 ParallelInstruction (com.google.api.services.dataflow.model.ParallelInstruction)7 ReadInstruction (com.google.api.services.dataflow.model.ReadInstruction)6 KV (org.apache.beam.sdk.values.KV)6 Collection (java.util.Collection)5 List (java.util.List)5 IsmRecord (org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord)5 Structs.getString (org.apache.beam.runners.dataflow.util.Structs.getString)5