use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class LengthPrefixUnknownCoders method forParallelInstruction.
/**
 * Produces a rewritten copy of the given {@link ParallelInstruction} whose codecs have been run
 * through {@code forCodec}, so that unknown coders end up wrapped with a {@link
 * LengthPrefixCoder}.
 *
 * <p>The input is cloned before anything is modified; only the clone is changed and returned.
 * Which codec gets rewritten depends on the instruction kind: the source codec for a read, the
 * sink codec for a write, the input element codec for a partial group-by-key. ParDo instructions
 * are delegated to {@code forParDoInstruction}, and flatten instructions are returned as cloned.
 *
 * @param input the instruction to copy and rewrite
 * @param replaceWithByteArrayCoder passed through unchanged to {@code forCodec} and {@code
 *     forParDoInstruction}
 * @return the rewritten clone of {@code input}
 * @throws RuntimeException if the instruction is none of the handled kinds, or wrapping an {@link
 *     IOException} raised while rewriting
 */
@VisibleForTesting
static ParallelInstruction forParallelInstruction(ParallelInstruction input, boolean replaceWithByteArrayCoder) throws Exception {
  try {
    ParallelInstruction copy = clone(input, ParallelInstruction.class);

    if (copy.getRead() != null) {
      Source readSource = copy.getRead().getSource();
      readSource.setCodec(forCodec(readSource.getCodec(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getWrite() != null) {
      com.google.api.services.dataflow.model.Sink writeSink = copy.getWrite().getSink();
      writeSink.setCodec(forCodec(writeSink.getCodec(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getParDo() != null) {
      copy.setParDo(forParDoInstruction(copy.getParDo(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getPartialGroupByKey() != null) {
      PartialGroupByKeyInstruction partialGbk = copy.getPartialGroupByKey();
      partialGbk.setInputElementCodec(forCodec(partialGbk.getInputElementCodec(), replaceWithByteArrayCoder));
      return copy;
    }

    if (copy.getFlatten() != null) {
      // FlattenInstructions have no codecs to wrap.
      return copy;
    }

    throw new RuntimeException("Unknown parallel instruction: " + input);
  } catch (IOException e) {
    throw new RuntimeException(String.format("Failed to replace unknown coder with LengthPrefixCoder for : {%s}", input), e);
  }
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class ConcatReaderFactory method createSourceFromDictionary.
/**
 * Builds a cloud {@link Source} from its dictionary (nested map) representation.
 *
 * <p>The spec entry is always read and set. The encoding, base specs, metadata (infinite flag
 * and estimated size) and the does-not-need-splitting flag are each looked up with a {@code
 * null} default and are only set on the result when a value was found.
 *
 * @param dictionary the map form of the source
 * @return a new {@link Source} populated from {@code dictionary}
 * @throws Exception if one of the lookup helpers or the spec conversion fails
 */
public static Source createSourceFromDictionary(Map<String, Object> dictionary) throws Exception {
  Source source = new Source();

  // Set spec
  CloudObject subSourceSpec = CloudObject.fromSpec(getObject(dictionary, PropertyNames.SOURCE_SPEC));
  source.setSpec(subSourceSpec);

  // Set encoding. The lookup is given a null default, so an absent encoding yields null
  // here; convert only when a value is present instead of handing null to
  // CloudObject.fromSpec and checking the result afterwards.
  Map<String, Object> subSourceEncodingSpec = getObject(dictionary, PropertyNames.ENCODING, null);
  if (subSourceEncodingSpec != null) {
    source.setCodec(CloudObject.fromSpec(subSourceEncodingSpec));
  }

  // Set base specs
  List<Map<String, Object>> subSourceBaseSpecs = getListOfMaps(dictionary, WorkerPropertyNames.CONCAT_SOURCE_BASE_SPECS, null);
  if (subSourceBaseSpecs != null) {
    source.setBaseSpecs(subSourceBaseSpecs);
  }

  // Set metadata. The metadata object is attached only if at least one of its fields was
  // present in the dictionary.
  SourceMetadata metadata = new SourceMetadata();
  Boolean infinite = getBoolean(dictionary, PropertyNames.SOURCE_IS_INFINITE, null);
  if (infinite != null) {
    metadata.setInfinite(infinite);
  }
  Long estimatedSizeBytes = getLong(dictionary, PropertyNames.SOURCE_ESTIMATED_SIZE_BYTES, null);
  if (estimatedSizeBytes != null) {
    metadata.setEstimatedSizeBytes(estimatedSizeBytes);
  }
  if (estimatedSizeBytes != null || infinite != null) {
    source.setMetadata(metadata);
  }

  // Set doesNotNeedSplitting
  Boolean doesNotNeedSplitting = getBoolean(dictionary, PropertyNames.SOURCE_DOES_NOT_NEED_SPLITTING, null);
  if (doesNotNeedSplitting != null) {
    source.setDoesNotNeedSplitting(doesNotNeedSplitting);
  }
  return source;
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class SourceTranslationUtils method dictionaryToCloudSource.
/**
 * Creates a cloud {@link Source} whose spec is taken from the {@code PropertyNames.SOURCE_SPEC}
 * entry of the given dictionary.
 *
 * @param params the map form of the source
 * @return a new {@link Source} with only its spec set
 * @throws Exception if the spec lookup fails
 */
public static Source dictionaryToCloudSource(Map<String, Object> params) throws Exception {
  Source cloudSource = new Source();
  cloudSource.setSpec(getDictionary(params, PropertyNames.SOURCE_SPEC));
  // Only the spec is carried over. The remaining Source fields are not translated, because
  // they only make sense in cloud Source objects produced by the user.
  return cloudSource;
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IntrinsicMapTaskExecutorFactory method createReadOperation.
/**
 * Builds the {@link OperationNode} for the read instruction held by {@code node}.
 *
 * <p>The instruction's source is first passed through {@code CloudSourceUtils.flattenBaseSpecs}.
 * Its spec and codec are then converted to {@link CloudObject}s, the codec is turned into a
 * {@link Coder}, and {@code readerFactory} is asked for a {@link NativeReader}. The reader is
 * wrapped in a {@code ReadOperation} together with the output receivers of {@code node}.
 *
 * @param network the graph used to look up the output receivers of {@code node}
 * @param node the node carrying the read instruction
 * @param options pipeline options handed to the reader factory
 * @param readerFactory factory used to create the reader
 * @param executionContext execution context handed to the reader factory
 * @param operationContext operation context handed to the reader factory and the read operation
 * @return the operation node wrapping the new read operation
 * @throws Exception if any of the conversion or creation steps fails
 */
OperationNode createReadOperation(Network<Node, Edge> network, ParallelInstructionNode node, PipelineOptions options, ReaderFactory readerFactory, DataflowExecutionContext<?> executionContext, DataflowOperationContext operationContext) throws Exception {
  Source flattenedSource = CloudSourceUtils.flattenBaseSpecs(node.getParallelInstruction().getRead().getSource());

  // Convert the spec first, then the codec, matching the order in which failures would surface.
  CloudObject readerSpec = CloudObject.fromSpec(flattenedSource.getSpec());
  CloudObject codecSpec = CloudObject.fromSpec(flattenedSource.getCodec());
  Coder<?> elementCoder = CloudObjects.coderFromCloudObject(codecSpec);

  NativeReader<?> nativeReader = readerFactory.create(readerSpec, elementCoder, options, executionContext, operationContext);
  OutputReceiver[] outputs = getOutputReceivers(network, node);

  return OperationNode.create(ReadOperation.create(nativeReader, outputs, operationContext));
}
use of com.google.api.services.dataflow.model.Source in project beam by apache.
the class IsmSideInputReader method createReadersFromSources.
/**
 * Creates one tracking {@link IsmReader} per file for every source of the given side input.
 *
 * <p>The side input kind must be either the singleton kind, in which case exactly one source is
 * required, or the collection kind. For each source, the file pattern stored under {@code
 * WorkerPropertyNames.FILENAME} is expanded. For each resulting file a reader is created from a
 * copy of the source spec with the file name filled in. Each reader must be an {@link IsmReader}
 * and is wrapped in a {@code SideInputTrackingIsmReader} that shares one read counter.
 *
 * @param options pipeline options handed to the reader factory
 * @param sideInputInfo description of the side input, including its kind and sources
 * @param executionContext execution context handed to the counter and the reader factory
 * @param sideInputIndex index handed to the side input read counter
 * @return an immutable list of the created readers, in source order and then file order
 * @throws Exception if the side input kind is neither singleton nor collection, or if reader
 *     creation fails
 */
private List<IsmReader<?>> createReadersFromSources(PipelineOptions options, SideInputInfo sideInputInfo, DataflowExecutionContext executionContext, int sideInputIndex) throws Exception {
  String kind = getString(sideInputInfo.getKind(), PropertyNames.OBJECT_TYPE_NAME);
  boolean singleton = SINGLETON_KIND.equals(kind);
  if (!singleton && !COLLECTION_KIND.equals(kind)) {
    throw new Exception("unexpected kind of side input: " + kind);
  }
  if (singleton) {
    checkState(sideInputInfo.getSources().size() == 1, "expecting a singleton side input kind to have a single source");
  }

  // A single counter is shared by every reader created for this side input.
  SideInputReadCounter readCounter = new DataflowSideInputReadCounter(executionContext, operationContext, sideInputIndex);
  ImmutableList.Builder<IsmReader<?>> readers = ImmutableList.builder();

  for (Source source : sideInputInfo.getSources()) {
    // A source without a codec is read with a null coder.
    Coder<?> coder = source.getCodec() == null ? null : CloudObjects.coderFromCloudObject(CloudObject.fromSpec(source.getCodec()));
    CloudObject sourceSpec = CloudObject.fromSpec(source.getSpec());
    final String pattern = getString(sourceSpec, WorkerPropertyNames.FILENAME);

    for (String expandedFile : Filepatterns.expandAtNFilepattern(pattern)) {
      // Work on a clone so the file name is written to a per-file copy, not to sourceSpec.
      CloudObject perFileSpec = sourceSpec.clone();
      addString(perFileSpec, WorkerPropertyNames.FILENAME, expandedFile);

      @SuppressWarnings("unchecked") NativeReader<?> created = readerFactory.create(perFileSpec, coder, options, executionContext, operationContext);
      checkState(created instanceof IsmReader, "%s only supports %s as a reader but was %s.", IsmSideInputReader.class.getSimpleName(), IsmReader.class.getSimpleName(), created.getClass().getSimpleName());

      IsmReader ism = (IsmReader) created;
      readers.add(new SideInputTrackingIsmReader<>(ism, readCounter));
    }
  }
  return readers.build();
}
Aggregations