use of org.apache.beam.runners.dataflow.util.PackageUtil.StagedFile in project beam by apache.
the class DataflowRunner method stageArtifacts.
/**
 * Gathers the file artifacts declared by every environment of the pipeline and hands them to the
 * stager configured in {@code options}.
 *
 * <p>Every dependency must carry the FILE artifact type; any other type is rejected. The staged
 * name is read from the STAGING_TO role payload when the artifact has that role, and is otherwise
 * derived from the local file and its SHA-256 content hash via
 * {@code Environments.createStagingFileName}. When several dependencies resolve to the same staged
 * name, only the first one encountered is staged.
 *
 * @param pipeline pipeline proto whose environments list the artifacts to stage
 * @return whatever {@code options.getStager().stageFiles(...)} returns for the collected files
 * @throws RuntimeException if an artifact has an unsupported type, a payload cannot be parsed, or
 *     building a staged name fails with an {@link IOException}
 */
protected List<DataflowPackage> stageArtifacts(RunnerApi.Pipeline pipeline) {
  Set<String> seenStagedNames = new HashSet<>();
  ImmutableList.Builder<StagedFile> pending = ImmutableList.builder();

  // Only the environments themselves are needed; their ids play no part in staging.
  for (RunnerApi.Environment environment :
      pipeline.getComponents().getEnvironmentsMap().values()) {
    for (RunnerApi.ArtifactInformation artifact : environment.getDependenciesList()) {
      boolean isFileArtifact =
          BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE).equals(artifact.getTypeUrn());
      if (!isFileArtifact) {
        throw new RuntimeException(
            String.format("unsupported artifact type %s", artifact.getTypeUrn()));
      }

      RunnerApi.ArtifactFilePayload filePayload;
      try {
        filePayload = RunnerApi.ArtifactFilePayload.parseFrom(artifact.getTypePayload());
      } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException("Error parsing artifact file payload.", e);
      }

      boolean hasStagingRole =
          BeamUrns.getUrn(RunnerApi.StandardArtifacts.Roles.STAGING_TO)
              .equals(artifact.getRoleUrn());
      String stagedName;
      if (!hasStagingRole) {
        // No explicit name was requested: build one from the file and its content hash.
        try {
          File localFile = new File(filePayload.getPath());
          HashCode contentHash = Files.asByteSource(localFile).hash(Hashing.sha256());
          stagedName = Environments.createStagingFileName(localFile, contentHash);
        } catch (IOException e) {
          throw new RuntimeException(
              String.format(
                  "Error creating staged name for artifact %s", filePayload.getPath()),
              e);
        }
      } else {
        // The artifact states the name it wants to be staged under.
        try {
          stagedName =
              RunnerApi.ArtifactStagingToRolePayload.parseFrom(artifact.getRolePayload())
                  .getStagedName();
        } catch (InvalidProtocolBufferException e) {
          throw new RuntimeException("Error parsing artifact staging_to role payload.", e);
        }
      }

      // Set.add returns false for a name already recorded, so duplicates are skipped.
      if (seenStagedNames.add(stagedName)) {
        pending.add(StagedFile.of(filePayload.getPath(), filePayload.getSha256(), stagedName));
      }
    }
  }

  return options.getStager().stageFiles(pending.build());
}
use of org.apache.beam.runners.dataflow.util.PackageUtil.StagedFile in project beam by apache.
the class PackageUtilTest method makeStagedFile.
/**
 * Builds a {@link StagedFile} for the given path.
 *
 * <p>An existing directory is first passed through {@code zipDirectory} and the resulting file
 * becomes the staged source; an existing regular file is used as is. In both cases the SHA-256 of
 * the source file's bytes is recorded. A path that does not exist is kept as the source and gets
 * the SHA-256 of an empty byte array.
 *
 * @param source path of the file or directory to stage
 * @param destName destination name to use, or {@code null} to derive one with
 *     {@code Environments.createStagingFileName} from the original path and the hash
 * @return the staged-file descriptor
 * @throws IOException declared for the zipping and hashing steps
 */
private static StagedFile makeStagedFile(String source, String destName) throws IOException {
  File requested = new File(source);
  File toStage = requested;
  HashCode digest;

  if (!requested.exists()) {
    // Nothing on disk to read: fall back to the hash of zero bytes.
    digest = Hashing.sha256().hashBytes(new byte[] {});
  } else {
    if (requested.isDirectory()) {
      toStage = zipDirectory(requested);
    }
    digest = Files.asByteSource(toStage).hash(Hashing.sha256());
  }

  String stagedName = destName;
  if (stagedName == null) {
    // The generated name is based on the path as requested, not on the zipped file.
    stagedName = Environments.createStagingFileName(requested, digest);
  }
  return StagedFile.of(toStage.getPath(), digest.toString(), stagedName);
}
use of org.apache.beam.runners.dataflow.util.PackageUtil.StagedFile in project beam by apache.
the class PackageUtilTest method makePackageAttributes.
/**
 * Creates the {@link PackageAttributes} for a file staged under {@code STAGING_PATH}.
 *
 * <p>The file is first turned into a {@link StagedFile} via the single-argument
 * {@code makeStagedFile} helper, whose source, SHA-256 and destination feed
 * {@code PackageAttributes.forFileToStage}.
 *
 * @param file file to describe
 * @param overridePackageName package name to apply through {@code withPackageName}, or
 *     {@code null} to keep the attributes as created
 * @return the resulting package attributes
 * @throws IOException declared for the staged-file construction
 */
private static PackageAttributes makePackageAttributes(
    File file, @Nullable String overridePackageName) throws IOException {
  StagedFile staged = makeStagedFile(file.getPath());
  PackageAttributes computed =
      PackageUtil.PackageAttributes.forFileToStage(
          staged.getSource(), staged.getSha256(), staged.getDestination(), STAGING_PATH);
  return overridePackageName == null
      ? computed
      : computed.withPackageName(overridePackageName);
}
use of org.apache.beam.runners.dataflow.util.PackageUtil.StagedFile in project beam by apache.
the class PackageUtilTest method testPackageUploadWithLargeClasspathLogsWarning.
/**
 * Stages 1005 classpath elements that all point at the same temporary file and checks that a
 * warning mentioning the element count is logged.
 */
@Test
public void testPackageUploadWithLargeClasspathLogsWarning() throws Exception {
  File sharedFile = makeFileWithContents("file.txt", "This is a test!");

  // Stub the GCS lookup to report an object whose size equals the temp file's length.
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenReturn(
          ImmutableList.of(
              StorageObjectOrIOException.create(
                  createStorageObject(STAGING_PATH, sharedFile.length()))));

  // Same source file every time; only the destination name differs per element.
  List<StagedFile> elements = Lists.newLinkedList();
  int index = 0;
  while (index < 1005) {
    elements.add(makeStagedFile(sharedFile.getAbsolutePath(), "element" + index));
    index++;
  }

  defaultPackageUtil.stageClasspathElements(elements, STAGING_PATH, createOptions);

  logged.verifyWarn("Your classpath contains 1005 elements, which Google Cloud Dataflow");
}
Aggregations