Uses of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.HashCode in the Apache Beam project.
From class BigqueryMatcher, method generateHash:
/**
 * Computes an order-insensitive digest of the given BigQuery rows.
 *
 * <p>Cell values within each row are sorted before hashing, so column order does not affect
 * the result; row hashes are then combined unordered, so row order does not affect it either.
 *
 * @param rows the table rows to digest; must be non-null
 * @return a hex string digest stable across row and cell orderings
 */
private String generateHash(@Nonnull List<TableRow> rows) {
  List<HashCode> rowHashes = Lists.newArrayList();
  for (TableRow row : rows) {
    List<String> cellsInOneRow = Lists.newArrayList();
    for (TableCell cell : row.getF()) {
      cellsInOneRow.add(Objects.toString(cell.getV()));
    }
    // Sort once per row. The original sorted inside the cell loop, re-sorting the partial
    // list after every add — same final order, but O(n^2 log n) wasted work per row.
    Collections.sort(cellsInOneRow);
    rowHashes.add(Hashing.sha1().hashString(cellsInOneRow.toString(), StandardCharsets.UTF_8));
  }
  return Hashing.combineUnordered(rowHashes).toString();
}
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.HashCode in the Apache Beam project.
From class HashingFn, method addInput:
/**
 * Folds {@code input} into the accumulator's running hash.
 *
 * <p>The element is hashed with Murmur3-128 over its UTF-8 bytes and combined with the
 * existing accumulator hash (if any) using an order-insensitive combination, so the final
 * result does not depend on the order elements were added.
 */
@Override
public Accum addInput(Accum accum, String input) {
  // Hash the new element first; insertion order into the combine list is irrelevant
  // because combineUnordered is commutative.
  HashCode elementHash = Hashing.murmur3_128().hashString(input, StandardCharsets.UTF_8);
  List<HashCode> pending = Lists.newArrayList(elementHash);
  // A fresh accumulator has no hash yet; otherwise fold the running hash in as well.
  if (accum.hashCode != null) {
    pending.add(accum.hashCode);
  }
  accum.hashCode = Hashing.combineUnordered(pending);
  return accum;
}
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.HashCode in the Apache Beam project.
From class Environments, method getArtifacts:
/**
 * Converts a list of local staging-file specs into {@link ArtifactInformation} protos.
 *
 * <p>Each entry is either a plain path or {@code stagedName=path}, which pins an explicit
 * staged name. Duplicates are dropped (first occurrence wins), non-existent paths are
 * silently skipped, and directories are zipped into a temp file so they stage as a single
 * artifact. When no staged name is pinned, a content-addressed name is derived from the
 * file's SHA-256.
 *
 * @param stagingFiles local paths (or {@code name=path} pairs) to convert
 * @return one artifact proto per existing, deduplicated entry, in first-seen order
 */
public static List<ArtifactInformation> getArtifacts(List<String> stagingFiles) {
  ImmutableList.Builder<ArtifactInformation> artifactsBuilder = ImmutableList.builder();
  // LinkedHashSet preserves first-seen order while dropping duplicate entries.
  Set<String> deduplicatedStagingFiles = new LinkedHashSet<>(stagingFiles);
  for (String path : deduplicatedStagingFiles) {
    File file;
    String stagedName = null;
    // "name=path" pins an explicit staged name; otherwise one is derived below.
    if (path.contains("=")) {
      String[] components = path.split("=", 2);
      file = new File(components[1]);
      stagedName = components[0];
    } else {
      file = new File(path);
    }
    // Spurious items get added to the classpath. Filter by just those that exist.
    if (!file.exists()) {
      continue;
    }
    // Directories are zipped so they can be staged as a single file artifact. The original
    // duplicated the payload construction across the directory and plain-file branches;
    // here we resolve the file to stage first and build the payload once.
    File fileToStage;
    try {
      fileToStage = file.isDirectory() ? zipDirectory(file) : file;
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    HashCode hashCode;
    try {
      hashCode = Files.asByteSource(fileToStage).hash(Hashing.sha256());
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
    if (stagedName == null) {
      // Content-addressed name derived from the ORIGINAL file and its hash, matching the
      // original code's createStagingFileName(file, hashCode) call.
      stagedName = createStagingFileName(file, hashCode);
    }
    artifactsBuilder.add(
        ArtifactInformation.newBuilder()
            .setTypeUrn(BeamUrns.getUrn(StandardArtifacts.Types.FILE))
            .setRoleUrn(BeamUrns.getUrn(StandardArtifacts.Roles.STAGING_TO))
            .setTypePayload(
                RunnerApi.ArtifactFilePayload.newBuilder()
                    .setPath(fileToStage.getPath())
                    .setSha256(hashCode.toString())
                    .build()
                    .toByteString())
            .setRolePayload(
                RunnerApi.ArtifactStagingToRolePayload.newBuilder()
                    .setStagedName(stagedName)
                    .build()
                    .toByteString())
            .build());
  }
  return artifactsBuilder.build();
}
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.HashCode in the Apache Beam project.
From class DataflowRunner, method stageArtifacts:
/**
 * Uploads every FILE-typed artifact dependency referenced by the pipeline's environments.
 *
 * <p>For each dependency, the staged name is taken from an explicit STAGING_TO role payload
 * when present; otherwise a content-addressed name is derived from the file's SHA-256.
 * Duplicate staged names are uploaded only once.
 *
 * @param pipeline the portable pipeline whose environment dependencies are staged
 * @return the list of packages reported by the configured stager
 * @throws RuntimeException on unsupported artifact types, malformed payloads, or I/O errors
 *     while hashing a file
 */
protected List<DataflowPackage> stageArtifacts(RunnerApi.Pipeline pipeline) {
  ImmutableList.Builder<StagedFile> filesToStageBuilder = ImmutableList.builder();
  // Staged names already scheduled, so the same artifact is uploaded at most once.
  Set<String> stagedNames = new HashSet<>();
  for (Map.Entry<String, RunnerApi.Environment> entry :
      pipeline.getComponents().getEnvironmentsMap().entrySet()) {
    for (RunnerApi.ArtifactInformation info : entry.getValue().getDependenciesList()) {
      // Only local-file artifacts are supported at this point.
      if (!BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE).equals(info.getTypeUrn())) {
        throw new RuntimeException(
            String.format("unsupported artifact type %s", info.getTypeUrn()));
      }
      RunnerApi.ArtifactFilePayload filePayload;
      try {
        filePayload = RunnerApi.ArtifactFilePayload.parseFrom(info.getTypePayload());
      } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException("Error parsing artifact file payload.", e);
      }
      String stagedName;
      if (BeamUrns.getUrn(RunnerApi.StandardArtifacts.Roles.STAGING_TO)
          .equals(info.getRoleUrn())) {
        // The pipeline already chose a staged name for this artifact; honor it.
        try {
          RunnerApi.ArtifactStagingToRolePayload stagingPayload =
              RunnerApi.ArtifactStagingToRolePayload.parseFrom(info.getRolePayload());
          stagedName = stagingPayload.getStagedName();
        } catch (InvalidProtocolBufferException e) {
          throw new RuntimeException("Error parsing artifact staging_to role payload.", e);
        }
      } else {
        // No explicit name: derive a content-addressed name from the file's SHA-256.
        try {
          File source = new File(filePayload.getPath());
          HashCode hashCode = Files.asByteSource(source).hash(Hashing.sha256());
          stagedName = Environments.createStagingFileName(source, hashCode);
        } catch (IOException e) {
          throw new RuntimeException(
              String.format("Error creating staged name for artifact %s", filePayload.getPath()),
              e);
        }
      }
      // Set.add returns false for duplicates — one lookup instead of the original
      // contains()-then-add() pair.
      if (!stagedNames.add(stagedName)) {
        continue;
      }
      filesToStageBuilder.add(
          StagedFile.of(filePayload.getPath(), filePayload.getSha256(), stagedName));
    }
  }
  return options.getStager().stageFiles(filesToStageBuilder.build());
}
Use of org.apache.beam.vendor.guava.v26_0_jre.com.google.common.hash.HashCode in the Apache Beam project.
From class DataflowRunner, method resolveArtifacts:
/**
 * Rewrites every FILE artifact dependency into a URL artifact pointing at its staged
 * location under the configured staging directory, preserving the recorded SHA-256.
 *
 * <p>Staged names are resolved the same way as during staging: an explicit STAGING_TO role
 * payload wins; otherwise the name is re-derived from the local file's SHA-256.
 *
 * @param pipeline the portable pipeline whose environments are rewritten
 * @return a copy of the pipeline with all dependencies expressed as URL artifacts
 * @throws RuntimeException on unsupported artifact types, malformed payloads, or I/O errors
 *     while hashing a file
 */
@VisibleForTesting
protected RunnerApi.Pipeline resolveArtifacts(RunnerApi.Pipeline pipeline) {
  RunnerApi.Pipeline.Builder pipelineBuilder = pipeline.toBuilder();
  RunnerApi.Components.Builder componentsBuilder = pipelineBuilder.getComponentsBuilder();
  // Environments are rebuilt from scratch with their dependency lists rewritten.
  componentsBuilder.clearEnvironments();
  for (Map.Entry<String, RunnerApi.Environment> entry :
      pipeline.getComponents().getEnvironmentsMap().entrySet()) {
    RunnerApi.Environment.Builder environmentBuilder = entry.getValue().toBuilder();
    environmentBuilder.clearDependencies();
    for (RunnerApi.ArtifactInformation info : entry.getValue().getDependenciesList()) {
      if (!BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.FILE).equals(info.getTypeUrn())) {
        throw new RuntimeException(
            String.format("unsupported artifact type %s", info.getTypeUrn()));
      }
      RunnerApi.ArtifactFilePayload filePayload;
      try {
        filePayload = RunnerApi.ArtifactFilePayload.parseFrom(info.getTypePayload());
      } catch (InvalidProtocolBufferException e) {
        throw new RuntimeException("Error parsing artifact file payload.", e);
      }
      String stagedName;
      if (BeamUrns.getUrn(RunnerApi.StandardArtifacts.Roles.STAGING_TO)
          .equals(info.getRoleUrn())) {
        try {
          RunnerApi.ArtifactStagingToRolePayload stagingPayload =
              RunnerApi.ArtifactStagingToRolePayload.parseFrom(info.getRolePayload());
          stagedName = stagingPayload.getStagedName();
        } catch (InvalidProtocolBufferException e) {
          throw new RuntimeException("Error parsing artifact staging_to role payload.", e);
        }
      } else {
        try {
          File source = new File(filePayload.getPath());
          HashCode hashCode = Files.asByteSource(source).hash(Hashing.sha256());
          stagedName = Environments.createStagingFileName(source, hashCode);
        } catch (IOException e) {
          throw new RuntimeException(
              String.format("Error creating staged name for artifact %s", filePayload.getPath()),
              e);
        }
      }
      // Resolve the staged name against the staging location to get the artifact URL.
      // (The original packed all of the following into one ~380-character expression.)
      String stagedUrl =
          FileSystems.matchNewResource(options.getStagingLocation(), true)
              .resolve(stagedName, ResolveOptions.StandardResolveOptions.RESOLVE_FILE)
              .toString();
      RunnerApi.ArtifactUrlPayload urlPayload =
          RunnerApi.ArtifactUrlPayload.newBuilder()
              .setUrl(stagedUrl)
              .setSha256(filePayload.getSha256())
              .build();
      environmentBuilder.addDependencies(
          info.toBuilder()
              .setTypeUrn(BeamUrns.getUrn(RunnerApi.StandardArtifacts.Types.URL))
              .setTypePayload(urlPayload.toByteString()));
    }
    componentsBuilder.putEnvironments(entry.getKey(), environmentBuilder.build());
  }
  return pipelineBuilder.build();
}
Aggregations