use of com.google.common.hash.HashCode in project beam by apache.
the class BigqueryMatcher method generateHash.
/**
 * Computes a checksum over the given rows that does not depend on row order or on the
 * order of cells within a row.
 *
 * <p>Each row is reduced to the sorted list of its cell values (rendered with
 * {@code Objects.toString}), that list's string form is hashed with SHA-1, and the per-row
 * hashes are merged with {@code Hashing.combineUnordered}.
 *
 * <p>NOTE(review): Guava documents {@code combineUnordered} as rejecting an empty
 * collection, so an empty {@code rows} list is presumably an error here - confirm with callers.
 *
 * @param rows the rows to fingerprint
 * @return the string form of the combined hash
 */
private String generateHash(@Nonnull List<TableRow> rows) {
  List<HashCode> rowHashes = Lists.newArrayList();
  for (TableRow row : rows) {
    List<String> cellsInOneRow = Lists.newArrayList();
    for (TableCell cell : row.getF()) {
      cellsInOneRow.add(Objects.toString(cell.getV()));
    }
    // Sort once per row, after all cells are collected. Sorting inside the loop above
    // re-sorted the list on every insertion without changing the final result.
    Collections.sort(cellsInOneRow);
    rowHashes.add(Hashing.sha1().hashString(cellsInOneRow.toString(), StandardCharsets.UTF_8));
  }
  return Hashing.combineUnordered(rowHashes).toString();
}
use of com.google.common.hash.HashCode in project presto by prestodb.
the class MLFunctions method getOrLoadModel.
/**
 * Returns the model for the given serialized slice, consulting {@code MODEL_CACHE} first.
 *
 * <p>The cache key is {@code ModelUtils.modelHash(slice)}. On a miss the slice is
 * deserialized and the result is stored under that key before being returned.
 *
 * @param slice the serialized model
 * @return the cached or freshly deserialized model
 */
private static Model getOrLoadModel(Slice slice) {
  final HashCode cacheKey = ModelUtils.modelHash(slice);
  final Model cached = MODEL_CACHE.getIfPresent(cacheKey);
  if (cached != null) {
    return cached;
  }
  // Cache miss: deserialize and remember the result for later lookups.
  final Model loaded = ModelUtils.deserialize(slice);
  MODEL_CACHE.put(cacheKey, loaded);
  return loaded;
}
use of com.google.common.hash.HashCode in project presto by prestodb.
the class ModelUtils method deserialize.
/**
 * Rebuilds a {@link Model} from its serialized form.
 *
 * <p>Fields are read in this order: the format version at {@code VERSION_OFFSET}, a 32-byte
 * hash at {@code HASH_OFFSET}, the algorithm id at {@code ALGORITHM_OFFSET}, the
 * hyperparameter length at {@code HYPERPARAMETER_LENGTH_OFFSET}, the hyperparameter bytes at
 * {@code HYPERPARAMETERS_OFFSET}, then a 64-bit data length followed by the model data.
 * The stored hash must equal the SHA-256 of everything from {@code ALGORITHM_OFFSET} to the
 * end of the slice.
 *
 * <p>The model class is looked up by id in {@code MODEL_SERIALIZATION_IDS}, and its static
 * {@code deserialize(byte[])} method is invoked reflectively with the model data.
 *
 * @param slice the serialized model
 * @return the deserialized model
 * @throws IllegalArgumentException if the version is not {@code CURRENT_FORMAT_VERSION}, the
 *     stored hash does not match the payload, or the data length does not fit in an int
 * @throws NullPointerException if no model class is registered for the algorithm id
 * @throws RuntimeException wrapping any reflection failure raised while invoking the
 *     model's {@code deserialize} method
 */
public static Model deserialize(Slice slice) {
  int version = slice.getInt(VERSION_OFFSET);
  // Template form: the message is only built when the check fails.
  checkArgument(version == CURRENT_FORMAT_VERSION, "Unsupported version: %s", version);

  byte[] modelHashBytes = slice.getBytes(HASH_OFFSET, 32);
  HashCode expectedHash = HashCode.fromBytes(modelHashBytes);
  HashCode actualHash = Hashing.sha256().hashBytes(slice.getBytes(ALGORITHM_OFFSET, slice.length() - ALGORITHM_OFFSET));
  checkArgument(actualHash.equals(expectedHash), "model hash does not match data");

  int id = slice.getInt(ALGORITHM_OFFSET);
  Class<? extends Model> algorithm = MODEL_SERIALIZATION_IDS.inverse().get(id);
  requireNonNull(algorithm, format("Unsupported algorithm %d", id));

  int hyperparameterLength = slice.getInt(HYPERPARAMETER_LENGTH_OFFSET);
  // Read but not otherwise used in this method; only the length is needed to locate the data.
  byte[] hyperparameterBytes = slice.getBytes(HYPERPARAMETERS_OFFSET, hyperparameterLength);

  int dataLengthOffset = HYPERPARAMETERS_OFFSET + hyperparameterLength;
  long dataLength = slice.getLong(dataLengthOffset);
  // Reject lengths that would be silently truncated by the int cast below.
  checkArgument(dataLength >= 0 && dataLength <= Integer.MAX_VALUE, "Invalid data length: %s", dataLength);
  int dataOffset = dataLengthOffset + SIZE_OF_LONG;
  byte[] data = slice.getBytes(dataOffset, (int) dataLength);

  try {
    Method deserialize = algorithm.getMethod("deserialize", byte[].class);
    // Wrap in Object[] so the byte[] is passed as a single argument, not spread as varargs.
    return (Model) deserialize.invoke(null, new Object[] { data });
  } catch (NoSuchMethodException | InvocationTargetException | IllegalAccessException e) {
    // All three are checked exceptions, so this matches what the deprecated
    // Throwables.propagate(e) did: wrap in a RuntimeException with the cause preserved.
    throw new RuntimeException(e);
  }
}
use of com.google.common.hash.HashCode in project protoman by spotify.
the class GcsContentAddressedBlobStorage method put.
/**
 * Stores the given bytes as a blob named after their content hash.
 *
 * <p>The content hash comes from {@code hashFunction}; a second hash from
 * {@code GCS_UPLOAD_HASH_FUNCTION} is attached to the blob metadata as its CRC32C value,
 * together with the configured content type.
 *
 * @param bytes the content to store
 * @return the content hash that determines the blob name
 */
@Override
public HashCode put(final byte[] bytes) {
  final HashCode contentHash = hashFunction.hashBytes(bytes);
  final String uploadChecksum = GCS_UPLOAD_HASH_FUNCTION.hashBytes(bytes).toString();
  final BlobInfo blobInfo = BlobInfo.newBuilder(bucket, blobName(contentHash))
      .setContentType(contentType)
      .setCrc32c(uploadChecksum)
      .build();
  storage.create(blobInfo, bytes);
  return contentHash;
}
use of com.google.common.hash.HashCode in project protoman by spotify.
the class GcsSchemaStorage method open.
/**
 * Opens a transaction over the schema index.
 *
 * <p>The index is loaded from {@code indexFile} and the file's current generation is
 * recorded as the transaction's snapshot. Mutating operations change the in-memory index
 * only; {@code commit()} writes it back through {@code indexFile.replace}.
 *
 * <p>Every operation except {@code commit()} and {@code close()} requires the transaction
 * to be in the OPEN state. {@code commit()} moves it from OPEN to COMMITTED, and
 * {@code close()} moves it to CLOSED and fails if it was already CLOSED.
 *
 * <p>NOTE(review): the index content and its generation are fetched with two separate
 * calls; if the index file can change in between, they may not correspond - confirm.
 *
 * @return a new transaction in the OPEN state
 */
@Override
public Transaction open() {
  final ProtoIndex protoIndex = ProtoIndex.parse(indexFile.load());
  long indexGeneration = indexFile.currentGeneration();
  logger.debug("Starting transaction from snapshot={}", indexGeneration);

  return new Transaction() {

    final AtomicReference<TxState> state = new AtomicReference<>(TxState.OPEN);

    /** Stores the file content in blob storage and records its hash under the file path. */
    @Override
    public void storeFile(final SchemaFile file) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      final HashCode hash = protoStorage.put(file.content().getBytes(Charsets.UTF_8));
      protoIndex.updateProtoLocation(file.path().toString(), hash.toString());
      logger.info("Stored file. path={} content={}", file.path(), hash.toString());
    }

    /** Streams every file recorded in the index for the given snapshot. */
    @Override
    public Stream<SchemaFile> fetchAllFiles(final long snapshotVersion) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      ProtoIndex currentProtoIndex = protoIndex(snapshotVersion);
      return currentProtoIndex.getProtoLocations().entrySet().stream()
          .map(e -> schemaFile(Paths.get(e.getKey()), e.getValue()));
    }

    /** Records the version of a package in the in-memory index. */
    @Override
    public void storePackageVersion(final String protoPackage, final SchemaVersion version) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      protoIndex.updatePackageVersion(protoPackage, version);
    }

    /** Records the dependencies of a proto file in the in-memory index. */
    @Override
    public void storeProtoDependencies(final Path path, final Set<Path> paths) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      protoIndex.updateProtoDependencies(path, paths);
    }

    /** Looks up a package's version in the given snapshot; empty if the package is absent. */
    @Override
    public Optional<SchemaVersion> getPackageVersion(final long snapshotVersion, final String protoPackage) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      final SchemaVersion schemaVersion = protoIndex(snapshotVersion).getPackageVersions().get(protoPackage);
      return Optional.ofNullable(schemaVersion);
    }

    /** Streams the paths in the given snapshot whose parent directory matches the package. */
    @Override
    public Stream<Path> protosForPackage(final long snapshotVersion, final String pkgName) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      ProtoIndex currentProtoIndex = protoIndex(snapshotVersion);
      return currentProtoIndex.getProtoLocations().keySet().stream()
          .map(Paths::get)
          .filter(packageFilter(pkgName));
    }

    /** Streams the recorded dependencies of the given path in the given snapshot. */
    @Override
    public Stream<Path> getDependencies(final long snapshotVersion, final Path path) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      ProtoIndex currentProtoIndex = protoIndex(snapshotVersion);
      return currentProtoIndex.getProtoDependencies().get(path).stream();
    }

    /** Loads the file at the given path as recorded in the given snapshot. */
    @Override
    public SchemaFile schemaFile(final long snapshotVersion, final Path path) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      ProtoIndex currentProtoIndex = protoIndex(snapshotVersion);
      return SchemaFile.create(path, fileContents(currentProtoIndex, path));
    }

    /** Returns an immutable copy of all package versions in the given snapshot. */
    @Override
    public ImmutableMap<String, SchemaVersion> allPackageVersions(final long snapshotVersion) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      return ImmutableMap.copyOf(protoIndex(snapshotVersion).getPackageVersions());
    }

    /** Writes the in-memory index back and returns the value reported by the index file. */
    @Override
    public long commit() {
      // The compare-and-set both verifies the OPEN state and claims the commit.
      Preconditions.checkState(state.compareAndSet(TxState.OPEN, TxState.COMMITTED));
      final long snapshotVersion = indexFile.replace(protoIndex.toByteArray());
      logger.info("Committed. snapshotVersion={}", snapshotVersion);
      if (logger.isDebugEnabled()) {
        logger.debug("index={}", protoIndex.toProtoString());
      }
      return snapshotVersion;
    }

    /** Returns the index file's current generation. */
    @Override
    public long getLatestSnapshotVersion() {
      Preconditions.checkState(state.get() == TxState.OPEN);
      return indexFile.currentGeneration();
    }

    /** Streams the generations listed by the index file. */
    @Override
    public Stream<Long> getSnapshotVersions() {
      Preconditions.checkState(state.get() == TxState.OPEN);
      return indexFile.listGenerations();
    }

    /** Removes the path from the in-memory index; fails if it was not present. */
    @Override
    public void deleteFile(final Path path) {
      Preconditions.checkState(state.get() == TxState.OPEN);
      if (!protoIndex.removeProtoLocation(path.toString())) {
        throw new RuntimeException("Not found: " + path);
      }
    }

    /** Marks the transaction CLOSED; fails if it was already CLOSED. */
    @Override
    public void close() {
      Preconditions.checkState(state.getAndSet(TxState.CLOSED) != TxState.CLOSED);
      // nothing to do
    }

    /** Builds a predicate matching paths whose parent equals the package's directory path. */
    private Predicate<Path> packageFilter(final String pkgName) {
      Objects.requireNonNull(pkgName);
      final Path pkgPath = Paths.get(pkgName.replace('.', '/'));
      // Path.getParent() is null for a single-element path (a file at the root), so compare
      // from the non-null side instead of dereferencing the parent.
      return path -> pkgPath.equals(path.getParent());
    }

    /** Resolves the path to its stored hash in the index and loads the bytes as UTF-8 text. */
    private String fileContents(final ProtoIndex protoIndex, final Path path) {
      Objects.requireNonNull(protoIndex);
      Objects.requireNonNull(path);
      final String location = protoIndex.getProtoLocations().get(path.toString());
      if (location == null) {
        throw new RuntimeException("Location not found: " + path);
      }
      final byte[] bytes = protoStorage.get(HashCode.fromString(location))
          .orElseThrow(() -> new IllegalStateException("Location found. Missing data: " + path));
      return new String(bytes, UTF_8);
    }

    /**
     * Returns the index for the requested snapshot: the transaction's own in-memory index
     * when the version equals the generation captured at open, otherwise a freshly parsed
     * copy of that generation's content.
     */
    private ProtoIndex protoIndex(final long snapshotVersion) {
      if (snapshotVersion != indexGeneration) {
        return ProtoIndex.parse(indexFile.contentForGeneration(snapshotVersion));
      }
      return protoIndex;
    }

    /** Loads the blob stored under the given hash and wraps it as a schema file. */
    private SchemaFile schemaFile(final Path path, final String hash) {
      Objects.requireNonNull(path);
      Objects.requireNonNull(hash);
      return SchemaFile.create(
          path,
          new String(
              protoStorage.get(HashCode.fromString(hash))
                  .orElseThrow(() -> new RuntimeException("Not found: " + hash)),
              Charsets.UTF_8));
    }
  };
}
Aggregations