Use of com.google.common.hash.HashFunction in project Flink by Apache.
The class StreamGraphHasherV1, method traverseStreamGraphAndGenerateHashes.
@Override
public Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) {
    // The hash function used to generate the hash
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();
    Set<Integer> visited = new HashSet<>();
    Queue<StreamNode> remaining = new ArrayDeque<>();
    // We need to make the source order deterministic. The source IDs are
    // not returned in the same order, which means that submitting the same
    // program twice might result in different traversal, which breaks the
    // deterministic hash assignment.
    List<Integer> sources = new ArrayList<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        sources.add(sourceNodeId);
    }
    Collections.sort(sources);
    // Start with source nodes
    for (Integer sourceNodeId : sources) {
        remaining.add(streamGraph.getStreamNode(sourceNodeId));
        visited.add(sourceNodeId);
    }
    StreamNode currentNode;
    while ((currentNode = remaining.poll()) != null) {
        // Generate the hash code.
        if (generateNodeHash(currentNode, hashFunction, hashes, streamGraph.isChainingEnabled())) {
            // Add the child nodes
            for (StreamEdge outEdge : currentNode.getOutEdges()) {
                StreamNode child = outEdge.getTargetVertex();
                if (!visited.contains(child.getId())) {
                    remaining.add(child);
                    visited.add(child.getId());
                }
            }
        } else {
            // We will revisit this later.
            visited.remove(currentNode.getId());
        }
    }
    return hashes;
}
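The traversal above depends on Hashing.murmur3_128(0) being fully deterministic: with a fixed seed, the same inputs in the same order always produce the same 16-byte hash across runs and JVMs, which is what makes stable hash assignment possible. A minimal standalone check of that property (the demo class name is ours, not part of Flink):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.util.Arrays;

public class DeterministicHashDemo {
    public static void main(String[] args) {
        // Fixed seed 0, mirroring the hasher above: equal inputs in equal
        // order always yield the same 128-bit (16-byte) hash.
        HashFunction hashFunction = Hashing.murmur3_128(0);
        byte[] first = hashFunction.newHasher().putInt(42).hash().asBytes();
        byte[] second = hashFunction.newHasher().putInt(42).hash().asBytes();
        System.out.println(Arrays.equals(first, second)); // true
        System.out.println(first.length);                 // 16
    }
}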
Use of com.google.common.hash.HashFunction in project Flink by Apache.
The class StreamGraphHasherV2, method traverseStreamGraphAndGenerateHashes.
/**
 * Returns a map with a hash for each {@link StreamNode} of the {@link
 * StreamGraph}. The hash is used as the {@link JobVertexID} in order to
 * identify nodes across job submissions if they didn't change.
 *
 * <p>The complete {@link StreamGraph} is traversed. The hash is either
 * computed from the transformation's user-specified id (see
 * {@link StreamTransformation#getUid()}) or generated in a deterministic way.
 *
 * <p>The generated hash is deterministic with respect to:
 * <ul>
 * <li>node-local properties (like parallelism, UDF, node ID),
 * <li>chained output nodes, and
 * <li>input node hashes
 * </ul>
 *
 * @return A map from {@link StreamNode#id} to hash as 16-byte array.
 */
@Override
public Map<Integer, byte[]> traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) {
    // The hash function used to generate the hash
    final HashFunction hashFunction = Hashing.murmur3_128(0);
    final Map<Integer, byte[]> hashes = new HashMap<>();
    Set<Integer> visited = new HashSet<>();
    Queue<StreamNode> remaining = new ArrayDeque<>();
    // We need to make the source order deterministic. The source IDs are
    // not returned in the same order, which means that submitting the same
    // program twice might result in different traversal, which breaks the
    // deterministic hash assignment.
    List<Integer> sources = new ArrayList<>();
    for (Integer sourceNodeId : streamGraph.getSourceIDs()) {
        sources.add(sourceNodeId);
    }
    Collections.sort(sources);
    // Start with source nodes
    for (Integer sourceNodeId : sources) {
        remaining.add(streamGraph.getStreamNode(sourceNodeId));
        visited.add(sourceNodeId);
    }
    StreamNode currentNode;
    while ((currentNode = remaining.poll()) != null) {
        // Generate the hash code.
        if (generateNodeHash(currentNode, hashFunction, hashes, streamGraph.isChainingEnabled())) {
            // Add the child nodes
            for (StreamEdge outEdge : currentNode.getOutEdges()) {
                StreamNode child = outEdge.getTargetVertex();
                if (!visited.contains(child.getId())) {
                    remaining.add(child);
                    visited.add(child.getId());
                }
            }
        } else {
            // We will revisit this later.
            visited.remove(currentNode.getId());
        }
    }
    return hashes;
}
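The Javadoc above states that a node's hash also depends on its input nodes' hashes. generateNodeHash itself is not shown on this page; the following is a hedged sketch of one way such mixing could work, feeding each already-computed input hash into the hasher before finalizing. The class, method, and signature are illustrative assumptions, not Flink's actual implementation:

import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import java.util.List;

public class NodeHashSketch {
    // Illustrative only: mixes node-local data with input hashes so that
    // any upstream change propagates into this node's identity.
    static byte[] hashWithInputs(int nodeId, List<byte[]> inputHashes) {
        Hasher hasher = Hashing.murmur3_128(0).newHasher();
        hasher.putInt(nodeId);                  // node-local property
        for (byte[] inputHash : inputHashes) {  // order must be deterministic
            hasher.putBytes(inputHash);
        }
        return hasher.hash().asBytes();
    }
}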
Use of com.google.common.hash.HashFunction in project Hive by Apache.
The class TestMurmur3, method testHashCodesM3_128_double.
@Test
public void testHashCodesM3_128_double() {
    int seed = 123;
    Random rand = new Random(seed);
    HashFunction hf = Hashing.murmur3_128(seed);
    for (int i = 0; i < 1000; i++) {
        double val = rand.nextDouble();
        byte[] data = ByteBuffer.allocate(8).putDouble(val).array();
        // Guava stores the hash bytes in little-endian order
        ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        buf.put(hf.hashBytes(data).asBytes());
        buf.flip();
        long gl1 = buf.getLong();
        long gl2 = buf.getLong(8);
        long[] hc = Murmur3.hash128(data, 0, data.length, seed);
        long m1 = hc[0];
        long m2 = hc[1];
        assertEquals(gl1, m1);
        assertEquals(gl2, m2);
    }
}
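Guava's HashCode also exposes the first eight bytes directly: asLong() interprets them in little-endian order, so the first of the two comparisons above could skip the manual ByteBuffer decoding. A small self-contained sketch (input value chosen arbitrarily):

import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class AsLongDemo {
    public static void main(String[] args) {
        HashFunction hf = Hashing.murmur3_128(123);
        byte[] data = ByteBuffer.allocate(8).putDouble(Math.PI).array();
        HashCode hc = hf.hashBytes(data);
        // asLong() reads the first 8 hash bytes in little-endian order,
        // matching the manual decoding in the test above.
        long viaAsLong = hc.asLong();
        long viaBuffer = ByteBuffer.wrap(hc.asBytes())
                .order(ByteOrder.LITTLE_ENDIAN)
                .getLong();
        System.out.println(viaAsLong == viaBuffer); // true
    }
}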
Use of com.google.common.hash.HashFunction in project Hive by Apache.
The class TestMurmur3, method testHashCodesM3_128_ints.
@Test
public void testHashCodesM3_128_ints() {
    int seed = 123;
    Random rand = new Random(seed);
    HashFunction hf = Hashing.murmur3_128(seed);
    for (int i = 0; i < 1000; i++) {
        int val = rand.nextInt();
        byte[] data = ByteBuffer.allocate(4).putInt(val).array();
        // Guava stores the hash bytes in little-endian order
        ByteBuffer buf = ByteBuffer.allocate(16).order(ByteOrder.LITTLE_ENDIAN);
        buf.put(hf.hashBytes(data).asBytes());
        buf.flip();
        long gl1 = buf.getLong();
        long gl2 = buf.getLong(8);
        long[] hc = Murmur3.hash128(data, 0, data.length, seed);
        long m1 = hc[0];
        long m2 = hc[1];
        assertEquals(gl1, m1);
        assertEquals(gl2, m2);
        // The offset/length variant must produce the same hash for the
        // same bytes embedded at an offset.
        byte[] offsetData = new byte[data.length + 50];
        System.arraycopy(data, 0, offsetData, 50, data.length);
        hc = Murmur3.hash128(offsetData, 50, data.length, seed);
        assertEquals(gl1, hc[0]);
        assertEquals(gl2, hc[1]);
    }
}
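The offset check above copies the input into a padded array first. Guava offers the same slice semantics directly via HashFunction.hashBytes(byte[] input, int off, int len), which hashes a region of an array without an intermediate copy. A short sketch (padding size chosen arbitrarily):

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;

public class SliceHashDemo {
    public static void main(String[] args) {
        HashFunction hf = Hashing.murmur3_128(123);
        byte[] data = {1, 2, 3, 4};
        byte[] padded = new byte[data.length + 50];
        System.arraycopy(data, 0, padded, 50, data.length);
        // Hashing the embedded slice equals hashing the original bytes.
        boolean same = hf.hashBytes(data)
                .equals(hf.hashBytes(padded, 50, data.length));
        System.out.println(same); // true
    }
}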
Use of com.google.common.hash.HashFunction in project Hive by Apache.
The class TestMurmur3, method testHashCodesM3_32_string.
@Test
public void testHashCodesM3_32_string() {
    String key = "test";
    int seed = 123;
    HashFunction hf = Hashing.murmur3_32(seed);
    int hc1 = hf.hashBytes(key.getBytes()).asInt();
    int hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
    assertEquals(hc1, hc2);
    key = "testkey";
    hc1 = hf.hashBytes(key.getBytes()).asInt();
    hc2 = Murmur3.hash32(key.getBytes(), key.getBytes().length, seed);
    assertEquals(hc1, hc2);
}
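One caveat in the test above: key.getBytes() uses the platform default charset, which is stable here only because the keys are ASCII. Guava's hashString is defined as hashing the string's encoded bytes, so an explicit charset makes the equivalence unambiguous. A sketch:

import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.nio.charset.StandardCharsets;

public class StringHashDemo {
    public static void main(String[] args) {
        HashFunction hf = Hashing.murmur3_32(123);
        String key = "test";
        // hashString(s, charset) is equivalent to hashBytes(s.getBytes(charset)).
        int viaString = hf.hashString(key, StandardCharsets.UTF_8).asInt();
        int viaBytes = hf.hashBytes(key.getBytes(StandardCharsets.UTF_8)).asInt();
        System.out.println(viaString == viaBytes); // true
    }
}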