Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.
From the class ReduceSinkOperator, method toHiveKey:
// Serialize the keys and append the tag
protected HiveKey toHiveKey(Object obj, int tag, Integer distLength) throws SerDeException {
  BinaryComparable key = (BinaryComparable) keySerializer.serialize(obj, keyObjectInspector);
  int keyLength = key.getLength();
  if (tag == -1 || skipTag) {
    keyWritable.set(key.getBytes(), 0, keyLength);
  } else {
    keyWritable.setSize(keyLength + 1);
    System.arraycopy(key.getBytes(), 0, keyWritable.get(), 0, keyLength);
    keyWritable.get()[keyLength] = tagByte[0];
  }
  keyWritable.setDistKeyLength((distLength == null) ? keyLength : distLength);
  return keyWritable;
}
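The method above copies the serialized key bytes and then writes a single tag byte after them. Below is a minimal, self-contained sketch of that same tag-appending step using a plain BytesWritable (which, like HiveKey, extends BinaryComparable); the helper name appendTag is hypothetical and not part of Hive.

import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;

public class TagAppendSketch {

  // Copy the serialized key bytes and append one tag byte, mirroring toHiveKey.
  static BytesWritable appendTag(BinaryComparable key, byte tag) {
    int keyLength = key.getLength();
    BytesWritable out = new BytesWritable();
    out.setSize(keyLength + 1); // reserve room for the trailing tag byte
    System.arraycopy(key.getBytes(), 0, out.getBytes(), 0, keyLength);
    out.getBytes()[keyLength] = tag; // the tag occupies the last byte
    return out;
  }
}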
Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.
From the class TestBinaryPartitioner, method testDefaultOffsets:
@Test
public void testDefaultOffsets() {
  Configuration conf = new Configuration();
  BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
  BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  int partition1 = partitioner.getPartition(key1, null, 10);
  int partition2 = partitioner.getPartition(key2, null, 10);
  assertEquals(partition1, partition2);
  key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  key2 = new BytesWritable(new byte[] { 6, 2, 3, 4, 5 });
  partition1 = partitioner.getPartition(key1, null, 10);
  partition2 = partitioner.getPartition(key2, null, 10);
  assertTrue(partition1 != partition2);
  key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
  partition1 = partitioner.getPartition(key1, null, 10);
  partition2 = partitioner.getPartition(key2, null, 10);
  assertTrue(partition1 != partition2);
}
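The test above relies on the default offsets, which hash the whole key. As an illustration that is not part of the test, the hashed range can also be narrowed explicitly with BinaryPartitioner.setOffsets; the byte values and offsets below are made up for the example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner;
import org.apache.hadoop.util.ReflectionUtils;

public class CustomOffsetSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Hash only bytes 1 through 3 (inclusive) of each key instead of the whole array.
    BinaryPartitioner.setOffsets(conf, 1, 3);
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BytesWritable key1 = new BytesWritable(new byte[] { 9, 2, 3, 4, 5 });
    BytesWritable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 7 });
    // The keys agree on bytes 1..3, so they should land in the same partition.
    System.out.println(partitioner.getPartition(key1, null, 10) == partitioner.getPartition(key2, null, 10));
  }
}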
Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.
From the class TestBinaryPartitioner, method testUpperBound:
@Test
public void testUpperBound() {
  Configuration conf = new Configuration();
  BinaryPartitioner.setRightOffset(conf, 4);
  BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
  BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
  int partition1 = partitioner.getPartition(key1, null, 10);
  int partition2 = partitioner.getPartition(key2, null, 10);
  assertTrue(partition1 != partition2);
}
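A related, purely illustrative variant: offsets can also be negative, which BinaryPartitioner interprets relative to the end of the key, so restricting the hash to the last two bytes would look roughly like this (values chosen for the example).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner;
import org.apache.hadoop.util.ReflectionUtils;

public class NegativeOffsetSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    BinaryPartitioner.setOffsets(conf, -2, -1); // last two bytes of each key
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BytesWritable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BytesWritable key2 = new BytesWritable(new byte[] { 9, 9, 9, 4, 5 });
    // The keys share their last two bytes, so they should map to the same partition.
    System.out.println(partitioner.getPartition(key1, null, 10) == partitioner.getPartition(key2, null, 10));
  }
}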
Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.
From the class TotalOrderPartitioner, method setConf:
/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.
 */
// keytype from conf not static
@SuppressWarnings("unchecked")
public void setConf(Configuration conf) {
  try {
    this.conf = conf;
    String parts = getPartitionFile(conf);
    final Path partFile = new Path(parts);
    final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? // assume in DistributedCache
        FileSystem.getLocal(conf) : partFile.getFileSystem(conf);
    Job job = Job.getInstance(conf);
    Class<K> keyClass = (Class<K>) job.getMapOutputKeyClass();
    K[] splitPoints = readPartitions(fs, partFile, keyClass, conf);
    if (splitPoints.length != job.getNumReduceTasks() - 1) {
      throw new IOException("Wrong number of partitions in keyset");
    }
    RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
    for (int i = 0; i < splitPoints.length - 1; ++i) {
      if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
        throw new IOException("Split points are out of order");
      }
    }
    boolean natOrder = conf.getBoolean(NATURAL_ORDER, true);
    if (natOrder && BinaryComparable.class.isAssignableFrom(keyClass)) {
      partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
          // limit large but not huge.
          conf.getInt(MAX_TRIE_DEPTH, 200));
    } else {
      partitions = new BinarySearchNode(splitPoints, comparator);
    }
  } catch (IOException e) {
    throw new IllegalArgumentException("Can't read partitions file", e);
  }
}
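For context, setConf expects a partition file that was produced ahead of time. Below is a minimal, hedged sketch of how TotalOrderPartitioner is commonly wired into a job; the partition file path and sampler parameters are illustrative assumptions, and the job's input paths, formats, and key/value classes are assumed to be configured elsewhere.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetupSketch {
  public static void configure(Job job) throws Exception {
    // Tell the partitioner where its split points live (hypothetical path).
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/tmp/partitions.lst"));
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // Sample the job's input to write numReduceTasks - 1 sorted split points,
    // which setConf above later reads back into a trie or binary-search node.
    InputSampler.writePartitionFile(job, new InputSampler.RandomSampler<Text, NullWritable>(0.01, 1000, 10));
  }
}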
Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.
From the class VectorDeserializeOrcWriter, method writeOneRow:
@Override
public void writeOneRow(Writable row) throws IOException {
  if (sourceBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
    flushBatch();
  }
  BinaryComparable binComp = (BinaryComparable) row;
  deserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
  // Deserialize and append new row using the current batch size as the index.
  try {
    // Not using ByRef now since it's unsafe for text readers. Might be safe for others.
    vectorDeserializeRow.deserialize(sourceBatch, sourceBatch.size++);
  } catch (Exception e) {
    throw new IOException("DeserializeRead detail: " + vectorDeserializeRow.getDetailedReadPositionString(), e);
  }
}
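The cast to BinaryComparable works here because the common Writable row types already expose their backing bytes through that base class. A tiny illustrative sketch (not from the source) showing that both Text and BytesWritable support the same getBytes()/getLength() access:

import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class RowBytesSketch {

  // Same cast as writeOneRow: getLength() reports the valid bytes, not buffer capacity.
  static int payloadLength(Writable row) {
    BinaryComparable binComp = (BinaryComparable) row;
    return binComp.getLength();
  }

  public static void main(String[] args) {
    System.out.println(payloadLength(new Text("hello"))); // 5
    System.out.println(payloadLength(new BytesWritable(new byte[] { 1, 2 }))); // 2
  }
}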