use of org.apache.hadoop.io.BytesWritable in project hive by apache.
the class SparkReduceRecordHandler method processVectors.
/**
 * Pushes every value of the current key group through the vectorized reducer, one row batch at
 * a time.
 *
 * <p>The key object is written once into row 0 of the batch and the columns below
 * {@code keysColumnOffset} are marked as repeating. Each value is then deserialized into the
 * columns starting at {@code keysColumnOffset}. The batch is handed to the reducer whenever
 * {@code BATCH_SIZE} rows have been collected, and once more at the end for any remainder.
 *
 * @param values iterator over the values of the current key; every element is cast to
 *     {@link BytesWritable}
 * @param tag index selecting the row batch and the value struct inspector to use
 * @return always {@code true}, i.e. it is not done and can take more inputs
 * @throws HiveException wrapping any exception raised while filling or processing a batch; the
 *     message contains the tag and the string form of the batch
 */
private <E> boolean processVectors(Iterator<E> values, byte tag) throws HiveException {
  final VectorizedRowBatch batch = batches[tag];
  batch.reset();
  buffer.reset();

  // Deserialize the key into the leading columns, then flag those columns as repeating.
  VectorizedBatchUtil.addRowToBatchFrom(keyObject, keyStructInspector, 0, 0, batch, buffer);
  int keyColumn = 0;
  while (keyColumn < keysColumnOffset) {
    VectorizedBatchUtil.setRepeatingColumn(batch, keyColumn);
    keyColumn++;
  }

  // Number of value rows written into the batch since the last flush.
  int pendingRows = 0;
  try {
    while (values.hasNext()) {
      // Deserialize the value into the columns that follow the key columns.
      Object valueObj = deserializeValue((BytesWritable) values.next(), tag);
      VectorizedBatchUtil.addRowToBatchFrom(
          valueObj, valueStructInspectors[tag], pendingRows, keysColumnOffset, batch, buffer);
      pendingRows++;

      if (pendingRows < BATCH_SIZE) {
        continue;
      }

      // Batch is full: hand it to the reducer and start counting rows from zero again.
      // NOTE(review): neither the batch nor the buffer is reset between flushes within one key
      // group - confirm that this is intended.
      VectorizedBatchUtil.setBatchSize(batch, pendingRows);
      reducer.process(batch, tag);
      pendingRows = 0;
      if (isLogInfoEnabled) {
        logMemoryInfo();
      }
    }

    // Flush the final, partially filled batch.
    if (pendingRows > 0) {
      VectorizedBatchUtil.setBatchSize(batch, pendingRows);
      reducer.process(batch, tag);
    }
    if (isLogInfoEnabled) {
      logMemoryInfo();
    }
  } catch (Exception e) {
    // Rendering the batch may itself fail; never let that hide the original exception.
    String batchDescription;
    try {
      batchDescription = batch.toString();
    } catch (Exception e2) {
      batchDescription = "[Error getting row data with exception " + StringUtils.stringifyException(e2) + " ]";
    }
    throw new HiveException("Error while processing vector batch (tag=" + tag + ") " + batchDescription, e);
  }

  // give me more
  return true;
}
use of org.apache.hadoop.io.BytesWritable in project hive by apache.
the class VerifyFast method verifyDeserializeRead.
/**
 * Reads the next field from {@code deserializeRead} and fails the test (via
 * {@code TestCase.fail}) unless it matches the expected {@code writable}.
 *
 * <p>A field that reads as null must be paired with a null {@code writable}, and vice versa.
 * Otherwise the value is taken from the reader member that corresponds to the primitive
 * category of {@code primitiveTypeInfo} and compared with the value held by {@code writable}.
 *
 * <p>Only the BOOLEAN through DOUBLE cases check the runtime class of {@code writable} before
 * casting; in the remaining cases a writable of the wrong class surfaces as a
 * {@link ClassCastException}.
 *
 * @param deserializeRead reader positioned before the field to verify
 * @param primitiveTypeInfo type of the field; selects which reader member is compared
 * @param writable expected value, or {@code null} when the field is expected to be null
 * @throws IOException declared for {@code deserializeRead.readNextField()}
 * @throws Error if the primitive category is not handled by this method
 */
public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
  boolean isNull = !deserializeRead.readNextField();
  if (isNull) {
    if (writable != null) {
      TestCase.fail("Field reports null but object is not null (class " + writable.getClass().getName() + ", " + writable.toString() + ")");
    }
    return;
  } else if (writable == null) {
    TestCase.fail("Field report not null but object is null");
  }
  switch (primitiveTypeInfo.getPrimitiveCategory()) {
    case BOOLEAN:
      {
        boolean value = deserializeRead.currentBoolean;
        if (!(writable instanceof BooleanWritable)) {
          TestCase.fail("Boolean expected writable not Boolean");
        }
        boolean expected = ((BooleanWritable) writable).get();
        if (value != expected) {
          TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case BYTE:
      {
        byte value = deserializeRead.currentByte;
        if (!(writable instanceof ByteWritable)) {
          TestCase.fail("Byte expected writable not Byte");
        }
        byte expected = ((ByteWritable) writable).get();
        if (value != expected) {
          TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
        }
      }
      break;
    case SHORT:
      {
        short value = deserializeRead.currentShort;
        if (!(writable instanceof ShortWritable)) {
          TestCase.fail("Short expected writable not Short");
        }
        short expected = ((ShortWritable) writable).get();
        if (value != expected) {
          TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case INT:
      {
        int value = deserializeRead.currentInt;
        if (!(writable instanceof IntWritable)) {
          TestCase.fail("Integer expected writable not Integer");
        }
        int expected = ((IntWritable) writable).get();
        if (value != expected) {
          TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case LONG:
      {
        long value = deserializeRead.currentLong;
        if (!(writable instanceof LongWritable)) {
          TestCase.fail("Long expected writable not Long");
        }
        // Primitive long: avoids boxing the expected value just to unbox it in the comparison.
        long expected = ((LongWritable) writable).get();
        if (value != expected) {
          TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case FLOAT:
      {
        float value = deserializeRead.currentFloat;
        if (!(writable instanceof FloatWritable)) {
          TestCase.fail("Float expected writable not Float");
        }
        float expected = ((FloatWritable) writable).get();
        // Primitive comparison: a NaN on either side is reported as a mismatch.
        if (value != expected) {
          TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case DOUBLE:
      {
        double value = deserializeRead.currentDouble;
        if (!(writable instanceof DoubleWritable)) {
          TestCase.fail("Double expected writable not Double");
        }
        double expected = ((DoubleWritable) writable).get();
        // Primitive comparison: a NaN on either side is reported as a mismatch.
        if (value != expected) {
          TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case STRING:
      {
        Text text = new Text(copyCurrentBytes(deserializeRead));
        String string = text.toString();
        String expected = ((Text) writable).toString();
        if (!string.equals(expected)) {
          TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
        }
      }
      break;
    case CHAR:
      {
        Text text = new Text(copyCurrentBytes(deserializeRead));
        String string = text.toString();
        HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
        HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
        if (!hiveChar.equals(expected)) {
          TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
        }
      }
      break;
    case VARCHAR:
      {
        Text text = new Text(copyCurrentBytes(deserializeRead));
        String string = text.toString();
        HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
        HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
        if (!hiveVarchar.equals(expected)) {
          TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
        }
      }
      break;
    case DECIMAL:
      {
        HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
        if (value == null) {
          TestCase.fail("Decimal field evaluated to NULL");
        }
        HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
        if (!value.equals(expected)) {
          DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
          int precision = decimalTypeInfo.getPrecision();
          int scale = decimalTypeInfo.getScale();
          // Plain concatenation keeps the report usable even when the expected value is null.
          TestCase.fail("Decimal field mismatch (expected " + expected + " found " + value + ") precision " + precision + ", scale " + scale);
        }
      }
      break;
    case DATE:
      {
        Date value = deserializeRead.currentDateWritable.get();
        Date expected = ((DateWritable) writable).get();
        if (!value.equals(expected)) {
          TestCase.fail("Date field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case TIMESTAMP:
      {
        Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
        Timestamp expected = ((TimestampWritable) writable).getTimestamp();
        if (!value.equals(expected)) {
          TestCase.fail("Timestamp field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case INTERVAL_YEAR_MONTH:
      {
        HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
        HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
        if (!value.equals(expected)) {
          TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case INTERVAL_DAY_TIME:
      {
        HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
        HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
        if (!value.equals(expected)) {
          TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected + " found " + value + ")");
        }
      }
      break;
    case BINARY:
      {
        byte[] byteArray = copyCurrentBytes(deserializeRead);
        BytesWritable bytesWritable = (BytesWritable) writable;
        // Only the first getLength() bytes of the writable's array are compared.
        byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        // Arrays.equals covers both the length check and the byte-by-byte comparison.
        if (!Arrays.equals(byteArray, expected)) {
          TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
        }
      }
      break;
    default:
      throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
  }
}

/**
 * Copies the byte range the reader currently exposes: {@code currentBytesLength} bytes of
 * {@code currentBytes}, starting at {@code currentBytesStart}.
 */
private static byte[] copyCurrentBytes(DeserializeRead deserializeRead) {
  int start = deserializeRead.currentBytesStart;
  return Arrays.copyOfRange(deserializeRead.currentBytes, start, start + deserializeRead.currentBytesLength);
}
use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
the class TestCombineSequenceFileInputFormat method testFormat.
/**
 * Writes several sequence files with randomly distributed keys, combines them through
 * {@code CombineSequenceFileInputFormat}, and checks that the single resulting split yields
 * every key exactly once.
 *
 * <p>The random seed is logged so that a failing run can be reproduced.
 */
@Test(timeout = 10000)
public void testFormat() throws IOException, InterruptedException {
  Job job = Job.getInstance(conf);
  Random random = new Random();
  long seed = random.nextLong();
  // Without the seed in the log, a failure caused by a particular file layout cannot be replayed.
  LOG.info("seed = " + seed);
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  // create files with a variety of lengths
  createFiles(length, numFiles, random, job);
  TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  // create a combine split for the files
  InputFormat<IntWritable, BytesWritable> format = new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
  for (int i = 0; i < 3; i++) {
    // numSplits is only logged: getSplits(job) below takes no split-count argument.
    int numSplits = random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("splitting: got = " + splits.size());
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.size());
    InputSplit split = splits.get(0);
    assertEquals("It should be CombineFileSplit", CombineFileSplit.class, split.getClass());
    // check the split: one bit per key, set when the key has been read
    BitSet bits = new BitSet(length);
    RecordReader<IntWritable, BytesWritable> reader = format.createRecordReader(split, context);
    MapContext<IntWritable, BytesWritable, IntWritable, BytesWritable> mcontext = new MapContextImpl<IntWritable, BytesWritable, IntWritable, BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(), reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    assertEquals("reader class is CombineFileRecordReader.", CombineFileRecordReader.class, reader.getClass());
    try {
      while (reader.nextKeyValue()) {
        IntWritable key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        assertNotNull("Value should not be null.", value);
        final int k = key.get();
        LOG.debug("read " + k);
        // A bit that is already set means the same key was returned twice.
        assertFalse("Key in multiple partitions.", bits.get(k));
        bits.set(k);
      }
    } finally {
      reader.close();
    }
    // Every one of the `length` keys must have been seen.
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
the class TestCombineSequenceFileInputFormat method createFiles.
/**
 * Creates {@code numFiles} sequence files named {@code test_<i>.seq} under {@code workDir}.
 *
 * <p>File {@code i} receives one record per integer key in the i-th range returned by
 * {@code createRanges(length, numFiles, random)}; each value is a random byte array of fewer
 * than 10 bytes.
 *
 * @param length passed through to {@code createRanges}
 * @param numFiles number of files to write
 * @param random source of the ranges and of the record payloads
 * @param job supplies the configuration used to create the writers
 * @throws IOException if a file cannot be created, written or closed
 */
private static void createFiles(int length, int numFiles, Random random, Job job) throws IOException {
  final Range[] ranges = createRanges(length, numFiles, random);
  int fileIndex = 0;
  while (fileIndex < numFiles) {
    final Path seqFile = new Path(workDir, "test_" + fileIndex + ".seq");
    // create a file holding one entry per key of this file's range
    @SuppressWarnings("deprecation")
    SequenceFile.Writer out = SequenceFile.createWriter(localFs, job.getConfiguration(), seqFile, IntWritable.class, BytesWritable.class);
    final Range keyRange = ranges[fileIndex];
    try {
      for (int keyValue = keyRange.start; keyValue < keyRange.end; keyValue++) {
        // Payload size is drawn first, then the payload bytes themselves.
        byte[] payload = new byte[random.nextInt(10)];
        random.nextBytes(payload);
        out.append(new IntWritable(keyValue), new BytesWritable(payload));
      }
    } finally {
      out.close();
    }
    fileIndex++;
  }
}
use of org.apache.hadoop.io.BytesWritable in project hadoop by apache.
the class TestMRSequenceFileAsBinaryInputFormat method testBinary.
/**
 * Writes {@code RECORDS} random Text key/value pairs to a sequence file, reads them back through
 * {@code SequenceFileAsBinaryInputFormat}, and checks that the raw bytes of every record
 * decode to the pair that was written.
 *
 * <p>The random generator is re-seeded with the same seed before reading, so the expected pairs
 * are regenerated in the order they were written.
 */
@Test
public void testBinary() throws IOException, InterruptedException {
  final Job job = Job.getInstance();
  final FileSystem fs = FileSystem.getLocal(job.getConfiguration());
  final Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
  final Path file = new Path(dir, "testbinary.seq");

  final Random rng = new Random();
  final long seed = rng.nextLong();
  rng.setSeed(seed);

  fs.delete(dir, true);
  FileInputFormat.setInputPaths(job, dir);

  // Phase 1: write the random records.
  final Text expectedKey = new Text();
  final Text expectedVal = new Text();
  final SequenceFile.Writer out = new SequenceFile.Writer(fs, job.getConfiguration(), file, Text.class, Text.class);
  try {
    int written = 0;
    while (written < RECORDS) {
      expectedKey.set(Integer.toString(rng.nextInt(), 36));
      expectedVal.set(Long.toString(rng.nextLong(), 36));
      out.append(expectedKey, expectedVal);
      written++;
    }
  } finally {
    out.close();
  }

  // Phase 2: read the records back as raw bytes and compare.
  final TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
  final InputFormat<BytesWritable, BytesWritable> binaryFormat = new SequenceFileAsBinaryInputFormat();
  int matched = 0;
  // Restart the random sequence so the expected pairs can be regenerated.
  rng.setSeed(seed);
  BytesWritable rawKey = new BytesWritable();
  BytesWritable rawVal = new BytesWritable();
  final Text decodedKey = new Text();
  final Text decodedVal = new Text();
  final DataInputBuffer in = new DataInputBuffer();
  FileInputFormat.setInputPaths(job, file);
  for (InputSplit split : binaryFormat.getSplits(job)) {
    final RecordReader<BytesWritable, BytesWritable> reader = binaryFormat.createRecordReader(split, context);
    final MapContext<BytesWritable, BytesWritable, BytesWritable, BytesWritable> mapContext =
        new MapContextImpl<BytesWritable, BytesWritable, BytesWritable, BytesWritable>(
            job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mapContext);
    try {
      while (reader.nextKeyValue()) {
        rawKey = reader.getCurrentKey();
        rawVal = reader.getCurrentValue();

        // Regenerate the pair that was written at this position.
        expectedKey.set(Integer.toString(rng.nextInt(), 36));
        expectedVal.set(Long.toString(rng.nextLong(), 36));

        // Decode the raw bytes back into Text for comparison.
        in.reset(rawKey.getBytes(), rawKey.getLength());
        decodedKey.readFields(in);
        in.reset(rawVal.getBytes(), rawVal.getLength());
        decodedVal.readFields(in);

        final String gotKey = decodedKey.toString();
        final String wantKey = expectedKey.toString();
        assertTrue("Keys don't match: " + "*" + gotKey + ":" + wantKey + "*", gotKey.equals(wantKey));

        final String gotVal = decodedVal.toString();
        final String wantVal = expectedVal.toString();
        assertTrue("Vals don't match: " + "*" + gotVal + ":" + wantVal + "*", gotVal.equals(wantVal));

        matched++;
      }
    } finally {
      reader.close();
    }
  }
  assertEquals("Some records not found", RECORDS, matched);
}
Aggregations