Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache: class TestFixedLengthInputFormat, method testNoRecordLength.
/**
 * Test with no record length set.
 */
@Test(timeout = 5000)
public void testNoRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Note: the fixed length record length config property is deliberately not set
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.configure(job);
  InputSplit[] splits = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader =
          format.getRecordReader(split, job, voidReporter);
    } catch (IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for not setting record length:", exceptionThrown);
}
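For contrast with the failure case above, a minimal happy-path sketch (not taken from the test class itself) sets a positive record length so that getRecordReader succeeds and each record arrives as a BytesWritable of exactly that length. The helper fields (localFs, workDir, defaultConf, voidReporter, createFile) are the ones the surrounding test class already uses; the 10-byte record length is an assumption mirroring the createFile(file, null, 10, 10) call above.

// Sketch only: read fixed-length records as BytesWritable after configuring a valid length.
localFs.delete(workDir, true);
Path file = new Path(workDir, "testFormat.txt");
createFile(file, null, 10, 10);
JobConf job = new JobConf(defaultConf);
FileInputFormat.setInputPaths(job, workDir);
FixedLengthInputFormat format = new FixedLengthInputFormat();
format.setRecordLength(job, 10);  // assumed record length, matching the 10-byte records above
format.configure(job);
List<String> records = new ArrayList<String>();
for (InputSplit split : format.getSplits(job, 1)) {
  RecordReader<LongWritable, BytesWritable> reader =
      format.getRecordReader(split, job, voidReporter);
  LongWritable key = reader.createKey();
  BytesWritable value = reader.createValue();
  try {
    while (reader.next(key, value)) {
      // value.getLength() should equal the configured record length (10 here)
      records.add(new String(value.getBytes(), 0, value.getLength()));
    }
  } finally {
    reader.close();
  }
}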
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache: class TestFixedLengthInputFormat, method readSplit.
private static List<String> readSplit(FixedLengthInputFormat format,
    InputSplit split, JobConf job) throws IOException {
  List<String> result = new ArrayList<String>();
  RecordReader<LongWritable, BytesWritable> reader =
      format.getRecordReader(split, job, voidReporter);
  LongWritable key = reader.createKey();
  BytesWritable value = reader.createValue();
  try {
    while (reader.next(key, value)) {
      result.add(new String(value.getBytes(), 0, value.getLength()));
    }
  } finally {
    reader.close();
  }
  return result;
}
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache: class TestFixedLengthInputFormat, method testZeroRecordLength.
/**
 * Test with record length set to 0.
 */
@Test(timeout = 5000)
public void testZeroRecordLength() throws IOException {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property
  JobConf job = new JobConf(defaultConf);
  FileInputFormat.setInputPaths(job, workDir);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job, 0);
  format.configure(job);
  InputSplit[] splits = format.getSplits(job, 1);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      RecordReader<LongWritable, BytesWritable> reader =
          format.getRecordReader(split, job, voidReporter);
    } catch (IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for zero record length:", exceptionThrown);
}
Use of org.apache.hadoop.io.BytesWritable in project hadoop by apache: class TestCombineSequenceFileInputFormat, method testFormat.
@Test(timeout = 10000)
public void testFormat() throws Exception {
  JobConf job = new JobConf(conf);
  Reporter reporter = Reporter.NULL;
  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  // create files with various lengths
  createFiles(length, numFiles, random);
  // create a combine split for the files
  InputFormat<IntWritable, BytesWritable> format =
      new CombineSequenceFileInputFormat<IntWritable, BytesWritable>();
  IntWritable key = new IntWritable();
  BytesWritable value = new BytesWritable();
  for (int i = 0; i < 3; i++) {
    int numSplits =
        random.nextInt(length / (SequenceFile.SYNC_INTERVAL / 20)) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    InputSplit[] splits = format.getSplits(job, numSplits);
    LOG.info("splitting: got = " + splits.length);
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.length);
    InputSplit split = splits[0];
    assertEquals("It should be CombineFileSplit",
        CombineFileSplit.class, split.getClass());
    // check the split
    BitSet bits = new BitSet(length);
    RecordReader<IntWritable, BytesWritable> reader =
        format.getRecordReader(split, job, reporter);
    try {
      while (reader.next(key, value)) {
        assertFalse("Key in multiple partitions.", bits.get(key.get()));
        bits.set(key.get());
      }
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
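The createFiles helper that produces the SequenceFile input is referenced above but not shown. Purely as an illustration of writing IntWritable/BytesWritable pairs with SequenceFile.Writer, a hypothetical version could look like the sketch below; the name writeSequenceFiles, the per-file division of the key range, and the random payload sizes are assumptions, not the actual Hadoop test code.

// Hypothetical sketch: numFiles SequenceFiles whose keys partition [0, length)
// and whose values are small random BytesWritable payloads. The real
// createFiles(length, numFiles, random) helper may differ.
private static void writeSequenceFiles(Configuration conf, FileSystem fs, Path dir,
    int length, int numFiles, Random random) throws IOException {
  int next = 0;
  for (int f = 0; f < numFiles; f++) {
    Path file = new Path(dir, "seq-" + f);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file,
        IntWritable.class, BytesWritable.class);
    try {
      // give each file roughly an equal share of the key range; last file takes the remainder
      int end = (f == numFiles - 1) ? length : next + length / numFiles;
      for (; next < end; next++) {
        byte[] payload = new byte[random.nextInt(10) + 1];
        random.nextBytes(payload);
        writer.append(new IntWritable(next), new BytesWritable(payload));
      }
    } finally {
      writer.close();
    }
  }
}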
Use of org.apache.hadoop.io.BytesWritable in project hive by apache: class TestLazyBinaryFast, method testLazyBinaryFast.
private void testLazyBinaryFast(
    SerdeRandomRowSource source, Object[][] rows,
    AbstractSerDe serde, StructObjectInspector rowOI,
    AbstractSerDe serde_fewer, StructObjectInspector writeRowOI,
    PrimitiveTypeInfo[] primitiveTypeInfos,
    boolean useIncludeColumns, boolean doWriteFewerColumns, Random r)
    throws Throwable {
  int rowCount = rows.length;
  int columnCount = primitiveTypeInfos.length;
  boolean[] columnsToInclude = null;
  if (useIncludeColumns) {
    columnsToInclude = new boolean[columnCount];
    for (int i = 0; i < columnCount; i++) {
      columnsToInclude[i] = r.nextBoolean();
    }
  }
  int writeColumnCount = columnCount;
  PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos;
  if (doWriteFewerColumns) {
    writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount);
  }
  LazyBinarySerializeWrite lazyBinarySerializeWrite =
      new LazyBinarySerializeWrite(writeColumnCount);
  // Try to serialize
  BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    Output output = new Output();
    lazyBinarySerializeWrite.set(output);
    for (int index = 0; index < writeColumnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable);
    }
    BytesWritable bytesWritable = new BytesWritable();
    bytesWritable.set(output.getData(), 0, output.getLength());
    serializeWriteBytes[i] = bytesWritable;
  }
  // Try to deserialize
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
    // column.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(writePrimitiveTypeInfos,
            /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serializeWriteBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as a null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
  // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
  for (int i = 0; i < rowCount; i++) {
    BytesWritable bytesWritable = serializeWriteBytes[i];
    LazyBinaryStruct lazyBinaryStruct;
    if (doWriteFewerColumns) {
      lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable);
    } else {
      lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable);
    }
    Object[] row = rows[i];
    for (int index = 0; index < writeColumnCount; index++) {
      PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index];
      Writable writable = (Writable) row[index];
      Object object = lazyBinaryStruct.getField(index);
      if (writable == null || object == null) {
        if (writable != null || object != null) {
          fail("SerDe deserialized NULL column mismatch");
        }
      } else {
        if (!object.equals(writable)) {
          fail("SerDe deserialized value does not match");
        }
      }
    }
  }
  // One Writable per row.
  BytesWritable[] serdeBytes = new BytesWritable[rowCount];
  // Serialize using the SerDe, then below deserialize using DeserializeRead.
  Object[] serdeRow = new Object[writeColumnCount];
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // LazyBinary seems to work better with a row object array instead of a Java object...
    for (int index = 0; index < writeColumnCount; index++) {
      serdeRow[index] = row[index];
    }
    BytesWritable serialized;
    if (doWriteFewerColumns) {
      serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI);
    } else {
      serialized = (BytesWritable) serde.serialize(serdeRow, rowOI);
    }
    BytesWritable bytesWritable =
        new BytesWritable(Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()));
    byte[] bytes1 = bytesWritable.getBytes();
    BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i];
    byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0,
        lazySerializedWriteBytes.getLength());
    if (bytes1.length != bytes2.length) {
      fail("SerializeWrite length " + bytes2.length + " and "
          + "SerDe serialization length " + bytes1.length
          + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")");
    }
    if (!Arrays.equals(bytes1, bytes2)) {
      fail("SerializeWrite and SerDe serialization does not match ("
          + Arrays.toString(primitiveTypeInfos) + ")");
    }
    serdeBytes[i] = bytesWritable;
  }
  // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
  for (int i = 0; i < rowCount; i++) {
    Object[] row = rows[i];
    // When doWriteFewerColumns, try to read more fields than exist in the buffer.
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(primitiveTypeInfos,
            /* useExternalBuffer */ false);
    BytesWritable bytesWritable = serdeBytes[i];
    lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
    for (int index = 0; index < columnCount; index++) {
      if (useIncludeColumns && !columnsToInclude[index]) {
        lazyBinaryDeserializeRead.skipNextField();
      } else if (index >= writeColumnCount) {
        // Should come back as a null.
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null);
      } else {
        Writable writable = (Writable) row[index];
        VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable);
      }
    }
    if (writeColumnCount == columnCount) {
      TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    }
  }
}
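A note on the Arrays.copyOfRange calls in the method above: BytesWritable.getBytes() returns the backing buffer, whose length can exceed the number of valid bytes reported by getLength(), so the test trims both byte arrays to their logical lengths before comparing. A tiny standalone illustration, assuming the usual org.apache.hadoop.io and java.util imports:

// Sketch only: the backing buffer vs. the logical content of a BytesWritable.
BytesWritable bw = new BytesWritable();
bw.set(new byte[] {1, 2, 3}, 0, 3);
byte[] backing = bw.getBytes();                                  // may be longer than bw.getLength()
byte[] exact = Arrays.copyOfRange(backing, 0, bw.getLength());   // exactly {1, 2, 3}
byte[] alsoExact = bw.copyBytes();                               // convenience copy of getLength() bytes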