Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
The class TestLazyBinaryColumnarSerDe, method testSerDeOuterNulls.
@Test
public void testSerDeOuterNulls() throws SerDeException {
  StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
  String cols = ObjectInspectorUtils.getFieldNames(oi);
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, cols);
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
  LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
  serde.initialize(new Configuration(), props, null);
  OuterStruct outerStruct = new OuterStruct();
  BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
  ObjectInspector out_oi = serde.getObjectInspector();
  Object out_o = serde.deserialize(braw);
  if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
    System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
    System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
    fail("Deserialized object does not compare");
  }
}
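The test derives the LIST_COLUMNS and LIST_COLUMN_TYPES table properties from a reflection ObjectInspector. The same serde can also be initialized against an explicit schema; a minimal sketch using the same property keys and initialize signature as above (the column names and types here are hypothetical, for illustration only):

Properties explicitProps = new Properties();
// Hypothetical two-column schema: an int key and a string value.
explicitProps.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
explicitProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
LazyBinaryColumnarSerDe explicitSerde = new LazyBinaryColumnarSerDe();
explicitSerde.initialize(new Configuration(), explicitProps, null);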
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project elephant-bird by twitter.
The class RCFilePigStorage, method getNext.
@Override
public Tuple getNext() throws IOException {
  try {
    if (!in.nextKeyValue()) {
      return null;
    }
    BytesRefArrayWritable byteRefs = (BytesRefArrayWritable) in.getCurrentValue();
    boolean isProjected = requiredColumns != null;
    int inputSize = byteRefs.size();
    int tupleSize = isProjected ? requiredColumns.length : inputSize;
    Tuple tuple = tupleFactory.newTuple(tupleSize);
    int tupleIdx = 0;
    for (int i = 0; i < inputSize && tupleIdx < tupleSize; i++) {
      if (!isProjected || i == requiredColumns[tupleIdx]) {
        // set if all the fields are required or the field is projected
        BytesRefWritable ref = byteRefs.get(i);
        if (ref != null && ref.getLength() > 0) {
          tuple.set(tupleIdx, new DataByteArray(ref.getBytesCopy()));
        }
        tupleIdx++;
      }
    }
    return tuple;
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}
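Each populated field in the returned tuple is a DataByteArray holding a copy of the column bytes; positions whose BytesRefWritable was empty are left null. A minimal sketch of a consumer that decodes such a tuple as UTF-8 text (printRow is a hypothetical helper, not part of the project; it assumes the usual imports for Tuple, DataByteArray, StandardCharsets, and IOException):

// Hypothetical consumer of a tuple produced by getNext(): decode each field as UTF-8 text.
static void printRow(Tuple tuple) throws IOException {
  for (int i = 0; i < tuple.size(); i++) {
    DataByteArray field = (DataByteArray) tuple.get(i);
    String text = (field == null) ? null : new String(field.get(), StandardCharsets.UTF_8);
    System.out.println("column " + i + " = " + text);
  }
}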
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project elephant-bird by twitter.
The class RCFilePigStorage, method putNext.
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple t) throws IOException {
  if (rowWritable == null) {
    // initialize
    if (numColumns < 1) {
      throw new IOException("number of columns is not set");
    }
    byteStream = new ByteStream.Output();
    rowWritable = new BytesRefArrayWritable();
    colValRefs = new BytesRefWritable[numColumns];
    for (int i = 0; i < numColumns; i++) {
      colValRefs[i] = new BytesRefWritable();
      rowWritable.set(i, colValRefs[i]);
    }
  }
  byteStream.reset();
  // write each field as a text (just like PigStorage)
  int sz = t.size();
  int startPos = 0;
  for (int i = 0; i < sz && i < numColumns; i++) {
    StorageUtil.putField(byteStream, t.get(i));
    colValRefs[i].set(byteStream.getData(), startPos, byteStream.getCount() - startPos);
    startPos = byteStream.getCount();
  }
  try {
    writer.write(null, rowWritable);
  } catch (InterruptedException e) {
    throw new IOException(e);
  }
}
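The store path keeps all column values in one shared byte buffer and points each BytesRefWritable at its slice, so no per-field byte arrays are allocated. A minimal standalone sketch of the same pattern, not taken from the project (buildRow and the sample values are hypothetical):

// Build one BytesRefArrayWritable row from plain strings, sharing a single buffer.
static BytesRefArrayWritable buildRow(String[] fields) throws IOException {
  ByteStream.Output buffer = new ByteStream.Output();
  BytesRefArrayWritable row = new BytesRefArrayWritable(fields.length);
  int startPos = 0;
  for (int i = 0; i < fields.length; i++) {
    buffer.write(fields[i].getBytes(StandardCharsets.UTF_8));
    BytesRefWritable ref = new BytesRefWritable();
    // Point the column at its slice of the shared buffer rather than copying.
    ref.set(buffer.getData(), startPos, buffer.getCount() - startPos);
    row.set(i, ref);
    startPos = buffer.getCount();
  }
  return row;
}

A call such as buildRow(new String[] { "a", "bb", "ccc" }) would yield one row that could then be handed to an RCFile writer, mirroring what putNext does per tuple.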
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
The class TestRCFile, method testReadCorruptFile.
@Test
public void testReadCorruptFile() throws IOException, SerDeException {
  cleanup();
  byte[][] record = { null, null, null, null, null, null, null, null };
  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
  final int recCount = 100;
  Random rand = new Random();
  for (int recIdx = 0; recIdx < recCount; recIdx++) {
    for (int i = 0; i < record.length; i++) {
      record[i] = String.valueOf(rand.nextInt()).getBytes(StandardCharsets.UTF_8);
    }
    for (int i = 0; i < record.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
  }
  writer.close();
  // Insert junk in middle of file. Assumes file is on local disk.
  RandomAccessFile raf = new RandomAccessFile(file.toUri().getPath(), "rw");
  long corruptOffset = raf.length() / 2;
  LOG.info("corrupting " + raf + " at offset " + corruptOffset);
  raf.seek(corruptOffset);
  raf.writeBytes("junkjunkjunkjunkjunkjunkjunkjunk");
  raf.close();
  // Set the option for tolerating corruptions. The read should succeed.
  Configuration tmpConf = new Configuration(conf);
  tmpConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
  RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);
  LongWritable rowID = new LongWritable();
  while (true) {
    boolean more = reader.next(rowID);
    if (!more) {
      break;
    }
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(8);
  }
  reader.close();
}
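With hive.io.rcfile.tolerate.corruptions enabled, the read above is expected to complete without throwing even though junk was written into the middle of the file. A minimal standalone sketch of the same tolerant scan that simply counts whatever rows the reader can still return (it assumes fs, file, and conf as set up in the test):

Configuration tolerantConf = new Configuration(conf);
tolerantConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
RCFile.Reader tolerantReader = new RCFile.Reader(fs, file, tolerantConf);
LongWritable rowId = new LongWritable();
BytesRefArrayWritable row = new BytesRefArrayWritable();
long rowsRead = 0;
while (tolerantReader.next(rowId)) {
  tolerantReader.getCurrentRow(row);
  rowsRead++;
}
tolerantReader.close();
System.out.println("rows recovered = " + rowsRead);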
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
The class TestRCFile, method testReadOldFileHeader.
@Test
public void testReadOldFileHeader() throws IOException {
  String[] row = new String[] { "Tester", "Bart", "333 X St.", "Reno", "NV", "USA" };
  RCFile.Reader reader = new RCFile.Reader(fs, new Path(HiveTestUtils.getFileFromClasspath("rc-file-v0.rc")), conf);
  LongWritable rowID = new LongWritable();
  BytesRefArrayWritable cols = new BytesRefArrayWritable();
  assertTrue("old file reader first row", reader.next(rowID));
  reader.getCurrentRow(cols);
  assertEquals(row.length, cols.size());
  for (int i = 0; i < cols.size(); ++i) {
    assertEquals(row[i], new String(cols.get(i).getBytesCopy()));
  }
  assertFalse("old file reader end", reader.next(rowID));
  reader.close();
}
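The assertion loop decodes the column bytes with the platform default charset. An illustrative variant (not the project's code) that pins UTF-8 explicitly, assuming the fixture's text is ASCII/UTF-8, could replace the loop inside the same test method:

for (int i = 0; i < cols.size(); ++i) {
  // getBytesCopy() returns only the referenced bytes, so the copy can be decoded directly.
  String actual = new String(cols.get(i).getBytesCopy(), StandardCharsets.UTF_8);
  assertEquals(row[i], actual);
}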