Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class TestRCFile, method testGetColumn.
/**
* Tests {@link RCFile.Reader#getColumn(int, BytesRefArrayWritable) } method.
* @throws IOException
*/
@Test
public void testGetColumn() throws IOException {
  cleanup();
  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null,
      RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")),
      new DefaultCodec());

  byte[][] record_1 = {
      "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
      "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
      new byte[0], "NULL".getBytes("UTF-8") };
  byte[][] record_2 = {
      "100".getBytes("UTF-8"), "200".getBytes("UTF-8"), "123".getBytes("UTF-8"),
      "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
      new byte[0], "NULL".getBytes("UTF-8") };

  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
  for (int i = 0; i < record_1.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  bytes.clear();
  for (int i = 0; i < record_2.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  writer.close();

  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  LongWritable rowID = new LongWritable();
  assertTrue(reader.next(rowID));
  assertEquals(rowID.get(), 0L);
  assertTrue(reader.next(rowID));
  assertEquals(rowID.get(), 1L);

  BytesRefArrayWritable result = null;
  BytesRefWritable brw;
  for (int col = 0; col < 8; col++) {
    BytesRefArrayWritable result2 = reader.getColumn(col, result);
    if (result == null) {
      assertNotNull(result2);
      result = result2;
    } else {
      // getColumn(int, BytesRefArrayWritable) should return the instance passed in:
      assertSame(result2, result);
    }
    // each column has a height of 2 (one value per appended row):
    assertEquals(2, result.size());
    for (int row = 0; row < result.size(); row++) {
      brw = result.get(row);
      int start = brw.getStart();
      int len = brw.getLength();
      byte[] actualData = Arrays.copyOfRange(brw.getData(), start, start + len);
      byte[] expectedData = (row == 0) ? record_1[col] : record_2[col];
      assertArrayEquals("col=" + col + " : row=" + row, expectedData, actualData);
    }
    result.clear();
  }
  reader.close();
}
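For comparison, here is a minimal sketch (not part of the test) of the row-wise read path over the same fs/conf/file fixtures: getColumn pulls one column across all rows of a row group, while getCurrentRow materializes one row at a time.

  RCFile.Reader rowReader = new RCFile.Reader(fs, file, conf);
  LongWritable id = new LongWritable();
  BytesRefArrayWritable row = new BytesRefArrayWritable();
  while (rowReader.next(id)) {
    rowReader.getCurrentRow(row);
    for (int i = 0; i < row.size(); i++) {
      BytesRefWritable field = row.get(i);
      // field.getData(), field.getStart() and field.getLength() delimit the raw column bytes
    }
  }
  rowReader.close();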
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class TestRCFile, method testReadCorruptFile.
@Test
public void testReadCorruptFile() throws IOException, SerDeException {
  cleanup();

  byte[][] record = { null, null, null, null, null, null, null, null };

  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
  final int recCount = 100;
  Random rand = new Random();
  for (int recIdx = 0; recIdx < recCount; recIdx++) {
    for (int i = 0; i < record.length; i++) {
      record[i] = new Integer(rand.nextInt()).toString().getBytes("UTF-8");
    }
    for (int i = 0; i < record.length; i++) {
      BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length);
      bytes.set(i, cu);
    }
    writer.append(bytes);
    bytes.clear();
  }
  writer.close();

  // Insert junk in middle of file. Assumes file is on local disk.
  RandomAccessFile raf = new RandomAccessFile(file.toUri().getPath(), "rw");
  long corruptOffset = raf.length() / 2;
  LOG.info("corrupting " + raf + " at offset " + corruptOffset);
  raf.seek(corruptOffset);
  raf.writeBytes("junkjunkjunkjunkjunkjunkjunkjunk");
  raf.close();

  // Set the option for tolerating corruptions. The read should succeed.
  Configuration tmpConf = new Configuration(conf);
  tmpConf.setBoolean("hive.io.rcfile.tolerate.corruptions", true);
  RCFile.Reader reader = new RCFile.Reader(fs, file, tmpConf);

  LongWritable rowID = new LongWritable();
  while (true) {
    boolean more = reader.next(rowID);
    if (!more) {
      break;
    }
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(8);
  }
  reader.close();
}
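A small follow-on sketch (hypothetical, reusing the tmpConf built in the test above) that counts how many rows remain readable once tolerance is enabled; with the flag set, the loop terminates normally instead of throwing when it hits the damaged region.

  RCFile.Reader tolerantReader = new RCFile.Reader(fs, file, tmpConf);
  LongWritable id = new LongWritable();
  BytesRefArrayWritable cols = new BytesRefArrayWritable();
  int rowsRead = 0;
  while (tolerantReader.next(id)) {
    tolerantReader.getCurrentRow(cols);
    rowsRead++;
  }
  tolerantReader.close();
  // rowsRead may be well below recCount: rows after the corrupt offset are typically unrecoverable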
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class PerformTestRCFileAndSeqFile, method writeSeqenceFileTest.
private void writeSeqenceFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
    CompressionCodec codec) throws IOException {

  byte[][] columnRandom;
  resetRandomGenerators();

  BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
  columnRandom = new byte[columnNum][];
  for (int i = 0; i < columnNum; i++) {
    BytesRefWritable cu = new BytesRefWritable();
    bytes.set(i, cu);
  }

  // a zero-length key is not allowed by the block-compress writer, so we use a ByteWritable
  ByteWritable key = new ByteWritable();
  SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, file,
      ByteWritable.class, BytesRefArrayWritable.class, CompressionType.BLOCK, codec);

  for (int i = 0; i < rowCount; i++) {
    nextRandomRow(columnRandom, bytes);
    seqWriter.append(key, bytes);
  }
  seqWriter.close();
}
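A hedged sketch of reading that sequence file back, assuming the same fs/conf/file; it uses the classic SequenceFile.Reader(fs, path, conf) constructor that pairs with the createWriter overload above.

  SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, file, conf);
  ByteWritable readKey = new ByteWritable();
  BytesRefArrayWritable readRow = new BytesRefArrayWritable();
  while (seqReader.next(readKey, readRow)) {
    // readRow.get(i) exposes each column as a BytesRefWritable
  }
  seqReader.close();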
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class TestRCFileCat, method write.
private void write(RCFile.Writer writer, byte[][] record) throws IOException {
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record.length);
  for (int i = 0; i < record.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record[i], 0, record[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
}
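A hypothetical call site for this helper (the column count and values are made up; the writer construction mirrors the other tests on this page, and the column number must be set on the conf before the writer is created).

  RCFileOutputFormat.setColumnNumber(conf, 3);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());
  write(writer, new byte[][] { "1".getBytes("UTF-8"), "a".getBytes("UTF-8"), "x".getBytes("UTF-8") });
  write(writer, new byte[][] { "2".getBytes("UTF-8"), "b".getBytes("UTF-8"), "y".getBytes("UTF-8") });
  writer.close();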
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.
The class ColumnarBinaryHiveRecordCursor, method parseStringColumn.
private void parseStringColumn(int column) {
  loaded[column] = true;
  if (hiveColumnIndexes[column] >= value.size()) {
    // this partition may contain fewer fields than what's declared in the schema
    // this happens when additional columns are added to the hive table after a partition has been created
    nulls[column] = true;
  } else {
    BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
    byte[] bytes;
    try {
      bytes = fieldData.getData();
    } catch (IOException e) {
      throw Throwables.propagate(e);
    }
    int start = fieldData.getStart();
    int length = fieldData.getLength();
    parseStringColumn(column, bytes, start, length);
  }
}
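As a standalone illustration (a sketch, not Presto code) of why the (start, length) window matters: getData() returns the shared backing buffer rather than just this field's bytes, so a caller that needs an exact-size array has to copy the slice itself, as the testGetColumn example above also does.

  static byte[] copyField(BytesRefWritable field) throws IOException {
    int start = field.getStart();
    // getData() is declared to throw IOException because the bytes may be lazily decompressed
    return Arrays.copyOfRange(field.getData(), start, start + field.getLength());
  }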