use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class PerformTestRCFileAndSeqFile method performRCFileReadFirstAndLastColumnTest.
/**
 * Reads {@code file} with an {@link RCFile.Reader} after appending the first column (index 0)
 * and the last column (index {@code allColumnsNumber - 1}) to the read-column list held in
 * {@code conf}, and counts the rows that were iterated.
 *
 * <p>When {@code chechCorrect} is set, the random generators are reset up front and
 * {@code nextRandomRow} is asked for one row per row read; the first and last columns of that
 * row are compared with the columns returned by the reader.
 * NOTE(review): presumably this regenerates the rows in the same order they were written;
 * confirm against the writer side.
 *
 * @param fs               file system used to open the file
 * @param file             RCFile to read
 * @param allColumnsNumber total number of columns in the file
 * @param chechCorrect     whether to verify the two projected columns of every row
 * @return the number of rows read
 * @throws IOException           if opening or reading the file fails
 * @throws IllegalStateException if verification is enabled and a projected column differs
 */
public int performRCFileReadFirstAndLastColumnTest(FileSystem fs, Path file, int allColumnsNumber, boolean chechCorrect) throws IOException {
  byte[][] checkBytes = null;
  BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
  if (chechCorrect) {
    resetRandomGenerators();
    checkBytes = new byte[allColumnsNumber][];
  }

  // Project only the first and the last column.
  final int lastColumn = allColumnsNumber - 1;
  java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
  readCols.add(Integer.valueOf(0));
  readCols.add(Integer.valueOf(lastColumn));
  ColumnProjectionUtils.appendReadColumns(conf, readCols);

  int actualReadCount = 0;
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      if (chechCorrect) {
        nextRandomRow(checkBytes, checkRow);
        boolean matches = checkRow.get(0).equals(cols.get(0))
            && checkRow.get(lastColumn).equals(cols.get(lastColumn));
        if (!matches) {
          throw new IllegalStateException("Compare read and write error.");
        }
      }
      actualReadCount++;
    }
  } finally {
    // The reader was previously never closed; release it on both success and failure.
    reader.close();
  }
  return actualReadCount;
}
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestRCFile method writeTest.
/**
 * Writes one row, built from {@code fieldsData}, {@code count} times into a new RCFile at
 * {@code file} using {@link DefaultCodec}, then prints the resulting file length.
 *
 * <p>{@code cleanup()} is invoked first, and the column number stored in {@code conf} is set
 * to {@code fieldsData.length} before the writer is created.
 *
 * @param fs         file system the file is written to
 * @param count      number of times the row is appended
 * @param file       destination path of the RCFile
 * @param fieldsData one byte array per column; each is wrapped in full (offset 0, full length)
 * @param conf       configuration handed to the writer
 * @throws IOException    if writing or querying the file status fails
 * @throws SerDeException declared for callers; not raised directly by the visible statements
 */
private void writeTest(FileSystem fs, int count, Path file, byte[][] fieldsData, Configuration conf) throws IOException, SerDeException {
  cleanup();

  final int columnCount = fieldsData.length;
  RCFileOutputFormat.setColumnNumber(conf, columnCount);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, new DefaultCodec());

  // Wrap every column's bytes without copying them.
  BytesRefArrayWritable row = new BytesRefArrayWritable(columnCount);
  int column = 0;
  for (byte[] field : fieldsData) {
    row.set(column, new BytesRefWritable(field, 0, field.length));
    column++;
  }

  // The very same row instance is appended for each requested record.
  for (int written = 0; written < count; written++) {
    writer.append(row);
  }
  writer.close();

  long fileLen = fs.getFileStatus(file).getLen();
  System.out.println("The file size of RCFile with " + row.size() + " number columns and " + count + " number rows is " + fileLen);
}
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestRCFile method setup.
/**
 * Builds the shared fixture before each test: a fresh {@link Configuration} with all columns
 * marked for reading, the local file system, the test file path under
 * {@code ${test.tmp.dir}/mapred}, an initialized {@link ColumnarSerDe}, the raw input row
 * ({@code bytesArray}), the expected serialized full row ({@code s}), the expected serialized
 * partial row ({@code patialS}) and {@code numRepeat}.
 *
 * @throws Exception if any of the initialization steps fails
 */
@Before
public void setup() throws Exception {
  conf = new Configuration();
  ColumnProjectionUtils.setReadAllColumns(conf);
  fs = FileSystem.getLocal(conf);
  dir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred");
  file = new Path(dir, "test_rcfile");
  cleanup();

  // the SerDe part is from TestLazySimpleSerDe
  // Create the SerDe
  serDe = new ColumnarSerDe();
  tbl = createProperties();
  serDe.initialize(conf, tbl, null);

  // Raw input row; column 6 is an empty byte array.
  bytesArray = new byte[][] {
      "123".getBytes(StandardCharsets.UTF_8),
      "456".getBytes(StandardCharsets.UTF_8),
      "789".getBytes(StandardCharsets.UTF_8),
      "1000".getBytes(StandardCharsets.UTF_8),
      "5.3".getBytes(StandardCharsets.UTF_8),
      "hive and hadoop".getBytes(StandardCharsets.UTF_8),
      new byte[0],
      "NULL".getBytes(StandardCharsets.UTF_8)
  };

  // Expected serialized form of the full row; every cell gets its own freshly encoded array.
  final String[] fullRow = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "NULL", "NULL" };
  s = new BytesRefArrayWritable(bytesArray.length);
  for (int column = 0; column < fullRow.length; column++) {
    s.set(column, new BytesRefWritable(fullRow[column].getBytes(StandardCharsets.UTF_8)));
  }

  // partial test init
  // LazyString has no so-called NULL sequence. The value is empty string if not.
  // (That is why columns 5 and 7 are "" rather than "NULL".)
  final String[] partialRow = { "NULL", "NULL", "789", "1000", "NULL", "", "NULL", "" };
  for (int column = 0; column < partialRow.length; column++) {
    patialS.set(column, new BytesRefWritable(partialRow[column].getBytes(StandardCharsets.UTF_8)));
  }

  numRepeat = (int) Math.ceil(SequenceFile.SYNC_INTERVAL / (double) bytesArray.length);
}
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestRCFile method testSimpleReadAndWrite.
/**
 * Writes two eight-column records plus file metadata to an RCFile, reads the file back, and
 * checks both the metadata and every deserialized field of both records.
 *
 * <p>Changes relative to the previous version: the per-field assertion now passes the expected
 * value before the actual one, the failure message reports the field index rather than the
 * row index, and the reader is closed even when an assertion fails.
 *
 * @throws IOException    if writing or reading the file fails
 * @throws SerDeException if deserialization or object-inspector lookup fails
 */
@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
  cleanup();
  byte[][] record_1 = { "123".getBytes(StandardCharsets.UTF_8), "456".getBytes(StandardCharsets.UTF_8), "789".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
  byte[][] record_2 = { "100".getBytes(StandardCharsets.UTF_8), "200".getBytes(StandardCharsets.UTF_8), "123".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };

  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
  for (int i = 0; i < record_1.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  // Reuse the same row container for the second record.
  bytes.clear();
  for (int i = 0; i < record_2.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  writer.close();

  Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
  Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
  Object[][] expectedRecords = { expectedRecord_1, expectedRecord_2 };

  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    // Metadata written as (apple -> block, cat -> dog) must be readable through both accessors.
    assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
    assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
    assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));

    LongWritable rowID = new LongWritable();
    for (int i = 0; i < expectedRecords.length; i++) {
      reader.next(rowID);
      BytesRefArrayWritable cols = new BytesRefArrayWritable();
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);
      StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      for (int j = 0; j < fieldRefs.size(); j++) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
        Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        // Expected value first, actual second, so failure output is labelled correctly.
        assertEquals("Field " + j, expectedRecords[i][j], standardWritableData);
      }
    }
  } finally {
    // Close the reader even if one of the assertions above fails.
    reader.close();
  }
}
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestRCFile method partialReadTest.
/**
 * Reads {@code file} with columns 2 and 3 appended to the read-column list in {@code conf},
 * and for every row checks the two projected fields against
 * {@code expectedPartitalFieldsData} and the re-serialized row against {@code patialS}.
 *
 * <p>Changes relative to the previous version: the per-field assertion now passes the expected
 * value before the actual one, and the reader is closed even when a check fails.
 *
 * @param fs    file system used to open the file
 * @param count number of records expected; only used in the debug log message
 * @param file  RCFile to read
 * @throws IOException    if opening or reading the file fails
 * @throws SerDeException if deserialization, serialization or inspector lookup fails
 */
private void partialReadTest(FileSystem fs, int count, Path file) throws IOException, SerDeException {
  LOG.debug("reading " + count + " records");
  long start = System.currentTimeMillis();

  java.util.ArrayList<Integer> readCols = new java.util.ArrayList<Integer>();
  readCols.add(Integer.valueOf(2));
  readCols.add(Integer.valueOf(3));
  ColumnProjectionUtils.appendReadColumns(conf, readCols);

  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  try {
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
      reader.getCurrentRow(cols);
      cols.resetValid(8);
      Object row = serDe.deserialize(cols);
      StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
      List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
      assertEquals("Field size should be 8", 8, fieldRefs.size());
      // Only the projected columns are compared field by field.
      for (int i : readCols) {
        Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
        Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(i).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
        // Expected value first, actual second, so failure output is labelled correctly.
        assertEquals("Field " + i, expectedPartitalFieldsData[i], standardWritableData);
      }
      assertEquals("Class of the serialized object should be BytesRefArrayWritable", BytesRefArrayWritable.class, serDe.getSerializedClass());
      BytesRefArrayWritable serializedBytes = (BytesRefArrayWritable) serDe.serialize(row, oi);
      assertEquals("Serialized data", patialS, serializedBytes);
    }
  } finally {
    // Close the reader even if a check above fails.
    reader.close();
  }

  long cost = System.currentTimeMillis() - start;
  LOG.debug("reading fully costs:" + cost + " milliseconds");
}
Aggregations