Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project presto by prestodb.
Class RcFileTester, method assertFileContentsOld.
private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues) throws Exception {
    JobConf configuration = new JobConf(new Configuration(false));
    // Project only column 0 and disable whole-row reads.
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);
    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());
    Deserializer deserializer;
    if (format == Format.BINARY) {
        deserializer = new LazyBinaryColumnarSerDe();
    } else {
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());
    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(
            new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null),
            configuration,
            NULL);
    K key = recordReader.createKey();
    V value = recordReader.createValue();
    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");
    Iterator<?> iterator = expectedValues.iterator();
    // Compare each deserialized row against the next expected value.
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();
        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
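Taken on its own, the class exercised here is just a resizable array of byte ranges. Below is a minimal sketch of building and decoding one row by hand, assuming the same columnar imports as above plus java.io.IOException and java.nio.charset.StandardCharsets; the helper name and values are illustrative, not part of RcFileTester.

    // Minimal sketch: build a two-column row and decode the first cell back to a String.
    static String buildAndDecodeFirstColumn() throws IOException {
        byte[] name = "alice".getBytes(StandardCharsets.UTF_8);
        byte[] age = "42".getBytes(StandardCharsets.UTF_8);
        BytesRefArrayWritable row = new BytesRefArrayWritable(2);
        row.set(0, new BytesRefWritable(name, 0, name.length));
        row.set(1, new BytesRefWritable(age, 0, age.length));
        BytesRefWritable first = row.get(0);
        // getData() may throw IOException for lazily decompressed columns.
        return new String(first.getData(), first.getStart(), first.getLength(), StandardCharsets.UTF_8);
    }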
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
Class RCFileGenerator, method genData.
private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    // Seed the generator per format so each output is reproducible for a given row count.
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    }
    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());
    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;
        if (format.equals("student")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                randomRegistration().getBytes("UTF-8"),
                Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = {
                Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                randomName().getBytes("UTF-8"),
                randomMap(),
                randomArray() };
            fields = f;
        }
        // Copy each field into the row and mirror it to the plain-text file, tab-separated.
        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i]));
            if (i != fields.length - 1) {
                pw.print("\t");
            } else {
                pw.println();
            }
        }
        writer.append(row);
    }
    writer.close();
    pw.close();
}
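The essential write path in genData, stripped of the random data generation, looks roughly like the sketch below. It assumes fs, conf, numRows, and an outputPath are already in scope (as they are in RCFileGenerator), runs inside a method that declares IOException, and uses a hypothetical two-column layout.

    // Minimal sketch: write numRows two-column rows to an RCFile at outputPath.
    RCFileOutputFormat.setColumnNumber(conf, 2);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, outputPath, null, new DefaultCodec());
    try {
        for (int r = 0; r < numRows; r++) {
            byte[] c0 = ("row-" + r).getBytes(StandardCharsets.UTF_8);
            byte[] c1 = Integer.toString(r).getBytes(StandardCharsets.UTF_8);
            BytesRefArrayWritable row = new BytesRefArrayWritable(2);
            row.set(0, new BytesRefWritable(c0, 0, c0.length));
            row.set(1, new BytesRefWritable(c1, 0, c1.length));
            writer.append(row);
        }
    } finally {
        writer.close();
    }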
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
Class TestRCFileMapReduceInputFormat, method writeThenReadByRecordReader.
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    // Write the same row writeCount times.
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    // Read the file back through the mapreduce input format and count the records per split.
    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<>();
    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.size());
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}
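The test only counts records. As a hedged addition, inside the same while (rr.nextKeyValue()) loop the row itself could be pulled out and a column decoded as sketched below; the UTF-8 decoding and the variable names other than rr are assumptions, not something the test does.

    // Inside the while (rr.nextKeyValue()) loop of the test above:
    BytesRefArrayWritable row = rr.getCurrentValue();
    BytesRefWritable cell = row.unCheckedGet(0);
    String firstColumn = new String(cell.getData(), cell.getStart(), cell.getLength(), StandardCharsets.UTF_8);
    // firstColumn now holds the first column of the current record as text.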
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
Class RCFileCat, method printRecord.
/**
 * Print one record (row) to the string builder as tab-separated columns
 * followed by a newline.
 *
 * @param value the row to print, one BytesRefWritable per column
 * @param buf   the builder the decoded text is appended to
 * @throws IOException if a column cannot be read or decoded
 */
private void printRecord(BytesRefArrayWritable value, StringBuilder buf) throws IOException {
    int n = value.size();
    if (n > 0) {
        BytesRefWritable v = value.unCheckedGet(0);
        ByteBuffer bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
        buf.append(decoder.decode(bb));
        for (int i = 1; i < n; i++) {
            // Prepend the TAB so no trailing TAB is emitted after the last column.
            buf.append(RCFileCat.TAB);
            v = value.unCheckedGet(i);
            bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
            buf.append(decoder.decode(bb));
        }
        buf.append(RCFileCat.NEWLINE);
    }
}
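printRecord depends on a CharsetDecoder field named decoder that is created elsewhere in RCFileCat. A minimal sketch of how such a decoder might be set up and how a single cell is decoded the same way, assuming UTF-8 data and a BytesRefArrayWritable named value in scope; the replace-on-error policy is an assumption, not necessarily what RCFileCat configures.

    // Illustrative decoder setup (the replace-on-error policy is an assumption).
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);
    // Decoding a single cell the same way printRecord does:
    BytesRefWritable v = value.unCheckedGet(0);
    ByteBuffer bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
    String cellText = decoder.decode(bb).toString();   // decode() throws CharacterCodingException, an IOException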
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
Class PerformTestRCFileAndSeqFile, method performRCFileFullyReadColumnTest.
public int performRCFileFullyReadColumnTest(FileSystem fs, Path file, int allColumnsNumber, boolean chechCorrect) throws IOException {
    byte[][] checkBytes = null;
    BytesRefArrayWritable checkRow = new BytesRefArrayWritable(allColumnsNumber);
    if (chechCorrect) {
        resetRandomGenerators();
        checkBytes = new byte[allColumnsNumber][];
    }
    int actualReadCount = 0;
    // Read every column of every row and, optionally, verify it against the regenerated data.
    ColumnProjectionUtils.setReadAllColumns(conf);
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);
        boolean ok = true;
        if (chechCorrect) {
            nextRandomRow(checkBytes, checkRow);
            ok = ok && checkRow.equals(cols);
        }
        if (!ok) {
            throw new IllegalStateException("Compare read and write error.");
        }
        actualReadCount++;
    }
    reader.close();
    return actualReadCount;
}
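For contrast with this fully-read test, here is a hedged sketch of a column-projected read over the same file, reusing the configuration keys the presto example at the top sets (READ_COLUMN_IDS_CONF_STR and READ_ALL_COLUMNS, presumably static imports from ColumnProjectionUtils); fs, file, and conf are assumed to be the same as above.

    // Read only column 0 of the same file; non-projected columns are skipped by the reader.
    Configuration projected = new Configuration(conf);
    projected.set(READ_COLUMN_IDS_CONF_STR, "0");
    projected.setBoolean(READ_ALL_COLUMNS, false);
    RCFile.Reader reader = new RCFile.Reader(fs, file, projected);
    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    try {
        while (reader.next(rowID)) {
            reader.getCurrentRow(cols);
            // With projection enabled, only column 0 is materialized in cols.
        }
    } finally {
        reader.close();
    }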