Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class RCFileGenerator, method genData.
private static void genData(String format, int numRows, String output, String plainOutput) throws Exception {
    int numFields = 0;
    if (format.equals("student")) {
        rand = new Random(numRows);
        numFields = 3;
    } else if (format.equals("voter")) {
        rand = new Random(1000000000 + numRows);
        numFields = 4;
    } else if (format.equals("alltypes")) {
        rand = new Random(2000000000L + numRows);
        numFields = 10;
    }
    RCFileOutputFormat.setColumnNumber(conf, numFields);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, getFile(output), null, new DefaultCodec());
    PrintWriter pw = new PrintWriter(new FileWriter(plainOutput));
    for (int j = 0; j < numRows; j++) {
        BytesRefArrayWritable row = new BytesRefArrayWritable(numFields);
        byte[][] fields = null;
        if (format.equals("student")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                Double.valueOf(randomGpa()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("voter")) {
            byte[][] f = {
                randomName().getBytes("UTF-8"),
                Integer.valueOf(randomAge()).toString().getBytes("UTF-8"),
                randomRegistration().getBytes("UTF-8"),
                Double.valueOf(randomContribution()).toString().getBytes("UTF-8") };
            fields = f;
        } else if (format.equals("alltypes")) {
            byte[][] f = {
                Integer.valueOf(rand.nextInt(Byte.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt(Short.MAX_VALUE)).toString().getBytes("UTF-8"),
                Integer.valueOf(rand.nextInt()).toString().getBytes("UTF-8"),
                Long.valueOf(rand.nextLong()).toString().getBytes("UTF-8"),
                Float.valueOf(rand.nextFloat() * 1000).toString().getBytes("UTF-8"),
                Double.valueOf(rand.nextDouble() * 1000000).toString().getBytes("UTF-8"),
                randomName().getBytes("UTF-8"),
                randomMap(),
                randomArray() };
            fields = f;
        }
        for (int i = 0; i < fields.length; i++) {
            BytesRefWritable field = new BytesRefWritable(fields[i], 0, fields[i].length);
            row.set(i, field);
            pw.print(new String(fields[i]));
            if (i != fields.length - 1)
                pw.print("\t");
            else
                pw.println();
        }
        writer.append(row);
    }
    writer.close();
    pw.close();
}
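For orientation, the write path above pairs naturally with RCFile.Reader, which hands back one BytesRefArrayWritable per row. The following is a minimal sketch, not part of RCFileGenerator: the RCFileDump class name and dump method are illustrative, and the Configuration and Path are assumed to be supplied by the caller.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.LongWritable;

public class RCFileDump {
    // Reads every row of an RCFile and prints it as tab-separated UTF-8 text.
    public static void dump(Configuration conf, Path file) throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
        try {
            LongWritable rowId = new LongWritable();
            BytesRefArrayWritable row = new BytesRefArrayWritable();
            while (reader.next(rowId)) {
                // getCurrentRow fills the columns of the row positioned by next()
                reader.getCurrentRow(row);
                StringBuilder line = new StringBuilder();
                for (int i = 0; i < row.size(); i++) {
                    BytesRefWritable field = row.unCheckedGet(i);
                    line.append(new String(field.getData(), field.getStart(),
                            field.getLength(), StandardCharsets.UTF_8));
                    if (i != row.size() - 1) {
                        line.append('\t');
                    }
                }
                System.out.println(line);
            }
        } finally {
            reader.close();
        }
    }
}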
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class RCFileCat, method printRecord.
/**
 * Print one record (one row of columns) to the string builder.
 * @param value the row, one BytesRefWritable per column
 * @param buf the buffer the tab-separated, newline-terminated text is appended to
 * @throws IOException
 */
private void printRecord(BytesRefArrayWritable value, StringBuilder buf) throws IOException {
    int n = value.size();
    if (n > 0) {
        BytesRefWritable v = value.unCheckedGet(0);
        ByteBuffer bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
        buf.append(decoder.decode(bb));
        for (int i = 1; i < n; i++) {
            // emit the TAB before each subsequent column so the last column is not followed by one
            buf.append(RCFileCat.TAB);
            v = value.unCheckedGet(i);
            bb = ByteBuffer.wrap(v.getData(), v.getStart(), v.getLength());
            buf.append(decoder.decode(bb));
        }
        buf.append(RCFileCat.NEWLINE);
    }
}
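printRecord relies on a CharsetDecoder field named decoder. The sketch below shows one way such a helper could be assembled around the same ByteBuffer.wrap pattern; the FieldDecoder class and its REPLACE error actions are assumptions for illustration, not necessarily how RCFileCat configures its decoder.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class FieldDecoder {
    // CharsetDecoder instances are stateful and not thread-safe, so keep one per thread.
    private final CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPLACE)
            .onUnmappableCharacter(CodingErrorAction.REPLACE);

    /** Decodes a single column value into a String without copying the backing array. */
    public String decode(BytesRefWritable field) throws IOException {
        ByteBuffer bb = ByteBuffer.wrap(field.getData(), field.getStart(), field.getLength());
        return decoder.decode(bb).toString();
    }
}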
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache.
The class TestRCFileMapReduceInputFormat, method writeThenReadByRecordReader.
private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber,
        long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat =
        new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
    Configuration jobConf = new Configuration(cloneConf);
    jobConf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jobConf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.size());
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim()
            .createTaskAttemptContext(jobConf, new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i), tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should be equal to writeCount", writeCount, readCount);
}
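To show how RCFileMapReduceInputFormat is consumed outside a test, here is a minimal sketch of a Mapper over the same <LongWritable, BytesRefArrayWritable> pairs. The FirstColumnMapper class, the choice of column 0, and the UTF-8 decoding are illustrative assumptions rather than part of the Hive test.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FirstColumnMapper
        extends Mapper<LongWritable, BytesRefArrayWritable, Text, LongWritable> {

    private static final LongWritable ONE = new LongWritable(1);
    private final Text outKey = new Text();

    @Override
    protected void map(LongWritable rowId, BytesRefArrayWritable row, Context context)
            throws IOException, InterruptedException {
        if (row.size() == 0) {
            return;
        }
        // Emit the first column of the row as text, paired with a count of 1.
        BytesRefWritable field = row.unCheckedGet(0);
        outKey.set(new String(field.getData(), field.getStart(),
                field.getLength(), StandardCharsets.UTF_8));
        context.write(outKey, ONE);
    }
}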
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.
The class ColumnarBinaryHiveRecordCursor, method parseObjectColumn.
private void parseObjectColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseObjectColumn(column, bytes, start, length);
    }
}
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb.
The class ColumnarBinaryHiveRecordCursor, method parseDoubleColumn.
private void parseDoubleColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseDoubleColumn(column, bytes, start, length);
    }
}
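parseObjectColumn and parseDoubleColumn share the same guard: a Hive column index at or beyond value.size() means the partition was written before the column was added to the table, so the value reads as null. Below is a minimal sketch of that pattern factored into a helper; the RcBinaryFields class, its method names, and the UncheckedIOException wrapping are illustrative, not Presto's API.

import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

final class RcBinaryFields {
    private RcBinaryFields() {}

    /**
     * Returns the raw field for the given Hive column index, or null when the row
     * (written before the column was added to the table) does not contain it.
     */
    static BytesRefWritable fieldOrNull(BytesRefArrayWritable value, int hiveColumnIndex) {
        if (hiveColumnIndex >= value.size()) {
            return null;
        }
        return value.unCheckedGet(hiveColumnIndex);
    }

    /** Copies the referenced byte range out of the field, wrapping the checked IOException. */
    static byte[] copyBytes(BytesRefWritable field) {
        try {
            byte[] data = field.getData();
            byte[] copy = new byte[field.getLength()];
            System.arraycopy(data, field.getStart(), copy, 0, field.getLength());
            return copy;
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
}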