Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb: class ColumnarTextHiveRecordCursor, method parseDecimalColumn.

private void parseDecimalColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseDecimalColumn(column, bytes, start, length);
    }
}
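A BytesRefWritable is only a window (backing array, start, length) onto a shared row buffer, so a caller that needs an owned copy of the field has to slice it out explicitly, and getData() can throw IOException because the backing bytes may be produced lazily. A minimal sketch of that copy step, assuming a free-standing helper (copyField is illustrative, not part of Presto or Hive):

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class BytesRefCopySketch {
    // Copy the referenced slice into a standalone byte[]; the source object keeps
    // pointing at the shared buffer, which may be overwritten by the next row.
    static byte[] copyField(BytesRefWritable fieldData) throws IOException {
        byte[] bytes = fieldData.getData();
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        return Arrays.copyOfRange(bytes, start, start + length);
    }
}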
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project presto by prestodb: class ColumnarTextHiveRecordCursor, method parseBooleanColumn.

private void parseBooleanColumn(int column) {
    loaded[column] = true;
    if (hiveColumnIndexes[column] >= value.size()) {
        // this partition may contain fewer fields than what's declared in the schema
        // this happens when additional columns are added to the hive table after a partition has been created
        nulls[column] = true;
    } else {
        BytesRefWritable fieldData = value.unCheckedGet(hiveColumnIndexes[column]);
        byte[] bytes;
        try {
            bytes = fieldData.getData();
        } catch (IOException e) {
            throw Throwables.propagate(e);
        }
        int start = fieldData.getStart();
        int length = fieldData.getLength();
        parseBooleanColumn(column, bytes, start, length);
    }
}
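parseBooleanColumn is structurally identical to parseDecimalColumn above; only the final type-specific parse call differs, so the copy sketch shown after the decimal variant applies to this field-access pattern as well.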
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache: class ColumnarSerDeBase, method initialize.

protected void initialize(int size) throws SerDeException {
    field = new BytesRefWritable[size];
    for (int i = 0; i < size; i++) {
        field[i] = new BytesRefWritable();
        serializeCache.set(i, field[i]);
    }
    serializedSize = 0;
    stats = new SerDeStats();
    lastOperationSerialize = false;
    lastOperationDeserialize = false;
}
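initialize pre-allocates one reusable BytesRefWritable per column and registers it in serializeCache, so later serialization can point each entry at freshly written bytes instead of allocating new objects per row. A minimal sketch of that reuse, assuming the serialized column bytes already sit in a shared output buffer (setColumn is a hypothetical helper, not the actual ColumnarSerDe serialize code):

import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class ColumnReuseSketch {
    // Re-point the pre-allocated per-column entry at this row's bytes for the column;
    // the BytesRefWritable instances created in initialize() are never replaced.
    static void setColumn(BytesRefWritable[] field, int column, byte[] buffer, int start, int length) {
        field[column].set(buffer, start, length);
    }
}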
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache: class TestRCFile, method testSync.

@Test
public void testSync() throws IOException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsync");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    int intervalRecordCount = 500;
    CompressionCodec codec = null;
    int writeCount = 2500;
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();
    long fileLen = fs.getFileStatus(testFile).getLen();
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    JobConf jobconf = new JobConf(cloneConf);
    jobconf.set("mapred.input.dir", testDir.toString());
    HiveConf.setLongVar(jobconf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, fileLen);
    InputSplit[] splits = inputFormat.getSplits(jobconf, 1);
    RCFileRecordReader rr = new RCFileRecordReader(jobconf, (FileSplit) splits[0]);
    long lastSync = 0;
    for (int i = 0; i < 2500; i++) {
        rr.sync(i);
        if (rr.getPos() < lastSync) {
            String reason = String.format("Sync at offset %d skipped sync block at location %d (returned %d instead)", i - 1, rr.getPos(), lastSync);
            System.out.println(reason);
            fail(reason);
        }
        lastSync = rr.getPos();
    }
    rr.close();
}
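The test drives the reader through sync positions only; the plain row-by-row read path for a file written this way goes through RCFile.Reader, which fills a BytesRefArrayWritable whose entries are BytesRefWritable column slices. A minimal read-back sketch, assuming the file has already been written as above (the class name and totalBytes accounting are illustrative, not part of the Hive test):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.LongWritable;

public final class RCFileReadSketch {
    // Read every row of an RCFile and sum the length of every column slice.
    static long readAll(FileSystem fs, Path file, Configuration conf) throws IOException {
        RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
        LongWritable rowID = new LongWritable();
        BytesRefArrayWritable row = new BytesRefArrayWritable();
        long totalBytes = 0;
        try {
            while (reader.next(rowID)) {
                reader.getCurrentRow(row);
                for (int c = 0; c < row.size(); c++) {
                    BytesRefWritable col = row.get(c);
                    totalBytes += col.getLength();
                }
            }
        } finally {
            reader.close();
        }
        return totalBytes;
    }
}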
Use of org.apache.hadoop.hive.serde2.columnar.BytesRefWritable in project hive by apache: class TestRCFile, method writeThenReadByRecordReader.

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber, long minSplitSize, CompressionCodec codec) throws IOException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);
    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        if (i == intervalRecordCount) {
            System.out.println("write position:" + writer.getLength());
        }
        writer.append(bytes);
    }
    writer.close();
    RCFileInputFormat inputFormat = new RCFileInputFormat();
    JobConf jobConf = new JobConf(cloneConf);
    jobConf.set("mapred.input.dir", testDir.toString());
    HiveConf.setLongVar(jobConf, HiveConf.ConfVars.MAPREDMINSPLITSIZE, minSplitSize);
    InputSplit[] splits = inputFormat.getSplits(jobConf, splitNumber);
    assertEquals("splits length should be " + splitNumber, splitNumber, splits.length);
    int readCount = 0;
    for (int i = 0; i < splits.length; i++) {
        int previousReadCount = readCount;
        RecordReader rr = inputFormat.getRecordReader(splits[i], jobConf, Reporter.NULL);
        Object key = rr.createKey();
        Object value = rr.createValue();
        while (rr.next(key, value)) {
            readCount++;
        }
        rr.close();
        System.out.println("The " + i + "th split read " + (readCount - previousReadCount) + " rows");
    }
    assertEquals("readCount should be equal to writeCount", writeCount, readCount);
}
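The loop above only counts rows; when the column data itself is needed, the value produced by the RCFile record reader is a BytesRefArrayWritable whose entries are BytesRefWritable slices. A minimal sketch of decoding one such row, assuming the columns hold UTF-8 text as in bytesArray (decodeRow is a hypothetical helper, not part of TestRCFile):

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public final class RowDecodeSketch {
    // Convert one row's column slices into strings directly from the shared buffer.
    static String[] decodeRow(BytesRefArrayWritable row) throws IOException {
        String[] columns = new String[row.size()];
        for (int i = 0; i < row.size(); i++) {
            BytesRefWritable col = row.get(i);
            columns[i] = new String(col.getData(), col.getStart(), col.getLength(), StandardCharsets.UTF_8);
        }
        return columns;
    }
}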