Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project presto by prestodb.
Class RcFileTester, method assertFileContentsOld:
private static <K extends LongWritable, V extends BytesRefArrayWritable> void assertFileContentsOld(Type type, TempFile tempFile, Format format, Iterable<?> expectedValues)
        throws Exception
{
    // project only column 0 when reading
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    // single-column table schema named "test"
    Properties schema = new Properties();
    schema.setProperty(META_TABLE_COLUMNS, "test");
    schema.setProperty(META_TABLE_COLUMN_TYPES, getJavaObjectInspector(type).getTypeName());

    // pick the text or binary columnar SerDe to match the file format under test
    Deserializer deserializer;
    if (format == Format.BINARY) {
        deserializer = new LazyBinaryColumnarSerDe();
    }
    else {
        deserializer = new ColumnarSerDe();
    }
    deserializer.initialize(configuration, schema);
    configuration.set(SERIALIZATION_LIB, deserializer.getClass().getName());

    // read the whole file as a single split with the old mapred RCFile reader
    InputFormat<K, V> inputFormat = new RCFileInputFormat<>();
    RecordReader<K, V> recordReader = inputFormat.getRecordReader(
            new FileSplit(new Path(tempFile.getFile().getAbsolutePath()), 0, tempFile.getFile().length(), (String[]) null),
            configuration,
            NULL);
    K key = recordReader.createKey();
    V value = recordReader.createValue();

    StructObjectInspector rowInspector = (StructObjectInspector) deserializer.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    // compare each row against the expected values, in order
    Iterator<?> iterator = expectedValues.iterator();
    while (recordReader.next(key, value)) {
        Object expectedValue = iterator.next();
        Object rowData = deserializer.deserialize(value);
        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
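The Presto test reuses Hive's own columnar SerDes to verify what the RCFile writer produced, and the snippet shows the only configuration ColumnarSerDe really needs: the column names and column types from the table properties. Below is a minimal, self-contained sketch of that setup pattern; the column name "test" and the string type are assumptions for illustration, and the two-argument initialize mirrors the older Deserializer API used above (the Hive snippets further down use the newer three-argument form).

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class ColumnarSerDeSetupSketch {
    public static void main(String[] args) throws SerDeException {
        // "columns" and "columns.types" are the same keys as META_TABLE_COLUMNS / META_TABLE_COLUMN_TYPES above
        Properties schema = new Properties();
        schema.setProperty(serdeConstants.LIST_COLUMNS, "test");
        schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string");

        ColumnarSerDe serDe = new ColumnarSerDe();
        serDe.initialize(new Configuration(), schema);
        StructObjectInspector rowInspector = (StructObjectInspector) serDe.getObjectInspector();
        System.out.println(rowInspector.getTypeName());   // e.g. struct<test:string>
    }
}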
Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.
Class TestRCFile, method setup:
@Before
public void setup() throws Exception {
    conf = new Configuration();
    ColumnProjectionUtils.setReadAllColumns(conf);
    fs = FileSystem.getLocal(conf);
    dir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred");
    file = new Path(dir, "test_rcfile");
    cleanup();

    // the SerDe part is from TestLazySimpleSerDe
    serDe = new ColumnarSerDe();
    // Create the SerDe
    tbl = createProperties();
    serDe.initialize(conf, tbl, null);

    bytesArray = new byte[][] {
            "123".getBytes(StandardCharsets.UTF_8),
            "456".getBytes(StandardCharsets.UTF_8),
            "789".getBytes(StandardCharsets.UTF_8),
            "1000".getBytes(StandardCharsets.UTF_8),
            "5.3".getBytes(StandardCharsets.UTF_8),
            "hive and hadoop".getBytes(StandardCharsets.UTF_8),
            new byte[0],
            "NULL".getBytes(StandardCharsets.UTF_8) };
    s = new BytesRefArrayWritable(bytesArray.length);
    s.set(0, new BytesRefWritable("123".getBytes(StandardCharsets.UTF_8)));
    s.set(1, new BytesRefWritable("456".getBytes(StandardCharsets.UTF_8)));
    s.set(2, new BytesRefWritable("789".getBytes(StandardCharsets.UTF_8)));
    s.set(3, new BytesRefWritable("1000".getBytes(StandardCharsets.UTF_8)));
    s.set(4, new BytesRefWritable("5.3".getBytes(StandardCharsets.UTF_8)));
    s.set(5, new BytesRefWritable("hive and hadoop".getBytes(StandardCharsets.UTF_8)));
    s.set(6, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    s.set(7, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));

    // partial test init
    patialS.set(0, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    patialS.set(1, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    patialS.set(2, new BytesRefWritable("789".getBytes(StandardCharsets.UTF_8)));
    patialS.set(3, new BytesRefWritable("1000".getBytes(StandardCharsets.UTF_8)));
    patialS.set(4, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    // LazyString has no NULL sequence of its own; an unset value reads back as an empty string.
    patialS.set(5, new BytesRefWritable("".getBytes(StandardCharsets.UTF_8)));
    patialS.set(6, new BytesRefWritable("NULL".getBytes(StandardCharsets.UTF_8)));
    // LazyString has no NULL sequence of its own; an unset value reads back as an empty string.
    patialS.set(7, new BytesRefWritable("".getBytes(StandardCharsets.UTF_8)));

    numRepeat = (int) Math.ceil((double) SequenceFile.SYNC_INTERVAL / (double) bytesArray.length);
}
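setup() relies on a createProperties() helper that is not shown in this snippet. The following is a hypothetical sketch of what such a helper looks like, with eight columns to match the eight values written above; the column names and the exact types are illustrative assumptions, not copied from the Hive source.

private static Properties createProperties() {
    Properties tbl = new Properties();
    // Ctrl-I ("\t", code 9) as the field separator is a common choice in Hive SerDe tests
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    tbl.setProperty("columns", "col1,col2,col3,col4,col5,col6,col7,col8");
    tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:int:string");
    tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
    return tbl;
}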
Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.
Class TestStatsSerde, method testColumnarSerDe:
/**
 * Test ColumnarSerDe
 */
@Test
public void testColumnarSerDe() throws Throwable {
    try {
        System.out.println("test: testColumnarSerde");
        // Create the SerDe
        ColumnarSerDe serDe = new ColumnarSerDe();
        Configuration conf = new Configuration();
        Properties tbl = createProperties();
        serDe.initialize(conf, tbl, null);
        // Data
        BytesRefArrayWritable braw = new BytesRefArrayWritable(8);
        String[] data = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "1.", "NULL" };
        for (int i = 0; i < 8; i++) {
            braw.set(i, new BytesRefWritable(data[i].getBytes()));
        }
        // Test
        deserializeAndSerializeColumnar(serDe, braw, data);
        System.out.println("test: testColumnarSerde - OK");
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
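deserializeAndSerializeColumnar(...) is defined elsewhere in TestStatsSerde, and the real helper also checks the SerDe's raw-data-size statistics. The outline below is a hypothetical sketch of the round trip it performs, shown only to make the test flow concrete; the method name and the printed output are assumptions.

private static void roundTripColumnar(ColumnarSerDe serDe, BytesRefArrayWritable braw, String[] data) throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fields = oi.getAllStructFieldRefs();

    // deserialize produces a lazy columnar row; field access materializes each column
    Object row = serDe.deserialize(braw);
    for (int i = 0; i < fields.size(); i++) {
        Object fieldData = oi.getStructFieldData(row, fields.get(i));
        System.out.println("field " + i + " = " + fieldData + " (expected " + data[i] + ")");
    }

    // serialize converts the row back into a BytesRefArrayWritable with one entry per column
    BytesRefArrayWritable serialized = (BytesRefArrayWritable) serDe.serialize(row, oi);
    assertEquals(data.length, serialized.size());
}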
Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.
Class TestLazyBinaryColumnarSerDe, method testLazyBinaryColumnarSerDeWithEmptyBinary:
@Test
public void testLazyBinaryColumnarSerDeWithEmptyBinary() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));

    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);

    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 101;
    outerStruct.mShort = 2002;
    outerStruct.mInt = 3003;
    outerStruct.mLong = 4004L;
    outerStruct.mFloat = 5005.01f;
    outerStruct.mDouble = 6006.001d;
    outerStruct.mString = "";
    outerStruct.mBA = new byte[] {};
    outerStruct.mArray = new ArrayList<InnerStruct>();
    outerStruct.mMap = new TreeMap<String, InnerStruct>();
    outerStruct.mStruct = new InnerStruct(180018, 190019L);

    try {
        serde.serialize(outerStruct, oi);
    } catch (RuntimeException re) {
        assertEquals(re.getMessage(), "LazyBinaryColumnarSerde cannot serialize a non-null " + "zero length binary field. Consider using either LazyBinarySerde or ColumnarSerde.");
        return;
    }
    // should not be reached; serialize(...) is expected to fail on the empty byte[]
    assert false;
}
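The message asserted above points at the text-based alternative: ColumnarSerDe is expected to tolerate a non-null, zero-length binary field. The fragment below sketches that contrast as an assumption based on the error message rather than on a test in this file; it reuses props, oi and outerStruct from the test above.

// Assumed contrast: the struct that makes LazyBinaryColumnarSerDe throw is accepted by ColumnarSerDe.
ColumnarSerDe textColumnarSerDe = new ColumnarSerDe();
textColumnarSerDe.initialize(new Configuration(), props, null);
BytesRefArrayWritable serialized = (BytesRefArrayWritable) textColumnarSerDe.serialize(outerStruct, oi);
assertEquals(ObjectInspectorUtils.getFieldNames(oi).split(",").length, serialized.size());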
Use of org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe in project hive by apache.
Class TestRCFile, method main:
/**
 * For debugging and testing.
 */
public static void main(String[] args) throws Exception {
    int count = 10000;
    boolean create = true;

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Path file = null;

    // the SerDe part is from TestLazySimpleSerDe
    AbstractSerDe serDe = new ColumnarSerDe();
    // Create the SerDe
    Properties tbl = createProperties();
    serDe.initialize(conf, tbl, null);

    String usage = "Usage: RCFile " + "[-count N]" + " file";
    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    try {
        for (int i = 0; i < args.length; ++i) {
            // parse command line
            if (args[i] == null) {
                continue;
            } else if (args[i].equals("-count")) {
                count = Integer.parseInt(args[++i]);
            } else {
                // file is required parameter
                file = new Path(args[i]);
            }
        }
        if (file == null) {
            System.err.println(usage);
            System.exit(-1);
        }
        LOG.info("count = " + count);
        LOG.info("create = " + create);
        LOG.info("file = " + file);

        TestRCFile test = new TestRCFile();
        // test.performanceTest();
        test.testSimpleReadAndWrite();

        byte[][] bytesArray = new byte[][] {
                "123".getBytes(StandardCharsets.UTF_8),
                "456".getBytes(StandardCharsets.UTF_8),
                "789".getBytes(StandardCharsets.UTF_8),
                "1000".getBytes(StandardCharsets.UTF_8),
                "5.3".getBytes(StandardCharsets.UTF_8),
                "hive and hadoop".getBytes(StandardCharsets.UTF_8),
                new byte[0],
                "NULL".getBytes(StandardCharsets.UTF_8) };
        test.writeTest(fs, count, file, bytesArray);
        test.fullyReadTest(fs, count, file);
        test.partialReadTest(fs, count, file);
        System.out.println("Finished.");
    } finally {
        fs.close();
    }
}
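writeTest, fullyReadTest and partialReadTest are defined elsewhere in TestRCFile. The fragment below is a rough, hypothetical sketch of what one writeTest round involves, included only to make the main() flow above easier to follow; the structure is an assumption, not the Hive implementation.

// Hypothetical write path: declare the column count, then append the same row `count` times.
RCFileOutputFormat.setColumnNumber(conf, bytesArray.length);
RCFile.Writer writer = new RCFile.Writer(fs, conf, file);
BytesRefArrayWritable row = new BytesRefArrayWritable(bytesArray.length);
for (int i = 0; i < bytesArray.length; i++) {
    row.set(i, new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length));
}
for (int n = 0; n < count; n++) {
    writer.append(row);
}
writer.close();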