Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class MapJoinTestConfig, method createMapJoinTableContainerSerDe.
public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
  final Byte smallTablePos = 1;
  // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here???
  TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc();
  AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
  SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
  MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
  TableDesc valueTableDesc;
  if (mapJoinDesc.getNoOuterJoin()) {
    valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos);
  } else {
    valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos);
  }
  AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(valueTableDesc.getDeserializerClass(), null);
  SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
  MapJoinObjectSerDeContext valueContext = new MapJoinObjectSerDeContext(valueSerDe, hasFilter(mapJoinDesc, smallTablePos));
  MapJoinTableContainerSerDe mapJoinTableContainerSerDe = new MapJoinTableContainerSerDe(keyContext, valueContext);
  return mapJoinTableContainerSerDe;
}
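As a minimal sketch (assumed usage, not code from this test) of how the returned wrapper could be consumed: it bundles the key-side and value-side MapJoinObjectSerDeContext objects used when persisting small-table rows. The getter names below are assumptions based on the MapJoinTableContainerSerDe and MapJoinObjectSerDeContext classes, and the helper class itself is hypothetical.

import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;

public class MapJoinSerDeInspection {
  // Returns the value-side serde bundled for the small table at position 1.
  public static AbstractSerDe smallTableValueSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
    MapJoinTableContainerSerDe containerSerDe =
        MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc);
    // Key side: always a BinarySortableSerDe in this test configuration.
    MapJoinObjectSerDeContext keyContext = containerSerDe.getKeyContext();
    AbstractSerDe keySerDe = keyContext.getSerDe();
    assert keySerDe != null;
    // Value side: whatever deserializer class the value TableDesc declares.
    return containerSerDe.getValueContext().getSerDe();
  }
}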
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestRCFile, method main.
/**
* For debugging and testing.
*/
public static void main(String[] args) throws Exception {
  int count = 10000;
  boolean create = true;
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Path file = null;
  // the SerDe part is from TestLazySimpleSerDe
  AbstractSerDe serDe = new ColumnarSerDe();
  // Create the SerDe
  Properties tbl = createProperties();
  SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
  String usage = "Usage: RCFile " + "[-count N]" + " file";
  if (args.length == 0) {
    System.err.println(usage);
    System.exit(-1);
  }
  try {
    for (int i = 0; i < args.length; ++i) {
      // parse command line
      if (args[i] == null) {
        continue;
      } else if (args[i].equals("-count")) {
        count = Integer.parseInt(args[++i]);
      } else {
        // file is required parameter
        file = new Path(args[i]);
      }
    }
    if (file == null) {
      System.err.println(usage);
      System.exit(-1);
    }
    LOG.info("count = " + count);
    LOG.info("create = " + create);
    LOG.info("file = " + file);
    TestRCFile test = new TestRCFile();
    // test.performanceTest();
    test.testSimpleReadAndWrite();
    byte[][] bytesArray = new byte[][] {
        "123".getBytes("UTF-8"), "456".getBytes("UTF-8"), "789".getBytes("UTF-8"),
        "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"), "hive and hadoop".getBytes("UTF-8"),
        new byte[0], "NULL".getBytes("UTF-8") };
    test.writeTest(fs, count, file, bytesArray);
    test.fullyReadTest(fs, count, file);
    test.partialReadTest(fs, count, file);
    System.out.println("Finished.");
  } finally {
    fs.close();
  }
}
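As a rough illustration, the debugging entry point above can also be driven programmatically; the file path, row count, and driver class below are made up for the example.

// Hypothetical driver; equivalent to: RCFile -count 5000 /tmp/rcfile_debug
public class RCFileDebugDriver {
  public static void main(String[] args) throws Exception {
    // Writes 5000 rows to a local RCFile and reads them back fully and partially.
    TestRCFile.main(new String[] { "-count", "5000", "/tmp/rcfile_debug" });
  }
}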
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class LazySimpleSerDe, method initialize.
/**
 * Initialize the SerDe given the parameters.
 *
 *   serialization.format: separator char or byte code (only supports byte values up to 127)
 *   columns: ","-separated column names
 *   columns.types: ",", ":", or ";"-separated column types
 *
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#initialize(Configuration, Properties)
 */
@Override
public void initialize(Configuration job, Properties tbl) throws SerDeException {
  super.initialize(job, tbl);
  serdeParams = new LazySerDeParameters(job, tbl, getClass().getName());
  // Create the ObjectInspectors for the fields
  cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams.getColumnNames(), serdeParams.getColumnTypes(), new LazyObjectInspectorParametersImpl(serdeParams));
  cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector);
  serializedSize = 0;
  stats = new SerDeStats();
  lastOperationSerialize = false;
  lastOperationDeserialize = false;
}
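A minimal sketch of driving this initialization directly, assuming a made-up three-column table: the column names and types are illustrative, while the property keys (serialization.format, columns, columns.types) are the ones the javadoc above describes, taken from serdeConstants.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class LazySimpleSerDeInitSketch {
  public static LazySimpleSerDe create() throws Exception {
    Properties tbl = new Properties();
    // Separator char or byte code; "9" is the byte value for TAB.
    tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
    // ","-separated column names (hypothetical schema).
    tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,score");
    // ","-separated column types matching the names above.
    tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string,double");
    LazySimpleSerDe serDe = new LazySimpleSerDe();
    serDe.initialize(new Configuration(), tbl);
    return serDe;
  }
}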
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestLazyBinaryFast, method testLazyBinaryFastCase.
public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
  SerdeRandomRowSource source = new SerdeRandomRowSource();
  source.init(r, supportedTypes, depth);
  int rowCount = 100;
  Object[][] rows = source.randomRows(rowCount);
  if (doNonRandomFill) {
    MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
  }
  StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
  TypeInfo[] typeInfos = source.typeInfos();
  int columnCount = typeInfos.length;
  int writeColumnCount = columnCount;
  StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
  boolean doWriteFewerColumns = r.nextBoolean();
  if (doWriteFewerColumns) {
    writeColumnCount = 1 + r.nextInt(columnCount);
    if (writeColumnCount == columnCount) {
      doWriteFewerColumns = false;
    } else {
      writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
    }
  }
  String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
  String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
  TestLazyBinarySerDe testLazyBinarySerDe = new TestLazyBinarySerDe();
  AbstractSerDe serde = testLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
  AbstractSerDe serde_fewer = null;
  if (doWriteFewerColumns) {
    String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
    String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
    serde_fewer = testLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
  }
  testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector,
      typeInfos, /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
  testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector,
      typeInfos, /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
  /*
   * Can the LazyBinary format really tolerate writing fewer columns?
   */
  // if (doWriteFewerColumns) {
  //   testLazyBinaryFast(
  //       source, rows,
  //       serde, rowStructObjectInspector,
  //       serde_fewer, writeRowStructObjectInspector,
  //       primitiveTypeInfos,
  //       /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
  //   testLazyBinaryFast(
  //       source, rows,
  //       serde, rowStructObjectInspector,
  //       serde_fewer, writeRowStructObjectInspector,
  //       primitiveTypeInfos,
  //       /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
  // }
}
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestLazyBinarySerDe, method testShorterSchemaDeserialization.
/**
 * Test shorter-schema deserialization, where a bigger struct is serialized and
 * then deserialized with a smaller struct. Here the serialized struct has 10
 * fields and we deserialize it into a struct of 9 fields.
 */
private void testShorterSchemaDeserialization(Random r) throws Throwable {
  StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
  String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
  String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
  AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
  serde1.getObjectInspector();
  StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
  String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
  String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
  AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
  ObjectInspector serdeOI2 = serde2.getObjectInspector();
  int num = 100;
  for (int itest = 0; itest < num; itest++) {
    MyTestClassBigger t = new MyTestClassBigger();
    ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
    t.randomFill(r, extraTypeInfo);
    BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
    Object output = serde2.deserialize(bw);
    if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
      System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
      System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
      System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
      assertEquals(t, output);
    }
  }
}
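For context, a helper such as getSerDe(fieldNames1, fieldTypes1) in the snippet above generally amounts to building a LazyBinarySerDe from comma-separated column name and type strings. The sketch below is an assumed reconstruction of such a helper, not the test's actual implementation.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;

public class LazyBinarySerDeFactorySketch {
  // fieldNames: e.g. "myint,mystring" (as produced by ObjectInspectorUtils.getFieldNames)
  // fieldTypes: e.g. "int,string"     (as produced by ObjectInspectorUtils.getFieldTypes)
  public static AbstractSerDe getSerDe(String fieldNames, String fieldTypes) throws Exception {
    Properties schema = new Properties();
    schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
    schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
    AbstractSerDe serDe = new LazyBinarySerDe();
    SerDeUtils.initializeSerDe(serDe, new Configuration(), schema, null);
    return serDe;
  }
}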