use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestLazyBinarySerDe method testShorterSchemaDeserialization1.
/**
* Test shorter schema deserialization where a bigger struct is serialized and
* it is then deserialized with a smaller struct. Here the serialized struct
 * has 9 fields and we deserialize it into a struct of 8 fields.
*/
private void testShorterSchemaDeserialization1(Random r) throws Throwable {
  StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
  String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
  String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
  AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
  serde1.getObjectInspector();
  StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory
      .getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA);
  String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
  String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
  AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
  ObjectInspector serdeOI2 = serde2.getObjectInspector();
  int num = 100;
  for (int itest = 0; itest < num; itest++) {
    MyTestClass t = new MyTestClass();
    ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
    t.randomFill(r, extraTypeInfo);
    BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
    Object output = serde2.deserialize(bw);
    if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
      System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
      System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
      System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
      assertEquals(t, output);
    }
  }
}
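The getSerDe helper called above is not shown in this excerpt. A minimal sketch of what such a helper typically looks like in these LazyBinarySerDe tests follows; it is an assumption rather than the test's actual code, and the classic initialize(Configuration, Properties) signature used here is deprecated or replaced in newer Hive versions.

// Hypothetical helper: builds a LazyBinarySerDe from comma-separated column names and types,
// as the getSerDe(fieldNames, fieldTypes) call above implies.
// Assumed imports: java.util.Properties, org.apache.hadoop.conf.Configuration,
// org.apache.hadoop.hive.serde.serdeConstants, org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
private static AbstractSerDe getSerDe(String fieldNames, String fieldTypes) throws Exception {
  Properties schema = new Properties();
  // "columns" and "columns.types" drive the SerDe's row schema.
  schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
  schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
  LazyBinarySerDe serDe = new LazyBinarySerDe();
  serDe.initialize(new Configuration(), schema);
  return serDe;
}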
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class LazySimpleSerDe method doSerialize.
/**
* Serialize a row of data.
*
* @param obj
* The row object
* @param objInspector
* The ObjectInspector for the row object
* @return The serialized Writable object
* @throws SerDeException
* @see org.apache.hadoop.hive.serde2.AbstractSerDe#serialize(Object, ObjectInspector)
*/
@Override
public Writable doSerialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  if (objInspector.getCategory() != Category.STRUCT) {
    throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: "
        + objInspector.getTypeName());
  }
  // Prepare the field ObjectInspectors
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  List<Object> list = soi.getStructFieldsDataAsList(obj);
  List<? extends StructField> declaredFields =
      (serdeParams.getRowTypeInfo() != null
          && ((StructTypeInfo) serdeParams.getRowTypeInfo()).getAllStructFieldNames().size() > 0)
          ? ((StructObjectInspector) getObjectInspector()).getAllStructFieldRefs()
          : null;
  serializeStream.reset();
  serializedSize = 0;
  // Serialize each field
  for (int i = 0; i < fields.size(); i++) {
    // Append the separator if needed.
    if (i > 0) {
      serializeStream.write(serdeParams.getSeparators()[0]);
    }
    // Get the field objectInspector and the field object.
    ObjectInspector foi = fields.get(i).getFieldObjectInspector();
    Object f = (list == null ? null : list.get(i));
    if (declaredFields != null && i >= declaredFields.size()) {
      throw new SerDeException("Error: expecting " + declaredFields.size()
          + " but asking for field " + i + "\n" + "data=" + obj + "\n"
          + "tableType=" + serdeParams.getRowTypeInfo().toString() + "\n"
          + "dataType=" + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector));
    }
    serializeField(serializeStream, f, foi, serdeParams);
  }
  // TODO: The copy of data is unnecessary, but there is no work-around
  // since we cannot directly set the private byte[] field inside Text.
  serializeCache.set(serializeStream.getData(), 0, serializeStream.getLength());
  serializedSize = serializeStream.getLength();
  lastOperationSerialize = true;
  lastOperationDeserialize = false;
  return serializeCache;
}
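For context, here is a minimal sketch of driving LazySimpleSerDe's serialize path from standalone code. The column names, types, and delimiter are invented for illustration, and the classic initialize(Configuration, Properties) signature shown is deprecated or changed in newer Hive versions.

import java.util.Arrays;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

public class LazySimpleSerDeSketch {
  public static void main(String[] args) throws Exception {
    // Declare a two-column schema: name string, age int, comma-delimited.
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "name,age");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:int");
    props.setProperty(serdeConstants.FIELD_DELIM, ",");

    LazySimpleSerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), props);

    // Row object inspector matching the declared schema, backed by standard Java objects.
    ObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("name", "age"),
        Arrays.<ObjectInspector>asList(
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector));

    // serialize() ends up in doSerialize() above and returns one delimited Text row.
    Text out = (Text) serde.serialize(Arrays.asList("alice", 30), rowOI);
    System.out.println(out); // alice,30
  }
}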
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class FileOutputFormatContainer method getRecordWriter.
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  // this needs to be manually set, under normal circumstances MR Task does this
  setWorkOutputPath(context);
  // Configure the output key and value classes.
  // This is required for writing null as key for file based tables.
  context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
  String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
  OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
  StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
  HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
  Class<? extends AbstractSerDe> serde = storageHandler.getSerDeClass();
  AbstractSerDe sd = (AbstractSerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
  context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());
  RecordWriter<WritableComparable<?>, HCatRecord> rw;
  if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
    // When Dynamic partitioning is used, the RecordWriter instance initialized here isn't used. Can use null.
    // (That's because records can't be written until the values of the dynamic partitions are deduced.
    // By that time, a new local instance of RecordWriter, with the correct output-path, will be constructed.)
    rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
  } else {
    Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
    Path childPath = new Path(parentDir,
        FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
            context.getConfiguration().get("mapreduce.output.basename", "part")));
    rw = new StaticPartitionFileRecordWriterContainer(
        getBaseOutputFormat().getRecordWriter(
            parentDir.getFileSystem(context.getConfiguration()),
            new JobConf(context.getConfiguration()),
            childPath.toString(),
            InternalUtil.createReporter(context)),
        context);
  }
  return rw;
}
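This getRecordWriter implementation is normally reached through HCatOutputFormat. A rough sketch of the client-side job setup that leads here follows; the database and table names are placeholders, the package names assume the newer org.apache.hive.hcatalog namespace, and details vary by HCatalog version.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.mapreduce.HCatOutputFormat;
import org.apache.hive.hcatalog.mapreduce.OutputJobInfo;

public class HCatWriteJobSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "hcat-write-sketch");
    // Describe the target table; a null partition map means an unpartitioned table
    // or dynamic partitioning ("mydb" and "mytable" are placeholders).
    OutputJobInfo outputJobInfo = OutputJobInfo.create("mydb", "mytable", null);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    // Reuse the table's schema for the records being written.
    HCatOutputFormat.setSchema(job, HCatOutputFormat.getTableSchema(job.getConfiguration()));
    job.setOutputFormatClass(HCatOutputFormat.class);
    // ... configure mapper/reducer and input, then job.waitForCompletion(true)
  }
}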
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class FileRecordWriterContainer method write.
@Override
public void write(WritableComparable<?> key, HCatRecord value) throws IOException, InterruptedException {
  LocalFileWriter localFileWriter = getLocalFileWriter(value);
  RecordWriter localWriter = localFileWriter.getLocalWriter();
  ObjectInspector localObjectInspector = localFileWriter.getLocalObjectInspector();
  AbstractSerDe localSerDe = localFileWriter.getLocalSerDe();
  OutputJobInfo localJobInfo = localFileWriter.getLocalJobInfo();
  for (Integer colToDel : partColsToDel) {
    value.remove(colToDel);
  }
  try {
    // The key given by user is ignored - in case of Parquet we need to supply null
    Object keyToWrite = localWriter instanceof ParquetRecordWriterWrapper ? null : NullWritable.get();
    localWriter.write(keyToWrite, localSerDe.serialize(value.getAll(), localObjectInspector));
  } catch (SerDeException e) {
    throw new IOException("Failed to serialize object", e);
  }
}
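A job reaches this write() through a map or reduce task that emits HCatRecord values, which the SerDe obtained above then serializes. A minimal, hypothetical reducer illustrating that is sketched below; the column layout and values are invented.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;

public class WriteToHCatReducer extends Reducer<Text, IntWritable, NullWritable, HCatRecord> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    // Two-column output record; the key is ignored by the file-based writer above.
    HCatRecord record = new DefaultHCatRecord(2);
    record.set(0, key.toString());
    record.set(1, sum);
    context.write(NullWritable.get(), record);
  }
}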
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestRegexSerDe method testRegexSerDe.
/**
 * Test the RegexSerDe class.
*/
public void testRegexSerDe() throws Throwable {
  try {
    // Create the SerDe
    AbstractSerDe serDe = createSerDe(
        "host,identity,user,time,request,status,size,referer,agent",
        "string,string,string,string,string,string,string,string,string",
        "([^ ]*) ([^ ]*) ([^ ]*) (-|\\[[^\\]]*\\]) ([^ \"]*|\"[^\"]*\") "
            + "([0-9]*) ([0-9]*) ([^ \"]*|\"[^\"]*\") ([^ \"]*|\"[^\"]*\")",
        "%1$s %2$s %3$s %4$s %5$s %6$s %7$s %8$s %9$s");
    // Data
    Text t = new Text("127.0.0.1 - - [26/May/2009:00:00:00 +0000] "
        + "\"GET /someurl/?track=Blabla(Main) HTTP/1.1\" 200 5864 - "
        + "\"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) "
        + "AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.65 Safari/525.19\"");
    // Deserialize
    Object row = serDe.deserialize(t);
    ObjectInspector rowOI = serDe.getObjectInspector();
    System.out.println("Deserialized row: " + row);
    // Serialize
    Text serialized = (Text) serDe.serialize(row, rowOI);
    assertEquals(t, serialized);
    // Do some changes (optional)
    ObjectInspector standardWritableRowOI =
        ObjectInspectorUtils.getStandardObjectInspector(rowOI, ObjectInspectorCopyOption.WRITABLE);
    Object standardWritableRow =
        ObjectInspectorUtils.copyToStandardObject(row, rowOI, ObjectInspectorCopyOption.WRITABLE);
    // Serialize
    serialized = (Text) serDe.serialize(standardWritableRow, standardWritableRowOI);
    assertEquals(t, serialized);
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
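The createSerDe helper is not part of this excerpt. Since the test both deserializes and serializes, it presumably builds the contrib RegexSerDe and wires its four arguments into the table properties, roughly as sketched below; the helper's exact shape is an assumption and the classic initialize(Configuration, Properties) signature may differ in newer Hive versions.

// Hypothetical helper matching the createSerDe(fieldNames, fieldTypes, inputRegex, outputFormatString) call above.
// Assumed imports: java.util.Properties, org.apache.hadoop.conf.Configuration,
// org.apache.hadoop.hive.contrib.serde2.RegexSerDe, org.apache.hadoop.hive.serde.serdeConstants
private static AbstractSerDe createSerDe(String fieldNames, String fieldTypes,
    String inputRegex, String outputFormatString) throws Exception {
  Properties schema = new Properties();
  schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);
  schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);
  // Property names used by the contrib RegexSerDe for parsing and for formatting output rows.
  schema.setProperty("input.regex", inputRegex);
  schema.setProperty("output.format.string", outputFormatString);
  RegexSerDe serDe = new RegexSerDe();
  serDe.initialize(new Configuration(), schema);
  return serDe;
}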