use of org.apache.hadoop.io.Writable in project hive by apache.
the class TestVectorMapJoinFastRowHashMap method addAndVerifyRows.
private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows,
    VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType,
    VerifyFastRowHashMap verifyTable, String[] keyTypeNames,
    boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
  final int keyCount = keyTypeNames.length;
  PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
  PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
  ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList = new ArrayList<ObjectInspector>(keyCount);
  for (int i = 0; i < keyCount; i++) {
    PrimitiveTypeInfo primitiveTypeInfo =
        (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
    keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
    PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
    keyPrimitiveCategories[i] = primitiveCategory;
    keyPrimitiveObjectInspectorList.add(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
  }
  boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
  Arrays.fill(keyColumnSortOrderIsDesc, false);
  byte[] keyColumnNullMarker = new byte[keyCount];
  Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
  byte[] keyColumnNotNullMarker = new byte[keyCount];
  Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
  BinarySortableSerializeWrite keySerializeWrite =
      new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
  PrimitiveTypeInfo[] valuePrimitiveTypeInfos = valueSource.primitiveTypeInfos();
  final int columnCount = valuePrimitiveTypeInfos.length;
  SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
  final int count = rows.length;
  for (int i = 0; i < count; i++) {
    Object[] valueRow = rows[i];
    Output valueOutput = new Output();
    ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) valueRow[index];
      VerifyFastRow.serializeWrite(valueSerializeWrite, valuePrimitiveTypeInfos[index], writable);
    }
    byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
    // Add a new key or add a value to an existing key?
    byte[] key;
    if (random.nextBoolean() || verifyTable.getCount() == 0) {
      // Serialize a fresh random keyRow into key bytes.
      Object[] keyRow = VectorRandomRowSource.randomRow(keyCount, random,
          keyPrimitiveObjectInspectorList, keyPrimitiveCategories, keyPrimitiveTypeInfos);
      Output keyOutput = new Output();
      keySerializeWrite.set(keyOutput);
      for (int index = 0; index < keyCount; index++) {
        Writable writable = (Writable) keyRow[index];
        VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], writable);
      }
      key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
      verifyTable.add(key, keyRow, value, valueRow);
    } else {
      key = verifyTable.addRandomExisting(value, valueRow, random);
    }
    // Wrap the serialized key and value bytes as Writables and insert them into the hash table.
    BytesWritable keyWritable = new BytesWritable(key);
    BytesWritable valueWritable = new BytesWritable(value);
    map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
  }
  verifyTable.verify(map, hashTableKeyType, valuePrimitiveTypeInfos, doClipping, useExactBytes, random);
}
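The method above only hands the hash table opaque byte arrays wrapped in BytesWritable. As a reminder of the org.apache.hadoop.io.Writable contract that makes this wrapping convenient, here is a minimal, self-contained sketch (hypothetical class name; standard Hadoop APIs only, not part of the Hive test) that round-trips a BytesWritable through an in-memory buffer:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class BytesWritableRoundTrip {
  public static void main(String[] args) throws IOException {
    // Stand-in for the BinarySortable/LazyBinary key bytes produced in the test.
    byte[] key = "some-serialized-key".getBytes(StandardCharsets.UTF_8);
    BytesWritable keyWritable = new BytesWritable(key);

    // Write the Writable to an in-memory buffer, as a SequenceFile or the shuffle would.
    DataOutputBuffer out = new DataOutputBuffer();
    keyWritable.write(out);

    // Read it back into a fresh instance.
    BytesWritable restored = new BytesWritable();
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    restored.readFields(in);

    // copyBytes() trims the backing array to getLength(), much like Arrays.copyOf in the test.
    System.out.println("round trip ok: " + Arrays.equals(key, restored.copyBytes()));
  }
}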
use of org.apache.hadoop.io.Writable in project hive by apache.
the class CheckFastRowHashMap method verifyHashMapRows.
public static void verifyHashMapRows(List<Object[]> rows, int[] actualToValueMap,
    VectorMapJoinHashMapResult hashMapResult, TypeInfo[] typeInfos) throws IOException {
  final int count = rows.size();
  final int columnCount = typeInfos.length;
  WriteBuffers.ByteSegmentRef ref = hashMapResult.first();
  for (int a = 0; a < count; a++) {
    int valueIndex = actualToValueMap[a];
    Object[] row = rows.get(valueIndex);
    byte[] bytes = ref.getBytes();
    int offset = (int) ref.getOffset();
    int length = ref.getLength();
    LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
        new LazyBinaryDeserializeRead(typeInfos, /* useExternalBuffer */ false);
    lazyBinaryDeserializeRead.set(bytes, offset, length);
    for (int index = 0; index < columnCount; index++) {
      Writable writable = (Writable) row[index];
      VerifyFastRow.verifyDeserializeRead(lazyBinaryDeserializeRead, (PrimitiveTypeInfo) typeInfos[index], writable);
    }
    TestCase.assertTrue(lazyBinaryDeserializeRead.isEndOfInputReached());
    ref = hashMapResult.next();
    if (a == count - 1) {
      TestCase.assertTrue(ref == null);
    } else {
      TestCase.assertTrue(ref != null);
    }
  }
}
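The verification above walks WriteBuffers.ByteSegmentRef entries, each pointing at an offset/length window of a shared buffer. The following standalone sketch (hypothetical class; it uses plain Hadoop Writables rather than Hive's LazyBinaryDeserializeRead) illustrates the same pattern of reading a value out of a segment of a larger byte array without copying it first:

import java.io.IOException;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class SegmentReadSketch {
  public static void main(String[] args) throws IOException {
    // Pack two Writables back to back into one buffer, remembering where the second starts.
    DataOutputBuffer out = new DataOutputBuffer();
    new Text("header").write(out);
    int offset = out.getLength();
    new IntWritable(42).write(out);
    int length = out.getLength() - offset;

    // Point a DataInputBuffer at just the second segment, analogous to using
    // ref.getBytes()/getOffset()/getLength() followed by deserializeRead.set(...).
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), offset, length);
    IntWritable value = new IntWritable();
    value.readFields(in);
    System.out.println("value read from segment: " + value.get());
  }
}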
use of org.apache.hadoop.io.Writable in project hive by apache.
the class TestParquetSerDe method testParquetHiveSerDe.
public void testParquetHiveSerDe() throws Throwable {
  try {
    // Create the SerDe
    System.out.println("test: testParquetHiveSerDe");
    final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    final Configuration conf = new Configuration();
    final Properties tbl = createProperties();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    // Data
    final Writable[] arr = new Writable[9];
    // Primitive types
    arr[0] = new ByteWritable((byte) 123);
    arr[1] = new ShortWritable((short) 456);
    arr[2] = new IntWritable(789);
    arr[3] = new LongWritable(1000L);
    arr[4] = new DoubleWritable(5.3);
    arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
    arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
    // Map of string keys to int values, encoded as an array of two-element key/value pairs
    final Writable[] map = new Writable[3];
    for (int i = 0; i < 3; ++i) {
      final Writable[] pair = new Writable[2];
      pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
      pair[1] = new IntWritable(i);
      map[i] = new ArrayWritable(Writable.class, pair);
    }
    arr[7] = new ArrayWritable(Writable.class, map);
    // List of strings
    final Writable[] array = new Writable[5];
    for (int i = 0; i < 5; ++i) {
      array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
    }
    arr[8] = new ArrayWritable(Writable.class, array);
    final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
    // Test
    deserializeAndSerializeLazySimple(serDe, arrWritable);
    System.out.println("test: testParquetHiveSerDe - OK");
  } catch (final Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
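The test builds Parquet's Writable representation by hand: a Hive map becomes an ArrayWritable whose elements are two-element key/value ArrayWritables, and a list becomes a flat ArrayWritable. A hypothetical helper (not part of the test) could read that nested encoding back as follows:

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class NestedArrayWritableReader {
  // Walks a map encoded as an ArrayWritable of [key, value] pairs, e.g. arr[7] above.
  static void printMapEntries(ArrayWritable mapWritable) {
    for (Writable entry : mapWritable.get()) {
      Writable[] pair = ((ArrayWritable) entry).get();
      String key = new String(((BytesWritable) pair[0]).copyBytes(), StandardCharsets.UTF_8);
      int value = ((IntWritable) pair[1]).get();
      System.out.println(key + " -> " + value);
    }
  }
}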
use of org.apache.hadoop.io.Writable in project hive by apache.
the class TestAvroSerializer method canSerializeStructs.
@Test
public void canSerializeStructs() throws SerDeException {
  String field = "{ \"name\":\"struct1\", \"type\":{\"type\":\"record\", " +
      "\"name\":\"struct1_name\", \"fields\": [\n" +
      "{ \"name\":\"sInt\", \"type\":\"int\" }, { \"name\"" +
      ":\"sBoolean\", \"type\":\"boolean\" }, { \"name\":\"sString\", \"type\":\"string\" } ] } }";
  Schema s = buildSchema(field);
  GenericData.Record innerRecord = new GenericData.Record(s.getField("struct1").schema());
  innerRecord.put("sInt", 77);
  innerRecord.put("sBoolean", false);
  innerRecord.put("sString", "tedious");
  GenericData.Record r = new GenericData.Record(s);
  r.put("struct1", innerRecord);
  AvroSerializer as = new AvroSerializer();
  AvroDeserializer ad = new AvroDeserializer();
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
  ObjectInspector oi = aoig.getObjectInspector();
  List<String> columnNames = aoig.getColumnNames();
  List<TypeInfo> columnTypes = aoig.getColumnTypes();
  AvroGenericRecordWritable agrw = new AvroGenericRecordWritable(r);
  agrw.setFileSchema(r.getSchema());
  Object obj = ad.deserialize(columnNames, columnTypes, agrw, s);
  Writable result = as.serialize(obj, oi, columnNames, columnTypes, s);
  assertTrue(result instanceof AvroGenericRecordWritable);
  GenericRecord r2 = ((AvroGenericRecordWritable) result).getRecord();
  assertEquals(s, r2.getSchema());
  GenericRecord r3 = (GenericRecord) r2.get("struct1");
  assertEquals(77, r3.get("sInt"));
  assertEquals(false, r3.get("sBoolean"));
  assertEquals("tedious", r3.get("sString"));
}
use of org.apache.hadoop.io.Writable in project hive by apache.
the class TestAvroSerializer method serializeAndDeserialize.
/**
 * Verify that we can serialize an Avro value by taking one, running it through
 * the deserialization process, and then serializing it again.
 */
private GenericRecord serializeAndDeserialize(String recordValue, String fieldName, Object fieldValue)
    throws SerDeException, IOException {
  Schema s = buildSchema(recordValue);
  GenericData.Record r = new GenericData.Record(s);
  r.put(fieldName, fieldValue);
  AvroSerializer as = new AvroSerializer();
  AvroDeserializer ad = new AvroDeserializer();
  AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
  ObjectInspector oi = aoig.getObjectInspector();
  List<String> columnNames = aoig.getColumnNames();
  List<TypeInfo> columnTypes = aoig.getColumnTypes();
  AvroGenericRecordWritable agrw = Utils.serializeAndDeserializeRecord(r);
  Object obj = ad.deserialize(columnNames, columnTypes, agrw, s);
  Writable result = as.serialize(obj, oi, columnNames, columnTypes, s);
  assertTrue(result instanceof AvroGenericRecordWritable);
  GenericRecord r2 = ((AvroGenericRecordWritable) result).getRecord();
  assertEquals(s, r2.getSchema());
  return r2;
}
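A caller exercises this helper by building a one-field record, round-tripping it, and checking that the field value survives. The sketch below follows the same pattern as the primitive-type tests in this class; the method name, field JSON, and values are illustrative:

@Test
public void canRoundTripAnInt() throws SerDeException, IOException {
  GenericRecord r = serializeAndDeserialize("{ \"name\":\"int1\", \"type\":\"int\" }", "int1", 42);
  assertEquals(42, r.get("int1"));
}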