Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
From the class TestMapJoinOperator, method doTestLong.
public void doTestLong(long seed, TypeInfo numberTypeInfo, VectorMapJoinVariation vectorMapJoinVariation) throws Exception {
  int rowCount = 10000;
  HiveConf hiveConf = new HiveConf();
  String[] bigTableColumnNames = new String[] { "number1" };
  TypeInfo[] bigTableTypeInfos = new TypeInfo[] { TypeInfoFactory.longTypeInfo };
  int[] bigTableKeyColumnNums = new int[] { 0 };
  String[] smallTableValueColumnNames = new String[] { "sv1", "sv2" };
  TypeInfo[] smallTableValueTypeInfos = new TypeInfo[] { TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo };
  int[] bigTableRetainColumnNums = new int[] { 0 };
  int[] smallTableRetainKeyColumnNums = new int[] {};
  int[] smallTableRetainValueColumnNums = new int[] { 0, 1 };
  SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters();
  // ----------------------------------------------------------------------------------------------
  MapJoinTestDescription testDesc = new MapJoinTestDescription(hiveConf, vectorMapJoinVariation, bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, smallTableGenerationParameters);
  // Prepare data. Good for ANY implementation variation.
  MapJoinTestData testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10);
  executeTest(testDesc, testData);
}
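For context, a caller would typically sweep this method across the join variations; a minimal hypothetical sketch of such a driver, assuming it runs inside a test method that declares throws Exception (the seed value and loop are illustrative, not the actual Hive test driver):

// Hypothetical driver: exercise doTestLong with every VectorMapJoinVariation.
// The starting seed is an arbitrary illustrative value.
long seed = 2543;
for (VectorMapJoinVariation variation : VectorMapJoinVariation.values()) {
  doTestLong(seed++, TypeInfoFactory.longTypeInfo, variation);
}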
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project hive by apache.
From the class TestVectorMapJoinFastRowHashMap, method addAndVerifyRows.
private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows, VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType, VerifyFastRowHashMap verifyTable, String[] keyTypeNames, boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
  final int keyCount = keyTypeNames.length;
  PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
  PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
  ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList = new ArrayList<ObjectInspector>(keyCount);
  for (int i = 0; i < keyCount; i++) {
    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
    keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
    PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
    keyPrimitiveCategories[i] = primitiveCategory;
    keyPrimitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo));
  }
  boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
  Arrays.fill(keyColumnSortOrderIsDesc, false);
  byte[] keyColumnNullMarker = new byte[keyCount];
  Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
  byte[] keyColumnNotNullMarker = new byte[keyCount];
  Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
  BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
  TypeInfo[] valueTypeInfos = valueSource.typeInfos();
  final int columnCount = valueTypeInfos.length;
  SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
  final int count = rows.length;
  for (int i = 0; i < count; i++) {
    Object[] valueRow = rows[i];
    Output valueOutput = new Output();
    ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
    for (int index = 0; index < columnCount; index++) {
      VerifyFastRow.serializeWrite(valueSerializeWrite, valueTypeInfos[index], valueRow[index]);
    }
    byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
    // Add a new key or add a value to an existing key?
    byte[] key;
    if (random.nextBoolean() || verifyTable.getCount() == 0) {
      Object[] keyRow = VectorRandomRowSource.randomWritablePrimitiveRow(keyCount, random, keyPrimitiveTypeInfos);
      Output keyOutput = new Output();
      keySerializeWrite.set(keyOutput);
      for (int index = 0; index < keyCount; index++) {
        VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], keyRow[index]);
      }
      key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
      verifyTable.add(key, keyRow, value, valueRow);
    } else {
      key = verifyTable.addRandomExisting(value, valueRow, random);
    }
    // Wrap the serialized key and value bytes and insert the pair into the hash table under test.
    BytesWritable keyWritable = new BytesWritable(key);
    BytesWritable valueWritable = new BytesWritable(value);
    map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
  }
  verifyTable.verify(map, hashTableKeyType, valueTypeInfos, doClipping, useExactBytes, random);
}
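The key setup above relies on TypeInfoUtils.getTypeInfoFromTypeString to turn Hive type-name strings into TypeInfo objects; a small standalone sketch of that conversion (the type names below are sample values, not the ones the test actually passes in):

// Parse Hive type-name strings into PrimitiveTypeInfo and inspect their categories.
String[] sampleTypeNames = new String[] { "int", "string", "timestamp" };
for (String typeName : sampleTypeNames) {
  PrimitiveTypeInfo typeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
  System.out.println(typeInfo.getTypeName() + " -> " + typeInfo.getPrimitiveCategory());
}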
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.
From the class TestConvertAvroToORC, method test_onTrigger_nested_complex_record.
@Test
public void test_onTrigger_nested_complex_record() throws Exception {
  Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {
    {
      put("key1", Arrays.asList(1.0, 2.0));
      put("key2", Arrays.asList(3.0, 4.0));
    }
  };
  Map<String, String> arrayMap11 = new TreeMap<String, String>() {
    {
      put("key1", "v1");
      put("key2", "v2");
    }
  };
  Map<String, String> arrayMap12 = new TreeMap<String, String>() {
    {
      put("key3", "v3");
      put("key4", "v4");
    }
  };
  GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));
  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);
  // Put another record in
  Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {
    {
      put("key1", Arrays.asList(-1.0, -2.0));
      put("key2", Arrays.asList(-3.0, -4.0));
    }
  };
  Map<String, String> arrayMap21 = new TreeMap<String, String>() {
    {
      put("key1", "v-1");
      put("key2", "v-2");
    }
  };
  Map<String, String> arrayMap22 = new TreeMap<String, String>() {
    {
      put("key3", "v-3");
      put("key4", "v-4");
    }
  };
  record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();
  Map<String, String> attributes = new HashMap<String, String>() {
    {
      put(CoreAttributes.FILENAME.key(), "test");
    }
  };
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();
  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " + "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
  // check values
  Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
  assertTrue(myMapOfArray instanceof Map);
  Map map = (Map) myMapOfArray;
  Object mapValue = map.get(new Text("key1"));
  assertNotNull(mapValue);
  assertTrue(mapValue instanceof List);
  assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);
  Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
  assertTrue(myArrayOfMap instanceof List);
  List list = (List) myArrayOfMap;
  Object el0 = list.get(0);
  assertNotNull(el0);
  assertTrue(el0 instanceof Map);
  assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
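The expected schema returned by TestNiFiOrcUtils.buildNestedComplexOrcSchema() describes a struct with a map<string,array<double>> field and an array<map<string,string>> field; a sketch of assembling an equivalent TypeInfo by hand with TypeInfoFactory (an illustration, not the helper's actual implementation):

// struct<myMapOfArray:map<string,array<double>>, myArrayOfMap:array<map<string,string>>>
TypeInfo mapOfArray = TypeInfoFactory.getMapTypeInfo(
    TypeInfoFactory.stringTypeInfo,
    TypeInfoFactory.getListTypeInfo(TypeInfoFactory.doubleTypeInfo));
TypeInfo arrayOfMap = TypeInfoFactory.getListTypeInfo(
    TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo));
TypeInfo nestedComplexStruct = TypeInfoFactory.getStructTypeInfo(
    Arrays.asList("myMapOfArray", "myArrayOfMap"),
    Arrays.asList(mapOfArray, arrayOfMap));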
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.
From the class TestConvertAvroToORC, method test_onTrigger_primitive_record.
@Test
public void test_onTrigger_primitive_record() throws Exception {
  GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");
  DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
  DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  fileWriter.create(record.getSchema(), out);
  fileWriter.append(record);
  // Put another record in
  record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
  fileWriter.append(record);
  // And one more
  record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
  fileWriter.append(record);
  fileWriter.flush();
  fileWriter.close();
  out.close();
  Map<String, String> attributes = new HashMap<String, String>() {
    {
      put(CoreAttributes.FILENAME.key(), "test.avro");
    }
  };
  runner.enqueue(out.toByteArray(), attributes);
  runner.run();
  runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);
  // Write the flow file out to disk, since the ORC Reader needs a path
  MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
  assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)" + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
  assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
  assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
  byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
  FileOutputStream fos = new FileOutputStream("target/test1.orc");
  fos.write(resultContents);
  fos.flush();
  fos.close();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
  RecordReader rows = reader.rows();
  Object o = rows.next(null);
  assertNotNull(o);
  assertTrue(o instanceof OrcStruct);
  TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
  StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);
  // Check some fields in the first row
  Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
  assertTrue(intFieldObject instanceof IntWritable);
  assertEquals(10, ((IntWritable) intFieldObject).get());
  Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
  assertTrue(stringFieldObject instanceof Text);
  assertEquals("World", stringFieldObject.toString());
}
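buildPrimitiveOrcSchema() above yields a flat struct that matches the generated DDL; an alternative sketch that obtains an equivalent TypeInfo from a Hive type string and derives an inspector from it (assuming the DDL's column names are acceptable as plain struct field names here):

// Parse the struct TypeInfo from its Hive type-name string, then derive an ORC object inspector.
TypeInfo primitiveStruct = TypeInfoUtils.getTypeInfoFromTypeString(
    "struct<int:int,long:bigint,boolean:boolean,float:float,double:double,bytes:binary,string:string>");
StructObjectInspector structInspector = (StructObjectInspector) OrcStruct.createObjectInspector(primitiveStruct);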
Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfo in project nifi by apache.
From the class TestNiFiOrcUtils, method test_getOrcField_union.
@Test
public void test_getOrcField_union() throws Exception {
  final SchemaBuilder.FieldAssembler<Schema> builder = SchemaBuilder.record("testRecord").namespace("any.data").fields();
  builder.name("union").type().unionOf().intType().and().booleanType().endUnion().noDefault();
  Schema testSchema = builder.endRecord();
  TypeInfo orcType = NiFiOrcUtils.getOrcField(testSchema.getField("union").schema());
  assertEquals(TypeInfoFactory.getUnionTypeInfo(Arrays.asList(TypeInfoCreator.createInt(), TypeInfoCreator.createBoolean())), orcType);
}
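The same union type can also be expressed as a Hive type-name string; a small sanity-check sketch of the round trip (not part of the NiFi test):

// uniontype<int,boolean> should round-trip through the type-string parser.
TypeInfo expectedUnion = TypeInfoFactory.getUnionTypeInfo(
    Arrays.<TypeInfo>asList(TypeInfoFactory.intTypeInfo, TypeInfoFactory.booleanTypeInfo));
assertEquals("uniontype<int,boolean>", expectedUnion.getTypeName());
assertEquals(expectedUnion, TypeInfoUtils.getTypeInfoFromTypeString("uniontype<int,boolean>"));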