use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
the class TestOrcPageSourceMemoryTracking method createTestFile.
public static FileSplit createTestFile(
        String filePath,
        HiveOutputFormat<?, ?> outputFormat,
        @SuppressWarnings("deprecation") SerDe serDe,
        String compressionCodec,
        List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, tableProperties);

    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            // reuse the same row object: overwrite every field, then serialize
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serDe.serialize(row, objectInspector);
            recordWriter.write(record);
            // start a new stripe every STRIPE_ROWS rows so the file ends up with multiple stripes
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
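The pattern above, build one SettableStructObjectInspector for the schema, create a single row object, and overwrite its fields for every record before serializing, can be exercised in isolation. Below is a minimal, self-contained sketch using only the standard Hive object-inspector factories; the two-column schema, values, and class name are purely illustrative and not part of the Presto test.

    import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector;
    import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    import com.google.common.collect.ImmutableList;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.StructField;

    public class SettableStructExample
    {
        public static void main(String[] args)
        {
            // hypothetical two-column schema: (id int, name string)
            SettableStructObjectInspector inspector = getStandardStructObjectInspector(
                    ImmutableList.of("id", "name"),
                    ImmutableList.of(javaIntObjectInspector, javaStringObjectInspector));

            Object row = inspector.create();
            List<? extends StructField> fields = inspector.getAllStructFieldRefs();

            // the same row object is reused for every record, exactly as in createTestFile above
            for (int i = 0; i < 3; i++) {
                inspector.setStructFieldData(row, fields.get(0), i);
                inspector.setStructFieldData(row, fields.get(1), "row-" + i);
                System.out.println(inspector.getStructFieldsDataAsList(row));
            }
        }
    }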
use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
the class ParquetTester method writeParquetColumn.
private static DataSize writeParquetColumn(
        JobConf jobConf,
        File outputFile,
        CompressionCodecName compressionCodecName,
        ObjectInspector columnObjectInspector,
        Iterator<?> values)
        throws Exception
{
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodecName != UNCOMPRESSED,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});

    // wrap the single column in a settable one-field struct so the Parquet SerDe can serialize full rows
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    int i = 0;
    while (values.hasNext()) {
        Object value = values.next();
        objectInspector.setStructFieldData(row, fields.get(0), value);
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, createTableProperties("test", columnObjectInspector.getTypeName()), null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
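createSettableStructObjectInspector and createTableProperties are private helpers of ParquetTester, so the snippet is not callable as-is from outside. A plausible stand-in for the first helper, assembled only from the standard Hive factories, is sketched below; it is an assumption about the helper's behavior (a one-field settable struct wrapping the column inspector), not Presto's actual implementation.

    import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector;

    import com.google.common.collect.ImmutableList;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;

    public final class ObjectInspectorHelpers
    {
        private ObjectInspectorHelpers() {}

        // wraps a single column inspector in a one-field settable struct, mirroring the call in writeParquetColumn
        public static SettableStructObjectInspector createSettableStructObjectInspector(String name, ObjectInspector columnObjectInspector)
        {
            return getStandardStructObjectInspector(ImmutableList.of(name), ImmutableList.of(columnObjectInspector));
        }
    }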
use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project nifi by apache.
the class NiFiOrcUtils method createOrcStruct.
/**
 * Create an object of OrcStruct given a TypeInfo and a list of objects
 *
 * @param typeInfo The TypeInfo object representing the ORC record schema
 * @param objs     ORC objects/Writables
 * @return an OrcStruct containing the specified objects for the specified schema
 */
public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
    SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
    List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
    OrcStruct result = (OrcStruct) oi.create();
    result.setNumFields(fields.size());
    for (int i = 0; i < fields.size(); i++) {
        oi.setStructFieldData(result, fields.get(i), objs[i]);
    }
    return result;
}
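A hypothetical caller derives the TypeInfo from a Hive type string and passes one Writable per struct field, in schema order. The type string, field values, and class name below are illustrative only, and NiFiOrcUtils is assumed to be importable from the NiFi Hive bundle.

    import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;

    public class CreateOrcStructExample
    {
        public static void main(String[] args)
        {
            // hypothetical two-field record schema
            TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>");

            // values must be supplied in schema order: name, then age
            // NiFiOrcUtils is assumed to be on the classpath (NiFi Hive bundle)
            OrcStruct struct = NiFiOrcUtils.createOrcStruct(typeInfo, new Text("alice"), new IntWritable(42));
            System.out.println(struct);
        }
    }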
use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project hive by apache.
the class VectorExpressionWriterFactory method getSettableExpressionWriters.
public static VectorExpressionWriter[] getSettableExpressionWriters(SettableStructObjectInspector objInspector)
        throws HiveException {
    List<? extends StructField> fieldsRef = objInspector.getAllStructFieldRefs();
    VectorExpressionWriter[] writers = new VectorExpressionWriter[fieldsRef.size()];
    for (int i = 0; i < writers.length; ++i) {
        StructField fieldRef = fieldsRef.get(i);
        VectorExpressionWriter baseWriter = genVectorExpressionWritable(fieldRef.getFieldObjectInspector());
        writers[i] = genVectorExpressionWritable(objInspector, fieldRef, baseWriter);
    }
    return writers;
}
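Inside Hive, the struct inspector passed here is normally the row object inspector of the vectorized operator pipeline. As a rough sketch of the call shape only, a standard struct inspector over writable primitives (shown below with a made-up two-column schema) also satisfies the SettableStructObjectInspector parameter:

    import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector;
    import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableStringObjectInspector;

    import com.google.common.collect.ImmutableList;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;

    public class SettableWritersExample
    {
        public static void main(String[] args) throws HiveException
        {
            // hypothetical row schema (id bigint, name string) using writable primitive inspectors
            SettableStructObjectInspector rowInspector = getStandardStructObjectInspector(
                    ImmutableList.of("id", "name"),
                    ImmutableList.of(writableLongObjectInspector, writableStringObjectInspector));

            // one writer per struct field, in field order
            VectorExpressionWriter[] writers = VectorExpressionWriterFactory.getSettableExpressionWriters(rowInspector);
            System.out.println(writers.length);
        }
    }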
use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
the class ParquetTestUtils method writeParquetColumnHive.
static void writeParquetColumnHive(File file, String columnName, boolean nullable, Type type, Iterator<?> values)
        throws Exception
{
    JobConf jobConf = new JobConf();

    // Set this config to get around the issue of LocalFileSystem not getting registered when running the
    // benchmarks using the standalone jar with all dependencies
    jobConf.set("fs.file.impl", LocalFileSystem.class.getCanonicalName());

    jobConf.setLong(ParquetOutputFormat.BLOCK_SIZE, new DataSize(256, MEGABYTE).toBytes());
    jobConf.setLong(ParquetOutputFormat.PAGE_SIZE, new DataSize(100, KILOBYTE).toBytes());
    jobConf.set(ParquetOutputFormat.COMPRESSION, "snappy");

    Properties properties = new Properties();
    properties.setProperty("columns", columnName);
    properties.setProperty("columns.types", getHiveType(type));

    RecordWriter recordWriter = createParquetWriter(nullable, new Path(file.getAbsolutePath()), jobConf, properties, true);

    List<ObjectInspector> objectInspectors = getRowObjectInspectors(type);
    SettableStructObjectInspector tableObjectInspector = getStandardStructObjectInspector(ImmutableList.of(columnName), objectInspectors);

    Object row = tableObjectInspector.create();
    StructField structField = tableObjectInspector.getStructFieldRef(columnName);
    Setter setter = getSetter(type, tableObjectInspector, row, structField);
    Serializer serializer = initializeSerializer(jobConf, properties);

    while (values.hasNext()) {
        Object value = values.next();
        if (value == null) {
            tableObjectInspector.setStructFieldData(row, structField, null);
        }
        else {
            setter.set(value);
        }
        recordWriter.write(serializer.serialize(row, tableObjectInspector));
    }

    recordWriter.close(false);
}
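writeParquetColumnHive and its enclosing ParquetTestUtils class are package-private, so a driver has to live in the same package. The call below is an illustrative sketch only: the output path, column name, and row count are hypothetical, and BIGINT refers to Presto's bigint Type constant (its package has moved between Presto releases, so the import shown is an assumption).

    import static com.facebook.presto.common.type.BigintType.BIGINT;

    import java.io.File;
    import java.util.stream.LongStream;

    public class WriteParquetColumnHiveExample
    {
        public static void main(String[] args) throws Exception
        {
            // hypothetical output location
            File file = new File("/tmp/bigint-column.parquet");

            // writes 10,000 nullable bigint values into a single-column Parquet file
            ParquetTestUtils.writeParquetColumnHive(
                    file,
                    "test_column",
                    true,
                    BIGINT,
                    LongStream.range(0, 10_000).boxed().iterator());
        }
    }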