Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
From the class TestOrcReaderMemoryUsage, method createSingleColumnVarcharFile:
/**
 * Write a file that contains the given number of rows, each with a single non-null VARCHAR column.
 */
private static TempFile createSingleColumnVarcharFile(int count, int length)
        throws Exception
{
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, VARCHAR);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, Strings.repeat("0", length));
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
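For reference, a minimal sketch of how this helper might be driven from a test, assuming TempFile implements Closeable as it does in Presto's ORC test utilities; the reader setup is elided.

// Illustrative only: write 10,000 rows of 10-character strings, then point the
// ORC reader under test at the file and assert on its memory usage.
try (TempFile tempFile = createSingleColumnVarcharFile(10_000, 10)) {
    File orcFile = tempFile.getFile();
    // ... open an OrcReader over orcFile and check retained sizes ...
}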
Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
From the class TestOrcReaderMemoryUsage, method createSingleColumnMapFileWithNullValues:
/**
 * Write a file that contains the given number of rows, each with a single map column of
 * 10 entries, where one entry has a null key and one has a null value.
 */
private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, int rows)
        throws IOException, ReflectiveOperationException, SerDeException
{
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, mapType);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", mapType);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 1; i <= rows; i++) {
        HashMap<Long, Long> map = new HashMap<>();
        for (int j = 1; j <= 8; j++) {
            Long value = (long) j;
            map.put(value, value);
        }
        // Add a null key and a null value so that the StreamReader nullVectors are not empty.
        map.put(null, 0L);
        map.put(0L, null);
        objectInspector.setStructFieldData(row, field, map);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
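A hedged invocation sketch; the mapType(BIGINT, BIGINT) factory is an assumption standing in for however the test suite constructs a Presto map type.

// Hypothetical usage: one map<bigint, bigint> column, 1,000 rows, each row
// carrying the null-key and null-value entries added above.
Type mapType = mapType(BIGINT, BIGINT);
TempFile tempFile = createSingleColumnMapFileWithNullValues(mapType, 1_000);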
Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
From the class AbstractTestHiveFileFormats, method createTestFile:
public static FileSplit createTestFile(
        String filePath,
        HiveStorageFormat storageFormat,
        HiveCompressionCodec compressionCodec,
        List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
    Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);

    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serializer.initialize(new Configuration(), tableProperties);

    JobConf jobConf = configureCompression(new JobConf(), compressionCodec);

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
            jobConf,
            new Path(filePath),
            Text.class,
            compressionCodec != HiveCompressionCodec.NONE,
            tableProperties,
            () -> {});

    try {
        serializer.initialize(new Configuration(), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        recordWriter.close(false);
    }

    // TODO: to test with compression, the file must be renamed with the compression extension
    Path path = new Path(filePath);
    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
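A minimal usage sketch, assuming a scratch directory and a prebuilt List<TestColumn> fixture (both names here are illustrative):

// Illustrative only: write 1,000 rows as uncompressed ORC and get back a split
// that the file-format tests can feed to a page source.
FileSplit split = createTestFile(
        new File(scratchDir, "test.orc").getAbsolutePath(), // scratchDir is assumed
        HiveStorageFormat.ORC,
        HiveCompressionCodec.NONE,
        testColumns, // a List<TestColumn> fixture
        1_000);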
Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
From the class TestOrcBatchPageSourceMemoryTracking, method createTestFile:
public static FileSplit createTestFile(
        String filePath,
        HiveOutputFormat<?, ?> outputFormat,
        Serializer serializer,
        String compressionCodec,
        List<TestColumn> testColumns,
        int numRows,
        int stripeRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serializer.initialize(CONFIGURATION, tableProperties);

    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));

        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % stripeRows == stripeRows - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
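Note that flushStripe is a helper defined elsewhere in the class that forces the writer to close the current stripe, so stripeRows controls how many stripes the file ends up with. A hedged invocation sketch; the OrcOutputFormat/OrcSerde pairing and the argument values are assumptions for illustration:

// Illustrative only: 10,000 rows cut into stripes of 1,000 rows each, so the
// memory-tracking assertions can observe the reader crossing stripe boundaries.
FileSplit split = createTestFile(
        filePath, // scratch file path, assumed writable
        new OrcOutputFormat(),
        new OrcSerde(),
        null, // no compression
        testColumns,
        10_000,
        1_000);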
Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.
From the class OrcTester, method writeOrcFileColumnOld:
public static DataSize writeOrcFileColumnOld(File outputFile, Format format, RecordWriter recordWriter, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception
{
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    int i = 0;
    TypeInfo typeInfo = getTypeInfoFromTypeString(columnObjectInspector.getTypeName());
    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(typeInfo, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        @SuppressWarnings("deprecation")
        Serializer serde;
        if (DWRF == format) {
            serde = new com.facebook.hive.orc.OrcSerde();
            if (i == 142_345) {
                setDwrfLowMemoryFlag(recordWriter);
            }
        }
        else {
            serde = new OrcSerde();
        }
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
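A hedged sketch of exercising the old writer path with a column of bigints; it assumes the createOrcRecordWriter helper shown earlier and Hive's javaLongObjectInspector (from PrimitiveObjectInspectorFactory), and is not the exact Presto test code:

// Illustrative only: stream 100 long values through the old ORC writer and
// report how large the resulting file is.
File outputFile = File.createTempFile("test", ".orc");
RecordWriter recordWriter = createOrcRecordWriter(outputFile, ORC_12, CompressionKind.NONE, BIGINT);
Iterator<?> values = LongStream.range(0, 100).boxed().iterator();
DataSize written = writeOrcFileColumnOld(outputFile, ORC_12, recordWriter, javaLongObjectInspector, values);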