Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb: the writeParquetColumn method of the ParquetTester class.
private static DataSize writeParquetColumn(JobConf jobConf, File outputFile, CompressionCodecName compressionCodecName, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception {
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodecName != UNCOMPRESSED,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    int i = 0;
    while (values.hasNext()) {
        Object value = values.next();
        objectInspector.setStructFieldData(row, fields.get(0), value);
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, createTableProperties("test", columnObjectInspector.getTypeName()), null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
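For context, a minimal sketch of how this private helper might be invoked from a test in the same class; the temporary file, the GZIP codec, the string object inspector, and the sample values are assumptions for illustration, not part of the original source.

// Hypothetical invocation (writeParquetColumn is private to ParquetTester).
File outputFile = File.createTempFile("column", ".parquet");
DataSize written = writeParquetColumn(
        new JobConf(),
        outputFile,
        CompressionCodecName.GZIP,
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        ImmutableList.of("a", "b", "c").iterator());

The returned DataSize simply reports the final size of the Parquet file on disk, via succinctBytes(outputFile.length()).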
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb: the writeParquetColumnHive method of the ParquetTestUtils class.
static void writeParquetColumnHive(File file, String columnName, boolean nullable, Type type, Iterator<?> values)
        throws Exception {
    JobConf jobConf = new JobConf();
    // Set this config to get around the issue of LocalFileSystem not getting registered
    // when running the benchmarks using the standalone jar with all dependencies
    jobConf.set("fs.file.impl", LocalFileSystem.class.getCanonicalName());
    jobConf.setLong(ParquetOutputFormat.BLOCK_SIZE, new DataSize(256, MEGABYTE).toBytes());
    jobConf.setLong(ParquetOutputFormat.PAGE_SIZE, new DataSize(100, KILOBYTE).toBytes());
    jobConf.set(ParquetOutputFormat.COMPRESSION, "snappy");
    Properties properties = new Properties();
    properties.setProperty("columns", columnName);
    properties.setProperty("columns.types", getHiveType(type));
    RecordWriter recordWriter = createParquetWriter(nullable, new Path(file.getAbsolutePath()), jobConf, properties, true);
    List<ObjectInspector> objectInspectors = getRowObjectInspectors(type);
    SettableStructObjectInspector tableObjectInspector = getStandardStructObjectInspector(ImmutableList.of(columnName), objectInspectors);
    Object row = tableObjectInspector.create();
    StructField structField = tableObjectInspector.getStructFieldRef(columnName);
    Setter setter = getSetter(type, tableObjectInspector, row, structField);
    Serializer serializer = initializeSerializer(jobConf, properties);
    while (values.hasNext()) {
        Object value = values.next();
        if (value == null) {
            tableObjectInspector.setStructFieldData(row, structField, null);
        } else {
            setter.set(value);
        }
        recordWriter.write(serializer.serialize(row, tableObjectInspector));
    }
    recordWriter.close(false);
}
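A minimal sketch of a possible call, assuming a BIGINT column; the output file, column name, and value list are illustrative assumptions (the method itself is package-private in ParquetTestUtils).

// Hypothetical invocation with an assumed Presto BIGINT column.
File file = new File("/tmp/bigint_column.parquet");
writeParquetColumnHive(
        file,
        "my_column",
        false,                                     // declared non-nullable
        BigintType.BIGINT,
        ImmutableList.of(1L, 2L, 3L).iterator());

Note that nulls are written by resetting the struct field directly rather than through the Setter, as the loop above shows.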
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb: the createRcFileWriter method of the HiveWriteUtils class.
private static RecordWriter createRcFileWriter(Path target, JobConf conf, Properties properties, boolean compress)
        throws IOException {
    int columns = properties.getProperty(META_TABLE_COLUMNS).split(",").length;
    RCFileOutputFormat.setColumnNumber(conf, columns);
    CompressionCodec codec = null;
    if (compress) {
        codec = ReflectionUtil.newInstance(getOutputCompressorClass(conf, DefaultCodec.class), conf);
    }
    RCFile.Writer writer = new RCFile.Writer(target.getFileSystem(conf), conf, target, () -> {}, codec);
    return new ExtendedRecordWriter() {
        private long length;

        @Override
        public long getWrittenBytes() {
            return length;
        }

        @Override
        public void write(Writable value) throws IOException {
            writer.append(value);
            length = writer.getLength();
        }

        @Override
        public void close(boolean abort) throws IOException {
            writer.close();
            if (!abort) {
                length = target.getFileSystem(conf).getFileStatus(target).getLen();
            }
        }
    };
}
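A minimal sketch of the inputs this private helper expects; the target path, column list, and compression flag are assumptions for illustration.

// Hypothetical invocation (createRcFileWriter is private to HiveWriteUtils).
JobConf conf = new JobConf();
Properties properties = new Properties();
properties.setProperty("columns", "id,name");        // META_TABLE_COLUMNS drives the RCFile column count
properties.setProperty("columns.types", "bigint,string");
RecordWriter writer = createRcFileWriter(new Path("/tmp/data.rcfile"), conf, properties, true);

The anonymous ExtendedRecordWriter refreshes writer.getLength() after every append so getWrittenBytes() can report progress while the file is still open, then switches to the actual file length once close(false) has flushed everything.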
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project presto by prestodb: the writeRcFileColumnOld method of the RcFileTester class.
private static DataSize writeRcFileColumnOld(File outputFile, Format format, Compression compression, Type type, Iterator<?> values)
        throws Exception {
    ObjectInspector columnObjectInspector = getJavaObjectInspector(type);
    RecordWriter recordWriter = createRcFileWriterOld(outputFile, compression, columnObjectInspector);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    Serializer serializer = format.createSerializer();
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", objectInspector.getTypeName());
    serializer.initialize(new JobConf(false), tableProperties);
    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(type, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);
        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
    return new DataSize(outputFile.length(), BYTE).convertToMostSuccinctDataSize();
}
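A minimal sketch of a possible call from inside RcFileTester; Format and Compression are the tester's own enums, so the constants shown here and the sample varchar values are assumptions for illustration.

// Hypothetical invocation (writeRcFileColumnOld is private to RcFileTester).
File outputFile = File.createTempFile("test", ".rcfile");
DataSize size = writeRcFileColumnOld(
        outputFile,
        Format.BINARY,        // assumed enum constant; Format supplies the SerDe via createSerializer()
        Compression.NONE,     // assumed enum constant
        VarcharType.VARCHAR,
        ImmutableList.of("a", "b", "c").iterator());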
Use of org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter in project hive by apache: the createEmptyBuckets method of the Utilities class.
/**
 * Check the existence of buckets according to the bucket specification. Create empty buckets if
 * needed.
 *
 * @param hconf the job configuration
 * @param paths a list of empty buckets to create
 * @param isCompressed whether the created files should be compressed
 * @param tableInfo the table descriptor used to resolve the output format and SerDe
 * @param reporter the MapReduce reporter object
 * @throws HiveException
 * @throws IOException
 */
static void createEmptyBuckets(Configuration hconf, List<Path> paths, boolean isCompressed, TableDesc tableInfo, Reporter reporter)
        throws HiveException, IOException {
    JobConf jc;
    if (hconf instanceof JobConf) {
        jc = new JobConf(hconf);
    } else {
        // test code path
        jc = new JobConf(hconf);
    }
    HiveOutputFormat<?, ?> hiveOutputFormat = null;
    Class<? extends Writable> outputClass = null;
    try {
        AbstractSerDe serde = tableInfo.getSerDeClass().newInstance();
        serde.initialize(hconf, tableInfo.getProperties(), null);
        outputClass = serde.getSerializedClass();
        hiveOutputFormat = HiveFileFormatUtils.getHiveOutputFormat(hconf, tableInfo);
    } catch (SerDeException e) {
        throw new HiveException(e);
    } catch (InstantiationException e) {
        throw new HiveException(e);
    } catch (IllegalAccessException e) {
        throw new HiveException(e);
    }
    for (Path path : paths) {
        Utilities.FILE_OP_LOGGER.trace("creating empty bucket for {}", path);
        RecordWriter writer = hiveOutputFormat.getHiveRecordWriter(jc, path, outputClass, isCompressed, tableInfo.getProperties(), reporter);
        writer.close(false);
        LOG.info("created empty bucket for enforcing bucketing at {}", path);
    }
}
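A minimal sketch of a possible call site; the bucket paths and the way the TableDesc is obtained are assumptions for illustration.

// Hypothetical invocation (createEmptyBuckets is package-private in Utilities).
Configuration hconf = new JobConf();
List<Path> emptyBuckets = Arrays.asList(
        new Path("/warehouse/db.db/t/000000_0"),
        new Path("/warehouse/db.db/t/000001_0"));     // hypothetical bucket file names
TableDesc tableInfo = Utilities.getTableDesc(table);  // assumes a Table object resolved elsewhere
createEmptyBuckets(hconf, emptyBuckets, false, tableInfo, Reporter.NULL);

Each listed path receives an empty file in the table's declared output format, which is how the bucket count is enforced even for buckets that produced no rows.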