Use of org.apache.orc.Writer in project flink by apache: class OrcColumnarRowSplitReaderNoHiveTest, method prepareReadFileWithTypes.
@Override
protected void prepareReadFileWithTypes(String file, int rowSize) throws IOException {
    // NOTE: ORC stores field names in the file, so these names must match
    // the names the reader expects.
    TypeDescription schema =
            TypeDescription.fromString(
                    "struct<f0:float,f1:double,f2:timestamp,f3:tinyint,f4:smallint>");
    org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
    Configuration conf = new Configuration();
    Writer writer =
            OrcFile.createWriter(filePath, OrcFile.writerOptions(conf).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch(rowSize);
    // ORC carries float in a DoubleColumnVector and tinyint/smallint in LongColumnVectors.
    DoubleColumnVector col0 = (DoubleColumnVector) batch.cols[0];
    DoubleColumnVector col1 = (DoubleColumnVector) batch.cols[1];
    TimestampColumnVector col2 = (TimestampColumnVector) batch.cols[2];
    LongColumnVector col3 = (LongColumnVector) batch.cols[3];
    LongColumnVector col4 = (LongColumnVector) batch.cols[4];
    // Mark every column nullable; isNull flags are ignored while noNulls is true.
    col0.noNulls = false;
    col1.noNulls = false;
    col2.noNulls = false;
    col3.noNulls = false;
    col4.noNulls = false;
    // Fill all rows except the last with the row index.
    for (int i = 0; i < rowSize - 1; i++) {
        col0.vector[i] = i;
        col1.vector[i] = i;
        Timestamp timestamp = toTimestamp(i);
        col2.time[i] = timestamp.getTime();
        col2.nanos[i] = timestamp.getNanos();
        col3.vector[i] = i;
        col4.vector[i] = i;
    }
    // The last row is null in every column.
    col0.isNull[rowSize - 1] = true;
    col1.isNull[rowSize - 1] = true;
    col2.isNull[rowSize - 1] = true;
    col3.isNull[rowSize - 1] = true;
    col4.isNull[rowSize - 1] = true;
    batch.size = rowSize;
    writer.addRowBatch(batch);
    batch.reset();
    writer.close();
}
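Reading the file back follows the same vectorized pattern in reverse. A minimal sketch using the ORC core reader API (Reader and RecordReader from org.apache.orc), reusing the filePath and conf from the writer above; for brevity it ignores the isRepeating optimization, which a production reader should also check:

Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));
RecordReader rows = reader.rows();
VectorizedRowBatch readBatch = reader.getSchema().createRowBatch();
while (rows.nextBatch(readBatch)) {
    DoubleColumnVector f0 = (DoubleColumnVector) readBatch.cols[0];
    for (int r = 0; r < readBatch.size; r++) {
        // isNull is only meaningful once noNulls is false
        if (!f0.noNulls && f0.isNull[r]) {
            continue; // the writer above left the last row null
        }
        double value = f0.vector[r];
    }
}
rows.close();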
Use of org.apache.orc.Writer in project zeppelin by apache: class SqlInterpreterTest, method createORCFile.
public File createORCFile(int[] values) throws IOException {
    File file = File.createTempFile("zeppelin-flink-input", ".orc");
    // Delete the empty temp file: the ORC writer creates the file itself
    // and fails if it already exists.
    file.delete();
    Path path = new Path(file.getAbsolutePath());
    Configuration conf = new Configuration();
    conf.set("orc.compress", "snappy");
    TypeDescription schema = TypeDescription.fromString("struct<msg:int>");
    Writer writer =
            OrcFile.createWriter(path, OrcFile.writerOptions(conf).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch();
    LongColumnVector x = (LongColumnVector) batch.cols[0];
    for (int i = 0; i < values.length; ++i) {
        int row = batch.size++;
        x.vector[row] = values[i];
        // If the batch is full, write it out and start over.
        if (batch.size == batch.getMaxSize()) {
            writer.addRowBatch(batch);
            batch.reset();
        }
    }
    // Flush any remaining rows before closing.
    if (batch.size != 0) {
        writer.addRowBatch(batch);
        batch.reset();
    }
    writer.close();
    return file;
}
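A quick way to sanity-check the helper, sketched on the assumption that the ORC core Reader and JUnit's assertEquals are available in the test classpath; the row count comes from the file footer, so no scan is needed:

File orcFile = createORCFile(new int[] {1, 2, 3});
Reader reader =
        OrcFile.createReader(
                new Path(orcFile.getAbsolutePath()),
                OrcFile.readerOptions(new Configuration()));
// getNumberOfRows is read from the footer metadata
assertEquals(3, reader.getNumberOfRows());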
Use of org.apache.orc.Writer in project druid by druid-io: class DruidOrcInputFormatTest, method makeOrcFile.
private File makeOrcFile() throws IOException {
    final File dir = temporaryFolder.newFolder();
    final File testOrc = new File(dir, "test.orc");
    TypeDescription schema =
            TypeDescription.createStruct()
                    .addField("timestamp", TypeDescription.createString())
                    .addField("col1", TypeDescription.createString())
                    .addField("col2", TypeDescription.createList(TypeDescription.createString()))
                    .addField("val1", TypeDescription.createFloat());
    Configuration conf = new Configuration();
    Writer writer =
            OrcFile.createWriter(
                    new Path(testOrc.getPath()),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema)
                            .stripeSize(100000)
                            .bufferSize(10000)
                            .compress(CompressionKind.ZLIB)
                            .version(OrcFile.Version.CURRENT));
    VectorizedRowBatch batch = schema.createRowBatch();
    batch.size = 1;
    // setRef points the vector at the existing byte arrays instead of copying.
    ((BytesColumnVector) batch.cols[0]).setRef(0, timestamp.getBytes(), 0, timestamp.length());
    ((BytesColumnVector) batch.cols[1]).setRef(0, col1.getBytes(), 0, col1.length());
    // The list column keeps its elements in a flat child vector;
    // lengths[0] records how many of them belong to row 0.
    ListColumnVector listColumnVector = (ListColumnVector) batch.cols[2];
    listColumnVector.childCount = col2.length;
    listColumnVector.lengths[0] = col2.length;
    for (int idx = 0; idx < col2.length; idx++) {
        ((BytesColumnVector) listColumnVector.child)
                .setRef(idx, col2[idx].getBytes(), 0, col2[idx].length());
    }
    // ORC floats are carried in a DoubleColumnVector.
    ((DoubleColumnVector) batch.cols[3]).vector[0] = val1;
    writer.addRowBatch(batch);
    writer.close();
    return testOrc;
}
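The single-row case above can leave offsets[0] at its default of 0. A hedged sketch of how the same ListColumnVector bookkeeping generalizes to several rows (the rowData values here are illustrative, not from the test):

ListColumnVector list = (ListColumnVector) batch.cols[2];
BytesColumnVector elements = (BytesColumnVector) list.child;
String[][] rowData = {{"a", "b"}, {"c"}}; // illustrative values
for (int r = 0; r < rowData.length; r++) {
    // each row records where its elements start in the child vector and how many there are
    list.offsets[r] = list.childCount;
    list.lengths[r] = rowData[r].length;
    for (String s : rowData[r]) {
        byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
        // setVal copies the bytes; call elements.ensureSize(...) first for larger data
        elements.setVal(list.childCount++, bytes, 0, bytes.length);
    }
}
batch.size = rowData.length;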
Use of org.apache.orc.Writer in project flink by apache: class OrcColumnarRowSplitReaderTest, method prepareReadFileWithTypes.
protected void prepareReadFileWithTypes(String file, int rowSize) throws IOException {
    // NOTE: ORC stores field names in the file, so these names must match
    // the names the reader expects.
    TypeDescription schema =
            TypeDescription.fromString(
                    "struct<f0:float,f1:double,f2:timestamp,f3:tinyint,f4:smallint>");
    org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
    Configuration conf = new Configuration();
    Writer writer =
            OrcFile.createWriter(filePath, OrcFile.writerOptions(conf).setSchema(schema));
    VectorizedRowBatch batch = schema.createRowBatch(rowSize);
    DoubleColumnVector col0 = (DoubleColumnVector) batch.cols[0];
    DoubleColumnVector col1 = (DoubleColumnVector) batch.cols[1];
    TimestampColumnVector col2 = (TimestampColumnVector) batch.cols[2];
    LongColumnVector col3 = (LongColumnVector) batch.cols[3];
    LongColumnVector col4 = (LongColumnVector) batch.cols[4];
    // Mark every column nullable; isNull flags are ignored while noNulls is true.
    col0.noNulls = false;
    col1.noNulls = false;
    col2.noNulls = false;
    col3.noNulls = false;
    col4.noNulls = false;
    // Fill all rows except the last with the row index.
    for (int i = 0; i < rowSize - 1; i++) {
        col0.vector[i] = i;
        col1.vector[i] = i;
        Timestamp timestamp = toTimestamp(i);
        col2.time[i] = timestamp.getTime();
        col2.nanos[i] = timestamp.getNanos();
        col3.vector[i] = i;
        col4.vector[i] = i;
    }
    // The last row is null in every column.
    col0.isNull[rowSize - 1] = true;
    col1.isNull[rowSize - 1] = true;
    col2.isNull[rowSize - 1] = true;
    col3.isNull[rowSize - 1] = true;
    col4.isNull[rowSize - 1] = true;
    batch.size = rowSize;
    writer.addRowBatch(batch);
    batch.reset();
    writer.close();
}
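One detail both Flink variants rely on is that a TimestampColumnVector splits each value across a time array (milliseconds) and a nanos array. If the Hive storage-api version in use provides it, the two assignments in the loop can likely be collapsed into a single call; a hedged sketch:

// equivalent to setting col2.time[i] and col2.nanos[i] separately
col2.set(i, toTimestamp(i));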