use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project hive by apache.
the class VectorizedColumnReaderTestBase method writeData.
/**
 * Writes {@code nElements} generated rows to {@code writer} and then closes it.
 *
 * <p>For each row index the scalar values are obtained from the {@code get*Value} style
 * helpers and appended to a fresh group. Several fields are deliberately left unset for
 * some rows (see the inline comments) so that readers see missing values.
 *
 * @param writer destination for the generated rows; this method always closes it,
 *     whether or not writing succeeds
 * @param isDictionaryEncoding forwarded unchanged to the value helpers
 * @throws IOException if writing a row or closing the writer fails
 */
protected static void writeData(ParquetWriter<Group> writer, boolean isDictionaryEncoding) throws IOException {
  // Bind the caller's writer as a resource so it is closed even when building or
  // writing a row throws; a failure in close() is then recorded as suppressed
  // instead of hiding the original exception.
  try (ParquetWriter<Group> out = writer) {
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    for (int i = 0; i < nElements; i++) {
      boolean isNull = isNull(i);
      int intVal = getIntValue(isDictionaryEncoding, i);
      long longVal = getLongValue(isDictionaryEncoding, i);
      Binary timeStamp = getTimestamp(isDictionaryEncoding, i);
      HiveDecimal decimalVal = getDecimal(isDictionaryEncoding, i).setScale(2);
      double doubleVal = getDoubleValue(isDictionaryEncoding, i);
      float floatVal = getFloatValue(isDictionaryEncoding, i);
      boolean booleanVal = getBooleanValue(i);
      Binary binary = getBinaryValue(isDictionaryEncoding, i);

      // Fields that are populated on every row.
      Group group = f.newGroup()
          .append("int32_field", intVal)
          .append("int64_field", longVal)
          .append("int96_field", timeStamp)
          .append("double_field", doubleVal)
          .append("float_field", floatVal)
          .append("boolean_field", booleanVal)
          .append("flba_field", "abc");

      // "some_null_field" and "binary_field_some_null" are skipped when isNull(i) is true.
      if (!isNull) {
        group.append("some_null_field", "x");
      }
      group.append("binary_field", binary);
      if (!isNull) {
        group.append("binary_field_some_null", binary);
      }

      // The decimal is stored as the writable's internal byte representation.
      HiveDecimalWritable w = new HiveDecimalWritable(decimalVal);
      group.append("value", Binary.fromConstantByteArray(w.getInternalStorage()));

      group.addGroup("struct_field").append("a", intVal).append("b", doubleVal);

      Group g = group.addGroup("nested_struct_field");
      g.addGroup("nsf").append("c", intVal).append("d", intVal);
      g.append("e", doubleVal);

      // "f" is omitted on even rows and "g" on rows divisible by 3.
      Group some_null_g = group.addGroup("struct_field_some_null");
      if (i % 2 != 0) {
        some_null_g.append("f", intVal);
      }
      if (i % 3 != 0) {
        some_null_g.append("g", doubleVal);
      }

      // Every row gets one map entry; its value is omitted when i % 13 == 1.
      Group mapGroup = group.addGroup("map_field");
      if (i % 13 != 1) {
        mapGroup.addGroup("map").append("key", binary).append("value", "abc");
      } else {
        mapGroup.addGroup("map").append("key", binary);
      }

      // The list holds i % 4 copies of intVal (so it is empty on every fourth row).
      Group arrayGroup = group.addGroup("array_list");
      for (int j = 0; j < i % 4; j++) {
        arrayGroup.addGroup("bag").append("array_element", intVal);
      }

      out.write(group);
    }
  }
}
use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.
the class TestFiltersWithMissingColumns method createDataFile.
/**
 * Creates the Parquet file used by the tests and stores its location in {@code path}.
 *
 * <p>The file has the schema {@code test(required int64 id, required binary data (UTF8))}
 * and contains 1000 rows where {@code id} runs from 0 to 999 and {@code data} is
 * {@code "data-" + id}. The writer is opened in OVERWRITE mode.
 *
 * @throws Exception if the temporary file cannot be created or written
 */
@Before
public void createDataFile() throws Exception {
  File file = temp.newFile("test.parquet");
  this.path = new Path(file.toString());

  MessageType type = Types.buildMessage()
      .required(INT64).named("id")
      .required(BINARY).as(UTF8).named("data")
      .named("test");
  SimpleGroupFactory factory = new SimpleGroupFactory(type);

  // try-with-resources instead of try/finally: if both a write and close() fail,
  // the write failure is propagated and the close failure is attached as suppressed.
  try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
      .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
      .withType(type)
      .build()) {
    for (long i = 0; i < 1000; i += 1) {
      Group g = factory.newGroup();
      g.add(0, i);            // field 0: id
      g.add(1, "data-" + i);  // field 1: data
      writer.write(g);
    }
  }
}
use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.
the class ParquetFileTest method createTestParquetFile.
/**
 * Writes a ten-row Parquet file at the location returned by {@code parquetFile()}.
 *
 * <p>Row {@code i} carries {@code 32 + i}, {@code 64L + i}, {@code 1.0f + i} and
 * {@code 2.0d + i} in the numeric fields, a colour name cycled from {@code COLORS}
 * in the binary field, and 12 random bytes in the fixed-length field.
 *
 * @throws IOException if the file cannot be written
 */
private void createTestParquetFile() throws IOException {
  File target = parquetFile();
  Path targetPath = new Path(target.getPath());
  Configuration conf = new Configuration();
  MessageType schema = createSchema();
  // The write support reads the schema back out of the configuration.
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

  try (ParquetWriter<Group> writer = new ParquetWriter<>(
      targetPath,
      new GroupWriteSupport(),
      CompressionCodecName.UNCOMPRESSED,
      1024,
      1024,
      512,
      true,
      false,
      ParquetProperties.WriterVersion.PARQUET_2_0,
      conf)) {
    for (int row = 0; row < 10; row++) {
      final byte[] fixedBytes = new byte[12];
      ThreadLocalRandom.current().nextBytes(fixedBytes);

      Group record = groupFactory.newGroup()
          .append(INT32_FIELD, 32 + row)
          .append(INT64_FIELD, 64L + row)
          .append(FLOAT_FIELD, 1.0f + row)
          .append(DOUBLE_FIELD, 2.0d + row)
          .append(BINARY_FIELD, Binary.fromString(COLORS[row % COLORS.length]))
          .append(FIXED_LEN_BYTE_ARRAY_FIELD, Binary.fromConstantByteArray(fixedBytes));
      writer.write(record);
    }
  }
}
use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.
the class ValidatingColumnWriteStore method testRequiredOfRequired.
/**
 * Builds one record for a schema whose required group {@code foo} contains a required
 * int64 {@code bar} (set to 2) and hands the schema and record to {@code testSchema}.
 */
@Test
public void testRequiredOfRequired() {
  String schemaText = "message Document {\n"
      + " required group foo {\n"
      + " required int64 bar;\n"
      + " }\n"
      + "}\n";
  MessageType reqreqSchema = MessageTypeParser.parseMessageType(schemaText);

  Group record = new SimpleGroupFactory(reqreqSchema).newGroup();
  Group foo = record.addGroup("foo");
  foo.append("bar", 2L);

  testSchema(reqreqSchema, Arrays.asList(record));
}
use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.
the class ValidatingColumnWriteStore method testReadUsingProjectedSchema.
/**
 * Writes a single record {@code (a=1, b=2)} with a two-column schema, reads it back
 * through a projection that keeps only {@code b} (declared optional), and validates
 * that the one group read contains just the value 2.
 */
@Test
public void testReadUsingProjectedSchema() {
  PrimitiveType requiredA = new PrimitiveType(REQUIRED, INT32, "a");
  PrimitiveType requiredB = new PrimitiveType(REQUIRED, INT32, "b");
  MessageType originalSchema = new MessageType("schema", requiredA, requiredB);

  PrimitiveType optionalB = new PrimitiveType(OPTIONAL, INT32, "b");
  MessageType projectedSchema = new MessageType("schema", optionalB);

  MemPageStore store = new MemPageStore(1);
  Group written = new SimpleGroupFactory(originalSchema).newGroup()
      .append("a", 1)
      .append("b", 2);
  writeGroups(originalSchema, store, written);

  // Copy the read result into a fresh list before validating it.
  List<Group> groups = new ArrayList<>(readGroups(store, originalSchema, projectedSchema, 1));
  Object[][] expected = { { 2 } };
  validateGroups(groups, expected);
}
Aggregations