Example use of org.apache.parquet.example.data.simple.SimpleGroupFactory in the Apache parquet-mr project.
Class TestMergeMetadataFiles, method writeFile.
/**
 * Writes an example Parquet file of 1000 rows to {@code out}.
 *
 * <p>The schema registered on {@code conf} is {@code schema2} when {@code useSchema2} is true
 * and {@code schema} otherwise. The file's extra metadata carries {@code "schema_num"} set to
 * {@code "2"} or {@code "1"} accordingly. The {@code int96_field} value is only appended when
 * {@code useSchema2} is false.
 *
 * @param out destination file
 * @param conf Hadoop configuration; mutated by registering the write schema on it
 * @param useSchema2 selects {@code schema2} instead of {@code schema}
 * @throws IOException if building, writing to, or closing the writer fails
 */
private static void writeFile(File out, Configuration conf, boolean useSchema2) throws IOException {
  if (!useSchema2) {
    GroupWriteSupport.setSchema(schema, conf);
  } else {
    GroupWriteSupport.setSchema(schema2, conf);
  }
  // NOTE(review): groups are always created against `schema`, even when `schema2` is the
  // registered write schema. Kept as-is; confirm this is intentional.
  SimpleGroupFactory f = new SimpleGroupFactory(schema);
  Map<String, String> extraMetaData = new HashMap<>();
  extraMetaData.put("schema_num", useSchema2 ? "2" : "1");
  // try-with-resources guarantees the writer is closed even if a write fails part-way through.
  try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path(out.getAbsolutePath()))
      .withConf(conf)
      .withExtraMetaData(extraMetaData)
      .build()) {
    for (int i = 0; i < 1000; i++) {
      Group g = f.newGroup()
          .append("binary_field", "test" + i)
          .append("int32_field", i)
          .append("int64_field", (long) i)
          .append("boolean_field", i % 2 == 0)
          .append("float_field", (float) i)
          .append("double_field", (double) i)
          .append("flba_field", "foo");
      if (!useSchema2) {
        g = g.append("int96_field", Binary.fromConstantByteArray(new byte[12]));
      }
      writer.write(g);
    }
  }
}
Example use of org.apache.parquet.example.data.simple.SimpleGroupFactory in the Apache parquet-mr project.
Class TestPropertiesDrivenEncryption, method writeEncryptedParquetFile.
/**
 * Writes {@code data} to a Parquet file under {@code root}, using the encryption properties
 * derived from {@code encryptionConfiguration} (none when it supplies no Hadoop configuration).
 *
 * <p>Failures while resolving the encryption properties or while writing are not thrown; they
 * are passed to {@code addErrorToErrorCollectorAndLog} instead.
 *
 * @param root directory the file is created in
 * @param data rows to write
 * @param encryptionConfiguration source of the Hadoop configuration and of the file name
 * @param threadNumber forwarded to {@code getFileName}
 */
private void writeEncryptedParquetFile(Path root, List<SingleRow> data, EncryptionConfiguration encryptionConfiguration, int threadNumber) {
  MessageType schema = SingleRow.getSchema();
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  // Small pages (a tenth of the row count) so that several pages will be created.
  int pageSize = data.size() / 10;
  // Row-group size derived from the page size so that more row-groups are created.
  int rowGroupSize = pageSize * 6 * 5;

  Path file = new Path(root, getFileName(root, encryptionConfiguration, threadNumber));
  LOG.info("\nWrite " + file);

  Configuration conf = encryptionConfiguration.getHadoopConfiguration(this);
  FileEncryptionProperties fileEncryptionProperties = null;
  try {
    if (conf != null) {
      EncryptionPropertiesFactory cryptoFactory = EncryptionPropertiesFactory.loadFactory(conf);
      fileEncryptionProperties = cryptoFactory.getFileEncryptionProperties(conf, file, null);
    } else {
      // No configuration supplied: fall back to a default one and leave the properties null.
      conf = new Configuration();
    }
  } catch (Exception e) {
    addErrorToErrorCollectorAndLog("Failed writing " + file, e, encryptionConfiguration, null);
    return;
  }

  try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
      .withConf(conf)
      .withWriteMode(OVERWRITE)
      .withType(schema)
      .withPageSize(pageSize)
      .withRowGroupSize(rowGroupSize)
      .withEncryption(fileEncryptionProperties)
      .build()) {
    for (SingleRow row : data) {
      Group group = groupFactory.newGroup()
          .append(SingleRow.BOOLEAN_FIELD_NAME, row.boolean_field)
          .append(SingleRow.INT32_FIELD_NAME, row.int32_field)
          .append(SingleRow.FLOAT_FIELD_NAME, row.float_field)
          .append(SingleRow.DOUBLE_FIELD_NAME, row.double_field)
          .append(SingleRow.BINARY_FIELD_NAME, Binary.fromConstantByteArray(row.ba_field))
          .append(SingleRow.FIXED_LENGTH_BINARY_FIELD_NAME, Binary.fromConstantByteArray(row.flba_field))
          .append(SingleRow.PLAINTEXT_INT32_FIELD_NAME, row.plaintext_int32_field);
      writer.write(group);
    }
  } catch (Exception e) {
    addErrorToErrorCollectorAndLog("Failed writing " + file, e, encryptionConfiguration, null);
  }
}
Example use of org.apache.parquet.example.data.simple.SimpleGroupFactory in the Apache parquet-mr project.
Class DictionaryFilterTest, method prepareFile.
/**
 * Builds a GZIP-compressed, dictionary-encoded example writer for {@code file} and passes it,
 * together with a group factory for {@code schema}, to {@code writeData}.
 *
 * @param version Parquet writer version to use
 * @param file destination path
 * @throws IOException if the writer cannot be built or {@code writeData} fails
 */
private static void prepareFile(WriterVersion version, Path file) throws IOException {
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  ParquetWriter<Group> parquetWriter = ExampleParquetWriter.builder(file)
      .withWriterVersion(version)
      .withCompressionCodec(GZIP)
      .withRowGroupSize(1024 * 1024)
      .withPageSize(1024)
      .enableDictionaryEncoding()
      .withDictionaryPageSize(2 * 1024)
      .withConf(conf)
      .build();
  // NOTE(review): the writer is not closed here; presumably writeData closes it. Confirm.
  writeData(groupFactory, parquetWriter);
}
Example use of org.apache.parquet.example.data.simple.SimpleGroupFactory in the Apache parquet-mr project.
Class ValidatingColumnWriteStore, method testOptionalRequiredInteraction.
/**
 * Exercises {@code testSchema} with schemas that nest a single BINARY leaf named
 * {@code "primitive"} inside 0 to 6 levels of groups, in three variants:
 * <ol>
 *   <li>required leaf under {@code i} required groups, with the leaf set;</li>
 *   <li>optional leaf under {@code i} required groups, once with the leaf set and once with
 *       it left unset;</li>
 *   <li>optional leaf under six groups of which exactly one ({@code "req" + i}) is optional,
 *       once fully populated and once populated only down to the parent of the optional
 *       group.</li>
 * </ol>
 */
@Test
public void testOptionalRequiredInteraction() {
  // Variant 1: required leaf wrapped in `depth` required groups.
  for (int depth = 0; depth < 6; depth++) {
    Type nested = new PrimitiveType(Repetition.REQUIRED, PrimitiveTypeName.BINARY, "primitive");
    for (int level = 0; level < depth; level++) {
      nested = new GroupType(Repetition.REQUIRED, "req" + level, nested);
    }
    MessageType messageType = new MessageType("schema" + depth, nested);
    GroupFactory factory = new SimpleGroupFactory(messageType);

    Group record = factory.newGroup();
    Group cursor = record;
    for (int level = 0; level < depth; level++) {
      cursor = cursor.addGroup(0);
    }
    cursor.add(0, Binary.fromString("foo"));

    List<Group> records = new ArrayList<>();
    records.add(record);
    testSchema(messageType, records);
  }

  // Variant 2: optional leaf wrapped in `depth` required groups.
  for (int depth = 0; depth < 6; depth++) {
    Type nested = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "primitive");
    for (int level = 0; level < depth; level++) {
      nested = new GroupType(Repetition.REQUIRED, "req" + level, nested);
    }
    MessageType messageType = new MessageType("schema" + (depth + 6), nested);
    GroupFactory factory = new SimpleGroupFactory(messageType);

    Group withValue = factory.newGroup();
    Group withoutValue = factory.newGroup();
    Group withValueCursor = withValue;
    Group withoutValueCursor = withoutValue;
    for (int level = 0; level < depth; level++) {
      withValueCursor = withValueCursor.addGroup(0);
      withoutValueCursor = withoutValueCursor.addGroup(0);
    }
    // Only the first record gets the leaf; the second leaves the optional leaf unset.
    withValueCursor.add(0, Binary.fromString("foo"));

    List<Group> records = new ArrayList<>();
    records.add(withValue);
    records.add(withoutValue);
    testSchema(messageType, records);
  }

  // Variant 3: optional leaf under six groups, of which only "req" + optionalIndex is optional.
  for (int optionalIndex = 0; optionalIndex < 6; optionalIndex++) {
    Type nested = new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "primitive");
    for (int level = 0; level < 6; level++) {
      Repetition repetition = optionalIndex == level ? Repetition.OPTIONAL : Repetition.REQUIRED;
      nested = new GroupType(repetition, "req" + level, nested);
    }
    MessageType messageType = new MessageType("schema" + (optionalIndex + 12), nested);
    GroupFactory factory = new SimpleGroupFactory(messageType);

    Group withValue = factory.newGroup();
    Group withoutValue = factory.newGroup();
    Group withValueCursor = withValue;
    Group withoutValueCursor = withoutValue;
    for (int level = 0; level < 6; level++) {
      withValueCursor = withValueCursor.addGroup(0);
      // The schema was built inside-out ("req5" is outermost), so descending on the
      // (5 - optionalIndex) iterations where level > optionalIndex stops just above
      // the optional group.
      if (level > optionalIndex) {
        withoutValueCursor = withoutValueCursor.addGroup(0);
      }
    }
    withValueCursor.add(0, Binary.fromString("foo"));

    List<Group> records = new ArrayList<>();
    records.add(withValue);
    records.add(withoutValue);
    testSchema(messageType, records);
  }
}
Example use of org.apache.parquet.example.data.simple.SimpleGroupFactory in the Apache parquet-mr project.
Class ValidatingColumnWriteStore, method testOneOfEach.
/**
 * Builds one group that sets fields {@code "a"} through {@code "h"} of the {@code oneOfEach}
 * schema, one value per field, and passes it to {@code testSchema}.
 */
@Test
public void testOneOfEach() {
  MessageType messageType = MessageTypeParser.parseMessageType(oneOfEach);
  GroupFactory factory = new SimpleGroupFactory(messageType);
  Group record = factory.newGroup()
      .append("a", 1L)
      .append("b", 2)
      .append("c", 3.0f)
      .append("d", 4.0d)
      .append("e", true)
      .append("f", Binary.fromString("6"))
      .append("g", new NanoTime(1234, System.currentTimeMillis() * 1000))
      .append("h", Binary.fromString("abc"));
  testSchema(messageType, Arrays.asList(record));
}
Aggregations