Search in sources:

Example 6 with SimpleGroupFactory

use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project hive by apache.

the class VectorizedColumnReaderTestBase method writeData.

/**
 * Writes {@code nElements} generated rows to {@code writer} and then closes it.
 *
 * <p>Each row carries the primitive, binary, decimal, struct, map and list fields
 * appended below. Several fields are populated only for some rows so that the
 * written file also contains missing values:
 * <ul>
 *   <li>{@code some_null_field} and {@code binary_field_some_null} are skipped when
 *       {@code isNull(i)} is true;</li>
 *   <li>{@code struct_field_some_null.f} is written only for odd {@code i}, and
 *       {@code struct_field_some_null.g} only when {@code i} is not a multiple of 3;</li>
 *   <li>the map entry has no {@code value} when {@code i % 13 == 1};</li>
 *   <li>{@code array_list} receives {@code i % 4} elements.</li>
 * </ul>
 *
 * <p>The writer is closed even when generating or writing a row fails, so a failure
 * does not leak the underlying writer.
 *
 * @param writer               destination writer; always closed by this method
 * @param isDictionaryEncoding forwarded to the value generators
 * @throws IOException if writing a row or closing the writer fails
 */
protected static void writeData(ParquetWriter<Group> writer, boolean isDictionaryEncoding) throws IOException {
    // try-with-resources guarantees close() on every path and keeps the original
    // failure as the primary exception if close() fails as well.
    try (ParquetWriter<Group> out = writer) {
        SimpleGroupFactory f = new SimpleGroupFactory(schema);
        for (int i = 0; i < nElements; i++) {
            boolean isNull = isNull(i);
            int intVal = getIntValue(isDictionaryEncoding, i);
            long longVal = getLongValue(isDictionaryEncoding, i);
            Binary timeStamp = getTimestamp(isDictionaryEncoding, i);
            HiveDecimal decimalVal = getDecimal(isDictionaryEncoding, i).setScale(2);
            double doubleVal = getDoubleValue(isDictionaryEncoding, i);
            float floatVal = getFloatValue(isDictionaryEncoding, i);
            boolean booleanVal = getBooleanValue(i);
            Binary binary = getBinaryValue(isDictionaryEncoding, i);

            // Fields that are present in every row.
            Group group = f.newGroup()
                .append("int32_field", intVal)
                .append("int64_field", longVal)
                .append("int96_field", timeStamp)
                .append("double_field", doubleVal)
                .append("float_field", floatVal)
                .append("boolean_field", booleanVal)
                .append("flba_field", "abc");
            if (!isNull) {
                group.append("some_null_field", "x");
            }
            group.append("binary_field", binary);
            if (!isNull) {
                group.append("binary_field_some_null", binary);
            }

            // The decimal is written as the writable's internal byte representation.
            HiveDecimalWritable w = new HiveDecimalWritable(decimalVal);
            group.append("value", Binary.fromConstantByteArray(w.getInternalStorage()));

            group.addGroup("struct_field").append("a", intVal).append("b", doubleVal);

            Group g = group.addGroup("nested_struct_field");
            g.addGroup("nsf").append("c", intVal).append("d", intVal);
            g.append("e", doubleVal);

            // Struct whose members are each missing for a different subset of rows.
            Group some_null_g = group.addGroup("struct_field_some_null");
            if (i % 2 != 0) {
                some_null_g.append("f", intVal);
            }
            if (i % 3 != 0) {
                some_null_g.append("g", doubleVal);
            }

            // Single-entry map; the value is left out when i % 13 == 1.
            Group mapGroup = group.addGroup("map_field");
            if (i % 13 != 1) {
                mapGroup.addGroup("map").append("key", binary).append("value", "abc");
            } else {
                mapGroup.addGroup("map").append("key", binary);
            }

            // List with 0 to 3 elements, depending on i.
            Group arrayGroup = group.addGroup("array_list");
            for (int j = 0; j < i % 4; j++) {
                arrayGroup.addGroup("bag").append("array_element", intVal);
            }
            out.write(group);
        }
    }
}
Also used : Group(org.apache.parquet.example.data.Group) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) Binary(org.apache.parquet.io.api.Binary)

Example 7 with SimpleGroupFactory

use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.

the class TestFiltersWithMissingColumns method createDataFile.

/**
 * Creates {@code test.parquet} in the temporary folder and fills it with 1000 rows.
 *
 * <p>The file schema is a message named {@code test} with a required INT64 column
 * {@code id} and a required UTF8 BINARY column {@code data}. Row {@code n} holds
 * {@code id = n} and {@code data = "data-" + n}. The resulting location is stored
 * in {@code this.path}.
 *
 * @throws Exception if the file cannot be created or written
 */
@Before
public void createDataFile() throws Exception {
    File parquetFile = temp.newFile("test.parquet");
    this.path = new Path(parquetFile.toString());

    MessageType fileSchema = Types.buildMessage()
        .required(INT64).named("id")
        .required(BINARY).as(UTF8).named("data")
        .named("test");
    SimpleGroupFactory rows = new SimpleGroupFactory(fileSchema);

    ParquetWriter<Group> out = ExampleParquetWriter.builder(path)
        .withWriteMode(ParquetFileWriter.Mode.OVERWRITE)
        .withType(fileSchema)
        .build();
    try {
        long id = 0;
        while (id < 1000) {
            Group row = rows.newGroup();
            row.add(0, id);           // field 0: id
            row.add(1, "data-" + id); // field 1: data
            out.write(row);
            id++;
        }
    } finally {
        out.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.parquet.example.data.Group) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) File(java.io.File) MessageType(org.apache.parquet.schema.MessageType) Before(org.junit.Before)

Example 8 with SimpleGroupFactory

use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.

the class ParquetFileTest method createTestParquetFile.

/**
 * Writes a ten-row, uncompressed PARQUET_2_0 file to the location returned by
 * {@code parquetFile()}, using the schema from {@code createSchema()}.
 *
 * <p>For row {@code r} the int, long, float and double fields hold {@code 32 + r},
 * {@code 64 + r}, {@code 1.0 + r} and {@code 2.0 + r}; the binary field cycles
 * through {@code COLORS}; the fixed-length field gets 12 random bytes.
 *
 * @throws IOException if the file cannot be written
 */
private void createTestParquetFile() throws IOException {
    Path target = new Path(parquetFile().getPath());
    Configuration configuration = new Configuration();
    MessageType fileSchema = createSchema();
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(fileSchema);
    GroupWriteSupport.setSchema(fileSchema, configuration);

    try (ParquetWriter<Group> out = new ParquetWriter<>(
            target,
            new GroupWriteSupport(),
            CompressionCodecName.UNCOMPRESSED,
            1024,
            1024,
            512,
            true,
            false,
            ParquetProperties.WriterVersion.PARQUET_2_0,
            configuration)) {
        for (int row = 0; row < 10; row++) {
            final byte[] fixedBytes = new byte[12];
            ThreadLocalRandom.current().nextBytes(fixedBytes);

            Group entry = groupFactory.newGroup()
                .append(INT32_FIELD, 32 + row)
                .append(INT64_FIELD, 64L + row)
                .append(FLOAT_FIELD, 1.0f + row)
                .append(DOUBLE_FIELD, 2.0d + row)
                .append(BINARY_FIELD, Binary.fromString(COLORS[row % COLORS.length]))
                .append(FIXED_LEN_BYTE_ARRAY_FIELD, Binary.fromConstantByteArray(fixedBytes));
            out.write(entry);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Group(org.apache.parquet.example.data.Group) GroupWriteSupport(org.apache.parquet.hadoop.example.GroupWriteSupport) Configuration(org.apache.hadoop.conf.Configuration) ParquetWriter(org.apache.parquet.hadoop.ParquetWriter) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) File(java.io.File) MessageType(org.apache.parquet.schema.MessageType)

Example 9 with SimpleGroupFactory

use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.

the class ValidatingColumnWriteStore method testRequiredOfRequired.

/**
 * Builds one record for a schema with a required group {@code foo} containing a
 * required int64 {@code bar} (set to 2) and passes it to {@code testSchema}.
 */
@Test
public void testRequiredOfRequired() {
    String schemaText = "message Document {\n"
        + "  required group foo {\n"
        + "    required int64 bar;\n"
        + "  }\n"
        + "}\n";
    MessageType nestedRequiredSchema = MessageTypeParser.parseMessageType(schemaText);

    GroupFactory factory = new SimpleGroupFactory(nestedRequiredSchema);
    Group document = factory.newGroup();
    Group foo = document.addGroup("foo");
    foo.append("bar", 2L);

    testSchema(nestedRequiredSchema, Arrays.asList(document));
}
Also used : Group(org.apache.parquet.example.data.Group) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) GroupFactory(org.apache.parquet.example.data.GroupFactory) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 10 with SimpleGroupFactory

use of org.apache.parquet.example.data.simple.SimpleGroupFactory in project parquet-mr by apache.

the class ValidatingColumnWriteStore method testReadUsingProjectedSchema.

/**
 * Writes one record with required int32 fields {@code a = 1} and {@code b = 2},
 * reads it back through a projection that keeps only {@code b} (declared optional),
 * and validates the result against the single expected value {@code 2}.
 */
@Test
public void testReadUsingProjectedSchema() {
    MessageType originalSchema = new MessageType(
        "schema",
        new PrimitiveType(REQUIRED, INT32, "a"),
        new PrimitiveType(REQUIRED, INT32, "b"));
    MessageType projectedSchema = new MessageType(
        "schema",
        new PrimitiveType(OPTIONAL, INT32, "b"));

    MemPageStore pageStore = new MemPageStore(1);
    SimpleGroupFactory factory = new SimpleGroupFactory(originalSchema);
    Group written = factory.newGroup().append("a", 1).append("b", 2);
    writeGroups(originalSchema, pageStore, written);

    // Copy the groups that were read back into a fresh list before validating.
    List<Group> readBack = new ArrayList<>(readGroups(pageStore, originalSchema, projectedSchema, 1));
    Object[][] expected = { { 2 } };
    validateGroups(readBack, expected);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) ArrayList(java.util.ArrayList) List(java.util.List) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory): 37; Group (org.apache.parquet.example.data.Group): 33; MessageType (org.apache.parquet.schema.MessageType): 20; Path (org.apache.hadoop.fs.Path): 16; Configuration (org.apache.hadoop.conf.Configuration): 13; Test (org.junit.Test): 12; File (java.io.File): 10; GroupFactory (org.apache.parquet.example.data.GroupFactory): 10; MessageTypeParser.parseMessageType (org.apache.parquet.schema.MessageTypeParser.parseMessageType): 8; GroupWriteSupport (org.apache.parquet.hadoop.example.GroupWriteSupport): 7; ParquetWriter (org.apache.parquet.hadoop.ParquetWriter): 6; PrimitiveType (org.apache.parquet.schema.PrimitiveType): 5; MemPageStore (org.apache.parquet.column.page.mem.MemPageStore): 4; HadoopInputFile (org.apache.parquet.hadoop.util.HadoopInputFile): 4; HadoopOutputFile (org.apache.parquet.hadoop.util.HadoopOutputFile): 4; OutputFile (org.apache.parquet.io.OutputFile): 4; Binary (org.apache.parquet.io.api.Binary): 4; ArrayList (java.util.ArrayList): 3; HashMap (java.util.HashMap): 3; Random (java.util.Random): 3