Usage of uk.gov.gchq.gaffer.parquetstore.serialisation.impl.StringParquetSerialiser in the Gaffer project by gchq.
From the class WriteDataTest, method testTwoWritesToSamePartitionDoesntThrowException.
@Test
public void testTwoWritesToSamePartitionDoesntThrowException(@TempDir java.nio.file.Path tempDir) throws Exception {
    // Given: a schema with one entity group and one edge group, both
    // non-aggregated, serialised with the Parquet serialisers under test.
    final Schema schema = new Schema.Builder()
            .type("int", new TypeDefinition.Builder()
                    .clazz(Integer.class)
                    .serialiser(new IntegerParquetSerialiser())
                    .build())
            .type("string", new TypeDefinition.Builder()
                    .clazz(String.class)
                    .serialiser(new StringParquetSerialiser())
                    .build())
            .entity("entity", new SchemaEntityDefinition.Builder()
                    .vertex("string")
                    .property("property1", "int")
                    .aggregate(false)
                    .build())
            .edge("edge", new SchemaEdgeDefinition.Builder()
                    .source("string")
                    .destination("string")
                    .property("property2", "int")
                    .aggregate(false)
                    .build())
            .vertexSerialiser(new StringParquetSerialiser())
            .build();
    final Function<String, String> groupToDirectory =
            group -> tempDir.toAbsolutePath().toString() + "/" + group;
    final List<Element> elements = new ArrayList<>();
    elements.add(new Entity.Builder()
            .group("entity")
            .vertex("A")
            .property("property1", 1)
            .build());
    elements.add(new Edge.Builder()
            .group("edge")
            .source("B")
            .dest("C")
            .property("property2", 100)
            .build());
    final WriteData writeData = new WriteData(groupToDirectory, schema, CompressionCodecName.GZIP);
    final FileSystem fileSystem = FileSystem.get(new Configuration());

    // When: three concurrent tasks write the same elements to the same
    // partition (id 1), differing only in the task id passed to call().
    final ExecutorService executorService = Executors.newFixedThreadPool(3);
    try {
        final List<Callable<Void>> tasks = new ArrayList<>();
        LongStream.range(1000L, 1003L).forEach(l -> {
            tasks.add(() -> {
                writeData.call(elements.iterator(), 1, l);
                return null;
            });
        });
        // invokeAll captures task exceptions inside the returned futures;
        // call get() on each so any exception thrown by a writer actually
        // fails this test (which is the behaviour the test name asserts).
        for (final java.util.concurrent.Future<Void> future : executorService.invokeAll(tasks)) {
            future.get();
        }
    } finally {
        // Always release the pool's threads, even if a write failed.
        executorService.shutdown();
    }

    // Then: a file named with the partition id has been created for each group.
    assertTrue(fileSystem.exists(new Path(groupToDirectory.apply("entity") + "/" + "input-1.parquet")));
    assertTrue(fileSystem.exists(new Path(groupToDirectory.apply("edge") + "/" + "input-1.parquet")));
}
Usage of uk.gov.gchq.gaffer.parquetstore.serialisation.impl.StringParquetSerialiser in the Gaffer project by gchq.
From the class ParquetStoreTest, method shouldCorrectlyUseCompressionOption.
@Test
public void shouldCorrectlyUseCompressionOption(@TempDir java.nio.file.Path tempDir) throws Exception {
    for (final String compressionType : Sets.newHashSet("GZIP", "SNAPPY", "UNCOMPRESSED")) {
        // Given: a schema with one entity group and one edge group, and store
        // properties configured with the compression codec under test.
        final Schema schema = new Schema.Builder()
                .type("int", new TypeDefinition.Builder()
                        .clazz(Integer.class)
                        .serialiser(new IntegerParquetSerialiser())
                        .build())
                .type("string", new TypeDefinition.Builder()
                        .clazz(String.class)
                        .serialiser(new StringParquetSerialiser())
                        .build())
                .type(DIRECTED_EITHER, Boolean.class)
                .entity("entity", new SchemaEntityDefinition.Builder()
                        .vertex("string")
                        .property("property1", "int")
                        .aggregate(false)
                        .build())
                .edge("edge", new SchemaEdgeDefinition.Builder()
                        .source("string")
                        .destination("string")
                        .property("property2", "int")
                        .directed(DIRECTED_EITHER)
                        .aggregate(false)
                        .build())
                .vertexSerialiser(new StringParquetSerialiser())
                .build();
        final ParquetStoreProperties parquetStoreProperties = TestUtils.getParquetStoreProperties(tempDir);
        parquetStoreProperties.setCompressionCodecName(compressionType);
        final ParquetStore parquetStore =
                (ParquetStore) ParquetStore.createStore("graphId", schema, parquetStoreProperties);
        final List<Element> elements = new ArrayList<>();
        elements.add(new Entity.Builder()
                .group("entity")
                .vertex("A")
                .property("property1", 1)
                .build());
        elements.add(new Edge.Builder()
                .group("edge")
                .source("B")
                .dest("C")
                .property("property2", 100)
                .build());

        // When
        final AddElements add = new AddElements.Builder().input(elements).build();
        parquetStore.execute(add, new Context());

        // Then: every column chunk of every file written for BOTH groups uses
        // the configured codec. The original version only inspected the
        // "entity" group, leaving "edge" files unverified.
        for (final String group : new String[]{"entity", "edge"}) {
            final List<Path> files = parquetStore.getFilesForGroup(group);
            // Guard against a vacuous pass: if no files were written the
            // per-file assertions below would never run.
            assertTrue(!files.isEmpty());
            for (final Path path : files) {
                final ParquetMetadata parquetMetadata =
                        ParquetFileReader.readFooter(new Configuration(), path, ParquetMetadataConverter.NO_FILTER);
                for (final BlockMetaData blockMetadata : parquetMetadata.getBlocks()) {
                    blockMetadata.getColumns()
                            .forEach(c -> assertEquals(compressionType, c.getCodec().name()));
                }
            }
        }
    }
}
Aggregations