Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.
Class CalculatePartitioner, method call.
/**
 * Builds a {@link GraphPartitioner} from the first element of each Parquet file on disk.
 *
 * <p>For every group in the schema a {@link GroupPartitioner} is registered, built from the
 * files in that group's directory. For every edge group a second {@link GroupPartitioner} is
 * registered for the reversed-edge directory.
 *
 * @return the partitioner describing all groups and reversed edge groups
 * @throws IOException if a directory cannot be listed, a file cannot be read, or a file
 *                     unexpectedly contains no elements
 */
public GraphPartitioner call() throws IOException {
    final SchemaUtils schemaUtils = new SchemaUtils(schema);
    final GraphPartitioner graphPartitioner = new GraphPartitioner();
    for (final String group : schema.getGroups()) {
        LOGGER.info("Calculating GroupPartitioner for group {}", group);
        final List<PartitionKey> partitionKeys = readPartitionKeys(group, false, schemaUtils.getConverter(group));
        final GroupPartitioner groupPartitioner = new GroupPartitioner(group, partitionKeys);
        graphPartitioner.addGroupPartitioner(group, groupPartitioner);
        LOGGER.info("GroupPartitioner for group {} is {}", group, groupPartitioner);
    }
    for (final String group : schema.getEdgeGroups()) {
        LOGGER.info("Calculating GroupPartitioner for reversed edge group {}", group);
        final List<PartitionKey> partitionKeys = readPartitionKeys(group, true, schemaUtils.getConverter(group));
        final GroupPartitioner groupPartitioner = new GroupPartitioner(group, partitionKeys);
        graphPartitioner.addGroupPartitionerForReversedEdges(group, groupPartitioner);
    }
    return graphPartitioner;
}

/**
 * Reads the first element of every ".parquet" file (except the first, in sorted path order)
 * in a group's directory and converts each one into a {@link PartitionKey}.
 *
 * @param group     the group whose directory is scanned
 * @param reversed  {@code true} to scan the reversed-edge directory and build keys with the
 *                  reversed-edge conversion; {@code false} for the normal directory
 * @param converter the converter for {@code group}
 * @return one partition key per file after the first, in sorted path order
 * @throws IOException if listing or reading fails, or a file yields no element
 */
private List<PartitionKey> readPartitionKeys(final String group,
                                             final boolean reversed,
                                             final GafferGroupObjectConverter converter) throws IOException {
    final Path groupPath = new Path(path, ParquetStore.getGroupSubDir(group, reversed));
    final FileStatus[] files = fs.listStatus(groupPath, p -> p.getName().endsWith(".parquet"));
    final SortedSet<Path> sortedFiles = new TreeSet<>();
    for (final FileStatus file : files) {
        sortedFiles.add(file.getPath());
    }
    final Path[] sortedPaths = sortedFiles.toArray(new Path[0]);
    LOGGER.debug("Found {} files in {}", files.length, groupPath);

    // Reversed-edge files only ever hold edges; otherwise consult the schema.
    final boolean isEntity = !reversed && schema.getEntityGroups().contains(group);
    final List<PartitionKey> partitionKeys = new ArrayList<>();
    // NB Skip first file (presumably because the keys mark the boundaries between
    // consecutive files, so n files need n - 1 keys - confirm against GroupPartitioner).
    for (int i = 1; i < sortedPaths.length; i++) {
        final Path file = sortedPaths[i];
        LOGGER.debug("Reading first line of {}", file);
        final Element element;
        // try-with-resources guarantees the reader is closed even if read() throws.
        try (ParquetReader<Element> reader = new ParquetElementReader.Builder<Element>(file)
                .isEntity(isEntity)
                .usingConverter(converter)
                .build()) {
            element = reader.read();
        }
        // NB Should never be null as empty files are removed before this is called.
        if (null == element) {
            // Report the sorted path actually read, not the unsorted listing entry.
            throw new IOException("No first " + (reversed ? "edge" : "element") + " in file " + file
                    + " - empty files are supposed to be removed");
        }
        final Object[] parquetObjects = reversed
                ? converter.corePropertiesToParquetObjectsForReversedEdge((Edge) element)
                : converter.corePropertiesToParquetObjects(element);
        partitionKeys.add(new PartitionKey(parquetObjects));
    }
    return partitionKeys;
}
Use of uk.gov.gchq.gaffer.parquetstore.utils.GafferGroupObjectConverter in project Gaffer by gchq.
Class RetrieveElementsFromFile, method openParquetReader.
/**
 * Opens a Parquet reader over {@code filePath} for the configured group.
 *
 * <p>The reader is told whether the group is an entity group and is given the group's
 * converter. When a filter is set, it is applied to the reader as well.
 *
 * @return a newly built reader for the file
 * @throws IOException if the reader cannot be built
 */
private ParquetReader<Element> openParquetReader() throws IOException {
    final boolean entityGroup = schemaUtils.getEntityGroups().contains(group);
    final GafferGroupObjectConverter groupConverter = schemaUtils.getConverter(group);
    LOGGER.debug("Opening a new Parquet reader for file {}", filePath);

    // No filter configured: build a plain reader.
    if (null == filter) {
        return new ParquetElementReader.Builder<Element>(filePath)
                .isEntity(entityGroup)
                .usingConverter(groupConverter)
                .build();
    }

    return new ParquetElementReader.Builder<Element>(filePath)
            .isEntity(entityGroup)
            .usingConverter(groupConverter)
            .withFilter(FilterCompat.get(filter))
            .build();
}
Aggregations