use of org.openstreetmap.atlas.geography.boundary.CountryBoundaryMap in project atlas-generator by osmlab.
the class AtlasGenerator method start.
/**
 * Runs the Atlas generation pipeline for the requested countries. The stages are, in order:
 * raw Atlas creation, relation sub-atlas extraction, slicing, edge-only sub-atlas extraction
 * and way-sectioning. Each stage result is saved under {@code output} through
 * {@code saveAsHadoop}. Depending on the command parameters, line delimited geojson, shard
 * and country statistics, diffs against a previous output and filtered sub-atlases are also
 * produced.
 *
 * @param command
 *            the command map holding the job parameters
 * @throws CoreException
 *             if a configured filter path is supplied without a configured filter name
 */
@Override
public void start(final CommandMap command) {
    final Map<String, String> sparkContext = configurationMap();
    final List<String> countries = ((StringList) command.get(AtlasGeneratorParameters.COUNTRIES))
            .stream().collect(Collectors.toList());
    final String previousOutputForDelta = (String) command
            .get(AtlasGeneratorParameters.PREVIOUS_OUTPUT_FOR_DELTA);
    final String pbfPath = (String) command.get(AtlasGeneratorParameters.PBF_PATH);
    final SlippyTilePersistenceScheme pbfScheme = (SlippyTilePersistenceScheme) command
            .get(AtlasGeneratorParameters.PBF_SCHEME);
    final SlippyTilePersistenceScheme atlasScheme = (SlippyTilePersistenceScheme) command
            .get(AtlasGeneratorParameters.ATLAS_SCHEME);
    final Sharding sharding = atlasSharding(command);
    final Sharding pbfSharding = pbfSharding(command);
    final PbfContext pbfContext = new PbfContext(pbfPath, pbfSharding, pbfScheme);
    final String shouldIncludeFilteredOutputConfiguration = (String) command
            .get(AtlasGeneratorParameters.SHOULD_INCLUDE_FILTERED_OUTPUT_CONFIGURATION);
    final StringList configuredFilterPath = (StringList) command
            .get(AtlasGeneratorParameters.CONFIGURED_FILTER_OUTPUT);
    final StringList configuredFilterName = (StringList) command
            .get(AtlasGeneratorParameters.CONFIGURED_FILTER_NAME);

    // The taggable filter is only applied further down when a configuration was supplied; the
    // reject-all predicate just keeps the final variable definitely assigned.
    final Predicate<AtlasEntity> taggableOutputFilter;
    List<ConfiguredFilter> configuredOutputFilter = null;
    if (shouldIncludeFilteredOutputConfiguration == null) {
        taggableOutputFilter = atlasEntity -> false;
    } else {
        taggableOutputFilter = atlasEntity -> AtlasGeneratorParameters
                .getTaggableFilterFrom(shouldIncludeFilteredOutputConfiguration, sparkContext)
                .test(atlasEntity);
    }
    if (configuredFilterPath != null) {
        if (configuredFilterName == null) {
            throw new CoreException("A filter name must be provided for configured filter output!");
        }
        configuredOutputFilter = AtlasGeneratorParameters.getConfiguredFilterListFrom(
                configuredFilterName, configuredFilterPath, sparkContext);
    }

    final String output = output(command);
    final boolean lineDelimitedGeojsonOutput = (boolean) command
            .get(AtlasGeneratorParameters.LINE_DELIMITED_GEOJSON_OUTPUT);
    final CountryBoundaryMap boundaries = boundaries(countries, command);

    // Generate country-shard generation tasks
    final Time timer = Time.now();
    final List<AtlasGenerationTask> tasks = generateTasks(countries, boundaries, sharding);
    logger.debug("Generated {} tasks in {}.", tasks.size(), timer.elapsedSince());

    // AtlasLoadingOption isn't serializable, neither is command map. To avoid duplicating
    // boiler-plate code for creating the AtlasLoadingOption, extract the properties we need
    // from the command map and pass those around to create the AtlasLoadingOption
    final Map<String, String> atlasLoadingOptions = AtlasGeneratorParameters
            .extractAtlasLoadingProperties(command, sparkContext);

    // Leverage Spark broadcast to have a read-only variable cached on each machine, instead of
    // shipping a copy with each task. All of these are re-used across tasks and are unchanged.
    final Broadcast<CountryBoundaryMap> broadcastBoundaries = getContext().broadcast(boundaries);
    final Broadcast<Map<String, String>> broadcastLoadingOptions = getContext()
            .broadcast(atlasLoadingOptions);
    final Broadcast<Sharding> broadcastSharding = getContext().broadcast(sharding);

    // Generate the raw Atlas and filter any null atlases
    final JavaPairRDD<String, Atlas> countryRawAtlasRDD = getContext()
            .parallelize(tasks, tasks.size())
            .mapToPair(AtlasGeneratorHelper.generateRawAtlas(broadcastBoundaries, sparkContext,
                    broadcastLoadingOptions, pbfContext, atlasScheme))
            .filter(tuple -> tuple._2() != null);
    countryRawAtlasRDD.cache();
    // Persist the RDD and save the intermediary state
    saveAsHadoop(countryRawAtlasRDD, AtlasGeneratorJobGroup.RAW, output);

    // Subatlas the raw shard Atlas files using the relation slicing filter
    final Predicate<AtlasEntity> slicingFilter = AtlasGeneratorParameters
            .buildAtlasLoadingOption(broadcastBoundaries.getValue(),
                    broadcastLoadingOptions.getValue())
            .getRelationSlicingFilter();
    final JavaPairRDD<String, Atlas> relationSubAtlasRDD = countryRawAtlasRDD
            .mapToPair(AtlasGeneratorHelper.subatlas(slicingFilter, AtlasCutType.SILK_CUT))
            .filter(tuple -> tuple._2() != null);
    relationSubAtlasRDD.cache();
    saveAsHadoop(relationSubAtlasRDD, AtlasGeneratorJobGroup.SLICED_SUB, output);

    // Slice the raw Atlas and filter any null atlases. The slicing step is handed the folder
    // the relation sub-atlases were just saved to.
    final JavaPairRDD<String, Atlas> slicedAtlasShardsRDD = countryRawAtlasRDD
            .mapToPair(AtlasGeneratorHelper.sliceAtlas(broadcastBoundaries,
                    broadcastLoadingOptions, broadcastSharding,
                    getAlternateSubFolderOutput(output,
                            AtlasGeneratorJobGroup.SLICED_SUB.getCacheFolder()),
                    atlasScheme, sparkContext))
            .filter(tuple -> tuple._2() != null);
    slicedAtlasShardsRDD.cache();
    saveAsHadoop(slicedAtlasShardsRDD, AtlasGeneratorJobGroup.SLICED, output);

    // Remove the raw atlas RDD from cache since we've cached the fully sliced RDD
    try {
        countryRawAtlasRDD.unpersist();
    } catch (final Exception exception) {
        logger.warn(EXCEPTION_MESSAGE, AtlasGeneratorJobGroup.SLICED.getDescription(), exception);
    }
    // The relation sub-atlas RDD has been saved and no later RDD in this method is derived
    // from it, so release its cache as well instead of holding it until the job ends.
    try {
        relationSubAtlasRDD.unpersist();
    } catch (final Exception exception) {
        logger.warn(EXCEPTION_MESSAGE, AtlasGeneratorJobGroup.SLICED_SUB.getDescription(),
                exception);
    }

    // Subatlas the sliced Atlas using the edge filter
    final Predicate<AtlasEntity> edgeFilter = AtlasGeneratorParameters
            .buildAtlasLoadingOption(broadcastBoundaries.getValue(),
                    broadcastLoadingOptions.getValue())
            .getEdgeFilter();
    final JavaPairRDD<String, Atlas> edgeOnlySubAtlasRDD = slicedAtlasShardsRDD
            .mapToPair(AtlasGeneratorHelper.subatlas(edgeFilter, AtlasCutType.SILK_CUT))
            .filter(tuple -> tuple._2() != null);
    edgeOnlySubAtlasRDD.cache();
    saveAsHadoop(edgeOnlySubAtlasRDD, AtlasGeneratorJobGroup.EDGE_SUB, output);

    // Section the sliced Atlas
    final JavaPairRDD<String, Atlas> countryAtlasShardsRDD = slicedAtlasShardsRDD
            .mapToPair(AtlasGeneratorHelper.sectionAtlas(broadcastBoundaries, broadcastSharding,
                    sparkContext, broadcastLoadingOptions,
                    getAlternateSubFolderOutput(output,
                            AtlasGeneratorJobGroup.EDGE_SUB.getCacheFolder()),
                    getAlternateSubFolderOutput(output,
                            AtlasGeneratorJobGroup.SLICED.getCacheFolder()),
                    atlasScheme));
    countryAtlasShardsRDD.cache();
    saveAsHadoop(countryAtlasShardsRDD, AtlasGeneratorJobGroup.WAY_SECTIONED_PBF, output);
    this.copyToOutput(command, pbfPath, getAlternateSubFolderOutput(output,
            AtlasGeneratorJobGroup.WAY_SECTIONED_PBF.getCacheFolder()));

    // Remove the edge-only subatlas as we've finished way-sectioning
    try {
        edgeOnlySubAtlasRDD.unpersist();
    } catch (final Exception exception) {
        logger.warn(EXCEPTION_MESSAGE, AtlasGeneratorJobGroup.WAY_SECTIONED_PBF.getDescription(),
                exception);
    }

    if (lineDelimitedGeojsonOutput) {
        countryAtlasShardsRDD.saveAsHadoopFile(
                getAlternateSubFolderOutput(output, LINE_DELIMITED_GEOJSON_STATISTICS_FOLDER),
                Text.class, String.class, MultipleLineDelimitedGeojsonOutputFormat.class,
                new JobConf(configuration()));
        logger.info("\n\n********** SAVED THE LINE DELIMITED GEOJSON ATLAS **********\n");
    }

    // Remove the sliced atlas RDD from cache since we've cached the final RDD
    try {
        slicedAtlasShardsRDD.unpersist();
    } catch (final Exception exception) {
        logger.warn(EXCEPTION_MESSAGE, AtlasGeneratorJobGroup.WAY_SECTIONED_PBF.getDescription(),
                exception);
    }

    if (AtlasGeneratorParameters.runStatistics(command)) {
        // Create the metrics
        final JavaPairRDD<String, AtlasStatistics> statisticsRDD = countryAtlasShardsRDD
                .mapToPair(AtlasGeneratorHelper.generateAtlasStatistics(broadcastSharding));
        statisticsRDD.cache();
        saveAsHadoop(statisticsRDD, AtlasGeneratorJobGroup.SHARD_STATISTICS, output);

        // Aggregate the metrics
        final JavaPairRDD<String, AtlasStatistics> reducedStatisticsRDD = statisticsRDD
                .mapToPair(AtlasGeneratorHelper.groupAtlasStatisticsByCountry())
                .reduceByKey(AtlasGeneratorHelper.reduceAtlasStatistics())
                .mapToPair(tuple -> new Tuple2<>(tuple._1(), tuple._2().getAtlasStatistics()));

        // Save aggregated metrics
        saveAsHadoop(reducedStatisticsRDD, AtlasGeneratorJobGroup.COUNTRY_STATISTICS, output);
        try {
            statisticsRDD.unpersist();
        } catch (final Exception exception) {
            logger.warn(EXCEPTION_MESSAGE,
                    AtlasGeneratorJobGroup.COUNTRY_STATISTICS.getDescription(), exception);
        }
    }

    // Compute the deltas, if needed. A missing parameter value is treated like an empty one
    // rather than failing with a NullPointerException.
    if (previousOutputForDelta != null && !previousOutputForDelta.isEmpty()) {
        final JavaPairRDD<String, List<FeatureChange>> diffsRDD = countryAtlasShardsRDD
                .mapToPair(
                        AtlasGeneratorHelper.computeAtlasDiff(sparkContext, previousOutputForDelta))
                .filter(tuple -> tuple._2() != null);
        saveAsHadoop(diffsRDD, AtlasGeneratorJobGroup.DIFFS, output);
    }

    if (shouldIncludeFilteredOutputConfiguration != null) {
        final JavaPairRDD<String, Atlas> subAtlasRDD = countryAtlasShardsRDD
                .mapToPair(
                        AtlasGeneratorHelper.subatlas(taggableOutputFilter, AtlasCutType.SOFT_CUT))
                .filter(tuple -> tuple._2() != null);
        saveAsHadoop(subAtlasRDD, AtlasGeneratorJobGroup.TAGGABLE_FILTERED_OUTPUT, output);
    }

    if (configuredOutputFilter != null) {
        // One filtered output is saved per configured filter
        for (final ConfiguredFilter configuredFilter : configuredOutputFilter) {
            final JavaPairRDD<String, Atlas> subAtlasRDD = countryAtlasShardsRDD
                    .mapToPair(
                            AtlasGeneratorHelper.subatlas(configuredFilter, AtlasCutType.SOFT_CUT))
                    .filter(tuple -> tuple._2() != null);
            saveAsHadoop(subAtlasRDD, AtlasGeneratorJobGroup.CONFIGURED_FILTERED_OUTPUT,
                    configuredFilter.toString(), output);
        }
    }

    // Every output derived from the final RDD has been produced; release its cache
    try {
        countryAtlasShardsRDD.unpersist();
    } catch (final Exception exception) {
        logger.warn(EXCEPTION_MESSAGE, AtlasGeneratorJobGroup.DIFFS.getDescription(), exception);
    }
}
use of org.openstreetmap.atlas.geography.boundary.CountryBoundaryMap in project atlas-generator by osmlab.
the class AtlasMissingShardVerifier method verifier.
/**
 * Checks whether any of the given missing country shards contains OSM data that should have
 * produced an Atlas. Nodes and ways are fetched from an Overpass server, and every shard for
 * which a remaining node lies inside the boundary/shard intersection is written to
 * {@code output}.
 *
 * @param boundaries
 *            the country boundaries used to build the queries and the intersection clips
 * @param missingCountryShardsUntrimmed
 *            the candidate missing shards, before shards with zero intersection are removed
 * @param output
 *            the file the report of missing shards is written to
 * @param server
 *            the Overpass server to query
 * @param proxy
 *            the proxy handed to the Overpass client
 * @param filter
 *            the filter tested against each way's tags; nodes of ways that fail it are ignored
 * @param numRetryQueries
 *            passed to {@code createQueryList} when the master query returned too much data
 * @return 0 if no shard was reported, -1 if at least one shard was reported or if any exception
 *         was raised during verification
 */
public int verifier(final CountryBoundaryMap boundaries,
        final Set<CountryShard> missingCountryShardsUntrimmed, final File output,
        final String server, final HttpHost proxy, final ConfiguredTaggableFilter filter,
        final Integer numRetryQueries) {
    int returnCode = 0;
    final Set<CountryShard> missingCountryShards = removeShardsWithZeroIntersection(
            missingCountryShardsUntrimmed, boundaries);
    final String masterQuery = createMasterQuery(boundaries, missingCountryShards);
    final OverpassClient client = new OverpassClient(server, proxy);
    try (SafeBufferedWriter writer = output.writer()) {
        final List<OverpassOsmNode> nodes = client.nodesFromQuery(masterQuery);
        final List<OverpassOsmWay> ways = client.waysFromQuery(masterQuery);
        if (client.hasTooMuchResponseData()) {
            logger.warn("The overpass query returned too much data. This means that there's "
                    + "potentially large amounts of data missing! Rerunning with smaller queries.");
            client.resetTooMuchDataError();
            final StringList splitQueries = createQueryList(boundaries, missingCountryShards,
                    numRetryQueries);
            // Discard the partial results of the master query before re-running in pieces
            nodes.clear();
            ways.clear();
            for (final String query : splitQueries) {
                nodes.addAll(client.nodesFromQuery(query));
                ways.addAll(client.waysFromQuery(query));
            }
        }
        // Still flagged after the reset above: the split queries overflowed as well
        if (client.hasTooMuchResponseData()) {
            throw new CoreException("The overpass query had too much data, even when split "
                    + numRetryQueries + " times. There is lots of data missing!");
        }
        if (client.hasUnknownError()) {
            throw new CoreException(
                    "The overpass query encountered an error. Validation has failed.");
        }
        final STRtree nodeTree = initializeNodeTree(nodes);
        final STRtree wayTree = initializeWayTree(nodes, ways);
        for (final CountryShard countryShard : missingCountryShards) {
            final Clip clip = intersectionClip(countryShard, boundaries);
            final MultiPolygon clipMulti = clip.getClipMultiPolygon();
            final Rectangle clipBounds = clipMulti.bounds();
            @SuppressWarnings("unchecked")
            final List<OverpassOsmNode> nodeList = nodeTree.query(clipBounds.asEnvelope());
            // Prune extra nodes returned by STRtree that might not actually be contained within
            // clipBounds
            nodeList.removeIf(node -> !clipBounds.fullyGeometricallyEncloses(
                    Location.forString(node.getLatitude() + "," + node.getLongitude())));
            @SuppressWarnings("unchecked")
            final List<OverpassOsmWay> wayList = wayTree.query(clipBounds.asEnvelope());
            // Filter out ways that aren't ingested into atlas: nodes referenced by a way that
            // fails the filter are dropped from the candidate list
            wayList.stream().filter(way -> !filter.test(Taggable.with(way.getTags())))
                    .forEach(way ->
                    {
                        nodeList.removeIf(node -> way.getNodeIdentifiers()
                                .contains(node.getIdentifier()));
                    });
            // If any remaining node lies inside the boundary/shard intersection (not just its
            // bounds) then the shard should have been built, so break and list the shard
            for (final OverpassOsmNode node : nodeList) {
                final Location nodeLocation = Location
                        .forString(node.getLatitude() + "," + node.getLongitude());
                if (clipMulti.fullyGeometricallyEncloses(nodeLocation)) {
                    returnCode = -1;
                    writer.writeLine(countryShard.toString());
                    writer.writeLine("Boundary/Shard intersection zone: " + clipMulti.toString());
                    writer.writeLine(
                            "Id of node that should have been imported: " + node.getIdentifier());
                    writer.writeLine("Node Location: " + nodeLocation.toString() + "\n");
                    logger.info("{} is missing!", countryShard);
                    break;
                }
            }
        }
    } catch (final Exception e) {
        // Any failure, including the CoreExceptions thrown above, is logged and reported
        // through the return code
        logger.error("Error!", e);
        return -1;
    }
    if (returnCode == 0) {
        logger.info("No shards are missing!");
    }
    return returnCode;
}
use of org.openstreetmap.atlas.geography.boundary.CountryBoundaryMap in project atlas-generator by osmlab.
the class AtlasGenerator method boundaries.
/**
 * Loads the country boundary map for this job. When the country shapes parameter is set, the
 * map is read from that resource through a {@link CountryBoundaryMapArchiver}; otherwise it is
 * obtained from the persistence tools using the PBF path parameter. If the loaded map has no
 * grid index, one is initialized for the given countries.
 *
 * @param countries
 *            the countries used to initialize a missing grid index
 * @param command
 *            the command map holding the country shapes and PBF path parameters
 * @return the loaded boundary map, with a grid index
 */
private CountryBoundaryMap boundaries(final List<String> countries, final CommandMap command) {
    final String shapesLocation = (String) command.get(AtlasGeneratorParameters.COUNTRY_SHAPES);
    final CountryBoundaryMap loaded = shapesLocation != null
            ? new CountryBoundaryMapArchiver().read(resource(shapesLocation))
            : persistenceTools()
                    .boundaries((String) command.get(AtlasGeneratorParameters.PBF_PATH));
    if (loaded.hasGridIndex()) {
        return loaded;
    }
    logger.warn("Given country boundary file didn't have grid index. Initializing grid index for {}.", countries);
    loaded.initializeGridIndex(new HashSet<>(countries));
    return loaded;
}
use of org.openstreetmap.atlas.geography.boundary.CountryBoundaryMap in project atlas-generator by osmlab.
the class AtlasGeneratorTest method testGenerateTasksEmptyBoundary.
/**
 * Task generation against a boundary map that holds no countries must yield no tasks.
 */
@Test
public void testGenerateTasksEmptyBoundary() {
    final CountryBoundaryMap emptyBoundaries = new CountryBoundaryMap();
    final boolean noTasksGenerated = AtlasGenerator
            .generateTasks(Lists.newArrayList("HTI"), emptyBoundaries, SHARDING).isEmpty();
    Assert.assertTrue(noTasksGenerated);
}
use of org.openstreetmap.atlas.geography.boundary.CountryBoundaryMap in project atlas-generator by osmlab.
the class AtlasMissingShardVerifierTest method testCreateQueryList.
/**
 * Builds a single four-cornered country "WWW", registers the four zoom-1 shards as missing and
 * checks the queries produced by {@code createQueryList} when asked for 3 queries.
 */
@Test
public void testCreateQueryList() {
    // Closed ring: the first coordinate is reused as the last one
    final Coordinate ringStart = new Coordinate(-64.544677734375, 35.3759765625);
    final Coordinate[] ring = { ringStart, new Coordinate(71.158447265625, 50.8447265625),
            new Coordinate(71.158447265625, -27.9052734375),
            new Coordinate(-60.325927734375, -31.4208984375), ringStart };

    final CountryBoundaryMap worldBoundaries = new CountryBoundaryMap();
    final Polygon worldPolygon = new GeometryFactory().createPolygon(ring);
    worldBoundaries.addCountry("WWW", worldPolygon);

    final Set<CountryShard> missingShards = new HashSet<>();
    for (final String shardName : new String[] { "1-0-0", "1-1-0", "1-0-1", "1-1-1" }) {
        missingShards.add(new CountryShard("WWW", shardName));
    }

    final StringList generatedQueries = AtlasMissingShardVerifier
            .createQueryList(worldBoundaries, missingShards, 3);
    Assert.assertEquals("(node(0.0,-64.5446777,42.733401,0.0);<;);out body;", generatedQueries.get(0));
    Assert.assertEquals("(node(0.0,0.0,50.8447266,71.1584473);<;);out body;", generatedQueries.get(1));
    Assert.assertEquals(3, generatedQueries.size());
}
Aggregations