
Example 1 with BaseCheck

Use of org.openstreetmap.atlas.checks.base.BaseCheck in project atlas-checks by osmlab.

The class IntegrityCheckSparkJob, method start:

@SuppressWarnings({ "rawtypes" })
@Override
public void start(final CommandMap commandMap) {
    final String atlasDirectory = (String) commandMap.get(ATLAS_FOLDER);
    final String input = Optional.ofNullable(input(commandMap)).orElse(atlasDirectory);
    final String output = output(commandMap);
    @SuppressWarnings("unchecked") final Set<OutputFormats> outputFormats = (Set<OutputFormats>) commandMap.get(OUTPUT_FORMATS);
    final StringList countries = StringList.split((String) commandMap.get(COUNTRIES), CommonConstants.COMMA);
    final MapRouletteConfiguration mapRouletteConfiguration = (MapRouletteConfiguration) commandMap.get(MAP_ROULETTE);
    final Configuration checksConfiguration = ConfigurationResolver.loadConfiguration(commandMap, CONFIGURATION_FILES, CONFIGURATION_JSON);
    final boolean saveIntermediateAtlas = (Boolean) commandMap.get(PBF_SAVE_INTERMEDIATE_ATLAS);
    @SuppressWarnings("unchecked") final Rectangle pbfBoundary = ((Optional<Rectangle>) commandMap.getOption(PBF_BOUNDING_BOX)).orElse(Rectangle.MAXIMUM);
    final boolean compressOutput = Boolean.valueOf((String) commandMap.get(SparkJob.COMPRESS_OUTPUT));
    final Map<String, String> sparkContext = configurationMap();
    final CheckResourceLoader checkLoader = new CheckResourceLoader(checksConfiguration);
    // check configuration and country list
    final Set<BaseCheck> preOverriddenChecks = checkLoader.loadChecks();
    if (!isValidInput(countries, preOverriddenChecks)) {
        logger.error("No countries supplied or checks enabled, exiting!");
        return;
    }
    // Read priority countries from the configuration
    final List<String> priorityCountries = checksConfiguration.get("priority.countries", Collections.EMPTY_LIST).value();
    // Create a list of Country to Check tuples
    // Add priority countries first if they are supplied by parameter
    final List<Tuple2<String, Set<BaseCheck>>> countryCheckTuples = new ArrayList<>();
    countries.stream().filter(priorityCountries::contains).forEach(country -> countryCheckTuples.add(new Tuple2<>(country, checkLoader.loadChecksForCountry(country))));
    // Then add the rest of the countries
    countries.stream().filter(country -> !priorityCountries.contains(country)).forEach(country -> countryCheckTuples.add(new Tuple2<>(country, checkLoader.loadChecksForCountry(country))));
    // Log initialized countries and checks
    logger.info("Initialized countries: {}", countryCheckTuples.stream().map(tuple -> tuple._1).collect(Collectors.joining(",")));
    logger.info("Initialized checks: {}", preOverriddenChecks.stream().map(BaseCheck::getCheckName).collect(Collectors.joining(",")));
    // Parallelize on the countries
    final JavaPairRDD<String, Set<BaseCheck>> countryCheckRDD = getContext().parallelizePairs(countryCheckTuples, countryCheckTuples.size());
    // Set target and temporary folders
    final String targetOutputFolder = SparkFileHelper.parentPath(output);
    final String temporaryOutputFolder = SparkFileHelper.combine(targetOutputFolder, SparkFileHelper.TEMPORARY_FOLDER_NAME);
    // Useful file helper to create/delete/name files and directories
    final SparkFileHelper fileHelper = new SparkFileHelper(sparkContext);
    // Atlas Helper to load different types of Atlas data
    final AtlasDataSource atlasLoader = new AtlasDataSource(sparkContext, checksConfiguration, pbfBoundary);
    // Create target folders
    fileHelper.mkdir(SparkFileHelper.combine(targetOutputFolder, OUTPUT_FLAG_FOLDER));
    fileHelper.mkdir(SparkFileHelper.combine(targetOutputFolder, OUTPUT_GEOJSON_FOLDER));
    fileHelper.mkdir(SparkFileHelper.combine(targetOutputFolder, OUTPUT_METRIC_FOLDER));
    // Run the set of flags per country per check. The output will be an RDD pair mapping each
    // country with a set of SparkFilePaths to flags, geojson and metrics generated.
    final JavaPairRDD<String, Set<SparkFilePath>> resultRDD = countryCheckRDD.mapToPair(tuple -> {
        final Time timer = Time.now();
        final String country = tuple._1();
        final Set<BaseCheck> checks = tuple._2();
        logger.info("Initialized checks for {}: {}", country, checks.stream().map(BaseCheck::getCheckName).collect(Collectors.joining(",")));
        final Set<SparkFilePath> resultingFiles = new HashSet<>();
        final SparkFilePath flagOutput;
        if (outputFormats.contains(OutputFormats.FLAGS)) {
            // Initialize flag output processor
            flagOutput = initializeOutput(OUTPUT_FLAG_FOLDER, TaskContext.get(), country, temporaryOutputFolder, targetOutputFolder);
            EventService.get(country).register(new CheckFlagFileProcessor(fileHelper, flagOutput.getTemporaryPath()).withCompression(compressOutput));
        } else {
            flagOutput = null;
        }
        final SparkFilePath geoJsonOutput;
        if (outputFormats.contains(OutputFormats.GEOJSON)) {
            // Initialize geojson output processor
            geoJsonOutput = initializeOutput(OUTPUT_GEOJSON_FOLDER, TaskContext.get(), country, temporaryOutputFolder, targetOutputFolder);
            EventService.get(country).register(new CheckFlagGeoJsonProcessor(fileHelper, geoJsonOutput.getTemporaryPath()).withCompression(compressOutput));
        } else {
            geoJsonOutput = null;
        }
        final SparkFilePath metricOutput;
        if (outputFormats.contains(OutputFormats.METRICS)) {
            // Initialize metric output processor
            metricOutput = initializeOutput(OUTPUT_METRIC_FOLDER, TaskContext.get(), country, temporaryOutputFolder, targetOutputFolder);
            EventService.get(country).register(new MetricFileGenerator(METRICS_FILENAME, fileHelper, metricOutput.getTemporaryPath()));
        } else {
            metricOutput = null;
        }
        final Consumer<Atlas> intermediateAtlasHandler;
        if (saveIntermediateAtlas) {
            final SparkFilePath atlasOutput = initializeOutput(OUTPUT_ATLAS_FOLDER, TaskContext.get(), country, temporaryOutputFolder, targetOutputFolder);
            intermediateAtlasHandler = atlas -> {
                writeAtlas(atlas, country, atlasOutput, fileHelper);
                resultingFiles.add(atlasOutput);
            };
        } else {
            intermediateAtlasHandler = atlas -> {
            // no-op
            };
        }
        try {
            final Atlas atlas = atlasLoader.load(input, country, intermediateAtlasHandler);
            if (atlas == null) {
                logger.error("Could not find {} Atlas files. Skipping country!", country);
            } else {
                executeChecks(country, atlas, checks, mapRouletteConfiguration);
                // Add output folders for handling later
                Stream.of(flagOutput, metricOutput, geoJsonOutput).filter(Objects::nonNull).forEach(resultingFiles::add);
            }
            EventService.get(country).complete();
            return new Tuple2<>(country, resultingFiles);
        } catch (final CoreException e) {
            logger.error("Exception running integrity checks on {}", country, e);
        }
        logger.trace("Integrity checks took {} ms to execute for {}.", timer.elapsedSince().asMilliseconds(), country);
        return new Tuple2<>(IGNORED_KEY, null);
    }).filter(tuple -> !tuple._1().equals(IGNORED_KEY));
    // Commit results
    resultRDD.foreach(countryPathPair -> {
        final String country = countryPathPair._1();
        final Set<SparkFilePath> paths = countryPathPair._2();
        logger.info("[{}] Committing outputs: {}", country, paths);
        paths.forEach(fileHelper::commit);
    });
    try {
        // Clean up
        logger.info("Deleting {}.", temporaryOutputFolder);
        fileHelper.deleteDirectory(temporaryOutputFolder);
    } catch (final Exception e) {
        logger.warn("Clean up failed!", e);
    }
}
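
The checks that drive this job come from checkLoader.loadChecks(), which instantiates every BaseCheck implementation enabled in the configuration. For orientation, below is a minimal, hypothetical check of the kind that loader would return. It assumes the usual BaseCheck extension points (a Configuration constructor, validCheckForObject, flag returning Optional<CheckFlag>, and the createFlag helper); the class name SingleNodeEdgeCheck and its logic are illustrative only, not part of atlas-checks.

import java.util.Optional;

import org.openstreetmap.atlas.checks.base.BaseCheck;
import org.openstreetmap.atlas.checks.flag.CheckFlag;
import org.openstreetmap.atlas.geography.atlas.items.AtlasObject;
import org.openstreetmap.atlas.geography.atlas.items.Edge;
import org.openstreetmap.atlas.utilities.configuration.Configuration;

// Hypothetical check, for illustration only. The extension-point signatures are assumed from the
// common atlas-checks pattern; verify them against the BaseCheck version in use.
public class SingleNodeEdgeCheck extends BaseCheck<Long>
{
    private static final long serialVersionUID = 1L;

    public SingleNodeEdgeCheck(final Configuration configuration)
    {
        super(configuration);
    }

    @Override
    public boolean validCheckForObject(final AtlasObject object)
    {
        // Only consider Edges; country-level filtering (validCheckForCountry) is inherited.
        return object instanceof Edge;
    }

    @Override
    protected Optional<CheckFlag> flag(final AtlasObject object)
    {
        final Edge edge = (Edge) object;
        if (edge.start().equals(edge.end()))
        {
            return Optional.of(this.createFlag(object, "Edge starts and ends on the same node."));
        }
        return Optional.empty();
    }
}
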
Also used : SparkJob(org.openstreetmap.atlas.generator.tools.spark.SparkJob) Atlas(org.openstreetmap.atlas.geography.atlas.Atlas) CommonConstants(org.openstreetmap.atlas.checks.constants.CommonConstants) CheckResourceLoader(org.openstreetmap.atlas.checks.base.CheckResourceLoader) SparkFilePath(org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFilePath) LoggerFactory(org.slf4j.LoggerFactory) MapRouletteConfiguration(org.openstreetmap.atlas.checks.maproulette.MapRouletteConfiguration) Relation(org.openstreetmap.atlas.geography.atlas.items.Relation) Finder(org.openstreetmap.atlas.geography.atlas.items.complex.Finder) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BaseCheck(org.openstreetmap.atlas.checks.base.BaseCheck) MapRouletteClient(org.openstreetmap.atlas.checks.maproulette.MapRouletteClient) AtlasEntity(org.openstreetmap.atlas.geography.atlas.items.AtlasEntity) Configuration(org.openstreetmap.atlas.utilities.configuration.Configuration) ComplexEntity(org.openstreetmap.atlas.geography.atlas.items.complex.ComplexEntity) StringConverter(org.openstreetmap.atlas.utilities.conversion.StringConverter) Map(java.util.Map) ConfigurationResolver(org.openstreetmap.atlas.checks.configuration.ConfigurationResolver) CheckFlagGeoJsonProcessor(org.openstreetmap.atlas.checks.event.CheckFlagGeoJsonProcessor) Duration(org.openstreetmap.atlas.utilities.scalars.Duration) Logger(org.slf4j.Logger) TaskContext(org.apache.spark.TaskContext) CoreException(org.openstreetmap.atlas.exception.CoreException) SparkFileOutput(org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFileOutput) Set(java.util.Set) CommandMap(org.openstreetmap.atlas.utilities.runtime.CommandMap) CheckFlagFileProcessor(org.openstreetmap.atlas.checks.event.CheckFlagFileProcessor) MetricFileGenerator(org.openstreetmap.atlas.checks.event.MetricFileGenerator) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Pool(org.openstreetmap.atlas.utilities.threads.Pool) Objects(java.util.Objects) Consumer(java.util.function.Consumer) FileSuffix(org.openstreetmap.atlas.streaming.resource.FileSuffix) List(java.util.List) Stream(java.util.stream.Stream) Iterables(org.openstreetmap.atlas.utilities.collections.Iterables) EventService(org.openstreetmap.atlas.checks.event.EventService) SparkFileHelper(org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFileHelper) MultiIterable(org.openstreetmap.atlas.utilities.collections.MultiIterable) Optional(java.util.Optional) Rectangle(org.openstreetmap.atlas.geography.Rectangle) StringList(org.openstreetmap.atlas.utilities.collections.StringList) Time(org.openstreetmap.atlas.utilities.time.Time) Collections(java.util.Collections)

Example 2 with BaseCheck

Use of org.openstreetmap.atlas.checks.base.BaseCheck in project atlas-checks by osmlab.

The class IntegrityCheckSparkJob, method executeChecks:

/**
 * Executes all {@link BaseCheck}s on the given {@link Atlas}. Each check runs in a separate
 * thread. The checks go over all {@link AtlasEntity}s and {@link Relation}s.
 * {@link ComplexEntity}s can be processed by using the appropriate {@link Finder} and adding
 * them to the {@link Iterable} of objects.
 *
 * @param country
 *            the country for which the checks are being run
 * @param atlas
 *            the {@link Atlas} on which the checks will be run
 * @param checksToRun
 *            the set of {@link BaseCheck}s to execute
 * @param configuration
 *            the {@link MapRouletteConfiguration} used to create new {@link MapRouletteClient}s
 */
@SuppressWarnings("rawtypes")
private static void executeChecks(final String country, final Atlas atlas, final Set<BaseCheck> checksToRun, final MapRouletteConfiguration configuration) {
    final Pool checkExecutionPool = new Pool(checksToRun.size(), "Check execution pool", POOL_DURATION_BEFORE_KILL);
    checksToRun.stream().filter(check -> check.validCheckForCountry(country))
            .forEach(check -> checkExecutionPool.queue(new RunnableCheck(country, check,
                    new MultiIterable<>(atlas.items(), atlas.relations(),
                            findComplexEntities(check, atlas)),
                    MapRouletteClient.instance(configuration))));
    checkExecutionPool.close();
}
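
The Pool above fans each enabled check out to its own worker, and close() waits for the queued RunnableChecks to finish, bounded by POOL_DURATION_BEFORE_KILL. The standalone sketch below illustrates the same fan-out pattern with plain java.util.concurrent primitives instead of the atlas Pool and RunnableCheck classes; it is a simplified stand-in, not the project's implementation.

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public final class CheckFanOutSketch
{
    public static void main(final String[] args) throws InterruptedException
    {
        // Stand-ins for the loaded checks; in the real job each Runnable wraps a BaseCheck.
        final List<Runnable> checkRunnables = List.of(
                () -> System.out.println("running check A"),
                () -> System.out.println("running check B"));

        // One worker per check, mirroring new Pool(checksToRun.size(), ...).
        final ExecutorService pool = Executors.newFixedThreadPool(checkRunnables.size());
        checkRunnables.forEach(pool::submit);

        // Pool#close() in the job waits for queued checks to complete; shutdown followed by
        // awaitTermination plays that role here.
        pool.shutdown();
        pool.awaitTermination(1, TimeUnit.MINUTES);
    }
}
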
Also used : SparkJob(org.openstreetmap.atlas.generator.tools.spark.SparkJob) Atlas(org.openstreetmap.atlas.geography.atlas.Atlas) CommonConstants(org.openstreetmap.atlas.checks.constants.CommonConstants) CheckResourceLoader(org.openstreetmap.atlas.checks.base.CheckResourceLoader) SparkFilePath(org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFilePath) LoggerFactory(org.slf4j.LoggerFactory) MapRouletteConfiguration(org.openstreetmap.atlas.checks.maproulette.MapRouletteConfiguration) Relation(org.openstreetmap.atlas.geography.atlas.items.Relation) Finder(org.openstreetmap.atlas.geography.atlas.items.complex.Finder) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) BaseCheck(org.openstreetmap.atlas.checks.base.BaseCheck) MapRouletteClient(org.openstreetmap.atlas.checks.maproulette.MapRouletteClient) AtlasEntity(org.openstreetmap.atlas.geography.atlas.items.AtlasEntity) Configuration(org.openstreetmap.atlas.utilities.configuration.Configuration) ComplexEntity(org.openstreetmap.atlas.geography.atlas.items.complex.ComplexEntity) StringConverter(org.openstreetmap.atlas.utilities.conversion.StringConverter) Map(java.util.Map) ConfigurationResolver(org.openstreetmap.atlas.checks.configuration.ConfigurationResolver) CheckFlagGeoJsonProcessor(org.openstreetmap.atlas.checks.event.CheckFlagGeoJsonProcessor) Duration(org.openstreetmap.atlas.utilities.scalars.Duration) Logger(org.slf4j.Logger) TaskContext(org.apache.spark.TaskContext) CoreException(org.openstreetmap.atlas.exception.CoreException) SparkFileOutput(org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFileOutput) Set(java.util.Set) CommandMap(org.openstreetmap.atlas.utilities.runtime.CommandMap) CheckFlagFileProcessor(org.openstreetmap.atlas.checks.event.CheckFlagFileProcessor) MetricFileGenerator(org.openstreetmap.atlas.checks.event.MetricFileGenerator) Tuple2(scala.Tuple2) Collectors(java.util.stream.Collectors) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) Pool(org.openstreetmap.atlas.utilities.threads.Pool) Objects(java.util.Objects) Consumer(java.util.function.Consumer) FileSuffix(org.openstreetmap.atlas.streaming.resource.FileSuffix) List(java.util.List) Stream(java.util.stream.Stream) Iterables(org.openstreetmap.atlas.utilities.collections.Iterables) EventService(org.openstreetmap.atlas.checks.event.EventService) SparkFileHelper(org.openstreetmap.atlas.generator.tools.spark.utilities.SparkFileHelper) MultiIterable(org.openstreetmap.atlas.utilities.collections.MultiIterable) Optional(java.util.Optional) Rectangle(org.openstreetmap.atlas.geography.Rectangle) StringList(org.openstreetmap.atlas.utilities.collections.StringList) Time(org.openstreetmap.atlas.utilities.time.Time) Collections(java.util.Collections)

Aggregations

ArrayList (java.util.ArrayList): 2 usages
Collections (java.util.Collections): 2 usages
HashSet (java.util.HashSet): 2 usages
List (java.util.List): 2 usages
Map (java.util.Map): 2 usages
Objects (java.util.Objects): 2 usages
Optional (java.util.Optional): 2 usages
Set (java.util.Set): 2 usages
Consumer (java.util.function.Consumer): 2 usages
Collectors (java.util.stream.Collectors): 2 usages
Stream (java.util.stream.Stream): 2 usages
TaskContext (org.apache.spark.TaskContext): 2 usages
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 2 usages
BaseCheck (org.openstreetmap.atlas.checks.base.BaseCheck): 2 usages
CheckResourceLoader (org.openstreetmap.atlas.checks.base.CheckResourceLoader): 2 usages
ConfigurationResolver (org.openstreetmap.atlas.checks.configuration.ConfigurationResolver): 2 usages
CommonConstants (org.openstreetmap.atlas.checks.constants.CommonConstants): 2 usages
CheckFlagFileProcessor (org.openstreetmap.atlas.checks.event.CheckFlagFileProcessor): 2 usages
CheckFlagGeoJsonProcessor (org.openstreetmap.atlas.checks.event.CheckFlagGeoJsonProcessor): 2 usages
EventService (org.openstreetmap.atlas.checks.event.EventService): 2 usages