Search in sources :

Example 1 with LocalInputCommandLineOptions

use of org.locationtech.geowave.core.store.ingest.LocalInputCommandLineOptions in project geowave by locationtech.

The method runOperation of the class SparkIngestDriver.

/**
 * Collects every file under {@code basePath}, distributes the file URIs over a Spark RDD and calls
 * {@code processInput} once per partition.
 *
 * <p>Only base paths starting with {@code s3://}, {@code hdfs://} or {@code file:/} are accepted.
 * When the caller does not supply a positive executor count, one executor per eight input files
 * (at least one) is requested; when no positive core count is supplied, four cores per executor
 * are used. The RDD is split into {@code executors * cores * 2} partitions.
 *
 * @param configFile properties file that is loaded here and also handed to {@code processInput}
 * @param localInput passed through to {@code processInput}
 * @param inputStoreName passed through to {@code processInput}
 * @param indexList passed through to {@code processInput}
 * @param ingestOptions passed through to {@code processInput}
 * @param sparkOptions supplies app name, master, driver host, executor count and core count
 * @param basePath location of the input files; its prefix selects the S3 or HDFS code path
 * @param console passed to {@code processInput} on the S3 code path
 * @return {@code false} if {@code basePath} has an unsupported prefix or the resolved input path
 *         is {@code null} or does not exist; {@code true} once all partitions were processed
 * @throws IOException if walking the input file tree fails (other calls made here may also raise
 *         it)
 */
public boolean runOperation(final File configFile, final LocalInputCommandLineOptions localInput, final String inputStoreName, final String indexList, final VisibilityOptions ingestOptions, final SparkCommandLineOptions sparkOptions, final String basePath, final Console console) throws IOException {
    final Properties configProperties = ConfigOptions.loadProperties(configFile);
    final boolean isS3 = basePath.startsWith("s3://");
    final boolean isHDFS = !isS3 && (basePath.startsWith("hdfs://") || basePath.startsWith("file:/"));

    // Resolve the input path with the file system that matches the base path prefix.
    final String s3EndpointUrl;
    final Path inputPath;
    if (isS3) {
        s3EndpointUrl = ConfigAWSCommand.getS3Url(configProperties);
        inputPath = URLIngestUtils.setupS3FileSystem(basePath, s3EndpointUrl);
    } else if (isHDFS) {
        s3EndpointUrl = null;
        final String hdfsFSUrl = ConfigHDFSCommand.getHdfsUrl(configProperties);
        inputPath = setUpHDFSFilesystem(basePath, hdfsFSUrl, basePath.startsWith("file:/"));
    } else {
        LOGGER.warn("Spark ingest support only S3 or HDFS as input location");
        return false;
    }
    if ((inputPath == null) || (!Files.exists(inputPath))) {
        LOGGER.error("Error in accessing Input path " + basePath);
        return false;
    }

    // Gather every regular file below the input path.
    final List<Path> inputFileList = new ArrayList<>();
    Files.walkFileTree(inputPath, new SimpleFileVisitor<Path>() {

        @Override
        public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
            inputFileList.add(file);
            return FileVisitResult.CONTINUE;
        }
    });
    final int numInputFiles = inputFileList.size();

    // Clamp the derived executor count to at least 1: an empty input directory would otherwise
    // yield 0 executors and 0 partitions, and Spark requires a positive number of partitions.
    final int numExecutors;
    if (sparkOptions.getNumExecutors() < 1) {
        numExecutors = Math.max(1, (int) Math.ceil((double) numInputFiles / 8));
    } else {
        numExecutors = sparkOptions.getNumExecutors();
    }
    final int numCores;
    if (sparkOptions.getNumCores() < 1) {
        numCores = 4;
    } else {
        numCores = sparkOptions.getNumCores();
    }
    final int numPartitions = numExecutors * numCores * 2;

    // Location of the jar containing this class, registered with Spark via "spark.jars".
    String jar = "";
    try {
        jar = SparkIngestDriver.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
    } catch (final URISyntaxException e) {
        LOGGER.error("Unable to set jar location in spark configuration", e);
    }
    final SparkSession session = SparkSession.builder().appName(sparkOptions.getAppName()).master(sparkOptions.getMaster()).config("spark.driver.host", sparkOptions.getHost()).config("spark.jars", jar).config("spark.executor.instances", Integer.toString(numExecutors)).config("spark.executor.cores", Integer.toString(numCores)).getOrCreate();

    // Everything that uses the session runs inside try/finally so the session is closed even when
    // partition processing throws.
    try {
        final JavaSparkContext jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
        final JavaRDD<URI> fileRDD = jsc.parallelize(Lists.transform(inputFileList, path -> path.toUri()), numPartitions);
        if (isS3) {
            fileRDD.foreachPartition(uri -> {
                final S3FileSystem fs = initializeS3FS(s3EndpointUrl);
                final List<URI> inputFiles = new ArrayList<>();
                while (uri.hasNext()) {
                    // Strip the endpoint prefix so the remainder can be resolved by the S3 file system.
                    final Path inputFile = fs.getPath(uri.next().toString().replaceFirst(s3EndpointUrl, ""));
                    inputFiles.add(inputFile.toUri());
                }
                processInput(configFile, localInput, inputStoreName, indexList, ingestOptions, configProperties, inputFiles.iterator(), console);
            });
        } else if (isHDFS) {
            try {
                setHdfsURLStreamHandlerFactory();
            } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
                // Best effort: log and continue, as the original code did not abort here either.
                LOGGER.error("Unable to set HDFS URL stream handler factory", e);
            }
            // NOTE(review): this path creates a fresh JCommander console instead of using the
            // `console` parameter; kept as-is, confirm whether that is intentional.
            fileRDD.foreachPartition(uri -> {
                processInput(configFile, localInput, inputStoreName, indexList, ingestOptions, configProperties, uri, new JCommander().getConsole());
            });
        }
    } finally {
        close(session);
    }
    return true;
}
Also used : Path(java.nio.file.Path) FsUrlStreamHandlerFactory(org.apache.hadoop.fs.FsUrlStreamHandlerFactory) Arrays(java.util.Arrays) VisibilityOptions(org.locationtech.geowave.core.store.cli.VisibilityOptions) LocalIngestRunData(org.locationtech.geowave.core.store.ingest.LocalIngestRunData) CLIUtils(org.locationtech.geowave.core.store.cli.CLIUtils) URL(java.net.URL) IngestFormatPluginOptions(org.locationtech.geowave.core.ingest.operations.options.IngestFormatPluginOptions) URISyntaxException(java.net.URISyntaxException) LoggerFactory(org.slf4j.LoggerFactory) S3FileSystemProvider(com.upplication.s3fs.S3FileSystemProvider) S3FileSystem(com.upplication.s3fs.S3FileSystem) IngestUtils(org.locationtech.geowave.core.store.ingest.IngestUtils) Map(java.util.Map) URI(java.net.URI) Path(java.nio.file.Path) SimpleFileVisitor(java.nio.file.SimpleFileVisitor) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) GeoWaveAmazonS3Factory(org.locationtech.geowave.mapreduce.s3.GeoWaveAmazonS3Factory) URLStreamHandlerFactory(java.net.URLStreamHandlerFactory) Console(com.beust.jcommander.internal.Console) Serializable(java.io.Serializable) FileVisitResult(java.nio.file.FileVisitResult) ConfigOptions(org.locationtech.geowave.core.cli.operations.config.options.ConfigOptions) List(java.util.List) LocalInputCommandLineOptions(org.locationtech.geowave.core.store.ingest.LocalInputCommandLineOptions) ConfigAWSCommand(org.locationtech.geowave.core.ingest.operations.ConfigAWSCommand) IndexStore(org.locationtech.geowave.core.store.index.IndexStore) Entry(java.util.Map.Entry) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LocalFileIngestPlugin(org.locationtech.geowave.core.store.ingest.LocalFileIngestPlugin) Lists(com.google.common.collect.Lists) URLTYPE(org.locationtech.geowave.core.ingest.URLIngestUtils.URLTYPE) DataTypeAdapter(org.locationtech.geowave.core.store.api.DataTypeAdapter) 
LocalFileIngestCLIDriver(org.locationtech.geowave.core.ingest.local.LocalFileIngestCLIDriver) Index(org.locationtech.geowave.core.store.api.Index) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) Properties(java.util.Properties) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) MalformedURLException(java.net.MalformedURLException) Files(java.nio.file.Files) DataStore(org.locationtech.geowave.core.store.api.DataStore) JCommander(com.beust.jcommander.JCommander) IOException(java.io.IOException) Field(java.lang.reflect.Field) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) File(java.io.File) URLIngestUtils(org.locationtech.geowave.core.ingest.URLIngestUtils) PluginVisitor(org.locationtech.geowave.core.store.ingest.LocalPluginFileVisitor.PluginVisitor) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) Paths(java.nio.file.Paths) Collections(java.util.Collections) ConfigHDFSCommand(org.locationtech.geowave.mapreduce.operations.ConfigHDFSCommand) SparkSession(org.apache.spark.sql.SparkSession) ArrayList(java.util.ArrayList) FileVisitResult(java.nio.file.FileVisitResult) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Properties(java.util.Properties) URI(java.net.URI) S3FileSystem(com.upplication.s3fs.S3FileSystem) JCommander(com.beust.jcommander.JCommander) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes)

Example 2 with LocalInputCommandLineOptions

use of org.locationtech.geowave.core.store.ingest.LocalInputCommandLineOptions in project geowave by locationtech.

The method testSparkIngest of the class TestUtils.

/**
 * Runs a Spark ingest of {@code ingestFilePath} into the given data store and then verifies the
 * store statistics via {@code verifyStats}.
 *
 * <p>A temporary configuration file is created and populated by executing the AWS and add-store
 * commands against it; any index named by {@code dimensionalityType} that the index store does
 * not yet contain is added to the data store before the ingest starts.
 *
 * @param dataStore plugin options of the store to ingest into; registered under the name "test"
 * @param dimensionalityType supplies the comma-separated index names to ingest into
 * @param s3Url S3 endpoint URL written to the configuration
 * @param ingestFilePath base path handed to the Spark ingest driver
 * @param format ingest format(s) set on the local input options
 * @throws Exception if any of the configuration, index setup, ingest or verification steps fails
 */
public static void testSparkIngest(final DataStorePluginOptions dataStore, final DimensionalityType dimensionalityType, final String s3Url, final String ingestFilePath, final String format) throws Exception {
    // ingest a shapefile (geotools type) directly into GeoWave using the
    // ingest framework's main method and pre-defined commandline arguments
    // Indexes
    final String indexes = dimensionalityType.getDimensionalityArg();
    final File configFile = File.createTempFile("test_spark_ingest", null);
    // Do not leave the temporary configuration file behind after the test JVM exits.
    configFile.deleteOnExit();
    final ManualOperationParams operationParams = new ManualOperationParams();
    operationParams.getContext().put(ConfigOptions.PROPERTIES_FILE_CONTEXT, configFile);
    final ConfigAWSCommand configS3 = new ConfigAWSCommand();
    configS3.setS3UrlParameter(s3Url);
    configS3.execute(operationParams);
    final LocalInputCommandLineOptions localOptions = new LocalInputCommandLineOptions();
    localOptions.setFormats(format);
    final SparkCommandLineOptions sparkOptions = new SparkCommandLineOptions();
    sparkOptions.setAppName("SparkIngestTest");
    sparkOptions.setMaster("local");
    sparkOptions.setHost("localhost");
    // Create the command and execute.
    final SparkIngestDriver sparkIngester = new SparkIngestDriver();
    // NOTE(review): `props` is filled in below but never read or stored within this method;
    // kept because the save(...) calls may have side effects - confirm whether it can be removed.
    final Properties props = new Properties();
    dataStore.save(props, DataStorePluginOptions.getStoreNamespace("test"));
    final AddStoreCommand addStore = new AddStoreCommand();
    addStore.setParameters("test");
    addStore.setPluginOptions(dataStore);
    addStore.execute(operationParams);
    final IndexStore indexStore = dataStore.createIndexStore();
    final org.locationtech.geowave.core.store.api.DataStore geowaveDataStore = dataStore.createDataStore();
    // Make sure every requested index exists in the data store before ingesting.
    for (final String indexType : indexes.split(",")) {
        if (indexStore.getIndex(indexType) == null) {
            final IndexPluginOptions pluginOptions = new IndexPluginOptions();
            pluginOptions.selectPlugin(indexType);
            pluginOptions.setName(indexType);
            pluginOptions.save(props, IndexPluginOptions.getIndexNamespace(indexType));
            geowaveDataStore.addIndex(pluginOptions.createIndex(geowaveDataStore));
        }
    }
    props.setProperty(ConfigAWSCommand.AWS_S3_ENDPOINT_URL, s3Url);
    sparkIngester.runOperation(configFile, localOptions, "test", indexes, new VisibilityOptions(), sparkOptions, ingestFilePath, new JCommander().getConsole());
    verifyStats(dataStore);
}
Also used : ConfigAWSCommand(org.locationtech.geowave.core.ingest.operations.ConfigAWSCommand) SparkIngestDriver(org.locationtech.geowave.core.ingest.spark.SparkIngestDriver) Properties(java.util.Properties) AddStoreCommand(org.locationtech.geowave.core.store.cli.store.AddStoreCommand) ManualOperationParams(org.locationtech.geowave.core.cli.parser.ManualOperationParams) VisibilityOptions(org.locationtech.geowave.core.store.cli.VisibilityOptions) SparkCommandLineOptions(org.locationtech.geowave.core.ingest.spark.SparkCommandLineOptions) JCommander(com.beust.jcommander.JCommander) IndexPluginOptions(org.locationtech.geowave.core.store.index.IndexPluginOptions) File(java.io.File) LocalInputCommandLineOptions(org.locationtech.geowave.core.store.ingest.LocalInputCommandLineOptions) IndexStore(org.locationtech.geowave.core.store.index.IndexStore)

Aggregations

JCommander (com.beust.jcommander.JCommander)2 File (java.io.File)2 Properties (java.util.Properties)2 ConfigAWSCommand (org.locationtech.geowave.core.ingest.operations.ConfigAWSCommand)2 Console (com.beust.jcommander.internal.Console)1 Lists (com.google.common.collect.Lists)1 S3FileSystem (com.upplication.s3fs.S3FileSystem)1 S3FileSystemProvider (com.upplication.s3fs.S3FileSystemProvider)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 Field (java.lang.reflect.Field)1 MalformedURLException (java.net.MalformedURLException)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 URL (java.net.URL)1 URLStreamHandlerFactory (java.net.URLStreamHandlerFactory)1 FileVisitResult (java.nio.file.FileVisitResult)1 Files (java.nio.file.Files)1 Path (java.nio.file.Path)1 Paths (java.nio.file.Paths)1