Search in sources :

Example 1 with S3FileSystem

Use of com.upplication.s3fs.S3FileSystem in the project geowave by locationtech.

From the class SparkIngestDriver, method runOperation.

/**
 * Ingests every file found under {@code basePath} by distributing the file URIs over a Spark RDD
 * and calling {@code processInput} once per partition.
 *
 * <p>Only {@code s3://}, {@code hdfs://} and {@code file:/} base paths are handled; any other
 * prefix is logged and rejected.
 *
 * @param configFile properties file the store/AWS/HDFS configuration is loaded from
 * @param localInput passed through unchanged to {@code processInput}
 * @param inputStoreName passed through unchanged to {@code processInput}
 * @param indexList passed through unchanged to {@code processInput}
 * @param ingestOptions passed through unchanged to {@code processInput}
 * @param sparkOptions supplies app name, master, host and the optional executor/core counts
 * @param basePath root location whose file tree is walked to collect the input files
 * @param console console handed to {@code processInput} for S3 input
 * @return {@code false} if the base path is of an unsupported kind or cannot be accessed,
 *         {@code true} once the Spark job(s) have completed
 * @throws IOException if loading the configuration or walking the file tree fails
 */
public boolean runOperation(final File configFile, final LocalInputCommandLineOptions localInput, final String inputStoreName, final String indexList, final VisibilityOptions ingestOptions, final SparkCommandLineOptions sparkOptions, final String basePath, final Console console) throws IOException {
    final Properties configProperties = ConfigOptions.loadProperties(configFile);
    final boolean isS3 = basePath.startsWith("s3://");
    final boolean isHDFS = !isS3 && (basePath.startsWith("hdfs://") || basePath.startsWith("file:/"));
    String s3EndpointUrl = null;
    Path inputPath;
    if (isS3) {
        // If input path is S3
        s3EndpointUrl = ConfigAWSCommand.getS3Url(configProperties);
        inputPath = URLIngestUtils.setupS3FileSystem(basePath, s3EndpointUrl);
    } else if (isHDFS) {
        // If input path is HDFS (or a local file:/ URL routed through the same setup)
        final String hdfsFSUrl = ConfigHDFSCommand.getHdfsUrl(configProperties);
        inputPath = setUpHDFSFilesystem(basePath, hdfsFSUrl, basePath.startsWith("file:/"));
    } else {
        LOGGER.warn("Spark ingest support only S3 or HDFS as input location");
        return false;
    }
    if ((inputPath == null) || (!Files.exists(inputPath))) {
        LOGGER.error("Error in accessing Input path " + basePath);
        return false;
    }

    // Collect every regular file below the input path.
    final List<Path> inputFileList = new ArrayList<>();
    Files.walkFileTree(inputPath, new SimpleFileVisitor<Path>() {

        @Override
        public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
            inputFileList.add(file);
            return FileVisitResult.CONTINUE;
        }
    });
    final int numInputFiles = inputFileList.size();

    // Default sizing: one executor per 8 input files. Clamp to at least 1 so an empty input
    // directory does not produce zero executors and zero partitions.
    final int numExecutors;
    if (sparkOptions.getNumExecutors() < 1) {
        numExecutors = Math.max(1, (int) Math.ceil((double) numInputFiles / 8));
    } else {
        numExecutors = sparkOptions.getNumExecutors();
    }
    final int numCores = (sparkOptions.getNumCores() < 1) ? 4 : sparkOptions.getNumCores();
    final int numPartitions = numExecutors * numCores * 2;

    String jar = "";
    try {
        jar = SparkIngestDriver.class.getProtectionDomain().getCodeSource().getLocation().toURI().getPath();
    } catch (final URISyntaxException e) {
        LOGGER.error("Unable to set jar location in spark configuration", e);
    }
    final SparkSession session = SparkSession.builder().appName(sparkOptions.getAppName()).master(sparkOptions.getMaster()).config("spark.driver.host", sparkOptions.getHost()).config("spark.jars", jar).config("spark.executor.instances", Integer.toString(numExecutors)).config("spark.executor.cores", Integer.toString(numCores)).getOrCreate();
    try {
        final JavaSparkContext jsc = JavaSparkContext.fromSparkContext(session.sparkContext());
        final JavaRDD<URI> fileRDD = jsc.parallelize(Lists.transform(inputFileList, path -> path.toUri()), numPartitions);
        if (isS3) {
            final String s3FinalEndpointUrl = s3EndpointUrl;
            fileRDD.foreachPartition(uri -> {
                final S3FileSystem fs = initializeS3FS(s3FinalEndpointUrl);
                // replaceFirst takes a regex; quote the endpoint so '.' etc. match literally.
                final String endpointPattern = java.util.regex.Pattern.quote(s3FinalEndpointUrl);
                final List<URI> inputFiles = new ArrayList<>();
                while (uri.hasNext()) {
                    final Path inputFile = fs.getPath(uri.next().toString().replaceFirst(endpointPattern, ""));
                    inputFiles.add(inputFile.toUri());
                }
                processInput(configFile, localInput, inputStoreName, indexList, ingestOptions, configProperties, inputFiles.iterator(), console);
            });
        } else {
            // Only the HDFS / file:/ case can reach this branch; other prefixes returned above.
            try {
                setHdfsURLStreamHandlerFactory();
            } catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException e) {
                // Best effort, as before: report through the logger and continue.
                LOGGER.error("Unable to set HDFS URL stream handler factory", e);
            }
            // NOTE(review): this branch builds a fresh JCommander console inside the lambda instead
            // of using the 'console' parameter; kept as in the original - confirm it is intentional.
            fileRDD.foreachPartition(uri -> {
                processInput(configFile, localInput, inputStoreName, indexList, ingestOptions, configProperties, uri, new JCommander().getConsole());
            });
        }
    } finally {
        // Release the session even when a partition job fails.
        close(session);
    }
    return true;
}
Also used : Path(java.nio.file.Path) FsUrlStreamHandlerFactory(org.apache.hadoop.fs.FsUrlStreamHandlerFactory) Arrays(java.util.Arrays) VisibilityOptions(org.locationtech.geowave.core.store.cli.VisibilityOptions) LocalIngestRunData(org.locationtech.geowave.core.store.ingest.LocalIngestRunData) CLIUtils(org.locationtech.geowave.core.store.cli.CLIUtils) URL(java.net.URL) IngestFormatPluginOptions(org.locationtech.geowave.core.ingest.operations.options.IngestFormatPluginOptions) URISyntaxException(java.net.URISyntaxException) LoggerFactory(org.slf4j.LoggerFactory) S3FileSystemProvider(com.upplication.s3fs.S3FileSystemProvider) S3FileSystem(com.upplication.s3fs.S3FileSystem) IngestUtils(org.locationtech.geowave.core.store.ingest.IngestUtils) Map(java.util.Map) URI(java.net.URI) Path(java.nio.file.Path) SimpleFileVisitor(java.nio.file.SimpleFileVisitor) DataStoreUtils(org.locationtech.geowave.core.store.util.DataStoreUtils) GeoWaveAmazonS3Factory(org.locationtech.geowave.mapreduce.s3.GeoWaveAmazonS3Factory) URLStreamHandlerFactory(java.net.URLStreamHandlerFactory) Console(com.beust.jcommander.internal.Console) Serializable(java.io.Serializable) FileVisitResult(java.nio.file.FileVisitResult) ConfigOptions(org.locationtech.geowave.core.cli.operations.config.options.ConfigOptions) List(java.util.List) LocalInputCommandLineOptions(org.locationtech.geowave.core.store.ingest.LocalInputCommandLineOptions) ConfigAWSCommand(org.locationtech.geowave.core.ingest.operations.ConfigAWSCommand) IndexStore(org.locationtech.geowave.core.store.index.IndexStore) Entry(java.util.Map.Entry) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LocalFileIngestPlugin(org.locationtech.geowave.core.store.ingest.LocalFileIngestPlugin) Lists(com.google.common.collect.Lists) URLTYPE(org.locationtech.geowave.core.ingest.URLIngestUtils.URLTYPE) DataTypeAdapter(org.locationtech.geowave.core.store.api.DataTypeAdapter) 
LocalFileIngestCLIDriver(org.locationtech.geowave.core.ingest.local.LocalFileIngestCLIDriver) Index(org.locationtech.geowave.core.store.api.Index) JavaRDD(org.apache.spark.api.java.JavaRDD) SparkSession(org.apache.spark.sql.SparkSession) Properties(java.util.Properties) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) MalformedURLException(java.net.MalformedURLException) Files(java.nio.file.Files) DataStore(org.locationtech.geowave.core.store.api.DataStore) JCommander(com.beust.jcommander.JCommander) IOException(java.io.IOException) Field(java.lang.reflect.Field) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes) File(java.io.File) URLIngestUtils(org.locationtech.geowave.core.ingest.URLIngestUtils) PluginVisitor(org.locationtech.geowave.core.store.ingest.LocalPluginFileVisitor.PluginVisitor) DataStorePluginOptions(org.locationtech.geowave.core.store.cli.store.DataStorePluginOptions) Paths(java.nio.file.Paths) Collections(java.util.Collections) ConfigHDFSCommand(org.locationtech.geowave.mapreduce.operations.ConfigHDFSCommand) SparkSession(org.apache.spark.sql.SparkSession) ArrayList(java.util.ArrayList) FileVisitResult(java.nio.file.FileVisitResult) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) Properties(java.util.Properties) URI(java.net.URI) S3FileSystem(com.upplication.s3fs.S3FileSystem) JCommander(com.beust.jcommander.JCommander) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) BasicFileAttributes(java.nio.file.attribute.BasicFileAttributes)

Example 2 with S3FileSystem

Use of com.upplication.s3fs.S3FileSystem in the project geowave by locationtech.

From the class DefaultGeoWaveAWSCredentialsProviderTest, method testAnonymousAccess.

/**
 * Starts a local S3 mock on port 8001, uploads a single object into {@code testbucket} through
 * the client of the file system returned by {@code SparkIngestDriver.initializeS3FS}, and checks
 * that listing the bucket via {@code URLIngestUtils.setupS3FileSystem} yields exactly one entry.
 */
@Test
public void testAnonymousAccess() throws NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException, URISyntaxException, IOException {
    // Create a real directory: File.createTempFile makes a regular file, so mkdirs() on it
    // would silently do nothing and the backend path would not be a directory.
    final File temp = Files.createTempDirectory("temp").toFile();
    temp.deleteOnExit();
    // NOTE(review): withInMemoryBackend() is called after withFileBackend(); presumably the
    // later call wins and the temp directory is unused - confirm against the S3Mock builder.
    final S3Mock mockS3 = new S3Mock.Builder().withPort(8001).withFileBackend(temp.getAbsolutePath()).withInMemoryBackend().build();
    mockS3.start();
    try {
        URLIngestUtils.setURLStreamHandlerFactory(URLTYPE.S3);
        final SparkIngestDriver sparkDriver = new SparkIngestDriver();
        final S3FileSystem s3 = sparkDriver.initializeS3FS("s3://s3.amazonaws.com");
        // Redirect the client to the local mock before touching any bucket.
        s3.getClient().setEndpoint("http://127.0.0.1:8001");
        s3.getClient().createBucket("testbucket");
        s3.getClient().putObject("testbucket", "test", "content");
        try (Stream<Path> s = Files.list(URLIngestUtils.setupS3FileSystem("s3://testbucket/", "s3://s3.amazonaws.com"))) {
            Assert.assertEquals(1, s.count());
        }
    } finally {
        // Always free port 8001, even when a call or the assertion above fails.
        mockS3.shutdown();
    }
}
Also used : Path(java.nio.file.Path) SparkIngestDriver(org.locationtech.geowave.core.ingest.spark.SparkIngestDriver) File(java.io.File) S3Mock(io.findify.s3mock.S3Mock) S3FileSystem(com.upplication.s3fs.S3FileSystem) Test(org.junit.Test)

Aggregations

S3FileSystem (com.upplication.s3fs.S3FileSystem)2 File (java.io.File)2 Path (java.nio.file.Path)2 JCommander (com.beust.jcommander.JCommander)1 Console (com.beust.jcommander.internal.Console)1 Lists (com.google.common.collect.Lists)1 S3FileSystemProvider (com.upplication.s3fs.S3FileSystemProvider)1 S3Mock (io.findify.s3mock.S3Mock)1 IOException (java.io.IOException)1 Serializable (java.io.Serializable)1 Field (java.lang.reflect.Field)1 MalformedURLException (java.net.MalformedURLException)1 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 URL (java.net.URL)1 URLStreamHandlerFactory (java.net.URLStreamHandlerFactory)1 FileVisitResult (java.nio.file.FileVisitResult)1 Files (java.nio.file.Files)1 Paths (java.nio.file.Paths)1 SimpleFileVisitor (java.nio.file.SimpleFileVisitor)1