Search in sources :

Example 1 with InputSupplier

use of com.google.common.io.InputSupplier in project cdap by caskdata.

In the class Locations, the method newInputSupplier:

/**
 * Creates a new {@link InputSupplier} that can provide a {@link SeekableInputStream} for the given location.
 *
 * <p>The supplier inspects the concrete type of the location's input stream: a local
 * {@link FileInputStream} is wrapped directly, while an HDFS {@link FSDataInputStream} is paired
 * with a size provider so the resulting stream can report its total length.
 *
 * @param location Location for the input stream.
 * @return A {@link InputSupplier} whose {@code getInput()} opens a fresh stream on each call.
 */
public static InputSupplier<? extends SeekableInputStream> newInputSupplier(final Location location) {
    return new InputSupplier<SeekableInputStream>() {

        @Override
        public SeekableInputStream getInput() throws IOException {
            InputStream input = location.getInputStream();
            try {
                if (input instanceof FileInputStream) {
                    // Local file: seeking is handled by the FileInputStream wrapper.
                    return new FileSeekableInputStream((FileInputStream) input);
                }
                if (input instanceof FSDataInputStream) {
                    final FSDataInputStream dataInput = (FSDataInputStream) input;
                    LocationFactory locationFactory = location.getLocationFactory();
                    if (locationFactory instanceof FileContextLocationFactory) {
                        final FileContextLocationFactory lf = (FileContextLocationFactory) locationFactory;
                        // Run as the FileContext's UGI so the FileSystem below is created with the
                        // same user credentials that own the location.
                        return lf.getFileContext().getUgi().doAs(new PrivilegedExceptionAction<SeekableInputStream>() {

                            @Override
                            public SeekableInputStream run() throws IOException {
                                // Disable the FileSystem cache. The FileSystem will be closed when the InputStream is closed
                                String scheme = lf.getHomeLocation().toURI().getScheme();
                                Configuration hConf = new Configuration(lf.getConfiguration());
                                hConf.set(String.format("fs.%s.impl.disable.cache", scheme), "true");
                                // NOTE(review): FileSystem.get(hConf) resolves the configuration's
                                // *default* filesystem — presumably that matches the location's
                                // scheme here; confirm against the deployment configuration.
                                FileSystem fs = FileSystem.get(hConf);
                                return new DFSSeekableInputStream(dataInput, createDFSStreamSizeProvider(fs, true, new Path(location.toURI()), dataInput));
                            }
                        });
                    }
                    // This shouldn't happen: an FSDataInputStream whose factory is not a
                    // FileContextLocationFactory. Fall back to the location's reported length.
                    return new DFSSeekableInputStream(dataInput, new StreamSizeProvider() {

                        @Override
                        public long size() throws IOException {
                            // Assumption is if the FS is not a HDFS fs, the location length tells the stream size
                            return location.length();
                        }
                    });
                }
                throw new IOException("Failed to create SeekableInputStream from location " + location);
            } catch (Throwable t) {
                // Never leak the opened stream on failure. IOExceptions are rethrown as-is;
                // anything else (including exceptions surfaced by doAs) is wrapped.
                Closeables.closeQuietly(input);
                Throwables.propagateIfInstanceOf(t, IOException.class);
                throw new IOException(t);
            }
        }
    };
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) FileContextLocationFactory(org.apache.twill.filesystem.FileContextLocationFactory) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) LocationFactory(org.apache.twill.filesystem.LocationFactory) FileContextLocationFactory(org.apache.twill.filesystem.FileContextLocationFactory) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputSupplier(com.google.common.io.InputSupplier)

Example 2 with InputSupplier

use of com.google.common.io.InputSupplier in project cdap by caskdata.

In the class DecisionTreeRegressionAppTest, the method test:

@Test
public void test() throws Exception {
    // Deploy the application under test.
    ApplicationManager app = deployApplication(DecisionTreeRegressionApp.class);

    // Bring up the model data service and wait until it reports running.
    ServiceManager service = app.getServiceManager(ModelDataService.SERVICE_NAME).start();
    service.waitForStatus(true, 30, 1);
    URL baseURL = service.getServiceURL(15, TimeUnit.SECONDS);

    // Upload the training data via PUT /labels, streaming it from the test resources.
    HttpRequest putLabels = HttpRequest.builder(HttpMethod.PUT, new URL(baseURL, "labels"))
        .withBody(new InputSupplier<InputStream>() {

            @Override
            public InputStream getInput() throws IOException {
                return getClass().getClassLoader().getResourceAsStream("sample_libsvm_data.txt");
            }
        }).build();
    Assert.assertEquals(200, HttpRequests.execute(putLabels).getResponseCode());

    // Train a model by running the Spark program to completion.
    SparkManager spark = app.getSparkManager(ModelTrainer.NAME).start();
    spark.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);

    // Exactly one model should now be listed.
    HttpResponse listResponse =
        HttpRequests.execute(HttpRequest.builder(HttpMethod.GET, new URL(baseURL, "models")).build());
    Assert.assertEquals(200, listResponse.getResponseCode());
    List<String> models = GSON.fromJson(listResponse.getResponseBodyAsString(), new TypeToken<List<String>>() {
    }.getType());
    Assert.assertEquals(1, models.size());

    // Its metadata should reflect the training configuration.
    String modelId = models.get(0);
    HttpResponse metaResponse =
        HttpRequests.execute(HttpRequest.builder(HttpMethod.GET, new URL(baseURL, "models/" + modelId)).build());
    Assert.assertEquals(200, metaResponse.getResponseCode());
    ModelMeta meta = GSON.fromJson(metaResponse.getResponseBodyAsString(), ModelMeta.class);
    Assert.assertNotNull(meta);
    Assert.assertEquals(0.7, meta.getTrainingPercentage(), 0.000001);
    Assert.assertEquals(692, meta.getNumFeatures());

    // Finally, the serialized model file must exist in the model dataset.
    DataSetManager<FileSet> modelFiles = getDataset(DecisionTreeRegressionApp.MODEL_DATASET);
    Assert.assertTrue(modelFiles.get().getBaseLocation().append(modelId).exists());
}
Also used : HttpRequest(co.cask.common.http.HttpRequest) ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) FileSet(co.cask.cdap.api.dataset.lib.FileSet) HttpResponse(co.cask.common.http.HttpResponse) URL(java.net.URL) ServiceManager(co.cask.cdap.test.ServiceManager) TypeToken(com.google.gson.reflect.TypeToken) InputSupplier(com.google.common.io.InputSupplier) Test(org.junit.Test)

Example 3 with InputSupplier

use of com.google.common.io.InputSupplier in project cdap by caskdata.

In the class InterceptableClassLoader, the method getManifest:

/**
 * Returns the {@link Manifest} of the given resource if it represents a local JAR file.
 *
 * @param resource the resource URL to inspect; only {@code jar:} URLs are considered
 * @return the manifest of the enclosing jar, or {@code null} if the resource is not a jar URL,
 *         the URL is malformed, or the manifest could not be read
 */
@Nullable
private Manifest getManifest(URL resource) {
    if (!"jar".equals(resource.getProtocol())) {
        return null;
    }
    String path = resource.getFile();
    int separatorIdx = path.indexOf("!/");
    if (separatorIdx < 0) {
        // Malformed jar URL without the "!/" entry separator; there is no jar to inspect.
        // (The original code would have thrown StringIndexOutOfBoundsException here.)
        return null;
    }
    final String jarURIString = path.substring(0, separatorIdx);
    // Serialize lookups so the manifest of a given jar is loaded at most once; failures are
    // cached as null so they are not retried on every call. NOTE(review): the original
    // comment fragment referenced "the loadClass call (caller of this method)" — presumably
    // this lock also coordinates with loadClass; confirm before changing the locking.
    synchronized (this) {
        if (!manifests.containsKey(jarURIString)) {
            try {
                // Tries to load the Manifest from the Jar URI
                final URI jarURI = URI.create(jarURIString);
                manifests.put(jarURIString, BundleJarUtil.getManifest(jarURI, new InputSupplier<InputStream>() {

                    @Override
                    public InputStream getInput() throws IOException {
                        return jarURI.toURL().openStream();
                    }
                }));
            } catch (IOException e) {
                // Ignore if cannot get Manifest from the jar file and remember the failure
                manifests.put(jarURIString, null);
            }
        }
        return manifests.get(jarURIString);
    }
}
Also used : IOException(java.io.IOException) URI(java.net.URI) InputSupplier(com.google.common.io.InputSupplier) Nullable(javax.annotation.Nullable)

Example 4 with InputSupplier

use of com.google.common.io.InputSupplier in project cdap by caskdata.

In the class BundleJarUtil, the method getEntry:

/**
 * Returns an {@link InputSupplier} for a given entry. This avoids unjar the whole file to just get one entry.
 * However, to get many entries, unjar would be more efficient. Also, the jar file is scanned every time the
 * {@link InputSupplier#getInput()} is invoked.
 *
 * @param jarLocation Location of the jar file.
 * @param entryName Name of the entry to fetch
 * @return An {@link InputSupplier}.
 * @throws IOException declared for callers; the supplier itself throws when the entry is missing
 */
public static InputSupplier<InputStream> getEntry(final Location jarLocation, final String entryName) throws IOException {
    Preconditions.checkArgument(jarLocation != null);
    Preconditions.checkArgument(entryName != null);
    final URI uri = jarLocation.toURI();
    // Small optimization if the location is local: JarFile gives random access to the entry.
    if ("file".equals(uri.getScheme())) {
        return new InputSupplier<InputStream>() {

            @Override
            public InputStream getInput() throws IOException {
                final JarFile jarFile = new JarFile(new File(uri));
                try {
                    ZipEntry entry = jarFile.getEntry(entryName);
                    if (entry == null) {
                        throw new IOException("Entry not found for " + entryName);
                    }
                    // Closing the returned stream also closes the backing JarFile.
                    return new FilterInputStream(jarFile.getInputStream(entry)) {

                        @Override
                        public void close() throws IOException {
                            try {
                                super.close();
                            } finally {
                                jarFile.close();
                            }
                        }
                    };
                } catch (IOException e) {
                    // Fix: don't leak the JarFile when the entry is missing or its stream
                    // cannot be opened (the original returned/threw without closing it).
                    try {
                        jarFile.close();
                    } catch (IOException closeFailure) {
                        e.addSuppressed(closeFailure);
                    }
                    throw e;
                }
            }
        };
    }
    // Otherwise, scan sequentially with JarInputStream over the location's stream.
    return new InputSupplier<InputStream>() {

        @Override
        public InputStream getInput() throws IOException {
            JarInputStream is = new JarInputStream(jarLocation.getInputStream());
            JarEntry entry = is.getNextJarEntry();
            while (entry != null) {
                if (entryName.equals(entry.getName())) {
                    // Caller reads the current entry directly from the JarInputStream.
                    return is;
                }
                entry = is.getNextJarEntry();
            }
            // Entry not found: release the stream before failing.
            Closeables.closeQuietly(is);
            throw new IOException("Entry not found for " + entryName);
        }
    };
}
Also used : FilterInputStream(java.io.FilterInputStream) JarInputStream(java.util.jar.JarInputStream) ZipEntry(java.util.zip.ZipEntry) IOException(java.io.IOException) JarFile(java.util.jar.JarFile) JarEntry(java.util.jar.JarEntry) URI(java.net.URI) JarFile(java.util.jar.JarFile) File(java.io.File) InputSupplier(com.google.common.io.InputSupplier)

Example 5 with InputSupplier

use of com.google.common.io.InputSupplier in project cdap by caskdata.

In the class AvroStreamBodyConsumerTest, the method generateFile:

@Override
protected ContentInfo generateFile(final int recordCount) throws IOException {
    return new FileContentInfo(generateAvroFile(TMP_FOLDER.newFile(), recordCount)) {

        /**
         * Decodes {@code recordCount} records from the uploaded content and checks
         * each record's id and name against the expected generated values.
         */
        @Override
        public boolean verify(Map<String, String> headers, InputSupplier<? extends InputStream> contentSupplier) throws IOException {
            Decoder binaryDecoder = DecoderFactory.get().binaryDecoder(contentSupplier.getInput(), null);
            DatumReader<Record> datumReader = new ReflectDatumReader<>(Record.class);
            datumReader.setSchema(new Schema.Parser().parse(headers.get("schema")));
            for (int idx = 0; idx < recordCount; idx++) {
                Record actual = datumReader.read(null, binaryDecoder);
                // The generator writes sequential ids with names "Record number <id>".
                boolean matches = idx == actual.id && ("Record number " + idx).equals(actual.name);
                if (!matches) {
                    return false;
                }
            }
            return true;
        }
    };
}
Also used : InputStream(java.io.InputStream) Decoder(org.apache.avro.io.Decoder) ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) Map(java.util.Map) InputSupplier(com.google.common.io.InputSupplier)

Aggregations

InputSupplier (com.google.common.io.InputSupplier)11 IOException (java.io.IOException)6 InputStream (java.io.InputStream)6 LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory)3 Test (org.junit.Test)3 URI (java.net.URI)2 Map (java.util.Map)2 Manifest (java.util.jar.Manifest)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 Location (org.apache.twill.filesystem.Location)2 ApplicationClass (co.cask.cdap.api.artifact.ApplicationClass)1 ArtifactClasses (co.cask.cdap.api.artifact.ArtifactClasses)1 ArtifactInfo (co.cask.cdap.api.artifact.ArtifactInfo)1 ArtifactRange (co.cask.cdap.api.artifact.ArtifactRange)1 ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary)1 ArtifactVersion (co.cask.cdap.api.artifact.ArtifactVersion)1 Schema (co.cask.cdap.api.data.schema.Schema)1 FileSet (co.cask.cdap.api.dataset.lib.FileSet)1 PluginClass (co.cask.cdap.api.plugin.PluginClass)1