Search in sources :

Example 1 with InputSupplier

Use of InputSupplier in project cdap by caskdata.

the class Locations method newInputSupplier.

   * Creates a new {@link InputSupplier} that can provide a {@link SeekableInputStream} from the given location.
   * @param location Location for the input stream.
   * @return A {@link InputSupplier}.
// Builds a supplier whose getInput() opens the location on every call and wraps the raw stream
// in a SeekableInputStream implementation chosen by the stream's runtime type.
// NOTE(review): this excerpt has lost its closing braces (and possibly other lines); the comments
// below describe only what is visible here.
public static InputSupplier<? extends SeekableInputStream> newInputSupplier(final Location location) {
    return new InputSupplier<SeekableInputStream>() {

        public SeekableInputStream getInput() throws IOException {
            // A fresh stream is opened from the location on each call.
            InputStream input = location.getInputStream();
            try {
                // Local file stream: wrap the FileInputStream directly.
                if (input instanceof FileInputStream) {
                    return new FileSeekableInputStream((FileInputStream) input);
                // Hadoop stream: how the stream size is determined depends on the LocationFactory type.
                if (input instanceof FSDataInputStream) {
                    final FSDataInputStream dataInput = (FSDataInputStream) input;
                    LocationFactory locationFactory = location.getLocationFactory();
                    if (locationFactory instanceof FileContextLocationFactory) {
                        final FileContextLocationFactory lf = (FileContextLocationFactory) locationFactory;
                        // Execute the action under the UGI returned by the factory's FileContext.
                        return lf.getFileContext().getUgi().doAs(new PrivilegedExceptionAction<SeekableInputStream>() {

                            public SeekableInputStream run() throws IOException {
                                // Disable the FileSystem cache. The FileSystem will be closed when the InputStream is closed
                                String scheme = lf.getHomeLocation().toURI().getScheme();
                                // Work on a copy so the cache setting is applied to this Configuration only.
                                Configuration hConf = new Configuration(lf.getConfiguration());
                                hConf.set(String.format("fs.%s.impl.disable.cache", scheme), "true");
                                FileSystem fs = FileSystem.get(hConf);
                                // createDFSStreamSizeProvider is defined outside this excerpt; the meaning of its
                                // boolean argument is not visible here.
                                return new DFSSeekableInputStream(dataInput, createDFSStreamSizeProvider(fs, true, new Path(location.toURI()), dataInput));
                    // This shouldn't happen
                    // Fallback for an FSDataInputStream whose factory is not a FileContextLocationFactory.
                    return new DFSSeekableInputStream(dataInput, new StreamSizeProvider() {

                        public long size() throws IOException {
                            // Assumption is if the FS is not a HDFS fs, the location length tells the stream size
                            return location.length();
                // Neither supported stream type matched.
                throw new IOException("Failed to create SeekableInputStream from location " + location);
            } catch (Throwable t) {
                // Rethrow an IOException unchanged; wrap anything else as the cause of a new IOException.
                // NOTE(review): no visible line closes `input` on this failure path — confirm against the full source.
                Throwables.propagateIfInstanceOf(t, IOException.class);
                throw new IOException(t);
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileInputStream( InputStream( FileContextLocationFactory(org.apache.twill.filesystem.FileContextLocationFactory) IOException( FileInputStream( LocalLocationFactory(org.apache.twill.filesystem.LocalLocationFactory) LocationFactory(org.apache.twill.filesystem.LocationFactory) FileContextLocationFactory(org.apache.twill.filesystem.FileContextLocationFactory) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) InputSupplier(

Example 2 with InputSupplier

Use of InputSupplier in project cdap by caskdata.

the class DecisionTreeRegressionAppTest method test.

// End-to-end test: deploys DecisionTreeRegressionApp, uploads sample data through the model data
// service, runs the Spark trainer, then checks the model listing and model metadata over HTTP.
// NOTE(review): this excerpt has lost closing braces and the tails of its anonymous classes;
// the method also appears to continue past the last visible line.
public void test() throws Exception {
    // Deploy the Application
    ApplicationManager appManager = deployApplication(DecisionTreeRegressionApp.class);
    // Start the Service
    ServiceManager serviceManager = appManager.getServiceManager(ModelDataService.SERVICE_NAME).start();
    // presumably waits until the service reports running (30 attempts, 1 second apart) — TODO confirm
    // against ServiceManager.waitForStatus
    serviceManager.waitForStatus(true, 30, 1);
    URL serviceURL = serviceManager.getServiceURL(15, TimeUnit.SECONDS);
    // Upload the sample data with a PUT to the service's "labels" path.
    URL addDataURL = new URL(serviceURL, "labels");
    HttpRequest request = HttpRequest.builder(HttpMethod.PUT, addDataURL).withBody(new InputSupplier<InputStream>() {

        public InputStream getInput() throws IOException {
            // Loaded from the classpath; getResourceAsStream returns null if the resource is missing.
            return getClass().getClassLoader().getResourceAsStream("sample_libsvm_data.txt");
    HttpResponse response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
    // Start a Spark Program
    SparkManager sparkManager = appManager.getSparkManager(ModelTrainer.NAME).start();
    // Allow up to 60 seconds for the run to reach COMPLETED.
    sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);
    // Check that there is a new model
    URL listModelsURL = new URL(serviceURL, "models");
    request = HttpRequest.builder(HttpMethod.GET, listModelsURL).build();
    response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
    // The anonymous TypeToken subclass carries the List<String> element type for Gson
    // (the rest of this statement is missing from the excerpt).
    List<String> models = GSON.fromJson(response.getResponseBodyAsString(), new TypeToken<List<String>>() {
    Assert.assertEquals(1, models.size());
    // Check that there is some model metadata
    String modelId = models.get(0);
    URL modelMetaURL = new URL(serviceURL, "models/" + modelId);
    request = HttpRequest.builder(HttpMethod.GET, modelMetaURL).build();
    response = HttpRequests.execute(request);
    Assert.assertEquals(200, response.getResponseCode());
    ModelMeta meta = GSON.fromJson(response.getResponseBodyAsString(), ModelMeta.class);
    // Expected values for the sample data set; the 0.000001 is the floating-point comparison tolerance.
    Assert.assertEquals(0.7, meta.getTrainingPercentage(), 0.000001);
    Assert.assertEquals(692, meta.getNumFeatures());
    // Check that the corresponding model file exists
    // NOTE(review): the assertions that use modelFiles are not part of this excerpt.
    DataSetManager<FileSet> modelFiles = getDataset(DecisionTreeRegressionApp.MODEL_DATASET);
Also used : HttpRequest(co.cask.common.http.HttpRequest) ApplicationManager(co.cask.cdap.test.ApplicationManager) SparkManager(co.cask.cdap.test.SparkManager) FileSet(co.cask.cdap.api.dataset.lib.FileSet) HttpResponse(co.cask.common.http.HttpResponse) URL( ServiceManager(co.cask.cdap.test.ServiceManager) TypeToken( InputSupplier( Test(org.junit.Test)

Example 3 with InputSupplier

Use of InputSupplier in project cdap by caskdata.

the class InterceptableClassLoader method getManifest.

   * Returns the {@link Manifest} of the given resource if it represents a local JAR file.
// Looks up the Manifest of the JAR that the given "jar" URL points into, caching the result in
// `manifests` keyed by the JAR's URI string. Returns null for non-"jar" URLs; null is also cached
// and returned when loading the Manifest failed with an IOException.
// NOTE(review): this excerpt has lost its closing braces; the comments describe only what is visible.
private Manifest getManifest(URL resource) {
    if (!"jar".equals(resource.getProtocol())) {
        return null;
    String path = resource.getFile();
    // Everything before the "!/" separator is the URI of the JAR file itself.
    // NOTE(review): if "!/" were absent, indexOf would return -1 and substring would throw.
    final String jarURIString = path.substring(0, path.indexOf("!/"));
    // NOTE(review): the beginning of the following comment appears to be missing from this excerpt.
    // the loadClass call (caller of this method).
    synchronized (this) {
        // containsKey (not a null check on get) so that a cached null, i.e. a remembered failure, is not retried.
        if (!manifests.containsKey(jarURIString)) {
            try {
                // Tries to load the Manifest from the Jar URI
                final URI jarURI = URI.create(jarURIString);
                manifests.put(jarURIString, BundleJarUtil.getManifest(jarURI, new InputSupplier<InputStream>() {

                    public InputStream getInput() throws IOException {
                        // Opens a new stream on the JAR each time the supplier is asked for input.
                        return jarURI.toURL().openStream();
            } catch (IOException e) {
                // Ignore if cannot get Manifest from the jar file and remember the failure
                manifests.put(jarURIString, null);
        return manifests.get(jarURIString);
Also used : IOException( URI( InputSupplier( Nullable(javax.annotation.Nullable)

Example 4 with InputSupplier

Use of InputSupplier in project cdap by caskdata.

the class BundleJarUtil method getEntry.

   * Returns an {@link InputSupplier} for a given entry. This avoids unjarring the whole file just to get one entry.
   * However, to get many entries, unjar would be more efficient. Also, the jar file is scanned every time the
   * {@link InputSupplier#getInput()} is invoked.
   * @param jarLocation Location of the jar file.
   * @param entryName Name of the entry to fetch
   * @return An {@link InputSupplier}.
// Returns a supplier that locates `entryName` inside the jar at `jarLocation` each time getInput()
// is called: through JarFile lookup for "file" URIs, otherwise by scanning a JarInputStream.
// NOTE(review): this excerpt has lost its closing braces and the body of the close() override;
// the comments describe only what is visible.
public static InputSupplier<InputStream> getEntry(final Location jarLocation, final String entryName) throws IOException {
    // Both arguments are required; checkArgument rejects null with an IllegalArgumentException.
    Preconditions.checkArgument(jarLocation != null);
    Preconditions.checkArgument(entryName != null);
    final URI uri = jarLocation.toURI();
    // Small optimization if the location is local
    if ("file".equals(uri.getScheme())) {
        return new InputSupplier<InputStream>() {

            public InputStream getInput() throws IOException {
                // JarFile looks the entry up by name instead of reading through the archive sequentially.
                final JarFile jarFile = new JarFile(new File(uri));
                ZipEntry entry = jarFile.getEntry(entryName);
                if (entry == null) {
                    // NOTE(review): no visible line closes jarFile before this throw — confirm against the full source.
                    throw new IOException("Entry not found for " + entryName);
                // The entry stream is wrapped so that close() can be overridden.
                return new FilterInputStream(jarFile.getInputStream(entry)) {

                    public void close() throws IOException {
                        // NOTE(review): the try/finally bodies are missing from this excerpt; presumably they
                        // close the entry stream and then jarFile — verify against the full source.
                        try {
                        } finally {
    // Otherwise, use JarInputStream
    return new InputSupplier<InputStream>() {

        public InputStream getInput() throws IOException {
            JarInputStream is = new JarInputStream(jarLocation.getInputStream());
            // Advance entry by entry until the requested name is found.
            JarEntry entry = is.getNextJarEntry();
            while (entry != null) {
                if (entryName.equals(entry.getName())) {
                    // The JarInputStream itself is returned, positioned at the matching entry.
                    return is;
                entry = is.getNextJarEntry();
            // NOTE(review): no visible line closes `is` before this throw — confirm against the full source.
            throw new IOException("Entry not found for " + entryName);
Also used : FilterInputStream( JarInputStream(java.util.jar.JarInputStream) ZipEntry( IOException( JarFile(java.util.jar.JarFile) JarEntry(java.util.jar.JarEntry) URI( JarFile(java.util.jar.JarFile) File( InputSupplier(

Example 5 with InputSupplier

Use of InputSupplier in project cdap by caskdata.

the class AvroStreamBodyConsumerTest method generateFile.

// Generates an Avro file holding `recordCount` records in a new temporary file and returns a
// FileContentInfo for it whose verify() decodes the supplied content and checks each record.
// NOTE(review): this excerpt has lost its closing braces, and three statements inside the loop
// are garbled; the code is left exactly as found.
protected ContentInfo generateFile(final int recordCount) throws IOException {
    return new FileContentInfo(generateAvroFile(TMP_FOLDER.newFile(), recordCount)) {

        public boolean verify(Map<String, String> headers, InputSupplier<? extends InputStream> contentSupplier) throws IOException {
            // Deserialize and verify the records
            Decoder decoder = DecoderFactory.get().binaryDecoder(contentSupplier.getInput(), null);
            DatumReader<Record> reader = new ReflectDatumReader<>(Record.class);
            // The schema used for decoding is parsed from the "schema" header.
            reader.setSchema(new Schema.Parser().parse(headers.get("schema")));
            // Exactly recordCount records are read; any failed check returns false.
            for (int i = 0; i < recordCount; i++) {
                // NOTE(review): the right-hand side of this assignment is truncated in the excerpt.
                Record record =, decoder);
                // NOTE(review): the operand compared against i is missing from the excerpt.
                if (i != {
                    return false;
                // NOTE(review): the argument compared with "Record number " + i is missing from the excerpt.
                if (!("Record number " + i).equals( {
                    return false;
            return true;
Also used : InputStream( Decoder( ReflectDatumReader(org.apache.avro.reflect.ReflectDatumReader) Map(java.util.Map) InputSupplier(


InputSupplier ( IOException ( InputStream ( LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory)3 Test (org.junit.Test)3 URI ( Map (java.util.Map)2 Manifest (java.util.jar.Manifest)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2 Location (org.apache.twill.filesystem.Location)2 ApplicationClass (co.cask.cdap.api.artifact.ApplicationClass)1 ArtifactClasses (co.cask.cdap.api.artifact.ArtifactClasses)1 ArtifactInfo (co.cask.cdap.api.artifact.ArtifactInfo)1 ArtifactRange (co.cask.cdap.api.artifact.ArtifactRange)1 ArtifactSummary (co.cask.cdap.api.artifact.ArtifactSummary)1 ArtifactVersion (co.cask.cdap.api.artifact.ArtifactVersion)1 Schema ( FileSet (co.cask.cdap.api.dataset.lib.FileSet)1 PluginClass (co.cask.cdap.api.plugin.PluginClass)1