use of com.google.common.io.InputSupplier in project cdap by caskdata.
the class Locations method newInputSupplier.
/**
 * Creates a new {@link InputSupplier} that provides a {@link SeekableInputStream} for the given location.
 *
 * <p>Every call to {@link InputSupplier#getInput()} opens a fresh stream from the location and wraps it:
 * a {@link FileInputStream} becomes a {@code FileSeekableInputStream}, an {@code FSDataInputStream} becomes a
 * {@code DFSSeekableInputStream}. Any other stream type is rejected with an {@link IOException}. If the
 * wrapping fails for any reason, the freshly opened stream is closed quietly and the failure is rethrown
 * (wrapped in an {@link IOException} when it is not one already).
 *
 * @param location Location for the input stream.
 * @return A {@link InputSupplier}.
 */
public static InputSupplier<? extends SeekableInputStream> newInputSupplier(final Location location) {
  return new InputSupplier<SeekableInputStream>() {
    @Override
    public SeekableInputStream getInput() throws IOException {
      final InputStream rawStream = location.getInputStream();
      try {
        // Local file: wrap directly
        if (rawStream instanceof FileInputStream) {
          return new FileSeekableInputStream((FileInputStream) rawStream);
        }

        // Anything that is neither a local file stream nor a Hadoop stream is unsupported
        if (!(rawStream instanceof FSDataInputStream)) {
          throw new IOException("Failed to create SeekableInputStream from location " + location);
        }

        final FSDataInputStream fsStream = (FSDataInputStream) rawStream;
        LocationFactory factory = location.getLocationFactory();

        if (!(factory instanceof FileContextLocationFactory)) {
          // This shouldn't happen
          StreamSizeProvider lengthBasedSize = new StreamSizeProvider() {
            @Override
            public long size() throws IOException {
              // Assumption is if the FS is not a HDFS fs, the location length tells the stream size
              return location.length();
            }
          };
          return new DFSSeekableInputStream(fsStream, lengthBasedSize);
        }

        final FileContextLocationFactory fcFactory = (FileContextLocationFactory) factory;
        PrivilegedExceptionAction<SeekableInputStream> openAsUser =
          new PrivilegedExceptionAction<SeekableInputStream>() {
            @Override
            public SeekableInputStream run() throws IOException {
              // Disable the FileSystem cache. The FileSystem will be closed when the InputStream is closed
              String fsScheme = fcFactory.getHomeLocation().toURI().getScheme();
              Configuration uncachedConf = new Configuration(fcFactory.getConfiguration());
              uncachedConf.set(String.format("fs.%s.impl.disable.cache", fsScheme), "true");
              FileSystem fileSystem = FileSystem.get(uncachedConf);
              StreamSizeProvider sizeProvider =
                createDFSStreamSizeProvider(fileSystem, true, new Path(location.toURI()), fsStream);
              return new DFSSeekableInputStream(fsStream, sizeProvider);
            }
          };
        // Run as the user of the location factory's file context
        return fcFactory.getFileContext().getUgi().doAs(openAsUser);
      } catch (Throwable failure) {
        // Do not leak the stream opened above when it could not be wrapped
        Closeables.closeQuietly(rawStream);
        if (failure instanceof IOException) {
          throw (IOException) failure;
        }
        throw new IOException(failure);
      }
    }
  };
}
use of com.google.common.io.InputSupplier in project cdap by caskdata.
the class DecisionTreeRegressionAppTest method test.
/**
 * End-to-end check of the decision tree regression app: uploads the sample data through the service,
 * runs the Spark trainer, then verifies that exactly one model with the expected metadata and a model file
 * has been produced.
 */
@Test
public void test() throws Exception {
  // Deploy the Application
  ApplicationManager appManager = deployApplication(DecisionTreeRegressionApp.class);

  // Start the Service
  ServiceManager serviceManager = appManager.getServiceManager(ModelDataService.SERVICE_NAME).start();
  serviceManager.waitForStatus(true, 30, 1);
  URL serviceURL = serviceManager.getServiceURL(15, TimeUnit.SECONDS);

  // Upload the sample training data, streamed from the test classpath
  InputSupplier<InputStream> sampleData = new InputSupplier<InputStream>() {
    @Override
    public InputStream getInput() throws IOException {
      return getClass().getClassLoader().getResourceAsStream("sample_libsvm_data.txt");
    }
  };
  URL addDataURL = new URL(serviceURL, "labels");
  HttpRequest uploadRequest = HttpRequest.builder(HttpMethod.PUT, addDataURL).withBody(sampleData).build();
  HttpResponse uploadResponse = HttpRequests.execute(uploadRequest);
  Assert.assertEquals(200, uploadResponse.getResponseCode());

  // Start a Spark Program
  SparkManager sparkManager = appManager.getSparkManager(ModelTrainer.NAME).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 60, TimeUnit.SECONDS);

  // Check that there is a new model
  URL listModelsURL = new URL(serviceURL, "models");
  HttpRequest listRequest = HttpRequest.builder(HttpMethod.GET, listModelsURL).build();
  HttpResponse listResponse = HttpRequests.execute(listRequest);
  Assert.assertEquals(200, listResponse.getResponseCode());
  List<String> models = GSON.fromJson(listResponse.getResponseBodyAsString(), new TypeToken<List<String>>() {
  }.getType());
  Assert.assertEquals(1, models.size());

  // Check that there is some model metadata
  String modelId = models.get(0);
  URL modelMetaURL = new URL(serviceURL, "models/" + modelId);
  HttpRequest metaRequest = HttpRequest.builder(HttpMethod.GET, modelMetaURL).build();
  HttpResponse metaResponse = HttpRequests.execute(metaRequest);
  Assert.assertEquals(200, metaResponse.getResponseCode());
  ModelMeta meta = GSON.fromJson(metaResponse.getResponseBodyAsString(), ModelMeta.class);
  Assert.assertNotNull(meta);
  Assert.assertEquals(0.7, meta.getTrainingPercentage(), 0.000001);
  Assert.assertEquals(692, meta.getNumFeatures());

  // Check that the corresponding model file exists
  DataSetManager<FileSet> modelFiles = getDataset(DecisionTreeRegressionApp.MODEL_DATASET);
  Assert.assertTrue(modelFiles.get().getBaseLocation().append(modelId).exists());
}
use of com.google.common.io.InputSupplier in project cdap by caskdata.
the class InterceptableClassLoader method getManifest.
/**
 * Looks up the {@link Manifest} of the JAR file that contains the given resource.
 *
 * <p>Only URLs using the {@code jar} protocol are handled. The outcome is remembered in the
 * {@code manifests} map under the JAR URI string, including a {@code null} entry when loading fails with an
 * {@link IOException}, so the same JAR is not read again on later calls.
 *
 * @param resource URL of a resource, expected to be of the form {@code jar:<jar-uri>!/<entry>}
 * @return the manifest of the enclosing JAR, or {@code null} if the URL is not a {@code jar} URL or no
 *         manifest could be obtained
 */
@Nullable
private Manifest getManifest(URL resource) {
  if (!"jar".equals(resource.getProtocol())) {
    return null;
  }

  // The part of the jar URL before "!/" is the URI of the jar file itself
  String resourceFile = resource.getFile();
  final String jarURIString = resourceFile.substring(0, resourceFile.indexOf("!/"));

  // Guard the lookup-then-populate sequence on the manifests map
  synchronized (this) {
    if (manifests.containsKey(jarURIString)) {
      return manifests.get(jarURIString);
    }

    try {
      // Tries to load the Manifest from the Jar URI
      final URI jarURI = URI.create(jarURIString);
      InputSupplier<InputStream> jarContent = new InputSupplier<InputStream>() {
        @Override
        public InputStream getInput() throws IOException {
          return jarURI.toURL().openStream();
        }
      };
      manifests.put(jarURIString, BundleJarUtil.getManifest(jarURI, jarContent));
    } catch (IOException e) {
      // Ignore if cannot get Manifest from the jar file and remember the failure
      manifests.put(jarURIString, null);
    }
    return manifests.get(jarURIString);
  }
}
use of com.google.common.io.InputSupplier in project cdap by caskdata.
the class BundleJarUtil method getEntry.
/**
 * Returns an {@link InputSupplier} for a given entry. This avoids unjar the whole file to just get one entry.
 * However, to get many entries, unjar would be more efficient. Also, the jar file is scanned every time the
 * {@link InputSupplier#getInput()} is invoked.
 *
 * <p>The stream returned by the supplier owns the underlying jar resource: closing the stream releases it.
 * If the entry cannot be found, or opening it fails, the underlying jar resource is closed before the
 * {@link IOException} is thrown, so a failed {@code getInput()} call does not leak a file handle or stream.
 *
 * @param jarLocation Location of the jar file.
 * @param entryName Name of the entry to fetch
 * @return An {@link InputSupplier}.
 * @throws IllegalArgumentException if {@code jarLocation} or {@code entryName} is {@code null}
 */
public static InputSupplier<InputStream> getEntry(final Location jarLocation, final String entryName) throws IOException {
  Preconditions.checkArgument(jarLocation != null);
  Preconditions.checkArgument(entryName != null);
  final URI uri = jarLocation.toURI();

  // Small optimization if the location is local
  if ("file".equals(uri.getScheme())) {
    return new InputSupplier<InputStream>() {
      @Override
      public InputStream getInput() throws IOException {
        final JarFile jarFile = new JarFile(new File(uri));
        try {
          ZipEntry entry = jarFile.getEntry(entryName);
          if (entry == null) {
            throw new IOException("Entry not found for " + entryName);
          }
          // Closing the returned stream also closes the JarFile that backs it
          return new FilterInputStream(jarFile.getInputStream(entry)) {
            @Override
            public void close() throws IOException {
              try {
                super.close();
              } finally {
                jarFile.close();
              }
            }
          };
        } catch (IOException | RuntimeException e) {
          // No stream is handed to the caller, so nobody else can close the JarFile
          try {
            jarFile.close();
          } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
          }
          throw e;
        }
      }
    };
  }

  // Otherwise, use JarInputStream
  return new InputSupplier<InputStream>() {
    @Override
    public InputStream getInput() throws IOException {
      // Tracks the outermost stream opened so far, which is the one to close on failure
      InputStream opened = jarLocation.getInputStream();
      try {
        JarInputStream is = new JarInputStream(opened);
        opened = is;
        JarEntry entry = is.getNextJarEntry();
        while (entry != null) {
          if (entryName.equals(entry.getName())) {
            // The stream is positioned at the start of the requested entry
            return is;
          }
          entry = is.getNextJarEntry();
        }
        throw new IOException("Entry not found for " + entryName);
      } catch (IOException | RuntimeException e) {
        Closeables.closeQuietly(opened);
        throw e;
      }
    }
  };
}
use of com.google.common.io.InputSupplier in project cdap by caskdata.
the class AvroStreamBodyConsumerTest method generateFile.
/**
 * Generates an Avro file with the given number of records and returns a {@link ContentInfo} whose
 * {@code verify} method decodes the supplied content and checks each record against its expected values.
 *
 * @param recordCount number of records to write and later expect during verification
 * @return content info backed by the generated file
 * @throws IOException if the file cannot be generated
 */
@Override
protected ContentInfo generateFile(final int recordCount) throws IOException {
  return new FileContentInfo(generateAvroFile(TMP_FOLDER.newFile(), recordCount)) {
    @Override
    public boolean verify(Map<String, String> headers, InputSupplier<? extends InputStream> contentSupplier) throws IOException {
      // The stream obtained from the supplier is owned here, so close it on every exit path
      try (InputStream content = contentSupplier.getInput()) {
        // Deserialize and verify the records
        Decoder decoder = DecoderFactory.get().binaryDecoder(content, null);
        DatumReader<Record> reader = new ReflectDatumReader<>(Record.class);
        // The writer schema is read from the "schema" header
        reader.setSchema(new Schema.Parser().parse(headers.get("schema")));
        for (int i = 0; i < recordCount; i++) {
          Record record = reader.read(null, decoder);
          // The i-th record must have id i and name "Record number i"
          if (i != record.id) {
            return false;
          }
          if (!("Record number " + i).equals(record.name)) {
            return false;
          }
        }
        return true;
      }
    }
  };
}
Aggregations