Use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.
Class SwiftAPIClient, method isSparkOrigin.
/**
 * Checks whether the given object exists in the container and carries the
 * {@code Data-Origin=stocator} metadata entry. If so, the object was
 * created by Stocator (Spark).
 *
 * Both positive and negative answers are stored in
 * {@code cachedSparkOriginated}, so a repeated call for the same name is
 * answered from the cache without contacting the object store.
 *
 * @param objectName name of the object inside the container
 * @return true if the object exists and was created by Stocator
 */
private boolean isSparkOrigin(String objectName) {
  LOG.trace("Check if created by Stocator: {}", objectName);
  // Single lookup instead of containsKey + get.
  Boolean cached = cachedSparkOriginated.get(objectName);
  if (cached != null) {
    return cached.booleanValue();
  }
  boolean sparkOriginated = false;
  StoredObject so = mJossAccount.getAccount().getContainer(container).getObject(objectName);
  if (so != null && so.exists()) {
    // Constant-first equals is null-safe and does not require casting the
    // metadata value, so a non-String value simply yields "not Stocator".
    if ("stocator".equals(so.getMetadata("Data-Origin"))) {
      sparkOriginated = true;
      LOG.trace("Object {} was created by Stocator", objectName);
    }
  }
  cachedSparkOriginated.put(objectName, Boolean.valueOf(sparkOriginated));
  return sparkOriginated;
}
Use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.
Class SwiftAPIClient, method list.
/**
 * {@inheritDoc}
 *
 * some examples of failed attempts:
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_119
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_120
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_121
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_122
 * or
 * a/b/c.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c
 * .csv-attempt_201603171328_0000_m_000000_1
 * a/b/c.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c
 * .csv-attempt_201603171328_0000_m_000000_0
 * in all the cases format is objectname-taskid where
 * taskid may vary, depends how many tasks were re-submitted
 *
 * The container is listed page by page. Each listed object is compared with
 * the previously retained one; when both share the same name without task
 * id, only the larger of the two is kept.
 *
 * @param hostName hostname
 * @param path path to the object
 * @param fullListing if true, will return objects of size 0
 * @param prefixBased if set to true, container will be listed with prefix based query
 * @param isDirectory if true, entries that neither equal the object name nor
 *        are nested under it are skipped; null is treated as false
 * @param flatListing not used by this implementation
 * @param filter optional path filter applied to every returned entry;
 *        null accepts everything
 * @return Array of Hadoop FileStatus
 * @throws IOException in case of network failure
 */
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws IOException {
  final String rawPath = path.toString();
  LOG.debug("List container: raw path parent {} container {} hostname {}", rawPath, container, hostName);
  Container cObj = mJossAccount.getAccount().getContainer(container);
  String obj;
  if (rawPath.equals(container) || publicContainer) {
    obj = "";
  } else if (rawPath.startsWith(container + "/")) {
    obj = rawPath.substring(container.length() + 1);
  } else if (rawPath.startsWith(hostName)) {
    obj = rawPath.substring(hostName.length());
  } else {
    obj = rawPath;
  }
  LOG.debug("List container for {} container {}", obj, container);
  // isDirectory is a boxed Boolean; unboxing a null would throw.
  final boolean directoryOnly = Boolean.TRUE.equals(isDirectory);
  ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
  StoredObject previousElement = null;
  boolean moreData = true;
  String marker = null;
  while (moreData) {
    Collection<StoredObject> res = cObj.list(obj, marker, pageListSize);
    // Check for null/empty BEFORE touching res.size().
    if (res == null || res.isEmpty()) {
      if (marker == null) {
        LOG.debug("List {} in container {} is empty", obj, container);
        return new FileStatus[0];
      }
      // A later page came back empty: nothing more to read.
      break;
    }
    // A full page suggests there may be another one after it.
    moreData = (res.size() == pageListSize);
    for (StoredObject tmp : res) {
      marker = tmp.getAsObject().getName();
      if (previousElement == null) {
        // first entry
        setCorrectSize(tmp, cObj);
        previousElement = tmp.getAsObject();
        continue;
      }
      String unifiedObjectName = extractUnifiedObjectName(tmp.getName());
      LOG.trace("{} Matching {}", unifiedObjectName, obj);
      boolean matchesObj = unifiedObjectName.equals(obj) || unifiedObjectName.startsWith(obj + "/");
      if (!prefixBased && !obj.equals("") && !rawPath.endsWith("/") && !matchesObj) {
        // JOSS returns all objects that start with the prefix of obj.
        // These may include other unrelated objects.
        LOG.trace("{} does not match {}. Skipped", unifiedObjectName, obj);
        continue;
      } else if (directoryOnly && !matchesObj) {
        LOG.trace("directory {}. {} does not match {}. Skipped", isDirectory, unifiedObjectName, obj);
        continue;
      }
      LOG.trace("Unified name: {}, path {}", unifiedObjectName, tmp.getName());
      // Cheap local checks first so the remote origin lookup is only made
      // when its answer can matter.
      if (!fullListing && !unifiedObjectName.equals(tmp.getName()) && isSparkOrigin(unifiedObjectName)) {
        LOG.trace("{} created by Spark", unifiedObjectName);
        if (!isJobSuccessful(unifiedObjectName)) {
          LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName);
          if (fModeAutomaticDelete) {
            delete(hostName, new Path(tmp.getName()), true);
          }
          continue;
        } else {
          // we need to make sure there are no failed attempts
          if (nameWithoutTaskID(tmp.getName()).equals(nameWithoutTaskID(previousElement.getName()))) {
            // found failed that was not aborted.
            LOG.trace("Colision identified between {} and {}", previousElement.getName(), tmp.getName());
            setCorrectSize(tmp, cObj);
            if (previousElement.getContentLength() < tmp.getContentLength()) {
              LOG.trace("New candidate is {}. Removed {}", tmp.getName(), previousElement.getName());
              previousElement = tmp.getAsObject();
            }
            continue;
          }
        }
      }
      // The current entry starts a new name: emit the retained previous one.
      if (previousElement.getContentLength() > 0 || fullListing) {
        addToListingIfAccepted(previousElement, cObj, hostName, path, filter, tmpResult);
      }
      previousElement = tmp.getAsObject();
    }
  }
  // The last retained element has not been emitted by the loop.
  if (previousElement != null && (previousElement.getContentLength() > 0 || fullListing)) {
    LOG.trace("Adding {} to the list", previousElement.getPath());
    addToListingIfAccepted(previousElement, cObj, hostName, path, filter, tmpResult);
  }
  LOG.debug("Listing of {} completed with {} results", rawPath, tmpResult.size());
  return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}

/**
 * Builds the FileStatus for a listed object and, if the filter accepts its
 * path (or no filter was given), records it in the object cache and appends
 * it to the results. Rejected entries are only logged.
 */
private void addToListingIfAccepted(StoredObject element, Container cObj, String hostName, Path path, PathFilter filter, Collection<FileStatus> results) throws IOException {
  FileStatus fs = createFileStatus(element, cObj, hostName, path);
  if (filter == null || filter.accept(fs.getPath())) {
    objectCache.put(getObjName(hostName, fs.getPath()), fs.getLen(), fs.getModificationTime());
    results.add(fs);
  } else {
    LOG.trace("{} rejected by path filter during list", fs.getPath());
  }
}
Use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.
Class SwiftAPIClient, method isJobSuccessful.
/**
 * Tells whether the job that produced the given object completed
 * successfully, by probing for the marker object
 * {@code objectName + "/" + HADOOP_SUCCESS} in the container.
 * The answer is remembered in {@code cachedSparkJobsStatus} per object name.
 *
 * @param objectName name of the object whose job status is requested
 * @return true if the success marker object exists
 */
private boolean isJobSuccessful(String objectName) {
  LOG.trace("Checking if job completed successfull for {}", objectName);
  if (cachedSparkJobsStatus.containsKey(objectName)) {
    return cachedSparkJobsStatus.get(objectName).booleanValue();
  }
  Account account = mJossAccount.getAccount();
  // Name of the marker object, built once and reused below.
  final String successMarker = objectName + "/" + HADOOP_SUCCESS;
  LOG.trace("HEAD {}", successMarker);
  final boolean markerExists = account.getContainer(container).getObject(successMarker).exists();
  if (markerExists) {
    LOG.debug("{} exists", successMarker);
  }
  cachedSparkJobsStatus.put(objectName, markerExists ? Boolean.TRUE : Boolean.FALSE);
  return markerExists;
}
Use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.
Class SwiftObjectCache, method get.
/**
 * Looks the object up in the cache; on a miss, queries the container for
 * the object and, if it exists, caches its content length and
 * last-modified time before returning the new entry.
 *
 * @param objName object name
 * @return cached entry of the object, or null if the object does not exist
 * @throws IOException if failed to parse time stamp
 */
public SwiftCachedObject get(String objName) throws IOException {
  LOG.trace("Get from cache {} ", objName);
  SwiftCachedObject hit = cache.get(objName);
  if (hit != null) {
    return hit;
  }
  LOG.trace("Cache get: {} is not in the cache. Access Swift to get content length", objName);
  StoredObject rawObj = container.getObject(removeTrailingSlash(objName));
  if (rawObj == null || !rawObj.exists()) {
    return null;
  }
  SwiftCachedObject fetched = new SwiftCachedObject(rawObj.getContentLength(), Utils.lastModifiedAsLong(rawObj.getLastModified()));
  put(objName, fetched);
  return fetched;
}
Use of org.javaswift.joss.model.StoredObject in project alluxio by Alluxio.
Class SwiftInputStream, method createStream.
/**
 * Opens a ranged download of the object, retrying while the retry policy
 * allows it and the object is reported as not found.
 *
 * @param startPos first byte position to read
 * @param endPos end position; the range requested ends at {@code endPos - 1}
 * @return input stream over the requested range
 * @throws IOException if the retry policy did not permit any attempt
 * @throws NotFoundException if every permitted attempt failed because the
 *         object was not found (the last such exception is rethrown)
 */
@Override
protected InputStream createStream(long startPos, long endPos) throws IOException {
  NotFoundException lastException = null;
  while (mRetryPolicy.attempt()) {
    try {
      StoredObject storedObject = mAccount.getContainer(mContainerName).getObject(mObjectPath);
      DownloadInstructions downloadInstructions = new DownloadInstructions();
      downloadInstructions.setRange(new MidPartLongRange(startPos, endPos - 1));
      return storedObject.downloadObjectAsInputStream(downloadInstructions);
    } catch (NotFoundException e) {
      LOG.warn("Attempt {} to get object {} from container {} failed with exception : {}", mRetryPolicy.getAttemptCount(), mObjectPath, mContainerName, e.toString());
      // Object does not exist
      lastException = e;
    }
  }
  if (lastException == null) {
    // The loop body never ran, so there is no exception to rethrow;
    // "throw null" would surface as an unexplained NullPointerException.
    throw new IOException(String.format("Failed to open object %s in container %s: retry policy permitted no attempts", mObjectPath, mContainerName));
  }
  // Failed after retrying object does not exist
  throw lastException;
}
Aggregations