Search in sources :

Example 6 with StoredObject

use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.

the class SwiftAPIClient method isSparkOrigin.

/**
 * Tells whether the given object carries the metadata entry
 * Data-Origin=stocator. The object is looked up in the container
 * and must exist for the metadata to be inspected.
 * The answer is remembered in cachedSparkOriginated, so the lookup
 * is issued only the first time an object name is seen.
 *
 * @param objectName name of the object inside the container
 * @return true if the object exists and is tagged as created by Stocator
 */
private boolean isSparkOrigin(String objectName) {
    LOG.trace("Check if created by Stocator: {}", objectName);
    final boolean alreadyResolved = cachedSparkOriginated.containsKey(objectName);
    if (alreadyResolved) {
        return cachedSparkOriginated.get(objectName).booleanValue();
    }
    boolean createdByStocator = false;
    StoredObject candidate = mJossAccount.getAccount().getContainer(container).getObject(objectName);
    if (candidate != null && candidate.exists()) {
        Object originTag = candidate.getMetadata("Data-Origin");
        // The cast is only evaluated once the tag is known to be non-null.
        if (originTag != null && ((String) originTag).equals("stocator")) {
            createdByStocator = true;
            LOG.trace("Object {} was created by Stocator", objectName);
        }
    }
    // Negative answers are cached as well.
    cachedSparkOriginated.put(objectName, Boolean.valueOf(createdByStocator));
    return createdByStocator;
}
Also used : StoredObject(org.javaswift.joss.model.StoredObject) StoredObject(org.javaswift.joss.model.StoredObject) DirectoryOrObject(org.javaswift.joss.model.DirectoryOrObject)

Example 7 with StoredObject

use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.

the class SwiftAPIClient method list.

/**
 * {@inheritDoc}
 *
 * Lists the container page by page (pageListSize entries per request) and
 * converts the surviving objects into Hadoop FileStatus entries. Each
 * returned entry is also recorded in objectCache.
 *
 * Objects written by failed or duplicated task attempts are collapsed.
 * some examples of failed attempts:
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_119
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_120
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_121
 * a/b/c.data/part-00099-attempt_201603171503_0001_m_000099_122
 * or
 * a/b/c.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c
 * .csv-attempt_201603171328_0000_m_000000_1
 * a/b/c.data/part-r-00000-48ae3461-203f-4dd3-b141-a45426e2d26c
 * .csv-attempt_201603171328_0000_m_000000_0
 * in all the cases format is objectname-taskid where
 * taskid may vary, depends how many tasks were re-submitted
 *
 * @param hostName hostname
 * @param path path to the object
 * @param fullListing if true, will return objects of size 0
 * @param prefixBased if set to true, container will be listed with prefix based query
 * @param isDirectory if true, entries that are neither the object itself nor
 *        located under "object/" are skipped; null is treated as false
 * @param flatListing not consulted by this implementation
 * @param filter optional path filter; when not null, only paths it accepts
 *        are returned and cached
 * @return Array of Hadoop FileStatus
 * @throws IOException in case of network failure
 */
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws IOException {
    LOG.debug("List container: raw path parent {} container {} hostname {}", path.toString(), container, hostName);
    Container cObj = mJossAccount.getAccount().getContainer(container);
    String obj;
    if (path.toString().equals(container) || publicContainer) {
        obj = "";
    } else if (path.toString().startsWith(container + "/")) {
        obj = path.toString().substring(container.length() + 1);
    } else if (path.toString().startsWith(hostName)) {
        obj = path.toString().substring(hostName.length());
    } else {
        obj = path.toString();
    }
    LOG.debug("List container for {} container {}", obj, container);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    StoredObject previousElement = null;
    boolean moreData = true;
    String marker = null;
    // isDirectory is a boxed Boolean; resolve it once so that a null value
    // means "not a directory" instead of failing on auto-unboxing.
    final boolean directoryListing = Boolean.TRUE.equals(isDirectory);
    while (moreData) {
        Collection<StoredObject> res = cObj.list(obj, marker, pageListSize);
        // Validate the page before touching it: the null check must come
        // before any call on res.
        if (res == null || res.isEmpty()) {
            if (marker == null) {
                FileStatus[] emptyRes = {};
                LOG.debug("List {} in container {} is empty", obj, container);
                return emptyRes;
            }
            // An empty page after earlier pages means there is nothing left to read.
            break;
        }
        // A full page suggests that more entries may follow.
        moreData = (res.size() == pageListSize);
        for (StoredObject tmp : res) {
            marker = tmp.getAsObject().getName();
            if (previousElement == null) {
                // first entry
                setCorrectSize(tmp, cObj);
                previousElement = tmp.getAsObject();
                continue;
            }
            String unifiedObjectName = extractUnifiedObjectName(tmp.getName());
            LOG.trace("{} Matching {}", unifiedObjectName, obj);
            if (!prefixBased && !obj.equals("") && !path.toString().endsWith("/") && !unifiedObjectName.equals(obj) && !unifiedObjectName.startsWith(obj + "/")) {
                // JOSS returns all objects that start with the prefix of obj.
                // These may include other unrelated objects.
                LOG.trace("{} does not match {}. Skipped", unifiedObjectName, obj);
                continue;
            } else if (directoryListing && !unifiedObjectName.equals(obj) && !unifiedObjectName.startsWith(obj + "/")) {
                LOG.trace("directory {}. {} does not match {}. Skipped", isDirectory, unifiedObjectName, obj);
                continue;
            }
            LOG.trace("Unified name: {}, path {}", unifiedObjectName, tmp.getName());
            if (!unifiedObjectName.equals(tmp.getName()) && isSparkOrigin(unifiedObjectName) && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                if (!isJobSuccessful(unifiedObjectName)) {
                    LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName);
                    if (fModeAutomaticDelete) {
                        delete(hostName, new Path(tmp.getName()), true);
                    }
                    continue;
                } else {
                    // we need to make sure there are no failed attempts
                    if (nameWithoutTaskID(tmp.getName()).equals(nameWithoutTaskID(previousElement.getName()))) {
                        // found failed that was not aborted.
                        LOG.trace("Colision identified between {} and {}", previousElement.getName(), tmp.getName());
                        setCorrectSize(tmp, cObj);
                        // Of two colliding attempts, keep the larger one as the candidate.
                        if (previousElement.getContentLength() < tmp.getContentLength()) {
                            LOG.trace("New candidate is {}. Removed {}", tmp.getName(), previousElement.getName());
                            previousElement = tmp.getAsObject();
                        }
                        continue;
                    }
                }
            }
            // The current entry starts a new object, so the pending candidate is final.
            if (previousElement.getContentLength() > 0 || fullListing) {
                addIfAccepted(createFileStatus(previousElement, cObj, hostName, path), hostName, filter, tmpResult);
            }
            previousElement = tmp.getAsObject();
        }
    }
    // Flush the candidate that is still pending after the last page.
    if (previousElement != null && (previousElement.getContentLength() > 0 || fullListing)) {
        LOG.trace("Adding {} to the list", previousElement.getPath());
        addIfAccepted(createFileStatus(previousElement, cObj, hostName, path), hostName, filter, tmpResult);
    }
    LOG.debug("Listing of {} completed with {} results", path.toString(), tmpResult.size());
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}

/**
 * Records a file status in objectCache and appends it to the listing,
 * unless a path filter is given and rejects its path.
 *
 * @param fs file status to add
 * @param hostName hostname used to build the cache key
 * @param filter optional path filter, may be null
 * @param result listing being built
 * @throws IOException if building the cache key or updating the cache fails
 */
private void addIfAccepted(FileStatus fs, String hostName, PathFilter filter, ArrayList<FileStatus> result) throws IOException {
    if (filter != null && !filter.accept(fs.getPath())) {
        LOG.trace("{} rejected by path filter during list", fs.getPath());
        return;
    }
    objectCache.put(getObjName(hostName, fs.getPath()), fs.getLen(), fs.getModificationTime());
    result.add(fs);
}
Also used : StocatorPath(com.ibm.stocator.fs.common.StocatorPath) Path(org.apache.hadoop.fs.Path) Container(org.javaswift.joss.model.Container) FileStatus(org.apache.hadoop.fs.FileStatus) StoredObject(org.javaswift.joss.model.StoredObject) ArrayList(java.util.ArrayList)

Example 8 with StoredObject

use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.

the class SwiftAPIClient method isJobSuccessful.

/**
 * Tells whether the job that produced the given object completed.
 * A job counts as successful when the container holds the marker
 * object objectName/HADOOP_SUCCESS.
 * The answer is remembered in cachedSparkJobsStatus, so the marker is
 * probed only the first time an object name is seen.
 *
 * @param objectName name of the object inside the container
 * @return true if the success marker exists for the object
 */
private boolean isJobSuccessful(String objectName) {
    LOG.trace("Checking if job completed successfull for {}", objectName);
    final boolean alreadyChecked = cachedSparkJobsStatus.containsKey(objectName);
    if (alreadyChecked) {
        return cachedSparkJobsStatus.get(objectName).booleanValue();
    }
    Account account = mJossAccount.getAccount();
    // Name of the marker object, built once and reused for logging and lookup.
    final String successMarker = objectName + "/" + HADOOP_SUCCESS;
    LOG.trace("HEAD {}", successMarker);
    StoredObject markerObject = account.getContainer(container).getObject(successMarker);
    final boolean markerPresent = markerObject.exists();
    if (markerPresent) {
        LOG.debug("{} exists", successMarker);
    }
    // Negative answers are cached as well.
    cachedSparkJobsStatus.put(objectName, Boolean.valueOf(markerPresent));
    return markerPresent;
}
Also used : Account(org.javaswift.joss.model.Account) JossAccount(com.ibm.stocator.fs.swift.auth.JossAccount) StoredObject(org.javaswift.joss.model.StoredObject)

Example 9 with StoredObject

use of org.javaswift.joss.model.StoredObject in project stocator by CODAIT.

the class SwiftObjectCache method get.

/**
 * Returns the cache entry for an object, loading it on a miss.
 * On a miss the object is looked up in the container (with any trailing
 * slash removed from its name); if it exists, its content length and
 * last-modified time are stored in the cache under the requested name.
 *
 * @param objName object name
 * @return cached entry of the object, or null if the object does not exist
 * @throws IOException if failed to parse time stamp
 */
public SwiftCachedObject get(String objName) throws IOException {
    LOG.trace("Get from cache  {} ", objName);
    SwiftCachedObject cachedEntry = cache.get(objName);
    if (cachedEntry != null) {
        // Cache hit: no request to the object store is needed.
        return cachedEntry;
    }
    LOG.trace("Cache get:  {} is not in the cache. Access Swift to get content length", objName);
    StoredObject remote = container.getObject(removeTrailingSlash(objName));
    if (remote == null || !remote.exists()) {
        return null;
    }
    SwiftCachedObject loaded = new SwiftCachedObject(remote.getContentLength(), Utils.lastModifiedAsLong(remote.getLastModified()));
    put(objName, loaded);
    return loaded;
}
Also used : StoredObject(org.javaswift.joss.model.StoredObject)

Example 10 with StoredObject

use of org.javaswift.joss.model.StoredObject in project alluxio by Alluxio.

the class SwiftInputStream method createStream.

/**
 * Opens a ranged download of the object, retrying while the retry policy
 * allows and the object is reported as not found.
 *
 * @param startPos first byte position of the requested range
 * @param endPos end position; the range requested from the store ends at endPos - 1
 * @return input stream over the requested range of the object
 * @throws IOException if the retry policy did not permit a single attempt
 * @throws NotFoundException if every permitted attempt reported the object as missing
 */
@Override
protected InputStream createStream(long startPos, long endPos) throws IOException {
    NotFoundException lastException = null;
    while (mRetryPolicy.attempt()) {
        try {
            StoredObject storedObject = mAccount.getContainer(mContainerName).getObject(mObjectPath);
            DownloadInstructions downloadInstructions = new DownloadInstructions();
            downloadInstructions.setRange(new MidPartLongRange(startPos, endPos - 1));
            return storedObject.downloadObjectAsInputStream(downloadInstructions);
        } catch (NotFoundException e) {
            LOG.warn("Attempt {} to get object {} from container {} failed with exception : {}", mRetryPolicy.getAttemptCount(), mObjectPath, mContainerName, e.toString());
            // Object does not exist
            lastException = e;
        }
    }
    if (lastException == null) {
        // The retry policy refused even the first attempt, so there is no
        // failure to rethrow; throwing a null reference would surface as a
        // bare NullPointerException with no context.
        throw new IOException("No attempt was made to get object " + mObjectPath + " from container " + mContainerName);
    }
    // Failed after retrying object does not exist
    throw lastException;
}
Also used : DownloadInstructions(org.javaswift.joss.instructions.DownloadInstructions) StoredObject(org.javaswift.joss.model.StoredObject) NotFoundException(org.javaswift.joss.exception.NotFoundException)

Aggregations

StoredObject (org.javaswift.joss.model.StoredObject)26 IOException (java.io.IOException)9 Container (org.javaswift.joss.model.Container)8 FileNotFoundException (java.io.FileNotFoundException)4 ArrayList (java.util.ArrayList)3 StocatorPath (com.ibm.stocator.fs.common.StocatorPath)2 ConfigurationParseException (com.ibm.stocator.fs.common.exception.ConfigurationParseException)2 JossAccount (com.ibm.stocator.fs.swift.auth.JossAccount)2 DataFile (edu.harvard.iq.dataverse.DataFile)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 Path (org.apache.hadoop.fs.Path)2 AlreadyExistsException (org.javaswift.joss.exception.AlreadyExistsException)2 CommandException (org.javaswift.joss.exception.CommandException)2 Account (org.javaswift.joss.model.Account)2 DirectoryOrObject (org.javaswift.joss.model.DirectoryOrObject)2 Dataset (edu.harvard.iq.dataverse.Dataset)1 File (java.io.File)1 Date (java.util.Date)1 Properties (java.util.Properties)1