Use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.
The class CopyCommitter, method preserveFileAttributesForDirectories.
// This method changes the target directories' file attributes (owner,
// group, permissions, etc.) to match the corresponding source directories.
private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
final boolean syncOrOverwrite = syncFolder || overwrite;
LOG.info("About to preserve attributes: " + attrSymbols);
EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
final boolean preserveRawXattrs = conf.getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sourceListing));
long totalLen = clusterFS.getFileStatus(sourceListing).getLen();
Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
long preservedEntries = 0;
try {
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
// Iterate over every source path that was copied.
while (sourceReader.next(srcRelPath, srcFileStatus)) {
// File-attributes for files are set at the time of copy, in the map-task;
// only directories need their attributes preserved here.
if (!srcFileStatus.isDirectory())
continue;
Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
// Skip the target root itself when syncing or overwriting.
if (targetRoot.equals(targetFile) && syncOrOverwrite)
continue;
FileSystem targetFS = targetFile.getFileSystem(conf);
DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes, preserveRawXattrs);
// Track the count so the summary logged below is accurate.
preservedEntries++;
taskAttemptContext.progress();
taskAttemptContext.setStatus("Preserving status on directory entries. [" + sourceReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
}
LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
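For reference, a minimal sketch of producing the kind of listing this method consumes: a SequenceFile of (Text relative-path, CopyListingFileStatus) pairs. The listing path and entry below are hypothetical, not part of DistCp itself.

// Minimal sketch: writing a (Text, CopyListingFileStatus) listing like the
// one preserveFileAttributesForDirectories iterates. Paths are hypothetical.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.tools.CopyListingFileStatus;

public class ListingSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path listing = new Path("/tmp/distcp-listing.seq"); // hypothetical path
        FileSystem fs = FileSystem.getLocal(conf);
        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(listing),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(CopyListingFileStatus.class));
        try {
            // One entry per copied path: the key is the root-relative path,
            // the value carries the source attributes to preserve.
            writer.append(new Text("/dir1"),
                new CopyListingFileStatus(fs.getFileStatus(new Path("/tmp"))));
        } finally {
            IOUtils.closeStream(writer);
        }
    }
}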
Use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.
The class CopyCommitter, method deleteMissing.
// This method deletes "extra" files from the target, i.e. files that
// are not present at the source.
private void deleteMissing(Configuration conf) throws IOException {
LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");
// Sort the source-file listing alphabetically.
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
// Similarly, create the listing of target-files. Sort alphabetically.
Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
List<Path> targets = new ArrayList<Path>(1);
Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
targets.add(targetFinalPath);
Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath).toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME)
? DistCpConstants.RAW_NONE_PATH
: DistCpConstants.NONE_PATH;
DistCpOptions options = new DistCpOptions(targets, resultNonePath);
//
// Set up options to match those CopyListing.buildListing saw during the copy,
// so that the target listing is collected the same way.
//
options.setOverwrite(overwrite);
options.setSyncFolder(syncFolder);
options.setTargetPathExists(targetPathExists);
target.buildListing(targetListing, options);
Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sortedSourceListing));
SequenceFile.Reader targetReader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(sortedTargetListing));
// Walk both source and target file listings.
// Delete all from target that doesn't also exist on source.
long deletedEntries = 0;
try {
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
Text trgtRelPath = new Text();
FileSystem targetFS = targetFinalPath.getFileSystem(conf);
boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
while (targetReader.next(trgtRelPath, trgtFileStatus)) {
// Skip sources that don't exist on target.
while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
}
if (srcAvailable && trgtRelPath.equals(srcRelPath))
continue;
// Target doesn't exist at source. Delete.
boolean result = targetFS.delete(trgtFileStatus.getPath(), true) || !targetFS.exists(trgtFileStatus.getPath());
if (result) {
LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
deletedEntries++;
} else {
throw new IOException("Unable to delete " + trgtFileStatus.getPath());
}
taskAttemptContext.progress();
taskAttemptContext.setStatus("Deleting missing files from target. [" + targetReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
IOUtils.closeStream(targetReader);
}
LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
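The core of deleteMissing is a merge-walk over the two sorted listings. An illustrative, self-contained sketch of the same walk over plain sorted string lists (the paths are hypothetical):

// Illustrative sketch of the sorted-merge walk deleteMissing performs,
// using plain sorted string lists instead of SequenceFile readers.
import java.util.Arrays;
import java.util.List;

public class MergeWalkSketch {
    public static void main(String[] args) {
        List<String> source = Arrays.asList("/a", "/c");             // sorted source paths
        List<String> target = Arrays.asList("/a", "/b", "/c", "/d"); // sorted target paths
        int s = 0;
        for (String t : target) {
            // Skip source entries that sort before the current target entry.
            while (s < source.size() && t.compareTo(source.get(s)) > 0) {
                s++;
            }
            // Present in both listings: keep.
            if (s < source.size() && t.equals(source.get(s))) {
                continue;
            }
            // Target-only entry: this is what deleteMissing removes.
            System.out.println("delete " + t); // prints /b and /d
        }
    }
}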
Use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.
The class CopyMapper, method map.
/**
* Implementation of the Mapper::map(). Does the copy.
* @param relPath The file's path relative to the source root, used to derive the target path.
* @param sourceFileStatus The source file's status (path and attributes).
* @throws IOException
* @throws InterruptedException
*/
@Override
public void map(Text relPath, CopyListingFileStatus sourceFileStatus, Context context) throws IOException, InterruptedException {
Path sourcePath = sourceFileStatus.getPath();
if (LOG.isDebugEnabled())
LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath);
Path target = new Path(targetWorkPath.makeQualified(targetFS.getUri(), targetFS.getWorkingDirectory()) + relPath.toString());
EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context);
final boolean preserveRawXattrs = context.getConfiguration().getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);
final String description = "Copying " + sourcePath + " to " + target;
context.setStatus(description);
LOG.info(description);
try {
CopyListingFileStatus sourceCurrStatus;
FileSystem sourceFS;
try {
sourceFS = sourcePath.getFileSystem(conf);
final boolean preserveXAttrs = fileAttributes.contains(FileAttribute.XATTR);
sourceCurrStatus = DistCpUtils.toCopyListingFileStatus(sourceFS, sourceFS.getFileStatus(sourcePath), fileAttributes.contains(FileAttribute.ACL), preserveXAttrs, preserveRawXattrs);
} catch (FileNotFoundException e) {
throw new IOException(new RetriableFileCopyCommand.CopyReadException(e));
}
FileStatus targetStatus = null;
try {
targetStatus = targetFS.getFileStatus(target);
} catch (FileNotFoundException ignore) {
if (LOG.isDebugEnabled())
LOG.debug("Path could not be found: " + target, ignore);
}
if (targetStatus != null && (targetStatus.isDirectory() != sourceCurrStatus.isDirectory())) {
throw new IOException("Can't replace " + target + ". Target is " + getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus));
}
if (sourceCurrStatus.isDirectory()) {
createTargetDirsWithRetry(description, target, context);
return;
}
FileAction action = checkUpdate(sourceFS, sourceCurrStatus, target, targetStatus);
if (action == FileAction.SKIP) {
LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target);
updateSkipCounters(context, sourceCurrStatus);
context.write(null, new Text("SKIP: " + sourceCurrStatus.getPath()));
} else {
copyFileWithRetry(description, sourceCurrStatus, target, context, action, fileAttributes);
}
DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes, preserveRawXattrs);
} catch (IOException exception) {
handleFailures(exception, sourceFileStatus, target, context);
}
}
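The target path above is built by appending the listing key (a root-relative path) to the qualified target work path. A small illustrative sketch of that derivation, with a hypothetical URI and paths standing in for targetFS.getUri() and the real work path:

// Sketch of the target-path derivation in map(). URI and paths are made up.
import java.net.URI;
import org.apache.hadoop.fs.Path;

public class TargetPathSketch {
    public static void main(String[] args) {
        Path targetWorkPath = new Path("/user/hadoop/.distcp.tmp"); // hypothetical
        String relPath = "/dir1/file1";                             // listing key
        // makeQualified(URI, Path) attaches scheme and authority, as map()
        // does with targetFS.getUri() and targetFS.getWorkingDirectory().
        Path qualified = targetWorkPath.makeQualified(
            URI.create("hdfs://namenode:8020"), new Path("/user/hadoop"));
        Path target = new Path(qualified + relPath);
        System.out.println(target);
        // hdfs://namenode:8020/user/hadoop/.distcp.tmp/dir1/file1
    }
}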
Use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.
The class UniformSizeInputFormat, method getSplits.
private List<InputSplit> getSplits(Configuration configuration, int numSplits, long totalSizeBytes) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
long nBytesPerSplit = (long) Math.ceil(totalSizeBytes * 1.0 / numSplits);
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
long currentSplitSize = 0;
long lastSplitStart = 0;
long lastPosition = 0;
final Path listingFilePath = getListingFilePath(configuration);
if (LOG.isDebugEnabled()) {
LOG.debug("Average bytes per map: " + nBytesPerSplit + ", Number of maps: " + numSplits + ", total size: " + totalSizeBytes);
}
SequenceFile.Reader reader = null;
try {
reader = getListingFileReader(configuration);
while (reader.next(srcRelPath, srcFileStatus)) {
// If adding the current file would push this split past the per-split
// byte limit, close the current split and start a new one with this file.
if (currentSplitSize + srcFileStatus.getLen() > nBytesPerSplit && lastPosition != 0) {
FileSplit split = new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null);
if (LOG.isDebugEnabled()) {
LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
}
splits.add(split);
lastSplitStart = lastPosition;
currentSplitSize = 0;
}
currentSplitSize += srcFileStatus.getLen();
lastPosition = reader.getPosition();
}
if (lastPosition > lastSplitStart) {
FileSplit split = new FileSplit(listingFilePath, lastSplitStart, lastPosition - lastSplitStart, null);
if (LOG.isDebugEnabled()) {
LOG.debug("Creating split : " + split + ", bytes in split: " + currentSplitSize);
}
splits.add(split);
}
} finally {
IOUtils.closeStream(reader);
}
return splits;
}
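The loop above packs listing entries into splits of roughly totalSizeBytes / numSplits bytes each. An illustrative sketch of the same packing over plain file lengths; the values are hypothetical, and the (i != splitStart) guard stands in for the original's lastPosition != 0 check:

// Illustrative sketch of the split-packing loop, over plain file lengths
// instead of SequenceFile positions.
import java.util.ArrayList;
import java.util.List;

public class SplitPackingSketch {
    public static void main(String[] args) {
        long[] fileLengths = { 50, 50, 50, 50 }; // hypothetical listing entries
        int numSplits = 2;
        long totalSize = 200;
        long bytesPerSplit = (long) Math.ceil(totalSize * 1.0 / numSplits); // 100
        List<String> splits = new ArrayList<>();
        long currentSplitSize = 0;
        int splitStart = 0;
        for (int i = 0; i < fileLengths.length; i++) {
            // Close the current split before it would exceed the target size.
            if (currentSplitSize + fileLengths[i] > bytesPerSplit && i != splitStart) {
                splits.add("files " + splitStart + ".." + (i - 1));
                splitStart = i;
                currentSplitSize = 0;
            }
            currentSplitSize += fileLengths[i];
        }
        // The final split picks up the remaining files, like the tail FileSplit above.
        if (splitStart < fileLengths.length) {
            splits.add("files " + splitStart + ".." + (fileLengths.length - 1));
        }
        System.out.println(splits); // [files 0..1, files 2..3]
    }
}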
Use of org.apache.hadoop.tools.CopyListingFileStatus in project hadoop by apache.
The class DistCpUtils, method preserve.
/**
* Preserves attributes on the target path to match those of the source
* file status passed as argument. All attributes other than the block size
* are preserved by this function.
*
* @param targetFS - File system
* @param path - Path that needs to preserve original file status
* @param srcFileStatus - Original file status
* @param attributes - Attribute set that needs to be preserved
* @param preserveRawXattrs if true, raw.* xattrs should be preserved
* @throws IOException - Exception if any (particularly relating to group/owner
* change or any transient error)
*/
public static void preserve(FileSystem targetFS, Path path, CopyListingFileStatus srcFileStatus, EnumSet<FileAttribute> attributes, boolean preserveRawXattrs) throws IOException {
// If not preserving anything from FileStatus, don't bother fetching it.
FileStatus targetFileStatus = attributes.isEmpty() ? null : targetFS.getFileStatus(path);
String group = targetFileStatus == null ? null : targetFileStatus.getGroup();
String user = targetFileStatus == null ? null : targetFileStatus.getOwner();
boolean chown = false;
if (attributes.contains(FileAttribute.ACL)) {
List<AclEntry> srcAcl = srcFileStatus.getAclEntries();
List<AclEntry> targetAcl = getAcl(targetFS, targetFileStatus);
if (!srcAcl.equals(targetAcl)) {
targetFS.setAcl(path, srcAcl);
}
// setAcl doesn't preserve sticky bit, so also call setPermission if needed.
if (srcFileStatus.getPermission().getStickyBit() != targetFileStatus.getPermission().getStickyBit()) {
targetFS.setPermission(path, srcFileStatus.getPermission());
}
} else if (attributes.contains(FileAttribute.PERMISSION) && !srcFileStatus.getPermission().equals(targetFileStatus.getPermission())) {
targetFS.setPermission(path, srcFileStatus.getPermission());
}
final boolean preserveXAttrs = attributes.contains(FileAttribute.XATTR);
if (preserveXAttrs || preserveRawXattrs) {
final String rawNS = StringUtils.toLowerCase(XAttr.NameSpace.RAW.name());
Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs();
Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path);
if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) {
for (Entry<String, byte[]> entry : srcXAttrs.entrySet()) {
String xattrName = entry.getKey();
if (xattrName.startsWith(rawNS) || preserveXAttrs) {
targetFS.setXAttr(path, xattrName, entry.getValue());
}
}
}
}
if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() && (srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
targetFS.setReplication(path, srcFileStatus.getReplication());
}
if (attributes.contains(FileAttribute.GROUP) && !group.equals(srcFileStatus.getGroup())) {
group = srcFileStatus.getGroup();
chown = true;
}
if (attributes.contains(FileAttribute.USER) && !user.equals(srcFileStatus.getOwner())) {
user = srcFileStatus.getOwner();
chown = true;
}
if (chown) {
targetFS.setOwner(path, user, group);
}
if (attributes.contains(FileAttribute.TIMES)) {
targetFS.setTimes(path, srcFileStatus.getModificationTime(), srcFileStatus.getAccessTime());
}
}
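A hypothetical call site for this method, preserving user, group, and timestamps onto an already-copied file; the paths and cluster URIs below are made up for illustration:

// Hypothetical usage of DistCpUtils.preserve outside the committer.
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.util.DistCpUtils;

public class PreserveSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path source = new Path("hdfs://nn1:8020/data/file1");   // hypothetical
        Path target = new Path("hdfs://nn2:8020/backup/file1"); // hypothetical
        FileSystem sourceFS = source.getFileSystem(conf);
        FileSystem targetFS = target.getFileSystem(conf);
        CopyListingFileStatus srcStatus =
            new CopyListingFileStatus(sourceFS.getFileStatus(source));
        // USER and GROUP changes are coalesced into a single setOwner() call,
        // as the method body above shows via the chown flag.
        DistCpUtils.preserve(targetFS, target, srcStatus,
            EnumSet.of(FileAttribute.USER, FileAttribute.GROUP, FileAttribute.TIMES),
            false /* preserveRawXattrs */);
    }
}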