use of org.apache.hadoop.hdfs.DFSInputStream in project SSM by Intel-bigdata.
the class SmartFileSystem method open.
@Override
public FSDataInputStream open(Path path, final int bufferSize) throws IOException {
  statistics.incrementReadOps(1);
  Path absF = fixRelativePart(path);
  final DFSInputStream in =
      smartDFSClient.open(absF.toUri().getPath(), bufferSize, verifyChecksum);
  return smartDFSClient.createWrappedInputStream(in);
}
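A hedged usage sketch (not from the SSM sources): when a client is configured to use SmartFileSystem, typically by pointing fs.hdfs.impl at the SmartFileSystem class, the override above is reached through the ordinary FileSystem API. The URI, path, and buffer size below are made-up placeholders.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SmartReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical namenode address; in a real deployment this comes from core-site.xml.
    FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
    // FileSystem.open(Path, int) dispatches to SmartFileSystem.open when
    // SmartFileSystem is the configured hdfs implementation.
    try (FSDataInputStream in = fs.open(new Path("/demo/sample.txt"), 64 * 1024)) {
      byte[] buf = new byte[4096];
      int n;
      while ((n = in.read(buf)) != -1) {
        // Consume n bytes from buf.
      }
    }
  }
}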
use of org.apache.hadoop.hdfs.DFSInputStream in project SSM by Intel-bigdata.
the class TestSmartDFSClientReadECData method testReadECDataCreatedBySSM.
@Test
public void testReadECDataCreatedBySSM() throws IOException {
  cluster.getFileSystem().mkdirs(new Path(TEST_DIR));
  String srcPath = "/ec/a.txt";
  createTestFile(srcPath, 300000);
  SmartConf smartConf = smartContext.getConf();
  // This configuration is set only so that a SmartDFSClient can be created
  // successfully; the actual value of the property doesn't matter.
  smartConf.set(SmartConfKeys.SMART_SERVER_RPC_ADDRESS_KEY,
      SmartConfKeys.SMART_SERVER_RPC_ADDRESS_DEFAULT);
  SmartDFSClient smartDFSClient = new SmartDFSClient(smartConf);
  ErasureCodingAction ecAction = new ErasureCodingAction();
  ecAction.setContext(smartContext);
  String ecTmpPath = "/ssm/ec_tmp/tmp_file";
  Map<String, String> args = new HashMap<>();
  args.put(HdfsAction.FILE_PATH, srcPath);
  args.put(ErasureCodingBase.EC_TMP, ecTmpPath);
  args.put(ErasureCodingAction.EC_POLICY_NAME, ecPolicy.getName());
  ecAction.init(args);
  ecAction.run();
  assertTrue(ecAction.getExpectedAfterRun());
  Assert.assertTrue(ecPolicy == dfsClient.getErasureCodingPolicy(srcPath));
  DFSInputStream dfsInputStream = smartDFSClient.open(srcPath);
  // In a unit test a plain DFSInputStream could still read EC data, but in a real
  // environment a DFSStripedInputStream is required; otherwise a block-not-found
  // exception will occur.
  Assert.assertTrue(dfsInputStream instanceof DFSStripedInputStream);
  int bufferSize = 64 * 1024;
  byte[] buffer = new byte[bufferSize];
  // Read the EC data from HDFS until EOF.
  while (dfsInputStream.read(buffer, 0, bufferSize) != -1) {
  }
  dfsInputStream.close();
}
use of org.apache.hadoop.hdfs.DFSInputStream in project SSM by Intel-bigdata.
the class TestSmartDFSClientReadECData method testReadECDataCreatedByHDFS.
@Test
public void testReadECDataCreatedByHDFS() throws IOException {
  cluster.getFileSystem().mkdirs(new Path(TEST_DIR));
  // Set an EC policy for this test dir, so files created under it will be
  // stored with this EC policy.
  dfsClient.setErasureCodingPolicy(TEST_DIR, ecPolicy.getName());
  String srcPath = "/ec/a.txt";
  createTestFile(srcPath, 300000);
  Assert.assertTrue(ecPolicy == dfsClient.getErasureCodingPolicy(srcPath));
  SmartConf smartConf = smartContext.getConf();
  // This configuration is set only so that a SmartDFSClient can be created
  // successfully; the actual value of the property doesn't matter.
  smartConf.set(SmartConfKeys.SMART_SERVER_RPC_ADDRESS_KEY,
      SmartConfKeys.SMART_SERVER_RPC_ADDRESS_DEFAULT);
  SmartDFSClient smartDFSClient = new SmartDFSClient(smartConf);
  DFSInputStream dfsInputStream = smartDFSClient.open(srcPath);
  // In a unit test a plain DFSInputStream could still read EC data, but in a real
  // environment a DFSStripedInputStream is required; otherwise a block-not-found
  // exception will occur.
  Assert.assertTrue(dfsInputStream instanceof DFSStripedInputStream);
  int bufferSize = 64 * 1024;
  byte[] buffer = new byte[bufferSize];
  // Read the EC data from HDFS until EOF.
  while (dfsInputStream.read(buffer, 0, bufferSize) != -1) {
  }
  dfsInputStream.close();
}
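For illustration only (not part of the SSM tests): a caller that wants to confirm it is reading striped EC data can combine the instanceof check above with an ordinary drain loop. drainFully is a made-up helper name, and imports of DFSClient, DFSInputStream, and DFSStripedInputStream from org.apache.hadoop.hdfs are assumed.
// Hypothetical helper: drain a DFSInputStream and note whether it is striped.
// DFSStripedInputStream extends DFSInputStream, so an instanceof check suffices.
static long drainFully(DFSClient client, String path) throws IOException {
  long total = 0;
  try (DFSInputStream in = client.open(path)) {
    if (!(in instanceof DFSStripedInputStream)) {
      // Plain DFSInputStream: the file is being read as contiguous, not EC-striped.
    }
    byte[] buffer = new byte[64 * 1024];
    int read;
    while ((read = in.read(buffer, 0, buffer.length)) != -1) {
      total += read;
    }
  }
  return total;
}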
use of org.apache.hadoop.hdfs.DFSInputStream in project SSM by Intel-bigdata.
the class CompressionAction method execute.
@Override
protected void execute() throws Exception {
  if (filePath == null) {
    throw new IllegalArgumentException("File path is missing.");
  }
  if (compressTmpPath == null) {
    throw new IllegalArgumentException("Compression tmp path is not specified!");
  }
  if (!compressionCodecList.contains(compressCodec)) {
    throw new ActionException("Compression Action failed due to unsupported codec: " + compressCodec);
  }
  appendLog(String.format("Compression Action started at %s for %s",
      Utils.getFormatedCurrentTime(), filePath));
  if (!dfsClient.exists(filePath)) {
    throw new ActionException("Failed to execute Compression Action: the given file doesn't exist!");
  }
  HdfsFileStatus srcFileStatus = dfsClient.getFileInfo(filePath);
  // Consider the directory case.
  if (srcFileStatus.isDir()) {
    appendLog("Compression is not applicable to a directory.");
    return;
  }
  // Generate the compressed file.
  compressionFileState = new CompressionFileState(filePath, bufferSize, compressCodec);
  compressionFileState.setOriginalLength(srcFileStatus.getLen());
  OutputStream appendOut = null;
  DFSInputStream in = null;
  OutputStream out = null;
  try {
    if (srcFileStatus.getLen() == 0) {
      compressionFileInfo = new CompressionFileInfo(false, compressionFileState);
    } else {
      short replication = srcFileStatus.getReplication();
      long blockSize = srcFileStatus.getBlockSize();
      long fileSize = srcFileStatus.getLen();
      appendLog("File length: " + fileSize);
      bufferSize = getActualBuffSize(fileSize);
      // SmartDFSClient will fail to open a file being compressed, because the
      // Compression scheduler sets its FileStage to PROCESSING. But since a plain
      // DFSClient may also be in use, an append operation is used to lock the file
      // and avoid any modification.
      appendOut = CompatibilityHelperLoader.getHelper()
          .getDFSClientAppend(dfsClient, filePath, bufferSize);
      in = dfsClient.open(filePath);
      out = dfsClient.create(compressTmpPath, true, replication, blockSize);
      // Keep the storage policy consistent.
      // The statement below is not supported on Hadoop 2.7.3 or CDH 5.10.1:
      // String storagePolicyName = dfsClient.getStoragePolicy(filePath).getName();
      byte storagePolicyId = srcFileStatus.getStoragePolicy();
      String storagePolicyName = SmartConstants.STORAGE_POLICY_MAP.get(storagePolicyId);
      if (!storagePolicyName.equals("UNDEF")) {
        dfsClient.setStoragePolicy(compressTmpPath, storagePolicyName);
      }
      compress(in, out);
      HdfsFileStatus destFileStatus = dfsClient.getFileInfo(compressTmpPath);
      dfsClient.setOwner(compressTmpPath, srcFileStatus.getOwner(), srcFileStatus.getGroup());
      dfsClient.setPermission(compressTmpPath, srcFileStatus.getPermission());
      compressionFileState.setCompressedLength(destFileStatus.getLen());
      appendLog("Compressed file length: " + destFileStatus.getLen());
      compressionFileInfo = new CompressionFileInfo(true, compressTmpPath, compressionFileState);
    }
    compressionFileState.setBufferSize(bufferSize);
    appendLog("Compression buffer size: " + bufferSize);
    appendLog("Compression codec: " + compressCodec);
    String compressionInfoJson = new Gson().toJson(compressionFileInfo);
    appendResult(compressionInfoJson);
    LOG.warn(compressionInfoJson);
    if (compressionFileInfo.needReplace()) {
      // Add to the temp path.
      // Make sure the content written to the xattr is less than 64KB.
      dfsClient.setXAttr(compressionFileInfo.getTempPath(), XATTR_NAME,
          SerializationUtils.serialize(compressionFileState), EnumSet.of(XAttrSetFlag.CREATE));
      // The rename operation was moved here from CompressionScheduler so that
      // modification of the original file is avoided.
      dfsClient.rename(compressTmpPath, filePath, Options.Rename.OVERWRITE);
    } else {
      // Add to the raw path.
      dfsClient.setXAttr(filePath, XATTR_NAME,
          SerializationUtils.serialize(compressionFileState), EnumSet.of(XAttrSetFlag.CREATE));
    }
  } catch (IOException e) {
    throw new IOException(e);
  } finally {
    if (appendOut != null) {
      try {
        appendOut.close();
      } catch (IOException e) {
        // Hide the expected exception that the original file is missing.
      }
    }
    if (in != null) {
      in.close();
    }
    if (out != null) {
      out.close();
    }
  }
}
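The compress(in, out) helper called above is not shown in this snippet. As a rough sketch of the buffered copy it has to perform, under the assumption of a generic java.util.zip.DeflaterOutputStream wrapper: SSM's real implementation selects a codec based on compressCodec and tracks state in CompressionFileState, so this stand-in only illustrates the copy pattern. Imports of java.io.InputStream, java.io.OutputStream, java.io.IOException, and java.util.zip.DeflaterOutputStream are assumed.
// Illustrative only: not SSM's actual compress() implementation.
private void compressSketch(InputStream in, OutputStream out, int bufferSize)
    throws IOException {
  // Wrap the destination with a generic deflater; SSM would instead pick the
  // codec matching compressCodec.
  try (DeflaterOutputStream compressed = new DeflaterOutputStream(out)) {
    byte[] buffer = new byte[bufferSize];
    int read;
    while ((read = in.read(buffer)) != -1) {
      compressed.write(buffer, 0, read);
    }
    compressed.finish();
  }
}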
use of org.apache.hadoop.hdfs.DFSInputStream in project SSM by Intel-bigdata.
the class TestCompressDecompress method testSubmitCompressionAction.
@Test
public void testSubmitCompressionAction() throws Exception {
  // if (!loadedNative()) {
  //   return;
  // }
  waitTillSSMExitSafeMode();
  // initDB();
  int arraySize = 1024 * 1024 * 80;
  String fileName = "/ssm/compression/file1";
  byte[] bytes = prepareFile(fileName, arraySize);
  MetaStore metaStore = ssm.getMetaStore();
  int bufSize = 1024 * 1024 * 10;
  CmdletManager cmdletManager = ssm.getCmdletManager();
  long cmdId = cmdletManager.submitCmdlet(
      "compress -file " + fileName + " -bufSize " + bufSize + " -codec " + codec);
  waitTillActionDone(cmdId);
  FileState fileState = null;
  // Metastore test: wait until the file is recorded as compressed.
  int n = 0;
  while (true) {
    fileState = metaStore.getFileState(fileName);
    if (FileState.FileType.COMPRESSION.equals(fileState.getFileType())) {
      break;
    }
    Thread.sleep(1000);
    if (n++ >= 20) {
      throw new Exception("Timed out waiting for the expected file state.");
    }
  }
  Assert.assertEquals(FileState.FileStage.DONE, fileState.getFileStage());
  Assert.assertTrue(fileState instanceof CompressionFileState);
  CompressionFileState compressionFileState = (CompressionFileState) fileState;
  Assert.assertEquals(fileName, compressionFileState.getPath());
  Assert.assertEquals(bufSize, compressionFileState.getBufferSize());
  Assert.assertEquals(codec, compressionFileState.getCompressionImpl());
  Assert.assertEquals(arraySize, compressionFileState.getOriginalLength());
  Assert.assertTrue(compressionFileState.getCompressedLength() > 0);
  Assert.assertTrue(compressionFileState.getCompressedLength()
      < compressionFileState.getOriginalLength());
  // Data accuracy test: read the file back through SmartDFSClient and compare.
  byte[] input = new byte[arraySize];
  DFSInputStream dfsInputStream = smartDFSClient.open(fileName);
  int offset = 0;
  while (true) {
    int len = dfsInputStream.read(input, offset, arraySize - offset);
    if (len <= 0) {
      break;
    }
    offset += len;
  }
  Assert.assertArrayEquals("original array does not equal the compressed/decompressed array",
      input, bytes);
}