Search in sources :

Example 1 with MD5MD5CRC32FileChecksum

Use of org.apache.hadoop.fs.MD5MD5CRC32FileChecksum in the Apache Hadoop project.

Method testCreateWithCustomChecksum from the class TestDistributedFileSystem.

@Test
public void testCreateWithCustomChecksum() throws Exception {
    Configuration conf = getTestConfiguration();
    MiniDFSCluster cluster = null;
    Path testBasePath = new Path("/test/csum");
    // create args 
    Path path1 = new Path(testBasePath, "file_wtih_crc1");
    Path path2 = new Path(testBasePath, "file_with_crc2");
    ChecksumOpt opt1 = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
    ChecksumOpt opt2 = new ChecksumOpt(DataChecksum.Type.CRC32, 512);
    // common args
    FsPermission perm = FsPermission.getDefault().applyUMask(FsPermission.getUMask(conf));
    EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.OVERWRITE, CreateFlag.CREATE);
    short repl = 1;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
        FileSystem dfs = cluster.getFileSystem();
        dfs.mkdirs(testBasePath);
        // Create two files with different checksum types. try-with-resources
        // guarantees both streams are closed even if a write fails or the
        // first close() throws (the original leaked out2 in that case).
        try (FSDataOutputStream out1 = dfs.create(path1, perm, flags, 4096, repl, 131072L, null, opt1);
             FSDataOutputStream out2 = dfs.create(path2, perm, flags, 4096, repl, 131072L, null, opt2)) {
            for (int i = 0; i < 1024; i++) {
                out1.write(i);
                out2.write(i);
            }
        }
        // the two checksums must be different.
        MD5MD5CRC32FileChecksum sum1 = (MD5MD5CRC32FileChecksum) dfs.getFileChecksum(path1);
        MD5MD5CRC32FileChecksum sum2 = (MD5MD5CRC32FileChecksum) dfs.getFileChecksum(path2);
        assertFalse(sum1.equals(sum2));
        // check the individual params
        assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
        assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
    } finally {
        if (cluster != null) {
            cluster.getFileSystem().delete(testBasePath, true);
            cluster.shutdown();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) CreateFlag(org.apache.hadoop.fs.CreateFlag) MD5MD5CRC32FileChecksum(org.apache.hadoop.fs.MD5MD5CRC32FileChecksum) Configuration(org.apache.hadoop.conf.Configuration) ChecksumOpt(org.apache.hadoop.fs.Options.ChecksumOpt) FileSystem(org.apache.hadoop.fs.FileSystem) FsPermission(org.apache.hadoop.fs.permission.FsPermission) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) Test(org.junit.Test)

Example 2 with MD5MD5CRC32FileChecksum

Use of org.apache.hadoop.fs.MD5MD5CRC32FileChecksum in the Apache Hadoop project.

Method toMD5MD5CRC32FileChecksum from the class JsonUtilClient.

/**
 * Convert a JSON map to an MD5MD5CRC32FileChecksum.
 *
 * <p>Reconstructs the checksum object a DFSClient would have returned:
 * the concrete subclass is chosen from the CRC type encoded in the
 * algorithm name, the serialized bytes are replayed through
 * {@code readFields}, and the algorithm name and length are
 * cross-checked against the JSON fields.
 *
 * @param json the parsed JSON map, or null
 * @return the reconstructed checksum, or null if {@code json} is null
 * @throws IOException if the FileChecksum entry is missing, the
 *         algorithm is unknown, or the decoded checksum does not match
 *         the declared algorithm/length
 */
static MD5MD5CRC32FileChecksum toMD5MD5CRC32FileChecksum(final Map<?, ?> json) throws IOException {
    if (json == null) {
        return null;
    }
    final Map<?, ?> m = (Map<?, ?>) json.get(FileChecksum.class.getSimpleName());
    // Fail with a descriptive IOException instead of an NPE when the
    // expected FileChecksum entry is absent from the response.
    if (m == null) {
        throw new IOException("Missing " + FileChecksum.class.getSimpleName() + " entry in JSON map");
    }
    final String algorithm = (String) m.get("algorithm");
    final int length = ((Number) m.get("length")).intValue();
    final byte[] bytes = StringUtils.hexStringToByte((String) m.get("bytes"));
    final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
    final DataChecksum.Type crcType = MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm);
    final MD5MD5CRC32FileChecksum checksum;
    // Recreate what DFSClient would have returned.
    switch(crcType) {
        case CRC32:
            checksum = new MD5MD5CRC32GzipFileChecksum();
            break;
        case CRC32C:
            checksum = new MD5MD5CRC32CastagnoliFileChecksum();
            break;
        default:
            throw new IOException("Unknown algorithm: " + algorithm);
    }
    checksum.readFields(in);
    //check algorithm name
    if (!checksum.getAlgorithmName().equals(algorithm)) {
        throw new IOException("Algorithm not matched. Expected " + algorithm + ", Received " + checksum.getAlgorithmName());
    }
    //check length
    if (length != checksum.getLength()) {
        throw new IOException("Length not matched: length=" + length + ", checksum.getLength()=" + checksum.getLength());
    }
    return checksum;
}
Also used : MD5MD5CRC32FileChecksum(org.apache.hadoop.fs.MD5MD5CRC32FileChecksum) MD5MD5CRC32GzipFileChecksum(org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum) IOException(java.io.IOException) DataInputStream(java.io.DataInputStream) MD5MD5CRC32CastagnoliFileChecksum(org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum) DataChecksum(org.apache.hadoop.util.DataChecksum) ByteArrayInputStream(java.io.ByteArrayInputStream) Map(java.util.Map)

Example 3 with MD5MD5CRC32FileChecksum

Use of org.apache.hadoop.fs.MD5MD5CRC32FileChecksum in the Apache Hadoop project.

Method onGetFileChecksum from the class WebHdfsHandler.

private void onGetFileChecksum(ChannelHandlerContext ctx) throws IOException {
    // Fetch the full-file checksum via a short-lived DFSClient.
    final String namenode = params.namenodeId();
    DFSClient client = newDfsClient(namenode, conf);
    MD5MD5CRC32FileChecksum checksum = null;
    try {
        checksum = client.getFileChecksum(path, Long.MAX_VALUE);
        client.close();
        // Null out after a successful close so the finally-block cleanup
        // does not attempt to close the client a second time.
        client = null;
    } finally {
        IOUtils.cleanup(LOG, client);
    }
    // Serialize the checksum as JSON and write it back as the HTTP response.
    final byte[] body = JsonUtil.toJsonString(checksum).getBytes(StandardCharsets.UTF_8);
    resp = new DefaultFullHttpResponse(HTTP_1_1, OK, Unpooled.wrappedBuffer(body));
    resp.headers().set(CONTENT_TYPE, APPLICATION_JSON_UTF8);
    resp.headers().set(CONTENT_LENGTH, body.length);
    resp.headers().set(CONNECTION, CLOSE);
    ctx.writeAndFlush(resp).addListener(ChannelFutureListener.CLOSE);
}
Also used : DFSClient(org.apache.hadoop.hdfs.DFSClient) DefaultFullHttpResponse(io.netty.handler.codec.http.DefaultFullHttpResponse) MD5MD5CRC32FileChecksum(org.apache.hadoop.fs.MD5MD5CRC32FileChecksum)

Example 4 with MD5MD5CRC32FileChecksum

Use of org.apache.hadoop.fs.MD5MD5CRC32FileChecksum in the Kylo project by Teradata.

Method testSingleFileProperChecksum from the class ComputeHDFSChecksumsTest.

@Test
public void testSingleFileProperChecksum() throws Exception {
    String targetFile = "000000_0";
    // Stub the file system so any path reports a fixed MD5-of-MD5s checksum.
    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))).when(fileSystem).getFileChecksum(any(Path.class));
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "True");
    runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "]", targetFile, "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA="));
    runner.enqueue(new byte[0]);
    runner.run();
    // Expected checksum matches the stub, so the flow file routes to success.
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());
    // Verify the processor asked HDFS for exactly this file's checksum.
    verifyGetFileChecksumCall(targetFile);
}
Also used : Path(org.apache.hadoop.fs.Path) MD5MD5CRC32FileChecksum(org.apache.hadoop.fs.MD5MD5CRC32FileChecksum) MD5Hash(org.apache.hadoop.io.MD5Hash) Test(org.junit.Test)

Example 5 with MD5MD5CRC32FileChecksum

Use of org.apache.hadoop.fs.MD5MD5CRC32FileChecksum in the Kylo project by Teradata.

Method testSingleFileInListDontFailOnWrongChecksum from the class ComputeHDFSChecksumsTest.

@Test
public void testSingleFileInListDontFailOnWrongChecksum() throws Exception {
    String targetFile = "000000_0";
    // Stub the file system so any path reports a fixed MD5-of-MD5s checksum.
    Mockito.doReturn(new MD5MD5CRC32FileChecksum(0, 512, new MD5Hash("112233445566778899aabbccddeeff00"))).when(fileSystem).getFileChecksum(any(Path.class));
    runner.setProperty(ComputeHDFSChecksums.FAIL_IF_INCORRECT_CHECKSUM, "False");
    runner.setProperty(ComputeHDFSChecksums.FILES, String.format("[" + fileEntry + "]", targetFile, "AAACAAAAAAAAAAAArRnBpxcZ9ze14XqfLMB4yA=="));
    runner.enqueue(new byte[0]);
    runner.run();
    // Declared checksum is wrong, but failure mode is disabled: still success.
    Assert.assertEquals(0, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_FAILURE).size());
    Assert.assertEquals(1, runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).size());
    // The computed (stubbed) checksum must be recorded on the "files" attribute.
    String filesAttribute = runner.getFlowFilesForRelationship(ComputeHDFSChecksums.REL_SUCCESS).get(0).getAttribute("files");
    Gson gson = new Gson();
    ComputeHDFSChecksums.File[] parsedFiles = gson.fromJson(filesAttribute, ComputeHDFSChecksums.File[].class);
    Assert.assertEquals(parsedFiles[0].getComputedChecksum().getValue(), "AAAAAAAAAAAAAAIAESIzRFVmd4iZqrvM3e7/AAAAAAA=");
    // Verify the processor asked HDFS for exactly this file's checksum.
    verifyGetFileChecksumCall(targetFile);
}
Also used : Path(org.apache.hadoop.fs.Path) MD5MD5CRC32FileChecksum(org.apache.hadoop.fs.MD5MD5CRC32FileChecksum) Gson(com.google.gson.Gson) MD5Hash(org.apache.hadoop.io.MD5Hash) Test(org.junit.Test)

Aggregations

MD5MD5CRC32FileChecksum (org.apache.hadoop.fs.MD5MD5CRC32FileChecksum)11 Path (org.apache.hadoop.fs.Path)7 Test (org.junit.Test)6 MD5Hash (org.apache.hadoop.io.MD5Hash)5 IOException (java.io.IOException)3 ByteArrayInputStream (java.io.ByteArrayInputStream)2 DataInputStream (java.io.DataInputStream)2 InOrder (org.mockito.InOrder)2 Gson (com.google.gson.Gson)1 DefaultFullHttpResponse (io.netty.handler.codec.http.DefaultFullHttpResponse)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutputStream (java.io.DataOutputStream)1 Map (java.util.Map)1 Configuration (org.apache.hadoop.conf.Configuration)1 CreateFlag (org.apache.hadoop.fs.CreateFlag)1 FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 MD5MD5CRC32CastagnoliFileChecksum (org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum)1 MD5MD5CRC32GzipFileChecksum (org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum)1 ChecksumOpt (org.apache.hadoop.fs.Options.ChecksumOpt)1