Examples with Tika - org.apache.tika.Tika

Example 26 with Tika

use of org.apache.tika.Tika in project nutch by apache.

the class FileDumper method dump.

/**
 * Dumps the reverse engineered raw content from the provided segment
 * directories if a parent directory contains more than one segment, otherwise
 * a single segment can be passed as an argument.
 *
 * <p>For every processed segment a {@code <segmentName>_filenameToUrl.json}
 * file, mapping each computed output path to the URL it came from, is written
 * into {@code outputDir}.
 *
 * @param outputDir
 *          the directory you wish to dump the raw content to. This directory
 *          will be created.
 * @param segmentRootDir
 *          a directory containing one or more segments.
 * @param mimeTypes
 *          an array of mime types we have to dump, all others will be
 *          filtered out. {@code null} accepts every detected mime type.
 * @param flatDir
 *          a boolean flag specifying whether the output directory should contain
 *          only files instead of using nested directories to prevent naming
 *          conflicts.
 * @param mimeTypeStats
 *          a flag indicating whether mimetype stats should be displayed
 *          instead of dumping files.
 * @param reverseURLDump
 *          a flag indicating whether each file is written below a directory
 *          path built from the reversed URL (dots turned into slashes) and
 *          named after the upper-cased SHA-256 hex digest of the URL.
 * @throws Exception
 *           if a segment data file cannot be read or the filename-to-URL
 *           mapping cannot be written.
 */
public void dump(File outputDir, File segmentRootDir, String[] mimeTypes, boolean flatDir, boolean mimeTypeStats, boolean reverseURLDump) throws Exception {
    if (mimeTypes == null)
        LOG.info("Accepting all mimetypes.");
    // total file counts
    Map<String, Integer> typeCounts = new HashMap<>();
    // filtered file counts
    Map<String, Integer> filteredCounts = new HashMap<>();
    Configuration conf = NutchConfiguration.create();
    // A single detector is shared by all records; constructing the facade per record is wasted work.
    Tika tika = new Tika();
    File[] segmentDirs = segmentRootDir.listFiles(file -> file.canRead() && file.isDirectory());
    if (segmentDirs == null) {
        LOG.error("No segment directories found in [" + segmentRootDir.getAbsolutePath() + "]");
        return;
    }
    for (File segment : segmentDirs) {
        LOG.info("Processing segment: [" + segment.getAbsolutePath() + "]");
        Map<String, String> filenameToUrl = new HashMap<>();
        File segmentDir = new File(segment.getAbsolutePath(), Content.DIR_NAME);
        File[] partDirs = segmentDir.listFiles(file -> file.canRead() && file.isDirectory());
        if (partDirs == null) {
            LOG.warn("Skipping Corrupt Segment: [{}]", segment.getAbsolutePath());
            continue;
        }
        for (File partDir : partDirs) {
            // NOTE(review): fs is never used below; it is kept only so the FileSystem is still
            // obtained and closed per part directory exactly as before.
            try (FileSystem fs = FileSystem.get(conf)) {
                String segmentPath = partDir + "/data";
                Path file = new Path(segmentPath);
                if (!new File(file.toString()).exists()) {
                    LOG.warn("Skipping segment: [" + segmentPath + "]: no data directory present");
                    continue;
                }
                // try-with-resources guarantees the reader is closed even when a record fails.
                try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
                    Writable key = (Writable) reader.getKeyClass().newInstance();
                    while (reader.next(key)) {
                        Content content = new Content();
                        reader.getCurrentValue(content);
                        String url = key.toString();
                        String baseName = FilenameUtils.getBaseName(url);
                        String extension = FilenameUtils.getExtension(url);
                        if (extension == null || extension.isEmpty()) {
                            extension = "html";
                        }
                        boolean accepted = false;
                        try {
                            String mimeType = tika.detect(content.getContent());
                            collectStats(typeCounts, mimeType);
                            if (mimeType != null) {
                                if (mimeTypes == null || Arrays.asList(mimeTypes).contains(mimeType)) {
                                    collectStats(filteredCounts, mimeType);
                                    accepted = true;
                                }
                            }
                        } catch (Exception e) {
                            // A detection failure only excludes this record; the dump goes on.
                            LOG.warn("Tika is unable to detect type for: [" + url + "]", e);
                        }
                        if (!accepted || mimeTypeStats) {
                            // Either filtered out, or only statistics were requested.
                            continue;
                        }
                        String md5Ofurl = DumpFileUtil.getUrlMD5(url);
                        String fullDir = outputDir.getAbsolutePath();
                        if (!flatDir && !reverseURLDump) {
                            fullDir = DumpFileUtil.createTwoLevelsDirectory(fullDir, md5Ofurl);
                        }
                        if (Strings.isNullOrEmpty(fullDir)) {
                            continue;
                        }
                        String outputFullPath;
                        if (reverseURLDump) {
                            String[] reversedURL = TableUtil.reverseUrl(url).split(":");
                            reversedURL[0] = reversedURL[0].replace('.', '/');
                            String reversedURLPath = reversedURL[0] + "/" + DigestUtils.sha256Hex(url).toUpperCase();
                            outputFullPath = String.format("%s/%s", fullDir, reversedURLPath);
                            // We'll drop the trailing file name and create the nested structure if it doesn't already exist.
                            String[] splitPath = outputFullPath.split("/");
                            File fullOutputDir = new File(org.apache.commons.lang3.StringUtils.join(Arrays.copyOf(splitPath, splitPath.length - 1), "/"));
                            if (!fullOutputDir.exists()) {
                                fullOutputDir.mkdirs();
                            }
                        } else {
                            outputFullPath = String.format("%s/%s", fullDir, DumpFileUtil.createFileName(md5Ofurl, baseName, extension));
                        }
                        filenameToUrl.put(outputFullPath, url);
                        File outputFile = new File(outputFullPath);
                        if (outputFile.exists()) {
                            LOG.info("Skipping writing: [" + outputFullPath + "]: file already exists");
                            continue;
                        }
                        LOG.info("Writing: [" + outputFullPath + "]");
                        // The stream is opened inside the try so an unopenable path (e.g. an invalid
                        // file name) is reported as a write error instead of aborting the dump.
                        try (FileOutputStream output = new FileOutputStream(outputFile)) {
                            IOUtils.write(content.getContent(), output);
                        } catch (Exception e) {
                            LOG.warn("Write Error: [" + outputFullPath + "]", e);
                        }
                    }
                }
            }
        }
        // save filenameToUrl in a json file for each segment there is one mapping file
        String filenameToUrlFilePath = String.format("%s/%s_filenameToUrl.json", outputDir.getAbsolutePath(), segment.getName());
        new ObjectMapper().writeValue(new File(filenameToUrlFilePath), filenameToUrl);
    }
    LOG.info("Dumper File Stats: " + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
    if (mimeTypeStats) {
        System.out.println("Dumper File Stats: " + DumpFileUtil.displayFileTypes(typeCounts, filteredCounts));
    }
}

Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) NutchConfiguration(org.apache.nutch.util.NutchConfiguration) HashMap(java.util.HashMap) DataOutputStream(java.io.DataOutputStream) Writable(org.apache.hadoop.io.Writable) Tika(org.apache.tika.Tika) SequenceFile(org.apache.hadoop.io.SequenceFile) ByteArrayInputStream(java.io.ByteArrayInputStream) Content(org.apache.nutch.protocol.Content) FileSystem(org.apache.hadoop.fs.FileSystem) FileOutputStream(java.io.FileOutputStream) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 27 with Tika

use of org.apache.tika.Tika in project nutch by apache.

the class ZipTextExtractor method extractText.

/**
 * Extracts text from the files contained in a zip stream.
 *
 * <p>Every non-directory entry whose name contains a dot is wrapped in a
 * {@link Content} object (content type detected from the entry name) and
 * handed to {@link ParseUtil}. The parsed text of each entry is appended to
 * the result, prefixed by the entry name, and the entry's outlinks are added
 * to {@code outLinksList}. Entries that fail to parse are logged and skipped.
 *
 * @param input
 *          the zip data; it is wrapped in a {@link ZipInputStream} and is not
 *          closed by this method
 * @param url
 *          the URL of the zip file; entry URLs are formed as
 *          {@code url + "/" + entryName}
 * @param outLinksList
 *          receives the outlinks found in the parsed entries
 * @return the concatenated text of all successfully parsed entries
 * @throws IOException
 *           if the zip stream cannot be read or an entry URL is malformed
 */
public String extractText(InputStream input, String url, List<Outlink> outLinksList) throws IOException {
    StringBuilder resultText = new StringBuilder();
    ZipInputStream zin = new ZipInputStream(input);
    // Name-based detection keeps no per-entry state, so one facade serves all entries.
    Tika tika = new Tika();
    byte[] chunk = new byte[8192];
    ZipEntry entry;
    while ((entry = zin.getNextEntry()) != null) {
        if (entry.isDirectory()) {
            continue;
        }
        String fname = entry.getName();
        String newurl = url + "/" + fname;
        URL aURL = new URL(newurl);
        String base = aURL.toString();
        if (fname.lastIndexOf('.') == -1) {
            // No extension: the entry is not parsed; getNextEntry() skips its data.
            continue;
        }
        // Read until end-of-entry instead of trusting entry.getSize(): the declared size is -1
        // when unknown, and may not fit in an int.
        java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
        int read;
        while ((read = zin.read(chunk, 0, chunk.length)) != -1) {
            buffer.write(chunk, 0, read);
        }
        byte[] b = buffer.toByteArray();
        // Trying to resolve the Mime-Type
        String contentType = tika.detect(fname);
        try {
            Metadata metadata = new Metadata();
            // Report the number of bytes actually read rather than the declared size.
            metadata.set(Response.CONTENT_LENGTH, Long.toString(b.length));
            metadata.set(Response.CONTENT_TYPE, contentType);
            Content content = new Content(newurl, base, b, contentType, metadata, this.conf);
            Parse parse = new ParseUtil(this.conf).parse(content).get(content.getUrl());
            ParseData theParseData = parse.getData();
            for (Outlink outlink : theParseData.getOutlinks()) {
                outLinksList.add(new Outlink(outlink.getToUrl(), outlink.getAnchor()));
            }
            resultText.append(fname).append(' ').append(parse.getText()).append(' ');
        } catch (ParseException e) {
            if (LOG.isInfoEnabled()) {
                LOG.info("fetch okay, but can't parse " + fname + ", reason: " + e.getMessage());
            }
        }
    }
    return resultText.toString();
}

Also used : Outlink(org.apache.nutch.parse.Outlink) ParseUtil(org.apache.nutch.parse.ParseUtil) Parse(org.apache.nutch.parse.Parse) ZipEntry(java.util.zip.ZipEntry) Metadata(org.apache.nutch.metadata.Metadata) Tika(org.apache.tika.Tika) URL(java.net.URL) ZipInputStream(java.util.zip.ZipInputStream) ParseData(org.apache.nutch.parse.ParseData) Content(org.apache.nutch.protocol.Content) ParseException(org.apache.nutch.parse.ParseException)

Example 28 with Tika

use of org.apache.tika.Tika in project gitblit by gitblit.

the class RawServlet method processRequest.

/**
 * Retrieves the specified resource from the specified branch of the
 * repository.
 *
 * <p>The repository is located by trying successively shorter prefixes of the
 * request path. A request for a single file is served either as UTF-8 text or
 * streamed as binary, depending on the resolved content type. A request for a
 * directory renders an index file when {@code renderIndex()} is true and one
 * can be loaded, and otherwise produces an HTML directory listing. Unknown
 * repositories, branches, or paths yield an error / not-found page.
 *
 * @param request the servlet request
 * @param response the servlet response
 * @throws javax.servlet.ServletException
 * @throws java.io.IOException
 */
private void processRequest(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    // getPathInfo() returns null when the request carries no extra path information.
    String pathInfo = request.getPathInfo();
    if (pathInfo == null) {
        pathInfo = "";
    }
    String path = pathInfo;
    if (path.toLowerCase().endsWith(".git")) {
        // forward to url with trailing /
        // this is important for relative pages links
        response.sendRedirect(request.getServletPath() + path + "/");
        return;
    }
    if (path.startsWith("/")) {
        // strip leading /
        path = path.substring(1);
    }
    // determine repository and resource from url
    String repository = path;
    Repository r = null;
    int terminator = repository.length();
    do {
        repository = repository.substring(0, terminator);
        r = repositoryManager.getRepository(repository, false);
        terminator = repository.lastIndexOf('/');
    } while (r == null && terminator > -1);
    ServletContext context = request.getSession().getServletContext();
    try {
        if (r == null) {
            // repository not found!
            String mkd = MessageFormat.format("# Error\nSorry, no valid **repository** specified in this url: {0}!", path);
            error(response, mkd);
            return;
        }
        // identify the branch
        String branch = getBranch(repository, request);
        if (StringUtils.isEmpty(branch)) {
            branch = r.getBranch();
            if (branch == null) {
                // no branches found!  empty?
                String mkd = MessageFormat.format("# Error\nSorry, no valid **branch** specified in this url: {0}!", path);
                error(response, mkd);
            } else {
                // redirect to default branch
                String base = request.getRequestURI();
                String url = base + branch + "/";
                response.sendRedirect(url);
            }
            return;
        }
        // identify the requested path
        String requestedPath = getPath(repository, branch, request);
        // identify the commit
        RevCommit commit = JGitUtils.getCommit(r, branch);
        if (commit == null) {
            // branch not found!
            String mkd = MessageFormat.format("# Error\nSorry, the repository {0} does not have a **{1}** branch!", repository, branch);
            error(response, mkd);
            return;
        }
        Map<String, String> quickContentTypes = new HashMap<>();
        quickContentTypes.put("html", "text/html");
        quickContentTypes.put("htm", "text/html");
        quickContentTypes.put("xml", "application/xml");
        quickContentTypes.put("json", "application/json");
        List<PathModel> pathEntries = JGitUtils.getFilesInPath(r, requestedPath, commit);
        if (pathEntries.isEmpty()) {
            // requested a specific resource
            String file = StringUtils.getLastPathElement(requestedPath);
            try {
                String ext = StringUtils.getFileExtension(file).toLowerCase();
                // We can't parse out an extension for classic "dotfiles", so make a general assumption that
                // they're text files to allow presenting them in browser instead of only for download.
                //
                // However, that only holds for files with no other extension included, for files that happen
                // to start with a dot but also include an extension, process the extension normally.
                // This logic covers .gitattributes, .gitignore, .zshrc, etc., but does not cover .mongorc.js, .zshrc.bak
                boolean isExtensionlessDotfile = file.charAt(0) == '.' && (file.length() == 1 || file.indexOf('.', 1) < 0);
                String contentType = isExtensionlessDotfile ? "text/plain" : quickContentTypes.get(ext);
                if (contentType == null) {
                    List<String> exts = runtimeManager.getSettings().getStrings(Keys.web.prettyPrintExtensions);
                    if (exts.contains(ext)) {
                        // extension is a registered text type for pretty printing
                        contentType = "text/plain";
                    } else {
                        // query Tika for the content type
                        Tika tika = new Tika();
                        contentType = tika.detect(file);
                    }
                }
                if (contentType == null) {
                    // ask the container for the content type
                    contentType = context.getMimeType(requestedPath);
                    if (contentType == null) {
                        // still unknown content type, assume binary
                        contentType = "application/octet-stream";
                    }
                }
                if (isTextType(contentType) || isTextDataType(contentType)) {
                    // load, interpret, and serve text content as UTF-8
                    String[] encodings = runtimeManager.getSettings().getStrings(Keys.web.blobEncodings).toArray(new String[0]);
                    String content = JGitUtils.getStringContent(r, commit.getTree(), requestedPath, encodings);
                    if (content == null) {
                        logger.error("RawServlet Failed to load {} {} {}", repository, commit.getName(), path);
                        notFound(response, requestedPath, branch);
                        return;
                    }
                    byte[] bytes = content.getBytes(Constants.ENCODING);
                    setContentType(response, contentType);
                    response.setContentLength(bytes.length);
                    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
                    sendContent(response, JGitUtils.getCommitDate(commit), is);
                } else {
                    // stream binary content directly from the repository
                    if (!streamFromRepo(request, response, r, commit, requestedPath)) {
                        logger.error("RawServlet Failed to load {} {} {}", repository, commit.getName(), path);
                        notFound(response, requestedPath, branch);
                    }
                }
                return;
            } catch (Exception e) {
                // Execution continues below and ends in the default 404 page.
                logger.error("RawServlet failed to serve " + requestedPath, e);
            }
        } else {
            // path request
            if (!pathInfo.endsWith("/")) {
                // redirect to trailing '/' url
                response.sendRedirect(request.getServletPath() + pathInfo + "/");
                return;
            }
            if (renderIndex()) {
                // locate and render an index file
                Map<String, String> names = new TreeMap<String, String>();
                for (PathModel entry : pathEntries) {
                    names.put(entry.name.toLowerCase(), entry.name);
                }
                List<String> extensions = new ArrayList<String>();
                extensions.add("html");
                extensions.add("htm");
                String content = null;
                for (String ext : extensions) {
                    String key = "index." + ext;
                    if (names.containsKey(key)) {
                        String fileName = names.get(key);
                        String fullPath = fileName;
                        if (!requestedPath.isEmpty()) {
                            fullPath = requestedPath + "/" + fileName;
                        }
                        String[] encodings = runtimeManager.getSettings().getStrings(Keys.web.blobEncodings).toArray(new String[0]);
                        String stringContent = JGitUtils.getStringContent(r, commit.getTree(), fullPath, encodings);
                        if (stringContent == null) {
                            continue;
                        }
                        content = stringContent;
                        requestedPath = fullPath;
                        break;
                    }
                }
                if (content != null) {
                    response.setContentType("text/html; charset=" + Constants.ENCODING);
                    byte[] bytes = content.getBytes(Constants.ENCODING);
                    response.setContentLength(bytes.length);
                    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
                    sendContent(response, JGitUtils.getCommitDate(commit), is);
                    return;
                }
                // No index file could be loaded: fall through to the directory listing
                // instead of dereferencing the missing content.
            }
        }
        // no content, document list or 404 page
        if (pathEntries.isEmpty()) {
            // default 404 page
            notFound(response, requestedPath, branch);
            return;
        } else {
            //
            // directory list
            //
            response.setContentType("text/html");
            response.getWriter().append("<style>table th, table td { min-width: 150px; text-align: left; }</style>");
            response.getWriter().append("<table>");
            response.getWriter().append("<thead><tr><th>path</th><th>mode</th><th>size</th></tr>");
            response.getWriter().append("</thead>");
            response.getWriter().append("<tbody>");
            String pattern = "<tr><td><a href=\"{0}/{1}\">{1}</a></td><td>{2}</td><td>{3}</td></tr>";
            final ByteFormat byteFormat = new ByteFormat();
            if (pathEntries.get(0).path.indexOf('/') > -1) {
                // we are in a subdirectory, add parent directory link
                String pp = URLEncoder.encode(requestedPath, Constants.ENCODING);
                pathEntries.add(0, new PathModel("..", pp + "/..", null, 0, FileMode.TREE.getBits(), null, null));
            }
            // NOTE(review): basePath is taken from the request and written into the href without
            // HTML escaping - confirm that is acceptable for this servlet.
            String basePath = request.getServletPath() + pathInfo;
            if (basePath.endsWith("/")) {
                // strip trailing slash
                basePath = basePath.substring(0, basePath.length() - 1);
            }
            for (PathModel entry : pathEntries) {
                String pp = URLEncoder.encode(entry.name, Constants.ENCODING);
                response.getWriter().append(MessageFormat.format(pattern, basePath, pp, JGitUtils.getPermissionsFromMode(entry.mode), entry.isFile() ? byteFormat.format(entry.size) : ""));
            }
            response.getWriter().append("</tbody>");
            response.getWriter().append("</table>");
        }
    } catch (Throwable t) {
        logger.error("Failed to write page to client", t);
    } finally {
        // r is null on the "repository not found" path; closing it unguarded would throw.
        if (r != null) {
            r.close();
        }
    }
}

Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Tika(org.apache.tika.Tika) TreeMap(java.util.TreeMap) ServletException(javax.servlet.ServletException) ParseException(java.text.ParseException) IOException(java.io.IOException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) Repository(org.eclipse.jgit.lib.Repository) PathModel(com.gitblit.models.PathModel) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteFormat(com.gitblit.utils.ByteFormat) ServletContext(javax.servlet.ServletContext) RevCommit(org.eclipse.jgit.revwalk.RevCommit)

Example 29 with Tika

use of org.apache.tika.Tika in project lucene-solr by apache.

the class MailEntityProcessor method addPartToDocument.

/**
 * Adds the content of a mail part to the given row, recursing into multipart
 * and embedded-message parts.
 *
 * <p>For {@code multipart/alternative} only the first body part is processed.
 * Leaf parts that are not attachments are parsed with Tika and appended under
 * {@code CONTENT} when {@code includeContent} is set. Attachment parts are
 * parsed and appended under {@code ATTACHMENT}, with their file name under
 * {@code ATTACHMENT_NAMES}, when {@code processAttachment} is set and the
 * extracted text is not blank.
 *
 * @param part the mail part to process
 * @param row the row receiving the extracted values
 * @param outerMost not read by this method; nested calls always pass {@code false}
 * @throws Exception if the part cannot be read or parsed
 */
public void addPartToDocument(Part part, Map<String, Object> row, boolean outerMost) throws Exception {
    if (part instanceof Message) {
        addEnvelopeToDocument(part, row);
    }
    String ct = part.getContentType().toLowerCase(Locale.ROOT);
    ContentType ctype = new ContentType(ct);
    if (part.isMimeType("multipart/*")) {
        Object content = part.getContent();
        if (content instanceof Multipart) {
            Multipart mp = (Multipart) content;
            int count = mp.getCount();
            if (part.isMimeType("multipart/alternative")) {
                // Only the first alternative is indexed; min() keeps an empty multipart
                // from triggering getBodyPart(0).
                count = Math.min(count, 1);
            }
            for (int i = 0; i < count; i++) {
                addPartToDocument(mp.getBodyPart(i), row, false);
            }
        } else {
            LOG.warn("Multipart content is a not an instance of Multipart! Content is: " + (content != null ? content.getClass().getName() : "null") + ". Typically, this is due to the Java Activation JAR being loaded by the wrong classloader.");
        }
    } else if (part.isMimeType("message/rfc822")) {
        addPartToDocument((Part) part.getContent(), row, false);
    } else {
        String disp = part.getDisposition();
        boolean isAttachment = disp != null && disp.equalsIgnoreCase(Part.ATTACHMENT);
        if (!isAttachment) {
            if (includeContent) {
                Metadata contentTypeHint = new Metadata();
                contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType().toLowerCase(Locale.ENGLISH));
                String content = (new Tika()).parseToString(part.getInputStream(), contentTypeHint);
                appendRowValue(row, CONTENT, content.trim());
            }
            return;
        }
        if (!processAttachment) {
            return;
        }
        InputStream is = part.getInputStream();
        String fileName = part.getFileName();
        Metadata contentTypeHint = new Metadata();
        contentTypeHint.set(Metadata.CONTENT_TYPE, ctype.getBaseType().toLowerCase(Locale.ENGLISH));
        String content = (new Tika()).parseToString(is, contentTypeHint);
        if (content == null || content.trim().length() == 0) {
            return;
        }
        appendRowValue(row, ATTACHMENT, content.trim());
        appendRowValue(row, ATTACHMENT_NAMES, fileName);
    }
}

/** Appends {@code value} to the list stored in {@code row} under {@code key}, creating the list on first use. */
@SuppressWarnings("unchecked")
private static void appendRowValue(Map<String, Object> row, String key, String value) {
    // The cast is safe as long as these keys are only ever populated through this method.
    List<String> values = (List<String>) row.get(key);
    if (values == null) {
        values = new ArrayList<String>();
        row.put(key, values);
    }
    values.add(value);
}

Also used : MimeMessage(javax.mail.internet.MimeMessage) IMAPMessage(com.sun.mail.imap.IMAPMessage) ContentType(javax.mail.internet.ContentType) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) Tika(org.apache.tika.Tika)

Example 30 with Tika

use of org.apache.tika.Tika in project ddf by codice.

the class URLResourceReader method getMimeType.

/**
 * Resolves the mime type of a resource.
 *
 * <p>Resolution is attempted in two steps: first the file extension of
 * {@code productName} is looked up through the configured mime type mapper (if
 * any); if that yields nothing usable and the URI uses the file scheme, Apache
 * Tika is asked to detect the type from the URL. A missing or
 * {@code content/unknown} result is reported as {@code application/unknown}.
 *
 * @param resourceURI the URI of the resource
 * @param productName the name whose extension is used for the mapper lookup
 * @return the resolved mime type, never {@code null}
 * @throws MimeTypeResolutionException declared for the mapper lookup
 * @throws IOException if Tika cannot read from the URL
 */
private String getMimeType(URI resourceURI, String productName) throws MimeTypeResolutionException, IOException {
    String resolvedType = null;

    // Step 1: extension-based lookup through the mapper, when one is available.
    if (mimeTypeMapper != null) {
        String extension = FilenameUtils.getExtension(productName);
        resolvedType = mimeTypeMapper.getMimeTypeForFileExtension(extension);
    } else {
        LOGGER.debug("mimeTypeMapper is NULL");
    }

    // Step 2: fall back to Tika, but only for file-scheme URIs and only when the
    // mapper produced nothing better than the default type.
    boolean unresolved = resolvedType == null || resolvedType.isEmpty() || resolvedType.equals(DEFAULT_MIME_TYPE);
    if (unresolved && URL_FILE_SCHEME.equalsIgnoreCase(resourceURI.getScheme())) {
        resolvedType = new Tika().detect(resourceURI.toURL());
        LOGGER.debug("Tika determined mimeType for url = {}", resolvedType);
    } else {
        LOGGER.debug("mimeType = {} set by MimeTypeMapper", resolvedType);
    }

    // Normalise "nothing found" so callers never receive null or content/unknown.
    if (resolvedType == null || "content/unknown".equals(resolvedType)) {
        resolvedType = "application/unknown";
    }
    LOGGER.debug("mimeType set to: {}", resolvedType);
    return resolvedType;
}

Also used : Tika(org.apache.tika.Tika)

Aggregations

Tika (org.apache.tika.Tika)54 Test (org.junit.Test)32 Metadata (org.apache.tika.metadata.Metadata)29 ByteArrayInputStream (java.io.ByteArrayInputStream)14 TikaTest (org.apache.tika.TikaTest)12 TikaConfig (org.apache.tika.config.TikaConfig)12 File (java.io.File)8 InputStream (java.io.InputStream)7 URL (java.net.URL)6 TikaInputStream (org.apache.tika.io.TikaInputStream)5 IOException (java.io.IOException)4 HashSet (java.util.HashSet)4 Ignore (org.junit.Ignore)4 FileInputStream (java.io.FileInputStream)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Content (org.apache.nutch.protocol.Content)3 Before (org.junit.Before)3 FileOutputStream (java.io.FileOutputStream)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)2