Search in sources :

Example 11 with GridFSInputFile

Use of com.mongodb.gridfs.GridFSInputFile in the project mongo-hadoop by mongodb.

From the class PrepareShakespeare, method run.

/**
 * Splits a text file into individual works and uploads each one to GridFS.
 *
 * <p>The input is split wherever a line begins with four digits (each work is
 * dated with a year). The first section (legal notice/intro) is skipped. Every
 * following section is stored as one GridFS file named after the section's
 * first non-empty line. The target database is dropped before anything is
 * uploaded.
 *
 * @param args {@code args[0]} is the path of the input file, {@code args[1]}
 *             is the MongoDB connection URI (its database is used for GridFS)
 * @return 1 if fewer than two arguments were given, 0 otherwise
 * @throws Exception if the input file cannot be opened, a section has no
 *                   non-blank line to use as a title, or an upload fails
 */
@Override
public int run(final String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 1;
    }
    String inputFilePath = args[0];
    String mongoURI = args[1];
    MongoClientURI uri = new MongoClientURI(mongoURI);
    // Each work is dated with a year.
    Pattern delimiter = Pattern.compile("^\\d{4}", Pattern.MULTILINE);
    // Counts only the works actually written (the skipped intro is excluded).
    int numWorks = 0;
    MongoClient client = new MongoClient(uri);
    try {
        DB gridfsDB = client.getDB(uri.getDatabase());
        GridFS gridFS = new GridFS(gridfsDB);
        // NOTE(review): the file is read and the bytes are written using the
        // platform default charset; confirm that is intended.
        Scanner scanner = new Scanner(new File(inputFilePath));
        try {
            scanner.useDelimiter(delimiter);
            // Drop database before uploading anything. This happens only
            // after the input file has been opened successfully.
            gridfsDB.dropDatabase();
            boolean introSkipped = false;
            while (scanner.hasNext()) {
                String nextWork = scanner.next();
                // Skip legal notice/intro.
                if (!introSkipped) {
                    introSkipped = true;
                    continue;
                }
                String workTitle = null;
                Scanner titleScanner = new Scanner(nextWork);
                try {
                    while (titleScanner.hasNextLine()) {
                        String line = titleScanner.nextLine();
                        if (!line.isEmpty()) {
                            // Work title is first non-blank line.
                            workTitle = line;
                            break;
                        }
                    }
                } finally {
                    titleScanner.close();
                }
                if (null == workTitle) {
                    throw new IOException("Could not find a title!");
                }
                GridFSInputFile file = gridFS.createFile(workTitle);
                // Set chunk size low enough that we get multiple chunks.
                file.setChunkSize(1024 * 10);
                OutputStream os = file.getOutputStream();
                os.write(nextWork.getBytes());
                os.close();
                ++numWorks;
            }
        } finally {
            scanner.close();
        }
    } finally {
        // Always release the connection, even if setup or an upload failed.
        client.close();
    }
    System.out.printf("Wrote %d works to GridFS.\n", numWorks);
    return 0;
}
Also used : MongoClient(com.mongodb.MongoClient) Scanner(java.util.Scanner) Pattern(java.util.regex.Pattern) GridFSInputFile(com.mongodb.gridfs.GridFSInputFile) MongoClientURI(com.mongodb.MongoClientURI) OutputStream(java.io.OutputStream) IOException(java.io.IOException) GridFS(com.mongodb.gridfs.GridFS) GridFSInputFile(com.mongodb.gridfs.GridFSInputFile) File(java.io.File) DB(com.mongodb.DB)

Aggregations

GridFSInputFile (com.mongodb.gridfs.GridFSInputFile)11 GridFS (com.mongodb.gridfs.GridFS)5 OutputStream (java.io.OutputStream)4 DB (com.mongodb.DB)3 GridFSDBFile (com.mongodb.gridfs.GridFSDBFile)3 ByteArrayInputStream (java.io.ByteArrayInputStream)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 InputStream (java.io.InputStream)3 BasicDBObject (com.mongodb.BasicDBObject)2 MongoClient (com.mongodb.MongoClient)2 IMongodConfig (de.flapdoodle.embed.mongo.config.IMongodConfig)2 MongoCmdOptionsBuilder (de.flapdoodle.embed.mongo.config.MongoCmdOptionsBuilder)2 MongodConfigBuilder (de.flapdoodle.embed.mongo.config.MongodConfigBuilder)2 Net (de.flapdoodle.embed.mongo.config.Net)2 Storage (de.flapdoodle.embed.mongo.config.Storage)2 File (java.io.File)2 IOException (java.io.IOException)2 OutputStreamWriter (java.io.OutputStreamWriter)2 Random (java.util.Random)2 BeforeClass (org.junit.BeforeClass)2