Example use of com.mongodb.gridfs.GridFSInputFile in the mongo-hadoop project by MongoDB:
the run method of the PrepareShakespeare class.
/**
 * Splits a plain-text dump of Shakespeare's complete works into individual
 * works and uploads each one to GridFS, using the work's title as the file name.
 *
 * @param args args[0] is the path of the input text file; args[1] is a MongoDB
 *             connection URI whose database will receive the GridFS files.
 * @return 0 on success, 1 if too few arguments were given.
 * @throws Exception if the input file cannot be read, a work has no title,
 *                   or a MongoDB operation fails.
 */
@Override
public int run(final String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 1;
    }
    String inputFilePath = args[0];
    String mongoURI = args[1];
    MongoClientURI uri = new MongoClientURI(mongoURI);
    MongoClient client = new MongoClient(uri);
    DB gridfsDB = client.getDB(uri.getDatabase());
    GridFS gridFS = new GridFS(gridfsDB);
    // Read explicitly as UTF-8; the no-charset Scanner constructor would use
    // the platform default and make results machine-dependent.
    Scanner scanner = new Scanner(new File(inputFilePath), "UTF-8");
    // Each work is dated with a year, so a 4-digit line starts a new work.
    Pattern delimiter = Pattern.compile("^\\d{4}", Pattern.MULTILINE);
    scanner.useDelimiter(delimiter);
    int numWorks = 0;
    // Drop database before uploading anything.
    gridfsDB.dropDatabase();
    try {
        boolean sawPreamble = false;
        while (scanner.hasNext()) {
            String nextWork = scanner.next();
            // The first token is the legal notice/intro preceding the first
            // dated work; skip it and do not count it as a work.
            if (!sawPreamble) {
                sawPreamble = true;
                continue;
            }
            // Work title is the first non-blank line of the work's text.
            String workTitle = null;
            Scanner titleScanner = new Scanner(nextWork);
            try {
                while (titleScanner.hasNextLine()) {
                    String line = titleScanner.nextLine();
                    if (!line.isEmpty()) {
                        workTitle = line;
                        break;
                    }
                }
            } finally {
                titleScanner.close();
            }
            if (null == workTitle) {
                throw new IOException("Could not find a title!");
            }
            GridFSInputFile file = gridFS.createFile(workTitle);
            // Set chunk size low enough that we get multiple chunks.
            file.setChunkSize(1024 * 10);
            OutputStream os = file.getOutputStream();
            try {
                // Encode explicitly; getBytes() with no argument would use the
                // platform default charset.
                os.write(nextWork.getBytes("UTF-8"));
            } finally {
                os.close();
            }
            // Count only works actually written, not the skipped preamble.
            ++numWorks;
        }
    } finally {
        scanner.close();
        client.close();
    }
    System.out.printf("Wrote %d works to GridFS.\n", numWorks);
    return 0;
}
Aggregations