Usage of com.mongodb.BasicDBObjectBuilder in the mongo-hadoop project (by MongoDB):
class MongoDBSink, method append.
/**
 * Converts a Flume event into a MongoDB document and inserts it into
 * {@code collection}. The document carries the event's timestamp,
 * nanosecond counter, host, priority, body and attribute map.
 *
 * @param e the event to persist
 * @throws IOException declared for the Sink contract; the driver itself
 *         surfaces failures as runtime MongoExceptions
 */
@Override
public void append(final Event e) throws IOException {
    /*
     * TODO - Performance would be best if we wrote directly to BSON here...
     * e.g. Not double converting the timestamp, and skipping string
     * encoding/decoding the message body
     */
    // Would it work to use Timestamp + Nanos + Hostname as the ID or is
    // there still a collision chance?
    BasicDBObjectBuilder b = BasicDBObjectBuilder.start("timestamp", new Date(e.getTimestamp()));
    b.append("nanoseconds", e.getNanos());
    b.append("hostname", e.getHost());
    b.append("priority", e.getPriority().name());
    // Decode the body with an explicit charset: new String(byte[]) uses the
    // platform-default charset and would corrupt non-ASCII messages on hosts
    // not configured for UTF-8. NOTE(review): assumes event bodies are UTF-8
    // encoded — confirm against the upstream source.
    b.append("message", new String(e.getBody(), java.nio.charset.StandardCharsets.UTF_8));
    b.append("metadata", new BasicDBObject(e.getAttrs()));
    collection.insert(b.get());
}
Usage of com.mongodb.BasicDBObjectBuilder in the mongo-hadoop project (by MongoDB):
class HiveMongoInputFormatTest, method testProjection.
@Test
public void testProjection() {
    // Request only columns "i" and "j", and turn off the read-all-columns flag
    // so the projection logic actually kicks in.
    JobConf jobConf = new JobConf();
    jobConf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "i,j");
    jobConf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    // The computed projection must include the selected columns and
    // explicitly exclude _id.
    DBObject expectedProjection = new BasicDBObjectBuilder().add("i", 1).add("j", 1).add("_id", 0).get();
    assertEquals(expectedProjection, inputFormat.getProjection(jobConf, null));
}
Usage of com.mongodb.BasicDBObjectBuilder in the mongo-hadoop project (by MongoDB):
class SampleSplitterTest, method setUpClass.
@BeforeClass
public static void setUpClass() {
    // Seed the input collection with roughly 10MB of data: 5120 documents,
    // each a little over 2KB thanks to a 2048-character padding field.
    DBCollection inputCollection = client.getDB(uri.getDatabase()).getCollection(uri.getCollection());
    char[] dashes = new char[2048];
    java.util.Arrays.fill(dashes, '-');
    String padding = new String(dashes);
    List<DBObject> documents = new ArrayList<DBObject>();
    for (int id = 0; id < 10 * 512; id++) {
        documents.add(new BasicDBObjectBuilder().add("_id", id).add("i", id).add("padding", padding).get());
    }
    inputCollection.insert(documents);
}
Usage of com.mongodb.BasicDBObjectBuilder in the mongo-hadoop project (by MongoDB):
class JSONPigReplace, method substitute.
/**
 * Substitutes the Pig objects contained in {@code t} into the stored
 * replacement strings.
 *
 * @param t  Pig tuple holding the values to substitute
 * @param s  schema describing {@code t}; may be a schema string, a
 *           {@code Schema}, or a {@code ResourceSchema}
 * @param un placeholder used for un-named schema fields
 * @return array of BasicBSONObjects containing all replacements for
 *         "marked" strings
 * @throws Exception if the schema is invalid or a tuple field cannot be read
 */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
    unnamedStr = un;
    final ResourceFieldSchema[] fields;
    try {
        // Normalize the three accepted schema representations into a
        // ResourceSchema. The types are disjoint, so check order is free.
        final ResourceSchema schema;
        if (s instanceof ResourceSchema) {
            schema = (ResourceSchema) s;
        } else if (s instanceof String) {
            schema = new ResourceSchema(Utils.getSchemaFromString((String) s));
        } else if (s instanceof Schema) {
            schema = new ResourceSchema((Schema) s);
        } else {
            throw new IllegalArgumentException("Schema must be represented either by a string or a Schema " + "object, not " + s);
        }
        fields = schema.getFields();
    } catch (Exception e) {
        throw new IllegalArgumentException("Invalid Schema Format", e);
    }
    // Render the tuple as a BSON object, one field at a time, guided by the
    // schema just resolved.
    final BasicDBObjectBuilder tupleBuilder = BasicDBObjectBuilder.start();
    for (int i = 0; i < fields.length; ++i) {
        writeField(tupleBuilder, fields[i], t.get(i));
    }
    BasicBSONObject pigObj = (BasicBSONObject) tupleBuilder.get();
    // First record which object each replacement marker maps to, then swap
    // every $elem marker in the initial BSON templates for its object.
    fillReplacementMap(pigObj);
    return replaceAll(initBSONs, reps);
}
Usage of com.mongodb.BasicDBObjectBuilder in the mongo-hadoop project (by MongoDB):
class SampleSplitter, method calculateSplits.
/**
 * Computes input splits by running a {@code $sample} aggregation over the
 * input collection and using every {@code samplesPerSplit}-th sampled key as
 * a split boundary. Requires MongoDB 3.2+ for {@code $sample}.
 *
 * @return the list of splits, optionally filtered of empty ones
 * @throws SplitFailedException if collstats or the aggregation fails
 */
@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    Configuration conf = getConfiguration();
    long splitSizeMB = MongoConfigUtil.getSplitSize(conf);
    long samplesPerSplit = MongoConfigUtil.getSamplesPerSplit(conf);
    DBObject splitKey = MongoConfigUtil.getInputSplitKey(conf);
    DBCollection inputCollection = MongoConfigUtil.getInputCollection(conf);
    // Use collection statistics to estimate how many documents fit in a split.
    CommandResult result = inputCollection.getDB().command(new BasicDBObject("collstats", inputCollection.getName()));
    if (!result.ok()) {
        throw new SplitFailedException("Could not execute command 'collstats': " + result.getErrorMessage());
    }
    int count = result.getInt("count");
    int avgObjSize = result.getInt("avgObjSize");
    // Guard: an empty collection reports avgObjSize == 0, which would make the
    // numDocsPerSplit computation below throw ArithmeticException (divide by
    // zero). Fall back to a single unbounded split, mirroring the small-
    // collection case further down.
    if (count == 0 || avgObjSize == 0) {
        return Collections.singletonList(createSplitFromBounds(null, null));
    }
    int numDocsPerSplit = (int) Math.floor(splitSizeMB * 1024 * 1024 / avgObjSize);
    int numSplits = (int) Math.ceil((double) count / numDocsPerSplit);
    int totalSamples = (int) Math.floor(samplesPerSplit * numSplits);
    if (count < numDocsPerSplit) {
        LOG.warn("Not enough documents for more than one split! Consider " + "setting " + MongoConfigUtil.INPUT_SPLIT_SIZE + " to a " + "lower value.");
        InputSplit split = createSplitFromBounds(null, null);
        return Collections.singletonList(split);
    }
    // Sample totalSamples documents, keep only the split key, and sort so the
    // sampled keys form ordered boundaries.
    DBObject[] pipeline = { new BasicDBObjectBuilder().push("$sample").add("size", totalSamples).get(), new BasicDBObject("$project", splitKey), new BasicDBObject("$sort", splitKey) };
    AggregationOutput aggregationOutput;
    try {
        aggregationOutput = inputCollection.aggregate(Arrays.asList(pipeline));
    } catch (MongoException e) {
        throw new SplitFailedException("Failed to aggregate sample documents. Note that this Splitter " + "implementation is incompatible with MongoDB versions " + "prior to 3.2.", e);
    }
    BasicDBObject previousKey = null;
    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
    int i = 0;
    // Every samplesPerSplit-th sample becomes the upper bound of one split and
    // the lower bound of the next.
    for (DBObject sample : aggregationOutput.results()) {
        if (i++ % samplesPerSplit == 0) {
            BasicDBObject bdbo = (BasicDBObject) sample;
            splits.add(createSplitFromBounds(previousKey, bdbo));
            previousKey = bdbo;
        }
    }
    // Final open-ended split from the last boundary to the end of the keyspace.
    splits.add(createSplitFromBounds(previousKey, null));
    if (MongoConfigUtil.isFilterEmptySplitsEnabled(conf)) {
        return filterEmptySplits(splits);
    }
    return splits;
}
Aggregations