
Example 36 with BasicDBObjectBuilder

Use of com.mongodb.BasicDBObjectBuilder in the project mongo-hadoop by mongodb.

The class MongoDBSink, method append:

@Override
public void append(final Event e) throws IOException {
    /*
     * TODO - Performance would be best if we wrote directly to BSON here...
     * e.g. Not double converting the timestamp, and skipping string
     * encoding/decoding the message body
     */
    // Would it work to use Timestamp + Nanos + Hostname as the ID or is
    // there still a collision chance?
    BasicDBObjectBuilder b = BasicDBObjectBuilder.start("timestamp", new Date(e.getTimestamp()));
    b.append("nanoseconds", e.getNanos());
    b.append("hostname", e.getHost());
    b.append("priority", e.getPriority().name());
    b.append("message", new String(e.getBody()));
    b.append("metadata", new BasicDBObject(e.getAttrs()));
    collection.insert(b.get());
}
Also used : BasicDBObject(com.mongodb.BasicDBObject) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) Date(java.util.Date)
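
append() converts each Flume event into a single document with the chained start()/append()/get() builder calls and inserts it through the legacy driver API. Below is a minimal standalone sketch of the same pattern; the local mongod connection and the database/collection names ("flume", "events") are assumptions for illustration only.

import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import java.util.Date;

public class BuilderInsertSketch {
    public static void main(String[] args) {
        // Hypothetical connection details; adjust host, database and collection for your setup.
        MongoClient client = new MongoClient("localhost", 27017);
        DBCollection events = client.getDB("flume").getCollection("events");

        // Same chained-builder pattern as MongoDBSink.append: start() seeds the
        // first field, append() adds the rest, get() materializes the DBObject.
        DBObject doc = BasicDBObjectBuilder.start("timestamp", new Date())
                .append("hostname", "host-01")
                .append("priority", "INFO")
                .append("message", "hello from the sketch")
                .get();
        events.insert(doc);
        client.close();
    }
}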

Example 37 with BasicDBObjectBuilder

Use of com.mongodb.BasicDBObjectBuilder in the project mongo-hadoop by mongodb.

The class HiveMongoInputFormatTest, method testProjection:

@Test
public void testProjection() {
    String selectedColumns = "i,j";
    JobConf conf = new JobConf();
    conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, selectedColumns);
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    assertEquals(new BasicDBObjectBuilder().add("i", 1).add("j", 1).add("_id", 0).get(), inputFormat.getProjection(conf, null));
}
Also used : BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) JobConf(org.apache.hadoop.mapred.JobConf) HiveTest(com.mongodb.hadoop.hive.HiveTest) Test(org.junit.Test)
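
The expected value in the assertion is simply the standard MongoDB projection document {"i": 1, "j": 1, "_id": 0}. A minimal sketch of building that document with BasicDBObjectBuilder and passing it to a find() call follows; the connection and the collection name are hypothetical and nothing here is taken from HiveMongoInputFormat itself.

import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;

public class ProjectionSketch {
    public static void main(String[] args) {
        // Hypothetical connection; any legacy-driver DBCollection would do.
        MongoClient client = new MongoClient();
        DBCollection coll = client.getDB("test").getCollection("docs");

        // Include columns i and j, suppress the automatic _id field.
        DBObject projection = new BasicDBObjectBuilder()
                .add("i", 1)
                .add("j", 1)
                .add("_id", 0)
                .get();

        // find(query, projection) returns only the projected fields.
        DBCursor cursor = coll.find(new BasicDBObject(), projection);
        for (DBObject doc : cursor) {
            System.out.println(doc);
        }
        client.close();
    }
}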

Example 38 with BasicDBObjectBuilder

Use of com.mongodb.BasicDBObjectBuilder in the project mongo-hadoop by mongodb.

The class SampleSplitterTest, method setUpClass:

@BeforeClass
public static void setUpClass() {
    DBCollection inputCollection = client.getDB(uri.getDatabase()).getCollection(uri.getCollection());
    // Fill up with 10MB. Average object size is just over 2KB.
    StringBuilder paddingBuilder = new StringBuilder();
    for (int i = 0; i < 2048; ++i) {
        paddingBuilder.append("-");
    }
    String padding = paddingBuilder.toString();
    List<DBObject> documents = new ArrayList<DBObject>();
    for (int i = 0; i < 10 * 512; i++) {
        documents.add(new BasicDBObjectBuilder().add("_id", i).add("i", i).add("padding", padding).get());
    }
    inputCollection.insert(documents);
}
Also used : DBCollection(com.mongodb.DBCollection) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ArrayList(java.util.ArrayList) DBObject(com.mongodb.DBObject) BeforeClass(org.junit.BeforeClass)
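
The fixture inserts 10 * 512 = 5120 documents of roughly 2KB each, i.e. about 10MB, which is what gives SampleSplitter (Example 40) enough data to produce several splits. The sketch below checks the resulting collection statistics with the same collstats command the splitter issues; the database and collection names are hypothetical stand-ins for the test's MongoClientURI.

import com.mongodb.BasicDBObject;
import com.mongodb.CommandResult;
import com.mongodb.DB;
import com.mongodb.MongoClient;

public class CollStatsSketch {
    public static void main(String[] args) {
        // Hypothetical names; the test derives them from uri.getDatabase()/uri.getCollection().
        MongoClient client = new MongoClient();
        DB db = client.getDB("mongo_hadoop");
        CommandResult stats = db.command(new BasicDBObject("collstats", "sample_splitter"));

        // count * avgObjSize should land near the ~10MB the fixture inserted
        // (5120 documents at roughly 2KB each).
        System.out.println("count = " + stats.getInt("count")
                + ", avgObjSize = " + stats.getInt("avgObjSize"));
        client.close();
    }
}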

Example 39 with BasicDBObjectBuilder

Use of com.mongodb.BasicDBObjectBuilder in the project mongo-hadoop by mongodb.

The class JSONPigReplace, method substitute:

/*
 * Returns result of substituting pig objects in Tuple t into
 * initStr
 *
 * @param Tuple t : Pig tuple containing pig objects
 * @param Object s : Schema representing Tuple t
 * @param String un : String to represent un-named Schema Fields
 *
 * @return Array of BasicBSONObjects that contain all replacements for "marked" strings
 */
public BasicBSONObject[] substitute(final Tuple t, final Object s, final String un) throws Exception {
    unnamedStr = un;
    final ResourceFieldSchema[] fields;
    try {
        final ResourceSchema schema;
        if (s instanceof String) {
            schema = new ResourceSchema(Utils.getSchemaFromString((String) s));
        } else if (s instanceof Schema) {
            schema = new ResourceSchema((Schema) s);
        } else if (s instanceof ResourceSchema) {
            schema = (ResourceSchema) s;
        } else {
            throw new IllegalArgumentException("Schema must be represented either by a string or a Schema " + "object, not " + s);
        }
        fields = schema.getFields();
    } catch (Exception e) {
        throw new IllegalArgumentException("Invalid Schema Format", e);
    }
    // Make Tuple t into BSONObject using schema provided and store result in pObj
    final BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
    for (int i = 0; i < fields.length; i++) {
        writeField(builder, fields[i], t.get(i));
    }
    // BSONObject that represents Pig Tuple input using Pig Schema
    BasicBSONObject pObj = (BasicBSONObject) builder.get();
    // fill map of replacement strings to corresponding objects to replace these strings with
    fillReplacementMap(pObj);
    // Now, replace replacement strings (of form $elem) with corresponding objects in pObj
    return replaceAll(initBSONs, reps);
}
Also used : BasicBSONObject(org.bson.BasicBSONObject) ResourceSchema(org.apache.pig.ResourceSchema) BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) ResourceSchema(org.apache.pig.ResourceSchema) ResourceFieldSchema(org.apache.pig.ResourceSchema.ResourceFieldSchema) Schema(org.apache.pig.impl.logicalLayer.schema.Schema) IOException(java.io.IOException)
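
At its core, substitute() walks the schema fields in order, appends the matching tuple values to a BasicDBObjectBuilder, and downcasts the result to BasicBSONObject (safe, since BasicDBObject extends BasicBSONObject). Here is a minimal sketch of that builder loop with plain Java stand-ins for the Pig schema and tuple; the field names and values are made up for illustration.

import com.mongodb.BasicDBObjectBuilder;
import org.bson.BasicBSONObject;

public class TupleToBsonSketch {
    public static void main(String[] args) {
        // Stand-ins for the Pig schema fields and tuple values; in substitute()
        // these come from ResourceSchema.getFields() and Tuple.get(i).
        String[] fieldNames = {"name", "age"};
        Object[] values = {"alice", 30};

        BasicDBObjectBuilder builder = BasicDBObjectBuilder.start();
        for (int i = 0; i < fieldNames.length; i++) {
            builder.add(fieldNames[i], values[i]);
        }

        // BasicDBObject extends BasicBSONObject, so the same downcast used in
        // substitute() works here as well.
        BasicBSONObject doc = (BasicBSONObject) builder.get();
        System.out.println(doc);
    }
}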

Example 40 with BasicDBObjectBuilder

Use of com.mongodb.BasicDBObjectBuilder in the project mongo-hadoop by mongodb.

The class SampleSplitter, method calculateSplits:

@Override
public List<InputSplit> calculateSplits() throws SplitFailedException {
    Configuration conf = getConfiguration();
    long splitSizeMB = MongoConfigUtil.getSplitSize(conf);
    long samplesPerSplit = MongoConfigUtil.getSamplesPerSplit(conf);
    DBObject splitKey = MongoConfigUtil.getInputSplitKey(conf);
    DBCollection inputCollection = MongoConfigUtil.getInputCollection(conf);
    CommandResult result = inputCollection.getDB().command(new BasicDBObject("collstats", inputCollection.getName()));
    if (!result.ok()) {
        throw new SplitFailedException("Could not execute command 'collstats': " + result.getErrorMessage());
    }
    int count = result.getInt("count");
    int avgObjSize = result.getInt("avgObjSize");
    int numDocsPerSplit = (int) Math.floor(splitSizeMB * 1024 * 1024 / avgObjSize);
    int numSplits = (int) Math.ceil((double) count / numDocsPerSplit);
    int totalSamples = (int) Math.floor(samplesPerSplit * numSplits);
    if (count < numDocsPerSplit) {
        LOG.warn("Not enough documents for more than one split! Consider " + "setting " + MongoConfigUtil.INPUT_SPLIT_SIZE + " to a " + "lower value.");
        InputSplit split = createSplitFromBounds(null, null);
        return Collections.singletonList(split);
    }
    DBObject[] pipeline = { new BasicDBObjectBuilder().push("$sample").add("size", totalSamples).get(), new BasicDBObject("$project", splitKey), new BasicDBObject("$sort", splitKey) };
    AggregationOutput aggregationOutput;
    try {
        aggregationOutput = inputCollection.aggregate(Arrays.asList(pipeline));
    } catch (MongoException e) {
        throw new SplitFailedException("Failed to aggregate sample documents. Note that this Splitter " + "implementation is incompatible with MongoDB versions " + "prior to 3.2.", e);
    }
    BasicDBObject previousKey = null;
    List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
    int i = 0;
    for (DBObject sample : aggregationOutput.results()) {
        if (i++ % samplesPerSplit == 0) {
            BasicDBObject bdbo = (BasicDBObject) sample;
            splits.add(createSplitFromBounds(previousKey, bdbo));
            previousKey = bdbo;
        }
    }
    splits.add(createSplitFromBounds(previousKey, null));
    if (MongoConfigUtil.isFilterEmptySplitsEnabled(conf)) {
        return filterEmptySplits(splits);
    }
    return splits;
}
Also used : BasicDBObjectBuilder(com.mongodb.BasicDBObjectBuilder) MongoException(com.mongodb.MongoException) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) BasicDBObject(com.mongodb.BasicDBObject) DBObject(com.mongodb.DBObject) AggregationOutput(com.mongodb.AggregationOutput) CommandResult(com.mongodb.CommandResult) DBCollection(com.mongodb.DBCollection) BasicDBObject(com.mongodb.BasicDBObject) InputSplit(org.apache.hadoop.mapreduce.InputSplit)
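
The heart of this splitter is the three-stage aggregation { $sample: { size: N } }, { $project: splitKey }, { $sort: splitKey }, which requires MongoDB 3.2 or newer. Below is a minimal standalone sketch of running the same pipeline with the legacy driver; the client, collection name, sample size, and the _id split key are hypothetical stand-ins for the values calculateSplits() reads from the Hadoop Configuration.

import com.mongodb.AggregationOutput;
import com.mongodb.BasicDBObject;
import com.mongodb.BasicDBObjectBuilder;
import com.mongodb.DBCollection;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import java.util.Arrays;

public class SamplePipelineSketch {
    public static void main(String[] args) {
        // Hypothetical connection and split key; the splitter gets these from MongoConfigUtil.
        MongoClient client = new MongoClient();
        DBCollection coll = client.getDB("mongo_hadoop").getCollection("sample_splitter");
        DBObject splitKey = new BasicDBObject("_id", 1);
        int totalSamples = 100;

        // Same three-stage pipeline as SampleSplitter; $sample needs MongoDB 3.2+.
        DBObject[] pipeline = {
            new BasicDBObjectBuilder().push("$sample").add("size", totalSamples).get(),
            new BasicDBObject("$project", splitKey),
            new BasicDBObject("$sort", splitKey)
        };
        AggregationOutput out = coll.aggregate(Arrays.asList(pipeline));
        for (DBObject sample : out.results()) {
            System.out.println(sample);
        }
        client.close();
    }
}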

Aggregations

BasicDBObjectBuilder (com.mongodb.BasicDBObjectBuilder): 44 usages
DBObject (com.mongodb.DBObject): 30 usages
Test (org.junit.Test): 21 usages
DBCollection (com.mongodb.DBCollection): 18 usages
BasicDBObject (com.mongodb.BasicDBObject): 12 usages
ArrayList (java.util.ArrayList): 7 usages
BasicDBList (com.mongodb.BasicDBList): 6 usages
List (java.util.List): 4 usages
ResourceFieldSchema (org.apache.pig.ResourceSchema.ResourceFieldSchema): 4 usages
HiveTest (com.mongodb.hadoop.hive.HiveTest): 3 usages
IOException (java.io.IOException): 3 usages
Map (java.util.Map): 3 usages
MongoException (com.mongodb.MongoException): 2 usages
Configuration (org.apache.hadoop.conf.Configuration): 2 usages
JobConf (org.apache.hadoop.mapred.JobConf): 2 usages
InputSplit (org.apache.hadoop.mapreduce.InputSplit): 2 usages
ResourceSchema (org.apache.pig.ResourceSchema): 2 usages
RyaStatement (org.apache.rya.api.domain.RyaStatement): 2 usages
BasicBSONObject (org.bson.BasicBSONObject): 2 usages
GeometryType (org.teiid.core.types.GeometryType): 2 usages