{
"_id" : ObjectId("4fb9fb91d066d657de8d6f36"),
"text" : "MongoDB uses Map/Reduce #epic #win",
…
"user" : {
"friends_count" : 73,
…
"followers_count" : 102,
"id" : 53507833,
},
…
}
# Shard 1.
mongod --rest --shardsvr --port 27017 --dbpath /tmp/shard1/ --smallfiles
# Shard 2. It must not share a port or data directory with shard 1: a second
# mongod on the same port/dbpath cannot start while the first is running.
# NOTE(review): 27018 and /tmp/shard2/ are assumed values - confirm against the intended setup.
mongod --rest --shardsvr --port 27018 --dbpath /tmp/shard2/ --smallfiles
# Config server.
mongod --configsvr --port 10000 --dbpath /tmp/config/ --smallfiles
# Router, pointed at the config server started above.
mongos --port 22222 --configdb localhost:10000
1. db.tweets.mapReduce()
2. db.tweets.group()
3. db.tweets.aggregate()
4. MongoDB-Hadoop Adapter
5. db.tweets.find()
/**
 * Runs a callback once and reports what it returned and how long it took.
 *
 * @param {Function} c - Callback to time; invoked through `apply()` with no
 *     arguments.
 * @returns {{results: *, duration: number}} The callback's return value and
 *     the elapsed time in milliseconds (difference of two `Date.now()` reads).
 */
var measure = function(c) {
  var startedAt = Date.now();
  var outcome = c.apply();
  var elapsed = Date.now() - startedAt;
  return {
    results: outcome,
    duration: elapsed
  };
};
function() {
if (this.user != null) {
emit("user",
{userName: this.user.name,
followers: this.user.followers_count});
}
}
function(key, values) {
var result = null;
values.forEach( function(value) {
if (result == null ||
result.followers < value.followers) {
result = value;
}
})
return result;
}
// Find the most-followed user with group(): the empty key puts every tweet
// into one group, and the reducer keeps the highest follower count seen.
db.tweets.group({
  key: {},
  initial: { name: '', followers_count: 0 },
  reduce: function(obj, prev) {
    var author = obj.user;
    // Tweets without a user cannot beat the current best.
    if (author == null) {
      return;
    }
    // Strictly greater: the first user to reach a given count is kept.
    if (author.followers_count > prev.followers_count) {
      prev.name = author.name;
      prev.followers_count = author.followers_count;
    }
  }
})
// Find the most-followed user with the aggregation framework.
// The stages are passed as a single pipeline array.
db.tweets.aggregate([
  // One group per user name, carrying that name's highest follower count.
  {
    $group: {
      _id: { user_name: "$user.name" },
      followers_count: { $max: "$user.followers_count" }
    }
  },
  // Highest count first, then keep only the top group.
  { $sort: { "followers_count": -1 } },
  { $limit: 1 },
  // Flatten the result: drop _id and surface the name as a top-level field.
  {
    $project: {
      _id: 0,
      user_name: "$_id.user_name",
      followers_count: "$followers_count"
    }
  }
])
#!/usr/bin/env python
# encoding: utf-8
import sys
sys.path.append(".")
from pymongo_hadoop import BSONMapper
def mapper(documents):
    """Yield one follower record per tweet that carries a user.

    Args:
        documents: Iterable of dict-like tweet documents.

    Yields:
        ``{'_id': <UTF-8 encoded user name>, 'followers': <followers_count>}``
        for every document whose ``user`` entry is present and not None.
    """
    for doc in documents:
        # .get() lets tweets with no 'user' field be skipped instead of
        # raising KeyError.
        user = doc.get('user')
        if user is not None:
            yield {'_id': user['name'].encode('utf-8'),
                   'followers': user['followers_count']}
# Hand the mapper generator to pymongo_hadoop's BSONMapper.
# NOTE(review): presumably this drives the streaming input/output loop - confirm against pymongo_hadoop.
BSONMapper(mapper)
# Completion marker written to stderr (Python 2 print-chevron syntax).
print >> sys.stderr, "Done Mapping!"
#!/usr/bin/env python
# encoding: utf-8
import sys
sys.path.append('.')
from pymongo_hadoop import BSONReducer
def reducer(key, values):
    """Return the highest follower count seen for one key.

    Args:
        key: Text key for this group; it is UTF-8 encoded for logging and for
            the returned ``_id``.
        values: Iterable of dict-like records, each with a ``'followers'``
            entry.

    Returns:
        ``{'_id': <UTF-8 encoded key>, 'count': <max followers>}``; the count
        is 0 when ``values`` is empty or no value exceeds 0.
    """
    encoded_key = key.encode('utf-8')
    # Written with sys.stderr.write rather than the Python-2-only
    # ``print >>`` statement so the function also runs under Python 3.
    sys.stderr.write("Processing key %s\n" % encoded_key)
    top = 0
    for v in values:
        if top < v['followers']:
            top = v['followers']
    return {"_id": encoded_key, "count": top}
# Hand the reducer function to pymongo_hadoop's BSONReducer.
# NOTE(review): presumably this drives the streaming input/output loop - confirm against pymongo_hadoop.
BSONReducer(reducer)
# Completion marker written to stderr (Python 2 print-chevron syntax).
print >> sys.stderr, "Done Reducing!"
# Run the streaming job: read twitter.tweets, write results to twitter.top_user.
# The jar path is a single token and each line is continued with a backslash,
# so the whole command can be pasted as-is. -files takes a comma-separated
# list with no spaces between entries.
hadoop jar /usr/lib/hadoop/lib/mongo-hadoop-streaming-assembly-1.1.0-SNAPSHOT.jar \
  -files mapper.py,reducer.py \
  -inputURI mongodb://localhost:27017/twitter.tweets \
  -outputURI mongodb://localhost:27017/twitter.top_user \
  -mapper mapper.py \
  -reducer reducer.py
// Plain-query alternative: sort tweets by the embedded user's follower count,
// highest first, and keep only the first document.
db.tweets.find().sort( {"user.followers_count": -1} ).limit(1)
db.tweets.mapReduce()
db.tweets.group()
db.tweets.aggregate()
MongoDB-Hadoop Adapter
db.tweets.find()
db.tweets.mapReduce()
db.tweets.group()
db.tweets.aggregate()
MongoDB-Hadoop Adapter
db.tweets.find()
db.tweets.mapReduce()
db.tweets.group()
db.tweets.aggregate()
MongoDB-Hadoop Adapter
db.tweets.find()
db.tweets.mapReduce()
db.tweets.group()
db.tweets.aggregate()
MongoDB-Hadoop Adapter
db.tweets.find()
db.tweets.mapReduce()
db.tweets.group()
db.tweets.aggregate()
MongoDB-Hadoop Adapter
db.tweets.find()
Top Related