can we write mongodb crud queries and aggregate query together?

In MongoDB can we execute below written query?

db.dbaname.find(userName:"abc").aggregate([])

else is there any other way we can execute CRUD and aggregate query together.

Short answer - No you can't do this : .find(userName:"abc").aggregate([])

aggregation-pipeline is heavily used for reads which is mostly similar to .find() but capable of executing complex queries with help of it's multiple stages & many aggregation-operators. there are only two stages in aggregation $out & $merge that can perform writes to database - these stages are not that much used compared to other stages & needs to be used only when needed & as they need to be last stages in aggregation pipeline, then all the previous stages are to be tested very well. So when it comes to CRUD eliminating CUD you'll benefit over R - Reads.

Same .find(userName:"abc") can be written as :

.aggregate( [ { $match : { userName:"abc"} } ] ) // Using `$match` stage

for any particular person document, you can use the populate() function like

var query = mongoose.model("person").find({ "name": "foo" }).populate("projects.tags");

and if you want to search for any persons that have any tag with 'mongodb' or 'node js' for example, you can include the query option in the populate() function overload as:

var query = mongoose.model("person").find({ "name": "foo" }).populate({
    "path": "projects.tags",
    "match": { "en": { "$in": ["mongodb", "node js"] } }
});

if you want all tags existing in "project.tags" for all persons, then aggregation framework is the way to go. consider running this pipeline on the person collection and uses the $lookup operator to do a left join on the tags collection:

mongoose.model('person').aggregate([
    { "$unwind": "$projects" },
    { "$unwind": "$projects.tags" },
    {
        "$lookup": {
            "from": "tags",
            "localfield": "projects.tags",
            "foreignfield": "_id",
            "as": "resultingtagsarray"
        }
    },
    { "$unwind": "$resultingtagsarray" },
    {
        "$group": {
            "_id": null,
            "alltags": { "$addtoset": "$resultingtagsarray" },
            "count": { "$sum": 1 }
        }
    }
 ]).exec(function(err, results){
    console.log(results);
 })

for any particular person then apply a $match pipeline as the first step to filter the documents:

mongoose.model('person').aggregate([
    { "$match": { "name": "foo" } },
    { "$unwind": "$projects" },
    { "$unwind": "$projects.tags" },
    {
        "$lookup": {
            "from": "tags",
            "localfield": "projects.tags",
            "foreignfield": "_id",
            "as": "resultingtagsarray"
        }
    },
    { "$unwind": "$resultingtagsarray" },
    {
        "$group": {
            "_id": null,
            "alltags": { "$addtoset": "$resultingtagsarray" },
            "count": { "$sum": 1 }
        }
    }
 ]).exec(function(err, results){
    console.log(results);
 })

another workaround if you are using mongodb versions >= 2.6 or <= 3.0 which do not have support for the $lookup operator is to populate the results from the aggregation as:

mongoose.model('person').aggregate([
    { "$unwind": "$projects" },
    { "$unwind": "$projects.tags" },    
    {
        "$group": {
            "_id": null,
            "alltags": { "$addtoset": "$projects.tags" }
        }
    }
 ], function(err, result) {
    mongoose.model('person')
    .populate(result, { "path": "alltags" }, function(err, results) {
        if (err) throw err;
        console.log(json.stringify(results, undefined, 4 ));
    });
});

you need to "project" the match here since all the mongodb query does is look for a "document" that has "at least one element" that is "greater than" the condition you asked for.

so filtering an "array" is not the same as the "query" condition you have.

a simple "projection" will just return the "first" matched item to that condtion. so it's probably not what you want, but as an example:

order.find({ "articles.quantity": { "$gte": 5 } })
    .select({ "articles.$": 1 })
    .populate({
        "path": "articles.article",
        "match": { "price": { "$lte": 500 } }
    }).exec(function(err,orders) {
       // populated and filtered twice
    }
)

that "sort of" does what you want, but the problem is really going to be that will only ever return at most one element within the "articles" array.

to do this properly you need .aggregate() to filter the array content. ideally this is done with mongodb 3.2 and $filter. but there is also a special way to .populate() here:

order.aggregate(
    [
        { "$match": { "artciles.quantity": { "$gte": 5 } } },
        { "$project": {
            "orderdate": 1,
            "articles": {
                "$filter": {
                    "input": "$articles",
                    "as": "article",
                    "cond": {
                       "$gte": [ "$$article.quantity", 5 ]
                    }
                }
            },
            "__v": 1
        }}
    ],
    function(err,orders) {
        order.populate(
            orders.map(function(order) { return new order(order) }),
            {
                "path": "articles.article",
                "match": { "price": { "$lte": 500 } }
            },
            function(err,orders) {
                // now it's all populated and mongoose documents
            }
        )
    }
)

so what happens here is the actual "filtering" of the array happens within the .aggregate() statement, but of course the result from this is no longer a "mongoose document" because one aspect of .aggregate() is that it can "alter" the document structure, and for this reason mongoose "presumes" that is the case and just returns a "plain object".

that's not really a problem, since when you see the $project stage, we are actually asking for all of the same fields present in the document according to the defined schema. so even though it's just a "plain object" there is no problem "casting" it back into an mongoose document.

this is where the .map() comes in, as it returns an array of converted "documents", which is then important for the next stage.

now you call model.populate() which can then run the further "population" on the "array of mongoose documents".

the result then is finally what you want.


mongodb older versions than 3.2.x

the only things that really change here are the aggregation pipeline, so that is all that needs to be included for brevity.

mongodb 2.6 - can filter arrays with a combination of $map and $setdifference. the result is a "set" but that is not a problem when mongoose creates an _id field on all sub-document arrays by default:

    [
        { "$match": { "artciles.quantity": { "$gte": 5 } } },
        { "$project": {
            "orderdate": 1,
            "articles": {
                "$setdiffernce": [
                   { "$map": {
                      "input": "$articles",
                      "as": "article",
                      "in": {
                         "$cond": [
                             { "$gte": [ "$$article.price", 5 ] },
                             "$$article",
                             false
                         ]
                      }
                   }},
                   [false]
                ]
            },
            "__v": 1
        }}
    ],

older revisions of than that must use $unwind:

    [
        { "$match": { "artciles.quantity": { "$gte": 5 } }},
        { "$unwind": "$articles" },
        { "$match": { "artciles.quantity": { "$gte": 5 } }},
        { "$group": {
          "_id": "$_id",
          "orderdate": { "$first": "$orderdate" },
          "articles": { "$push": "$articles" },
          "__v": { "$first": "$__v" }
        }}
    ],

the $lookup alternative

another alternate is to just do everything on the "server" instead. this is an option with $lookup of mongodb 3.2 and greater:

order.aggregate(
    [
        { "$match": { "artciles.quantity": { "$gte": 5 } }},
        { "$project": {
            "orderdate": 1,
            "articles": {
                "$filter": {
                    "input": "$articles",
                    "as": "article",
                    "cond": {
                       "$gte": [ "$$article.quantity", 5 ]
                    }
                }
            },
            "__v": 1
        }},
        { "$unwind": "$articles" },
        { "$lookup": {
            "from": "articles",
            "localfield": "articles.article",
            "foreignfield": "_id",
            "as": "articles.article"
        }},
        { "$unwind": "$articles.article" },
        { "$group": {
          "_id": "$_id",
          "orderdate": { "$first": "$orderdate" },
          "articles": { "$push": "$articles" },
          "__v": { "$first": "$__v" }
        }},
        { "$project": {
            "orderdate": 1,
            "articles": {
                "$filter": {
                    "input": "$articles",
                    "as": "article",
                    "cond": {
                       "$lte": [ "$$article.article.price", 500 ]
                    }
                }
            },
            "__v": 1
        }}
    ],
    function(err,orders) {

    }
)

and though those are just plain documents, it's just the same results as what you would have got from the .populate() approach. and of course you can always go and "cast" to mongoose documents in all cases again if you really must.

the "shortest" path

this really goes back to the orginal statement where you basically just "accept" that the "query" is not meant to "filter" the array content. the .populate() can happilly do so becuse it's just another "query" and is stuffing in "documents" by convenience.

so if you really are not saving "bucketloads" of bandwith by the removal of additional array members in the orginal document array, then just .filter() them out in post processing code:

order.find({ "articles.quantity": { "$gte": 5 } })
    .populate({
        "path": "articles.article",
        "match": { "price": { "$lte": 500 } }
    }).exec(function(err,orders) {
        orders = orders.filter(function(order) {
            order.articles = order.articles.filter(function(article) {
                return (
                    ( article.quantity >= 5 ) &&
                    ( article.article != null )
                )
            });
            return order.aricles.length > 0;
        })

        // orders has non matching entries removed            
    }
)

you can try $facet with $addfields for parallel aggregation in 3.4 version.

this will reduce the overall complexity and you can run groupings with its own matching input at the same time.

the below code builds the aggregation pipeline dynamically based on request object.

// sample request
var request = {
  "name":"ringgo",
  "year": 2017,
  "month":3,
  "week":12
};

// build initial match document on name

var match1 = {
  name: request["name"]
};

// build project & facet document for date based aggregation

var addfields = {};
var facet = {};

// add year followed by year facet

if (request["year"]) {
    addfields["year"] = { "$year": "$date" },
    facet["yearly"] = 
      [
        {
          "$match":{ "year": request["year"] }
        },
        {
          "$group": {
            "_id": {
              "name": "$name",
              "year": "$year"
            },
            "spend": { "$push":"$amount" },
            "total": { "$sum": "$amount" }
        }
      }
    ];
}

// add month followed by month facet

if (request["month"]) {
    addfields["month"] = { "$month": "$date" };
    facet["monthly"] = 
      [
        {
          "$match":{ "month": request["month"] }
        },
        {
          "$group": {
            "_id": {
              "name": "$name",
              "month": "$month"
            },
            "spend": { "$push":"$amount" },
            "total": { "$sum": "$amount" }
         }
      }
    ];
}

// add week followed by week facet

if (request["week"]) {
    addfields["week"] = { "$week": "$date" };
    facet["weekly"] = 
      [
        {
          "$match":{ "week": request["week"] }
        },
        {
          "$group": {
            "_id": {
              "name": "$name",
              "week": "$week"
            },
            "spend": { "$push":"$amount" },
            "total": { "$sum": "$amount" }
         }
      }
    ];
}

// use aggregate builder

statements.aggregate()
        .match(match1)
        .append({"$addfields": addfields}) // no addfields stage in mongoose builder
        .facet(facet)
        .exec(function(err, data) {});

mongo shell query for name/year/month/week criteria.

db.statements.aggregate({
    '$match': {
        name: 'ringgo'
    }
}, {
    '$addfields': {
        year: {
            '$year': '$date'
        },
        month: {
            '$month': '$date'
        },
        week: {
            '$week': '$date'
        }
    }
}, {
    '$facet': {
        yearly: [{
                '$match': {
                    year: 2017
                }
            },
            {
                '$group': {
                    _id: {
                        name: '$name',
                        year: '$year'
                    },
                    spend: {
                        '$push': '$amount'
                    },
                    total: {
                        '$sum': '$amount'
                    }
                }
            }
        ],
        monthly: [{
                '$match': {
                    month: 3
                }
            },
            {
                '$group': {
                    _id: {
                        name: '$name',
                        month: '$month'
                    },
                    spend: {
                        '$push': '$amount'
                    },
                    total: {
                        '$sum': '$amount'
                    }
                }
            }
        ],
        weekly: [{
                '$match': {
                    week: 12
                }
            },
            {
                '$group': {
                    _id: {
                        name: '$name',
                        week: '$week'
                    },
                    spend: {
                        '$push': '$amount'
                    },
                    total: {
                        '$sum': '$amount'
                    }
                }
            }
        ]
    }
})

sample response

    {
    "yearly": [{
        "_id": {
            "name": "ringgo",
            "year": 2017
        },
        "spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
        "total": -25.799999999999997
    }],
    "monthly": [{
        "_id": {
            "name": "ringgo",
            "month": 3
        },
        "spend": [-3.3, -6.3, -3.3, -6.3, -3.3, -3.3],
        "total": -25.799999999999997
    }],
    "weekly": [{
        "_id": {
            "name": "ringgo",
            "week": 12
        },
        "spend": [-6.3, -3.3],
        "total": -9.6
    }]
}

you can run the similar aggregation for year/month and year input values.

so you can see that there is a different number of items in monthlyspends in previous output compared to that shown in the output from the find by name. also you can see that some of the values are being summed together in monthlyspends when they shouldn't be.

this happens in $group 1 where $week aggregation rolls up each of two dates [15, 16] amount into week 11 and other two dates [22, 23] amount into week 12 later to show up as summed totals in monthyspends.

it seems as of mongodb 2.6, the ability to limit the size of an array using $slice or $push with the .aggregate() function/command is unsupported. here's the feature request on the mongodb issue tracker.

what i would do is output the aggregated result to an collection. then update the collection.

example:

setup:

use test;
var rint = function(x) {
    return 1 + ~~(math.random() * x);
};
var robj = function() {
    return {
        "timestamp": new date(),
        "category": "movies" + rint(5),
        "term": "my movie" + rint(20)
    }
};
for (var i = 0, l = 100; i < l; i++) {
    db.al.insert(robj());
}

aggregate query

db.al_out.drop();

db.al.aggregate([
  { 
    $group : { 
      _id :  { 
        category: "$category",
        term: "$term",
      },
      total: { $sum : 1 } 
    }
  },
  { $sort : { total : -1 } },
  { 
    $group : { 
        _id :  "$_id.category",
        terms: { 
            $push: { 
                term: "$_id.term",
                total: "$total"
            }
        }
     }
  }
  ,{ $out : "al_out" }  // output the documents to `db.al_out`
]);

// limit the size of terms to 3 elements.
db.al_out.update( {}, {
  $push : {
    terms : { 
      $each : [],
      $slice : 3 
    }
  }
}, {
  multi:true
});

result:

db.al_out.find();

{ "_id" : "movies1", "terms" : [ { "term" : "my movie7", "total" : 3 }, { "term" : "my movie6", "total" : 3 }, { "term" : "my movie17", "total" : 2 } ] }
{ "_id" : "movies2", "terms" : [ { "term" : "my movie3", "total" : 4 }, { "term" : "my movie11", "total" : 2 }, { "term" : "my movie2", "total" : 2 } ] }
{ "_id" : "movies4", "terms" : [ { "term" : "my movie9", "total" : 3 }, { "term" : "my movie1", "total" : 3 }, { "term" : "my movie7", "total" : 2 } ] }
{ "_id" : "movies3", "terms" : [ { "term" : "my movie19", "total" : 5 }, { "term" : "my movie8", "total" : 4 }, { "term" : "my movie14", "total" : 4 } ] }
{ "_id" : "movies5", "terms" : [ { "term" : "my movie7", "total" : 6 }, { "term" : "my movie17", "total" : 4 }, { "term" : "my movie3", "total" : 2 } ] }

Tags: Mongodb Mongoose Mongodb Query Aggregation Framework