MongoDB计数查询优化

4

我可以帮您进行翻译。以下是需要翻译的内容:

我的一个集合中有1644662条记录,我已经在名为“document_type_id”的列上创建了索引。当我运行查询时

db.getCollection('my_collection').find({"document_type_id": {"$in": ["7"]}}).count()

当我运行一个查询时,它在0.285秒内返回了计数为“753800”的结果。

db.getCollection('my_collection').find({"document_type_id": {"$in": ["3"]}}).count()

当我分别运行这两个查询时,它们都能完美地返回计数"819438"并且用时0.365秒。但是当我将这两个查询组合在一起运行时问题就来了。

db.getCollection('my_collection').find({"document_type_id": {"$in": ["3", "7"]}}).count()

这个查询需要54秒才能返回计数。第三个查询的执行计划如下:

{
"queryPlanner" : {
    "plannerVersion" : 1,
    "namespace" : "test.my_collection",
    "indexFilterSet" : false,
    "parsedQuery" : {
        "document_type_id" : {
            "$in" : [ 
                "3", 
                "7"
            ]
        }
    },
    "winningPlan" : {
        "stage" : "COUNT",
        "inputStage" : {
            "stage" : "FETCH",
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "document_type_id" : 1.0
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : []
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [ 
                        "[\"3\", \"3\"]", 
                        "[\"7\", \"7\"]"
                    ]
                }
            }
        }
    },
    "rejectedPlans" : []
},
"executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 0,
    "executionTimeMillis" : 74166,
    "totalKeysExamined" : 1573238,
    "totalDocsExamined" : 1573238,
    "executionStages" : {
        "stage" : "COUNT",
        "nReturned" : 0,
        "executionTimeMillisEstimate" : 73504,
        "works" : 1573239,
        "advanced" : 0,
        "needTime" : 1573238,
        "needYield" : 0,
        "saveState" : 14007,
        "restoreState" : 14007,
        "isEOF" : 1,
        "invalidates" : 0,
        "nCounted" : 1573238,
        "nSkipped" : 0,
        "inputStage" : {
            "stage" : "FETCH",
            "nReturned" : 1573238,
            "executionTimeMillisEstimate" : 73253,
            "works" : 1573239,
            "advanced" : 1573238,
            "needTime" : 0,
            "needYield" : 0,
            "saveState" : 14007,
            "restoreState" : 14007,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 1573238,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 1573238,
                "executionTimeMillisEstimate" : 2729,
                "works" : 1573239,
                "advanced" : 1573238,
                "needTime" : 0,
                "needYield" : 0,
                "saveState" : 14007,
                "restoreState" : 14007,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "document_type_id" : 1.0
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : []
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [ 
                        "[\"3\", \"3\"]", 
                        "[\"7\", \"7\"]"
                    ]
                },
                "keysExamined" : 1573238,
                "seeks" : 1,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    }
},
"serverInfo" : {
    "host" : "nb-fara",
    "port" : 27017,
    "version" : "3.4.9",
    "gitVersion" : "876ebee8c7dd0e2d992f36a848ff4dc50ee6603e"
},
"ok" : 1.0

}


  1. document_type_id 的值是字符串中的数字吗?
  2. 在您的 explain 输出中有 FETCH 阶段,那么生成 explain 的实际查询是什么?
  3. 您的 MongoDB 部署拓扑结构是独立部署、副本集还是分片集群?
- Wan B.
  1. 是的
  2. db.getCollection('my_collection').explain('executionStats').find({"document_type_id": {"$in": ["3", "7"]}}).count()
  3. 我正在我的本地机器笔记本电脑上进行测试。
- fazalerabbi
  1. 你的笔记本电脑内存有多大?
  2. 一个文档的平均大小是多少?Object.bsonsize(db.my_collection.findOne())
- Wan B.
笔记本电脑有16GB的RAM和1TB的硬盘。文档的大小为339667,由Object.bsonsize返回。 - fazalerabbi
5个回答

4

你应该使用

db.collection.count({...})

代替
db.collection.find({...}).count()

因为count()可以直接将查询作为参数,详见db.collection.count()

所以你的查询应该是:

db.getCollection('my_collection').count({"document_type_id": {"$in": ["3", "7"]}})

这个查询的解释如下(在我的机器上使用MongoDB 3.6.2完成,耗时约0.202秒):
{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "test.test",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "document_type_id" : {
                "$in" : [
                    "3",
                    "7"
                ]
            }
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "document_type_id" : 1
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [
                        "[\"3\", \"3\"]",
                        "[\"7\", \"7\"]"
                    ]
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 202,
        "totalKeysExamined" : 274112,
        "totalDocsExamined" : 0,
        "executionStages" : {
            "stage" : "COUNT",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 170,
            "works" : 274112,
            "advanced" : 0,
            "needTime" : 274111,
            "needYield" : 0,
            "saveState" : 2141,
            "restoreState" : 2141,
            "isEOF" : 1,
            "invalidates" : 0,
            "nCounted" : 274110,
            "nSkipped" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 274110,
                "executionTimeMillisEstimate" : 140,
                "works" : 274112,
                "advanced" : 274110,
                "needTime" : 1,
                "needYield" : 0,
                "saveState" : 2141,
                "restoreState" : 2141,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "document_type_id" : 1
                },
                "indexName" : "document_type_id_1",
                "isMultiKey" : false,
                "multiKeyPaths" : {
                    "document_type_id" : [ ]
                },
                "isUnique" : false,
                "isSparse" : false,
                "isPartial" : false,
                "indexVersion" : 2,
                "direction" : "forward",
                "indexBounds" : {
                    "document_type_id" : [
                        "[\"3\", \"3\"]",
                        "[\"7\", \"7\"]"
                    ]
                },
                "keysExamined" : 274112,
                "seeks" : 2,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "xxxxxxx",
        "port" : 27017,
        "version" : "3.6.2",
        "gitVersion" : "489d177dbd0f0420a8ca04d39fd78d0a2c539420"
    },
    "ok" : 1
}

谢谢,但我已经得到了50秒的执行时间。 - fazalerabbi
@fazalerabbi,你正在使用什么版本的mongodb? - felix
版本号是3.4.9。您也可以在查询解释中找到。 - fazalerabbi
@fazalerabbi 刚在 MongoDB 3.4.2 上进行了测试,查询花费了约0.5秒... 我猜这里还有其他问题,但如果没有完全相同的数据,很难确切知道是什么问题... - felix
我的每个文档都至少有45列,其中5列是子文档。总共有1644662个文档。其中15万个文档的文档类型ID为3和7。我在这个查询中卡住了,不知道怎么让它更快☺️ - fazalerabbi
显示剩余3条评论

1
db.getCollection('my_collection').find({"document_type_id": {"$or": ["3", "7"]}}).count()

这个 $or 语句将会给你相同的结果,并且减少了查询运行的时间。 - udaykumar vangari
谢谢,但我遇到了这个错误:2018-01-20T12:50:31.673+0500 E QUERY [thread1] Error: count failed: { "ok" : 0, "errmsg" : "unknown operator: $or", "code" : 2, "codeName" : "BadValue" } : - fazalerabbi
即使我真的不相信这个查询更快,但应该是 {"$or": [{"document_type_id": "3"}, {"document_type_id": "7"}]}。好奇想看看结果... - mtj
{"$or": [{"document_type_id": "3"}, {"document_type_id": "7"}]} yes this is the format i just forgot.I hope this is better when compared with $in - udaykumar vangari
它具有相同的执行时间(54秒)。 - fazalerabbi
MongoDB文档指出:“当使用$or与<表达式>一起进行相同字段值的等值检查时,请使用$in运算符代替$or运算符。” https://docs.mongodb.com/manual/reference/operator/query/or/ - fazalerabbi

0

使用数据库版本v3.0.7时,出现以下问题:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "test.test",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "document_type_id" : {
                "$in" : [
                    "3",
                    "7"
                ]
            }
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "COLLSCAN",
                "filter" : {
                    "document_type_id" : {
                        "$in" : [
                            "3",
                            "7"
                        ]
                    }
                },
                "direction" : "forward"
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 681,
        "totalKeysExamined" : 0,
        "totalDocsExamined" : 2000100,
        "executionStages" : {
            "stage" : "COUNT",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 630,
            "works" : 2000102,
            "advanced" : 0,
            "needTime" : 2000101,
            "needFetch" : 0,
            "saveState" : 15625,
            "restoreState" : 15625,
            "isEOF" : 1,
            "invalidates" : 0,
            "nCounted" : 400020,
            "nSkipped" : 0,
            "inputStage" : {
                "stage" : "COLLSCAN",
                "filter" : {
                    "document_type_id" : {
                        "$in" : [
                            "3",
                            "7"
                        ]
                    }
                },
                "nReturned" : 400020,
                "executionTimeMillisEstimate" : 600,
                "works" : 2000101,
                "advanced" : 400020,
                "needTime" : 1600081,
                "needFetch" : 0,
                "saveState" : 15625,
                "restoreState" : 15625,
                "isEOF" : 1,
                "invalidates" : 0,
                "direction" : "forward",
                "docsExamined" : 2000100
            }
        }
    },
    "serverInfo" : {
        "host" : "ptpll354",
        "port" : 5000,
        "version" : "3.0.7",
        "gitVersion" : "6ce7cbe8c6b899552dadd907604559806aa2e9bd"
    },
    "ok" : 1
}

你能在一个新的 mongod 实例中尝试一下吗? - M. Gopal

0

你尝试过聚合框架吗?

db.my_collection.aggregate( [
   { $project : { "document_type_id" : 1 }},
   { $match : { "document_type_id": { "$in": ["3", "7"] } } },
   { $group : { _id : '$document_type_id', count: { $sum: 1} } }
])

0

文档类型ID不是多个键,它只有单个值,可以是7或3。 - fazalerabbi

网页内容由stack overflow 提供, 点击上面的
可以查看英文原文,
原文链接