记录mongo查询语句

美美打不死

已于 2024-12-24 13:56:12 修改

阅读量776

点赞数 10

CC 4.0 BY-SA版权

文章标签： python mongodb

于 2024-01-08 11:19:38 首次发布

本文链接：https://blog.youkuaiyun.com/weixin_42219511/article/details/135364467

分组查询

查询website_type为CN_PBOC，根据city分组并统计每个city的数据量，按照city数据量倒序排序

// Count CN_PBOC documents per city, largest groups first.
db.rd_t_files.aggregate([
  { $match: { website_type: "CN_PBOC" } },
  { $group: { _id: "$city", count: { $sum: 1 } } },
  { $sort: { count: -1 } }
])
// Count CN_PBOC documents dated within 2021 per city, largest groups first.
// The range is a plain string comparison, so this assumes file_time is stored
// as a YYYY-MM-DD string (which sorts lexicographically in date order).
db.rd_t_files.aggregate([
  {
    $match: {
      website_type: "CN_PBOC",
      file_time: { $gte: "2021-01-01", $lte: "2021-12-31" }
    }
  },
  { $group: { _id: "$city", count: { $sum: 1 } } },
  { $sort: { count: -1 } }
])

查询website_type为UK_FCA，根据page分组并统计每个page的数据量，按照page数据量正序排序

// Count UK_FCA documents per page, smallest groups first.
db.rd_t_files.aggregate([
  { $match: { website_type: "UK_FCA" } },
  { $group: { _id: "$page", count: { $sum: 1 } } },
  // After $group only _id and count exist; sort on count (ascending),
  // because a "page" field is no longer present at this stage.
  { $sort: { count: 1 } }
])

查询website_type为UK_FCA，根据file_time的月份分组，统计每个月的数据量，并按月份排序（file_time为字符串，格式示例：2023-01-01）

// Count UK_FCA documents per month number of file_time, ordered by month.
db.rd_t_files.aggregate([
  { $match: { website_type: "UK_FCA" } },
  {
    $group: {
      // Parse the file_time string into a date, then take its month number
      _id: { $month: { $dateFromString: { dateString: "$file_time" } } },
      count: { $sum: 1 }
    }
  },
  { $sort: { _id: 1 } }
])

统计每个website_type的数量

// Count documents per website_type.
db.rd_t_files.aggregate([
  { $group: { _id: "$website_type", count: { $sum: 1 } } }
])

统计每个website_type的数量并且file_time在2022-01-01到2022-12-31之间

// Count documents per website_type, restricted to file_time values
// between 2022-01-01 and 2022-12-31 (inclusive string comparison).
db.rd_t_files.aggregate([
  { $match: { file_time: { $lte: "2022-12-31", $gte: "2022-01-01" } } },
  { $group: { _id: "$website_type", count: { $sum: 1 } } }
])

查询website_type为LU_CSSF，根据website_url分组并统计website_url重复的数量

// List LU_CSSF website_url values that occur more than once, with their counts.
db.rd_t_files.aggregate([
  { $match: { website_type: "LU_CSSF" } },
  { $group: { _id: "$website_url", count: { $sum: 1 } } },
  // Keep only the groups that contain duplicates
  { $match: { count: { $gt: 1 } } }
])

统计 "website_type": "CH_FINMA" 在21年、22年、23年的数据，按年份分组，并按年份倒序排列

// Count CH_FINMA documents per year for 2021-2023, newest year first.
db.rd_t_files.aggregate([
    // $match: keep CH_FINMA documents whose file_time lies between
    // 2021-01-01 and 2023-12-31 (inclusive string comparison).
    {
        $match: {
            "website_type": "CH_FINMA",
            "file_time": {
                $gte: "2021-01-01",
                $lte: "2023-12-31"
            }
        }
    },
    // $project: extract the year. This assumes file_time is a YYYY-MM-DD
    // string, so its first 4 characters are the year.
    {
        $project: {
            year: { $substr: ["$file_time", 0, 4] }
        }
    },
    // $group: one group per year, counting the documents in each.
    {
        $group: {
            _id: "$year",
            count: { $sum: 1 }
        }
    },
    // $sort: descending by year, as the task asks for reverse order.
    {
        $sort: {
            _id: -1
        }
    }
])

去重

要对集合中的某个字段进行去重查询，可以使用 distinct 方法

假设有一个集合 users，其中有字段 age。你想获取所有不同的年龄

// Return the distinct values of the "age" field in the users collection.
db.users.distinct("age")

# Using PyMongo from Python
from pymongo import MongoClient

# Connect to the local MongoDB server
client = MongoClient("mongodb://localhost:27017/")
# Pick the database, then the collection inside it
db = client["mydatabase"]
collection = db["users"]
# distinct() yields the unique values of the given field
distinct_ages = collection.distinct("age")
print(distinct_ages)


// Distinct values under a filter: only documents whose age is greater than 20
// contribute to the result.
db.users.distinct("age", { age: { $gt: 20 } })

去重多个字段

假设你有一个集合 users，其中包含字段 firstName 和 lastName，你希望对这两个字段进行去重

// De-duplicate on two fields: grouping on the (firstName, lastName) pair
// yields one result document per distinct combination.
db.users.aggregate([
  { $group: { _id: { firstName: "$firstName", lastName: "$lastName" } } }
])


# Using PyMongo from Python
from pymongo import MongoClient

# Connect to the local MongoDB server
client = MongoClient("mongodb://localhost:27017/")
# Pick the database, then the collection inside it
db = client["mydatabase"]
collection = db["users"]

# Group on the (firstName, lastName) pair: one output document per
# distinct combination
name_pair = {"firstName": "$firstName", "lastName": "$lastName"}
pipeline = [{"$group": {"_id": name_pair}}]
result = collection.aggregate(pipeline)

# Each document's _id holds one distinct name pair
for doc in result:
    print(doc["_id"])

模糊查询

查询website_type为UK_FCA，website_url是https://www.fca.org.uk/开头的数据

// Find UK_FCA documents whose website_url starts with https://www.fca.org.uk/.
// The dots are escaped so they match a literal "." instead of any character
// (the backslash is doubled because the pattern is written as a string).
db.rd_t_files.find({
  website_type: "UK_FCA",
  website_url: { $regex: "^https://www\\.fca\\.org\\.uk/" }
})

查询file_url为空的数据

// Find documents whose file_url is empty: { field: null } matches both
// documents where the field is null and documents where it is missing.
// (Combining $exists: false with $eq: null would match only missing fields.)
db.rd_t_files.find({ "file_url": null })

$eq 是一个查询操作符，用于在查询条件中比较字段的值是否等于指定的值。
$eq 表示等于（equal）
例如，如果要在名为 mycollection 的集合中查找 age 字段值等于 30 的文档，可以使用以下查询：

// Find the documents in mycollection whose age field equals 30.
db.mycollection.find({ age: { $eq: 30 } })

查询website_type为LU_CSSF，file_url不为空的数据

// Find LU_CSSF documents whose file_url field exists and is not null.
db.rd_t_files.find({
  website_type: "LU_CSSF",
  file_url: { $exists: true, $ne: null }
})

$ne 是一个查询操作符，用于在查询条件中比较字段的值是否不等于指定的值。
$ne 表示不等于（not equal），它用于在查询中排除字段值等于指定值的文档。

查询website_type为UK_FCA，file_time是2023年5月份的数据

// Find UK_FCA documents whose file_time falls between 2023-05-01 and
// 2023-05-31 (inclusive string comparison).
db.rd_t_files.find({
  website_type: "UK_FCA",
  file_time: { $gte: "2023-05-01", $lte: "2023-05-31" }
})

mongo更改字段名

更改某个集合中的update_date字段名称为update_time

// Rename the update_date field to update_time across the collection.
db.collection.updateMany(
  {},  // empty filter: selects every document
  { $rename: { update_date: "update_time" } }
)

python mongo命令

查找数据量

# Count the documents whose websiteType is 'pbc'
pbc_filter = {"websiteType": 'pbc'}
documents = collection.count_documents(pbc_filter)

"""查找 {"websiteType": "pbc"}，并且要求存在 successCount 这个字段"""

# Filter: websiteType equals "pbc" and the successCount field is present
query = {"websiteType": "pbc", "successCount": {"$exists": True}}

# Ordering: endTime descending
sort = [("endTime", -1)]

# Run the query, apply the ordering, and keep only the first result
cursor = collection.find(query)
cursor = cursor.sort(sort)
documents = cursor.limit(1)

获取查询到的文档

# Print a header line followed by the document, for every result
for document in documents:
    print("查询到的文档:", document, sep="\n")

需求：

1.查找给定的website_type列表，website_types=['CH_FINMA', 'CN_PBOC'];
2.已知的known_file_ids列表， known_file_ids=["53d70d2981db42d055181af0ea59c0f5", "998f7ea11158d0660d9a7dd4b46981dd"]
3.针对每个website_type查询随机5条记录，并且这5条记录不在已知的known_file_ids列表里，并且只获取mongo表里file_id这个字段

# Example code
website_types = ['CH_FINMA', 'CN_PBOC']
known_file_ids = ["53d70d2981db42d055181af0ea59c0f5", "998f7ea11158d0660d9a7dd4b46981dd"]
# Accumulates the sampled documents across all website types
results = []

# Sample 5 random documents for each website_type
for w_type in website_types:
    # Keep this website_type only and skip the already-known file_ids
    match_stage = {
        '$match': {
            'website_type': w_type,
            'file_id': {'$nin': known_file_ids},
        }
    }
    # Pick 5 documents at random
    sample_stage = {'$sample': {'size': 5}}
    # Return the file_id field only
    project_stage = {'$project': {'file_id': 1, '_id': 0}}
    pipeline = [match_stage, sample_stage, project_stage]

    # Run the aggregation and collect its output
    random_docs = list(collection.aggregate(pipeline))
    results.extend(random_docs)

# Flatten the collected documents into a plain list of file_id values
aa = [i["file_id"] for i in results]
print(aa)

其他

将aa.json导入mongo表

# Import /tmp/aa.json into the rd_t_files collection of the sfc_radar_test
# database on 127.0.0.1:27017; --jsonArray tells mongoimport the file holds
# a single JSON array of documents.
mongoimport --port 27017 --host 127.0.0.1  --db sfc_radar_test --collection rd_t_files --file /tmp/aa.json --jsonArray

mongo备份和恢复数据

# Back up data: mongodump exports the sfc_radar_test database from
# 192.168.32.157:27017 into the given output directory.
mongodump -h 192.168.32.157 --port 27017  -d sfc_radar_test  -o C:\Users\dell\Desktop\桌面文档

# Restore data: mongorestore loads the dumped sfc_radar_test directory into
# the "test" database on 127.0.0.1:27017.
mongorestore -h 127.0.0.1:27017 -d test --dir C:\Users\dell\Desktop\桌面文档\sfc_radar_test