参考文章
pymongo 快速教程:http://api.mongodb.com/python/current/tutorial.html
mongodb version: 3.02
引包、新建、查询
#coding:utf-8
from pymongo import *
############# 数据库连接
client = MongoClient('localhost', 27017)
############# database连接 (sql数据库)
db = client.test_database
# db = client['test-database']
############# collection (sql 数据表)
posts = db.posts
############# document 操作 (相当于 sql 数据的 CRUD 增删改查)
import datetime
post = {
"author": "Mike",
"text": "My first blog post!",
"tags": ["mongodb", "python", "pymongo"],
"date": datetime.datetime.utcnow()
}
post_id = posts.insert_one(post).inserted_id
############# 查看 collection (数据表)
db.collection_names(include_system_collections=False)
[u'posts']
############# 查看 document
posts.find_one(查询dict) # 匹配返回查询到的第一个值,
#查询条件错误 不会报错,返回 => None
posts.find() # 匹配返回一个 list 的游标,通过 索引值 查出来
posts.find(查询dict)[0] # 查询条件错误,会报错
# 通过统计 count 查看是否查询到数据
posts.find(查询dict).count() # 查询错误的话,返回 => 0
### 过滤剩下多少个
posts.find(查询dict).limit(n)
# 多个添加 document
posts.insert_many([{},{}...]) # 添加多个 document
############# 建立索引 # http://api.mongodb.com/python/current/tutorial.html#indexing
# 创建 collection 前创建 index
db.(collection).create_index([('user_id', pymongo.ASCENDING)],unique=True)
# 查看 collection 已建立的索引 (按索引名排序)
sorted(list(db.(collection).index_information()))
############ 关闭数据库连接
client.close()
## 异步关闭数据库情况
import signal
def graceful_reload(signum, traceback):
    """Explicitly close some global MongoClient object.

    Written to be installed as a signal handler. Python invokes handlers
    positionally with the signal number and the current stack frame, so the
    second argument is a frame object despite being named ``traceback``.
    Neither argument is used.
    """
    client.close()
signal.signal(signal.SIGHUP, graceful_reload)
pymongo 字符串必须是 utf-8 存入
pymongo 会默认的从 mongodb(UTF-8格式) 转成 python 的 unicode 展现出来;
所以,pymongo 存入数据库的 string 必须是 utf-8 格式的
坐标点样例
参考地址:http://api.mongodb.com/python/current/examples/geo.html
#coding:utf-8
from pymongo import MongoClient, GEO2D

# Connect to the server first, then pick the database from the client.
# (A Database object is not callable, so the order matters.)
client = MongoClient('localhost', 27017)
db = client.test_database

# A 2d geospatial index on "loc" is required before $near / $within queries.
db.places.create_index([("loc", GEO2D)])
lonlats = [{"loc": [2, 5]}, {"loc": [30, 5]}, {"loc": [1, 2]}, {"loc": [4, 4]}]
db.places.insert_many(lonlats)
###### 查找最近的点
[doc for doc in db.places.find({"loc": {"$near": [3, 6]}})];
# 查找最近的一个点
db.places.find({"loc": {"$near": [3, 6]}}).limit(1)[0]
###### 查找一定距离内
from bson.son import SON
query = {"loc": SON([("$near", [3, 6]), ("$maxDistance", 100)])};
[doc for doc in db.places.find(query).limit(3).sort('_id')]
# 查找不到,返回 []
###### 查找 矩形 范围内
query = {"loc": {"$within": {"$box": [[2, 2], [5, 6]]}}};
[doc for doc in db.places.find(query)]
# 查找不到,返回 []
###### 查找 圆形 范围内
query = {"loc": {"$within": {"$center": [[0, 0], 6]}}};
[doc for doc in db.places.find(query)]
# 查找不到返回空的 list; [doc for doc in db.places.find(query)] == [] # True
异步样例
参考地址:http://api.mongodb.com/python/current/examples/gevent.html
#coding:utf-8
容灾 replSet 使用pymongo 操作
python操作:http://api.mongodb.com/python/current/examples/high_availability.html
pymongo 初始化 mongodb 集群
启动 mongo server 时,需要指明 replSet 名
#coding:utf-8
from pymongo import MongoClient
client = MongoClient('localhost', 27017)
########### copy 是 replSet 的设置名
config = {'_id': 'copy', 'members': [
{'_id': 0, 'host': 'localhost:27017'},
{'_id': 1, 'host': 'localhost:27018'},
{'_id': 2, 'host': 'localhost:27019'}]
}
client.admin.command("replSetInitiate", config)
# {'ok': 1.0, ...}
############ 连接 指定的 容灾数据库 使用
MongoClient('localhost:27017', replicaset='copy') ### 需要指明 replSet 名称
MongoClient('localhost:27018', replicaset='copy') ### 需要指明 replSet 名称
MongoClient('localhost:27019', replicaset='copy') ### 需要指明 replSet 名称
对任意一个 replSet 的增删改操作
db0 = MongoClient('127.0.0.1:27017',replicaSet='copy').haha
db1 = MongoClient('127.0.0.1:27018',replicaSet='copy',readPreference='secondaryPreferred').haha
'''
会预先加载连接 secondary,异常了会回退到 Primary 上
'''
db2= MongoClient('127.0.0.1:27019',replicaSet='copy').haha
db0.read_preference
# 输出 Primary()
db1.read_preference
# 输出 SecondaryPreferred(tag_sets=None)
db2.hehe.read_preference
# 输出 Primary()
##################所以,不管连接的是哪个成员,默认的 read_preference 都是 Primary;想从 secondary 读取,必须显式指明 readPreference
# db1 Secondary 增删改操作
db1.hehe.insert_one({'ye':'yuguo'})
# db2 增删改操作
db2.hehe.insert_one({'27019':'xixi'})
# db0 Primary 去查询是否有数据
[doc for doc in db0.hehe.find({'ye':'yuguo'})]
# 有结果 {u'_id': ObjectId('59a76660421a2b1b2c675031'), u'ye': u'yuguo'},
# 有结果 {u'27019': u'xixi', u'_id': ObjectId('59a76967421a2b1b2c675033')}]
#####################################################################
'''
说明可以连接 replSet 里的任意一个成员进行操作 (指定 replicaSet 后,写操作由驱动自动发往 Primary),数据会相应地同步到其他成员
按照后面文档 readPreference 的属性解析,secondaryPreferred 会优先从 secondary 读取
'''
如果 primary 主机还在,想要强制使用 备用mongodb Secondary
http://api.mongodb.com/python/current/examples/high_availability.html#secondary-reads
更换 默认连接 replSet 的操作
#coding:utf-8
# ReadPreference is needed by the get_database / get_collection /
# with_options calls that follow this snippet.
from pymongo import MongoClient, ReadPreference

client = MongoClient(
    'localhost:27017',
    readPreference='secondaryPreferred',  # default read routing: secondary first, primary as fallback
    replicaSet='copy',  # must be the replSet name the servers were started with
)
# 查看
client.read_preference
#输出内容: SecondaryPreferred(tag_sets=None)
#### 手动的切换到 SECONDARY 的备份数据库
db_bk = client.get_database('test', read_preference=ReadPreference.SECONDARY)
db_bk.read_preference
# 输出 Secondary(tag_sets=None) # 表示已经切换到该备份,但还是属于 Secondary
#### 手动把读偏好 (read_preference) 切换回 PRIMARY —— 只改变读操作的路由,不会把 Secondary 节点变成 Primary
collection = db_bk.get_collection('test', read_preference=ReadPreference.PRIMARY)
collection.read_preference
# 输出 Primary() # 表示该 collection 的读操作已经改为走 Primary
########### 从已经存在的 collection 切换到 read_preference 模式 ##########
collection2 = collection.with_options(read_preference=ReadPreference.NEAREST)
collection.read_preference
#输出 Primary() # 还是旧的
collection2.read_preference
#输出 Nearest(tag_sets=None) #最新的
如果没有 Secondary,会默认的退回 primary 使用
tag_sets 属性
可标记从地理位置上的顺序来访问 replica 数据库;
tag_sets 必须是一个由字典组成的列表 [{},…]
from pymongo.read_preferences import Secondary
db = client.get_database('test',
read_preference=Secondary([{'dc': 'ny'}, {'dc': 'sf'}])
)
db.read_preference
# 输出:Secondary(tag_sets=[{'dc': 'ny'}, {'dc': 'sf'}])
# 会依次尝试带有 ny (New York)、sf (旧金山) 标签的节点;字典里的 value 值必须已经在相应的 replica 成员配置里设置过
readPreference 属性
优先使用:PRIMARY_PREFERRED、SECONDARY_PREFERRED、NEAREST 进行容灾
primaryPreferred、secondaryPreferred、nearest
- PRIMARY: Read from the primary. This is the default read preference, and provides the strongest consistency. If no primary is available, raise AutoReconnect. —– primary 不可用时,会抛出 AutoReconnect 异常 (不会自动改读 Secondary)
- PRIMARY_PREFERRED: Read from the primary if available, otherwise read from a secondary. —– primary不可用,会连接 Secondary
- SECONDARY: Read from a secondary. If no matching secondary is available, raise AutoReconnect. —– 没有匹配的 secondary 时,会抛出 AutoReconnect 异常
- SECONDARY_PREFERRED: Read from a secondary if available, otherwise from the primary. —– secondary不可用,会连接Primary
- NEAREST: Read from any available member. —– 不可用,自动查找连接
容灾 (replSet) 高可用
参考文章:
mongodb server操作: http://blog.youkuaiyun.com/mchdba/article/details/51638131
python操作:http://api.mongodb.com/python/current/examples/high_availability.html
window 平台 mongodb操作
启动 mongo server 时,需要指明 replSet 名
启动3个终端
$ mkdir -p ./copyMain/logs ./copyBk1/logs ./copyBk2/logs # 需要先建立文件夹
$ mongod.exe --port=27017 --replSet=copy --dbpath=./copyMain
$ mongod.exe --port=27018 --replSet=copy --dbpath=./copyBk1
$ mongod.exe --port=27019 --replSet=copy --dbpath=./copyBk2
replSet 名需要是一样的;primary (主机) 由集群选举产生,不一定是 27017 (下面的 rs.status() 输出里 27018 才是 primary)
mongodb 命令 初始化集群配置
# 连接
$ mongo.exe 127.0.0.1:27018 # 登录 slave 节点
####### 下面的操作是在 mongodb 终端下操作的
# 编写 replSet 配置
################## 第一个 _id 应该是 上述的 "copy"
> config = { _id:"copy", members:[
... {_id:0,host:"localhost:27017"},
... {_id:1,host:"localhost:27018"},
... {_id:2,host:"localhost:27019"}]
... };
#初始化 副本节点
> rs.initiate(config); # 成功时提示 {"ok":1,...}
# 初始化后
> copy:SECONDARY> rs.status()
{
"set" : "copy",
"date" : ISODate("2017-08-30T07:35:01.690Z"),
"myState" : 1,
"members" : [
{
"_id" : 0,
"name" : "localhost:27017",
"health" : 1,
"state" : 5,
"stateStr" : "STARTUP2", ################ 此处显示是否为 主机
"uptime" : 173,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-08-30T07:35:01.461Z"),
"lastHeartbeatRecv" : ISODate("2017-08-30T07:35:01.628Z"),
"pingMs" : 0,
"configVersion" : 1
},
{
"_id" : 1,
"name" : "localhost:27018",
"health" : 1,
"state" : 1,
"stateStr" : "PRIMARY", ################ 此处显示是否为 主机
"uptime" : 1767,
"optime" : Timestamp(1504078428, 1),
"optimeDate" : ISODate("2017-08-30T07:33:48Z"),
"electionTime" : Timestamp(1504078432, 1),
"electionDate" : ISODate("2017-08-30T07:33:52Z"),
"configVersion" : 1,
"self" : true
},
{
"_id" : 2,
"name" : "localhost:27019",
"health" : 1,
"state" : 5,
"stateStr" : "STARTUP2", ################ 此处显示是否为 主机
"uptime" : 173,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-08-30T07:35:00.301Z"),
"lastHeartbeatRecv" : ISODate("2017-08-30T07:35:00.006Z"),
"pingMs" : 1,
"configVersion" : 1
}
],
"ok" : 1
}
# 继续进入另外一个 mongo.exe 127.0.0.1:27019
$ mongo.exe 127.0.0.1:27019
> rs.status() ######### 状态值和上面的是一样的 27018 为主机
mongo server 添加用户并给 replSet 用户名密码认证
核心参考文章
server 启动
集群的名称(此处我换成了正式环境用的名称 backup,与上面的 copy和copyBk有出入 )
$ mongod.exe --port=27017 --dbpath=./backup1 --replSet=backup
$ mongod.exe --port=27018 --dbpath=./backup2 --replSet=backup
从 $ mongo.exe 127.0.0.1:27017 主机 Primary 进入
添加用户
注意:
因是集群,必须在Primary上来新建(子群节点上也会有该用户)
$ mongod.exe --port=27017 --dbpath=./backup1 --replSet=backup
$ mongo.exe 127.0.0.1:27017
> db.createRole({role:'sysadmin',roles:[],privileges:[{resource:{anyResource:true},actions:['anyAction']}]})
{
"role" : "sysadmin",
"roles" : [ ],
"privileges" : [
{
"resource" : {
"anyResource" : true
},
"actions" : [
"anyAction"
]
}
]
}
> db.createUser({user:'346243440@qq.com',pwd:'xxxx',roles:[{role:'sysadmin',db:'admin'}]})
Successfully added user: {
"user" : "346243440@qq.com",
"roles" : [
{
"role" : "sysadmin",
"db" : "admin"
}
]
}
登录测试
$ mongo.exe 127.0.0.1:27017
> use admin
> show dbs # 报错了
2017-09-01T14:31:24.411+0800 E QUERY Error: listDatabases failed:{
"errmsg" : "not authorized on admin to execute command { listDatabases:...
> db.auth('346243440@qq.com','xxxx')
> 1 # 认证成功
查看用户名认证的方式
> use admin
> db.auth('3462...','xxxx')
> db.system.users.find()
生成 keyFile
$ openssl rand -base64 1024 > mongodb.key
如果报错: 有异常的 ‘=’
解决办法:手动删除里面末尾的 ‘=’
重新启动 集群 replSet,并认证进入
$ mongod.exe --port=27017 --dbpath=./backup1 --replSet=backup --keyFile "mongodb.key"
$ mongod.exe --port=27018 --dbpath=./backup2 --replSet=backup --keyFile "mongodb.key"
$ mongo.exe 127.0.0.1:27017
> backup:PRIMARY> use admin
switched to db admin
backup:PRIMARY> db.auth('a','a')
1
> backup:PRIMARY> show dbs
admin 0.078GB
local 1.078GB
集群 slave 登录测试
rs.slaveOk() 打开可读权限
$ mongo.exe 127.0.0.1:27018
> use admin
> db.auth('a','a')
> show dbs # 报错如下
Error: listDatabases failed:{ "note" : "from execCommand", "ok" : 0, "errmsg" : "not master" }
> rs.slaveOk() # 打开可读权限
> backup:SECONDARY> show dbs
admin 0.078GB
local 1.078GB
—
—
后面的新建用户,权限不够,上面的是正确的
mongodb v.3.xxx 版本
> db.addUser("a","b")
2017-09-01T09:22:37.004+0800 E QUERY TypeError: Property 'addUser' of object admin is not a function
at (shell):1:4
>
原来在mongodb3.0中addUser已被废弃,具体参考:
> db.createUser({user:'a',pwd:'a',roles:['readWrite','dbAdmin']})
报错了 :
> db.createUser({user:'a',pwd:'a',roles:['readWrite','dbAdmin']})
2017-09-01T11:24:29.549+0800 E QUERY Error: couldn't add user: not master
at Error (<anonymous>)
at DB.createUser (src/mongo/shell/db.js:1101:11)
at (shell):1:4 at src/mongo/shell/db.js:1101
集群 新建用户必须是 Primary
查看 rs.status()
;发现当前连接的mongo 不是 primary,(是由于我建立了多个 replSet)
切换到 replSet Primary后
> db.createUser({user:'a',pwd:'a',roles:['readWrite','dbAdmin']})
Successfully added user: { "user" : "a", "roles" : [ "readWrite", "dbAdmin" ] }
>
连接mongodb slave报错:
2017-09-01T12:29:43.064+0800 E QUERY Error: listDatabases failed:{ "note" : "from execCommand", "ok" : 0, "errmsg" : "not master" }
参考地址:https://stackoverflow.com/questions/29232821/in-slave-mongodb-3-0-1-when-i-run-show-dbs-command-im-getting-the-below-erro
解决: rs.slaveOk()
—
—
使用 pymongo 连接认证
from pymongo import *

# quote_plus moved to urllib.parse in Python 3; fall back to the Python 2 location.
try:
    from urllib.parse import quote_plus
except ImportError:
    from urllib import quote_plus

host = '127.0.0.1:27017'
user = 'a'
password = 'a'
# Percent-escape the credentials so reserved characters (e.g. '@', ':', '/')
# in a user name or password cannot break the connection URI.
mongo_url = "mongodb://%s:%s@%s" % (quote_plus(user), quote_plus(password), host)
client = MongoClient(mongo_url, replicaSet='backup', readPreference='nearest')
# Parenthesised form prints the same tuple on Python 2 and Python 3.
print(client.address)  # ('127.0.0.1', 27017)
[doc for doc in client.testDB.hehe.find()]
# [{u'_id': ObjectId('59a91c57421a2b1cd8d1a625'), u'a': u'b'}]
上面的操作是已经完成认证并获取到数据,下面开始测试集群的容灾,手动关闭 Primary 数据库后,
从 mongodb 命令行 查看集群状态 rs.status()
Primary 连接状态: “stateStr” : “(not reachable/healthy)”,
backup:SECONDARY> rs.status()
{
"set" : "backup",
"date" : ISODate("2017-09-01T09:02:19.059Z"),
"myState" : 2,
"members" : [
{
"_id" : 0,
"name" : "127.0.0.1:27017",
"health" : 0,
"state" : 8,
"stateStr" : "(not reachable/healthy)", ##### Primary 已经挂掉了
"uptime" : 0,
"optime" : Timestamp(0, 0),
"optimeDate" : ISODate("1970-01-01T00:00:00Z"),
"lastHeartbeat" : ISODate("2017-09-01T09:02:16.232Z"),
"lastHeartbeatRecv" : ISODate("2017-09-01T09:02:11.226Z"),
"pingMs" : 0,
"lastHeartbeatMessage" : "Failed attempt to connect to 127.0.0.1:27017; couldn't connect to server 127.0.0.1:27017 (127.0.0.1), connection attempt failed",
"configVersion" : -1
},
{
"_id" : 1,
"name" : "127.0.0.1:27018",
"health" : 1,
"state" : 2,
"stateStr" : "SECONDARY",
"uptime" : 700,
"optime" : Timestamp(1504255064, 2),
"optimeDate" : ISODate("2017-09-01T08:37:44Z"),
"configVersion" : 1,
"self" : true
}
],
"ok" : 1
}
从 pymongo 查看 集群状态
client.address # 为空
client.address == None # True
[doc for doc in client.testDB.hehe.find()]
# [{u'_id': ObjectId('59a91c57421a2b1cd8d1a625'), u'a': u'b'}]
'''
可以查到数据,说明容灾起了作用,
但是 client.address 查看,当前连接的地址是为空的
TODO 待解决如何查看 当前节点的IP和端口
'''