1. Cassandra 安装:
1.1. CentOS安装
Installation from RPM packages
For the `<release series>` specify the major version number, without dot, and with an appended x.
The latest is 40x.
For older releases, the `<release series>` can be one of 311x, 30x, or 22x.
(Not all versions of Apache Cassandra are available, since building RPMs is a recent addition to the project.)
Add the Apache repository of Cassandra to /etc/yum.repos.d/cassandra.repo, for example for the latest 4.0 version:
[cassandra]
name=Apache Cassandra
baseurl=https://downloads.apache.org/cassandra/redhat/40x/
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://downloads.apache.org/cassandra/KEYS
Install Cassandra, accepting the gpg key import prompts:
sudo yum install cassandra
Start Cassandra (will not start automatically):
service cassandra start
Systemd-based distributions may require running `systemctl daemon-reload` once to make Cassandra available as a systemd service. This should happen automatically when running the command above.
Make Cassandra start automatically after reboot:
chkconfig cassandra on
Please note that official RPMs for Apache Cassandra only have been available recently and are not tested thoroughly on all platforms yet. We appreciate your feedback and support and ask you to post details on any issues in the corresponding Jira ticket.
1.2. Windows安装
参考
存在问题:
cqlsh.bat 执行需要python2.7 的环境, 如果本身使用conda, 建议手动安装一个python2.7。
2. Cassandra的命令
2.1 键空间（Keyspace）
-- Create a keyspace that uses SimpleStrategy with three replicas.
CREATE KEYSPACE tutorialspoint
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3};

-- List the keyspaces from cqlsh, then read the stored definitions directly.
DESCRIBE KEYSPACES;
SELECT * FROM system_schema.keyspaces;

-- Lower the replication factor to one and read the definitions again.
ALTER KEYSPACE tutorialspoint
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
SELECT * FROM system_schema.keyspaces;

-- Drop the keyspace and list the remaining ones.
DROP KEYSPACE tutorialspoint;
DESCRIBE KEYSPACES;
2.2 表
-- The keyspace commands earlier in this file finish with DROP KEYSPACE, so
-- recreate the keyspace here; otherwise USE fails when the notes are followed
-- in order. IF NOT EXISTS keeps this a no-op when the keyspace is still present.
CREATE KEYSPACE IF NOT EXISTS tutorialspoint
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
USE tutorialspoint;

-- Create the employee table keyed by emp_id.
CREATE TABLE emp (
    emp_id int PRIMARY KEY,
    emp_name text,
    emp_city text,
    emp_sal varint,
    emp_phone varint
);
SELECT * FROM emp;

-- Add a column to the existing table and read it back.
ALTER TABLE emp ADD emp_email text;
SELECT * FROM emp;

-- Drop the table and list the tables left in the current keyspace.
DROP TABLE emp;
DESCRIBE COLUMNFAMILIES;
3. Cassandra的python编程
conda install cassandra-driver
# coding:utf-8
from cassandra import ConsistencyLevel
# 引入数据
from cassandra.cluster import Cluster
# 引入DCAwareRoundRobinPolicy模块,可用来自定义驱动程序的行为
# from cassandra.policies import DCAwareRoundRobinPolicy
from cassandra.auth import PlainTextAuthProvider
from cassandra.query import SimpleStatement
import pandas as pd
# Module-level connection shared by the demo functions below: a single
# contact point on the local machine, port 9042.
cluster = Cluster(contact_points=['127.0.0.1'], port=9042)
# Open a session without selecting a keyspace.
session = cluster.connect()
def createKeySpaceAndTable():
    """Create the ``test`` keyspace and the ``test.user`` table if missing.

    Runs three statements on the module-level ``session``: create the
    keyspace (``SimpleStrategy``, replication factor 1), switch the session
    to it, and create the ``user`` table with ``name`` as primary key.

    Both CREATE statements carry ``IF NOT EXISTS`` so the function can be
    called on every run of the script; without it the second run fails
    because the keyspace and table already exist.
    """
    # Keyspace with simple replica placement and a single replica.
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS test WITH replication = "
        "{'class':'SimpleStrategy', 'replication_factor' : 1};"
    )
    # Make ``test`` the current keyspace of the shared session.
    session.execute('use test;')
    # Table: name (text, primary key), age (int), email (varchar).
    session.execute(
        'create table if not exists test.user'
        '(name text primary key, age int, email varchar);'
    )
    # Optional cleanup, disabled in this demo:
    # session.execute('drop table test.user;')
    # session.shutdown()
    # cluster.shutdown()
# createKeySpaceAndTable()
def selectAllStatus():
    """Print schema metadata for ``test.user``, then shut the cluster down.

    Walks the module-level ``cluster`` metadata from the keyspace map down
    to the ``age`` column of ``test.user``, printing each level followed by
    a dashed separator. Afterwards the shared cluster is shut down and its
    ``is_shutdown`` flag is printed.
    """
    divider = '----------'

    # Each level is looked up right before it is printed, so a missing key
    # raises at the same point in the output as a direct chained lookup.
    keyspace_map = cluster.metadata.keyspaces
    print(keyspace_map)
    print(divider)

    table_map = keyspace_map['test'].tables
    print(table_map)
    print(divider)

    user_table = table_map['user']
    print(user_table)
    print(divider)

    column_map = user_table.columns
    print(column_map)
    print(divider)

    print(column_map['age'])
    print(divider)

    # Close the shared connection and report whether it is closed.
    cluster.shutdown()
    print(cluster.is_shutdown)
def testInsertAndSelectTable():
    """Insert three rows into ``test.user``, print the table, then disconnect.

    Uses the module-level ``session`` for the statements. When done, the
    shared ``cluster`` is shut down and its ``is_shutdown`` flag is printed.
    """
    insert_cql = 'insert into test.user (name, age, email) values (%s, %s, %s);'
    sample_users = [
        ['aaa', 21, '222@21.com'],
        ['bbb', 22, 'bbb@22.com'],
        ['ddd', 20, 'ccc@20.com'],
    ]
    # One parameterized insert per sample row, in list order.
    for user_values in sample_users:
        session.execute(insert_cql, user_values)

    # Read everything back and print one row per line.
    for record in session.execute('select * from test.user;'):
        print(record)

    # Close the shared connection and report whether it is closed.
    cluster.shutdown()
    print(cluster.is_shutdown)
def testContectRemoteDatabase():
    """Query an authenticated cluster and append the rows to a CSV file.

    Creates its own ``Cluster`` and session (local names, separate from the
    module-level ones), runs one SELECT at consistency level ONE, converts
    the rows to a pandas ``DataFrame`` and appends it, header included, to
    ``连接远程数据库.csv``.

    The cluster is shut down in a ``finally`` block so the connection is
    released even when connecting, querying or writing the file raises.
    """
    # Replace with the addresses of the remote nodes,
    # e.g. ['1.1.1.1', '2.2.2.2', '3.3.3.3'].
    contact_points = ['127.0.0.1']
    # Replace with the real account name and password of the cluster.
    auth_provider = PlainTextAuthProvider(username='cassandra', password='cassandra')
    cluster = Cluster(contact_points=contact_points, auth_provider=auth_provider)
    try:
        session = cluster.connect()
        # Placeholder query: substitute a real keyspace and table name.
        cql_str = 'select * from keyspace.table limit 5;'
        simple_statement = SimpleStatement(cql_str, consistency_level=ConsistencyLevel.ONE)
        # timeout=None is passed explicitly instead of the session default.
        execute_result = session.execute(simple_statement, timeout=None)
        # Iterate the result set through its public interface rather than
        # reading the private ``_current_rows`` attribute.
        rows = list(execute_result)
        result = pd.DataFrame(rows)
        # mode='a' appends to an existing file; the header row is written
        # on every call.
        result.to_csv('连接远程数据库.csv', mode='a', header=True)
    finally:
        cluster.shutdown()
if __name__ == '__main__':
    # Demo sequence: create the schema, then insert and read back rows.
    # selectAllStatus can be used as the second step instead to inspect the
    # schema metadata; it also shuts the shared cluster down when it finishes.
    for demo_step in (createKeySpaceAndTable, testInsertAndSelectTable):
        demo_step()