spark&python 操作mysql数据库案例

最新推荐文章于 2025-04-14 22:26:17 发布

心动（大数据进阶）

最新推荐文章于 2025-04-14 22:26:17 发布

阅读量562

点赞数 14

文章标签：数据库 mysql

本文链接：https://blog.youkuaiyun.com/weixin_56130021/article/details/141018331

版权

使用python操作数据库案例

import pymysql
# 创建连接对象
db_connection = pymysql.connect(host='hadoop11',port=3306, user='root', password='123456',database='test1')

# 创建游标对象
db_cursor=db_connection.cursor()
# 增加
# sql="insert into stu(stu_id,stu_name,age,gender) values (%s, %s, %s, %s)"
# db_cursor.execute(sql,(2,'xiaoMing',34,'男'))
# db_cursor.execute('insert into stu(stu_id,stu_name,age,gender) values(%s,%s,%s,%s)',(4,'zsss',18,'男'))
# 删除
# db_cursor.execute('delete from stu where stu_id = %s',4)
# 修改
# db_cursor.execute("update stu set stu_name = %s,age=%s where stu_id = %s",('ww',22,3))

sql = 'select * from stu'
db_cursor.execute(sql)

# while True:
#     result = cursor.fetchone()
#     if result is None:
#         break
#     print(result)

# result = cursor.fetchmany(2)
# print(result)

result = db_cursor.fetchall()
print(result)

使用spark操作mysql数据库

from pyspark.sql import SparkSession

if __name__ == '__main__':
    spark = SparkSession.builder \
        .config('hive.metastore.uris', 'thrift://hadoop11:9083') \
        .config('spark.sql.warehouse.dir', 'hdfs://hadoop11:8020/user/hive/warehouse') \
        .appName('spark操作mysql数据库').enableHiveSupport().getOrCreate()
    # 读取mysql上的数据
    props = {'user': 'root', 'password': '123456', 'driver': 'com.mysql.jdbc.Driver'}
    dataFrame=spark.read.jdbc(url="jdbc:mysql://hadoop11:3306/test1", table="funnel_name", properties=props)
    #创建临时表
    dataFrame.createTempView("funnel_name")

    sql1 = f'''
            select id from funnel_name 
        '''
    result_df = spark.sql(sql1)
    #将查询出来的id存储到一个列表中
    id_list = [row['id'] for row in result_df.collect()]
    # 打印列表
    print(id_list)

Spark读取本地文件，并写入MySQL中的表中，读取MySQL中的表

from pyspark.sql import SparkSession

if __name__ == '__main__':
    spark = (SparkSession.builder
             .master("local[*]").config("spark.driver.host","localhost")
             .appName("测试")
             .getOrCreate())
    # 读取本地文件
    df = spark.read.json(r"E:\bigdata\code\spark\code\sql\data\a.json")
    props = {'user': 'root', 'password': '123456','driver': 'com.mysql.jdbc.Driver'}
    # 写入MySQL中的df_user
    df.write.jdbc(url='jdbc:mysql://hadoop11:3306/test1',table='df_user',properties=props)
    # 读取mysql数据库上的df_user表
    df1=spark.read.jdbc(url="jdbc:mysql://hadoop11:3306/test1", table="df_user", properties=props)
    # 显示
    df1.show()
    spark.stop()