Import the required library: SparkSession
Create the entry-point script
# -*- coding: utf-8 -*-
"""
@Time: 2018/9/13
@Author: songhao
@WeChat Official Account: zeropython
@File: 0801.py
"""
from pyspark.sql import SparkSession

my_spark = SparkSession \
    .builder \
    .appName("myApp") \
    .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.coll") \
    .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.collsss") \
    .getOrCreate()
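The MongoDB Spark connector is not bundled with Spark itself. A minimal sketch of pulling it in when the session is built, assuming Spark 2.3 on Scala 2.11 (the connector coordinates below are an example; pick the version that matches your cluster):

# Sketch: fetch the connector at session start-up via spark.jars.packages.
# The artifact version here is an assumption; adjust it to your Spark/Scala build.
from pyspark.sql import SparkSession

my_spark = SparkSession \
    .builder \
    .appName("myApp") \
    .config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.11:2.3.0") \
    .config("spark.mongodb.input.uri", "mongodb://127.0.0.1/test.coll") \
    .config("spark.mongodb.output.uri", "mongodb://127.0.0.1/test.collsss") \
    .getOrCreate()

Equivalently, the same coordinates can be supplied on the command line, e.g. spark-submit --packages org.mongodb.spark:mongo-spark-connector_2.11:2.3.0 0801.py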
Create a simple DataFrame
people = my_spark.createDataFrame(
    [("Bilbo Baggins", 50), ("Gandalf", 1000), ("Thorin", 195),
     ("Balin", 178), ("Kili", 77), ("Dwalin", 169), ("Oin", 167),
     ("Gloin", 158), ("Fili", 82), ("Bombur", None)],
    ["name", "age"])
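Before writing, it can be worth checking what Spark inferred for this data; the quick check below only uses standard DataFrame methods:

# Optional check: "age" is inferred as a nullable long, and the None for
# Bombur will be stored as a null in MongoDB.
people.printSchema()
people.show()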
Save to the database and collection configured in spark.mongodb.output.uri
people.write.format("com.mongodb.spark.sql.DefaultSource").mode("append").save()
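To verify the write, the same connector can read the data back. A sketch under the configuration above: with no options, a read uses spark.mongodb.input.uri (test.coll), so the uri option is overridden here to point at the collection that was just written (test.collsss):

# Read back what was just written; the "uri" option overrides the default
# spark.mongodb.input.uri for this one read.
df = my_spark.read.format("com.mongodb.spark.sql.DefaultSource") \
    .option("uri", "mongodb://127.0.0.1/test.collsss") \
    .load()
df.show()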
Save to different databases and collections
people.write.format("com.mongodb.spark.sql.DefaultSource").mode("append") \
    .option("database", "people").option("collection", "contacts").save()

people.write.format("com.mongodb.spark.sql.DefaultSource").mode("append") \
    .option("database", "people1").option("collection", "contacts1").save()
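The read side mirrors these options. A sketch, using the database and collection names from the writes above, for loading one of the target collections:

# Load people.contacts back into a DataFrame using the same option names
# as the write side.
contacts = my_spark.read.format("com.mongodb.spark.sql.DefaultSource") \
    .option("database", "people") \
    .option("collection", "contacts") \
    .load()
contacts.show()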
