from pyspark import SparkConf, SparkContext,SQLContext
from pyspark.sql import SparkSession
from pyspark.ml.feature import Word2Vec,CountVectorizer
# Spark entry points for this demo. `sc` and `sqlContext` keep their original
# names so any later code that refers to them keeps working.
conf = SparkConf().setAppName("yjs_rec")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# Sample corpus: one row per document, with a single column holding that
# document's tokens as a list of strings. The column is named "words" so that
# it matches the `inputCol` the Word2Vec estimator below is configured with.
# (This definition used to be wrapped in a string literal, which left
# `documentDF` undefined when the script ran.)
documentDF = sqlContext.createDataFrame([
    ("Hi I heard about Spark".split(" "),),
    ("I wish Java could use case classes".split(" "),),
    ("Logistic regression models are neat".split(" "),),
], ["words"])

# `documentDF` is already a Spark DataFrame; passing it to createDataFrame a
# second time is rejected by PySpark, so reuse it directly.
spark_df = documentDF

# word2vec: learn 100-dimensional word vectors. minCount=0 keeps every token,
# including ones that appear only once in the corpus.
word2Vec = Word2Vec(vectorSize=100, minCount=0, inputCol="words", outputCol="result")
model = word2Vec.fit(spark_df)

# transform() appends a "result" column with one vector per input row.
result = model.transform(spark_df)
result.select("result").show()
Spark 之 Word2Vec 使用(Python)
最新推荐文章于 2023-10-20 09:59:37 发布
本文介绍如何在 PySpark 中使用 Spark ML 内置的 Word2Vec 算法,并通过具体示例代码展示从创建 DataFrame、训练模型到获取词向量的全过程。

762

被折叠的 条评论
为什么被折叠?



