1.目的
求每个班级每个科目的平均成绩（排除班级为 missing 的记录），并先按科目排序，同一科目内再按平均成绩从高到低排序
2.素材
dataframe.txt
class1 chinese 88
class2 chinese 76
class1 chinese 90
class2 english 56
class1 english 93
class2 chinese 68
class1 english 44
class3 english 77
class3 chinese 91
class1 english 83
class2 chinese 69
class1 english 49
class3 chinese 67
class3 english 85
missing english 65
3.代码
/**
* Created by puwenchao on 2016-08-06.
*/
import org.apache.spark.{SparkConf,SparkContext}
import org.apache.spark.sql.SQLContext
/**
 * One score record read from the input file; its field names become the
 * column names of the DataFrame built from it.
 *
 * @param classid class identifier (first field of an input line, e.g. "class1")
 * @param subject subject name (second field of an input line)
 * @param num     score (third field of an input line, parsed as an integer)
 */
case class Classinfo(
  classid: String,
  subject: String,
  num: Int
)
/**
 * Computes the average score of every (class, subject) pair in the input
 * file and prints the result ordered by subject, then by average score in
 * descending order. Records whose class id is "missing" are excluded.
 */
object dataframe {

  /**
   * Parses one input line of the form `<classid> <subject> <score>`.
   *
   * @param line raw line read from the input file
   * @return the parsed record, or `None` when the line has fewer than three
   *         whitespace-separated fields or the score is not an integer
   */
  private def parseLine(line: String): Option[Classinfo] =
    line.trim.split("\\s+") match {
      case Array(classid, subject, num, _*) =>
        scala.util.Try(num.toInt).toOption.map(score => Classinfo(classid, subject, score))
      case _ =>
        None
    }

  /**
   * Program entry point: runs the aggregation on a local Spark context.
   *
   * @param ages command-line arguments (not used)
   */
  def main(ages: Array[String]): Unit = {
    // Set up the execution environment.
    val conf = new SparkConf().setAppName("dataframe").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val sqlc = new SQLContext(sc)
      // Bring the implicit conversions (`toDF`, `$"..."`) into scope.
      import sqlc.implicits._

      // Load the data and convert it to a DataFrame; blank or malformed
      // lines are dropped instead of aborting the whole job.
      val text = sc.textFile("e:\\dataframe.txt")
        .flatMap(line => parseLine(line).toList)
        .toDF()

      // `notEqual` is used instead of the `!==` operator, which is deprecated
      // since Spark 2.0 because of its surprising operator precedence.
      text.where($"classid".notEqual("missing"))
        .groupBy($"classid", $"subject").agg("num" -> "avg")
        .sort($"subject", $"avg(num)".desc)
        .show()
    } finally {
      // Always release the Spark context, even when the job fails.
      sc.stop()
    }
  }
}
4.输出
+-------+-------+--------+
|classid|subject|avg(num)|
+-------+-------+--------+
| class1|chinese| 89.0|
| class3|chinese| 79.0|
| class2|chinese| 71.0|
| class3|english| 81.0|
| class1|english| 67.25|
| class2|english| 56.0|
+-------+-------+--------+