Without further ado, straight to the code. First, wrap all the fields you want into an entity bean; the bean below is fairly simple.
import java.io.Serializable;

public class Student implements Serializable {

    private static final long serialVersionUID = 4L;

    private String name;
    private Integer age;

    public Student() {
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Integer getAge() {
        return age;
    }

    public void setAge(Integer age) {
        this.age = age;
    }

    @Override
    public String toString() {
        return "Student{" +
                "name='" + name + '\'' +
                ", age=" + age +
                '}';
    }
}
2. The core Spark SQL code is as follows:
// Imports needed at the top of the file:
//   java.util.ArrayList, java.util.List,
//   org.apache.spark.api.java.JavaRDD, org.apache.spark.api.java.JavaSparkContext,
//   org.apache.spark.api.java.function.Function,
//   org.apache.spark.sql.DataFrame, org.apache.spark.sql.Row, org.apache.spark.sql.SQLContext

JavaSparkContext sparkContext = new JavaSparkContext("local[4]", "test-sql-sql-test");
// Initialize the SQLContext
SQLContext sqlContext = new SQLContext(sparkContext);

// Build the data source
List<Student> students = new ArrayList<Student>();
Student dent = new Student();
dent.setAge(1);
dent.setName("lwj");
students.add(dent);

dent = new Student();
dent.setAge(20);
dent.setName("smj");
students.add(dent);

dent = new Student();
dent.setAge(30);
dent.setName("lwx");
students.add(dent);

JavaRDD<Student> studentRDD = sparkContext.parallelize(students);

// Create the DataFrame (applySchema is deprecated since Spark 1.3;
// sqlContext.createDataFrame(studentRDD, Student.class) is the newer equivalent)
DataFrame dataFrame = sqlContext.applySchema(studentRDD, Student.class);

// Register the temporary table name
dataFrame.registerTempTable("stu");

// The query. Avoid "select *": the column order of the result is not guaranteed,
// so list the columns you want explicitly, in the order you want them
// (see the sketch after this snippet for reading columns by name instead).
String sql = "select name,age from stu as a where a.age >= 20";

// Execute the query
DataFrame sql1 = sqlContext.sql(sql);

// Collect the result
List<String> teenagerNames = sql1.javaRDD().map(new Function<Row, String>() {
    public String call(Row row) {
        return "Name: " + row.getString(0);
    }
}).collect();

// Print it
System.out.println(teenagerNames);
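With the sample data above, only smj (20) and lwx (30) satisfy age >= 20, so the printed list should look like [Name: smj, Name: lwx]. If you would rather not depend on column positions at all, here is a minimal sketch (not from the original post, and assuming Spark 1.4 or later, where Row.getAs(String fieldName) is available) that reads the same result back by column name:

// Sketch only: fetch columns by name rather than by position.
// Assumes Spark 1.4+ (Row.getAs(String) is not available in earlier releases).
List<String> byName = sql1.javaRDD().map(new Function<Row, String>() {
    public String call(Row row) {
        String name = row.getAs("name");   // look the column up by its name
        Integer age = row.getAs("age");
        return "Name: " + name + ", Age: " + age;
    }
}).collect();
System.out.println(byName);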