- Blog (76)
Original: Counting Live-Stream Viewers (Part 1)
Query the per-dimension metrics from ClickHouse (the SQL must GROUP BY the dimension columns) and serve them for real-time display through the ClickHouse Java API. Start ClickHouse from the command line. E.g.: Select osname, isNew, count(distinct deviced) from tb_user_event group by isNew, channel, osname; Optimization: RocksDB (a kind of state backend) is introduced because, if all state were kept in memory, the state holds a lot of data and it changes in real time …
2021-07-16 23:03:57
1528
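A minimal sketch of the query side described above, using plain JDBC from Scala. It assumes the ClickHouse JDBC driver is on the classpath; the host, database, credentials, and the deviceId column (standing in for the post's "deviced") are placeholders, and the table and GROUP BY columns follow the query quoted in the post.

import java.sql.DriverManager

object ClickHouseQuerySketch {
  def main(args: Array[String]): Unit = {
    // placeholder host/database/credentials; requires the ClickHouse JDBC driver on the classpath
    val conn = DriverManager.getConnection("jdbc:clickhouse://localhost:8123/default", "default", "")
    val ps = conn.prepareStatement(
      "SELECT osname, isNew, count(DISTINCT deviceId) AS uv " +
        "FROM tb_user_event GROUP BY isNew, channel, osname")
    val rs = ps.executeQuery()
    while (rs.next()) {
      // print one aggregated row per dimension combination
      println(s"${rs.getString("osname")} ${rs.getString("isNew")} ${rs.getLong("uv")}")
    }
    rs.close(); ps.close(); conn.close()
  }
}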
Original: Broadcasting a Dimension Stream to a Fact Stream (Example)
import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.state.BroadcastState; import org.apache.flink.api.common.state.MapStateDescriptor; import org.apache.flink.api.common.state.ReadOnlyBroadcastState; import org.apac…
2021-07-10 23:42:22
166
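A minimal sketch of the broadcast pattern this post is about, written against Flink's Scala DataStream API (the post itself uses the Java API): a dimension stream is broadcast as MapState and looked up from a fact stream inside a BroadcastProcessFunction. Stream contents, field layout and the state name are invented for illustration.

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

object BroadcastDimSketch {
  // broadcast (dimension) state: dimension id -> dimension name
  val dimDescriptor = new MapStateDescriptor[String, String]("dim-state", classOf[String], classOf[String])

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val facts: DataStream[(String, Double)] = env.fromElements(("d1", 1.0), ("d2", 2.0))        // fact stream
    val dims: DataStream[(String, String)]  = env.fromElements(("d1", "dimA"), ("d2", "dimB"))  // dimension stream

    facts
      .connect(dims.broadcast(dimDescriptor))
      .process(new BroadcastProcessFunction[(String, Double), (String, String), String] {
        override def processElement(
            value: (String, Double),
            ctx: BroadcastProcessFunction[(String, Double), (String, String), String]#ReadOnlyContext,
            out: Collector[String]): Unit = {
          // enrich the fact with whatever the broadcast state currently holds for its key
          val dimName = ctx.getBroadcastState(dimDescriptor).get(value._1)
          out.collect(s"${value._1} -> $dimName : ${value._2}")
        }
        override def processBroadcastElement(
            value: (String, String),
            ctx: BroadcastProcessFunction[(String, Double), (String, String), String]#Context,
            out: Collector[String]): Unit = {
          // every dimension record updates the broadcast state on all parallel instances
          ctx.getBroadcastState(dimDescriptor).put(value._1, value._2)
        }
      })
      .print()

    env.execute("broadcast-dim-sketch")
  }
}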
Original: Flink State Series: MapState
import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.functions.RichMapFunction; import org.apache.flink.api.common.state.MapState; import org.apache.flink.api.common.state.MapStateDescriptor; import org.apache.flink.…
2021-07-09 21:41:57
1406
2
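A small keyed MapState sketch in the same spirit, using Flink's Scala DataStream API rather than the Java API the post imports: per key (user), a MapState keeps one counter per event type. The socket source, field layout and state name are assumptions.

import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{MapState, MapStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

object MapStateSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.socketTextStream("localhost", 9999)                     // lines like "user1,click" (placeholder source)
      .map(line => { val f = line.split(","); (f(0), f(1)) })   // (userId, eventType)
      .keyBy(_._1)
      .map(new RichMapFunction[(String, String), (String, String, Long)] {
        private var counts: MapState[String, java.lang.Long] = _

        override def open(parameters: Configuration): Unit = {
          // one MapState per key (user): eventType -> how many times it was seen
          counts = getRuntimeContext.getMapState(
            new MapStateDescriptor[String, java.lang.Long]("event-counts", classOf[String], classOf[java.lang.Long]))
        }

        override def map(value: (String, String)): (String, String, Long) = {
          val next = Option(counts.get(value._2)).map(_.longValue()).getOrElse(0L) + 1
          counts.put(value._2, next)
          (value._1, value._2, next)
        }
      })
      .print()
    env.execute("mapstate-sketch")
  }
}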
Original: Flink State Series: ValueState (1)
import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.common.functions.RichMapFunction; import org.apache.flink.api.common.state.ValueState; import org.apache.flink.api.common.state.ValueStateDescriptor; import org.apach…
2021-07-09 20:28:06
785
1
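A compact ValueState sketch in the spirit of this post, again via Flink's Scala API: a RichMapFunction keeps one running count per key in ValueState. The socket source, port and state name are placeholders.

import org.apache.flink.api.common.functions.RichMapFunction
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._

object ValueStateSketch {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.socketTextStream("localhost", 9999)     // one word per line (placeholder source)
      .map(word => (word, 1L))
      .keyBy(_._1)
      .map(new RichMapFunction[(String, Long), (String, Long)] {
        private var sum: ValueState[java.lang.Long] = _

        override def open(parameters: Configuration): Unit = {
          sum = getRuntimeContext.getState(
            new ValueStateDescriptor[java.lang.Long]("word-count", classOf[java.lang.Long]))
        }

        override def map(value: (String, Long)): (String, Long) = {
          // read the previous count for this key, add the new one, write it back
          val next = Option(sum.value()).map(_.longValue()).getOrElse(0L) + value._2
          sum.update(next)
          (value._1, next)
        }
      })
      .print()
    env.execute("valuestate-sketch")
  }
}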
Original: spark-streaming
import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.common.serialization.StringDeserializer; import org.apache.spark.SparkConf; import org.apache.spark.rdd.RDD; import org.apache.spark.streaming.{Seconds, StreamingContext}; import o…
2021-06-27 22:54:46
135
1
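Judging by the imports, this post reads Kafka with the direct-stream API from spark-streaming-kafka-0-10. A minimal sketch; the broker address, group id and topic name are placeholders.

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaDirectSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("kafka-direct-sketch").setMaster("local[*]")
    val ssc = new StreamingContext(conf, Seconds(5))

    // hypothetical broker, group id and offset settings
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "localhost:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "demo-group",
      "auto.offset.reset" -> "earliest",
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Seq("test-topic"), kafkaParams)
    )

    // classic per-batch word count over the record values
    stream.map((r: ConsumerRecord[String, String]) => r.value())
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}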
Original: SPARK-STREAMING
import org.apache.spark.SparkConf; import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}; import org.apache.spark.streaming.{Seconds, StreamingContext}  /* counts only the current batch */  object wordandcount { def main(args: Array[String]): Unit = { val conf: …
2021-06-24 23:36:55
144
2
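The preview notes that only the current batch is counted, so no state is carried across batches. A minimal per-batch word count over a socket source (the host and port are placeholders; the ReceiverInputDStream import suggests a receiver-based source like this one).

import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object WordAndCountSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("word-and-count").setMaster("local[2]")
    val ssc = new StreamingContext(conf, Seconds(5))

    // read lines from a socket (e.g. started with `nc -lk 8888`)
    val lines: ReceiverInputDStream[String] = ssc.socketTextStream("localhost", 8888)

    // counts apply to the current batch only: no state is kept between batches
    val counts: DStream[(String, Int)] = lines.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    counts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}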
Original: Kafka (Part 1)
Producer program: import java.util.Properties; import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}; import org.apache.kafka.common.serialization.StringSerializer; object ProducerDemo { def main(args: Array[String]): Unit = { val properties = new …
2021-06-22 21:05:02
109
2
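A minimal Scala producer along the lines of the excerpt; the broker address and topic name are placeholders.

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import org.apache.kafka.common.serialization.StringSerializer

object ProducerSketch {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("bootstrap.servers", "localhost:9092")   // placeholder broker
    props.setProperty("key.serializer", classOf[StringSerializer].getName)
    props.setProperty("value.serializer", classOf[StringSerializer].getName)

    val producer = new KafkaProducer[String, String](props)
    for (i <- 1 to 10) {
      // "test-topic" is a placeholder topic name
      producer.send(new ProducerRecord[String, String]("test-topic", s"key-$i", s"value-$i"))
    }
    producer.flush()
    producer.close()
  }
}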
Original: Spark SQL Case Study: Traffic Statistics (DSL)
uid,start_time,end_time,flow
1,2020-02-18 14:20:30,2020-02-18 14:46:30,20
1,2020-02-18 14:47:20,2020-02-18 15:20:30,30
1,2020-02-18 15:37:23,2020-02-18 16:05:26,40
1,2020-02-18 16:06:27,2020-02-18 17:20:49,50
1,2020-02-18 17:21:50,2020-02-18 18:03:27,60
2,…
2021-06-18 23:32:30
208
1
Original: Spark SQL Case Study: Traffic Statistics
uid,start_time,end_time,flow
1,2020-02-18 14:20:30,2020-02-18 14:46:30,20
1,2020-02-18 14:47:20,2020-02-18 15:20:30,30
1,2020-02-18 15:37:23,2020-02-18 16:05:26,40
1,2020-02-18 16:06:27,2020-02-18 17:20:49,50
1,2020-02-18 17:21:50,2020-02-18 18:03:27,60
2,…
2021-06-18 22:46:57
177
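A sketch of one common solution to this kind of exercise in Spark SQL: LAG marks where a new session starts, a windowed running sum turns those marks into session ids, and the sessions are then aggregated. The 10-minute merge threshold and the file path are assumptions, not taken from the post.

import org.apache.spark.sql.SparkSession

object FlowRollupSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("flow-rollup").master("local[*]").getOrCreate()

    // assumes a CSV with header uid,start_time,end_time,flow, e.g. data/flow.csv
    spark.read.option("header", "true").csv("data/flow.csv").createOrReplaceTempView("t_flow")

    // rule assumed here: a record that starts within 10 minutes (600 s) of the previous
    // record's end for the same uid belongs to the same session
    spark.sql(
      """
        |SELECT uid, MIN(start_time) AS start_time, MAX(end_time) AS end_time, SUM(flow) AS flow
        |FROM (
        |  SELECT uid, start_time, end_time, flow,
        |         SUM(new_session) OVER (PARTITION BY uid ORDER BY start_time) AS session_id
        |  FROM (
        |    SELECT uid, start_time, end_time, flow,
        |           IF(unix_timestamp(start_time) - unix_timestamp(
        |                LAG(end_time) OVER (PARTITION BY uid ORDER BY start_time)) > 600, 1, 0) AS new_session
        |    FROM t_flow
        |  ) marked
        |) sessioned
        |GROUP BY uid, session_id
      """.stripMargin).show()

    spark.stop()
  }
}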
Original: Spark: Users Who Logged In on 3 or More Consecutive Days
Data:
guid01,2018-02-28
guid01,2018-03-01
guid01,2018-03-01
guid01,2018-03-02
guid01,2018-03-05
guid01,2018-03-04
guid01,2018-03-06
guid01,2018-03-07
guid02,2018-03-01
guid02,2018-03-02
guid02,2018-03-03
guid02,2018-03-06
guid03,2018-03-06
guid03,2018-03-07
g…
2021-06-16 21:44:19
127
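A sketch of the classic solution: after deduplication, subtract each date's row_number (per guid) from the date itself, so all dates in a consecutive run collapse onto the same group key. The file path and column names are assumptions based on the sample data.

import org.apache.spark.sql.SparkSession

object ContinuousLoginSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("continuous-login").master("local[*]").getOrCreate()

    // assumes a two-column CSV guid,dt shaped like the sample data above
    spark.read.csv("data/login.csv").toDF("guid", "dt").createOrReplaceTempView("t_login")

    spark.sql(
      """
        |SELECT guid, MIN(dt) AS start_date, MAX(dt) AS end_date, COUNT(*) AS days
        |FROM (
        |  SELECT guid, dt,
        |         date_sub(dt, ROW_NUMBER() OVER (PARTITION BY guid ORDER BY dt)) AS grp
        |  FROM (SELECT DISTINCT guid, dt FROM t_login) deduped
        |) t
        |GROUP BY guid, grp
        |HAVING COUNT(*) >= 3
      """.stripMargin).show()

    spark.stop()
  }
}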
Original: Defining Generics and Their Upper and Lower Bounds
class Demo001[K,V] { def show(k: K, v: V) = { println(s"$k $v") } }  object Demo001 { def main(args: Array[String]): Unit = { val demo = new Demo001[Int,String]; demo.show(1,"cc"); val demo2 = new Demo001[Int,Int]; demo2.show(10,20) } }  cl…
2021-06-09 23:00:54
131
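A short illustration of the two bound forms the title refers to, with example classes invented for the sketch.

// upper bound: T must be a subtype of Comparable[T]
class MaxHolder[T <: Comparable[T]](a: T, b: T) {
  def max: T = if (a.compareTo(b) >= 0) a else b
}

class Animal
class Dog extends Animal

// lower bound: the accepted type must be Dog or a supertype of Dog
class Kennel {
  def accept[T >: Dog](t: T): T = t
}

object BoundsSketch {
  def main(args: Array[String]): Unit = {
    println(new MaxHolder[Integer](3, 7).max)   // 7
    println(new Kennel().accept(new Animal))    // allowed because Animal >: Dog
  }
}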
Original: Custom Sorting
object Text01 { def main(args: Array[String]): Unit = { val ls = List(new User(1,"WNN",10), new User(2,"CC",11), new User(3,"DD",19)); implicit def opt(user: User): Ordered[User] = { new Ordered[User] { override def compare(that: User): I…
2021-06-09 22:50:16
93
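The excerpt converts User to Ordered via an implicit def; an equivalent and slightly lighter option is an implicit Ordering, sketched here with the same sample data (User is re-declared just for the sketch).

case class User(id: Int, name: String, age: Int)

object SortSketch {
  def main(args: Array[String]): Unit = {
    val users = List(User(1, "WNN", 10), User(2, "CC", 11), User(3, "DD", 19))

    // an implicit Ordering avoids wrapping every element in Ordered
    implicit val byAgeDesc: Ordering[User] = Ordering.by[User, Int](_.age).reverse

    users.sorted.foreach(println)   // oldest first, via the implicit Ordering
    println(users.maxBy(_.age))     // User(3,DD,19)
  }
}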
Original: Implicit Functions and Implicit Classes
Implicit functions: object ImpliDemo { implicit val age: Int = 100; def opt(x: Int, y: Int)(implicit f: (Int,Int) => Int) = { f(x,y) }; def opt1(x: Int, y: Int)(implicit f: (Int,Int) => Int) = { f(x,y) }; def main(args: Array[String]): Unit = { // implicit…
2021-06-09 22:12:33
263
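The excerpt covers implicit values and implicit parameters; the other half of the title is the implicit class, which adds methods to an existing type without modifying it. A tiny sketch with invented method names.

object ImplicitClassSketch {
  // an implicit class wraps the receiver and exposes the extra methods
  implicit class IntOps(val self: Int) extends AnyVal {
    def double: Int = self * 2
    def isEvenNum: Boolean = self % 2 == 0
  }

  def main(args: Array[String]): Unit = {
    println(10.double)     // 20, via the implicit wrapper
    println(7.isEvenNum)   // false
  }
}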
Original: Scala Pattern Matching
object Demo03 { def main(args: Array[String]): Unit = { val ls = List(1,2,3,"a","b","c",10.11,12.12,13.13); /* val res: List[Any] = ls.map(e => { if (e.isInstanceOf[Int]) { e.asInstanceOf[Int]*10 } else if (e.isInstanceOf[String]) {…
2021-06-07 23:12:44
103
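The commented-out branch in the excerpt chains isInstanceOf/asInstanceOf; the match expression the post builds toward looks roughly like this, with the same sample list.

object MatchSketch {
  def main(args: Array[String]): Unit = {
    val ls = List(1, 2, 3, "a", "b", "c", 10.11, 12.12, 13.13)

    // match on the runtime type instead of isInstanceOf / asInstanceOf chains
    val res = ls.map {
      case i: Int    => i * 10
      case s: String => s.toUpperCase
      case d: Double => d + 100
      case other     => other
    }
    println(res)   // List(10, 20, 30, A, B, C, 110.11, 112.12, 113.13)
  }
}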
Original: Partial Functions and Their Shorthand Form
object PartFun { def main(args: Array[String]): Unit = { val ls: List[Any] = List("wnn","a","b",1,2,3,10.0,11.0,true); val f: PartialFunction[Any, Int] = new PartialFunction[Any, Int] { override def isDefinedAt(x: Any): Boolean = { x.is…
2021-06-07 22:02:53
179
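The shorthand the title mentions is the case-block literal, which builds the same PartialFunction as the explicit isDefinedAt/apply pair in the excerpt. A sketch with the same input list.

object PartialFunSketch {
  def main(args: Array[String]): Unit = {
    val ls: List[Any] = List("wnn", "a", "b", 1, 2, 3, 10.0, 11.0, true)

    // the case-block shorthand stands in for a hand-written isDefinedAt/apply pair
    val tenTimes: PartialFunction[Any, Int] = {
      case i: Int => i * 10
    }

    // collect applies the partial function only where it is defined
    println(ls.collect(tenTimes))          // List(10, 20, 30)
    println(tenTimes.isDefinedAt("wnn"))   // false
  }
}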
Original: Scala Higher-Order Functions
object HanShu { def opt(x: Int, y: Int, f: (Int,Int) => Int) = { f(x,y) }; def main(args: Array[String]): Unit = { println(opt(1, 3, (x1, x2) => x1 + x2)); println(opt(1, 3, (x1, x2) => x1 * x2)) } }  object HanShu2 { def opt(x: String,…
2021-06-06 21:59:44
77
Original: Dynamic Mixin of Traits
class Demo01 extends T1 with T2 with T3 { override def t11: Unit = { println("the override shortcut will not list t11") }; def t1: Unit = { println("the override shortcut will list t1") }; override def t2: Unit = { println("overrode t2") }; override def t3: Unit = { println("overrode t3")…
2021-06-06 19:57:22
304
Original: Scala Traits
trait Tdemo01 { def add2(x: Int, y: Int): Int; def show2 }  trait Tdemo { def add(x: Int, y: Int): Int; def show }  import day03.Tdemo  /** Author Ning.W, Date: 2021/6/5, Description: */  class TraitDemo01 extends Tdemo with Tdemo01 { override def show:…
2021-06-05 22:50:03
91
Original: Advanced RDD
import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object Demo { def main(args: Array[String]): Unit = { val sc: SparkContext = SparkUtils.getSparkContext(); val rdd: RDD[(String, Int)] = sc.textFile("data/…
2021-06-05 10:55:57
119
Original: The Scala apply Method
object ApplyDemo { def apply(): ApplyDemo = new ApplyDemo(); def main(args: Array[String]): Unit = { val demo: ApplyDemo = new ApplyDemo; /* demo.show */ ApplyDemo.apply().show } }  class ApplyDemo { def show = { println("ccccc") } }  obje…
2021-06-04 23:02:12
99
Original: Deduplication in Spark
import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object ReduceByKeyDemo { def main(args: Array[String]): Unit = { val sc: SparkContext = SparkUtils.getSparkContext(); val tf: RDD[String] = sc.textFile("da…
2021-06-04 21:37:47
842
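A sketch of the reduceByKey-based dedup the post's object name suggests; this is essentially what RDD.distinct() does internally. The sample data is invented.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object DistinctSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("distinct-sketch").setMaster("local[*]"))
    val rdd: RDD[String] = sc.makeRDD(List("a", "b", "a", "c", "b"))

    // hand-rolled dedup: identical (key, null) pairs collapse in reduceByKey
    val deduped: RDD[String] = rdd.map((_, null)).reduceByKey((a, _) => a).keys
    deduped.foreach(println)

    sc.stop()
  }
}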
Original: Advanced Spark: the sample and takeSample Operators
import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object SampleDemo { def main(args: Array[String]): Unit = { val sc: SparkContext = SparkUtils.getSparkContext(); val rdd: RDD[Int] = sc.makeRDD(List(1,2,3…
2021-06-04 20:51:47
319
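A short sketch contrasting the two operators: sample is a lazy transformation that keeps roughly a fraction of the data, while takeSample is an action that returns an exact number of elements to the driver. The fraction and count below are arbitrary.

import org.apache.spark.{SparkConf, SparkContext}

object SampleSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("sample-sketch").setMaster("local[*]"))
    val rdd = sc.makeRDD(1 to 10)

    // transformation: keep ~30% of the elements, without replacement
    rdd.sample(withReplacement = false, fraction = 0.3).collect().foreach(println)

    // action: exactly 3 elements returned to the driver
    rdd.takeSample(withReplacement = false, num = 3).foreach(println)

    sc.stop()
  }
}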
Original: Scala Primary and Auxiliary Constructors
Primary and auxiliary constructors: class User(var id: Int, var name: String) { var sal: Double = _; def this() = { this(2,"cc") }; def this(sal: Double) { this(); this.sal = sal }; override def toString: String = s"id = $id; name= $name" }  object Demo05 { def main…
2021-06-03 22:18:49
113
Original: JDBC Connections
import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet}; import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD  /** One JDBC connection handles a whole partition, which cuts down the number of connections acquired; the trade-off is higher memory usage and a greater risk of an out-of-memory error. */  object Demo0…
2021-06-03 20:04:43
79
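A sketch of the per-partition JDBC pattern the excerpt's comment describes: one connection per partition via foreachPartition instead of one per record. The JDBC URL, credentials and table are placeholders, and the MySQL driver is assumed to be on the classpath.

import java.sql.DriverManager
import org.apache.spark.{SparkConf, SparkContext}

object ForeachPartitionJdbcSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("jdbc-sketch").setMaster("local[*]"))
    val rdd = sc.makeRDD(List(("wnn", 10), ("cc", 11), ("dd", 19)))

    rdd.foreachPartition(iter => {
      // one connection for the whole partition, not one per record
      val conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/test", "root", "root")
      val ps = conn.prepareStatement("INSERT INTO tb_user (name, age) VALUES (?, ?)")
      iter.foreach { case (name, age) =>
        ps.setString(1, name)
        ps.setInt(2, age)
        ps.executeUpdate()
      }
      ps.close()
      conn.close()
    })

    sc.stop()
  }
}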
Original: The mapPartitionsWithIndex Operator
import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object IndexDemo { def main(args: Array[String]): Unit = { val sc: SparkContext = SparkUtils.getSparkContext(); val rdd: RDD[Int] = sc.makeRDD(List(1,2,3,4…
2021-06-02 18:02:25
78
Original: The mapPartitionsWithIndex Operator
import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object MapartitionsIndexDemo { def main(args: Array[String]): Unit = { val sc: SparkContext = SparkUtils.getSparkContext(); val rdd: RDD[Int] = sc.makeRDD(…
2021-06-02 17:56:35
156
Original: Finding the Maximum Value in Each Partition
import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object MapMaxDemo { def main(args: Array[String]): Unit = { val sc: SparkContext = SparkUtils.getSparkContext(); val rdd1: RDD[Int] = sc.makeRDD(List(1,2,3…
2021-06-02 17:44:43
436
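A sketch of one way to get each partition's maximum, using mapPartitionsWithIndex so the partition id is visible in the output; the input list and partition count are arbitrary.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object PartitionMaxSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("partition-max").setMaster("local[*]"))
    val rdd: RDD[Int] = sc.makeRDD(List(1, 2, 3, 4, 5, 6, 7, 8), 2)

    // emit (partitionIndex, max of that partition); skip empty partitions
    val maxPerPartition = rdd.mapPartitionsWithIndex((index, iter) =>
      if (iter.isEmpty) Iterator.empty else Iterator((index, iter.max))
    )
    maxPerPartition.collect().foreach(println)   // e.g. (0,4) and (1,8)

    sc.stop()
  }
}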
Original: The map Operator
import java.sql.{Connection, Driver, DriverManager, PreparedStatement, ResultSet}; import Utils.SparkUtils; import org.apache.spark.SparkContext; import org.apache.spark.rdd.RDD; object MapDemo01 { def main(args: Array[String]): Unit = { val sc: SparkCo…
2021-06-02 17:17:34
232
Original: Spark
import org.apache.log4j.{Level, Logger}; import org.apache.spark.rdd.RDD; import org.apache.spark.{SparkConf, SparkContext}; object RDDdEMO { Logger.getLogger("org").setLevel(Level.ERROR); def main(args: Array[String]): Unit = { val conf: SparkConf = new…
2021-06-02 11:29:46
91
Original: Scala Advanced Topics
object SliceDemo { def main(args: Array[String]): Unit = { val arr: Array[String] = Array("wnn","cc","xx","dh","asd"); val ls: List[String] = arr.toList; /* println(ls) */ val res: Array[String] = arr.slice(0,3); /* println(res.toList) */ val s…
2021-06-01 23:23:40
139
Original: Advanced Scala
object ZipDemo { def main(args: Array[String]): Unit = { val arr: Array[String] = Array("wnn","lcc","hxx"); val arr1: Array[Int] = Array(1,2,3); val res: Array[(String, Int)] = arr.zip(arr1); /* first approach */ res.foreach(println); /* second approach */ val…
2021-06-01 22:04:27
95
Original: Binary Search in Scala
Binary search implemented in Scala: object SecondSreach { def main(args: Array[String]): Unit = { val arr = Array[Int](1,2,3,4,5,6,7,8,9); println("index:" + Search(arr,10)) }; def Search(arr: Array[Int], key: Int): Int = { var left = 0; var right = arr.length-1…
2021-05-25 20:08:14
361
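For completeness, a self-contained version of the iterative binary search the excerpt starts; the mid computation is written to avoid integer overflow.

object BinarySearchSketch {
  // iterative binary search over a sorted array; returns the index or -1
  def search(arr: Array[Int], key: Int): Int = {
    var left = 0
    var right = arr.length - 1
    while (left <= right) {
      val mid = left + (right - left) / 2   // avoids overflow for large indices
      if (arr(mid) == key) return mid
      else if (arr(mid) < key) left = mid + 1
      else right = mid - 1
    }
    -1
  }

  def main(args: Array[String]): Unit = {
    val arr = Array(1, 2, 3, 4, 5, 6, 7, 8, 9)
    println("index: " + search(arr, 10))   // -1, not found
    println("index: " + search(arr, 5))    // 4
  }
}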
Original: Scala for Big Data
Under the hood Scala is Java (it runs on the JVM); Spark itself is written in Scala. object Demo12 { def main(args: Array[String]): Unit = { println("aa") } }  /** An object is static, and the main method inside it is static too; with extends App the main method can be omitted. */  object Demo11 extends App { println("wnn") }  /* defining a variable: a var must be given an initial value or it is a compile error */ var ag.…
2021-05-24 22:28:23
149
1
Original: Installing Hive
Installing Hive. 1. MySQL: grant remote-connection privileges:
1) mysql -uroot -proot
2) set global validate_password_policy=0;
3) set global validate_password_length=1;  (with these two settings a simple, low-strength password will no longer be rejected)
4) grant all privileges on *.* to 'root'@'%' identified by 'root' with grant option;
5) flush…
2021-05-19 15:05:42
79
Original: MapReduce
package com.doit.hdp.day02; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapr…
2021-04-27 22:59:41
62
Original: Using HDFS
Check whether a file exists and delete it: package com.doit.hdp.day01; import com.doit.hdp.Utils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; public class Demo03 { public static void main(String[] args) throws Exception { FileSystem fs = Utils.ge…
2021-04-26 23:13:59
116