1、使用python简单模拟日志输出
使用模块:
- os–>文件模块,判断文件是否存在;
- datetime–>日期模块,类似Java里的calendar作用,主要用于日期与字符串之间相互转换;
- time–>日期和时间戳的相互转换;
- random–>生成随机数,math–>数学函数(代码中用math.floor向下取整);
- json–>把字典转为json格式
生成日志要求为UTF-8文件,和hadoop平台匹配
- 在IDE(如PyCharm)中设置文件模板:Settings–>Editor–>File and Code Templates–>Python Script,在模板开头加入下面两行:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
json.dumps默认输出的json在分隔符后带有空格,使用separators=(",",":")参数可以去掉这些空格
代码如下:
import datetime
import time
import random
import math
import json
import os
def createLogFileByDate(begin, over, path):
    """Write one simulated log file per day for the days in [begin, over).

    Each day gets a file named ``log_<YYYY-MM-DD>.log`` inside *path* that
    receives 10000 lines produced by ``logContentPerDay``. Files are opened
    in append mode, so running the function twice against the same directory
    adds a second batch of lines to the existing files.

    Args:
        begin: First day to generate, as a ``%Y-%m-%d`` string.
        over: End day as a ``%Y-%m-%d`` string; this day itself is NOT
            generated (the loop runs ``(over - begin).days`` times).
        path: Output directory; created (with parents) when missing.
    """
    # The permission mode must be octal: decimal 777 is 0o1411, which is not
    # rwxrwxrwx. exist_ok avoids the separate existence check.
    os.makedirs(path, mode=0o777, exist_ok=True)

    first_day = datetime.datetime.strptime(begin, "%Y-%m-%d")
    end_day = datetime.datetime.strptime(over, "%Y-%m-%d")
    one_day = datetime.timedelta(days=1)

    for offset in range((end_day - first_day).days):
        day_str = (first_day + one_day * offset).strftime("%Y-%m-%d")
        log_file = os.path.join(path, "log_" + day_str + ".log")
        # Open the file once per day instead of once per line, and force
        # UTF-8 so the Chinese product titles do not depend on the platform
        # default encoding.
        with open(log_file, "a", encoding="utf-8") as f:
            f.writelines(logContentPerDay(day_str, j) for j in range(10000))
# User behaviour event types; one is picked at random for every log line.
evs = ["CLICK", "MOVE", "CART", "ORDER"]


def logContentPerDay(timestr, j):
    """Build one simulated log line for day *timestr* and slot *j*.

    The line consists of seven space-separated fields followed by a newline:
    user id, event, millisecond timestamp, product JSON, the two system
    fields ("6.0.0" and "android"), and a device JSON (app or browser).
    Every JSON field is serialized without spaces so that splitting the line
    on spaces yields exactly those seven fields.

    Args:
        timestr: The day as a ``%Y-%m-%d`` string.
        j: Zero-based index of the line within the day; lines are spaced
            8 seconds apart starting at local midnight of *timestr*.

    Returns:
        The complete log line as a string, terminated by a newline.
    """
    compact = (",", ":")  # JSON separators without the default spaces

    # NOTE(review): 0..100000 inclusive is kept as-is; the original comment
    # said "1-100000" - confirm the intended bounds.
    userid = str(random.randint(0, 100000))
    event = random.choice(evs)

    # Local midnight of the day plus 8 seconds per line; "000" turns the
    # second-resolution value into a millisecond timestamp string.
    day_start = int(time.mktime(time.strptime(timestr, "%Y-%m-%d")))
    timeStamp = str(day_start + j * 8) + "000"

    # Product number in 1..5000000. randint is inclusive on both ends, so
    # randint(0, 5000000) + 1 could produce 5000001.
    no = str(random.randint(1, 5000000))
    goodinfo = json.dumps(
        dict(No=no, title="商品" + no, price=10000.0, shopid=1, mark="mark"),
        ensure_ascii=False,
        separators=compact,
    )

    # Device descriptions: mobile app and browser.
    app = json.dumps(dict(appid=123456, appversion="11.1.0"), separators=compact)
    browse = json.dumps(
        dict(browsetype="chrome", browseversion="82.0"), separators=compact
    )
    device = app if random.randint(0, 1) == 1 else browse

    # The two system fields are emitted in this order ("6.0.0" then
    # "android"); the locals avoid the name `os`, which would shadow the
    # os module.
    system_field = "6.0.0"
    system_version_field = "android"

    fields = [
        userid,
        event,
        timeStamp,
        goodinfo,
        system_field,
        system_version_field,
        device,
    ]
    return " ".join(fields) + "\n"
if __name__ == '__main__':
    # Raw string: "\l" is not a valid escape sequence, so the plain literal
    # triggers an invalid-escape warning on recent Python versions. The
    # runtime value is unchanged. The directory looks like a placeholder
    # ("本地路径" = "local path") - replace it with a real output directory.
    createLogFileByDate("2020-7-1", "2020-12-31", r"本地路径\logmake_py")
    # Quick manual check of a single generated line:
    # print(logContentPerDay("2020-1-1",2))
2、使用Java模拟
/**
 * Generates simulated user-behaviour log files, one file per day.
 *
 * Each file holds 10000 lines spaced 8 seconds apart. A line consists of
 * space-separated fields: user id, event, millisecond timestamp, product JSON,
 * two system fields and a device JSON (app or browser), terminated by CRLF.
 */
public class GeneratorLogs {
    /** User behaviour event types; one is picked at random per log line. */
    static String[] evs = {"CLICK", "MOVE", "CART", "ORDER"};

    /** Number of log lines written to each daily file. */
    private static final int LINES_PER_FILE = 10000;

    /** Distance between two consecutive log lines, in milliseconds. */
    private static final long LINE_INTERVAL_MS = 8000L;

    /**
     * Creates one log file per day between the given dates (both inclusive).
     *
     * Any exception raised while creating the directory or writing a file is
     * caught and its stack trace printed; the remaining days are then skipped.
     *
     * @param begin first day of the log period
     * @param over  last day of the log period
     * @param path  directory the log files are written to; created if missing
     */
    public void writeLog(Date begin, Date over, String path) {
        // Whole days between the two dates; the loop below is inclusive.
        long day = (over.getTime() - begin.getTime()) / (24 * 60 * 60 * 1000);
        try {
            File dir = new File(path);
            // mkdirs() also creates missing parent directories.
            if (!dir.exists() && !dir.mkdirs()) {
                throw new java.io.IOException("cannot create log directory: " + path);
            }
            for (int i = 0; i <= day; i++) {
                File file = new File(path + "/" + createLogFileNameByDate(begin, i));
                // try-with-resources closes the file even when writing fails.
                try (RandomAccessFile raf = new RandomAccessFile(file, "rw")) {
                    // "rw" does not truncate: drop old content so a re-run
                    // cannot leave stale bytes behind the new lines.
                    raf.setLength(0);
                    writeLogContent(raf, begin, i);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the log file name for the day {@code addDay} days after {@code date}.
     *
     * @param date   start date of the log period
     * @param addDay number of days to add to {@code date}
     * @return file name of the form {@code log_yyyy-MM-dd.log}
     */
    private String createLogFileNameByDate(Date date, int addDay) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        Calendar cal = Calendar.getInstance();
        cal.setTime(date);
        cal.add(Calendar.DATE, addDay);
        return "log_" + sdf.format(cal.getTime()) + ".log";
    }

    /**
     * Fills one log file with 10000 generated lines.
     *
     * The caller owns {@code raf} and is responsible for closing it.
     *
     * @param raf    the open log file to write to
     * @param begin  start date of the log period
     * @param addDay offset in days of this file from {@code begin}
     * @throws Exception if JSON serialization or writing fails
     */
    private void writeLogContent(RandomAccessFile raf, Date begin, int addDay) throws Exception {
        Random random = new Random();
        // The serializer and the day's base time do not change per line.
        ObjectMapper om = new ObjectMapper();
        Calendar cal = Calendar.getInstance();
        cal.setTime(begin);
        cal.add(Calendar.DATE, addDay);
        long dayStart = cal.getTimeInMillis();

        // Device descriptions: mobile app and browser.
        String app = "{\"appid\":\"123456\",\"appversion\":\"11.1.0\"}";
        String browse = "{\"browsetype\":\"chrome\",\"browseversion\":\"82.0\"}";
        // The two system fields, emitted in this order.
        String os = "6.0.0";
        String os_version = "android";

        StringBuilder content = new StringBuilder();
        for (int i = 0; i < LINES_PER_FILE; i++) {
            // User id in 0..99999.
            String userid = String.valueOf(random.nextInt(100000));
            String event = evs[random.nextInt(evs.length)];
            // One line every 8 seconds, starting at the day's base time.
            long time = dayStart + i * LINE_INTERVAL_MS;
            // Product number in 1..5000000.
            String no = String.valueOf(random.nextInt(5000000) + 1);
            cn.kgc.mylogs.services.Goods good = new cn.kgc.mylogs.services.Goods(no, "商品" + no, 10000, 1, "mark");
            String goodinfo = om.writeValueAsString(good);

            content.append(userid).append(" ")
                    .append(event).append(" ")
                    .append(time).append(" ")
                    .append(goodinfo).append(" ")
                    .append(os).append(" ")
                    .append(os_version).append(" ")
                    .append(random.nextInt(2) == 0 ? app : browse)
                    .append("\r\n");
        }
        // Encode explicitly as UTF-8 so the output does not depend on the
        // platform default charset, and write the whole day in one call.
        raf.write(content.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        System.out.println("开始日期加入了:" + addDay);
    }

    /**
     * Generates the log files for 2020/1/1 through 2021/1/1.
     *
     * @param args unused
     * @throws Exception if the hard-coded dates cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        GeneratorLogs gen = new GeneratorLogs();
        // Date(String) is deprecated; parse the same local-midnight dates explicitly.
        SimpleDateFormat input = new SimpleDateFormat("yyyy/M/d");
        Date begin = input.parse("2020/1/1");
        Date over = input.parse("2021/1/1");
        gen.writeLog(begin, over, "D:\\LearningNotes\\数仓项目\\数仓version_03\\logmake");
    }
}
如何把对象转为json字符串
- 需要引入jackson-core和jackson-databind依赖(ObjectMapper位于jackson-databind中)
- 把数据封装为对象(根据要转成json的字段名或属性)
- 创建ObjectMapper对象,调用它的writeValueAsString方法即可把对象输出为json字符串
String goodinfo = om.writeValueAsString(good);
如何把json字符串转成对象实例?
- 引入阿里巴巴的fastjson依赖,导入com.alibaba.fastjson._包
- 调用 JSON.parseObject(str,classOf[Shops]) 方法,把json字符串解析为Shops实例
/**
 * Product record used as the target type when parsing the JSON part of a
 * log line. All five fields are declared as strings.
 */
case class Shops(
  goodid: String,
  title: String,
  price: String,
  shopid: String,
  mark: String
)
/**
 * Demo job: reads a generated log file, parses the first JSON object of each
 * line into a Shops instance, connects that stream with a stream of the
 * integers 1 to 10000, and prints the mapped results of both sides.
 */
object ConnectTest3 {
  def main(args: Array[String]): Unit = {
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    val logPath = "D:\\LearningNotes\\数仓项目\\数仓version_03\\db_myshops\\myshops_fact_from_tomcat\\logmake_py\\log_2021-01-01.log"
    val lines = streamEnv.readTextFile(logPath)

    // First stream: the first {...} group of every line, parsed as Shops.
    // The non-greedy pattern stops at the first closing brace; .get throws
    // when a line contains no JSON object.
    val shops = lines.map { line =>
      val jsonPattern = "(\\{.*?})".r
      val firstJson = jsonPattern.findFirstIn(line).get
      JSON.parseObject(firstJson, classOf[Shops])
    }

    // Second stream: the integers 1..10000.
    val numbers = ArrayBuffer[Int]()
    numbers ++= (1 to 10000)
    val numberStream = streamEnv.fromCollection(numbers)

    // Connect both streams; each side gets its own mapping function.
    val connected = shops.connect(numberStream)
    connected
      .map(shop => "id" + shop.goodid, no => no + 1)
      .print()

    streamEnv.execute()
  }
}