在学习的时候,没有数据,在网上也没有找到合适的,所以就自己写了一个Java程序生成需要的数据,下面就记录一下自己的实现过程。
数据格式
用户注册数据:user_base_info.json
{"userId":10,"userName":"userName10","registTime":"2019-08-05 22:32:34"} {"userId":1,"userName":"userName1","registTime":"2019-08-05 22:32:34"} {"userId":91,"userName":"userName91","registTime":"2019-08-05 22:32:34"} {"userId":53,"userName":"userName53","registTime":"2019-08-05 22:32:34"} {"userId":12,"userName":"userName12","registTime":"2019-08-05 22:32:34"} {"userId":9,"userName":"userName9","registTime":"2019-08-05 22:32:34"}
用户访问日志数据:user_action_log.json
{"logId":5518,"userId":28,"actionTime":"2019-08-05 23:22:39","actionType":1,"purchaseMoney":693.76} {"logId":4066,"userId":84,"actionTime":"2019-08-05 23:22:39","actionType":0,"purchaseMoney":162.73} {"logId":7417,"userId":42,"actionTime":"2019-08-05 23:22:39","actionType":0,"purchaseMoney":242.37} {"logId":5283,"userId":94,"actionTime":"2019-08-05 23:22:39","actionType":0,"purchaseMoney":218.16} {"logId":3095,"userId":97,"actionTime":"2019-08-05 23:22:39","actionType":0,"purchaseMoney":930.0} {"logId":5587,"userId":14,"actionTime":"2019-08-05 23:22:39","actionType":1,"purchaseMoney":5.54} {"logId":5804,"userId":63,"actionTime":"2019-08-05 23:22:39","actionType":1,"purchaseMoney":804.88}
生成数据代码
GeneratedUserVisitLog.java
package graduation.java.test;

import com.google.gson.JsonObject;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * FileName: GeneratedUserVisitLog
 * Author: hadoop
 * Email: 3165845957@qq.com
 * Date: 19-8-5 12:00 AM
 * Description:
 * Test-data generator for the user-activity module. It writes two files, each
 * containing one JSON object per line:
 * <ul>
 *   <li>user registration records: {@code userId}, {@code userName}, {@code registTime}</li>
 *   <li>user access-log records: {@code logId}, {@code userId}, {@code actionTime},
 *       {@code actionType}, {@code purchaseMoney}</li>
 * </ul>
 * All values are random; ids are drawn independently, so duplicates can occur.
 */
public class GeneratedUserVisitLog {

    /** Number of records written by {@link #createJsonFileInfoLog(String, String)}. */
    private static final int USER_RECORD_COUNT = 100;

    /** Number of records written by {@link #createJsonFileActionLog(String, String)}. */
    private static final int ACTION_RECORD_COUNT = 10000;

    /**
     * Generates both data files into a fixed output directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // JSON data file holding the users' basic (registration) information
        String userBaseLogName = "user_base_info.json";
        // JSON data file holding the users' access log
        String userActionLogName = "user_action_log.json";
        // Directory the files are saved in
        String logPath = "/home/hadoop/IdeaProjects/BigDataGraduationProject/log/";

        createJsonFileInfoLog(logPath, userBaseLogName);
        createJsonFileActionLog(logPath, userActionLogName);
    }

    /**
     * Returns the current time as text.
     *
     * @return the current time formatted as {@code yyyy-MM-dd HH:mm:ss}
     */
    public static String getStringDate() {
        // A fresh formatter per call: SimpleDateFormat instances are not thread-safe.
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return formatter.format(new Date());
    }

    /**
     * Generates the user registration file: 100 lines, one JSON object per line,
     * with no trailing newline. Any existing file at the target path is overwritten.
     * I/O failures are printed to standard error and not propagated.
     *
     * @param logPath         directory the file is saved in (concatenated directly with the file name)
     * @param userBaseLogName file name
     */
    public static void createJsonFileInfoLog(String logPath, String userBaseLogName) {
        StringBuilder records = new StringBuilder();
        for (int i = 0; i < USER_RECORD_COUNT; i++) {
            int userId = randomUserId();

            JsonObject record = new JsonObject();
            record.addProperty("userId", userId);
            record.addProperty("userName", "userName" + userId);
            record.addProperty("registTime", getStringDate());
            appendRecord(records, record);
        }
        writeLogFile(logPath + userBaseLogName, records.toString());
    }

    /**
     * Generates the user access-log file: 10000 lines, one JSON object per line,
     * with no trailing newline. Any existing file at the target path is overwritten.
     * I/O failures are printed to standard error and not propagated.
     *
     * @param logPath           directory the file is saved in (concatenated directly with the file name)
     * @param userActionLogName file name
     */
    public static void createJsonFileActionLog(String logPath, String userActionLogName) {
        StringBuilder records = new StringBuilder();
        for (int i = 0; i < ACTION_RECORD_COUNT; i++) {
            // Random log id in [0, 9999]; drawn independently, so not unique.
            int logId = (int) (Math.random() * 10000);
            int userId = randomUserId();
            String actionTime = getStringDate();
            // Action type is either 0 or 1.
            int actionType = (int) (Math.random() * 2);
            // Amount in [0.00, 999.99] truncated to two decimals. The division must be
            // floating-point: dividing the int by the int 100 would discard the cents.
            double purchaseMoney = ((int) (Math.random() * 1000 * 100)) / 100.0;

            JsonObject record = new JsonObject();
            record.addProperty("logId", logId);
            record.addProperty("userId", userId);
            record.addProperty("actionTime", actionTime);
            record.addProperty("actionType", actionType);
            record.addProperty("purchaseMoney", purchaseMoney);
            appendRecord(records, record);
        }
        writeLogFile(logPath + userActionLogName, records.toString());
    }

    /** Returns a random user id in the range 1..99 inclusive. */
    private static int randomUserId() {
        return (int) (Math.random() * 99 + 1);
    }

    /** Appends one JSON record as its own line, separating it from any previous record with a newline. */
    private static void appendRecord(StringBuilder records, JsonObject record) {
        if (records.length() > 0) {
            records.append('\n');
        }
        records.append(record.toString());
    }

    /**
     * Writes {@code content} to {@code filePath} as UTF-8, creating missing parent
     * directories and replacing any existing file. Failures are printed, not thrown.
     */
    private static void writeLogFile(String filePath, String content) {
        File file = new File(filePath);
        try {
            // getParentFile() is null when the path has no directory component.
            File parent = file.getParentFile();
            if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                throw new IOException("Could not create directory: " + parent);
            }
            // FileOutputStream creates the file or truncates an existing one;
            // try-with-resources closes the writer even if the write fails.
            try (Writer writer =
                    new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8)) {
                writer.write(content);
            }
        } catch (IOException | SecurityException e) {
            e.printStackTrace();
        }
    }
}
资源文件
如果不想生成,这里给大家免费提供一份数据
链接: https://pan.baidu.com/s/1A8Waqr38kcuDguMMEoSFQA 提取码: ec1h
链接: https://pan.baidu.com/s/1BTYhB0IzwuqTsHFSePvjXg 提取码: 7u5w

本文介绍了一种使用Java程序自动生成用户注册信息和访问日志数据的方法,包括数据格式、生成逻辑及代码实现,适用于大数据分析项目的前期数据准备。
1365

被折叠的 条评论
为什么被折叠?



