| 课题 名称 | 基于Hive的新能源汽车数据仓库管理系统的设计与实现 | ||||||
| 课题 来源 | 学校课题及自选课题 | 课题 类型 | 工程实践 | 指导 教师 | 付接递 | 职称/学位 | 硕士 |
| 学生 姓名 | 徐东升 | 学号 | 2020105410243 | 专业 | 信息与计算机科学(智能信息处理方向) | 班级 | 2班 |
| 一、调研资料准备 对企业进行实地调研,做好需求分析;走访相关企业了解企业的需求以及应用场景,与相关负责人沟通存在的问题,针对问题做好合理的规划与设计;通过查阅相关资料信息如万方数据等学术网站相关论文内容,分析与研究新能源汽车数据仓库管理系统的功能,并且查阅相关系统开发知识,与指导教师沟通系统设计思路与功能。 二、设计的目的、要求、思路与预期成果 (1)设计目的 本次设计一个基于Hive的新能源汽车数据仓库管理系统。企业管理员登录系统后可以在汽车保养时,根据这些汽车内置传感器传回的数据分析其故障原因,以便维修人员更加及时准确地处理相关的故障问题。或者对这些数据分析之后向车主进行预警,提示车主注意保养汽车,以提高汽车行驶的安全系数。 (2)设计要求 利用Flume进行分布式的日志数据采集,Kafka实现高吞吐量的数据传输,DataX进行数据清洗、转换和整合,MySQL存储结构化数据,HDFS存储大规模原始日志数据,Hive进行数据仓库查询和分析,Spark进行分布式数据计算,DolphinScheduler进行全流程调度管理,帆软BI工具实现可视化大屏展示。实现数据采集、传输、清洗、存储、查询、计算、调度和展示全流程功能。提供监控、数据质量管理、多维度数据分析等功能。 (3)设计思路 新能源汽车数据仓库管理系统,主要服务于新能源汽车车主,企业负责本系统的登录维护,在进行工作流调度时设置Admin用户进行环境管理或环境创建,设置普通用户权限进行项目工作流的创建和任务节点的配置,在FineBI中设置管理员账号进行环境的搭建以及对数据进行处理展示,使管理人员能够更加直观地获取想要的数据,企业根据系统中的数据为车主提供相关服务。其设计思路着重于多方位的数据处理,涵盖了全面数据采集、数据清洗和质量保障、大规模数据存储、高效数据管理、数据分析与挖掘、可扩展性与灵活性、用户友好的数据查询与报告、高性能与低延迟等多个方面。整体设计思路旨在打造一套高效、安全、可靠的大数据处理平台,为新能源汽车行业提供全面的数据支持,助力业务决策并确保系统在未来的发展中具备良好的可扩展性和适应性。 (4)预期成果 毕业设计1套(包含项目软件和数据库等)和毕业论文1份(符合学校查重要求的毕业论文)。 三、任务完成的阶段内容及时间安排 2023年11月06日前查看相关资料、技术,准备技术文档,做好需求分析,下发任务书; 2023年12月31日前制定软件开发计划,设计软件部分功能,完成开题报告; 2024年01月06日前进行开题答辩; 2024年03月15日前完成系统开发与测试,进行中期检查; 2024年04月22日前撰写论文初稿、完成后和指导老师沟通修改论文、定稿、进行查重检测; 2024年05月19日前完成所有毕设材料、参加答辩。
(1)硬件方面:Windows10操作系统电脑一台 (2)软件方面:Linux虚拟机、MySQL数据库、帆软BI。 指导教师签名: 日期:
| |||||||









核心算法代码分享如下:
package com.sql
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{SaveMode, SparkSession}
/**
 * Spark batch job that cleans a scraped car-listing CSV and loads the result
 * into the MySQL table `car`.
 *
 * Pipeline, in order:
 *  1. Read the CSV with an explicit schema.
 *  2. Turn the "-" placeholder in `cartype`, `engine` and `Driving` into null
 *     (as `newcartype`, `newengine`, `newDriving`) and drop every row that
 *     contains a null in any column.
 *  3. Parse `newprice` from string to double; rows whose value is not a valid
 *     number are dropped instead of aborting the whole job.
 *  4. Replace the city in `local` with the province it belongs to.
 *  5. Overwrite the `car` table through JDBC.
 */
object clean {

  /**
   * Province -> cities lookup, kept in the order in which the rules must be
   * tried: the first province whose list contains the city wins.
   * The four municipalities at the end map to themselves.
   */
  private val ProvinceCities: Seq[(String, Seq[String])] = Seq(
    "安徽" -> Seq("合肥", "芜湖", "蚌埠", "淮南", "马鞍山", "淮北", "铜陵", "安庆", "黄山", "滁州",
      "阜阳", "宿州", "六安", "亳州", "池州", "宣城"),
    "福建" -> Seq("福州", "厦门", "莆田", "三明", "泉州", "漳州", "南平", "龙岩", "宁德"),
    "广东" -> Seq("广州", "韶关", "深圳", "珠海", "汕头", "佛山", "江门", "湛江", "茂名", "肇庆",
      "惠州", "梅州", "汕尾", "河源", "阳江", "清远", "东莞", "中山", "潮州", "揭阳", "云浮"),
    "广西" -> Seq("南宁", "柳州", "桂林", "梧州", "北海", "防城港", "钦州", "贵港", "玉林", "百色",
      "贺州", "河池", "来宾", "崇左"),
    "贵州" -> Seq("贵阳", "六盘水", "遵义", "安顺", "毕节", "铜仁", "黔西南", "黔东南", "黔南"),
    "甘肃" -> Seq("兰州", "嘉峪关", "金昌", "白银", "天水", "武威", "张掖", "平凉", "酒泉", "庆阳",
      "定西", "陇南", "临夏", "甘南"),
    "海南" -> Seq("海口", "三亚", "三沙", "儋州", "五指山", "琼海", "文昌", "万宁", "东方", "定安",
      "屯昌", "澄迈", "临高", "白沙", "昌江", "乐东", "陵水", "保亭", "琼中"),
    "河南" -> Seq("郑州", "开封", "洛阳", "平顶山", "安阳", "鹤壁", "新乡", "焦作", "濮阳", "许昌",
      "漯河", "三门峡", "南阳", "商丘", "信阳", "周口", "驻马店", "济源"),
    "湖北" -> Seq("武汉", "黄石", "十堰", "宜昌", "襄阳", "鄂州", "荆门", "孝感", "荆州", "黄冈",
      "咸宁", "随州", "恩施", "仙桃", "潜江", "天门", "神农架"),
    "湖南" -> Seq("长沙", "株洲", "湘潭", "衡阳", "邵阳", "岳阳", "常德", "张家界", "益阳", "郴州",
      "永州", "怀化", "娄底", "湘西"),
    "河北" -> Seq("石家庄", "唐山", "秦皇岛", "邯郸", "邢台", "保定", "张家口", "承德", "沧州", "廊坊",
      "衡水"),
    "黑龙江" -> Seq("哈尔滨", "齐齐哈尔", "鸡西", "鹤岗", "双鸭山", "大庆", "伊春", "佳木斯", "七台河",
      "牡丹江", "黑河", "绥化", "大兴安岭"),
    "江苏" -> Seq("南京", "无锡", "徐州", "常州", "苏州", "南通", "连云港", "淮安", "盐城", "扬州",
      "镇江", "泰州", "宿迁"),
    "江西" -> Seq("南昌", "景德镇", "萍乡", "九江", "新余", "鹰潭", "赣州", "吉安", "宜春", "抚州",
      "上饶"),
    "吉林" -> Seq("长春", "吉林", "四平", "辽源", "通化", "白山", "松原", "白城", "延边"),
    "辽宁" -> Seq("沈阳", "大连", "鞍山", "抚顺", "本溪", "丹东", "锦州", "营口", "阜新", "辽阳",
      "盘锦", "铁岭", "朝阳", "葫芦岛"),
    "内蒙古" -> Seq("呼和浩特", "包头", "乌海", "赤峰", "通辽", "鄂尔多斯", "呼伦贝尔", "巴彦淖尔",
      "乌兰察布", "兴安盟", "锡林郭勒盟", "阿拉善盟"),
    "宁夏" -> Seq("银川", "石嘴山", "吴忠", "固原", "中卫"),
    "青海" -> Seq("西宁", "海东", "海北", "黄南", "海南", "果洛", "玉树", "海西"),
    "陕西" -> Seq("西安", "铜川", "宝鸡", "咸阳", "渭南", "延安", "汉中", "榆林", "安康", "商洛",
      "西咸新区"),
    "四川" -> Seq("成都", "自贡", "攀枝花", "泸州", "德阳", "绵阳", "广元", "遂宁", "内江", "乐山",
      "南充", "眉山", "宜宾", "广安", "达州", "雅安", "巴中", "资阳", "阿坝", "凉山", "甘孜"),
    "山西" -> Seq("太原", "大同", "阳泉", "长治", "晋城", "朔州", "晋中", "运城", "忻州", "临汾",
      "吕梁"),
    "山东" -> Seq("济南", "青岛", "淄博", "枣庄", "东营", "烟台", "潍坊", "济宁", "泰安", "威海",
      "日照", "莱芜", "临沂", "德州", "聊城", "滨州", "菏泽"),
    "新疆" -> Seq("乌鲁木齐", "克拉玛依", "吐鲁番", "哈密", "昌吉", "博尔塔拉", "巴音郭楞", "阿克苏",
      "克孜勒苏", "喀什", "和田", "伊犁", "塔城", "阿勒泰", "石河子", "阿拉尔", "图木舒克", "五家渠",
      "北屯", "铁门关", "双河", "可克达拉", "昆玉"),
    "西藏" -> Seq("拉萨", "日喀则", "昌都", "林芝", "山南", "那曲", "阿里"),
    "云南" -> Seq("昆明", "曲靖", "玉溪", "保山", "昭通", "丽江", "普洱", "临沧", "楚雄", "红河",
      "文山", "西双版纳", "大理", "德宏", "怒江", "迪庆"),
    "浙江" -> Seq("杭州", "宁波", "温州", "嘉兴", "湖州", "绍兴", "金华", "衢州", "舟山", "台州",
      "丽水", "舟山群岛新区"),
    "北京" -> Seq("北京"),
    "上海" -> Seq("上海"),
    "天津" -> Seq("天津"),
    "重庆" -> Seq("重庆")
  )

  /** Columns written to MySQL, in output order. */
  private val OutputColumns: Seq[String] = Seq(
    "name", "mileage", "time", "local", "price", "newprice",
    "newcartype", "newengine", "newDriving", "transfer", "gearbox"
  )

  /**
   * Maps a city column to its province.
   *
   * Each `when` without `otherwise` yields null when it does not match, so
   * `coalesce` returns the province of the first matching rule.
   * NOTE(review): a city that appears in no list ends up as null (this was
   * already the case before); confirm that is what downstream reports expect.
   */
  private def toProvince(city: org.apache.spark.sql.Column): org.apache.spark.sql.Column =
    coalesce(ProvinceCities.map { case (province, cities) =>
      when(city.isin(cities: _*), province)
    }: _*)

  /** Copies column `name` into `new<name>`, replacing the "-" placeholder with null. */
  private def withoutPlaceholder(name: String): org.apache.spark.sql.Column =
    when(col(name) === "-", lit(null))
      .otherwise(col(name).cast(StringType))
      .as(s"new$name")

  /**
   * Job entry point.
   *
   * @param args command-line arguments; currently unused
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("mysql")
      .master("local[6]")
      .getOrCreate()

    // Always release the session, including when a stage or the JDBC write fails.
    try {
      val schema = StructType(
        List(
          StructField("name", StringType),
          StructField("mileage", DoubleType),
          StructField("time", StringType),
          StructField("local", StringType),
          StructField("price", DoubleType),
          StructField("newprice", StringType),
          StructField("cartype", StringType),
          StructField("engine", StringType),
          StructField("Driving", StringType),
          StructField("transfer", StringType),
          StructField("gearbox", StringType)
        )
      )

      // The input is a file on the local Windows drive (not HDFS).
      val carDF = spark.read
        .option("header", "true")
        .schema(schema)
        .csv("D://hadoop_spark_hive_car2024//scrapytest//scrapytest//car2.csv")

      // Keep every source column and add the placeholder-free copies next to them.
      val sourceColumns = schema.fieldNames.toSeq.map(col)
      val cleanedColumns = Seq("cartype", "engine", "Driving").map(withoutPlaceholder)
      val newcar = carDF.select(sourceColumns ++ cleanedColumns: _*)

      // Drops every row with a null anywhere, which also removes the "-" rows.
      val newcars = newcar.na.drop("any")

      // A malformed price becomes null (and the row is dropped below) instead of
      // throwing NumberFormatException inside the executor and killing the job.
      val parsePrice = udf[Option[Double], String](raw => scala.util.Try(raw.toDouble).toOption)
      val featureDf = newcars
        .withColumn("newprice", parsePrice(col("newprice")))
        .na.drop(Seq("newprice"))

      val featureDf1 = featureDf.select(OutputColumns.map(col): _*)
      val newdata = featureDf1.withColumn("local", toProvince(col("local")))

      // NOTE(review): host, user and password are hard-coded; consider moving
      // them to configuration before this runs outside a development machine.
      newdata.write.format("jdbc").mode(SaveMode.Overwrite)
        .option("url", "jdbc:mysql://192.168.227.166:3306/lynbb?useSSL=false")
        .option("driver", "com.mysql.jdbc.Driver")
        .option("dbtable", "car")
        .option("user", "root")
        .option("password", "123456")
        .save()
    } finally {
      spark.stop()
    }
  }
}

被折叠的 条评论
为什么被折叠?



