PS:如果各位有疑问的话,可以留下微信,我看到会第一时间加的,以后可以多多交流
CREATE DATABASE task3;
\c task3
–创建数据库,切换数据库
CREATE TABLE data(update_time date,id text,title text,price numeric,sale_count int,comment_count int,店名 text);
\COPY data FROM 'C:\Users\Administrator\Desktop\数据可视化课程\考核\考核项目3_基于PostgreSQL的消费数据解析\体验课资料_双十一淘宝美妆数据.csv'WITH CSV HEADER;
--创建表格导入目标数据
ALTER TABLE data RENAME COLUMN update_time TO 日期;
ALTER TABLE data RENAME COLUMN id TO 商品id;
ALTER TABLE data RENAME COLUMN title TO 商品名称;
ALTER TABLE data RENAME COLUMN price TO 价格;
ALTER TABLE data RENAME COLUMN sale_count TO 销售量;
ALTER TABLE data RENAME COLUMN comment_count TO 评价数量;
ALTER TABLE data RENAME COLUMN 店名 TO 品牌名称;
--修改目标数据列名
SELECT COUNT(商品id) AS 数据总量 FROM data;
WITH t AS (SELECT DISTINCT (商品id) FROM data) SELECT COUNT(商品id) AS 商品id总量 FROM t;
SELECT 品牌名称 AS 店名,COUNT(品牌名称) AS 不同品牌数据量 FROM data GROUP BY 店名 ORDER BY 不同品牌数据量 DESC;
--查看数据量
CREATE TABLE data_1 AS
SELECT 商品id FROM data WHERE 日期 = '2016-11-11';
ALTER TABLE data_1 ADD 是否参与活动 boolean;
UPDATE data_1 SET 是否参与活动 = True;
--创建有打折的id表
CREATE TABLE data_2 AS
SELECT 商品id,MAX(日期),MIN(日期) FROM data GROUP BY 商品id;
--创建商品的日期max和min
CREATE TABLE data_3 AS
SELECT data_2.商品id,data_2.max,data_2.min,data_1.是否参与活动
FROM data_2 FULL OUTER JOIN data_1 ON data_1.商品id = data_2.商品id;
UPDATE data_3 SET 是否参与活动 = False WHERE 是否参与活动 IS NULL;
--合并数据得到销售日期max,min和当天是否在售做判断
ALTER TABLE data_3 ADD 分类 text;
UPDATE data_3 SET 分类 = 'A'WHERE 是否参与活动 = True AND max > '2016-11-11' AND min < '2016-11-11';
UPDATE data_3 SET 分类 = 'B' WHERE 是否参与活动 = True AND max = '2016-11-11' AND min < '2016-11-11';
UPDATE data_3 SET 分类 = 'C' WHERE 是否参与活动 = True AND max > '2016-11-11' AND min = '2016-11-11';
UPDATE data_3 SET 分类 = 'D' WHERE 是否参与活动 = True AND max = '2016-11-11' AND min = '2016-11-11';
UPDATE data_3 SET 分类 = 'E' WHERE 是否参与活动 = False AND max < '2016-11-11';
UPDATE data_3 SET 分类 = 'F' WHERE 是否参与活动 = False AND min > '2016-11-11';
UPDATE data_3 SET 分类 = 'G' WHERE 是否参与活动 = False AND min < '2016-11-11' AND max > '2016-11-11';
--插入类别并做好类别标签
SELECT 分类,COUNT(分类),
CAST(COUNT(分类) AS numeric)/(SELECT COUNT(分类) FROM data_3) AS 商品类型占比
FROM data_3 GROUP BY 分类 ORDER BY COUNT(分类) DESC;
--查看数据类别占比
ALTER TABLE data ADD 是否参与活动 boolean;
ALTER TABLE data ADD 类别 varchar(10);
--创建data的新字段
UPDATE data SET 是否参与活动 = data_3.是否参与活动 FROM data_3 WHERE data.商品id = data_3.商品id;
UPDATE data SET 分类 = data_3.分类 FROM data_3 WHERE data.商品id = data_3.商品id;
--更新data的字段来自data_3
CREATE TABLE data_4 AS
SELECT DISTINCT 商品id,价格 FROM data;
--筛选单商品id和价格
CREATE TABLE data_5 AS
SELECT 商品id,MAX(价格),MIN(价格) FROM data_4 GROUP BY 商品id HAVING COUNT(商品id) > 1;
--筛选商品id和价格大于1的数据,得到有打折的数据
ALTER TABLE data_5 ADD 是否打折 boolean;
UPDATE data_5 SET 是否打折 = True;
--设置打折表现
ALTER TABLE data_5 ADD 折扣率 numeric;
UPDATE data_5 SET 折扣率 = min/max;
--算出折扣率
ALTER TABLE data ADD 是否打折 boolean;
UPDATE data SET 是否打折 = False;
ALTER TABLE data ADD 折扣率 numeric;
UPDATE data SET 折扣率 = 1.0;
--给data设置打折和折扣率标签
UPDATE data SET 是否打折 = data_5.是否打折 FROM data_5 WHERE data.商品id = data_5.商品id;
UPDATE data SET 折扣率 = data_5.折扣率 FROM data_5 WHERE data.商品id = data_5.商品id;
--更新有打折和有折扣的数据到data
WITH t AS (SELECT DISTINCT 商品id,是否打折 FROM data)
SELECT CAST((SELECT COUNT(是否打折) FROM t WHERE 是否打折 = True) AS numeric)/(SELECT COUNT(商品id) FROM t) AS 打折占比;
--查看商品打折占比
WITH t AS (SELECT DISTINCT 商品id,折扣率,品牌名称 FROM data)
SELECT 品牌名称,AVG(折扣率) AS 平均折扣率 FROM t GROUP BY 品牌名称 ORDER BY 平均折扣率;
--查看品牌打折占比
CREATE TABLE data_6 AS
SELECT DISTINCT 品牌名称,商品id,是否打折,折扣率 FROM data;
--创建唯一商品id表
CREATE TABLE data_7 AS
SELECT 品牌名称,AVG(折扣率),COUNT(商品id) FROM data_6 GROUP BY 品牌名称;
--创建品牌均值和总数表
CREATE TABLE data_8 AS
SELECT 品牌名称,COUNT(商品id) FROM data_6 WHERE 是否打折 = True GROUP BY 品牌名称;
--计算品牌打折数量
ALTER TABLE data_7 ADD 打折商品数 int;
UPDATE data_7 SET 打折商品数 = 0;NG
--插入打折商品数列并填充为0
UPDATE data_7 SET 打折商品数 = data_8.count FROM data_8 WHERE data_7.品牌名称 = data_8.品牌名称;
--更新有打折的商品总数
ALTER TABLE data_7 RENAME COLUMN avg TO 平均打折力度;
ALTER TABLE data_7 RENAME COLUMN count TO 商品总数;
--最终修改列名
\COPY data to 'C:\Users\Administrator\Desktop\结果数据.CSV' WITH CSV HEADER;
\COPY data_7 to 'C:\Users\Administrator\Desktop\可视化数据.CSV' WITH CSV HEADER;
--导出数据
通过excel出图,查看品牌打折占比和平均打折力度