1. 基本的经典查询
#基本的查询语句
SELECT *
FROM data.`dataanalyst`
WHERE city ='上海'
AND (education = '本科'
OR workYear = '1-3年'
)
AND secondType LIKE '%开发%' #字段包含开发
OR secondType LIKE '后端%' #优先级:()> AND > OR
结果:
2. 不同城市招聘公司的数量,需要用distinct去重
#不同城市招聘公司的数量,需要用distinct去重
SELECT city, COUNT(positionId), COUNT(DISTINCT(companyId))
FROM data.`dataanalyst`
GROUP BY city
3. 不同城市,学历的招聘岗位数目
#不同城市,学历的数目
SELECT city,education,COUNT(1) FROM data.`dataanalyst`
GROUP BY city,education

4. 不同城市电子商务岗位的数量
#不同城市电子商务岗位的数量
SELECT city,COUNT(1) FROM data.`dataanalyst`
WHERE industryField LIKE '%电子商务%'
GROUP BY city
HAVING COUNT(1)>50 #二次过滤,挑选拥有电子商务岗位数量为50以上的城市
5. 这是第4题的改写
上面也可以写成这样
把where合并入having中
#不同城市电子商务岗位的数量
SELECT city,COUNT(1) FROM data.`dataanalyst`
GROUP BY city
HAVING COUNT(IF(industryField LIKE '%电子商务%',1,NULL))>50
得出的结果完全相同
6. 不同城市下,电子商务岗位在所有岗位中的占比
注意:AS的别名在where中起名之后,只能在having,order by中使用
select
city,
COUNT(IF(industryField LIKE '%电子商务%',1,NULL)) as emarket,
count(1) as total,
COUNT(IF(industryField LIKE '%电子商务%',1,NULL))/count(1) as proportion
from data.`dataanalyst`
group by city
having emarket>10
order by proportion
7. 截取薪资上限和下限
SELECT
LEFT(salary,LOCATE('k',salary)-1) AS 'minSalary',
RIGHT(salary,LOCATE('-',salary)-1) AS '最大薪资数据',
#去掉最后的k
LEFT(RIGHT(salary,LOCATE('-',salary)-1),LOCATE('k',RIGHT(salary,LOCATE('-',salary)-1))-1) AS 'maxSalary',
salary
FROM data.`dataanalyst`
8. 是第7题薪资上下限的改写,使用substr函数
SELECT
LEFT(salary,LOCATE('k',salary)-1) AS 'minSalary',
LOCATE('-',salary),
LENGTH(salary),
#substr(字符串,从哪里开始,截取长度)
SUBSTR(salary,LOCATE('-',salary)+1,LENGTH(salary)-LOCATE('-',salary)-1) AS 'maxSalary',
salary
FROM data.`dataanalyst`
9. 子查询实现对平均薪资分区
使用子查询,对数据进一步过滤,且使用了别名,更加优雅hhh
SELECT
CASE
WHEN (minSalary+maxSalary)/2 <=10 THEN '0-10k'
WHEN (minSalary+maxSalary)/2 <=20 THEN '10k-20k'
WHEN (minSalary+maxSalary)/2 <=30 THEN '20k-30k'
ELSE '30k以上'
END AS '平均薪资范围',
(minSalary+maxSalary)/2 AS 'aveSalary',
salary
FROM(
SELECT
LEFT(salary,LOCATE('k',salary)-1) AS 'minSalary',
LOCATE('-',salary),
LENGTH(salary),
#substr(字符串,从哪里开始,截取长度)
SUBSTR(salary,LOCATE('-',salary)+1,LENGTH(salary)-LOCATE('-',salary)-1) AS 'maxSalary',
salary
FROM
data.`dataanalyst`
) AS t
注意对表子查询的话,最后要加上 as t
10. 查询职位数目在100以上的城市的详细数据
使用子查询,先查询出符合条件的城市名
【注意】外部 where 中的 city 必须与子查询中的 city 字段名相同
SELECT *
FROM data.`dataanalyst`
WHERE city IN (
SELECT
city
FROM data.`dataanalyst`
GROUP BY city
HAVING COUNT(positionId)>100
)
子查询结果:
最终结果:
11. 表关联join
查询:规模为150-500的公司的岗位总数
SELECT
COUNT(t2.companyId) AS '目标职位数量',
COUNT(1) AS '总职位数量'
FROM data.`dataanalyst` t1 LEFT JOIN(
SELECT *
FROM data.`company`
WHERE companySize = '150-500人') t2 ON t1.`companyId`=t2.companyId
12. 用power shell导入数据
C:\Windows\system32>cd /d D:\Environment\mysql-5.7.19\bin
D:\Environment\mysql-5.7.19\bin>mysql -u root -p
Enter password: ******
mysql> load data local infile 'C:/Users/Jianfei Sun/Desktop/七周成为数据分析师_课件/第五周/orderinfo.csv' into table data.orderinfo character set utf8 fields terminated by ',';
Query OK, 539414 rows affected, 65535 warnings (2.98 sec)
Records: 539414 Deleted: 0 Skipped: 0 Warnings: 539415
解决乱码:在table后面加入character set utf8
语句
13. 时间字段
SELECT
paidTIme,
DATE(paidTIme),
MONTH(paidTIme), #返回月份
DATE_FORMAT(paidTIme,'%Y-%m-%d'), #年-月-日
DATE(DATE_ADD(paidTIme,INTERVAL 1 DAY)) #加上一天,能嵌套函数
FROM data.`orderinfo`
14. 统计不同月份的下单人数
#统计不同月份的下单人数
SELECT MONTH(paidTIme),COUNT(DISTINCT(userId))
FROM data.`orderinfo`
WHERE isPaid ='已支付' #过滤脏数据
GROUP BY MONTH(paidTIme)
原始表数据