1、只获取文本中的字母,汉字和数字,剔除标点和特殊符号
> select regexp_replace('assseewfew@@@###$%%.....~~~京津冀123','[[^0-9a-zA-Z\\u4e00-\\u9fa5]]','')
> assseewfew京津冀123
2、获取字符串a首次在字符串b中出现的位置
> select locate('123','124123')
> 4
> select sort_array(
collect_set(
concat_ws(':',cast(sn as string),sp)))
from (select locate(sp,'妈妈做的土豆炖排骨') as sn, sp
from item_ci
where locate(sp,'妈妈做的土豆炖排骨')>0)a
> ["5:土豆","8:排骨"]
3、计算两个字符串的编辑距离(Levenshtein距离:把一个字符串变成另一个所需的最少插入、删除、替换字符数,值越小越相似)
> select levenshtein('秘制红烧肉盖饭十炒时蔬+赠饮','红烧肉盖饭时蔬')
> 7
4、解析json数组
> select
get_json_object(ss.list, '$.sid') as sid,
get_json_object(ss.list, '$.price') as price,
get_json_object(ss.list, '$.sn') as sn,
get_json_object(ss.list, '$.number') as number,
get_json_object(ss.list, '$.outr') as outr,
get_json_object(ss.list, '$.deli') as deli,
get_json_object(ss.list, '$.outs') as outs,
get_json_object(ss.list, '$.act') as act from
(select split(regexp_replace(regexp_extract(
'[{"sid":"3458764527033385083","sn":1,"number":200,"price":33,"outr":0,"outs":0,"deli":0,"act":["3458764719724101804"]},
{"sid":"3458764702871191593","sn":1,"number":2000,"price":39,"outr":0,"outs":0,"deli":0,"act":["3458764730620903593"]},
{"sid":"3458764712379678782","sn":1,"number":900,"price":42,"outr":0,"outs":0,"deli":0,"act":["3458764738128707753"]}]',
'^\\[(.+)\\]$',1),'\\}\\,\\{','\\}\\|\\|\\{'),'\\|\\|') as str) pp lateral view explode(pp.str) ss as list
| sid | price | sn | number | outr | deli | outs | act |
|---|---|---|---|---|---|---|---|
| 3458764527033385083 | 33 | 1 | 200 | 0 | 0 | 0 | ["3458764719724101804"] |
| 3458764702871191593 | 39 | 1 | 2000 | 0 | 0 | 0 | ["3458764730620903593"] |
| 3458764712379678782 | 42 | 1 | 900 | 0 | 0 | 0 | ["3458764738128707753"] |
这篇博客主要介绍了在HIVE中处理字符串的技巧,包括筛选字母、汉字和数字,查找字符串首次出现位置,比较字符串相似度,以及解析JSON数据的方法。
4万+

被折叠的 条评论
为什么被折叠?



