Hive0.14在left outer join多级连接中，执行计划生成BUG记录_left outer join requires an on or using clause.-CSDN博客

本文详细分析了一个在Hive 0.9和0.14上执行三级左外连接查询时出现的不同结果的现象，特别关注了在连接顺序改变后问题得到解决的情况。通过深入研究执行计划和源代码，揭示了编译器在处理多级连接时可能存在的逻辑漏洞，并提出了修正方案。最终验证了修正后的代码能够使查询结果一致。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

前言：

这几天遇到一个很诡异的问题：一条三级 left outer join 的语句，在 Hive 0.9 和 0.14 上的执行结果不一样。

而且在 0.14 上，通过调换右表的连接顺序可以得到正确的输出，但其中的原因却不得而知，情况非常诡异，猜测是编译器生成执行计划时有问题！（所谓调换右表连接顺序，是指把 A left outer join B left outer join C 改成 A left outer join C left outer join B；出问题的是 B 子句中聚合出的一个结果，它在最终结果中表现不对。）

下面详细介绍下问题：

原语句：

 
          -- Reproduction query from the write-up above. Derived table A is left outer
          -- joined first to B and then to C, and the outer WHERE keeps a single
          -- customer, channel url and channel id.
          -- NOTE(review): v_date looks like a placeholder filled in by the calling
          -- script before the statement reaches Hive. Confirm against the caller.
          select  A.state_date,
         
          A.customer,
         
          A.channel_2,
         
          A.id,
         
          A.pid,
         
          A.type,
         
          A.pv,
         
          A.uv,
         
          A.visits,
         
          -- A row of A without a match in C has a NULL stay_visits. Report 0 instead.
          if
          (C.stay_visits is 
          null
          ,
          0
          ,C.stay_visits) as stay_visits,
         
          A.stay_time,
         
          -- Same NULL to 0 substitution for the bounce value taken from B.
          if
          (B.bounce is 
          null
          ,
          0
          ,B.bounce) as bounce
         
          from
         
          -- A: one row per state_date, customer, channel url, id, pid and type,
          -- carrying the row count (pv), distinct gid count (uv), distinct
          -- session_id count (visits) and the summed stay_time.
          (select a.state_date,
         
          a.customer,
         
          b.url as channel_2,
         
          b.id,
         
          b.pid,
         
          b.type,
         
          count(
          1
          ) as pv,
         
          count(distinct a.gid) uv,
         
          count(distinct a.session_id) as visits,
         
          sum(a.stay_time) as stay_time
         
          from      
         
          -- a: page view rows whose l_date equals the v_date placeholder.
          ( select state_date,
         
          customer,
         
          gid,
         
          session_id,
         
          ep,
         
          stay_time
         
          from bdi_fact.mid_pageview_dt0
         
          where l_date =
          '$v_date'
         
          )a
         
          join
         
          -- b: channel rows for the same l_date with type 2 and dr 0. l_date is
          -- exposed as state_date, but that alias is not referenced outside b.
          (select l_date as state_date ,
         
          url,
         
          id,
         
          pid,
         
          type,
         
          cid
         
          from bdi_fact.frequency_channel
         
          where l_date =
          '$v_date'
         
          and type =
          '2'
         
          and dr=
          '0'
         
          )b
         
          -- Rows are paired on customer equals cid, then kept only when ep matches
          -- the channel url used as the regular expression of rlike.
          on  a.customer=b.cid 
         
          where a.ep  rlike b.url
         
          group by a.state_date, a.customer, b.url,b.id,b.pid,b.type
         
          )A
         
          left outer join
         
          -- B: per state_date, customer, channel url and id, the sum of pagedepth
          -- named bounce. Per the write-up above, an aggregate from this subquery is
          -- the value that shows up wrong in the final result on Hive 0.14.
          (   select
         
          c.state_date ,
         
          c.customer ,
         
          d.url as channel_2,
         
          d.id,
         
          sum(pagedepth) as bounce
         
          from
         
          -- c: the exit url of a session (renamed ep) together with the pagedepth
          -- recorded for the same customer and session_id.
          ( select
         
          t1.state_date ,
         
          t1.customer ,
         
          t1.session_id,
         
          t1.ep,
         
          t2.pagedepth
         
          from          
         
          ( select
         
          state_date ,
         
          customer ,
         
          session_id,
         
          exit_url as ep
         
          from ods.mid_session_enter_exit_dt0
         
          where l_date =
          '$v_date'
         
          )t1
         
          join
         
          -- t2: only action rows whose pagedepth equals the literal 1 are kept, so
          -- every value later summed into bounce has passed that comparison.
          ( select
         
          state_date ,
         
          customer ,
         
          session_id,
         
          pagedepth
         
          from ods.mid_session_action_dt0
         
          where l_date =
          '$v_date'
         
          and  pagedepth=
          '1'
         
          )t2
         
          on t1.customer=t2.customer
         
          and t1.session_id=t2.session_id
         
          )c
         
          join
         
          -- d: every column of the channel table for the same l_date, type 2 and
          -- dr 0. Only cid, url and id are referenced from d.
          (select *
         
          from bdi_fact.frequency_channel
         
          where l_date =
          '$v_date'
         
          and type =
          '2'
         
          and dr=
          '0'
         
          )d
         
          on c.customer=d.cid
         
          where c.ep  rlike d.url
         
          group by  c.state_date,c.customer,d.url,d.id
         
          )B
         
          -- B is matched to A on customer, channel_2 and id.
          -- NOTE(review): state_date is selected by B but is not part of the join
          -- key. Confirm that this is intended.
          on
         
          A.customer=B.customer
         
          and A.channel_2=B.channel_2
         
          and A.id=B.id
         
          left outer join
         
          -- C: same sources and grouping columns as A, restricted to page views
          -- whose stay_time is not NULL and differs from the literal 0, counting
          -- distinct session_id values as stay_visits.
          (
         
          select e.state_date,
         
          e.customer,
         
          f.url as channel_2,
         
          f.id,
         
          f.pid,
         
          f.type,
         
          count(distinct e.session_id) as stay_visits
         
          from      
         
          ( select state_date,
         
          customer,
         
          gid,
         
          session_id,
         
          ep,
         
          stay_time
         
          from bdi_fact.mid_pageview_dt0
         
          where l_date =
          '$v_date'
         
          )e
         
          join
         
          (select l_date as state_date,
         
          url,
         
          id,
         
          pid,
         
          type,
         
          cid
         
          from bdi_fact.frequency_channel
         
          where l_date =
          '$v_date'
         
          and type =
          '2'
         
          and dr=
          '0'
         
          )f
         
          on  e.customer=f.cid 
         
          where e.ep  rlike f.url
         
          and e.stay_time is not 
          null
         
          and e.stay_time <>
          '0'
         
          group by e.state_date, e.customer, f.url,f.id,f.pid,f.type
         
          )C
         
          -- C is matched to A on customer, channel_2, id, pid and type.
          -- NOTE(review): as with B, state_date is not compared. Confirm.
          on
         
          A.customer=C.customer
         
          and   A.channel_2=C.channel_2
         
          and   A.id=C.id
         
          and   A.pid=C.pid
         
          and   A.type=C.type
         
          -- The outer filter references only columns of A, the preserved side of
          -- both outer joins.
          -- NOTE(review): the double quote after the terminating semicolon on the
          -- last line presumably closes a shell string whose opening is not visible
          -- here. Confirm before running this text as is.
          where A.customer=
          'Cdianyingwang' 
          and A.channel_2=
          'http://www.1905.com/film/filmnews/jk/' 
          and A.id=
          '127'
          ;"