-- Session tuning for Hive batch-aggregation (join / group-by) jobs.
-- NOTE(review): this file mixes legacy Hadoop-1 property names (mapred.*,
-- io.sort.factor) with Hadoop-2/YARN names (mapreduce.*). On MRv2 the legacy
-- names are deprecated aliases -- confirm the target Hadoop version before
-- pruning either set.

-- Target ~100 MB of input per reducer (drives the reducer count).
set hive.exec.reducers.bytes.per.reducer=100000000;
-- Column pruning: read only the columns a query actually references.
set hive.optimize.cp=true;
-- NOTE(review): "prunner" (double n) looks like a typo of a pruning flag;
-- Hive silently ignores unknown properties, so this line may be a no-op --
-- verify the intended property name against the Hive docs.
set hive.optimize.prunner=true;
-- Run independent stages of a query in parallel, up to 8 at a time.
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;
-- Compress intermediate map output to cut shuffle I/O.
set mapred.compress.map.output=true;
set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.DefaultCodec;
-- Shuffle fetch tuning: 10 parallel copier threads per reducer, with a
-- 200-unit copy backoff (presumably seconds -- confirm against cluster docs).
set mapred.reduce.parallel.copies=10;
set mapred.reduce.copy.backoff=200;
-- Fractions of reducer heap used to buffer shuffle data, retain map output
-- during reduce, and trigger in-memory merges.
set mapred.job.shuffle.input.buffer.percent=0.9;
set mapred.job.reduce.input.buffer.percent=0.5;
set mapred.job.shuffle.merge.percent=0.8;
-- Merge up to 300 spill segments per pass when sorting/merging.
set io.sort.factor=300;
-- Disable automatic map-join conversion (keeps big-table joins off the
-- map-side hash table, avoiding mapper OOM).
set hive.auto.convert.join=false;
-- Cap input splits at ~200 MB to bound per-mapper work.
set mapred.max.split.size=200000000;
-- YARN container sizes (MB) and task JVM heaps. The JVM heap must fit inside
-- its container.
-- NOTE(review): -Xmx7168m inside a 5632 MB map container overcommits the
-- container limit -- YARN may kill map tasks; confirm the intended sizes.
set mapreduce.map.memory.mb=5632;
set mapreduce.reduce.memory.mb=11264;
-- mapred.child.java.opts is the deprecated umbrella heap setting; on MRv2
-- the two per-task lines below take precedence over it.
set mapred.child.java.opts=-Xmx7168m;
set mapreduce.map.java.opts=-Xmx7168m;
set mapreduce.reduce.java.opts=-Xmx7168m;
最近数据汇总由 Oracle 迁移到了 Hive,改为批处理方式汇总。针对 Hive SQL 做了一些内存与并发方面的调优,在这里分享一下。
其实汇总逻辑本身很简单,主要就是 JOIN 和 GROUP BY 等操作。