例行拉取日志
#!/bin/bash
#
# download_logs_template.sh - template for fetching one log per day over a
# range of consecutive days, walking backwards from the newest day.
#
# Usage: bash download_logs_template.sh LastDay DaysToDownload
#   LastDay         newest day to fetch: YYYYMMDD, or a word such as
#                   "yesterday" that `date -d` understands
#   DaysToDownload  positive integer, number of days counted back from LastDay
#
# Requires bash (arithmetic for-loop) and a `date` that supports
# -d "<day> <n> days ago" (GNU coreutils).

# Print the usage text to stderr.
download_logs_usage() {
  {
    echo ""
    echo "Usage: bash download_logs_template.sh LastDay DaysToDownload"
    echo "Example: bash download_logs_template.sh 20140831 7, this will download logs for 7 days: 20140825-20140831."
    echo ""
  } >&2
}

#######################################
# Iterate over the requested days (newest first) and run the download step
# for each of them, printing progress to stdout.
# Arguments:
#   $1 - LastDay (see file header)
#   $2 - DaysToDownload (see file header)
# Returns:
#   0 on success, 1 on missing/invalid arguments or a date computation error.
#######################################
download_logs() {
  # Step 1: Check Parameters
  if (( $# < 2 )); then
    download_logs_usage
    return 1
  fi

  local last_day=$1
  local days_to_download=$2

  # Reject non-numeric values and leading zeros ("08" would be parsed as an
  # invalid octal number inside (( ))).
  if [[ ! "${days_to_download}" =~ ^[1-9][0-9]*$ ]]; then
    echo "DaysToDownload must be a positive integer, got '${days_to_download}'" >&2
    download_logs_usage
    return 1
  fi

  # Resolve LastDay to YYYYMMDD once. This validates the argument, turns
  # "yesterday" into a concrete date, and keeps every iteration anchored to
  # the same day even if the run crosses midnight.
  if [[ -z "${last_day}" ]] \
    || ! last_day=$(date -d "${last_day}" +%Y%m%d 2>/dev/null); then
    echo "LastDay must be YYYYMMDD or \"yesterday\", got '$1'" >&2
    download_logs_usage
    return 1
  fi

  # Step 2: Fetch log data
  local i finished togo day start_date end_date
  for (( i = 1; i <= days_to_download; ++i )); do
    finished=$(( i - 1 ))
    togo=$(( days_to_download - finished ))
    echo ""
    echo "-------"
    day=$(date -d "${last_day} ${finished} days ago" +%Y%m%d) || return 1
    echo "[$(date)] Downloading log for ${day}, ${finished} finished, ${togo} to go......"
    start_date=$(date)
    # download logs
    # download logs finished
    end_date=$(date)
    echo "Finished ${day}. Started at ${start_date}, finished at ${end_date}"
  done

  # After the loop ${day} holds the oldest day that was processed.
  echo ""
  echo "Finished downloading ${days_to_download} logs ( ${day} - ${last_day} ) !"
  echo ""
}

download_logs "$@"
用法:
# Date arithmetic relative to a fixed day, using the relative-date syntax of
# `date -d` / `date --date` (GNU coreutils).
this_day=20141212
i=42
j=24
# ${i} days before ${this_day}, formatted as YYYYMMDD.
history_day=$(date --date="${this_day} ${i} days ago" '+%Y%m%d')
# ${j} days after ${this_day}, formatted as YYYYMMDD.
future_day=$(date --date="${this_day} ${j} days" '+%Y%m%d')
ref:
linux date 前一天等多种用法
http://jerrybear.blog.51cto.com/629421/393097
shell计算指定日期的后一天日期 http://bbs.chinaunix.net/thread-3588499-1-1.html
生成wget命令
#!/bin/bash
#
# generate_wget_cmd.sh - print a wget command line that mirrors <dirpath>
# from <hostname>, with --cut-dirs set to the number of directory components
# in <dirpath> and -nH so no host-name directory is created.
#
# Usage: bash generate_wget_cmd.sh <hostname> <dirpath>
#
# Requires bash (arrays, here-strings).

#######################################
# Print the wget command for the given host and directory path.
# Arguments:
#   $1 - hostname
#   $2 - directory path on that host, components separated by "/"
# Outputs:
#   The wget command line on stdout; usage text on stderr when called with
#   fewer than two arguments.
# Returns:
#   0 on success, 1 on missing arguments.
#######################################
generate_wget_cmd() {
  if (( $# < 2 )); then
    {
      echo ""
      echo "Usage: bash generate_wget_cmd.sh <hostname> <dirpath>"
      echo "Example: bash generate_wget_cmd.sh www.xyz.com /home/yaoyao/project"
      echo ""
    } >&2
    return 1
  fi

  local hostname=$1
  local dirpath=$2
  local -a path_arr=()
  local part
  local path_length=0

  # Split on "/" only. Splitting via an unquoted expansion would also split
  # on spaces and expand glob characters such as "*", giving a wrong count.
  IFS='/' read -r -a path_arr <<< "${dirpath}"
  for part in "${path_arr[@]}"; do
    # Leading, trailing and doubled slashes produce empty fields; skip them.
    if [[ -n "${part}" ]]; then
      path_length=$(( path_length + 1 ))
    fi
  done

  # NOTE(review): wget treats every non-option argument as a URL, so the
  # trailing "." looks like a leftover from scp-style usage - confirm before
  # removing it from the generated command.
  echo "wget --cut-dirs=${path_length} -nH -m ${hostname}:${dirpath} ."
}

generate_wget_cmd "$@"
正则表达式匹配
# Extract the first run of eight consecutive digits from a string with the
# [[ =~ ]] operator; on a match, BASH_REMATCH[0] holds the matched text.
s="hdfs://hdfs.example.com:8800/app/yaoyao/20141203/part-00042"
# Keeping the pattern in a variable avoids quoting surprises on the
# right-hand side of =~ (it must stay unquoted to act as a regex).
date_re='[0-9]{8,8}'
if [[ "${s}" =~ ${date_re} ]]; then
  printf '%s\n' "${BASH_REMATCH[0]}"
fi
输出:20141203
将文件分割成多个
# Split ../data.txt into pieces of 1,000,000 lines each, written to the
# current directory as part-00000, part-00001, ...
#   -d    numeric suffixes instead of alphabetic ones
#   -a 5  suffixes are five characters long
# Options come before the operands so the command does not rely on GNU-style
# argument permutation (options after the first operand are rejected when
# POSIXLY_CORRECT is set and by non-GNU implementations).
split -l 1000000 -d -a 5 -- ../data.txt part-
结果:
part-00000
part-00001
part-00002
处理hadoop目录名称
#######################################
# Print "<entry>/<suffix>," for every entry of an HDFS directory listing
# whose path matches an extended regular expression. The output has no
# newlines and ends with a comma when at least one entry matched.
# Globals:
#   HADOOP_HOME (read) - installation that provides bin/hadoop
# Arguments:
#   $1 - HDFS directory to list
#   $2 - extended regex (grep -E) applied to the last field of each line
#   $3 - suffix appended to every matching entry after a "/"
# Outputs:
#   The joined list on stdout.
# Returns:
#   1 if HADOOP_HOME is unset or empty, otherwise the pipeline's status.
#######################################
list_hdfs_inputs() {
  local dir=$1
  local pattern=$2
  local suffix=$3
  local entry

  if [[ -z "${HADOOP_HOME:-}" ]]; then
    echo "HADOOP_HOME must be set" >&2
    return 1
  fi

  # The first line of the listing is skipped (presumably the "Found N items"
  # header - confirm against your hadoop version); the last field of every
  # remaining line is taken as the entry path.
  "${HADOOP_HOME}/bin/hadoop" fs -ls "${dir}" \
    | awk 'NR > 1 { print $NF }' \
    | grep -E -- "${pattern}" \
    | while IFS= read -r entry; do
        printf '%s/%s,' "${entry}" "${suffix}"
      done
}

# Matches 20141126 through 20141217 (the pattern is not anchored, so it
# matches anywhere in the entry path).
selectdate="2014112[6-9]|20141130|2014120[1-9]|2014121[0-7]"
input=$(list_hdfs_inputs /hdfs/yaoyao/data_dir_a "${selectdate}" suffix)
# The list above ends with a comma, so a fixed path can be appended directly.
input="${input}/hdfs/yaoyao/data_b"
# When nothing is appended, strip the trailing comma instead:
#input=${input%,}
读入文件
#######################################
# Print every line of a file prefixed with its zero-based index, e.g.
# "0) first line".
# Globals:
#   n (written) - number of lines printed; still set after the call because
#                 the loop reads via redirection instead of a pipeline
#                 (a `cat file | while ...` loop runs in a subshell and
#                 loses its counter).
# Arguments:
#   $1 - path of the file to read
# Outputs:
#   Numbered lines on stdout; an error message on stderr if unreadable.
# Returns:
#   0 on success, 1 if the file cannot be read.
#######################################
print_numbered_lines() {
  local file=$1
  local line
  n=0

  if [[ ! -r "${file}" ]]; then
    echo "cannot read '${file}'" >&2
    return 1
  fi

  # IFS= and -r keep leading/trailing whitespace and backslashes intact;
  # the extra -n test picks up a final line that has no trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    printf '%s) %s\n' "${n}" "${line}"
    n=$(( n + 1 ))
  done < "${file}"
}

n=0
CONF_FILE="action_needed.conf"
print_numbered_lines "${CONF_FILE}"
Get value from an attribute using xmllint
http://stackoverflow.com/questions/11611385/get-value-from-an-attribute-using-xmllint