我们开发电商网站的时候,需要把网站中一些访问量比较大、页面内容不需要频繁更新的页面进行静态化处理,以减少数据库的压力。
我们用的是wget生成静态页面,但是这种方式需要在服务器上执行命令,并在服务器上建一个定时任务,每隔一段时间去重新生成静态页面,使页面内容得以更新。这种方式是一种较为简便的方法(主要靠脚本),脚本如下:
#!/bin/bash
#######################################################
# Static page cache control: pagecache.sh             #
#######################################################
#
# $1  page to process
#       pagecache.sh all          # every configured page (default)
#       pagecache.sh index.php    # only that page
#
# -h  site root URL
#       pagecache.sh -h http://b2b.ccb.com
#     [Note] this host must be able to reach the domain/IP; a local hosts
#     entry can be used to resolve it.
#
# -p  web root path (directory the html files are written to)
#       pagecache.sh -p /home/ecp/ecp/php/web
#
# -c  clear the cache (delete the html files)
#
#------------------------------------------------------
# Pages to make static. The two arrays are parallel:
# PAGES[i] is fetched from the site and stored as HTMLPAGES[i].
PAGES=("sparkant/sparkant.xhtml?h=165")
HTMLPAGES=("sparkant.html")
PAGE="all"
SiteURL="http://127.0.0.1:8080/sparkant"
SitePath="/data/app/tomcat-7.0.59/webapps/sparkant/static"
WGeter="/usr/bin/wget"
ClearPage=0

# Command-line arguments
while [ $# -gt 0 ]; do
    case "$1" in
        -h)
            shift
            if [[ $# -gt 0 && -n "$1" ]]; then
                SiteURL=$1
            fi
            ;;
        -p)
            shift
            if [[ $# -gt 0 && -n "$1" ]]; then
                SitePath=$1
            fi
            ;;
        -c)
            ClearPage=1
            ;;
        *)
            PAGE=$1
            ;;
    esac
    # An option given without its value leaves nothing to shift.
    if [ $# -gt 0 ]; then
        shift
    fi
done

# Make sure wget is installed and executable
if [[ ! -x "$WGeter" ]]; then
    echo "wget not exist or not execute permission"
    exit 1
fi

# Everything below uses relative paths (including rm -rf), so never continue
# if the target directory cannot be entered.
cd "$SitePath" || { echo "cannot cd to $SitePath" >&2; exit 1; }

for ((i = 0; i < ${#HTMLPAGES[@]}; i++)); do
    P=${PAGES[i]}
    HtmlPage=${HTMLPAGES[i]}
    HtmlPage=${HtmlPage/_dyc/}

    # Process this entry when "all" was requested, or when the requested page
    # names either the dynamic page or its static file.
    if [[ "$PAGE" != "all" && "$PAGE" != "$P" && "$PAGE" != "$HtmlPage" ]]; then
        continue
    fi

    if [ "$ClearPage" != "0" ]; then
        # Clear the static cache for this page
        echo "...clear $HtmlPage..."
        rm -rf -- "$HtmlPage" && echo "[ ok ]"
        continue
    fi

    # Remove the archive dated one day ago.
    # NOTE(review): the original comment said archives are kept for 7 days,
    # but the code has always removed only yesterday's archive; confirm the
    # intended retention before changing the offset.
    cutdate=$(date +%Y%m%d --date='1 days ago')
    tarfile="$HtmlPage.$cutdate.tar"
    if [ -f "$tarfile" ]; then
        rm -rf -- "$tarfile"
        echo "...delete $tarfile..."
    fi

    # Back up the current static page (timestamps are computed before they
    # are printed) and append the copy to today's archive.
    ctime=$(date +"%Y%m%d_%H%M%S")
    cdate=$(date +"%Y%m%d")
    if [ -f "$HtmlPage" ]; then
        echo "...backup $HtmlPage to $HtmlPage.$ctime..."
        rm -rf -- "$HtmlPage.$ctime"
        cp -f -- "$HtmlPage" "$HtmlPage.$ctime"
        if [ -f "$HtmlPage.$ctime" ]; then
            tar rf "$HtmlPage.$cdate.tar" "$HtmlPage.$ctime"
            rm -rf -- "$HtmlPage.$ctime"
        fi
    fi

    # Generate the static page: fetch the dynamic page into a temp file first
    HtmlTmp="$HtmlPage.tmp"
    echo "...wget $HtmlPage to $HtmlTmp..."
    rm -rf -- "$HtmlTmp"
    # The URL is quoted because page names may contain '?' and '&'.
    "$WGeter" "$SiteURL/$P" -O "$HtmlTmp" || continue

    if [[ -f "$HtmlTmp" && -s "$HtmlTmp" ]]; then
        # Replace the static page only when the download is non-empty
        echo "...copy $HtmlTmp to $HtmlPage..."
        cp -f -- "$HtmlTmp" "$HtmlPage" || continue
        chmod 0755 "$HtmlPage"
        echo "[ ok ]"
    fi
done
但是如果访问的静态页面还不存在,上面的方式就不能及时地生成它。下面我自己简单实现了一个静态页面引擎工具类。
package cn.sparkant.utils;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.log4j.Logger;
import java.io.*;
import java.net.HttpURLConnection;
/**
 * Static page generator: fetches a page over HTTP and stores the returned
 * HTML in a local file.
 * Created by hjs on 16/5/18.
 */
public class HtmlGenerator {
    private static final Logger logger = Logger.getLogger(HtmlGenerator.class);

    /**
     * Fetches {@code url} with an HTTP GET and writes the response body to
     * {@code htmlFileName} as UTF-8.
     *
     * <p>Before writing, every occurrence of {@code /css} and {@code /js} in
     * the body is prefixed with {@code http://shop.zj96596.com}.
     *
     * @param url          address of the page to fetch
     * @param htmlFileName path of the file to write
     * @return {@code true} if the server answered 200 with a body and the
     *         file was written; {@code false} otherwise (failures are logged,
     *         never thrown)
     */
    public static boolean createHtmlPage(String url, String htmlFileName) {
        boolean status = false;
        // try-with-resources releases the response first and the client second,
        // including when the request or the file write throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            HttpGet get = new HttpGet(url);
            get.setHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
            try (CloseableHttpResponse response = httpClient.execute(get)) {
                StatusLine statusLine = response.getStatusLine();
                int code = statusLine.getStatusCode();
                HttpEntity entity = response.getEntity();
                if (code == HttpURLConnection.HTTP_OK && entity != null) {
                    // Literal replacement; no regular expression is needed here.
                    // NOTE(review): this also rewrites "/css" and "/js" inside
                    // longer paths or already-absolute URLs - confirm that is intended.
                    String page = readBody(entity)
                            .replace("/css", "http://shop.zj96596.com/css")
                            .replace("/js", "http://shop.zj96596.com/js");
                    writeHtml(url, htmlFileName, page);
                    status = true;
                } else {
                    logger.info("静态页面引擎在解析" + url + "产生静态页面" + htmlFileName + "时出错!");
                }
            }
        } catch (Exception e) {
            // Boundary catch: callers only see the boolean result.
            logger.error("HtmlGenerator 生成静态页面失败", e);
        }
        return status;
    }

    /** Reads the entity body as UTF-8 text, ending every line with {@code \n}. */
    private static String readBody(HttpEntity entity) throws IOException {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(entity.getContent(), "UTF-8"))) {
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append('\n');
            }
        }
        return sb.toString();
    }

    /** Writes {@code content} to {@code htmlFileName} as UTF-8 and logs success. */
    private static synchronized void writeHtml(String url, String htmlFileName, String content) throws IOException {
        try (Writer fw = new OutputStreamWriter(new FileOutputStream(htmlFileName), "UTF-8")) {
            fw.write(content);
        }
        logger.info("静态页面引擎在解析" + url + "产生静态页面" + htmlFileName + "成功");
    }

    /** Manual smoke test: fetches a page and reports where it was written. */
    public static void main(String[] args) {
        String target = "/Users/hjs/Desktop/a.html";
        boolean ok = HtmlGenerator.createHtmlPage("http://baidu.com", target);
        // Report the real outcome and the real path.
        System.out.println(ok ? "静态页面已经生成到" + target : "静态页面生成失败");
    }
}
再配置一个过滤器(Filter),当访问的页面不存在、返回404时,判断该路径是不是我们要生成的静态页面。
package cn.sparkant.webapp.fliter;
import cn.sparkant.utils.HtmlGenerator;
import javax.servlet.*;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.IOException;
/**
* Handles 404 responses.
* When a requested static page does not exist, the page is generated through
* the HtmlGenerator utility class.
* Created by hjs on 16/5/19.
*/
public class HtmlFliter implements Filter{
// No initialisation is performed.
public void init(FilterConfig filterConfig) throws ServletException {
}
/**
* Runs the rest of the filter chain first, then inspects the response status.
* If the status is 404 and the (elided) condition holds, a static page is
* generated and the status is set to 200.
*
* NOTE(review): nothing in this method writes the generated page into the
* response, so the current request does not receive the new content here.
*/
public void doFilter(ServletRequest servletRequest, ServletResponse servletResponse, FilterChain filterChain) throws IOException, ServletException {
HttpServletRequest request = (HttpServletRequest) servletRequest;
// Requested URI; not used by the visible code, presumably meant for the elided condition below.
String url = request.getRequestURI().toString();
HttpServletResponse response = (HttpServletResponse) servletResponse;
// Let the target resource handle the request before looking at the status.
filterChain.doFilter(request, response);
int statusCode = response.getStatus();
if (statusCode == 404) {
// NOTE(review): ".." is a placeholder left by the author; a real condition
// must be supplied before this class compiles.
if (..) { // check whether url is one of the static pages we want to generate
// Source URL and output path are hard-coded here.
HtmlGenerator.createHtmlPage("http://shop.zj96596.com", "/Users/hjs/Desktop/a.html");
// NOTE(review): the chain has already run, so the response may be committed
// and this status change may have no effect - confirm against the Servlet API.
response.setStatus(HttpServletResponse.SC_OK);
//response.sendRedirect("http://shop..com");
}
}
}
// No cleanup is performed.
public void destroy() {
}
}
后面大家可以自己优化改进,有什么不足之处欢迎指正。