功能如下
对外提供以下接口:机票查询、航班查询、翻译【汉译英、英译汉、汉译日】
技术框架
PHP;simple_html_dom.php(第三方开源库,解析 HTML 很方便);simplexml_load_string(PHP5 自带函数,解析 XML 很方便);正则表达式(网页分析必备技术)。
接口通过 Java 的 HTTP 客户端进行测试。
遇到问题
1.原来php是以脚本命令方式执行没有任何问题,切到apache下报错,最后发现是页面本身协议头中的文件类型有误
原来代码:header("Content-Type:text/xml;charset=utf-8");
调整后: header("Content-Type:text/html;charset=utf-8");
2.编码问题:PHP 脚本本身执行时没有乱码,但通过 Java 调用时出现乱码;通过指定输入流的字符集,并对 URL 参数按 UTF-8 进行编码后解决。
详见java测试类中编码方式。
废话不多说啦,直接贴代码如下:
<?php
/*
 * @author xiaoluozheng@sohu-inc.com
 * @date 2011-8-26
 * This endpoint implements several features: air-ticket lookup, flight lookup,
 * and translation (Chinese->English, English->Chinese, Chinese->Japanese).
 * Request parameter @flag    business flag (1,2,3 translation; 4 flight lookup;
 *                            5 air-ticket lookup; 6,7 job search; 8 small-business startup)
 * Request parameter @content request payload
 */
// Responses are declared as UTF-8 encoded text/html.
header("Content-Type:text/html;charset=utf-8");
// Third-party HTML parser; provides file_get_html().
include_once ('simple_html_dom.php');
// NOTE(review): the original comment here said "do not output errors", but E_ALL
// enables reporting of every error level - confirm which behaviour is intended.
error_reporting(E_ALL);
/**
 * Flight lookup: fetches the Qunar flight-status page for a flight number and
 * extracts a short description from the elements with class "state_detail".
 *
 * For every matched element, the text of each <dt> that precedes its first
 * <span> is collected, followed by the <span> elements whose running 1-based
 * position (counted across all matched elements) is 2, 5, 6 or 7.
 *
 * @param string $flightcode Flight number, e.g. "CZ3802".
 * @return string Comma-separated description; an empty string when the page
 *                cannot be loaded or contains no matching elements.
 */
function flightQueryByFlightCode($flightcode) {
    // Always URL-encode query parameters (mandatory for non-ASCII characters).
    $url = "http://flight.qunar.com/status/fquery.jsp?flightCode=" . urlencode($flightcode);
    $html = file_get_html($url); // parsed with the simple_html_dom library
    if (!$html) {
        // The page could not be fetched/parsed: report "no data" instead of
        // failing with a fatal error on ->find().
        return "";
    }

    $count = 0;
    $filter = array(2, 5, 6, 7); // positions of the <span> elements to keep
    // Substrings stripped from every extracted fragment.
    $filterstr = array("航班时刻" => "", "(" => "", ")" => "", "<b>" => "", "</b>" => "", " " => "", "计划时间:" => "", "起降机场:" => "");
    $result = array();

    foreach ($html->find('.state_detail') as $element) {
        foreach ($element->find('dt') as $dt) {
            $str = trim($dt->innertext);
            // Keep only the text in front of the first "<span". When there is no
            // such tag an empty entry is stored, so later entries keep their index.
            $str = preg_match("|(.*)<span|U", $str, $out) ? $out[1] : "";
            $result[] = strtr($str, $filterstr);
        }
        foreach ($element->find('span') as $span) {
            $count++;
            if (in_array($count, $filter, true)) {
                $result[] = strtr(trim($span->innertext), $filterstr);
            }
        }
    }
    $html->clear(); // release the DOM tree held by simple_html_dom

    return implode(",", $result);
}
/**
 * Translation: calls the Microsoft Translator Ajax endpoint and extracts the
 * translated text from the callback-style response, e.g. mycallback("How do you do");
 *
 * @param string $flag Direction: "1" English->Chinese, "2" Chinese->English,
 *                     "3" Chinese->Japanese.
 * @param string $str  Text to translate.
 * @return string Translated text; an empty string when the flag is unknown,
 *                the request fails, or the response has an unexpected shape.
 */
function translate($flag, $str) {
    $base = "http://api.microsofttranslator.com/V2/Ajax.svc/Translate?oncomplete=mycallback&appId=A4D660A48A6A97CCA791C34935E4C02BBB1BEC1C";
    $inters = array(
        "1" => $base . "&from=en&to=zh-cn&text=",
        "2" => $base . "&from=zh-cn&to=en&text=",
        "3" => $base . "&from=zh-cn&to=ja&text="
    );

    // Reject unsupported directions up front instead of requesting a bogus URL.
    if (!is_scalar($flag) || !isset($inters[(string) $flag])) {
        return "";
    }

    // Always URL-encode query parameters (mandatory for non-ASCII characters).
    $url = $inters[(string) $flag] . urlencode($str);
    $content = file_get_contents($url);
    if ($content === false) {
        return "";
    }

    // Take the first quoted argument of the callback invocation.
    if (!preg_match("|\(\"(.*)\"\)|U", $content, $out)) {
        return "";
    }
    return $out[1];
}
/**
 * Air-ticket lookup by route: queries the Qunar holidayService endpoint and
 * builds a one-line summary from the first <line> entry under <airline>.
 *
 * The flight number found in the "go_avc" attribute is additionally passed to
 * flightQueryByFlightCode() to obtain the departure/arrival airports.
 *
 * @param string $str Route in the form "origin-destination", e.g. "北京-上海".
 * @return string Summary of the entry; an empty string when the service cannot
 *                be reached or its response contains no usable <line> entry.
 */
function flightQueryByCity($str) {
    // Always URL-encode query parameters (mandatory for non-ASCII characters).
    $url = "http://ws.qunar.com/holidayService.jcp?lane=" . urlencode($str);
    $content = file_get_contents($url);
    if ($content === false || $content === "") {
        return "";
    }

    $xml = simplexml_load_string($content);
    if ($xml === false || !isset($xml->airline->line[0])) {
        return "";
    }
    $line = $xml->airline->line[0];

    // Collect the attributes of the entry and of its first child element as
    // plain strings (SimpleXML yields objects, which are awkward to reuse).
    $result = array();
    foreach ($line->attributes() as $key => $value) {
        $result[$key] = (string) $value;
    }
    foreach ($line->children() as $child) {
        foreach ($child->attributes() as $key => $value) {
            $result[$key] = (string) $value;
        }
        break; // only the first child is relevant
    }

    // Make sure every field used below exists, so missing data yields "".
    foreach (array('go_avc', 'go_start', 'go_expires', 'discount', 'price') as $field) {
        if (!isset($result[$field])) {
            $result[$field] = "";
        }
    }

    // "go_avc" is expected to look like "<airline name> <flight number>".
    $airport = "";
    $tmp = explode(" ", $result['go_avc']);
    if (isset($tmp[1]) && $tmp[1] !== "") {
        // The second field of the flight description holds the airports.
        $info = explode(",", flightQueryByFlightCode($tmp[1]));
        if (isset($info[1])) {
            $airport = $info[1];
        }
    }

    // Assemble the response text.
    $content = "当前最低折扣:" . $result['go_avc'] . "," . $airport . "," . $result['go_start'] . "-" . $result['go_expires'] . "," . $result['discount'] . $result['price'] . "元";
    return $content;
}
/**
 * Job search: scrapes a 51zgzg.com search-result page and returns the cell
 * contents of its first table rows.
 *
 * @param string $flag "6" searches skilled-worker jobs, "7" searches sales jobs.
 * @param string $city Value appended (URL-encoded) as the jobAreaName parameter.
 * @return string Cell contents joined with "###", or a fixed "nothing found"
 *                message when the flag is unsupported, the page cannot be
 *                loaded, or no cells were found.
 */
function findJob($flag,$city){
    $errmsg = "目前系统没有你要找的工作信息!";

    // Skilled-worker category
    $url_a = "http://www.51zgzg.com/search/searchEmp.do?method=search&words=%E6%8A%80%E5%B7%A5&FuntypeID=&FuntypeName=&jobAreaID=11000000&jobAreaName=";
    // Sales category
    $url_b = "http://www.51zgzg.com/search/searchEmp.do?method=search&words=%E9%94%80%E5%94%AE&FuntypeID=&FuntypeName=&jobAreaID=32050000&jobAreaName=";

    if ($flag == "6") {
        $url = $url_a . urlencode($city);
    } elseif ($flag == "7") {
        $url = $url_b . urlencode($city);
    } else {
        // Unsupported category: there is no URL to query.
        return $errmsg;
    }

    $html = file_get_html($url); // parsed with the simple_html_dom library
    if (!$html) {
        return $errmsg;
    }

    $result = array();
    $count = 0;
    foreach ($html->find('tr') as $element) {
        foreach ($element->find('td') as $td) {
            $result[] = trim($td->innertext);
        }
        // Only the first four rows are reported.
        if (++$count % 4 == 0) {
            break;
        }
    }
    $html->clear(); // release the DOM tree held by simple_html_dom

    $content = implode("###", $result);
    return $content != "" ? $content : $errmsg;
}
/**
 * Placeholder with no implementation yet; takes no arguments and returns nothing.
 * NOTE(review): presumably reserved for business flag 8 mentioned in the file
 * header - confirm before wiring it in.
 */
function findProject()
{
    // Intentionally empty.
}
/**
 * Logging hook. Currently a no-op: the file-based implementation below is
 * disabled, so calling this function has no effect.
 *
 * @param string $content Message to log.
 */
function printLog($content)
{
    // Disabled implementation (appends the message plus CRLF to log.txt):
    //   $fp = fopen("log.txt", "a+");
    //   $content .= "\r\n";
    //   fwrite($fp, $content);
    //   fclose($fp);
}
// Business flag: 1,2,3 translation; 4 flight lookup; 5 air-ticket lookup.
$flag = isset($_REQUEST['flag']) ? $_REQUEST['flag'] : "";
// Request payload (text to translate, flight number, or route).
$str = isset($_REQUEST['content']) ? $_REQUEST['content'] : "";
printLog($flag . "-" . $str);

$content = ""; // response body; stays empty for unknown flags
try {
    switch ($flag) {
        case "1":
        case "2":
        case "3":
            $content = translate($flag, $str);
            break;
        case "4":
            $content = flightQueryByFlightCode($str);
            break;
        case "5":
            $content = flightQueryByCity($str);
            break;
    }
} catch (Exception $exc) {
    // Best effort: on failure the client receives an empty response.
    //echo $exc->getMessage();
}
printLog($content);
echo $content;
?>
测试代码如下:
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
/**
 * Manual test client: POSTs a few sample requests to the PHP endpoint and
 * prints every response line to standard output.
 */
public class TestClient {
    /** Character set used for URL-encoding parameters and decoding responses. */
    private static final String ENCODING = "utf-8";

    /**
     * Sends one request per sample (flag, content) pair.
     *
     * @param args unused
     * @throws Exception if a request cannot be sent or its response cannot be read
     */
    public static void main(String[] args) throws Exception {
        String url = "http://localhost/ceshi/server.php";
        String[][] params = {{"1","laugh"},{"2","今天天气真好"},{"3","早上好"},{"4","CZ3802"},{"5","杭州-广州"}};
        for (int i = 0; i < params.length; i++) {
            // Anything other than ASCII letters and digits must be URL-encoded for transport.
            String content = "flag=" + params[i][0] + "&content=" + URLEncoder.encode(params[i][1], ENCODING);
            post(url, content);
        }
    }

    /**
     * POSTs a form-encoded body to the given URL and prints the response line by line.
     *
     * @param url     endpoint address
     * @param content form-encoded request body (already URL-encoded, hence ASCII only)
     * @throws Exception if the connection, the write, or the read fails
     */
    private static void post(String url, String content) throws Exception {
        URLConnection con = new URL(url).openConnection();
        con.setDoOutput(true);
        con.setDoInput(true);
        // Header names must not contain a colon ("Pragma:" is not a valid field name).
        con.setRequestProperty("Pragma", "no-cache");
        con.setRequestProperty("Cache-Control", "no-cache");

        PrintWriter out = new PrintWriter(con.getOutputStream());
        try {
            out.print(content);
            out.flush();
        } finally {
            out.close();
        }

        // Decode the response explicitly as UTF-8 to avoid garbled Chinese/Japanese text.
        BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream(), ENCODING));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line);
            }
        } finally {
            in.close();
        }
    }
}
输出如下:
笑
Today the weather is really good
おはようございます
CZ3802中国南方航空公司,萧山机场B楼—白云机场,机型:JET,飞行距离:1099KM,22:05-23:55
当前最低折扣:中国南方航空公司 CZ3820,萧山机场B楼—白云机场,08:20-10:20,4.9折510元。