最近项目想用到"文档在线预览",参考了一下 使用OpenOffice.org将各类文档转为PDF
本想用OpenOffice的类, 但OpenOffice的类太复杂了..
后来想到了Aspose ,
Google docs(谷歌文档)也是用的这个商业解决方案..
当然还有PSVIEW 大家有兴趣研究下..是开源的
但是在偷窃的心理作用下..决定利用一下Google的优良服务.
演示地址: http://game.gtmm.cn/
以下为源代码(仅供参考..切莫用于商业用途..后果自负)
<?php
/*
* 这个类的主要作用是从Google文档上下载回文件..没有什么正式的API..所以..一旦Google改了.这也要改
* 使用的时候请注意改一下HOSTS文件....因为国内服务器都没办法访问Google Docs的.
* 加入如下两条记录,在服务器HOSTS文件(所以..国内虚拟主机没办法了)
* 203.208.45.200 docs.google.com
* 74.125.31.132 doc-08-c8-docsviewer.googleusercontent.com
*
* by wc1217 Time: 2012-03-09 13:11:31
*/
/**
 * Downloads rendered versions (PNG pages, PDF, text) of a remote document
 * through the Google Docs viewer.
 *
 * There is no official API behind this: the class scrapes the viewer page with
 * a regular expression, so it has to be adjusted whenever that page changes.
 *
 * Typical usage: call setUrlViewerInfo() once, then any of byteToPngFile(),
 * viewerToPdfFile() or viewerToTextFile().
 */
class google_docs{
    /**
     * Viewer metadata captured by setUrlViewerInfo(), keyed by field name
     * (svUrl, biUrl, chanId, gpUrl, docId, numPages, gtUrl, thWidth, dlUrl,
     * thHeight), or null when nothing has been loaded yet.
     */
    private $viewerInfo = null;

    /**
     * Loads the curl_multi helper class that every request in this class uses.
     */
    public function __construct(){
        require_once 'curl_multi_class.php';
    }

    /**
     * Fetches the viewer page for $url and extracts its metadata.
     *
     * @param string $url      Address of the document to hand to the viewer.
     * @param mixed  $retArray List of field names to return; anything empty or
     *                         not an array means "return every field".
     * @return array|null The requested fields, or null after an error was raised
     *                    because the page did not contain the expected data.
     */
    private function getUrlViewerInfo($url, $retArray = array()){
        $multi = new curl_multi();
        $multi->setUrlList(array('https://docs.google.com/viewer?url=' . urlencode($url) . '&embedded=true&mobile=true'));
        $content = $multi->exec();
        // Guard against a failed request so preg_match() always gets a string.
        $body = (is_array($content) && isset($content[0])) ? (string) $content[0] : '';
        $out = array();
        preg_match('/\{svUrl:\\\'\?url\\\\75(https?:\/\/.*?)\\\',biUrl:\\\'\?url\\\\75(https?:\/\/.*?)\\\',chanId:\\\'(.*?)\\\',gpUrl:\\\'(https?:\/\/.*?)\\\',docId:\\\'(.*?)\\\',numPages:(\d+),gtUrl:\\\'\?url\\\\75(https?:\/\/.*?)\\\',thWidth:(\d+),dlUrl:\\\'(.*?)\\\',thHeight:(.*?)\}/', $body, $out);
        // Full match plus ten capture groups are expected.
        if(count($out) != 11){
            trigger_error('没有应有的得到响应值!', E_USER_ERROR);
            return null; // only reached when an error handler lets execution continue
        }
        array_shift($out); // drop the full match, keep the ten groups
        $allArray = array_combine(array('svUrl', 'biUrl', 'chanId', 'gpUrl', 'docId', 'numPages', 'gtUrl', 'thWidth', 'dlUrl', 'thHeight'), $out);
        if(empty($retArray) || !is_array($retArray)){
            return $allArray;
        }
        return array_intersect_key($allArray, array_flip($retArray));
    }

    /**
     * Decodes backslash-octal escapes (for example \75 -> "=", \46 -> "&")
     * embedded in the URLs scraped from the viewer page.
     *
     * Only the octal digits 0-7 are consumed, so a following "8" or "9" is left
     * in place instead of being swallowed.
     *
     * @param string $url Text containing \NN or \NNN escapes.
     * @return string The decoded text.
     */
    private function transFormUrl($url){
        return preg_replace_callback('/\\\\([0-7]{2,3})/', function($match){
            // chr() wraps values above 255, matching PHP's own "\NNN" escapes.
            return chr(octdec($match[1]));
        }, $url);
    }

    /**
     * Downloads pages 1..$page of the document as PNG images.
     *
     * @param string $url   The biUrl value from the viewer metadata.
     * @param int    $page  Number of pages to fetch.
     * @param string $width Requested image width, sent as the "w" parameter.
     * @return mixed Whatever curl_multi::exec() returns for the page URLs.
     */
    private function getUrlToPng($url, $page, $width = '1000'){
        $urlList = array();
        for($i = 1; $i <= $page; $i++){
            $urlList[] = $this->transFormUrl("https://docs.google.com/viewer?url={$url}&pagenumber={$i}&w={$width}");
        }
        $multi = new curl_multi();
        $multi->setUrlList($urlList);
        return $multi->exec();
    }

    /**
     * Returns one stored metadata field, raising an error when the metadata has
     * not been loaded or the field was not requested in setUrlViewerInfo().
     *
     * @param string $key Field name, e.g. 'biUrl'.
     * @return mixed|null The value, or null after an error was raised.
     */
    private function requireViewerInfo($key){
        if(empty($this->viewerInfo)){
            trigger_error('Please call setUrlViewerInfo() before runing!', E_USER_ERROR);
            return null;
        }
        if(!isset($this->viewerInfo[$key])){
            trigger_error("Viewer info '{$key}' is not available; include it in the \$retArray passed to setUrlViewerInfo()", E_USER_ERROR);
            return null;
        }
        return $this->viewerInfo[$key];
    }

    /**
     * Looks up a header by name without regard to letter case, because servers
     * and HTTP/2 responses do not guarantee the "Location" capitalisation.
     *
     * @param array  $headers Output of transFormHeader().
     * @param string $name    Header name to find.
     * @return string|null The header value, or null when it is absent.
     */
    private function headerValue($headers, $name){
        foreach($headers as $key => $value){
            if(strcasecmp((string) $key, $name) === 0){
                return $value;
            }
        }
        return null;
    }

    /**
     * Fetches and stores the viewer metadata for a document.
     *
     * @param string $url      Address of the document.
     * @param mixed  $retArray Field names to keep; pass null or an empty array
     *                         to keep all of them (needed for the PDF and text
     *                         exports, which use gpUrl and gtUrl).
     * @return void
     */
    public function setUrlViewerInfo($url, $retArray = array('biUrl', 'numPages')){
        if(empty($url)){
            trigger_error('$url can not be empty!', E_USER_ERROR);
            return;
        }
        $this->viewerInfo = $this->getUrlViewerInfo($url, $retArray);
    }

    /**
     * Saves the document pages as PNG files named <prefix>01.png, <prefix>02.png, ...
     *
     * @param string $filePrefix Prefix (may include a directory) for the files.
     * @param int    $numPages   Pages to fetch; 0 means the document's page count.
     * @return array One file_put_contents() result per page (bytes written or
     *               false); empty when the metadata is missing.
     */
    public function byteToPngFile($filePrefix = '', $numPages = 0){
        $biUrl = $this->requireViewerInfo('biUrl');
        if($biUrl === null){
            return array();
        }
        if(empty($numPages)){
            $numPages = $this->requireViewerInfo('numPages');
            if($numPages === null){
                return array();
            }
        }
        $pngByte = $this->getUrlToPng($biUrl, $numPages);
        $succeed = array();
        foreach($pngByte as $key => $value){
            $succeed[] = file_put_contents($filePrefix . sprintf("%02d", $key + 1) . '.png', $value);
        }
        return $succeed;
    }

    /**
     * Saves the document as <prefix>pdf.pdf.
     *
     * The download takes three requests: the first two are made with response
     * headers enabled to follow the redirects by hand, and the cookie from the
     * first response is sent with the third, which returns the PDF bytes.
     *
     * @param string $filePrefix Prefix (may include a directory) for the file.
     * @return int|false|null Bytes written, false on write failure or missing
     *                        metadata, null when nothing could be downloaded.
     */
    public function viewerToPdfFile($filePrefix = ''){
        $gpUrl = $this->requireViewerInfo('gpUrl');
        if($gpUrl === null){
            return false;
        }
        $url = $this->transFormUrl($gpUrl);
        $multi = new curl_multi();
        $multi->setOpt(array('CURLOPT_HEADER' => 1));

        // First request: yields the session cookie and the first redirect.
        $multi->setUrlList(array($url));
        $urlHeader = $this->transFormHeader($multi->exec());
        $setCookie = $this->headerValue($urlHeader, 'Set-Cookie');
        $location = $this->headerValue($urlHeader, 'Location');
        if($setCookie === null || $location === null){
            trigger_error('PDF download failed: first response has no Set-Cookie/Location header', E_USER_WARNING);
            return null;
        }
        // Only the name=value pair is sent back, not the cookie attributes.
        $cookie = explode(';', $setCookie);

        // Second request: yields the final download address.
        $multi->setUrlList(array($location));
        $urlHeader = $this->transFormHeader($multi->exec());
        $location = $this->headerValue($urlHeader, 'Location');
        if($location === null){
            trigger_error('PDF download failed: second response has no Location header', E_USER_WARNING);
            return null;
        }

        // Third request: body only, authenticated with the cookie.
        $multi->setOpt(array('CURLOPT_COOKIE' => $cookie[0], 'CURLOPT_HEADER' => 0));
        $multi->setUrlList(array($location));
        $bytePdf = $multi->exec();
        if(!empty($bytePdf[0])){
            return file_put_contents($filePrefix . 'pdf.pdf', $bytePdf[0]);
        }
        return null;
    }

    /**
     * Parses a raw HTTP response (headers first) into a name => value array.
     *
     * Values are trimmed so the "\r" of CRLF line endings does not end up in
     * them, and only the first ": " splits a line so values containing ": "
     * are kept. When a header repeats, the last occurrence wins.
     *
     * @param string|array $str Raw response text, or an array whose element 0
     *                          holds it.
     * @return array Header names mapped to their values; empty when nothing
     *               could be parsed.
     */
    private function transFormHeader($str){
        $headerArray = array();
        if(is_array($str)){
            $str = isset($str[0]) ? $str[0] : '';
        }
        if(!is_string($str) || $str === ''){
            return $headerArray;
        }
        foreach(explode("\n", $str) as $line){
            $parts = explode(': ', $line, 2);
            if(count($parts) !== 2){
                continue; // status line, blank line or anything without "Name: value"
            }
            $headerArray[trim($parts[0])] = trim($parts[1]);
        }
        return $headerArray;
    }

    /**
     * Saves the viewer's text response for the document as <prefix>text.txt.
     * The response is written as received; it is not parsed.
     *
     * @param string $filePrefix Prefix (may include a directory) for the file.
     * @return int|false Bytes written, or false on failure or missing metadata.
     */
    public function viewerToTextFile($filePrefix = ''){
        $gtUrl = $this->requireViewerInfo('gtUrl');
        if($gtUrl === null){
            return false;
        }
        $url = 'https://docs.google.com/viewer?url=' . $this->transFormUrl($gtUrl);
        $multi = new curl_multi();
        $multi->setUrlList(array($url));
        return file_put_contents($filePrefix . 'text.txt', $multi->exec());
    }
}
curl_multi的类.请引用curl_multi_class.php文件
文件来自本人上一篇文章 http://blog.csdn.net/wc1217/article/details/7332852
以下是测试文件index.php
<?php
// Demo: export one remote PDF as PDF, text and per-page PNG files.
require_once 'google_docs_viewer.php';

$sourceDocument = 'http://infolab.stanford.edu/pub/papers/google.pdf';
$outputPrefix = '10123_';

$viewer = new google_docs();
// null asks for every metadata field, which the PDF and text exports need.
$viewer->setUrlViewerInfo($sourceDocument, null);

// Each export prints its result (bytes written) on its own line.
$pdfResult = $viewer->viewerToPdfFile($outputPrefix);
echo $pdfResult, "\n";

$textResult = $viewer->viewerToTextFile($outputPrefix);
echo $textResult, "\n";

// One entry per saved page image.
$pngResults = $viewer->byteToPngFile($outputPrefix);
print_r($pngResults);