10分钟上手Guzzle批量下载:断点续传+实时进度条完整指南
【免费下载链接】guzzle Guzzle, an extensible PHP HTTP client 项目地址: https://gitcode.com/gh_mirrors/gu/guzzle
你是否还在为批量文件下载中断后需重新开始而烦恼?是否需要给用户展示直观的下载进度?本文将通过Guzzle实现企业级批量下载功能,包含断点续传、实时进度条和并发控制,让你10分钟内掌握专业下载解决方案。
读完本文你将获得:
- 基于Guzzle Pool的并发下载实现
- 断点续传核心代码与文件校验机制
- 文本进度条与百分比实时显示
- 错误处理与任务重试策略
批量下载架构设计
Guzzle作为PHP生态最流行的HTTP客户端,提供了完善的异步请求和并发控制能力。批量下载功能主要基于以下核心组件构建:
- 并发请求管理:src/Pool.php实现请求池管理,默认支持25个并发连接,可通过concurrency参数调整
- 流式传输:src/Handler/StreamHandler.php提供底层流处理,支持断点续传
- 进度追踪:StreamHandler的progress回调函数可实时获取传输状态
- 文件系统交互:结合PHP流操作实现断点续传的文件写入
核心功能实现
1. 并发请求池配置
使用Guzzle Pool组件创建请求池,设置并发数和任务回调:
use GuzzleHttp\Client;
use GuzzleHttp\Pool;
use GuzzleHttp\Psr7\Request;
// HTTP client shared by every transfer in the batch.
$client = new Client();
$urls = [/* list of download URLs */];

// Request generator: yields one callable per URL; each callable starts the
// asynchronous GET for that URL and returns its promise.
$requests = function ($urls) use ($client) {
    foreach ($urls as $url) {
        $savePath = '/path/to/save/' . basename($url);

        yield function () use ($client, $url, $savePath) {
            $onProgress = function ($downloadTotal, $downloaded) use ($savePath) {
                // handle progress updates here
            };
            $requestOptions = [
                'sink' => $savePath,
                'progress' => $onProgress,
            ];

            return $client->getAsync($url, $requestOptions);
        };
    }
};

// Pool configuration: at most five transfers in flight, with per-request
// success and failure hooks.
$pool = new Pool($client, $requests($urls), [
    'concurrency' => 5,
    'fulfilled' => function ($response, $index) {
        // called when a download succeeds
    },
    'rejected' => function ($reason, $index) {
        // called when a download fails
    },
]);

// Start the pool and block until every request has settled.
$promise = $pool->promise();
$promise->wait();
2. 断点续传实现
断点续传核心是通过HTTP Range头实现部分下载,结合文件系统操作实现断点记录:
/**
 * Builds the Guzzle request options needed to download, or resume downloading, a file.
 *
 * When $savePath already holds data, the request asks only for the bytes after
 * the existing ones (HTTP Range header) and stores them in "<savePath>.part".
 * Otherwise the response body is written straight to $savePath.
 *
 * @param string $savePath Final location of the downloaded file.
 * @return array Request options: always 'sink', plus 'headers' when resuming.
 */
function getResumeOptions($savePath) {
    // Always provide a sink: without one a fresh download is never written to disk.
    $options = ['sink' => $savePath];

    // filesize() results are cached per path; drop the cache so a file that
    // changed earlier in this process is measured correctly.
    clearstatcache(true, $savePath);
    $fileSize = file_exists($savePath) ? filesize($savePath) : 0;

    if ($fileSize > 0) {
        // Continue from the first byte that is not on disk yet.
        $options['headers'] = [
            'Range' => "bytes=$fileSize-",
        ];
        // Keep the new bytes in a temporary file until they can be merged.
        $options['sink'] = $savePath . '.part';
    }

    return $options;
}
// Inside the request callback: build the options for this file and send the request.
return $client->getAsync($url, getResumeOptions($savePath));
下载完成后合并文件:
/**
 * Appends "<savePath>.part" to $savePath and then deletes the part file.
 *
 * Does nothing when no part file exists. The part file is removed only after
 * all of its bytes were copied, so a failed merge never discards downloaded data.
 *
 * @param string $savePath Final location of the downloaded file.
 * @return void
 * @throws \RuntimeException When a file cannot be opened or the copy is incomplete.
 */
function mergePartialFile($savePath) {
    $partFile = $savePath . '.part';
    if (!file_exists($partFile)) {
        return;
    }

    $partHandle = fopen($partFile, 'rb');
    if ($partHandle === false) {
        throw new \RuntimeException("Unable to open part file for reading: $partFile");
    }

    $handle = fopen($savePath, 'ab');
    if ($handle === false) {
        fclose($partHandle);
        throw new \RuntimeException("Unable to open target file for appending: $savePath");
    }

    try {
        // Size taken from the open handle, so it is not affected by the stat cache.
        $stat = fstat($partHandle);
        $expected = $stat === false ? null : $stat['size'];
        $copied = stream_copy_to_stream($partHandle, $handle);
    } finally {
        fclose($handle);
        fclose($partHandle);
    }

    if ($copied === false || ($expected !== null && $copied !== $expected)) {
        throw new \RuntimeException("Failed to append $partFile to $savePath");
    }

    unlink($partFile);
}
3. 进度条显示
利用StreamHandler的progress回调实现文本进度条:
// Progress callback that renders a one-line text progress bar for $savePath.
// Redraws only when progress moved by at least one percentage point, and
// prints the finished bar exactly once.
$progressCallback = function ($downloadTotal, $downloaded, $uploadTotal, $uploaded) use ($savePath) {
    static $lastProgress = 0;

    // Nothing to draw until both the total size and some progress are known.
    if (!$downloadTotal || !$downloaded) {
        return;
    }

    $percent = round(($downloaded / $downloadTotal) * 100, 2);

    // If the counter went backwards (e.g. the transfer started over), track
    // from zero again instead of staying silent until the old mark is passed.
    if ($percent < $lastProgress) {
        $lastProgress = 0;
    }

    $finished = $percent >= 100;
    // The callback may keep firing after the last byte; report completion once.
    $justFinished = $finished && $lastProgress < 100;
    if ($percent - $lastProgress < 1 && !$justFinished) {
        return;
    }
    $lastProgress = $percent;

    // Text progress bar.
    $barLength = 50;
    $filledLength = (int)($barLength * $percent / 100);
    $bar = str_repeat('=', $filledLength) . str_repeat(' ', $barLength - $filledLength);

    // Human-readable sizes.
    $downloadedSize = formatBytes($downloaded);
    $totalSize = formatBytes($downloadTotal);

    // "\r" returns to the start of the line so the bar overwrites itself.
    echo "\rDownloading $savePath: [$bar] $percent% ($downloadedSize/$totalSize)";
    if ($finished) {
        echo "\n";
    }
};
/**
 * Formats a byte count as a human-readable size such as "1.5 KB".
 *
 * Negative input is treated as 0. Values of 1024 TB or more stay in TB.
 *
 * @param int|float $bytes     Number of bytes.
 * @param int       $precision Decimal places kept by round().
 * @return string Rounded value, a space, and the unit.
 */
function formatBytes($bytes, $precision = 2) {
    $units = ['B', 'KB', 'MB', 'GB', 'TB'];
    $value = max($bytes, 0);

    // Step through the units by division rather than log(): this keeps the
    // unit index an integer within the table, is exact at powers of 1024, and
    // leaves values below one byte in the 'B' unit.
    $unitIndex = 0;
    $lastUnit = count($units) - 1;
    while ($value >= 1024 && $unitIndex < $lastUnit) {
        $value /= 1024;
        $unitIndex++;
    }

    return round($value, $precision) . ' ' . $units[$unitIndex];
}
错误处理与优化
任务重试机制
使用Guzzle的重试中间件处理临时网络错误:
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Middleware;
use GuzzleHttp\RetryMiddleware;
$stack = HandlerStack::create();

// Retry middleware: up to 3 retries, only for connection failures and 5xx replies.
$stack->push(Middleware::retry(
    function ($retries, $request, $response, $exception) {
        if ($retries >= 3) {
            return false;
        }
        // Fully-qualified name: this snippet does not import ConnectException,
        // and an unresolved short name would make this test always false.
        if ($exception instanceof \GuzzleHttp\Exception\ConnectException) {
            return true;
        }
        // $response is null when the request failed before a reply arrived.
        return $response !== null && $response->getStatusCode() >= 500;
    },
    // Exponential backoff; the returned delay is in milliseconds.
    function ($retries) {
        return (int)pow(2, $retries) * 1000;
    }
));

$client = new Client(['handler' => $stack]);
下载任务状态管理
创建任务状态跟踪类,记录每个下载的进度和状态:
/**
 * Tracks the state of download tasks and persists it as JSON, so unfinished
 * work can be identified after a restart.
 */
class DownloadManager {
    /** @var string Path of the JSON file the task table is stored in. */
    private $statusFile;

    /** @var array Task table keyed by URL; each entry has 'status', 'progress' and 'updated_at'. */
    private $status = [];

    /**
     * @param string $statusFile Path of the JSON status file; loaded immediately if it exists.
     */
    public function __construct($statusFile) {
        $this->statusFile = $statusFile;
        $this->loadStatus();
    }

    /**
     * Loads previously saved state. Unreadable or malformed content is ignored
     * so a damaged status file cannot break later updates.
     */
    private function loadStatus() {
        if (!is_file($this->statusFile)) {
            return;
        }

        $raw = file_get_contents($this->statusFile);
        if ($raw === false) {
            return;
        }

        $decoded = json_decode($raw, true);
        if (!is_array($decoded)) {
            // Valid JSON that is not an object/array (e.g. a bare string) is not a task table.
            return;
        }

        // Keep only well-formed entries; getPendingTasks() reads $task['status'].
        $this->status = array_filter($decoded, function ($task) {
            return is_array($task) && isset($task['status']);
        });
    }

    /**
     * Records the state of one task and saves the whole table.
     *
     * @param string    $url      Task key.
     * @param string    $status   'pending', 'downloading', 'completed' or 'failed'.
     * @param int|float $progress Progress value stored with the task.
     */
    public function updateStatus($url, $status, $progress = 0) {
        $this->status[$url] = [
            'status' => $status,
            'progress' => $progress,
            'updated_at' => time()
        ];
        $this->saveStatus();
    }

    /**
     * Writes the task table to the status file.
     */
    private function saveStatus() {
        $json = json_encode($this->status, JSON_PRETTY_PRINT);
        if ($json === false) {
            // Writing the failed result would truncate the file; keep the last good copy.
            trigger_error('Unable to encode download status: ' . json_last_error_msg(), E_USER_WARNING);
            return;
        }

        // Exclusive lock so overlapping writers cannot interleave their output.
        file_put_contents($this->statusFile, $json, LOCK_EX);
    }

    /**
     * Returns every recorded task whose status is not 'completed'.
     *
     * @return array Unfinished entries, keyed by URL.
     */
    public function getPendingTasks() {
        return array_filter($this->status, function($task) {
            return $task['status'] !== 'completed';
        });
    }
}
完整代码示例
以下是整合所有功能的完整示例:
<?php
require 'vendor/autoload.php';
use GuzzleHttp\Client;
use GuzzleHttp\Pool;
use GuzzleHttp\Exception\ConnectException;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Middleware;
use GuzzleHttp\Psr7\Request;
/**
 * Downloads a list of URLs concurrently into one directory, resuming
 * partially downloaded files and recording task state through DownloadManager.
 */
class BatchDownloader {
    /** @var Client HTTP client configured with the retry middleware. */
    private $client;

    /** @var string Target directory, without a trailing slash. */
    private $downloadDir;

    /** @var mixed Value passed to the pool's 'concurrency' option. */
    private $concurrency;

    /** @var DownloadManager Persists per-URL status in "<downloadDir>/status.json". */
    private $manager;

    /**
     * @param string $downloadDir Directory that receives the files; created if missing.
     * @param mixed  $concurrency Value for the pool's 'concurrency' option.
     * @throws \RuntimeException When the download directory cannot be created.
     */
    public function __construct($downloadDir = './downloads', $concurrency = 5) {
        $this->downloadDir = rtrim($downloadDir, '/');
        $this->concurrency = $concurrency;

        // Create the directory before anything writes into it. The second
        // is_dir() covers another process creating it at the same moment.
        if (!is_dir($this->downloadDir)
            && !mkdir($this->downloadDir, 0755, true)
            && !is_dir($this->downloadDir)) {
            throw new \RuntimeException("Unable to create download directory: {$this->downloadDir}");
        }

        $this->manager = new DownloadManager($this->downloadDir . '/status.json');
        $this->client = new Client(['handler' => $this->createHandlerStack()]);
    }

    /**
     * Downloads every URL and blocks until all transfers have settled.
     *
     * @param array|\Traversable $urls URLs to download.
     */
    public function download($urls) {
        // Pool callbacks receive the position of the request in the generator;
        // re-index so that position always maps back to the right URL, even
        // for associative or sparse input.
        $urls = is_array($urls) ? array_values($urls) : iterator_to_array($urls, false);

        $requests = function ($urls) {
            foreach ($urls as $url) {
                $savePath = $this->downloadDir . '/' . $this->resolveFilename($url);

                // Finished files are not filtered out here: getPendingTasks()
                // only returns unfinished entries, so it cannot identify
                // completed ones. A file that is already complete on disk is
                // recognised in createRequest() from the server's 416 reply.
                $this->manager->updateStatus($url, 'pending');

                yield function () use ($url, $savePath) {
                    return $this->createRequest($url, $savePath);
                };
            }
        };

        $pool = new Pool($this->client, $requests($urls), [
            'concurrency' => $this->concurrency,
            'fulfilled' => function ($response, $index) use ($urls) {
                $url = $urls[$index];
                $this->manager->updateStatus($url, 'completed', 100);
                echo "Completed: $url\n";
            },
            'rejected' => function ($reason, $index) use ($urls) {
                $url = $urls[$index];
                $this->manager->updateStatus($url, 'failed');
                $message = $reason instanceof \Throwable ? $reason->getMessage() : 'unknown error';
                echo "Failed: $url - " . $message . "\n";
            },
        ]);

        $promise = $pool->promise();
        $promise->wait();
        echo "All downloads completed!\n";
    }

    /**
     * Builds the handler stack with the retry policy: up to 3 retries for
     * connection failures and 5xx replies, with exponential backoff.
     *
     * @return HandlerStack
     */
    private function createHandlerStack() {
        $stack = HandlerStack::create();
        $stack->push(Middleware::retry(
            function ($retries, $request, $response, $exception) {
                return $retries < 3
                    && ($exception instanceof ConnectException
                        || ($response && $response->getStatusCode() >= 500));
            },
            function ($retries) {
                return (int)pow(2, $retries) * 1000;
            }
        ));

        return $stack;
    }

    /**
     * Starts the asynchronous download of one URL.
     *
     * @param string $url      Source URL.
     * @param string $savePath Final location of the file.
     * @return mixed Promise returned by the client, resolved with the response.
     */
    private function createRequest($url, $savePath) {
        $options = $this->getResumeOptions($savePath);
        $sinkPath = $options['sink'];
        // getResumeOptions() redirects the sink to a part file only when resuming.
        $resuming = $sinkPath !== $savePath;

        // The progress callback fires very often; persist and redraw only when
        // the whole-number percentage changes, since each update rewrites the
        // status file.
        $lastStep = -1;
        $options['progress'] = function ($downloadTotal, $downloaded) use ($url, &$lastStep) {
            $progress = $downloadTotal ? round(($downloaded / $downloadTotal) * 100, 2) : 0;
            $step = (int) floor($progress);
            if ($step === $lastStep) {
                return;
            }
            $lastStep = $step;

            $this->manager->updateStatus($url, 'downloading', $progress);
            $this->showProgress($url, $downloadTotal, $downloaded);
        };

        return $this->client->getAsync($url, $options)->then(
            function ($response) use ($savePath, $resuming) {
                if ($resuming) {
                    // 206 means the server honoured the Range header and the
                    // part file holds only the missing tail. Any other success
                    // status means the whole file was sent again.
                    $this->mergePartialFile($savePath, $response->getStatusCode() === 206);
                }
                return $response;
            },
            function ($reason) use ($sinkPath, $resuming) {
                $response = $reason instanceof \GuzzleHttp\Exception\RequestException
                    ? $reason->getResponse()
                    : null;
                $status = $response !== null ? $response->getStatusCode() : 0;

                if ($status >= 400) {
                    // The sink received the error body, not file data; remove it
                    // so a later run does not try to resume from it.
                    if (is_file($sinkPath)) {
                        unlink($sinkPath);
                    }
                    // 416 on a resume: the requested offset is at or past the
                    // end of the remote file, so the local copy is complete.
                    if ($resuming && $status === 416) {
                        return $response;
                    }
                }

                if ($reason instanceof \Throwable) {
                    throw $reason;
                }
                throw new \RuntimeException('Download failed');
            }
        );
    }

    /**
     * Returns the request options for a fresh or resumed download.
     *
     * @param string $savePath Final location of the file.
     * @return array Always 'sink'; plus a Range header when data already exists.
     */
    private function getResumeOptions($savePath) {
        $options = ['sink' => $savePath];

        // filesize() is cached per path; refresh it before trusting the offset.
        clearstatcache(true, $savePath);
        $fileSize = file_exists($savePath) ? filesize($savePath) : 0;

        if ($fileSize > 0) {
            $options['headers']['Range'] = "bytes=$fileSize-";
            $options['sink'] = $savePath . '.part';
        }

        return $options;
    }

    /**
     * Folds "<savePath>.part" into $savePath and removes the part file.
     *
     * @param string $savePath Final location of the file.
     * @param bool   $append   True to append the part file (partial content);
     *                         false to replace $savePath with it (full content).
     * @throws \RuntimeException When the files cannot be merged.
     */
    private function mergePartialFile($savePath, $append = true) {
        $partFile = $savePath . '.part';
        if (!file_exists($partFile)) {
            return;
        }

        if (!$append) {
            if (!rename($partFile, $savePath)) {
                throw new \RuntimeException("Unable to replace $savePath with $partFile");
            }
            return;
        }

        $partHandle = fopen($partFile, 'rb');
        if ($partHandle === false) {
            throw new \RuntimeException("Unable to open part file for reading: $partFile");
        }
        $handle = fopen($savePath, 'ab');
        if ($handle === false) {
            fclose($partHandle);
            throw new \RuntimeException("Unable to open target file for appending: $savePath");
        }

        try {
            $copied = stream_copy_to_stream($partHandle, $handle);
        } finally {
            fclose($handle);
            fclose($partHandle);
        }

        if ($copied === false) {
            // Keep the part file: its bytes have not been merged.
            throw new \RuntimeException("Failed to append $partFile to $savePath");
        }
        unlink($partFile);
    }

    /**
     * Derives the local file name from the path component of a URL.
     *
     * @param string $url Source URL.
     * @return string Base name of the URL path, or a hash-based name when the
     *                URL has no usable path segment.
     */
    private function resolveFilename($url) {
        $path = parse_url($url, PHP_URL_PATH);
        $name = is_string($path) ? basename($path) : '';

        return $name !== '' ? $name : 'download-' . md5($url);
    }

    /**
     * Prints a one-line text progress bar for a transfer.
     *
     * @param string    $url        Source URL (its file name is shown).
     * @param int|float $total      Total bytes expected, 0 when unknown.
     * @param int|float $downloaded Bytes received so far.
     */
    private function showProgress($url, $total, $downloaded) {
        $filename = $this->resolveFilename($url);
        $totalSize = $this->formatBytes($total);
        $downloadedSize = $this->formatBytes($downloaded);
        $percent = $total ? round(($downloaded / $total) * 100, 2) : 0;

        $barLength = 50;
        $filledLength = (int)($barLength * $percent / 100);
        $bar = str_repeat('=', $filledLength) . str_repeat(' ', $barLength - $filledLength);

        // "\r" rewinds to the start of the line so the bar overwrites itself.
        echo "\r[$bar] $percent% ($downloadedSize/$totalSize) - $filename";
        if ($percent == 100) echo "\n";
    }

    /**
     * Formats a byte count as a human-readable size such as "1.5 KB".
     *
     * @param int|float $bytes     Number of bytes; negative values count as 0.
     * @param int       $precision Decimal places kept by round().
     * @return string Rounded value, a space, and the unit (capped at TB).
     */
    private function formatBytes($bytes, $precision = 2) {
        $units = ['B', 'KB', 'MB', 'GB', 'TB'];
        $value = max($bytes, 0);

        $unitIndex = 0;
        $lastUnit = count($units) - 1;
        while ($value >= 1024 && $unitIndex < $lastUnit) {
            $value /= 1024;
            $unitIndex++;
        }

        return round($value, $precision) . ' ' . $units[$unitIndex];
    }
}
// Usage example: download the listed files into /path/to/downloads, five at a time.
$downloadUrls = [
    'https://example.com/large-file1.zip',
    'https://example.com/large-file2.iso',
    // add more URLs here...
];

$downloader = new BatchDownloader('/path/to/downloads', 5);
$downloader->download($downloadUrls);
扩展功能建议
- 任务优先级:修改请求生成器,根据文件大小或重要性排序请求
- 速度限制:添加流量控制中间件,限制总下载带宽
- 校验机制:下载完成后通过MD5或SHA校验文件完整性
- Web界面:结合前端框架创建可视化管理界面,使用WebSocket实时更新进度
- 邮件通知:任务完成后发送邮件通知,包含下载报告
通过本文介绍的方法,你可以构建一个功能完善、健壮可靠的批量下载系统。Guzzle的灵活架构和强大功能让复杂的HTTP操作变得简单,无论是构建企业级下载工具还是简单的批量获取脚本,都能满足需求。
完整实现可参考src/Pool.php和src/Handler/StreamHandler.php的源代码,了解底层实现细节以进行更高级的定制。
【免费下载链接】guzzle Guzzle, an extensible PHP HTTP client 项目地址: https://gitcode.com/gh_mirrors/gu/guzzle
创作声明:本文部分内容由AI辅助生成(AIGC),仅供参考



