app.py
# -*- coding: utf-8 -*-
from flask import Flask, jsonify, request, render_template, redirect, url_for from flask_login import LoginManager,current_user from flask_wtf.csrf import CSRFProtect import logging import os import sys from datetime import datetime import warnings import urllib3 from urllib3.exceptions import InsecureRequestWarning from sqlalchemy import text import time import threading
# Disable SSL warnings
warnings.filterwarnings("ignore", category=InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Add the current directory to the Python path
# Directory containing this file. It is pushed to the front of sys.path so the
# project-local imports that follow resolve regardless of the working
# directory. ``__file__`` is required here: the bare name ``file`` is
# undefined and raised NameError at import time.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, current_dir)
from config import config, DevelopmentConfig from database.models import db,User, UserProxy, Proxy, SystemConfig from core.scheduler import SchedulerManager from web.auth import auth_bp from web.views import web_bp
# Configure logging - simplified log output
# Root logging configuration: INFO level, compact format, a single handler
# writing to stdout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)],
)
# Module-level logger named after this module. The bare name ``name`` was
# undefined and raised NameError at import time; ``__name__`` is intended.
logger = logging.getLogger(__name__)
# Flask-Login manager; 'auth.login' is the endpoint used as the login view.
login_manager = LoginManager()
login_manager.login_view= 'auth.login'
# Message associated with the login view for users who must log in first.
login_manager.login_message= '请先登录以访问此页面'
# CSRF protection extension instance.
# NOTE(review): presumably bound to the app inside create_app — not visible here.
csrf= CSRFProtect()
# Global scheduler instance
# Module-level scheduler handle; starts out as None.
scheduler_manager = None
# Module-level state flags, all initially False.
# NOTE(review): presumably these guard one-time application initialization and
# scheduler start-up — confirm against initialize_application /
# initialize_scheduler, which declare them ``global``.
_app_initialized= False
_app_initializing= False
_scheduler_started= False
def get_database_uri():
    """Return the SQLAlchemy database URI string.

    The URI is read from ``SQLALCHEMY_DATABASE_URI`` on a freshly created
    ``DevelopmentConfig``. If that fails for any reason the error is logged
    and a SQLite URI pointing at ``proxy_pool.db`` inside ``current_dir`` is
    returned instead.
    """
    try:
        uri = DevelopmentConfig().SQLALCHEMY_DATABASE_URI
    except Exception as exc:
        logger.error(f"获取数据库URI失败: {exc}")
        fallback_path = os.path.join(current_dir, 'proxy_pool.db')
        uri = f"sqlite:///{fallback_path}"
    return uri
def test_database_connection():
    """Check whether the database answers a trivial query.

    Returns:
        True if ``SELECT 1`` executes on the current session; False if any
        exception is raised (the failure is logged as a warning).
    """
    try:
        probe = text('SELECT 1')
        db.session.execute(probe)
    except Exception as exc:
        logger.warning(f"数据库连接异常: {exc}")
        return False
    else:
        return True
def check_first_run(app): """检查是否是首次运行""" try: db_uri = app.config.get('SQLALCHEMY_DATABASE_URI', '')
def initialize_scheduler(app): """初始化并启动调度器""" global scheduler_manager, _scheduler_started
def initialize_application(app): """应用初始化函数""" global _app_initialized, _app_initializing
def create_app(config_name='default'): app = Flask(name)
# Command-line entry point - modified start-up
# Script entry point. The guard must compare ``__name__`` with '__main__';
# the bare ``name == 'main'`` raised NameError and could never be true.
# NOTE(review): only app creation is visible here — confirm how the server is
# actually started after this line.
if __name__ == '__main__':
    app = create_app()
utils/helpers.py import re from datetime import datetime,timedelta from urllib.parse import urlparse import ipaddress import os import logging from sqlalchemy import inspect,text
# Module-level logger named after this module (``__name__``; the bare name
# ``name`` was undefined and raised NameError at import time).
logger = logging.getLogger(__name__)
import pymysql import sqlite3 import logging from sqlalchemy import create_engine from sqlalchemy.exc import OperationalError
# Add the QQWry (纯真) IP lookup library
import qqwry
# Module-level logger named after this module (``__name__``; the bare name
# ``name`` was undefined and raised NameError at import time).
logger = logging.getLogger(__name__)
def test_database_connection(db_config): """ 测试数据库连接是否可用 db_config: 数据库配置字典 返回: (成功与否, 错误信息) """ try: db_type = db_config.get('DB_TYPE', 'sqlite')
def is_valid_ip(ip):
    """Return True if *ip* is accepted by ``ipaddress.ip_address``.

    Both IPv4 and IPv6 addresses are accepted. Any value that makes
    ``ipaddress.ip_address`` raise ``ValueError`` yields False.
    """
    try:
        ipaddress.ip_address(ip)
    except ValueError:
        return False
    else:
        return True
def is_valid_port(port):
    """Return True if *port* converts to an integer in the range 1..65535.

    Args:
        port: Value to check; anything ``int()`` accepts, e.g. ``8080`` or
            ``"8080"``.

    Returns:
        True when the converted value lies in 1..65535. False when it is out
        of range or cannot be converted to an integer at all.
    """
    try:
        port = int(port)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None and containers, OverflowError covers
        # float('inf'); previously only ValueError was handled, so those
        # inputs raised instead of returning False.
        return False
    return 1 <= port <= 65535
def is_valid_protocol(protocol):
    """Return True if *protocol* names a supported proxy protocol.

    The comparison is case-insensitive. Supported values are ``http``,
    ``https``, ``socks4`` and ``socks5``.

    Args:
        protocol: Protocol name to check.

    Returns:
        True for a supported protocol name; False otherwise, including for
        non-string input such as ``None`` (which previously raised
        ``AttributeError``).
    """
    if not isinstance(protocol, str):
        return False
    return protocol.lower() in ('http', 'https', 'socks4', 'socks5')
def parse_proxy_string(proxy_str, default_protocol='http'): """解析代理字符串""" if not proxy_str or not isinstance(proxy_str, str): return None
def format_proxy_string(proxy_dict): """格式化代理为字符串""" if not proxy_dict: return ""
# Initialize the QQWry IP lookup object (singleton)
# Cached QQWry reader; stays None until a database file has been loaded
# successfully.
_qqwry_reader = None


def get_qqwry_reader():
    """Return the shared QQWry IP-database reader, loading it on first use.

    The reader is created and ``qqwry.dat`` (a path relative to the current
    working directory) is loaded the first time this is called. The reader is
    cached only after a successful load, so a failed attempt is retried on
    the next call instead of leaving an unloaded reader cached.

    Returns:
        The loaded reader, or None if the database could not be loaded.
    """
    global _qqwry_reader
    if _qqwry_reader is not None:
        return _qqwry_reader

    try:
        reader = qqwry.QQwry()
        loaded = reader.load_file('qqwry.dat')
    except Exception as e:
        logger.error(f"初始化纯真IP数据库失败: {e}")
        return None

    # load_file reports failure by returning False rather than raising, so
    # the return value has to be checked explicitly.
    if loaded is False:
        logger.error("初始化纯真IP数据库失败: 无法加载 qqwry.dat")
        return None

    _qqwry_reader = reader
    return _qqwry_reader
def get_location_from_ip(ip): """根据IP获取地理位置(使用纯真IP查询)""" try: # 验证IP地址有效性 if not is_valid_ip(ip): return "无效IP"
def calculate_next_run(interval_seconds):
    """Return the local time *interval_seconds* seconds from now.

    Args:
        interval_seconds: Offset in seconds, passed to ``timedelta``.

    Returns:
        A naive ``datetime`` equal to ``datetime.now()`` plus the offset.
    """
    current_time = datetime.now()
    delay = timedelta(seconds=interval_seconds)
    return current_time + delay
def format_timedelta(delta): """格式化时间间隔""" if not delta: return "从未"
def humanize_time(dt): """人性化时间显示""" if not dt: return "从未"
web/views.py from functools import wraps from flask import Blueprint,render_template, jsonify, request, flash, redirect, url_for, current_app from flask_login import login_required,current_user, login_user from datetime import datetime,timedelta import json import threading from time import sleep import logging import os import subprocess from sqlalchemy import inspect
from database.models import db, Proxy, User, UserProxy, CrawlerRule, SystemConfig from web.forms import CrawlerRuleForm,UserForm, SystemConfigForm, ImportForm, FirstRunForm, DatabaseMigrationForm from core.crawler import ProxyCrawler from core.validator import ProxyValidator from utils.importer import ProxyImporter
# Blueprint holding the web UI views. The second argument is the import name
# and must be ``__name__``; the bare name ``name`` was undefined and raised
# NameError at import time.
web_bp = Blueprint('web', __name__)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Add template filter
@web_bp.app_template_filter('safe_date')
def safe_date_filter(value, format='%Y-%m-%d %H:%M'):
    """Template filter that formats a date value without raising on bad input.

    Args:
        value: ``None``, an object with a ``strftime`` method, a date string,
            or anything else.
        format: ``strftime`` pattern used for the output.

    Returns:
        ``"从未"`` for ``None``; the formatted date for objects with
        ``strftime``; for strings, the formatted date when the string parses
        (ISO form when it contains ``'T'``, otherwise
        ``'%Y-%m-%d %H:%M:%S'``) or the string unchanged when it does not;
        ``str(value)`` for other types; ``"无效日期"`` when an
        ``AttributeError`` or ``ValueError`` escapes the above.
    """
    if value is None:
        return "从未"

    try:
        if hasattr(value, 'strftime'):
            return value.strftime(format)

        if not isinstance(value, str):
            return str(value)

        # String input: try to parse it; fall back to the raw string.
        try:
            if 'T' in value:
                parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
            else:
                parsed = datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
            return parsed.strftime(format)
        except (ValueError, TypeError):
            return value
    except (AttributeError, ValueError):
        return "无效日期"
# Check whether the first-run setup is needed
# The check_first_run function in web/views.py needs to be fixed
def check_first_run(): """更健壮的首次运行检查""" try: # 检查数据库连接是否正常 from sqlalchemy import text db.session.execute(text('SELECT 1'))
# Admin-permission decorator
def admin_required(f):
    """Decorator that restricts a view to authenticated administrators.

    Unauthenticated users are flashed a message and redirected to
    ``auth.login``; authenticated non-admin users are flashed a message and
    redirected to ``web.dashboard``. Otherwise the wrapped view is called
    with the original arguments.

    Args:
        f: The view function to protect.

    Returns:
        The wrapped view function (metadata preserved via ``functools.wraps``).
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        # ``*args`` (not a single positional ``args``) is required: Flask
        # passes URL variables as keyword arguments, so a mandatory
        # positional parameter made every protected view raise TypeError.
        if not current_user.is_authenticated:
            flash('请先登录', 'danger')
            return redirect(url_for('auth.login'))
        if not current_user.is_admin:
            flash('需要管理员权限', 'danger')
            return redirect(url_for('web.dashboard'))
        return f(*args, **kwargs)
    return decorated_function
# Global variable used to track validation progress
# Module-level dict holding validation progress; it is returned as JSON by
# get_validation_progress.
# NOTE(review): presumably 'total' / 'completed' count proxies and 'message'
# is a status text — confirm against validate_all_proxies.
validation_progress = {
    'running': False,
    'total': 0,
    'completed': 0,
    'message': ''
}
# Add first-run check middleware
@web_bp.before_request def before_request(): # 排除静态文件和首次运行路由 if request.endpoint and ( request.endpoint.startswith('static') or request.endpoint in ['web.first_run', 'web.first_run_setup'] ): return
# First-run routes in web/views.py
@web_bp.route('/first-run', methods=['GET']) def first_run(): # 使用新的检查方法 from utils.helpers import check_first_run
@web_bp.route('/first-run/setup', methods=['POST']) def first_run_setup(): from utils.helpers import check_first_run
# Database migration page
@web_bp.route('/database-migration', methods=['GET', 'POST']) @login_required @admin_required def database_migration(): migration_form = DatabaseMigrationForm()
# Modify the settings page route to add the database migration option
@web_bp.route('/settings', methods=['GET', 'POST']) @login_required @admin_required def settings(): form = SystemConfigForm()
@web_bp.route('/') @login_required def dashboard(): from database.models import Proxy, User, CrawlerRule, SystemConfig
@web_bp.route('/proxies') @login_required def proxies(): page = request.args.get('page', 1, type=int) per_page = 20
@web_bp.route('/test-proxy/') @login_required def test_proxy(proxy_id): """测试单个代理""" proxy = Proxy.query.get_or_404(proxy_id)
@web_bp.route('/edit-proxy/', methods=['POST']) @login_required @admin_required def edit_proxy(proxy_id): """编辑代理信息""" proxy = Proxy.query.get_or_404(proxy_id)
@web_bp.route('/validate-all-proxies') @login_required @admin_required def validate_all_proxies(): """后台验证所有代理""" global validation_progress
@web_bp.route('/get-validation-progress')
@login_required
def get_validation_progress():
    """Return the module-level ``validation_progress`` dict as JSON."""
    # Read-only access to the module global; no ``global`` statement needed.
    progress_snapshot = validation_progress
    return jsonify(progress_snapshot)
@web_bp.route('/my-proxies') @login_required def my_proxies(): user_proxies = current_user.proxies.join(Proxy).filter( UserProxy.is_active == True ).order_by(UserProxy.assigned_at.desc()).all()
@web_bp.route('/get-proxy') @login_required def get_proxy(): """获取代理 - 添加到期时间检查""" # 检查用户是否到期 if not current_user.can_access_system(): return jsonify({'error': '您的账户已到期或已被禁用'}), 403
# The URL must carry the ``proxy_id`` variable: without it Flask calls the
# view with no arguments and the request fails with TypeError.
@web_bp.route('/proxy-info/<int:proxy_id>')
@login_required
@admin_required
def proxy_info(proxy_id):
    """Return one proxy as JSON (admin only).

    Args:
        proxy_id: Identifier of the proxy, taken from the URL.

    Returns:
        JSON built from ``proxy.to_dict()``; responds 404 if no proxy has
        this id.
    """
    proxy = Proxy.query.get_or_404(proxy_id)
    return jsonify(proxy.to_dict())
@web_bp.route('/crawlers', methods=['GET', 'POST']) @login_required @admin_required def crawlers(): form = CrawlerRuleForm() if form.validate_on_submit(): rule = CrawlerRule( name=form.name.data, url=form.url.data, pattern_type=form.pattern_type.data, pattern=form.pattern.data, protocol=form.protocol.data, interval=form.interval.data, is_active=form.is_active.data ) db.session.add(rule) db.session.commit() flash('Crawler rule added successfully') return redirect(url_for('web.crawlers'))
@web_bp.route('/run-crawler/') @login_required @admin_required def run_crawler(rule_id): rule = CrawlerRule.query.get_or_404(rule_id) crawler = ProxyCrawler() proxies = crawler.crawl_rule(rule) new_count = crawler.save_proxies(proxies)
@web_bp.route('/users', methods=['GET', 'POST']) @login_required @admin_required def users(): form = UserForm()
@web_bp.route('/edit-user/') @login_required @admin_required def edit_user(user_id): """获取用户信息用于编辑""" user = User.query.get_or_404(user_id)
@web_bp.route('/import', methods=['GET', 'POST']) @login_required @admin_required def import_proxies(): form = ImportForm() importer = ProxyImporter()
@web_bp.route('/api/docs')
@login_required
def api_docs():
    """Render the API documentation page for logged-in users."""
    page = render_template('api_docs.html')
    return page
@web_bp.route('/replace-proxy/') @login_required def replace_proxy(proxy_id): """更换用户代理""" user_proxy = UserProxy.query.get_or_404(proxy_id)
@web_bp.route('/release-proxy/') @login_required def release_proxy(proxy_id): """释放用户代理""" user_proxy = UserProxy.query.get_or_404(proxy_id)
@web_bp.route('/delete-user/', methods=['DELETE']) @login_required @admin_required def delete_user(user_id): """删除用户""" if user_id == current_user.id: return jsonify({'error': '不能删除自己'}), 400
@web_bp.route('/reset-api-key/', methods=['POST']) @login_required @admin_required def reset_api_key(user_id): """重置用户API Key""" user = User.query.get_or_404(user_id) new_key = user.generate_api_key() db.session.commit()
@web_bp.route('/users/add', methods=['POST']) @login_required @admin_required def add_user(): """添加新用户""" form = UserForm() if form.validate_on_submit(): try: # 检查用户名是否已存在 existing_user = User.query.filter_by(username=form.username.data).first() if existing_user: flash('用户名已存在', 'danger') return redirect(url_for('web.users'))
@web_bp.route('/users/edit', methods=['POST']) @login_required @admin_required def edit_user_post(): """编辑用户信息""" form = UserForm() user_id = request.form.get('user_id')
@web_bp.route('/users//edit') @login_required @admin_required def get_user_info(user_id): """获取用户信息用于编辑(API接口)""" user = User.query.get_or_404(user_id)
@web_bp.route('/clear-invalid-proxies', methods=['POST']) @login_required @admin_required def clear_invalid_proxies(): """清理无效代理""" try: # 删除无效代理 invalid_proxies = Proxy.query.filter_by(is_valid=False).all() deleted_count = len(invalid_proxies)
@web_bp.route('/test-crawler-rule', methods=['POST']) @login_required @admin_required def test_crawler_rule(): """测试抓取规则""" try: # 检查请求内容类型 if not request.is_json: return jsonify({ 'success': False, 'message': '请求必须是JSON格式', 'proxies': [], 'html_content': '', 'element_count': 0, 'valid_count': 0, 'raw_elements': [] }), 400
# The URL must carry the ``rule_id`` variable: without it Flask calls the
# view with no arguments and the request fails with TypeError.
@web_bp.route('/crawler-detail/<int:rule_id>.json')
@login_required
@admin_required
def crawler_detail_json(rule_id):
    """Return one crawler rule as JSON (admin only).

    Args:
        rule_id: Identifier of the crawler rule, taken from the URL.

    Returns:
        JSON built from ``rule.to_dict()``; responds 404 if no rule has
        this id.
    """
    rule = CrawlerRule.query.get_or_404(rule_id)
    return jsonify(rule.to_dict())
# The URL must carry the ``rule_id`` variable: without it Flask calls the
# view with no arguments and the request fails with TypeError.
@web_bp.route('/crawler/<int:rule_id>')
@login_required
@admin_required
def crawler_detail(rule_id):
    """Render the detail page of one crawler rule (admin only).

    Args:
        rule_id: Identifier of the crawler rule, taken from the URL.

    Returns:
        The rendered ``crawler_detail.html`` template with the rule passed
        as ``rule``; responds 404 if no rule has this id.
    """
    rule = CrawlerRule.query.get_or_404(rule_id)
    return render_template('crawler_detail.html', rule=rule)
# The URL must carry the ``rule_id`` variable: without it Flask calls the
# view with no arguments and the request fails with TypeError.
# NOTE(review): this state-changing endpoint accepts GET (the route default);
# consider restricting it to POST once the callers are updated.
@web_bp.route('/toggle-crawler/<int:rule_id>')
@login_required
@admin_required
def toggle_crawler(rule_id):
    """Flip the ``is_active`` flag of one crawler rule (admin only).

    Args:
        rule_id: Identifier of the crawler rule, taken from the URL.

    Returns:
        JSON ``{'success': True}`` after the change is committed; responds
        404 if no rule has this id.
    """
    rule = CrawlerRule.query.get_or_404(rule_id)
    rule.is_active = not rule.is_active
    db.session.commit()
    return jsonify({'success': True})
@web_bp.route('/edit-crawler/', methods=['GET', 'POST']) @login_required @admin_required def edit_crawler(rule_id): """编辑抓取规则""" rule = CrawlerRule.query.get_or_404(rule_id) form = CrawlerRuleForm(obj=rule)
@web_bp.route('/delete-crawler/', methods=['DELETE']) @login_required @admin_required def delete_crawler(rule_id): """删除抓取规则""" try: rule = CrawlerRule.query.get_or_404(rule_id) db.session.delete(rule) db.session.commit()
web/forms.py from flask_wtf import FlaskForm from wtforms import StringField,PasswordField, BooleanField, IntegerField, SelectField, TextAreaField, SubmitField from wtforms.validators import DataRequired,Email, Length, EqualTo, ValidationError, URL ,Optional from database.models import User
# Add first-run form
class FirstRunForm(FlaskForm):
    """First-run setup form: database settings plus the admin account.

    Only the database type and the admin fields are required; the remaining
    database fields are optional.
    """

    # --- database settings ---
    db_type = SelectField(
        '数据库类型',
        validators=[DataRequired()],
        default='sqlite',
        choices=[('sqlite', 'SQLite'), ('mysql', 'MySQL')],
    )
    db_host = StringField('数据库主机', validators=[Optional()], default='localhost')
    db_port = StringField('数据库端口', validators=[Optional()], default='3306')
    db_name = StringField('数据库名称', validators=[Optional()], default='proxy_pool')
    db_user = StringField('用户名', validators=[Optional()], default='root')
    db_password = PasswordField('密码', validators=[Optional()])

    # --- administrator account ---
    admin_username = StringField(
        '管理员用户名',
        validators=[DataRequired(), Length(min=1, max=64)],
    )
    admin_password = PasswordField(
        '管理员密码',
        validators=[DataRequired(), Length(min=6, max=128)],
    )
    # Must match admin_password.
    admin_password2 = PasswordField(
        '确认密码',
        validators=[DataRequired(), EqualTo('admin_password')],
    )

    submit = SubmitField('保存配置')
# Add database migration form
class DatabaseMigrationForm(FlaskForm):
    """Form describing the target database of a migration.

    Only the target database type is required; the connection fields are
    optional.
    """

    db_type = SelectField(
        '目标数据库类型',
        validators=[DataRequired()],
        choices=[('sqlite', 'SQLite'), ('mysql', 'MySQL')],
    )
    db_host = StringField('数据库主机', validators=[Optional()], default='localhost')
    db_port = StringField('数据库端口', validators=[Optional()], default='3306')
    db_name = StringField('数据库名称', validators=[Optional()], default='proxy_pool')
    db_user = StringField('用户名', validators=[Optional()], default='root')
    db_password = PasswordField('密码', validators=[Optional()])
    submit = SubmitField('开始迁移')
class LoginForm(FlaskForm):
    """Login form: username, password and a remember-me checkbox."""

    username = StringField(
        '用户名',
        validators=[DataRequired(), Length(min=1, max=64)],
    )
    password = PasswordField('密码', validators=[DataRequired()])
    remember_me = BooleanField('记住我')
    submit = SubmitField('登录')
class RegistrationForm(FlaskForm):
    """Registration form: username, e-mail and a password entered twice."""

    username = StringField(
        '用户名',
        validators=[DataRequired(), Length(min=1, max=64)],
    )
    email = StringField('邮箱', validators=[DataRequired(), Email()])
    password = PasswordField(
        '密码',
        validators=[DataRequired(), Length(min=6, max=128)],
    )
    # Must match password.
    password2 = PasswordField(
        '确认密码',
        validators=[DataRequired(), EqualTo('password')],
    )
    submit = SubmitField('注册')
class CrawlerRuleForm(FlaskForm): name = StringField('规则名称', validators=[DataRequired(), Length(1, 100)]) url = StringField('目标URL', validators=[DataRequired(), URL(), Length(1, 500)]) pattern_type = SelectField('匹配类型', choices=[
('css', 'CSS选择器'),
('xpath', 'XPath'),
('regex', '正则表达式')
], validators=[DataRequired()]) pattern = TextAreaField('主匹配模式', validators=[DataRequired()])
class UserForm(FlaskForm):
    """Form with the fields of a user account.

    NOTE(review): the password field carries only a length validator (6-128)
    and no ``Optional()``, so an empty password fails validation — confirm
    this is intended for the edit flow.
    """

    username = StringField(
        '用户名',
        validators=[DataRequired(), Length(min=1, max=64)],
    )
    email = StringField('邮箱', validators=[DataRequired(), Email()])
    password = PasswordField('密码', validators=[Length(min=6, max=128)])

    # Per-user limits.
    max_proxies = IntegerField('最大代理数', validators=[DataRequired()], default=100)
    rate_limit = IntegerField('速率限制(次/分钟)', validators=[DataRequired()], default=60)

    # Free-text expiry; no validators are attached to this field.
    expires_at = StringField(
        '到期时间',
        render_kw={'placeholder': 'YYYY-MM-DD HH:MM:SS 或留空为永久'},
    )

    is_active = BooleanField('账户激活', default=True)
    is_admin = BooleanField('管理员权限')
    submit = SubmitField('保存用户')
class SystemConfigForm(FlaskForm): crawl_interval = IntegerField('抓取间隔(秒)', default=3600, validators=[DataRequired()]) validate_interval = IntegerField('验证间隔(秒)', default=300, validators=[DataRequired()]) validate_timeout = IntegerField('验证超时(秒)', default=10, validators=[DataRequired()]) validate_url = StringField('验证URL', default='http://httpbin.org/ip', validators=[DataRequired(), URL()])
class ImportForm(FlaskForm):
    """Proxy import form: an import type, a default protocol and the source.

    Only the import type is required; the text, API URL and pattern fields
    carry no validators.
    """

    import_type = SelectField(
        '导入类型',
        validators=[DataRequired()],
        choices=[('text', '文本导入'), ('api', 'API导入')],
    )
    protocol = SelectField(
        '默认协议',
        default='http',
        choices=[
            ('http', 'HTTP'),
            ('https', 'HTTPS'),
            ('socks4', 'SOCKS4'),
            ('socks5', 'SOCKS5'),
        ],
    )
    text = TextAreaField(
        '代理文本',
        render_kw={'placeholder': '每行一个代理,格式: 协议://用户:密码@IP:端口 或 IP:端口'},
    )
    api_url = StringField('API地址', render_kw={'placeholder': '请输入API URL'})
    pattern = StringField('匹配模式', render_kw={'placeholder': '正则表达式模式'})
    submit = SubmitField('导入代理')
class ClearProxiesForm(FlaskForm):
    """Form whose only field is the submit button for clearing invalid proxies."""

    submit = SubmitField(label='清理无效代理')
app.py 的首次运行逻辑有问题:首次运行时,网页应自动跳转到安装页面,用于配置数据库并创建管理员用户。请给出修复后的完整代码。