Akamai Bot Manager企业级防护：传感器数据生成与智能检测对抗技术

原创于 2025-09-24 12:29:36 发布 · 853 阅读

8 ·

CC 4.0 BY-SA版权

文章标签：

#Akamai #Bot Manager #传感器数据 #企业防护 #反爬虫 #sensor_data #智能检测

Akamai Bot Manager企业级防护：传感器数据生成与智能检测对抗技术

技术概述与发展背景

Akamai Bot Manager作为全球领先的企业级机器人检测和缓解平台，凭借其先进的传感器数据分析技术和机器学习算法，为众多知名企业提供了强大的反自动化防护能力。该系统通过在客户端植入JavaScript传感器，收集用户行为特征、设备指纹、网络环境等多维度数据，构建精确的机器人识别模型。

在企业级应用场景中，Akamai Bot Manager不仅能够检测传统的脚本机器人，更能识别高度模拟人类行为的复杂自动化工具。其核心技术包括sensor_data传感器数据生成、telemetry遥测验证、以及基于AI的实时行为分析系统。

Akamai技术架构深度解析

多层检测机制：

- 客户端传感器：收集鼠标轨迹、键盘时序、触摸事件等行为数据
- 设备指纹识别：分析硬件特征、浏览器配置、插件信息
- 网络层分析：检测IP信誉、地理位置、连接模式
- 机器学习引擎：实时分析行为模式，动态调整检测策略

核心Cookie机制：

- _abck Cookie：包含传感器数据和验证状态的加密信息
- bm_sz Cookie：存储会话大小和时间戳数据
- ak_bmsc Cookie：Bot Manager状态控制和配置信息

核心技术实现详解

2.1 传感器数据分析与生成

Sensor Data技术原理

import json
import time
import random
import hashlib
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse

class AkamaiSensorDataGenerator:
    """Produce randomized mouse, keyboard and device-fingerprint records.

    Every value range is read from ``self.sensor_patterns``. Event timestamps
    are expressed in milliseconds and are offsets from ``self.timing_baseline``,
    which is captured once when the instance is created.
    """

    def __init__(self):
        self.sensor_patterns = {
            'mouse_events': {
                'move_frequency': (50, 200),  # mouse-move frequency range (ms)
                'click_delay': (100, 500),    # click delay range (ms)
                'trajectory_points': (10, 50)  # number of points per trajectory
            },
            'keyboard_events': {
                'typing_speed': (80, 150),     # typing speed (WPM)
                'key_intervals': (50, 300),   # interval between keys (ms)
                'pause_probability': 0.1      # chance of an extra pause per key
            },
            'device_metrics': {
                'screen_resolution': [(1920, 1080), (1366, 768), (1536, 864)],
                'viewport_size': [(1200, 800), (1024, 768), (1440, 900)],
                'color_depth': [24, 32],
                'pixel_ratio': [1, 1.25, 1.5, 2]
            }
        }

        # Reference timestamp (ms since the epoch) shared by all generated events.
        self.timing_baseline = time.time() * 1000

    def generate_mouse_trajectory(self, start_pos: Tuple[int, int], 
                                 end_pos: Tuple[int, int],
                                 duration: int = 1000) -> List[Dict]:
        """Return a list of ``mousemove`` records between two positions.

        Points are placed by linear interpolation from ``start_pos`` to
        ``end_pos`` with a uniform jitter of +/-5 on each axis. The number of
        points is drawn from ``sensor_patterns['mouse_events']['trajectory_points']``.

        Args:
            start_pos: ``(x, y)`` of the first point.
            end_pos: ``(x, y)`` of the last point.
            duration: Time span in milliseconds covered by the trajectory.

        Returns:
            A list of dicts with keys ``x``, ``y``, ``timestamp`` and
            ``event_type``. A one-point trajectory contains only the start
            position (plus jitter) at the baseline timestamp.
        """
        trajectory = []
        points_count = random.randint(*self.sensor_patterns['mouse_events']['trajectory_points'])

        start_x, start_y = start_pos
        end_x, end_y = end_pos

        # With a single point there is no span to interpolate over; clamping the
        # divisor avoids a ZeroDivisionError and pins that point to progress 0.
        last_index = max(points_count - 1, 1)

        for i in range(points_count):
            progress = i / last_index

            # Per-point jitter so the path is not a perfectly straight line.
            noise_x = random.uniform(-5, 5)
            noise_y = random.uniform(-5, 5)

            current_x = start_x + (end_x - start_x) * progress + noise_x
            current_y = start_y + (end_y - start_y) * progress + noise_y

            timestamp = self.timing_baseline + (duration * progress)

            trajectory.append({
                'x': int(current_x),
                'y': int(current_y),
                'timestamp': int(timestamp),
                'event_type': 'mousemove'
            })

        return trajectory

    def generate_keyboard_timing(self, text: str) -> List[Dict]:
        """Return one ``keypress`` record per character of ``text``.

        The base interval is derived from a random typing speed (WPM, assuming
        five characters per word), scaled by a random factor in [0.5, 1.5] per
        key, with an occasional extra pause of 200-800 ms.

        Args:
            text: The characters to generate events for; may be empty.

        Returns:
            A list of dicts with keys ``char``, ``keyCode``, ``timestamp`` and
            ``event_type``; timestamps never decrease.
        """
        timing_data = []
        current_time = self.timing_baseline

        typing_speed = random.randint(*self.sensor_patterns['keyboard_events']['typing_speed'])
        base_interval = 60000 / (typing_speed * 5)  # average ms per character
        pause_probability = self.sensor_patterns['keyboard_events']['pause_probability']

        for char in text:
            # Vary the rhythm from key to key.
            interval_variance = random.uniform(0.5, 1.5)
            char_interval = base_interval * interval_variance

            # Occasionally insert a longer pause.
            if random.random() < pause_probability:
                char_interval += random.randint(200, 800)

            current_time += char_interval

            # Some letters upper-case to more than one character (e.g. 'ß' ->
            # 'SS'); ord() would raise on those, so fall back to the original.
            upper = char.upper()
            if char.isalpha() and len(upper) == 1:
                key_code = ord(upper)
            else:
                key_code = ord(char)

            timing_data.append({
                'char': char,
                'keyCode': key_code,
                'timestamp': int(current_time),
                'event_type': 'keypress'
            })

        return timing_data

    def generate_device_fingerprint(self) -> Dict:
        """Return a fingerprint dict with randomly chosen screen/viewport values.

        Screen and viewport values are picked from ``sensor_patterns``;
        timezone, language and platform are fixed constants; user agent,
        plugin list and canvas hash come from the private helpers below.
        """
        screen_res = random.choice(self.sensor_patterns['device_metrics']['screen_resolution'])
        viewport = random.choice(self.sensor_patterns['device_metrics']['viewport_size'])

        fingerprint = {
            'screen': {
                'width': screen_res[0],
                'height': screen_res[1],
                'colorDepth': random.choice(self.sensor_patterns['device_metrics']['color_depth']),
                'pixelRatio': random.choice(self.sensor_patterns['device_metrics']['pixel_ratio'])
            },
            'viewport': {
                'width': viewport[0],
                'height': viewport[1]
            },
            'timezone': {
                'offset': -480,  # GMT+8
                'name': 'Asia/Shanghai'
            },
            'language': 'zh-CN',
            'platform': 'Win32',
            'userAgent': self._generate_user_agent(),
            'plugins': self._generate_plugin_list(),
            'canvas': self._generate_canvas_fingerprint()
        }

        return fingerprint

    def _generate_user_agent(self) -> str:
        """Return a Windows Chrome User-Agent string with a random listed version."""
        chrome_versions = ['120.0.0.0', '119.0.0.0', '118.0.0.0']
        webkit_version = '537.36'

        version = random.choice(chrome_versions)

        return f'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/{webkit_version} (KHTML, like Gecko) Chrome/{version} Safari/{webkit_version}'

    def _generate_plugin_list(self) -> List[str]:
        """Return a random sample of two to four names from a fixed plugin list."""
        common_plugins = [
            'PDF Viewer',
            'Chrome PDF Viewer',
            'Chromium PDF Viewer',
            'Microsoft Edge PDF Viewer',
            'WebKit built-in PDF'
        ]

        return random.sample(common_plugins, random.randint(2, 4))

    def _generate_canvas_fingerprint(self) -> str:
        """Return a 16-character hex string derived from a random seed and the clock.

        No canvas is rendered; the value is the first 16 hex digits of an MD5
        digest over a random number and the current time.
        """
        canvas_data = f"canvas_{random.randint(100000, 999999)}_{time.time()}"
        return hashlib.md5(canvas_data.encode()).hexdigest()[:16]

2.2 企业级Bot Manager对抗技术

智能传感器数据提交

import requests
import json
from typing import Dict, Optional, Union

class AkamaiBotManagerSolver:
    """Thin HTTP client for a third-party solving API.

    Each ``solve_*`` method builds a JSON payload and posts it to
    ``self.api_url`` through ``_make_request``; failures are reported as a
    ``{'status': 0, 'msg': ..., 'cost': ...}`` dict rather than raised.
    """

    def __init__(self, user_token: str, developer_id: str = "hqLmMS"):
        self.user_token = user_token
        self.developer_id = developer_id
        # NOTE(review): plain http means the User-Token header travels
        # unencrypted; switch to https if the service supports it - confirm.
        self.api_url = "http://api.nocaptcha.io/api/wanda/akamai/v2"
        self.headers = {
            'User-Token': user_token,
            'Content-Type': 'application/json',
            'Developer-Id': developer_id
        }

        self.sensor_generator = AkamaiSensorDataGenerator()

    @staticmethod
    def _add_optional(payload: Dict, **optional) -> Dict:
        """Copy every truthy keyword value into ``payload`` and return it."""
        payload.update({key: value for key, value in optional.items() if value})
        return payload

    def solve_standard_challenge(self, href: str, 
                               api: Optional[str] = None,
                               proxy: Optional[str] = None) -> Dict:
        """Post a payload containing ``href`` plus optional ``api`` and ``proxy``.

        Returns:
            The dict produced by ``_make_request``.
        """
        payload = self._add_optional({"href": href}, api=api, proxy=proxy)
        return self._make_request(payload)

    def solve_with_cookies(self, href: str,
                          cookies: Union[str, Dict],
                          api: Optional[str] = None,
                          proxy: Optional[str] = None) -> Dict:
        """Post ``href`` together with existing cookies.

        Args:
            href: Target page URL.
            cookies: Cookie string, ``{'value': ..., 'uri': ...}`` dict, or a
                plain name/value mapping (see ``_format_cookies``).
            api: Optional API endpoint forwarded as ``api``.
            proxy: Optional proxy forwarded as ``proxy``.

        Raises:
            ValueError: If ``cookies`` is neither a string nor a dict.
        """
        payload = self._add_optional(
            {"href": href, "cookies": self._format_cookies(cookies)},
            api=api,
            proxy=proxy,
        )
        return self._make_request(payload)

    def solve_telemetry_challenge(self, href: str,
                                 cookies: Optional[Union[str, Dict]] = None,
                                 proxy: Optional[str] = None) -> Dict:
        """Post ``href`` with ``telemetry`` set to True, plus optional cookies/proxy.

        Raises:
            ValueError: If ``cookies`` is truthy but not a string or dict.
        """
        formatted = self._format_cookies(cookies) if cookies else None
        payload = self._add_optional(
            {"href": href, "telemetry": True},
            cookies=formatted,
            proxy=proxy,
        )
        return self._make_request(payload)

    def solve_maersk_api(self, quotes_url: str = "https://www.maersk.com.cn/instantPrice/quotes",
                        use_telemetry: bool = True,
                        proxy: Optional[str] = None) -> Dict:
        """Post ``quotes_url`` as ``href`` with the given ``telemetry`` flag."""
        payload = self._add_optional(
            {"href": quotes_url, "telemetry": use_telemetry},
            proxy=proxy,
        )
        return self._make_request(payload)

    def _format_cookies(self, cookies: Union[str, Dict]) -> Dict:
        """Normalize cookies into a ``{'value': ..., 'uri': ...}`` dict.

        A string becomes the ``value`` with an empty ``uri``; a dict that
        already has a ``value`` key is returned unchanged; any other dict is
        joined as ``"k=v; k=v"``.

        Raises:
            ValueError: For any other input type.
        """
        if isinstance(cookies, str):
            return {
                "value": cookies,
                "uri": ""
            }
        if isinstance(cookies, dict):
            if 'value' in cookies:
                # Already in the expected shape.
                return cookies
            cookie_string = "; ".join(f"{k}={v}" for k, v in cookies.items())
            return {
                "value": cookie_string,
                "uri": ""
            }
        raise ValueError("不支持的cookies格式")

    def _make_request(self, payload: Dict) -> Dict:
        """POST ``payload`` as JSON and return the decoded response dict.

        On success the response gains ``actual_cost`` (elapsed wall time) and,
        when ``status == 1`` and ``data`` contains ``_abck``, a
        ``cookie_analysis`` entry. Transport/HTTP errors and undecodable or
        non-object bodies yield ``{'status': 0, 'msg': ..., 'cost': ...}``.
        """
        import time
        start_time = time.time()

        def elapsed() -> str:
            return f"{(time.time() - start_time) * 1000:.2f}ms"

        try:
            response = requests.post(
                self.api_url,
                headers=self.headers,
                json=payload,
                timeout=120  # the remote side may take a long time to answer
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return {
                'status': 0,
                'msg': f'请求失败: {str(e)}',
                'cost': elapsed()
            }

        # Decoding is handled separately: in recent requests releases the JSON
        # decode error is also a RequestException, so a combined try block
        # would report parse failures as transport failures.
        try:
            result = response.json()
        except ValueError as e:
            return {
                'status': 0,
                'msg': f'响应解析失败: {str(e)}',
                'cost': elapsed()
            }

        if not isinstance(result, dict):
            # Valid JSON but not an object; item assignment below would fail.
            return {
                'status': 0,
                'msg': f'响应解析失败: 非预期的响应类型 {type(result).__name__}',
                'cost': elapsed()
            }

        result['actual_cost'] = elapsed()

        data = result.get('data')
        if result.get('status') == 1 and isinstance(data, dict) and '_abck' in data:
            result['cookie_analysis'] = self._analyze_abck_cookie(data['_abck'])

        return result

    def _analyze_abck_cookie(self, abck_value: str) -> Dict:
        """Split an ``_abck`` value on ``~`` and summarize its segments.

        Returns:
            A dict with ``cookie_length``, a ``segments`` list (index, content
            truncated to 50 characters, length), ``version_info`` (second
            segment if present), ``timestamp_info`` (always empty here) and
            ``validation_status`` (last non-empty of the final two segments
            when there are more than three segments).
        """
        analysis = {
            'cookie_length': len(abck_value),
            'segments': [],
            'version_info': '',
            'timestamp_info': '',
            'validation_status': ''
        }

        segments = abck_value.split('~')
        analysis['segments'] = [{
            'index': i,
            'content': seg[:50] + '...' if len(seg) > 50 else seg,
            'length': len(seg)
        } for i, seg in enumerate(segments)]

        if len(segments) > 1:
            analysis['version_info'] = segments[1]

        if len(segments) > 3:
            analysis['validation_status'] = segments[-1] if segments[-1] else segments[-2]

        return analysis

    def validate_abck_cookie(self, abck_value: str, target_url: str,
                           proxy: Optional[str] = None) -> bool:
        """Fetch ``target_url`` with the cookie and look for challenge markers.

        Returns:
            True when none of the marker strings occur in the lower-cased
            response body; False when one does or when the request fails
            (the failure is printed, not raised).
        """
        try:
            test_headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate, br',
                'Connection': 'keep-alive'
            }

            cookies = {
                '_abck': abck_value
            }

            proxies = None
            if proxy:
                # Proxies given without a scheme are treated as http proxies.
                proxy_url = proxy if '://' in proxy else f'http://{proxy}'
                proxies = {'http': proxy_url, 'https': proxy_url}

            response = requests.get(
                target_url,
                headers=test_headers,
                cookies=cookies,
                proxies=proxies,
                timeout=30
            )

            content = response.text.lower()
            challenge_indicators = [
                'akamai',
                'bot manager',
                'sensor_data',
                'bmak',
                'bm_start_ts'
            ]

            return not any(indicator in content for indicator in challenge_indicators)

        except Exception as e:
            print(f"Cookie验证失败: {e}")
            return False

2.3 高级场景专用解决方案

企业API接口保护绕过

class AkamaiEnterpriseAPIHandler:
    """Site-specific wrappers around an ``AkamaiBotManagerSolver`` instance.

    Each ``handle_*`` method asks the solver for a result and, on success,
    attaches a static description of how that result is meant to be used.
    """

    def __init__(self, base_solver: "AkamaiBotManagerSolver"):
        self.solver = base_solver
        self.api_patterns = {
            'jetstar': {
                'domain': 'jetstar.com',
                'api_pattern': r'/3Fl6sx/QIvaPL/b/7Hf/[^/]+/[^/]+/[^/]+/[^/]+/[^/]+',
                'requires_proxy': False
            },
            'dickssportinggoods': {
                'domain': 'dickssportinggoods.com',
                'api_pattern': r'/prod/api/paygate/v2/[^/]+/[^/]+',
                'requires_proxy': True
            },
            'maersk': {
                'domain': 'maersk.com',
                'api_pattern': r'/instantPrice/quotes',
                'telemetry_required': True
            }
        }

    @staticmethod
    def _response_data(result: Dict) -> Dict:
        """Return ``result['data']`` when it is a dict, otherwise an empty dict."""
        data = result.get('data')
        return data if isinstance(data, dict) else {}

    def handle_jetstar_api(self, gift_card_data: Dict, 
                          proxy: Optional[str] = None) -> Dict:
        """Request a cookie for the Jetstar base URL and describe its use.

        Args:
            gift_card_data: Accepted for interface compatibility; not read here.
            proxy: Optional proxy forwarded to the solver.

        Returns:
            The solver's own result when its status is not 1; a status-0 dict
            when a status-1 reply carries no ``_abck``; otherwise a status-1
            dict with the cookie and a ``usage_instructions`` block.
        """
        base_url = "https://www.jetstar.com/"

        cookie_result = self.solver.solve_standard_challenge(
            href=base_url,
            proxy=proxy
        )

        if cookie_result.get('status') != 1:
            return cookie_result

        # A success status without the cookie would otherwise raise KeyError.
        abck_cookie = self._response_data(cookie_result).get('_abck')
        if not abck_cookie:
            return {
                'status': 0,
                'msg': '响应中缺少_abck cookie',
                'data': cookie_result.get('data')
            }

        return {
            'status': 1,
            'msg': '已获取有效验证cookie',
            'data': {
                '_abck': abck_cookie,
                'usage_instructions': {
                    'cookie_name': '_abck',
                    'cookie_value': abck_cookie,
                    'required_headers': {
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
                        'Referer': base_url
                    },
                    'api_endpoint': 'https://payments.jetstar.com/api/giftcard/*',
                    'method': 'POST'
                }
            }
        }

    def handle_maersk_telemetry(self, shipping_data: Dict,
                               proxy: Optional[str] = None) -> Dict:
        """Run the telemetry flow for the Maersk quotes URL.

        Args:
            shipping_data: Accepted for interface compatibility; not read here.
            proxy: Optional proxy forwarded to the solver.

        Returns:
            The solver result; when its status is 1 an ``api_usage`` entry is
            added whose telemetry header is ``data['telemetry_data']`` or an
            empty string if that value is absent.
        """
        quotes_url = "https://www.maersk.com.cn/instantPrice/quotes"

        result = self.solver.solve_telemetry_challenge(
            href=quotes_url,
            proxy=proxy
        )

        if result.get('status') == 1:
            result['api_usage'] = {
                'endpoint': quotes_url,
                'headers': {
                    'akamai-bm-telemetry': self._response_data(result).get('telemetry_data', ''),
                    'Content-Type': 'application/json',
                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
                },
                'method': 'POST',
                'payload_example': {
                    'origin': 'CNYTN',
                    'destination': 'USNYC',
                    'cargoType': 'DRY'
                }
            }

        return result

    def handle_sephora_protection(self, product_data: Dict,
                                 device_type: str = 'pc',
                                 proxy: Optional[str] = None) -> Dict:
        """Send a payload with a ``device`` field for the Sephora home page.

        Args:
            product_data: Accepted for interface compatibility; not read here.
            device_type: Value sent as ``device`` and echoed in the usage block.
            proxy: Optional proxy added to the payload.

        Returns:
            The solver result; ``shopping_api_usage`` is attached only when the
            status is 1 and the reply actually contains ``_abck``.
        """
        sephora_url = "https://www.sephora.com/"

        payload = {
            "href": sephora_url,
            "device": device_type
        }

        if proxy:
            payload["proxy"] = proxy

        # The solver exposes no public method that accepts ``device``, so the
        # request helper is called directly.
        result = self.solver._make_request(payload)

        abck_cookie = self._response_data(result).get('_abck')
        if result.get('status') == 1 and abck_cookie:
            result['shopping_api_usage'] = {
                'search_endpoint': 'https://www.sephora.com/api/catalog/products',
                'cart_endpoint': 'https://www.sephora.com/api/users/profiles/cart',
                'required_cookies': {
                    '_abck': abck_cookie,
                    'device_type': device_type
                }
            }

        return result

2.4 综合监控与性能优化

企业级监控系统

import logging
from datetime import datetime, timedelta
from typing import Dict, List
import sqlite3

class AkamaiPerformanceMonitor:
    """Record request outcomes in SQLite and summarize them.

    In-memory counters live in ``self.metrics``; every recorded request is
    also written to the ``akamai_requests`` table of the database at
    ``self.db_path``.
    """

    def __init__(self, db_path: str = "akamai_monitor.db"):
        self.db_path = db_path
        self.init_database()

        # NOTE: basicConfig configures the process-wide root logger and opens
        # 'akamai_monitor.log' in the current working directory.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            handlers=[
                logging.FileHandler('akamai_monitor.log'),
                logging.StreamHandler()
            ]
        )
        self.logger = logging.getLogger('AkamaiMonitor')

        self.metrics = {
            'total_requests': 0,
            'success_count': 0,
            'telemetry_success': 0,
            'cookie_validation_success': 0,
            'average_response_time': 0,
            'response_times': []
        }

    def init_database(self):
        """Create the ``akamai_requests`` table if it does not exist yet."""
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS akamai_requests (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    target_url TEXT,
                    request_type TEXT,
                    status INTEGER,
                    response_time REAL,
                    error_message TEXT,
                    abck_length INTEGER,
                    proxy_used TEXT
                )
            ''')
            conn.commit()
        finally:
            # Close even when the statement fails so the handle is not leaked.
            conn.close()

    def record_request(self, target_url: str, request_type: str,
                      result: Dict, response_time: float,
                      proxy: Optional[str] = None):
        """Update the in-memory counters and persist one request outcome.

        Args:
            target_url: URL the request was made for.
            request_type: Free-form label; ``'telemetry'`` also bumps the
                telemetry success counter.
            result: Solver result dict; ``status``, ``msg`` and
                ``data['_abck']`` are read from it.
            response_time: Elapsed time in milliseconds.
            proxy: Proxy that was used, if any.
        """
        self.metrics['total_requests'] += 1
        self.metrics['response_times'].append(response_time)

        self.metrics['average_response_time'] = sum(self.metrics['response_times']) / len(self.metrics['response_times'])

        status = result.get('status', 0)
        error_msg = result.get('msg', '') if status == 0 else None
        abck_length = 0

        if status == 1:
            self.metrics['success_count'] += 1

            if request_type == 'telemetry':
                self.metrics['telemetry_success'] += 1

            if 'data' in result and '_abck' in result['data']:
                abck_length = len(result['data']['_abck'])

            self.logger.info(f"请求成功 - URL: {target_url}, 类型: {request_type}, 耗时: {response_time:.2f}ms")
        else:
            self.logger.warning(f"请求失败 - URL: {target_url}, 错误: {error_msg}, 耗时: {response_time:.2f}ms")

        self._save_to_database(target_url, request_type, status, response_time, error_msg, abck_length, proxy)

    def _save_to_database(self, target_url: str, request_type: str, 
                         status: int, response_time: float,
                         error_message: Optional[str], abck_length: int,
                         proxy: Optional[str]):
        """Insert one row into ``akamai_requests``; errors are logged, not raised."""
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                conn.execute('''
                    INSERT INTO akamai_requests 
                    (target_url, request_type, status, response_time, error_message, abck_length, proxy_used)
                    VALUES (?, ?, ?, ?, ?, ?, ?)
                ''', (target_url, request_type, status, response_time, error_message, abck_length, proxy))
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            self.logger.error(f"数据库保存失败: {e}")

    @staticmethod
    def _window_modifier(hours: int) -> str:
        """Return the SQLite date modifier that moves 'now' back by ``hours``."""
        return f"{-hours} hours"

    def get_performance_report(self, hours: int = 24) -> Dict:
        """Summarize the rows recorded during the last ``hours`` hours.

        Returns:
            A dict with ``report_period``, ``timestamp``, ``overall_stats`` and
            per-type ``request_types``; on failure ``{'error': <message>}``.
        """
        end_time = datetime.now()
        # Rows are stamped by SQLite's CURRENT_TIMESTAMP (UTC, space-separated).
        # The window is therefore computed inside SQLite too; comparing against
        # Python's local isoformat() strings mis-orders rows because of the 'T'
        # separator and the timezone difference.
        window = (self._window_modifier(hours),)

        try:
            conn = sqlite3.connect(self.db_path)
            try:
                cursor = conn.cursor()

                cursor.execute('''
                    SELECT 
                        COUNT(*) as total,
                        SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as success,
                        AVG(response_time) as avg_time,
                        MIN(response_time) as min_time,
                        MAX(response_time) as max_time,
                        AVG(abck_length) as avg_abck_length
                    FROM akamai_requests 
                    WHERE timestamp BETWEEN datetime('now', ?) AND datetime('now')
                ''', window)
                stats = cursor.fetchone()

                cursor.execute('''
                    SELECT request_type, COUNT(*), 
                           SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as success_count
                    FROM akamai_requests 
                    WHERE timestamp BETWEEN datetime('now', ?) AND datetime('now')
                    GROUP BY request_type
                ''', window)
                type_stats = cursor.fetchall()
            finally:
                conn.close()

            total, success, avg_time, min_time, max_time, avg_abck_length = stats or (0, 0, 0, 0, 0, 0)
            total = total or 0
            success = success or 0  # SUM over zero rows is NULL

            report = {
                'report_period': f'{hours}小时',
                'timestamp': end_time.isoformat(),
                'overall_stats': {
                    '总请求数': total,
                    '成功率': f"{(success/total*100) if total > 0 else 0:.2f}%",
                    '平均响应时间': f"{avg_time or 0:.2f}ms",
                    '最快响应': f"{min_time or 0:.2f}ms",
                    '最慢响应': f"{max_time or 0:.2f}ms",
                    '平均_abck长度': f"{avg_abck_length or 0:.0f}字符"
                },
                'request_types': {}
            }

            for req_type, count, success_count in type_stats:
                report['request_types'][req_type] = {
                    '请求数': count,
                    '成功数': success_count,
                    '成功率': f"{(success_count/count*100) if count > 0 else 0:.2f}%"
                }

            return report

        except Exception as e:
            self.logger.error(f"生成报告失败: {e}")
            return {'error': str(e)}

    def get_failure_analysis(self, hours: int = 24) -> Dict:
        """Group failed requests of the last ``hours`` hours by error message.

        Returns:
            ``failure_summary`` (up to ten messages with counts, most frequent
            first) and ``recommendations`` (one entry per matched keyword, no
            duplicates); on failure ``{'error': <message>}``.
        """
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                failures = conn.execute('''
                    SELECT error_message, COUNT(*) as count
                    FROM akamai_requests 
                    WHERE status = 0
                      AND timestamp BETWEEN datetime('now', ?) AND datetime('now')
                    GROUP BY error_message
                    ORDER BY count DESC
                    LIMIT 10
                ''', (self._window_modifier(hours),)).fetchall()
            finally:
                conn.close()

            analysis = {
                'failure_summary': {},
                'recommendations': []
            }

            for error_msg, count in failures:
                text = error_msg or ''
                analysis['failure_summary'][error_msg or '未知错误'] = count

                if '代理' in text:
                    advice = '检查代理配置和可用性'
                elif '超时' in text:
                    advice = '增加请求超时时间或优化网络环境'
                elif 'sensor_data' in text:
                    advice = '更新传感器数据生成算法'
                else:
                    advice = None

                # Several distinct messages can map to the same advice.
                if advice and advice not in analysis['recommendations']:
                    analysis['recommendations'].append(advice)

            return analysis

        except Exception as e:
            self.logger.error(f"失败分析生成失败: {e}")
            return {'error': str(e)}

# Usage example
config = {
    'user_token': 'your_token_here',
    'developer_id': 'hqLmMS'
}

# Build the solver from the config values, then the handler and the monitor.
solver = AkamaiBotManagerSolver(**config)

api_handler = AkamaiEnterpriseAPIHandler(solver)
monitor = AkamaiPerformanceMonitor()

# Jetstar handler example
result = api_handler.handle_jetstar_api(
    {'card_number': '1234567890'},
    proxy="proxy_ip:port",
)

print(f"Jetstar API处理结果: {result}")

# Report over the last 24 hours
report = monitor.get_performance_report(24)
print(f"24小时性能报告: {report}")