亚马逊 API 接口开发：解锁商品详情页实时数据（接入流程解析）

最新推荐文章于 2025-06-12 22:38:01 发布

API_technology

最新推荐文章于 2025-06-12 22:38:01 发布

阅读量317

点赞数 3

分类专栏：开发、后端 文章标签：python、机器学习、开发语言、大数据、数据挖掘、数据库

本文链接：https://blog.youkuaiyun.com/API_technology/article/details/148585789

版权

开发同时被 2 个专栏收录

64 篇文章

订阅专栏

后端

64 篇文章

订阅专栏

在电商数据分析和竞品监控领域，获取亚马逊商品详情页的实时数据是一项核心需求。本文将详细介绍如何通过亚马逊 API 接口开发来获取这些有价值的数据，包括接入流程、认证机制和代码实现。

亚马逊 API 简介

亚马逊提供了多种 API 接口，其中最常用的是亚马逊产品广告 API（Amazon Product Advertising API）和亚马逊市场卖家 API（Amazon Marketplace Web Service, MWS）。本文主要围绕产品广告 API 展开，该 API 允许开发者访问亚马逊商品信息、价格、评论等数据。

接入准备工作

在开始开发前，需要完成以下准备工作：

注册亚马逊联盟账户（Amazon Associates Program）
获取 API 密钥（Access Key 和 Secret Key）
注册 AWS 账户（如果需要）
了解 API 调用限制和费用（部分功能需要付费）

认证机制

亚马逊 API 使用 HMAC-SHA256 算法进行请求签名认证，主要包含以下步骤：

构建规范化请求字符串
创建待签名字符串
计算签名（HMAC-SHA256 摘要经 Base64 编码）
将签名添加到请求参数中

下面是一个完整的 Python 实现，展示如何构建和发送请求到亚马逊 API：

import base64
import hashlib
import hmac
import time
import urllib.parse
from datetime import datetime

import requests

class AmazonAPIClient:
    """Small client for the Amazon Product Advertising API REST endpoint.

    Every request is a signed GET against ``https://<endpoint>/onca/xml``.
    The signature is the Base64-encoded HMAC-SHA256 of the canonical request
    string, keyed with the secret key.
    """

    # Request path; it is part of the string that gets signed.
    REQUEST_URI = '/onca/xml'

    def __init__(self, access_key, secret_key, associate_tag, region='US', timeout=10):
        """Store credentials and pick the endpoint host for ``region``.

        Args:
            access_key: Value sent as the ``AWSAccessKeyId`` parameter.
            secret_key: Key used for the HMAC-SHA256 signature.
            associate_tag: Value sent as the ``AssociateTag`` parameter.
            region: Key into ``self.endpoints``. An unknown region falls
                back to the US host.
            timeout: Timeout in seconds passed to ``requests.get``.
        """
        self.access_key = access_key
        self.secret_key = secret_key
        self.associate_tag = associate_tag
        self.region = region
        self.timeout = timeout

        # Endpoint host per marketplace region.
        self.endpoints = {
            'US': 'webservices.amazon.com',
            'CA': 'webservices.amazon.ca',
            'UK': 'webservices.amazon.co.uk',
            'DE': 'webservices.amazon.de',
            'FR': 'webservices.amazon.fr',
            'IT': 'webservices.amazon.it',
            'ES': 'webservices.amazon.es',
            'JP': 'webservices.amazon.co.jp',
            'CN': 'webservices.amazon.cn',
            'IN': 'webservices.amazon.in'
        }

        self.endpoint = self.endpoints.get(region, self.endpoints['US'])

    def get_timestamp(self):
        """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
        return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())

    @staticmethod
    def _encode(value):
        """Percent-encode ``value`` per RFC 3986.

        Only letters, digits and ``-_.~`` stay literal; a space becomes
        ``%20`` (not ``+``), which is what the signature scheme requires.
        """
        return urllib.parse.quote(str(value), safe='-_.~')

    def _canonical_query(self, params):
        """Return ``params`` as a query string sorted by parameter name."""
        ordered = sorted(params.items(), key=lambda pair: pair[0])
        return '&'.join(
            f"{self._encode(name)}={self._encode(value)}"
            for name, value in ordered
        )

    def sign_request(self, params):
        """Add the common parameters and a ``Signature`` entry to ``params``.

        The dictionary is updated in place and also returned. The stored
        signature is the raw Base64 text; it is percent-encoded exactly once
        when the request URL is built.

        Args:
            params: Operation-specific request parameters.

        Returns:
            The same dictionary, now including ``AWSAccessKeyId``,
            ``AssociateTag``, ``Timestamp``, ``Version`` and ``Signature``.
        """
        # A signature left over from an earlier call must not be signed again.
        params.pop('Signature', None)

        params['AWSAccessKeyId'] = self.access_key
        params['AssociateTag'] = self.associate_tag
        params['Timestamp'] = self.get_timestamp()
        params['Version'] = '2013-08-01'

        # Verb, host, path and canonical query string, one per line.
        string_to_sign = '\n'.join([
            'GET',
            self.endpoint,
            self.REQUEST_URI,
            self._canonical_query(params),
        ])

        digest = hmac.new(
            self.secret_key.encode('utf-8'),
            string_to_sign.encode('utf-8'),
            hashlib.sha256
        ).digest()

        params['Signature'] = base64.b64encode(digest).decode('ascii')
        return params

    def _build_url(self, signed_params):
        """Return the full request URL for already-signed parameters."""
        query_string = self._canonical_query(signed_params)
        return f"https://{self.endpoint}{self.REQUEST_URI}?{query_string}"

    def _send(self, params):
        """Sign ``params``, issue the GET request and return the body text.

        Returns:
            The response text, or ``None`` when the request fails (the
            error is printed).
        """
        url = self._build_url(self.sign_request(params))
        try:
            response = requests.get(url, timeout=self.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"请求出错: {e}")
            return None
        return response.text

    def get_item_info(self, asin, response_group='ItemAttributes,Offers,Images,Reviews'):
        """Run an ``ItemLookup`` for one item.

        Args:
            asin: Identifier sent as ``ItemId``.
            response_group: Comma-separated response groups to request.

        Returns:
            The XML response text, or ``None`` when the request fails.
        """
        return self._send({
            'Operation': 'ItemLookup',
            'ItemId': asin,
            'ResponseGroup': response_group
        })

    def search_items(self, keywords, search_index='All', response_group='ItemAttributes,Offers,Images'):
        """Run an ``ItemSearch`` for ``keywords``.

        Args:
            keywords: Search terms sent as ``Keywords``.
            search_index: Value sent as ``SearchIndex``.
            response_group: Comma-separated response groups to request.

        Returns:
            The XML response text, or ``None`` when the request fails.
        """
        return self._send({
            'Operation': 'ItemSearch',
            'Keywords': keywords,
            'SearchIndex': search_index,
            'ResponseGroup': response_group
        })

# Usage example
if __name__ == "__main__":
    # Placeholder credentials; substitute your own before running.
    ACCESS_KEY = "YOUR_ACCESS_KEY"
    SECRET_KEY = "YOUR_SECRET_KEY"
    ASSOCIATE_TAG = "YOUR_ASSOCIATE_TAG"

    client = AmazonAPIClient(
        access_key=ACCESS_KEY,
        secret_key=SECRET_KEY,
        associate_tag=ASSOCIATE_TAG,
    )

    # Look up a single product by its ASIN (sample value).
    asin = "B07HGGYFZ6"
    item_info = client.get_item_info(asin)
    print(f"商品 {asin} 的信息:\n{item_info}")

    # Run a keyword search.
    search_keywords = "wireless headphones"
    search_results = client.search_items(search_keywords)
    print(f"搜索关键词 '{search_keywords}' 的结果:\n{search_results}")

解析 API 响应数据

亚马逊 API 返回的是 XML 格式的数据，我们可以使用 Python 的 ElementTree 库来解析：

import xml.etree.ElementTree as ET

class AmazonResponseParser:
    """Turn Product Advertising API XML responses into plain dictionaries.

    Both public methods return a dictionary. Failures (malformed XML, an
    ``Error`` element in the response, no items) are reported as
    ``{'error': <message>}`` rather than raised.
    """

    def __init__(self):
        """Set up the namespace map used by every element lookup."""
        # Response elements live in this namespace; 'a' is the local prefix.
        self.ns = {
            'a': 'http://webservices.amazon.com/AWSECommerceService/2013-08-01'
        }

    def _text(self, parent, path):
        """Return the text of the first match for ``path``, or ``None``."""
        node = parent.find(path, self.ns)
        return None if node is None else node.text

    def _load(self, xml_response):
        """Parse ``xml_response`` and check it for an ``Error`` element.

        Returns:
            ``(root, None)`` on success, or ``(None, error_dict)`` when the
            XML is malformed or contains an ``Error`` element.
        """
        try:
            root = ET.fromstring(xml_response)
        except ET.ParseError as exc:
            return None, {'error': f"Invalid XML response: {exc}"}

        error = root.find('.//a:Error', self.ns)
        if error is not None:
            # Tolerate an Error element that lacks Code or Message.
            code = self._text(error, 'a:Code') or 'UnknownError'
            message = self._text(error, 'a:Message') or 'no message provided'
            return None, {'error': f"{code}: {message}"}

        return root, None

    def _extract_item(self, item, image_tag):
        """Build the common result dictionary for one ``Item`` element.

        Args:
            item: The ``Item`` element.
            image_tag: Name of the image element whose ``URL`` is used
                (for example ``LargeImage``).

        Returns:
            Dictionary with ``asin``, ``title``, ``url``, ``price``,
            ``currency`` and ``image_url``; a value is ``None`` when the
            corresponding element is missing.
        """
        result = {
            'asin': self._text(item, 'a:ASIN'),
            'title': self._text(item, './/a:Title'),
            'url': self._text(item, './/a:DetailPageURL'),
            'price': None,
            'currency': None,
            'image_url': None
        }

        # Price and currency come from the first Offer; the currency is
        # only reported together with a price.
        offer = item.find('.//a:Offer', self.ns)
        if offer is not None:
            price_node = offer.find('.//a:FormattedPrice', self.ns)
            if price_node is not None:
                result['price'] = price_node.text
                result['currency'] = self._text(offer, './/a:CurrencyCode')

        image = item.find(f'.//a:{image_tag}', self.ns)
        if image is not None:
            result['image_url'] = self._text(image, 'a:URL')

        return result

    def parse_item_info(self, xml_response):
        """Parse an item lookup response.

        Args:
            xml_response: XML text of the response.

        Returns:
            The dictionary described in ``_extract_item`` (image taken from
            ``LargeImage``), plus ``average_rating`` and ``review_count``
            when the response contains them; or ``{'error': ...}``.
        """
        root, failure = self._load(xml_response)
        if failure is not None:
            return failure

        item = root.find('.//a:Item', self.ns)
        if item is None:
            return {'error': 'No item found'}

        result = self._extract_item(item, 'LargeImage')

        # Review keys are added only when the elements are present.
        reviews = item.find('.//a:CustomerReviews', self.ns)
        if reviews is not None:
            rating = reviews.find('.//a:AverageRating', self.ns)
            if rating is not None:
                result['average_rating'] = rating.text

            review_count = reviews.find('.//a:TotalReviews', self.ns)
            if review_count is not None:
                result['review_count'] = review_count.text

        return result

    def parse_search_results(self, xml_response):
        """Parse an item search response.

        Args:
            xml_response: XML text of the response.

        Returns:
            ``{'items': [...]}`` with one dictionary per item (image taken
            from ``MediumImage``), plus ``total_results`` when the response
            carries a numeric ``TotalResults``; or ``{'error': ...}``.
        """
        root, failure = self._load(xml_response)
        if failure is not None:
            return failure

        items = root.findall('.//a:Item', self.ns)
        if not items:
            return {'error': 'No items found'}

        results = [self._extract_item(item, 'MediumImage') for item in items]

        try:
            total = int(self._text(root, './/a:TotalResults'))
        except (TypeError, ValueError):
            # Missing or non-numeric count: report the items alone.
            return {'items': results}

        return {
            'total_results': total,
            'items': results
        }

# Usage example
if __name__ == "__main__":
    # A canned API response used in place of a live request.
    # Ampersands inside element text must be written as &amp; for the
    # document to be well-formed XML.
    sample_response = """
    <ItemLookupResponse xmlns="http://webservices.amazon.com/AWSECommerceService/2013-08-01">
        <OperationRequest>
            <HTTPHeaders>
                <Header Name="UserAgent" Value="python-requests/2.25.1" />
            </HTTPHeaders>
            <RequestId>12345678-1234-1234-1234-123456789012</RequestId>
            <Arguments>
                <Argument Name="Operation" Value="ItemLookup" />
                <Argument Name="ResponseGroup" Value="ItemAttributes,Offers,Images,Reviews" />
                <Argument Name="ItemId" Value="B07HGGYFZ6" />
                <Argument Name="AWSAccessKeyId" Value="AKIAIOSFODNN7EXAMPLE" />
                <Argument Name="AssociateTag" Value="yourtag-20" />
                <Argument Name="Timestamp" Value="2023-01-01T12:00:00Z" />
                <Argument Name="Version" Value="2013-08-01" />
                <Argument Name="Signature" Value="EXAMPLE" />
            </Arguments>
            <RequestProcessingTime>0.0422150000000000</RequestProcessingTime>
        </OperationRequest>
        <Items>
            <Request>
                <IsValid>True</IsValid>
                <ItemLookupRequest>
                    <IdType>ASIN</IdType>
                    <ItemId>B07HGGYFZ6</ItemId>
                    <ResponseGroup>ItemAttributes,Offers,Images,Reviews</ResponseGroup>
                    <VariationPage>All</VariationPage>
                </ItemLookupRequest>
            </Request>
            <Item>
                <ASIN>B07HGGYFZ6</ASIN>
                <DetailPageURL>https://www.amazon.com/dp/B07HGGYFZ6</DetailPageURL>
                <ItemAttributes>
                    <Binding>Electronics</Binding>
                    <Brand>ExampleBrand</Brand>
                    <Color>Black</Color>
                    <Department>Electronics</Department>
                    <IsAdultProduct>false</IsAdultProduct>
                    <Label>ExampleLabel</Label>
                    <ListPrice>
                        <Amount>12999</Amount>
                        <CurrencyCode>USD</CurrencyCode>
                        <FormattedPrice>$129.99</FormattedPrice>
                    </ListPrice>
                    <Manufacturer>ExampleManufacturer</Manufacturer>
                    <Model>EXAMPLE-MODEL</Model>
                    <NumberOfItems>1</NumberOfItems>
                    <PackageDimensions>
                        <Height Units="hundredths-inches">800</Height>
                        <Length Units="hundredths-inches">600</Length>
                        <Weight Units="hundredths-pounds">200</Weight>
                        <Width Units="hundredths-inches">200</Width>
                    </PackageDimensions>
                    <PackageQuantity>1</PackageQuantity>
                    <PartNumber>EXAMPLE-PN</PartNumber>
                    <ProductGroup>Electronics</ProductGroup>
                    <ProductTypeName>ELECTRONICS</ProductTypeName>
                    <Publisher>ExamplePublisher</Publisher>
                    <ReleaseDate>2019-01-01</ReleaseDate>
                    <Size>Standard</Size>
                    <Studio>ExampleStudio</Studio>
                    <Title>Example Product Title</Title>
                    <Warranty>1 Year Limited Warranty</Warranty>
                </ItemAttributes>
                <Offers>
                    <TotalOffers>2</TotalOffers>
                    <TotalOfferPages>1</TotalOfferPages>
                    <MoreOffersUrl>https://www.amazon.com/gp/offer-listing/B07HGGYFZ6</MoreOffersUrl>
                    <Offer>
                        <OfferAttributes>
                            <Condition>New</Condition>
                        </OfferAttributes>
                        <OfferListing>
                            <Price>
                                <Amount>12999</Amount>
                                <CurrencyCode>USD</CurrencyCode>
                                <FormattedPrice>$129.99</FormattedPrice>
                            </Price>
                            <Availability>Usually ships within 24 hours</Availability>
                            <AvailabilityAttributes>
                                <AvailabilityType>now</AvailabilityType>
                                <MinimumHours>0</MinimumHours>
                                <MaximumHours>0</MaximumHours>
                            </AvailabilityAttributes>
                            <IsEligibleForSuperSaverShipping>true</IsEligibleForSuperSaverShipping>
                            <IsEligibleForPrime>true</IsEligibleForPrime>
                            <OfferListingId>EXAMPLE-OFFER-LISTING-ID</OfferListingId>
                        </OfferListing>
                    </Offer>
                </Offers>
                <Images>
                    <LargeImage>
                        <URL>https://m.media-amazon.com/images/I/81abcdefg-h.jpg</URL>
                        <Height Units="pixels">500</Height>
                        <Width Units="pixels">500</Width>
                    </LargeImage>
                    <MediumImage>
                        <URL>https://m.media-amazon.com/images/I/81abcdefg-h._AC_SX300.jpg</URL>
                        <Height Units="pixels">300</Height>
                        <Width Units="pixels">300</Width>
                    </MediumImage>
                    <SmallImage>
                        <URL>https://m.media-amazon.com/images/I/81abcdefg-h._AC_SX150.jpg</URL>
                        <Height Units="pixels">150</Height>
                        <Width Units="pixels">150</Width>
                    </SmallImage>
                </Images>
                <CustomerReviews>
                    <IFrameURL>https://www.amazon.com/reviews/iframe?akid=AKIAIOSFODNN7EXAMPLE&amp;alinkCode=xm2&amp;asin=B07HGGYFZ6&amp;atag=yourtag-20&amp;encoding=UTF8&amp;collapsed=0&amp;format=embedded&amp;language=en_US&amp;showViewpoints=1&amp;sortBy=recent</IFrameURL>
                    <AverageRating>4.8 out of 5 stars</AverageRating>
                    <TotalReviews>245</TotalReviews>
                </CustomerReviews>
            </Item>
        </Items>
    </ItemLookupResponse>
    """

    # Parse the sample and print every extracted field.
    parser = AmazonResponseParser()
    result = parser.parse_item_info(sample_response)
    print("解析结果:")
    for key, value in result.items():
        print(f"{key}: {value}")

实际应用案例

下面是一个简单的应用示例，展示如何使用上述代码获取商品信息并进行分析（示例假设前两段代码已分别保存为 amazon_api_client.py 和 amazon_response_parser.py，以便导入）：

import time
import csv
import os
from datetime import datetime
from amazon_api_client import AmazonAPIClient
from amazon_response_parser import AmazonResponseParser

class AmazonPriceTracker:
    """Record product prices over time and export the history to CSV files."""

    # Characters replaced by '_' when a product name becomes part of a file
    # name: the space plus characters that act as path separators or are
    # rejected by common file systems.
    _FILENAME_TRANSLATION = str.maketrans({ch: '_' for ch in ' /\\:*?"<>|'})

    def __init__(self, access_key, secret_key, associate_tag, region='US'):
        """Create the API client and parser and ensure the data directory exists.

        Args:
            access_key: Passed through to ``AmazonAPIClient``.
            secret_key: Passed through to ``AmazonAPIClient``.
            associate_tag: Passed through to ``AmazonAPIClient``.
            region: Passed through to ``AmazonAPIClient``.
        """
        self.client = AmazonAPIClient(access_key, secret_key, associate_tag, region)
        self.parser = AmazonResponseParser()
        # Maps ASIN -> {'name': str, 'price_history': [{'time', 'price'}, ...]}
        self.tracked_items = {}
        self.data_dir = "amazon_data"

        # exist_ok avoids the check-then-create race.
        os.makedirs(self.data_dir, exist_ok=True)

    def _fetch_item_info(self, asin):
        """Fetch and parse the item; return the parsed dict or ``None``.

        A failed request or an ``error`` entry in the parsed result is
        printed and reported as ``None``.
        """
        xml_response = self.client.get_item_info(asin)
        if not xml_response:
            print(f"无法获取商品 {asin} 的信息")
            return None

        item_info = self.parser.parse_item_info(xml_response)
        if 'error' in item_info:
            print(f"获取商品信息时出错: {item_info['error']}")
            return None

        return item_info

    def add_item(self, asin, name=None):
        """Start tracking ``asin``.

        Args:
            asin: Identifier of the item to track.
            name: Display name; when omitted the fetched title is used, and
                the ASIN itself if no title is available.

        Returns:
            ``True`` when the item was added, ``False`` when its
            information could not be fetched.
        """
        item_info = self._fetch_item_info(asin)
        if item_info is None:
            return False

        # Always end up with a string: the name is later sliced for file names.
        item_name = name or item_info.get('title') or asin

        self.tracked_items[asin] = {
            'name': item_name,
            'price_history': []
        }

        print(f"已添加商品 '{item_name}' (ASIN: {asin}) 到追踪列表")
        return True

    def track_price(self, asin):
        """Fetch the current price of a tracked item and append it to its history.

        Returns:
            ``True`` when a price was recorded; ``False`` when the item is
            not tracked, the fetch failed, or no price was returned.
        """
        if asin not in self.tracked_items:
            print(f"商品 {asin} 不在追踪列表中")
            return False

        item_info = self._fetch_item_info(asin)
        if item_info is None:
            return False

        tracked = self.tracked_items[asin]
        current_price = item_info.get('price')
        if not current_price:
            print(f"无法获取商品 '{tracked['name']}' 的价格")
            return False

        tracked['price_history'].append({
            'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'price': current_price
        })
        print(f"已记录商品 '{tracked['name']}' 的价格: {current_price}")
        return True

    def track_all(self):
        """Call ``track_price`` for every tracked item; return the success count."""
        return sum(1 for asin in self.tracked_items if self.track_price(asin))

    def _write_history(self, asin, item):
        """Write the price history of one item to its CSV file."""
        safe_name = item['name'][:50].translate(self._FILENAME_TRANSLATION)
        filename = f"{self.data_dir}/{asin}_{safe_name}.csv"

        os.makedirs(self.data_dir, exist_ok=True)
        # Explicit UTF-8: the header cells are non-ASCII and the platform
        # default encoding may not be able to represent them.
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=['时间', '价格'])
            writer.writeheader()
            writer.writerows(
                {'时间': record['time'], '价格': record['price']}
                for record in item['price_history']
            )

        print(f"已导出商品 '{item['name']}' 的价格历史到 {filename}")

    def export_data(self, asin=None):
        """Export price history to CSV files in ``self.data_dir``.

        Args:
            asin: Export only this item (even when its history is empty).
                When omitted, every item with a non-empty history is
                exported.

        Returns:
            ``False`` when ``asin`` is given but not tracked, else ``True``.
        """
        if asin:
            if asin not in self.tracked_items:
                print(f"商品 {asin} 不在追踪列表中")
                return False

            self._write_history(asin, self.tracked_items[asin])
            return True

        for tracked_asin, item in self.tracked_items.items():
            if item['price_history']:
                self._write_history(tracked_asin, item)

        return True

    def run_scheduled_tracking(self, interval_seconds=3600, max_iterations=None):
        """Repeatedly track all items and export the data.

        Args:
            interval_seconds: Pause between rounds, in seconds.
            max_iterations: Number of rounds to run; ``None`` runs forever.
        """
        iteration = 0
        while max_iterations is None or iteration < max_iterations:
            print(f"\n=== 开始第 {iteration + 1} 轮追踪 ===")
            success_count = self.track_all()
            print(f"=== 完成第 {iteration + 1} 轮追踪，成功记录 {success_count} 个商品的价格 ===")

            self.export_data()

            iteration += 1

            # Sleep only when another round follows.
            if max_iterations is None or iteration < max_iterations:
                print(f"\n等待 {interval_seconds} 秒后进行下一轮追踪...")
                time.sleep(interval_seconds)

# Usage example
if __name__ == "__main__":
    # Placeholder credentials; substitute your own before running.
    ACCESS_KEY = "YOUR_ACCESS_KEY"
    SECRET_KEY = "YOUR_SECRET_KEY"
    ASSOCIATE_TAG = "YOUR_ASSOCIATE_TAG"

    tracker = AmazonPriceTracker(ACCESS_KEY, SECRET_KEY, ASSOCIATE_TAG)

    # Register the products to follow, in order.
    for product_asin, product_label in (
        ("B07HGGYFZ6", "Example Product 1"),
        ("B07HGGYFZ7", "Example Product 2"),
    ):
        tracker.add_item(product_asin, product_label)

    # One tracking pass followed by a CSV export.
    tracker.track_all()
    tracker.export_data()

    # Alternatively, run the scheduled loop (hourly, three rounds):
    # tracker.run_scheduled_tracking(interval_seconds=3600, max_iterations=3)