不要采用自动化方式,想从接口直接拿数据。这是目前的代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import json
import time
import random
import re
from datetime import datetime
from urllib.parse import quote, urlparse, parse_qs
import logging
import browser_cookie3 # 用于从浏览器导入cookies
# Configure logging: every record is written both to a timestamped log file
# and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # UTF-8 is requested explicitly: the log messages are Chinese and the
        # platform default encoding may not be able to represent them.
        logging.FileHandler(
            f"instagram_fixed_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log",
            encoding='utf-8',
        ),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger('InstagramFixed')
class InstagramSessionFixer:
def __init__(self, target_url):
self.target_url = target_url
self.post_shortcode = self.extract_shortcode(target_url)
self.session = requests.Session()
self.user_agents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0'
]
# 更新会话cookies (使用您提供的cookies)
self.cookies = {
'ds_user_id': '61971702457',
'csrftoken': '4YzNalzWguviI-qNJs0ftx',
'ig_did': '2661FC9A-4FB7-4383-B1EB-714C7A6A54E8',
'dpr': '1.25',
'mid': 'aQRkcwALAAHwafiIiIA8Jae8jgv1',
'ps_l': '1',
'ps_n': '1',
'sessionid': '61971702457%3ATzxhAREIa44MuZ%3A8%3AAYipFOcaqTklj8Q3fd955T1XMcGKg2aCYsuZf2Y7RBo',
'wd': '1536x279',
'rur': '"FRC\\05461971702457\\0541793697743:01fe602ce79b768addfffc37180f68c38dfe057f78f683c62d836153a42a17d08355ed21"',
}
# 添加Instagram所需的其他关键cookie
self.cookies['ig_nrcb'] = '1'
self.cookies['datr'] = 'random_datr_value' # 需要有效的datr值
self.session.cookies.update(self.cookies)
self.update_headers()
# 关键参数
self.post_query_hash = "b3055c01b4b222b8a47dc12b090e4e64"
self.attempt_count = 0
def extract_shortcode(self, url):
"""从URL中提取帖子短码"""
pattern = r"https?://(?:www\.)?instagram\.com/p/([^/?#]+)"
match = re.search(pattern, url)
return match.group(1) if match else None
def update_headers(self):
"""更新请求头 - 添加Instagram所需的关键头部"""
self.headers = {
'User-Agent': random.choice(self.user_agents),
'X-IG-App-ID': '936619743392459',
'X-Requested-With': 'XMLHttpRequest',
'Referer': f'https://www.instagram.com/p/{self.post_shortcode}/',
'Accept': 'application/json, text/plain, */*',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'X-CSRFToken': self.cookies.get('csrftoken', ''),
'X-IG-WWW-Claim': '0',
'X-Instagram-AJAX': '1007616494', # 这个值需要动态生成或从浏览器复制
'X-Requested-With': 'XMLHttpRequest',
'Connection': 'keep-alive',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'Sec-GPC': '1',
'TE': 'trailers'
}
def refresh_x_instagram_ajax(self):
"""刷新X-Instagram-AJAX头部 - 这个值每24小时会变化"""
self.headers['X-Instagram-AJAX'] = str(random.randint(1000000000, 9999999999))
def refresh_csrf_token(self):
"""刷新CSRF令牌 - 从cookies同步到头部"""
self.headers['X-CSRFToken'] = self.session.cookies.get('csrftoken', '')
def validate_session(self):
"""验证会话是否有效 - 通过访问个人主页测试"""
test_url = f"https://www.instagram.com/api/v1/users/web_profile_info/?username=instagram"
response = self.session.get(test_url, headers=self.headers)
if response.status_code == 200:
try:
json_data = response.json()
if json_data.get('status', '') == 'ok':
logger.info("会话验证成功")
return True
except:
pass
logger.warning("会话验证失败")
return False
def get_browser_cookies(self, browser_type='chrome'):
"""从浏览器获取有效的cookies - 需要在浏览器登录Instagram"""
try:
if browser_type == 'chrome':
cookies = browser_cookie3.chrome(domain_name='.instagram.com')
elif browser_type == 'firefox':
cookies = browser_cookie3.firefox(domain_name='.instagram.com')
elif browser_type == 'edge':
cookies = browser_cookie3.edge(domain_name='.instagram.com')
else:
cookies = browser_cookie3.load(domain_name='.instagram.com')
# 将浏览器cookies添加到会话
for cookie in cookies:
self.session.cookies.set(cookie.name, cookie.value)
logger.info(f"成功从{browser_type}导入{len(cookies)}个cookies")
return True
except Exception as e:
logger.error(f"导入浏览器cookies失败: {str(e)}")
return False
def safe_request(self, url):
"""安全的请求方法,带会话刷新和错误处理"""
self.attempt_count += 1
# 每3次尝试刷新关键头部
if self.attempt_count % 3 == 0:
self.refresh_x_instagram_ajax()
self.refresh_csrf_token()
logger.info("已刷新请求头部")
# 随机延迟
delay = random.uniform(1.5, 3.5)
logger.info(f"请求前等待 {delay:.1f}秒...")
time.sleep(delay)
try:
response = self.session.get(url, headers=self.headers, timeout=30)
if response.status_code == 200:
logger.info("请求成功 (200 OK)")
return response.json()
elif response.status_code == 401:
logger.error("会话已过期 (401 Unauthorized)")
# 尝试自动修复会话
if self.get_browser_cookies():
return self.safe_request(url) # 重试
elif response.status_code == 403:
logger.error("访问被拒绝 (403 Forbidden)")
# 尝试刷新cookies
self.refresh_x_instagram_ajax()
self.refresh_csrf_token()
return self.safe_request(url) # 重试
elif response.status_code == 404:
logger.error("资源不存在 (404 Not Found)")
else:
logger.error(f"请求失败: {response.status_code} {response.reason}")
# 如果需要,可以添加更多状态码处理
return None
except requests.exceptions.RequestException as e:
logger.error(f"请求异常: {str(e)}")
return None
def get_post_data(self):
"""获取帖子数据"""
if not self.post_shortcode:
logger.error("无效的Instagram URL")
return None
# 验证会话
if not self.validate_session():
logger.warning("会话无效,尝试从浏览器导入cookies...")
self.get_browser_cookies()
# 构造GraphQL请求
variables = {
"shortcode": self.post_shortcode,
"child_comment_count": 3,
"fetch_comment_count": 40,
"parent_comment_count": 24,
"has_threaded_comments": True
}
# 编码变量
json_str = json.dumps(variables)
encoded_str = quote(json_str)
graphql_url = f"https://www.instagram.com/graphql/query/?query_hash={self.post_query_hash}&variables={encoded_str}"
logger.info(f"请求URL: {graphql_url[:100]}...")
# 发送请求
json_data = self.safe_request(graphql_url)
if json_data and json_data.get('data'):
logger.info("成功获取帖子数据")
return self.parse_post_data(json_data)
else:
logger.error("无法获取帖子数据")
return None
def parse_post_data(self, json_data):
"""解析帖子数据"""
try:
media = json_data['data']['shortcode_media']
# 基本数据
post_data = {
'id': media['id'],
'shortcode': media['shortcode'],
'likes_count': media['edge_media_preview_like']['count'],
'comments_count': media['edge_media_to_comment']['count'],
'timestamp': datetime.fromtimestamp(media['taken_at_timestamp']).strftime('%Y-%m-%d %H:%M:%S'),
'owner': media['owner']['username'],
'is_video': media['is_video'],
'caption': media['edge_media_to_caption']['edges'][0]['node']['text']
if media['edge_media_to_caption']['edges'] else '',
}
# 尝试获取分享数(转发数)
try:
post_data['shares_count'] = media['edge_web_media_to_related_media']['count']
except KeyError:
# 使用备用方法获取分享数
post_data['shares_count'] = media.get('edge_web_media_to_related_media', {}).get('count', 0)
if post_data['shares_count'] == 0:
# 从描述中提取
description = media.get('accessibility_caption', '')
shares_match = re.search(r'(\d+)\s*shares', description)
post_data['shares_count'] = int(shares_match.group(1)) if shares_match else 0
# 视频特定数据
if post_data['is_video']:
post_data['video_view_count'] = media.get('video_view_count', 0)
post_data['video_duration'] = media.get('video_duration', 0)
# 位置信息
if media.get('location'):
post_data['location_id'] = media['location'].get('id', '')
post_data['location_name'] = media['location'].get('name', '')
# 互动数据
post_data['saved_count'] = media.get('edge_media_to_saved', {}).get('count', 0)
logger.info(f"解析成功: {post_data['shortcode']}")
return post_data
except KeyError as e:
logger.error(f"解析失败: 缺少键 {e}")
logger.debug(f"响应数据: {json.dumps(json_data, indent=2)}")
return None
def print_results(self, post_data):
"""打印结果"""
if not post_data:
logger.warning("没有数据可展示")
return
print("\n" + "=" * 50)
print(f"Instagram 帖子分析结果: {self.target_url}")
print("=" * 50)
print(f"帖子ID: {post_data.get('id', '')}")
print(f"发布时间: {post_data.get('timestamp', '')}")
print(f"发布者: @{post_data.get('owner', '')}")
print("-" * 50)
print(f"点赞数: {post_data.get('likes_count', 0):,}")
print(f"评论数: {post_data.get('comments_count', 0):,}")
print(f"转发/分享数: {post_data.get('shares_count', 0):,}")
print(f"收藏数: {post_data.get('saved_count', 0):,}")
if post_data.get('is_video', False):
print(f"视频观看次数: {post_data.get('video_view_count', 0):,}")
print("=" * 50)
print(f"内容描述: {post_data.get('caption', '')[:200]}...")
print("=" * 50)
def main():
    """Fetch the hard-coded target post and print its statistics."""
    # Target post URL
    post_url = "https://www.instagram.com/p/DP1z9ZUDiAL/?igsh=ajBqN282aWtxaXZv"
    # Build the session wrapper and request the post data
    fixer = InstagramSessionFixer(post_url)
    post_data = fixer.get_post_data()
    if post_data:
        fixer.print_results(post_data)
    else:
        logger.error("无法获取帖子数据,请检查cookies和网络连接")


if __name__ == '__main__':
    main()
报错信息是:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import requests
import json
import time
import random
import re
from datetime import datetime
from urllib.parse import quote, urlparse, parse_qs
import logging
import browser_cookie3 # 用于从浏览器导入cookies
# Configure logging: every record is written both to a timestamped log file
# and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # UTF-8 is requested explicitly: the log messages are Chinese and the
        # platform default encoding may not be able to represent them.
        logging.FileHandler(
            f"instagram_fixed_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log",
            encoding='utf-8',
        ),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger('InstagramFixed')
class InstagramSessionFixer:
def __init__(self, target_url):
self.target_url = target_url
self.post_shortcode = self.extract_shortcode(target_url)
self.session = requests.Session()
self.user_agents = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0'
]
# 更新会话cookies (使用您提供的cookies)
self.cookies = {
'ds_user_id': '61971702457',
'csrftoken': '4YzNalzWguviI-qNJs0ftx',
'ig_did': '2661FC9A-4FB7-4383-B1EB-714C7A6A54E8',
'dpr': '1.25',
'mid': 'aQRkcwALAAHwafiIiIA8Jae8jgv1',
'ps_l': '1',
'ps_n': '1',
'sessionid': '61971702457%3ATzxhAREIa44MuZ%3A8%3AAYipFOcaqTklj8Q3fd955T1XMcGKg2aCYsuZf2Y7RBo',
'wd': '1536x279',
'rur': '"FRC\\05461971702457\\0541793697743:01fe602ce79b768addfffc37180f68c38dfe057f78f683c62d836153a42a17d08355ed21"',
}
# 添加Instagram所需的其他关键cookie
self.cookies['ig_nrcb'] = '1'
self.cookies['datr'] = 'random_datr_value' # 需要有效的datr值
self.session.cookies.update(self.cookies)
self.update_headers()
# 关键参数
self.post_query_hash = "b3055c01b4b222b8a47dc12b090e4e64"
self.attempt_count = 0
def extract_shortcode(self, url):
"""从URL中提取帖子短码"""
pattern = r"https?://(?:www\.)?instagram\.com/p/([^/?#]+)"
match = re.search(pattern, url)
return match.group(1) if match else None
def update_headers(self):
"""更新请求头 - 添加Instagram所需的关键头部"""
self.headers = {
'User-Agent': random.choice(self.user_agents),
'X-IG-App-ID': '936619743392459',
'X-Requested-With': 'XMLHttpRequest',
'Referer': f'https://www.instagram.com/p/{self.post_shortcode}/',
'Accept': 'application/json, text/plain, */*',
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'X-CSRFToken': self.cookies.get('csrftoken', ''),
'X-IG-WWW-Claim': '0',
'X-Instagram-AJAX': '1007616494', # 这个值需要动态生成或从浏览器复制
'X-Requested-With': 'XMLHttpRequest',
'Connection': 'keep-alive',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'Sec-GPC': '1',
'TE': 'trailers'
}
def refresh_x_instagram_ajax(self):
"""刷新X-Instagram-AJAX头部 - 这个值每24小时会变化"""
self.headers['X-Instagram-AJAX'] = str(random.randint(1000000000, 9999999999))
def refresh_csrf_token(self):
"""刷新CSRF令牌 - 从cookies同步到头部"""
self.headers['X-CSRFToken'] = self.session.cookies.get('csrftoken', '')
def validate_session(self):
"""验证会话是否有效 - 通过访问个人主页测试"""
test_url = f"https://www.instagram.com/api/v1/users/web_profile_info/?username=instagram"
response = self.session.get(test_url, headers=self.headers)
if response.status_code == 200:
try:
json_data = response.json()
if json_data.get('status', '') == 'ok':
logger.info("会话验证成功")
return True
except:
pass
logger.warning("会话验证失败")
return False
def get_browser_cookies(self, browser_type='chrome'):
"""从浏览器获取有效的cookies - 需要在浏览器登录Instagram"""
try:
if browser_type == 'chrome':
cookies = browser_cookie3.chrome(domain_name='.instagram.com')
elif browser_type == 'firefox':
cookies = browser_cookie3.firefox(domain_name='.instagram.com')
elif browser_type == 'edge':
cookies = browser_cookie3.edge(domain_name='.instagram.com')
else:
cookies = browser_cookie3.load(domain_name='.instagram.com')
# 将浏览器cookies添加到会话
for cookie in cookies:
self.session.cookies.set(cookie.name, cookie.value)
logger.info(f"成功从{browser_type}导入{len(cookies)}个cookies")
return True
except Exception as e:
logger.error(f"导入浏览器cookies失败: {str(e)}")
return False
def safe_request(self, url):
"""安全的请求方法,带会话刷新和错误处理"""
self.attempt_count += 1
# 每3次尝试刷新关键头部
if self.attempt_count % 3 == 0:
self.refresh_x_instagram_ajax()
self.refresh_csrf_token()
logger.info("已刷新请求头部")
# 随机延迟
delay = random.uniform(1.5, 3.5)
logger.info(f"请求前等待 {delay:.1f}秒...")
time.sleep(delay)
try:
response = self.session.get(url, headers=self.headers, timeout=30)
if response.status_code == 200:
logger.info("请求成功 (200 OK)")
return response.json()
elif response.status_code == 401:
logger.error("会话已过期 (401 Unauthorized)")
# 尝试自动修复会话
if self.get_browser_cookies():
return self.safe_request(url) # 重试
elif response.status_code == 403:
logger.error("访问被拒绝 (403 Forbidden)")
# 尝试刷新cookies
self.refresh_x_instagram_ajax()
self.refresh_csrf_token()
return self.safe_request(url) # 重试
elif response.status_code == 404:
logger.error("资源不存在 (404 Not Found)")
else:
logger.error(f"请求失败: {response.status_code} {response.reason}")
# 如果需要,可以添加更多状态码处理
return None
except requests.exceptions.RequestException as e:
logger.error(f"请求异常: {str(e)}")
return None
def get_post_data(self):
"""获取帖子数据"""
if not self.post_shortcode:
logger.error("无效的Instagram URL")
return None
# 验证会话
if not self.validate_session():
logger.warning("会话无效,尝试从浏览器导入cookies...")
self.get_browser_cookies()
# 构造GraphQL请求
variables = {
"shortcode": self.post_shortcode,
"child_comment_count": 3,
"fetch_comment_count": 40,
"parent_comment_count": 24,
"has_threaded_comments": True
}
# 编码变量
json_str = json.dumps(variables)
encoded_str = quote(json_str)
graphql_url = f"https://www.instagram.com/graphql/query/?query_hash={self.post_query_hash}&variables={encoded_str}"
logger.info(f"请求URL: {graphql_url[:100]}...")
# 发送请求
json_data = self.safe_request(graphql_url)
if json_data and json_data.get('data'):
logger.info("成功获取帖子数据")
return self.parse_post_data(json_data)
else:
logger.error("无法获取帖子数据")
return None
def parse_post_data(self, json_data):
"""解析帖子数据"""
try:
media = json_data['data']['shortcode_media']
# 基本数据
post_data = {
'id': media['id'],
'shortcode': media['shortcode'],
'likes_count': media['edge_media_preview_like']['count'],
'comments_count': media['edge_media_to_comment']['count'],
'timestamp': datetime.fromtimestamp(media['taken_at_timestamp']).strftime('%Y-%m-%d %H:%M:%S'),
'owner': media['owner']['username'],
'is_video': media['is_video'],
'caption': media['edge_media_to_caption']['edges'][0]['node']['text']
if media['edge_media_to_caption']['edges'] else '',
}
# 尝试获取分享数(转发数)
try:
post_data['shares_count'] = media['edge_web_media_to_related_media']['count']
except KeyError:
# 使用备用方法获取分享数
post_data['shares_count'] = media.get('edge_web_media_to_related_media', {}).get('count', 0)
if post_data['shares_count'] == 0:
# 从描述中提取
description = media.get('accessibility_caption', '')
shares_match = re.search(r'(\d+)\s*shares', description)
post_data['shares_count'] = int(shares_match.group(1)) if shares_match else 0
# 视频特定数据
if post_data['is_video']:
post_data['video_view_count'] = media.get('video_view_count', 0)
post_data['video_duration'] = media.get('video_duration', 0)
# 位置信息
if media.get('location'):
post_data['location_id'] = media['location'].get('id', '')
post_data['location_name'] = media['location'].get('name', '')
# 互动数据
post_data['saved_count'] = media.get('edge_media_to_saved', {}).get('count', 0)
logger.info(f"解析成功: {post_data['shortcode']}")
return post_data
except KeyError as e:
logger.error(f"解析失败: 缺少键 {e}")
logger.debug(f"响应数据: {json.dumps(json_data, indent=2)}")
return None
def print_results(self, post_data):
"""打印结果"""
if not post_data:
logger.warning("没有数据可展示")
return
print("\n" + "=" * 50)
print(f"Instagram 帖子分析结果: {self.target_url}")
print("=" * 50)
print(f"帖子ID: {post_data.get('id', '')}")
print(f"发布时间: {post_data.get('timestamp', '')}")
print(f"发布者: @{post_data.get('owner', '')}")
print("-" * 50)
print(f"点赞数: {post_data.get('likes_count', 0):,}")
print(f"评论数: {post_data.get('comments_count', 0):,}")
print(f"转发/分享数: {post_data.get('shares_count', 0):,}")
print(f"收藏数: {post_data.get('saved_count', 0):,}")
if post_data.get('is_video', False):
print(f"视频观看次数: {post_data.get('video_view_count', 0):,}")
print("=" * 50)
print(f"内容描述: {post_data.get('caption', '')[:200]}...")
print("=" * 50)
def main():
    """Fetch the hard-coded target post and print its statistics."""
    # Target post URL
    post_url = "https://www.instagram.com/p/DP1z9ZUDiAL/?igsh=ajBqN282aWtxaXZv"
    # Build the session wrapper and request the post data
    fixer = InstagramSessionFixer(post_url)
    post_data = fixer.get_post_data()
    if post_data:
        fixer.print_results(post_data)
    else:
        logger.error("无法获取帖子数据,请检查cookies和网络连接")


if __name__ == '__main__':
    main()
最新发布