Removing .com from a db_link

This note describes how to remove the .com portion of an Oracle database link name, either by updating GLOBAL_NAME as the SYSDBA user or by updating the GLOBAL_DB_NAME row in the props$ table.
 

 

As the SYSDBA user, run:

update global_name set global_name = 'ORCL';
commit;
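To confirm the change, you can query the GLOBAL_NAME view before and after the update. This is a minimal sketch, not from the original post; ORCL.COM is only an example of a domain-qualified global name:

select global_name from global_name;
-- before the update: typically something like ORCL.COM
-- after the update:  ORCL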

Alternatively, update the underlying data dictionary table directly:

update props$ set value$ = 'ORCL' where name = 'GLOBAL_DB_NAME';
commit;
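Existing database links keep the name they were created with, so after changing the global name a link usually has to be dropped and re-created for the new, domain-free name to apply. A minimal sketch, assuming a hypothetical link TEST_LINK, remote credentials scott/tiger, and TNS alias remote_db (none of these names come from the original post):

drop database link TEST_LINK.COM;
create database link TEST_LINK connect to scott identified by tiger using 'remote_db';
select * from dual@TEST_LINK;  -- the link should now resolve without the .COM suffix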


Reference: http://blog.itpub.net/post/1263/48549
