Http download like Video Stream?

本文探讨了通过HTTP和RTSP链接处理视频时的不同流程和技术特性。HTTP下载涉及选择存储路径并允许同时播放,而RTSP直接进行流媒体播放。文章还讨论了TCP与UDP协议在数据传输方面的区别及其对用户体验的影响。
Description
–In the test case, when the user presses an RTSP link, the phone plays the video stream immediately; when the user presses an HTTP link, the phone downloads the video file from the HTTP server and shows a page that lets the user select a path to store the downloaded file. If the user wants to play the video at the same time, they can press the play button.
–Now the customer wants HTTP download to behave like video streaming.
–Actually, when the user presses an HTTP link, the phone enters the download process and downloads the file from the HTTP server; if the user wants to play the video at the same time, they can press the play button.
–When the user presses an RTSP link, the phone enters the video streaming process and plays the video immediately, without downloading the file.
–Also, HTTP download uses TCP to receive data, while video streaming uses RTP (most RTP runs over UDP) to receive data. Because TCP uses acknowledgement and retransmission, HTTP download is slower than video streaming, whereas video streaming may lose data.
–So the two kinds of links go through different processes.

Conclusion
–Our phone can already play a video while it is being downloaded, and HTTP download is a different process from video streaming. Other feature phones behave the same way, so I suggest we do not modify this feature.

int HTTPSessionExecute(TPHTTPSERVERSESSION *pSession) { int iRet, iGot; unsigned long long ullCurTime = tpgetboottime(); pSession->reply_error_code = 0; RETURN_IVALID_IF(!pSession) /* 非P2P方式拉流且在规定时间内未完成HTTP的完整消息交互,则断开 */ if ((pSession->p2p_connection == NULL) && (pSession->iSessionState != TPHTTP_SESSION_STATE_PLAYING)) { if (pSession->ulTime4LastHTTPMsg == 0) { pSession->ulTime4LastHTTPMsg = ullCurTime; } if ((ullCurTime - pSession->ulTime4LastHTTPMsg) > pSession->ullRecvTimeout) { HTTP_ERROR("HTTP: session timeout"); return(TPHTTPSERVER_EC_RTP_FAILURE); } } if((eConnType_Relay == pSession->conn_type) && (TPHTTP_SESSION_STATE_INIT == pSession->iSessionState)) {/* relay的init状态才判断是否需要发送心跳包 */ HTTPRelayPreHeatBeat(pSession, (S32)(ullCurTime/1000)); } /* p2p方式拉流,先判断UDT的状态,UDT状态异常则返回 TPRTSPSERVER_EC_SOCKET_CLOSE */ if(pSession->p2p_connection != NULL) { int udt_state = P2PUdt_GetState(pSession->p2p_connection); if(udt_state != CONNECTING && udt_state != CONNECTED) { HTTP_ERROR_LOG("[p2p dbg] UDT state(%d)", udt_state); return TPRTSPSERVER_EC_SOCKET_CLOSE; } else if (udt_state == CONNECTING) { /*如果为connecting状态,则判断是否超时*/ if ((ullCurTime - pSession->startconnectTime) > P2P_UDT_SETUP_TIMEOUT) { HTTP_ERROR_LOG("[p2p dbg] UDT connecting time out"); return TPRTSPSERVER_EC_SOCKET_CLOSE; } } } //没有正在发送的http报文,则尝试发送响应信令/通知数据 if (!HttpStreamCtx_IsSendPending(&pSession->stream_ctx)) { HttpSession_TrySendReplyData(pSession); } /* 直接推流功能:根据字段及连接类型 */ if (pSession->is_directly_push_stream && pSession->conn_type == eConnType_Relay) { /* 直接推流:将填充拉流参数,跳到取流处*/ pSession->is_directly_push_stream = 0; pSession->iSessionStream = HTTP_STREAM_PREVIEW; /* 监听客户端预览拉流请求 */ PREVIEW_STREAM_TYPE_MSG msg = {0}; msg.type = HTTP_STREAM_PREVIEW; msg_send(PREVIEW_HTTP_STREAM_MID, (uint8_t *)&msg, sizeof(PREVIEW_STREAM_TYPE_MSG)); /* 流数限制判断: 若码流数已达上限,不直接推流 */ int is_up_to_limit = SessionStream_IsUpToLimit(pSession->iCurStreamID, pSession->iSessionStream); if (is_up_to_limit) { 
HTTPSession_SetState(pSession, TPHTTP_SESSION_STATE_INIT); } else { HTTPSession_SetState(pSession, TPHTTP_SESSION_STATE_READY); HttpStreamCtx_Init(&pSession->stream_ctx, pSession->iSessionStream, pSession->uSessionID, 0, 0, pSession); goto Push_Stream_Handle; } } iGot = HTTPSessionRecv(pSession); if ((TP_TCP_EC_FAILURE == iGot) || (TP_TCP_EC_SOCKET_CLOSE == iGot) || (iGot == TP_TCP_EC_BUFF_FULL)) { HTTP_ERROR_LOG("HTTP: iGot=%d", iGot); if(TPHTTP_SESSION_STATE_PLAYING == pSession->iSessionState && HTTP_STREAM_TALK == pSession->iSessionStream) { TALK_MASK_RELOAD_MSG msg = {0}; msg.enabled = 0; msg_send(TALK_MASK_RELOAD, (uint8_t *)&msg, sizeof(TALK_MASK_RELOAD_MSG)); } return(TPHTTPSERVER_EC_FAILURE); } /* relay首个报文为boundary */ if (pSession->conn_type == eConnType_Relay && pSession->is_relay_boundary_msg) { return HTTPHandleRelayBoundaryMsg(pSession); } if ((iGot > 0) || (MBUFFERByteArrayCurPos(&pSession->RXBuffer) > 0 && pSession->iSessionParseState != HTTP_EC_NEED_MORE_DATA)) { iRet = HTTPSessionParse(pSession); if (iRet == TPHTTPSERVER_EC_FAILURE) { HTTP_ERROR_LOG("Session %d recv invalid HTTP Msg", pSession->iSessionID); if (pSession->conn_type != eConnType_Relay) { HTTPSessionReplyBadRequest(pSession); } /* 消息错误,清空接收缓存数据 */ MBUFFERByteArrayReset(&pSession->RXBuffer); return (TPHTTPSERVER_EC_OK); } else if (iRet == HTTP_EC_NEED_MORE_DATA) { return (TPHTTPSERVER_EC_OK); } /* 未知的消息类型,或者是RTCP报文,更新心跳,忽略后续解析 */ if (pSession->Method.iMethod == HTTP_UN_SUPPORT) { pSession->ulTime4LastHTTPMsg = ullCurTime; return (TPHTTPSERVER_EC_OK); } pSession->ulTime4LastHTTPMsg = ullCurTime; switch (pSession->Method.iMethod) { case HTTP_POST: HTTP_DEBUG("Session %d receive POST, current state %d", pSession->iSessionID, pSession->iSessionState); if (pSession->iSessionState == TPHTTP_SESSION_STATE_READY) { int is_up_to_limit = SessionStream_IsUpToLimit(pSession->iCurStreamID, pSession->iSessionStream); if (is_up_to_limit) { #ifdef INAPP_DIAGNOSE /* 
若主子码流都满了诊断流将无法进入playing,则在此处将流数情况通知diagnose模块,同时还原相关变量 */ if (eStreamDiagnose_ON == pSession->diag_mode && SessionStream_IsUpToLimit(TPRTP_STREAM_ID_MAIN, HTTP_STREAM_PREVIEW) && SessionStream_IsUpToLimit(TPRTP_STREAM_ID_SUB, HTTP_STREAM_PREVIEW)) { diagnose_reply_bitrate(pSession->conn_type, pSession->bitrate, IPC_DIAGNOSE_STREAM_FULL); pSession->diag_mode = eStreamDiagnose_INIT; pSession->diag_start_time = 0; pSession->trans_byte_total = 0; pSession->bitrate = 0; } #endif int reply_code = TPPLAYER_SESSION_UP_TO_LIMIT; #ifdef DOORBELL_FUNCTION_SUPPORT if (HTTP_STREAM_TALK != pSession->iSessionStream || ERROR == srtp_session_free_talk()) #endif { if (HTTP_STREAM_TALK == pSession->iSessionStream) { reply_code = TPPLAYER_EC_AUDIO_DEVICE_BUSY; } pSession->reply_error_code = reply_code; iRet = HTTPSessionReplyPOST(pSession); if (iRet != TPHTTPSERVER_EC_OK) { return(TPHTTPSERVER_EC_FAILURE); } HTTPSession_SetState(pSession, TPHTTP_SESSION_STATE_INIT); return TPHTTPSERVER_EC_OK; } } if (pSession->iSessionStream == HTTP_STREAM_PLAYBACK || pSession->iSessionStream == HTTP_STREAM_DOWNLOAD || pSession->iSessionStream == HTTP_STREAM_RECORD_DOWNLOAD) { /* 回放/下载录像/下载缩略图信令,若sd卡状态异常(不可用),则在回复中带上sd卡状态信息 */ if (!is_sd_card_available()) { pSession->reply_error_code = TPPLAYER_EC_STORAGE_UNAVAILABLE; iRet = HTTPSessionReplyPOST(pSession); if (iRet != TPHTTPSERVER_EC_OK) { return(TPHTTPSERVER_EC_FAILURE); } HTTPSession_SetState(pSession, TPHTTP_SESSION_STATE_INIT); return TPHTTPSERVER_EC_OK; } } #ifdef TAPO_USR_DEF_AUDIO_UPLOAD if (HTTP_STREAM_USR_DEF_AUDIO == pSession->iSessionStream) { if (pSession->file_ops.fd < 0) { int reply_code = 0; switch (pSession->file_ops.fd) { case USR_DEF_AUDIO_FILE_ID_FULL_FD: reply_code = TPPLAYER_EC_USR_DEF_AUDIO_FULL; break; case USR_DEF_AUDIO_DEVICE_BUSY_FD: reply_code = TPPLAYER_EC_AUDIO_DEVICE_BUSY; break; case USR_DEF_AUDIO_NAME_DUPLICATE_FD: reply_code = TPPLAYER_EC_NAME_DUPLICATE; break; default: break; } pSession->reply_error_code = reply_code; 
iRet = HTTPSessionReplyPOST(pSession); if (iRet != TPHTTPSERVER_EC_OK) { return (TPHTTPSERVER_EC_FAILURE); } HTTPSession_SetState(pSession, TPHTTP_SESSION_STATE_INIT); return TPHTTPSERVER_EC_OK; } } #endif } iRet = HTTPSessionReplyPOST(pSession); if (iRet != TPHTTPSERVER_EC_OK) { return(TPHTTPSERVER_EC_FAILURE); } //HTTP_DEBUG("Session %d receive SETUP, current state %d", pSession->iSessionID, pSession->iSessionState); if (pSession->iSessionState == TPHTTP_SESSION_STATE_READY) { if (pSession->iSessionStream == HTTP_STREAM_DOWNLOAD || pSession->iSessionStream == HTTP_STREAM_REALTIME_PHOTO_DOWNLOAD) { TPSendDownloadData(pSession); return(TPHTTPSERVER_EC_OK); } #ifdef SPLENDID_MOMENT else if (pSession->iSessionStream == HTTP_STREAM_SPLMOM_DOWNLOAD) { TPSendSplendidMomentData(pSession); return(TPHTTPSERVER_EC_OK); } #endif #ifdef DETECTION_FSS_SUPPORT if (pSession->iSessionStream == HTTP_STREAM_GET_FACE) { if (pSession->face_info.all_trans_finish == FALSE && pSession->face_info.image_status_check_finish == IMG_CHECK_DONE) { if( pSession->face_info.trans_num < pSession->face_info.count && pSession->face_info.face_items[pSession->face_info.trans_num].trans_status == IMG_TRANS_SEND_PROCESSING) { TPSendFaceimgData(pSession,pSession->face_info.face_items[pSession->face_info.trans_num].face_id); } else { //当前列表请求的图片全部发完后,face_id=0,发送finished TPSendFaceimgData(pSession, 0); } } return(TPHTTPSERVER_EC_OK); } #endif Push_Stream_Handle: if (pSession->iSessionStream == HTTP_STREAM_PREVIEW) { if (pSession->iStreamType == TPRTP_STREAM_TYPE_VIDEO_MAIN #ifdef DUAL_CAM || pSession->iStreamType == TPRTP_STREAM_TYPE_VIDEO_MAIN2 #endif ) { pSession->video_desc.priv = (void *)pSession; pSession->video_desc.stream_id = get_http_main_stream_id(); #ifdef DUAL_CAM if (pSession->iStreamType == TPRTP_STREAM_TYPE_VIDEO_MAIN2) { pSession->video_desc.stream_id = get_http_main2_stream_id(); } else { pSession->video2_desc.priv = (void *)pSession; pSession->video2_desc.stream_id = 
get_http_main2_stream_id(); pSession->video2_desc.stream_ctx = &pSession->stream_ctx; pSession->video2_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->video2_desc, NULL); } #endif pSession->video_desc.stream_ctx = &pSession->stream_ctx; pSession->video_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->video_desc, NULL); } else { S32 stream_video_id; #ifdef DUAL_CAM if (pSession->iStreamType == TPRTP_STREAM_TYPE_VIDEO_MINOR2) stream_video_id = trans_register_stream_http(AVDM_TYPE_MINOR2, NULL, NULL); else #endif stream_video_id = trans_register_stream_http(AVDM_TYPE_SUB, NULL, NULL); if(stream_video_id == -1) { HTTP_ERROR_LOG("attach stream fail"); return (TPHTTPSERVER_EC_FAILURE); } pSession->video_desc.priv = (void *)pSession; pSession->video_desc.stream_id = stream_video_id; pSession->video_desc.stream_ctx = &pSession->stream_ctx; pSession->video_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->video_desc, NULL); #ifdef DUAL_CAM if (pSession->iStreamType == TPRTP_STREAM_TYPE_VIDEO_MINOR) // DC_TODO 根据streams判断 { stream_video_id = trans_register_stream_http(AVDM_TYPE_MINOR2, NULL, NULL); pSession->video2_desc.priv = (void *)pSession; pSession->video2_desc.stream_id = stream_video_id; pSession->video2_desc.stream_ctx = &pSession->stream_ctx; pSession->video2_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->video2_desc, NULL); } #endif } { pSession->audio_desc.priv = (void *)pSession; pSession->audio_desc.stream_id = get_http_audio_stream_id(); pSession->audio_desc.stream_ctx = &pSession->stream_ctx; pSession->audio_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->audio_desc, NULL); } } #ifdef DOORBELL_FUNCTION_SUPPORT else if (pSession->iSessionStream == HTTP_STREAM_AUDIO_ONLY) { pSession->audio_desc.priv = (void *)pSession; pSession->audio_desc.stream_id = get_http_audio_stream_id(); 
pSession->audio_desc.stream_ctx = &pSession->stream_ctx; pSession->audio_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->audio_desc, NULL); } #endif else if (pSession->iSessionStream == HTTP_STREAM_PLAYBACK && NULL == pSession->video_desc.priv) { { S32 stream_id; trans_attach_param_t attach_param; attach_param.version = pSession->playback_ver; if (pSession->playback_ver == PLAYBACK_VERSION_WITH_PLAYER_ID) { attach_param.data = (void *)pSession->player_id; } else { attach_param.data = (void *)&pSession->client_id; } stream_id = trans_register_stream_http(TRANS_STREAM_TYPE_PLAYBACK, NULL, &attach_param); if(stream_id == -1) { HTTP_ERROR_LOG("attach stream fail"); return (TPHTTPSERVER_EC_FAILURE); } pSession->video_desc.priv = (void *)pSession; pSession->video_desc.stream_id = stream_id; pSession->video_desc.stream_ctx = &pSession->stream_ctx; pSession->video_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->video_desc, NULL); } } else if (pSession->iSessionStream == HTTP_STREAM_RECORD_DOWNLOAD && NULL == pSession->video_desc.priv) { { S32 stream_id; trans_attach_param_t attach_param; attach_param.version = pSession->playback_ver; if (pSession->playback_ver == PLAYBACK_VERSION_WITH_PLAYER_ID) { attach_param.data = (void *)pSession->player_id; } else { attach_param.data = (void *)&pSession->client_id; } stream_id = trans_register_stream_http(TRANS_STREAM_TYPE_RECORD_DOWNLOAD, NULL, &attach_param); if(stream_id == -1) { HTTP_ERROR("attach stream fail"); return (TPHTTPSERVER_EC_FAILURE); } pSession->video_desc.priv = (void *)pSession; pSession->video_desc.stream_id = stream_id; pSession->video_desc.stream_ctx = &pSession->stream_ctx; pSession->video_desc.stream_ctx->session_ctx = (void *)pSession; trans_attach_to_stream(&pSession->video_desc, NULL); } } //HTTP_DEBUG("Session: %d receive PLAY, current state %d", pSession->iSessionID, pSession->iSessionState); pSession->video_desc.param.start_time = 
(pSession->npt == -1 || pSession->npt == 0) ? pSession->start_time : pSession->npt; pSession->video_desc.param.timestamp = pSession->timestamp; pSession->video_desc.param.change_day = pSession->change_day; if(TPHTTP_SESSION_STATE_PAUSE == pSession->iSessionState && pSession->npt == -1) pSession->video_desc.param.start_time = -1; pSession->video_desc.param.client_id = pSession->video_desc.stream_id; if(pSession->scale - 1 > 0.0) { pSession->video_desc.param.scale.denominator = 1; //默认倍速为1 pSession->video_desc.param.scale.numerator = pSession->scale; } else { pSession->video_desc.param.scale.denominator = 1 / pSession->scale; //默认倍速为1 pSession->video_desc.param.scale.numerator = 1; } pSession->video_desc.param.is_end = 0; pSession->video_desc.param.type_bitmap = 1; pSession->video_desc.param.task_type = 1; pSession->video_desc.param.type_bitmap = 6; #ifdef DUAL_CAM //HTTP_DEBUG("Session: %d receive PLAY, current state %d", pSession->iSessionID, pSession->iSessionState); pSession->video2_desc.param.start_time = (pSession->npt == -1 || pSession->npt == 0) ? 
pSession->start_time : pSession->npt; pSession->video2_desc.param.timestamp = pSession->timestamp; pSession->video2_desc.param.change_day = pSession->change_day; if(TPHTTP_SESSION_STATE_PAUSE == pSession->iSessionState && pSession->npt == -1) pSession->video2_desc.param.start_time = -1; pSession->video2_desc.param.client_id = pSession->video2_desc.stream_id; if(pSession->scale - 1 > 0.0) { pSession->video2_desc.param.scale.denominator = 1; //默认倍速为1 pSession->video2_desc.param.scale.numerator = pSession->scale; } else { pSession->video2_desc.param.scale.denominator = 1 / pSession->scale; //默认倍速为1 pSession->video2_desc.param.scale.numerator = 1; } pSession->video2_desc.param.is_end = 0; pSession->video2_desc.param.type_bitmap = 1; pSession->video2_desc.param.task_type = 1; pSession->video2_desc.param.type_bitmap = 6; #endif if (pSession->iSessionStream == HTTP_STREAM_PLAYBACK) { pSession->video_desc.param.vod_type = pSession->vod_type; pSession->video_desc.param.auto_seek = pSession->auto_seek; pSession->video_desc.param.auto_switch_date = pSession->auto_switch_date; pSession->vod_type = 0; pSession->auto_seek = 0; } if (pSession->iSessionStream == HTTP_STREAM_RECORD_DOWNLOAD) { pSession->video_desc.param.end_time = pSession->end_time; //录像下载需要传入结束时间 pSession->video_desc.param.task_type = 2; //录像下载的task_type为2 trans_start_download(&pSession->video_desc); } else { trans_start_play(&pSession->video_desc); #ifdef DUAL_CAM trans_start_play(&pSession->video2_desc); #endif } if (pSession->web_connection != NULL) { pSession->webparam->if_attach = 1; } #ifndef HOMEKIT_ONBOARDING_SUPPORT SessionAllowConnection(pSession->iCurStreamID, pSession->iSessionStream); #else /* 如果添加一路直播流 */ if (SessionAllowConnection(pSession->iCurStreamID, pSession->iSessionStream) && pSession->iSessionStream == HTTP_STREAM_PREVIEW) { g_http_stream_count++; LED_TRIGGER_MSG led_trigger; memset(&led_trigger, 0, sizeof(led_trigger)); led_trigger.trigger_mask = 
LED_TRIGGER_START_GENMASK(LED_TRIGGER_TYPE_TAPO_STREAMING); msg_send(LED_HOMEKIT_TRIGGER_MID, (U8 *)&led_trigger, sizeof(led_trigger)); } #endif if(HTTP_STREAM_TALK == pSession->iSessionStream) { TALK_MASK_RELOAD_MSG msg = {0}; msg.enabled = 1; msg_send(TALK_MASK_RELOAD, (uint8_t *)&msg, sizeof(TALK_MASK_RELOAD_MSG)); } HTTPSession_SetState(pSession, TPHTTP_SESSION_STATE_PLAYING); #if defined(TELEMETRY_SUPPORT) pSession->conn_start_time = nvmp_get_us(); #endif } if (pSession->bIsSessionON == 0) { HTTP_ERROR("Setup exit During CloseSession %d", pSession->iSessionID); return TPHTTPSERVER_EC_STATE_NONFATAL; } break; default: HTTP_INFO_LOG("TPHTTPSERVER_EC_FAILURE"); return (TPHTTPSERVER_EC_FAILURE); } } else if(TP_TCP_EC_WAIT == iGot) { return TPHTTPSERVER_EC_GOTNOTING; } return(TPHTTPSERVER_EC_OK); } 解释代码,详细一点
最新发布
10-15
import os import re import requests from bs4 import BeautifulSoup # 创建文件夹 def create_folder_if_not_exists(folder_name): if not os.path.exists(folder_name): os.makedirs(folder_name) # 提取视频链接 def extract_video_links(url, video_extension='.mp4'): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.3' } try: response = requests.get(url, headers=headers, timeout=10, verify=True) response.raise_for_status() except requests.RequestException as e: print(f"请求失败: {e}") return [] soup = BeautifulSoup(response.text, 'html.parser') video_links = [] # 匹配 HTML 中的视频链接 video_pattern = re.compile(rf'https?://[^"\']+\{video_extension}') video_links += video_pattern.findall(response.text) # 提取 <video> 标签中的 src for tag in soup.find_all('video'): src = tag.get('src') if src: full_url = urljoin(url, src) if full_url.endswith(video_extension) and full_url not in video_links: video_links.append(full_url) return video_links # 下载视频 def download_video(url, folder): try: response = requests.get(url, stream=True, timeout=20, verify=True) response.raise_for_status() except requests.RequestException as e: print(f"下载失败: {e}") return file_name = os.path.join(folder, url.split('/')[-1]) with open(file_name, 'wb') as f: for chunk in response.iter_content(chunk_size=1024): if chunk: f.write(chunk) print(f"已下载: {file_name}") # 主程序入口 def main(): target_url = "https://v.jstv.com/a/20250709/1752062315733.shtml"#视频链接 video_folder = "爬取的mp4视频" create_folder_if_not_exists(video_folder) video_links = extract_video_links(target_url) print(f"找到 {len(video_links)} 个视频文件。") for i, link in enumerate(video_links, 1): print(f"{i}. {link}") download_video(link, video_folder) if __name__ == "__main__": main(),这个代码能够提取什么类型的视频
08-13
import os import re import requests from bs4 import BeautifulSoup # 创建文件夹用于存储下载的视频 def create_folder_if_not_exists(folder_name): if not os.path.exists(folder_name): os.makedirs(folder_name) # 提取网页中的视频链接 def extract_video_links(url, video_extension='.mp4'): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.3' } response = requests.get(url, headers=headers) response.raise_for_status() # 使用 BeautifulSoup 解析 HTML soup = BeautifulSoup(response.text, 'html.parser') # 使用正则表达式匹配视频链接 video_pattern = re.compile(rf'https?://[^"\']+\{video_extension}') video_links = video.findall(response.text) # 或者直接从 <video> 标签提取 src 属性 video_tags = soup.find_all('video') for tag in video_tags: src = tag.get('src') if src and src.endswith(video_extension): video_links.append(src) return video_links # 下载视频文件 def download_video(url, folder): response = requests.get(url, stream=True) file_name = os.path.join(folder, url.split('/')[-1]) with open(file_name, 'wb') as f: for chunk in response.iter_content(chunk_size=1024): if chunk: f.write(chunk) print(f"已下载: {file_name}") # 主函数 def main(): target_url = "https://v.youth.cn/yw/202508/t20250812_16171522.htm" # 替换为实际目标网页 video_folder = "downloaded_videos" create_folder_if_not_exists(video_folder) video_links = extract_video_links(target_url) print(f"找到 {len(video_links)} 个视频文件。") for i, link in enumerate(video_links, 1): print(f"{i}. {link}") download_video(link, video_folder) if __name__ == "__main__": main() 帮我改一下这个代码,它报错了,是python3.12版本的
08-13
import os import re import requests from bs4 import BeautifulSoup # 创建文件夹 def create_folder_if_not_exists(folder_name): if not os.path.exists(folder_name): os.makedirs(folder_name) # 提取视频链接 def extract_video_links(url, video_extension='.mp4'): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.3' } try: response = requests.get(url, headers=headers, timeout=10, verify=True) response.raise_for_status() except requests.RequestException as e: print(f"请求失败: {e}") return [] soup = BeautifulSoup(response.text, 'html.parser') video_links = [] # 匹配 HTML 中的视频链接 video_pattern = re.compile(rf'https?://[^"\']+\{video_extension}') video_links += video_pattern.findall(response.text) # 提取 <video> 标签中的 src for tag in soup.find_all('video'): src = tag.get('src') if src: full_url = urljoin(url, src) if full_url.endswith(video_extension) and full_url not in video_links: video_links.append(full_url) return video_links # 下载视频 def download_video(url, folder): try: response = requests.get(url, stream=True, timeout=20, verify=True) response.raise_for_status() except requests.RequestException as e: print(f"下载失败: {e}") return file_name = os.path.join(folder, url.split('/')[-1]) with open(file_name, 'wb') as f: for chunk in response.iter_content(chunk_size=1024): if chunk: f.write(chunk) print(f"已下载: {file_name}") # 主程序入口 def main(): target_url = "https://v.jstv.com/a/20250709/1752062315733.shtml"#视频链接 video_folder = "爬取的mp4视频" create_folder_if_not_exists(video_folder) video_links = extract_video_links(target_url) print(f"找到 {len(video_links)} 个视频文件。") for i, link in enumerate(video_links, 1): print(f"{i}. {link}") download_video(link, video_folder) if __name__ == "__main__": main(),我这个代码是只能提取mp4的视频吗
08-13
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值