[Android]initial delay and current time stamp in camerasource

本文深入探讨Android CameraSource中mStartTimeUs和timeUs的获取过程,涉及系统时间、帧时间戳以及如何计算初始延迟。通过logcat日志分析,展示正常与异常情况下的时间戳差异,并讨论其对录制视频时长的影响。

Please note that:本文章为原创,若需转载,请指明该出处.

=================================================================================

sourcecode:http://androidxref.com/

1369 void CameraSource::processBufferQueueFrame(BufferItem& buffer) {
...

1410     ALOGV("initial delay: %" PRId64 ", current time stamp: %" PRId64,
1411         mStartTimeUs, timeUs); 

本文章主要研究此处的mStartTimeUs与timeUs是怎么得到的:

1.mStartTimeUs

line1917:camerasource中获取系统时间:int64_t startTimeUs = systemTime() / 1000;
line2132:camerasource中加上偏移时间:startTimeUs += startTimeOffsetUs;

line1362 & line1363:从camera中获取每一帧的系统时间:

    while (mConsumer->acquireBuffer(&buffer, 0) == OK) { 
         mCameraSource->processBufferQueueFrame(buffer);  

line1179:  camera的第一帧系统时间 减去 camerasource中开始时间(line1362&line1363 - ine2132):          mStartTimeUs = timestampUs - mStartTimeUs

2.timeUs

line1362 & line1363:从camera中获取每一帧的系统时间:

    while (mConsumer->acquireBuffer(&buffer, 0) == OK) { 
         mCameraSource->processBufferQueueFrame(buffer);  

 

 

PartI:start stamp from CameraSource

media/libmediaplayerservice/StagefrightRecorder.cpp
971 status_t StagefrightRecorder::start() {
972     ALOGV("start");
...
992
993     switch (mOutputFormat) {
994         case OUTPUT_FORMAT_DEFAULT:
995         case OUTPUT_FORMAT_THREE_GPP:
996         case OUTPUT_FORMAT_MPEG_4:
997         case OUTPUT_FORMAT_WEBM:
998         {
 999             bool isMPEG4 = true;
1000             if (mOutputFormat == OUTPUT_FORMAT_WEBM) {
1001                 isMPEG4 = false;
1002             }
1003             sp<MetaData> meta = new MetaData;
1004             setupMPEG4orWEBMMetaData(&meta);          >>>>a1
1005             status = mWriter->start(meta.get());      >>>>a4
1006             break;
1007         }

1916 void StagefrightRecorder::setupMPEG4orWEBMMetaData(sp<MetaData> *meta) {
1917     int64_t startTimeUs = systemTime() / 1000;                 >>>>a2
1918     (*meta)->setInt64(kKeyTime, startTimeUs);                  >>>>a3
          +ALOGI("case03738707,Start time offset: %" PRId64 " us", startTimeUs);


./media/libstagefright/MPEG4Writer.cpp
730 status_t MPEG4Writer::start(MetaData *param) {
...
783     if (mStarted) {
784         if (mPaused) {
785             mPaused = false;
786             return startTracks(param);             >>>>a5
787         }
788         return OK;
789     }
 
610 status_t MPEG4Writer::startTracks(MetaData *params) {
611     if (mTracks.empty()) {
612         ALOGE("No source added");
613         return INVALID_OPERATION;
614     }
615
616     for (List<Track *>::iterator it = mTracks.begin();
617          it != mTracks.end(); ++it) {
618         status_t err = (*it)->start(params); >>>a6
619


2097 status_t MPEG4Writer::Track::start(MetaData *params) {
2098     if (!mDone && mPaused) {
2099         mPaused = false;
2100         mResumed = true;
2101         return OK;
2102     }
2103
2104     int64_t startTimeUs;
2105     if (params == NULL || !params->findInt64(kKeyTime, &startTimeUs)) {
2106         startTimeUs = 0;
2107     }
...
2117     sp<MetaData> meta = new MetaData;
2118     if (mOwner->isRealTimeRecording() && mOwner->numTracks() > 1) {
...
2128         int64_t startTimeOffsetUs = mOwner->getStartTimeOffsetMs() * 1000LL;
2129         if (startTimeOffsetUs < 0) {  // Start time offset was not set
2130             startTimeOffsetUs = kInitialDelayTimeUs;
2131         }
2132         startTimeUs += startTimeOffsetUs;                           >>>>
2133         ALOGI("Start time offset: %" PRId64 " us", startTimeOffsetUs);
2134     }
2135
2136     meta->setInt64(kKeyTime, startTimeUs);
2137
2138     status_t err = mSource->start(meta.get());  >>>>a7   >>>>MediaCodecSource->start

media/libstagefright/MediaCodecSource.cpp
 379 status_t MediaCodecSource::start(MetaData* params) {
 380     sp<AMessage> msg = new AMessage(kWhatStart, mReflector);
 381     msg->setObject("meta", params);
 382     return postSynchronouslyAndReturnError(msg);
 383 }
 
857 void MediaCodecSource::onMessageReceived(const sp<AMessage> &msg) {
...
1004     case kWhatStart:
1005     {
...
1009         sp<RefBase> obj;
1010         CHECK(msg->findObject("meta", &obj));
1011         MetaData *params = static_cast<MetaData *>(obj.get());
1012
1013         sp<AMessage> response = new AMessage;
1014         response->setInt32("err", onStart(params)); >>>>a8

780 status_t MediaCodecSource::onStart(MetaData *params) {
...
822         CHECK(mPuller != NULL);
823         sp<MetaData> meta = params;
824         if (mSetEncoderFormat) {
825             if (meta == NULL) {
826                 meta = new MetaData;
827             }
828             meta->setInt32(kKeyPixelFormat, mEncoderFormat);
829             meta->setInt32(kKeyColorSpace, mEncoderDataSpace);
830         }
831
832         sp<AMessage> notify = new AMessage(kWhatPullerNotify, mReflector);
833         err = mPuller->start(meta.get(), notify);                   >>>>a9
 
177 status_t MediaCodecSource::Puller::start(const sp<MetaData> &meta, const sp<AMessage> &notify) {
178     ALOGV("puller (%s) start", mIsAudio ? "audio" : "video");
179     mLooper->start(
180             false /* runOnCallingThread */,
181             false /* canCallJava */,
182             PRIORITY_AUDIO);
183     mLooper->registerHandler(this);
184     mNotify = notify;
185
186     sp<AMessage> msg = new AMessage(kWhatStart, this); >>>>a10
187     msg->setObject("meta", meta);
188     return postSynchronouslyAndReturnError(msg);
189 }

236 void MediaCodecSource::Puller::onMessageReceived(const sp<AMessage> &msg) {
237     switch (msg->what()) {
238         case kWhatStart:
239         {
240             sp<RefBase> obj;
241             CHECK(msg->findObject("meta", &obj));
242
243             {
244                 Mutexed<Queue>::Locked queue(mQueue);
245                 queue->mPulling = true;
246             }
247
248             status_t err = mSource->start(static_cast<MetaData *>(obj.get()));                >>>>a11 >>>>CameraSource->start
 
./media/libstagefright/CameraSource.cpp
801 status_t CameraSource::start(MetaData *meta) {
802     ALOGV("start");
...
818     if (meta) {
819         int64_t startTimeUs;
820         if (meta->findInt64(kKeyTime, &startTimeUs)) {                >>>>a12
821             mStartTimeUs = startTimeUs;                               >>>>a13 >>>>mStartTimeUs means start timestamp for CameraSource
822         }

PartII:initial delay and current time stamp in camerasource

/media/libstagefright/CameraSource.cpp
1335 void CameraSource::BufferQueueListener::onFrameAvailable(const BufferItem& /*item*/) {
1336     ALOGV("%s: onFrameAvailable", __FUNCTION__);   >>>>
1337
1338     Mutex::Autolock l(mLock);
1339
1340     if (!mFrameAvailable) {
1341         mFrameAvailable = true;
1342         mFrameAvailableSignal.signal();                     
1343     }
1344 }

1346 bool CameraSource::BufferQueueListener::threadLoop() {
...
1351     {
1352         Mutex::Autolock l(mLock);
1353         while (!mFrameAvailable) {
1354             if (mFrameAvailableSignal.waitRelative(mLock, kFrameAvailableTimeout) == TIMED_OUT) {
1355                 return true;
1356             }
1357         }
1358         mFrameAvailable = false;
1359     }
1360
1361     BufferItem buffer;
1362     while (mConsumer->acquireBuffer(&buffer, 0) == OK) {  >>>>b1  >>>>buffer.mTimestamp means timestamp for each frame out of camera Consumer
1363         mCameraSource->processBufferQueueFrame(buffer);   >>>>b2
1364     }
1365
1366     return true;
1367 }

1369 void CameraSource::processBufferQueueFrame(BufferItem& buffer) {
...
1372     int64_t timestampUs = buffer.mTimestamp / 1000;      >>>>b3
1373     if (shouldSkipFrameLocked(timestampUs)) {            >>>>b4
1374         mVideoBufferConsumer->releaseBuffer(buffer);
1375         return;
1376     }
...
1408     int64_t timeUs = mStartTimeUs + (timestampUs - mFirstFrameTimeUs);  >>>>b7       >>>>timeUs means timestamp for each frame out of CameraSource
1409     mFrameTimes.push_back(timeUs);
1410     ALOGV("initial delay: %" PRId64 ", current time stamp: %" PRId64,
1411         mStartTimeUs, timeUs);                                           

1140 bool CameraSource::shouldSkipFrameLocked(int64_t timestampUs) {
...
1169     mLastFrameTimestampUs = timestampUs;
1170     if (mNumFramesReceived == 0) {
1171         mFirstFrameTimeUs = timestampUs;  >>>>b5
1172         // Initial delay
1173         if (mStartTimeUs > 0) {
1174             if (timestampUs < mStartTimeUs) {
1175                 // Frame was captured before recording was started
1176                 // Drop it without updating the statistical data.
1177                 return true;
1178             }
1179             mStartTimeUs = timestampUs - mStartTimeUs; >>>>b6                         >>>>mStartTimeUs means initial delay timestamp from camera Consumer to CameraSource
1180         }
1181     }
1182
1183     return false;
1184 }

PartIII:who is consumer

frameworks/av/include/media/stagefright/CameraSource.h
263     // Consumer and producer of the buffer queue between this class and camera. >>>>
264     sp<BufferItemConsumer> mVideoBufferConsumer;
265     sp<IGraphicBufferProducer> mVideoBufferProducer;

569 status_t CameraSource::initBufferQueue(uint32_t width, uint32_t height,
570         uint32_t format, android_dataspace dataSpace, uint32_t bufferCount) {
571     ALOGV("initBufferQueue");
...
578     // Create a buffer queue.
579     sp<IGraphicBufferProducer> producer;
580     sp<IGraphicBufferConsumer> consumer;
581     BufferQueue::createBufferQueue(&producer, &consumer); >>>>
...
590     mVideoBufferConsumer = new BufferItemConsumer(consumer, usage, bufferCount);
591     mVideoBufferConsumer->setName(String8::format("StageFright-CameraSource"));
592     mVideoBufferProducer = producer;
...
615     res = mCamera->setVideoTarget(mVideoBufferProducer); >>>>
...
624     mBufferQueueListener = new BufferQueueListener(mVideoBufferConsumer, this); >>>>
625     res = mBufferQueueListener->run("CameraSource-BufferQueueListener");

frameworks/native/libs/gui/BufferQueue.cpp
80void BufferQueue::createBufferQueue(sp<IGraphicBufferProducer>* outProducer,
81        sp<IGraphicBufferConsumer>* outConsumer,
82        bool consumerIsSurfaceFlinger) {
83    LOG_ALWAYS_FATAL_IF(outProducer == NULL,
84            "BufferQueue: outProducer must not be NULL");
85    LOG_ALWAYS_FATAL_IF(outConsumer == NULL,
86            "BufferQueue: outConsumer must not be NULL");
87
88    sp<BufferQueueCore> core(new BufferQueueCore());
89    LOG_ALWAYS_FATAL_IF(core == NULL,
90            "BufferQueue: failed to create BufferQueueCore");
91
92    sp<IGraphicBufferProducer> producer(new BufferQueueProducer(core, consumerIsSurfaceFlinger));
93    LOG_ALWAYS_FATAL_IF(producer == NULL,
94            "BufferQueue: failed to create BufferQueueProducer");
95
96    sp<IGraphicBufferConsumer> consumer(new BufferQueueConsumer(core));
97    LOG_ALWAYS_FATAL_IF(consumer == NULL,
98            "BufferQueue: failed to create BufferQueueConsumer");
99
100    *outProducer = producer;
101    *outConsumer = consumer;
102}

+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Issue sample:

#pass-logcat.log
>>>>the start time is very normal(17ms).the recoder duration is about (10s+17ms)=10s17ms
11-13 15:16:02.763  1003  2803 I StagefrightRecorder: case03738707,Start time offset: 215703345 us
11-13 15:16:02.764  1003  2803 I MPEG4Writer: case03738707,MPEG4Writer::Track::start 1: 215703345 us
11-13 15:16:02.764  1003  2803 I MPEG4Writer: Start time offset: 1000000 us
11-13 15:16:02.764  1003  4142 I MediaCodecSource: MediaCodecSource (video) starting
11-13 15:16:02.765  1003  4177 I CameraSource: case03738707,CameraSource::start: 216703345 us                          >>>>mStartTimeUs
11-13 15:16:03.280  1003  4142 I MediaCodecSource: MediaCodecSource (video) started <<<<
11-13 15:16:03.503  1003  4178 I CameraSource: case03738707,CameraSource::processBufferQueueFrame:11-13: 216352949 us
11-13 15:16:03.503  1003  4178 V CameraSource: Drop frame at 216352949/216703345 us
...
11-13 15:16:03.881  1003  4178 I CameraSource: case03738707,CameraSource::processBufferQueueFrame:11-13: 216720431 us >>>>timestampUs
11-13 15:16:03.881  1003  4178 I CameraSource: case03738707,CameraSource::shouldSkipFrameLocked 2: 216703345 us       
11-13 15:16:03.881  1003  4178 I CameraSource: case03738707,CameraSource::shouldSkipFrameLocked 3: 17086 us           >>>>mStartTimeUs = timestampUs - mStartTimeUs
11-13 15:16:03.882  1003  4178 V CameraSource: initial delay: 17086, current time stamp: 17086                        >>>>216720431-216703345=17086us=17ms
...
11-13 15:16:13.295  1003  4142 I MediaCodecSource: encoder (video) stopping       
11-13 15:16:13.294  1003  4142 V CameraSource: Set stoptime: 226232474 us                                             >>>>>>>>226232474-216703345=9529129us=9.5s
11-13 15:16:13.333  1003  4142 I MediaCodecSource: encoder (video) stopped                                            <<<<15:16:13-15:16:03=10s


#fail-logcat.log
>>>>the start time is very big(13s298ms). the recoder duration is about (11s+13s298ms)=24s298ms
11-13 15:16:10.515   996   996 I StagefrightRecorder: case03738707,Start time offset: 210533899 us
11-13 15:16:10.516   996   996 I MPEG4Writer: case03738707,MPEG4Writer::Track::start 1: 210533899 us
11-13 15:16:10.516   996   996 I MPEG4Writer: Start time offset: 1000000 us
11-13 15:16:10.516   996  3969 I MediaCodecSource: MediaCodecSource (video) starting
11-13 15:16:10.517   996  4002 I CameraSource: case03738707,CameraSource::start: 211533899 us                         >>>>mStartTimeUs
11-13 15:16:11.090   996  3969 I MediaCodecSource: MediaCodecSource (video) started                                   <<<<

11-13 15:16:11.311   996  4003 I CameraSource: case03738707,CameraSource::processBufferQueueFrame:11-13: 224832511 us >>>>timestampUs
11-13 15:16:11.311   996  4003 I CameraSource: case03738707,CameraSource::shouldSkipFrameLocked 2: 211533899 us        
11-13 15:16:11.311   996  4003 I CameraSource: case03738707,CameraSource::shouldSkipFrameLocked 3: 13298612 us        >>>>mStartTimeUs = timestampUs - mStartTimeUs
11-13 15:16:11.311   996  4003 V CameraSource: initial delay: 13298612, current time stamp: 13298612                  >>>>224832511-211533899=13298612us=13s298ms
                                                           
11-13 15:16:21.121   996  3969 I MediaCodecSource: encoder (video) stopping
11-13 15:16:21.120   996  3969 V CameraSource: Set stoptime: 221138609 us                                             >>>>>>>>221138609-224832511<0,221138609-211533899=9604710us=9.6s
11-13 15:16:22.377   996  3969 I MediaCodecSource: encoder (video) stopped                                            <<<<15:16:22-15:16:11=11s


#pass-Record.mp4
Complete name                            : pass-Record.mp4
...
Duration                                 : 9s 421ms >>>>
Bit rate                                 : 199 Kbps

#fail-Record.mp4
General
Complete name                            : fail-Record.mp4
...
Duration                                 : 23s 110ms >>>
Bit rate                                 : 85.8 Kbps

class AsyncSocketManager: """异步Socket管理器 - 协调Reader和Writer线程""" # 分批连接参数 BATCH_SIZE = 250 # 每批处理的连接数 BATCH_INTERVAL = 20.0 # 批次间隔时间(秒) MAX_RETRIES = 5 # 增加最大重试次数 RECONNECT_INITIAL_DELAY = 20.0 # 初始重连延迟 RECONNECT_BACKOFF_FACTOR = 1.5 # 指数退避因子 MAX_RECONNECT_DELAY = 120.0 # 最大重连延迟(2分钟) HEALTH_CHECK_INTERVAL = 5.0 # 健康检查间隔 def __init__(self): self.running = True self._shutdown_event = asyncio.Event() self._cleanup_lock = asyncio.Lock() self.shutdown_requested = asyncio.Event() self.original_sigint_handler = None self.shutdown_requested = asyncio.Event() self.server_config = {'ip': 'charge.b-conn.com', 'port': 5455} self.total = 0 self.writer = None self.reader = None self._active_tasks = set() self.connections: Dict[str, Dict] = {} self.batch_tasks: List[asyncio.Task] = [] # 批次任务列表 self._read_lock = asyncio.Lock() # 读取操作专用锁 self._write_lock = asyncio.Lock() # 写入操作专用锁 self._state_lock = asyncio.Lock() # 状态变更锁 self.logger = logging.getLogger(__name__) self.setup_logging() self.connection_pool = {} # 连接池 # 连接状态统计 self.stats = { "success": 0, "failed": 0, "pending": 0, "active": 0 } # 每个连接一个专用的状态锁 self.connection_locks = defaultdict(asyncio.Lock) def install_signal_handlers(self): """安装信号处理器""" try: loop = asyncio.get_running_loop() # 保存原始信号处理器 self.original_sigint_handler = loop.add_signal_handler( signal.SIGINT, self._initiate_shutdown ) self.logger.info("✅ 信号处理器安装完成") except (NotImplementedError, RuntimeError): # 在某些平台上可能不支持 self.logger.warning("⚠️ 当前平台不支持异步信号处理") def _initiate_shutdown(self): """初始化关闭过程""" self.logger.info("🔄 接收到关闭信号,开始优雅关闭...") self.shutdown_requested.set() def restore_signal_handlers(self): """恢复原始信号处理器""" try: loop = asyncio.get_running_loop() if self.original_sigint_handler is not None: loop.remove_signal_handler(signal.SIGINT) # 重新安装默认处理器 loop.add_signal_handler(signal.SIGINT, signal.default_int_handler) except Exception as e: self.logger.debug(f"恢复信号处理器时发生异常: {e}") async def close_all_connections(self): """异步关闭所有Socket连接""" self.logger.info("正在关闭所有Socket连接...") self.running = False self._shutdown_event.set() # 取消所有活跃任务 for task in self._active_tasks: if not task.done(): task.cancel() # 关闭所有连接 close_tasks = [] for pileCode in list(self.connections.keys()): conn = self.connections[pileCode] if 'writer' in conn and conn['writer'] is not None: writer = conn['writer'] close_tasks.append(self.close_writer(writer, pileCode)) if close_tasks: await asyncio.gather(*close_tasks, return_exceptions=True) self.logger.info("所有连接已关闭") async def close_writer(self, writer, pileCode: str): """安全关闭写入器""" try: if not writer.is_closing(): writer.close() await writer.wait_closed() self.logger.debug(f"关闭连接: {pileCode}") except Exception as e: self.logger.error(f"关闭连接失败: {pileCode} 错误: {e}") async def _close_single_connection(self, conn_key: str): """关闭单个Socket连接""" try: if conn_key in self.connections: conn_info = self.connections[conn_key] # 实际的Socket关闭逻辑 if 'writer' in conn_info: writer = conn_info['writer'] writer.close() await writer.wait_closed() self.logger.debug(f"🔌 关闭连接 {conn_key}") del self.connections[conn_key] except Exception as e: self.logger.error(f"❌ 关闭连接 {conn_key} 时发生异常: {e}") async def batch_create_connections(self, pile_codes: List[str]): """添加健康监控器""" # 启动健康监控器 health_task = asyncio.create_task( self.connection_health_monitor(), name="connection_health_monitor" ) self.batch_tasks.append(health_task) """批量创建Socket连接(分批处理)""" total = len(pile_codes) batches = math.ceil(total / self.BATCH_SIZE) self.total_connections = total self.logger.info(f"开始分批创建连接,共{total}个桩号,分{batches}批处理,每批{self.BATCH_SIZE}个") for batch_num in range(batches): if self.shutdown_requested.is_set(): self.logger.warning("关闭信号已触发,停止创建新批次") break start_idx = batch_num * self.BATCH_SIZE end_idx = min(start_idx + self.BATCH_SIZE, total) batch = pile_codes[start_idx:end_idx] batch_id = batch_num + 1 self.logger.info(f"处理批次 {batch_id}/{batches} ({len(batch)}个桩号)") # 创建批量连接任务 task = asyncio.create_task( self.process_batch(batch, batch_id), name=f"batch_{batch_id}" ) self.batch_tasks.append(task) # 添加批次间隔 if batch_num < batches - 1: await asyncio.sleep(self.BATCH_INTERVAL) # 等待所有批次完成 if self.batch_tasks: await asyncio.gather(*self.batch_tasks) self.logger.info(f"所有批次处理完成! 成功:{self.stats['success']} 失败:{self.stats['failed']}") async def process_batch(self, pile_codes: List[str], batch_id: int): """处理单个批次的连接""" tasks = [] for i, pileCode in enumerate(pile_codes): message_id = f"batch{batch_id}-conn{i + 1}" task = asyncio.create_task( self.socket_worker(message_id, pileCode), name=f"batch{batch_id}-{pileCode}" ) tasks.append(task) self.connection_pool[pileCode] = {"status": "pending"} self.stats["pending"] += 1 # 等待批次内所有连接完成初始化 results = await asyncio.gather(*tasks, return_exceptions=True) # 统计批次结果 for result in results: if isinstance(result, Exception): self.stats["failed"] += 1 else: self.stats["success"] += 1 self.stats["pending"] -= len(pile_codes) self.stats["active"] = len(self.connections) async def socket_worker(self, message_id: str, pileCode: str): global logger """异步Socket工作协程""" retry_count = 0 last_reconnect_time = 0 # health_check_last = time.time() while self.running and not self._shutdown_event.is_set(): try: # 检查是否需要等待重连延迟 current_time = time.time() if last_reconnect_time > 0: delay = self.calculate_reconnect_delay(retry_count) if current_time - last_reconnect_time < delay: await asyncio.sleep(delay - (current_time - last_reconnect_time)) # 尝试创建连接 success = await self.create_socket_connection_with_retry(message_id, pileCode, retry_count) if not success: retry_count = min(retry_count + 1, self.MAX_RETRIES) last_reconnect_time = time.time() continue # 重置重试计数(连接成功) retry_count = 0 last_reconnect_time = 0 # 启动读写任务 await self.start_reader_writer(message_id, pileCode) # 健康检查和状态报告 # current_time = time.time() # if current_time - health_check_last > self.HEALTH_CHECK_INTERVAL: # pending_count = self.stats["pending"] # self.logger.info( # f"等待中={pending_count} 重试={retry_count}" # ) # health_check_last = current_time except asyncio.CancelledError: self.logger.info(f"⚠️ Socket工作协程被取消: {message_id}") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn and conn['writer'] is not None: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass if pileCode in self.connections: del self.connections[pileCode] break except Exception as e: self.logger.error(f"❌ Socket工作异常: {message_id} 桩号: {pileCode} 错误: {e}") retry_count = min(retry_count + 1, self.MAX_RETRIES) last_reconnect_time = time.time() # 清理当前连接 if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn and conn['writer'] is not None: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass if pileCode in self.connections: del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 finally: # 更新连接池状态 if pileCode in self.connection_pool: self.connection_pool[pileCode] = { "status": "retrying" if retry_count > 0 else "active", "retry_count": retry_count, "last_retry": last_reconnect_time } def calculate_reconnect_delay(self, retry_count: int) -> float: """计算指数退避的重连延迟""" delay = self.RECONNECT_INITIAL_DELAY * (self.RECONNECT_BACKOFF_FACTOR ** retry_count) return min(delay, self.MAX_RECONNECT_DELAY) def handle_task_completion(self, task: asyncio.Task): """任务完成回调处理""" try: # 首先检查任务是否被取消 if task.cancelled(): self.logger.debug(f"任务 {task.get_name()} 被取消") return if task.done() and task.exception(): self.logger.error(f"任务失败: {task.get_name()} {task.exception()}") # 标记连接需要重启 pile_code = task.get_name().split("_")[1] if pile_code in self.connections: writer = self.connections[pile_code]["writer"] if writer is not None: writer.close() if pile_code in self.connections: del self.connections[pile_code] except Exception as e: self.logger.error(f"任务回调处理错误: {e}") async def connection_health_monitor(self): """连接健康监控器""" while self.running and not self._shutdown_event.is_set(): try: real_active_count = 0 current_time = time.time() connections_to_restart = [] for pile_code, conn in list(self.connections.items()): # 检查是否超过最大重试次数 if conn.get("retry_count", 0) >= self.MAX_RETRIES: self.logger.warning(f"❌ 连接超过最大重试次数: {pile_code}") await self._close_single_connection(pile_code) continue if conn.get("isLogin") is True: real_active_count += 1 # 检查最后活动时间 last_activity = conn.get("last_time_stamp", 0) if current_time - last_activity > 60: # 1分钟无活动 self.logger.warning(f"桩号: {pile_code} 上次报文时间: {last_activity} 当前时间: {current_time} 时间间隔: {current_time - last_activity}") self.logger.warning(f"🕒 连接无活动超时: {pile_code}") connections_to_restart.append(pile_code) # 检查是否需要重启 if conn.get("needs_restart", False): connections_to_restart.append(pile_code) # 重启需要重置的连接 for pile_code in connections_to_restart: if pile_code in self.connections: conn = self.connections[pile_code] if conn['isLogin'] and conn['isLogin'] is not None: connections_to_restart.remove(pile_code) continue else: await self._close_single_connection(pile_code) # 重新启动工作协程 self.logger.info(f"🔄 重启连接: {pile_code}") task = asyncio.create_task( self.socket_worker(f"restart-{pile_code}", pile_code), name=f"restart_{pile_code}" ) self.batch_tasks.append(task) await asyncio.sleep(1) # 定期报告 restart_count = len(connections_to_restart) self.logger.info( f"🔍 健康检查: 活跃连接={real_active_count} 待重启={restart_count}" ) await asyncio.sleep(5) except Exception as e: self.logger.error(f"健康监控器异常: {e}") await asyncio.sleep(10) async def create_socket_connection_with_retry(self, message_id: str, pileCode: str, attempt: int) -> bool: """带重试机制的连接创建""" try: reader, writer = await asyncio.wait_for( asyncio.open_connection(self.server_config['ip'], self.server_config['port']), timeout=30.0 + min(attempt * 5, 30) # 随重试次数增加超时 ) async with self.connection_locks[pileCode]: if pileCode not in self.connections: self.connections[pileCode] = { 'reader': reader, 'writer': writer, 'pileCode': pileCode, 'message_id': message_id, 'retry_count': 0, 'last_heartbeat1': 0, 'last_heartbeat2': 0, 'last_time_stamp': 0, 'isLogin': False, 'timeout_count_login': 0, 'timeout_count': 0, 'heart_serialNum': "00 00", 'status': 'Offline', 'charging': False, 'priceModelList': [] } self.logger.info(f"连接成功: {message_id} 桩号: {pileCode}") return True except asyncio.TimeoutError: self.logger.warning(f"⛔ 连接超时: {message_id} 桩号: {pileCode} (尝试 #{attempt + 1})") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn and conn['writer'] is not None: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass if pileCode in self.connections: del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 except ConnectionRefusedError: self.logger.warning(f"🚫 连接拒绝: {message_id} 桩号: {pileCode} (尝试 #{attempt + 1})") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn and conn['writer'] is not None: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass if pileCode in self.connections: del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 except Exception as e: self.logger.error( f"❌连接错误: {message_id} 桩号: {pileCode} 错误: {e} (尝试 #{attempt + 1})") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn and conn['writer'] is not None: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass if pileCode in self.connections: del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 return False async def start_reader_writer(self, message_id: str, pileCode: str): """启动读写任务""" if pileCode not in self.connections: return conn = self.connections[pileCode] reader = conn['reader'] writer = conn['writer'] self.writer, self.reader = AsyncSocketWriter(writer, pileCode, message_id, self.total, self.connections, self.connection_locks, self.logger), AsyncSocketReader( reader, pileCode, message_id, self.connections, self.connection_locks, self.logger) # 创建读写任务 read_task = asyncio.create_task( self.reader.start_reading(), name=f"reader_{pileCode}_{message_id}" ) write_task = asyncio.create_task( self.writer.start_write(), name=f"writer_{pileCode}_{message_id}" ) # 添加完成回调 read_task.add_done_callback(self.handle_task_completion) write_task.add_done_callback(self.handle_task_completion) # 添加到活跃任务集合 self._active_tasks.add(read_task) self._active_tasks.add(write_task) # 并行运行读写任务 try: await asyncio.gather( read_task, write_task, return_exceptions=True ) except Exception as e: self.logger.error(f"读写任务异常: {message_id} {pileCode}: {e}") 有无问题?
最新发布
11-08
class AsyncSocketManager: """异步Socket管理器 - 协调Reader和Writer线程""" # 分批连接参数 BATCH_SIZE = 250 # 每批处理的连接数 BATCH_INTERVAL = 2.0 # 批次间隔时间(秒) MAX_RETRIES = 3 # 增加最大重试次数 RECONNECT_INITIAL_DELAY = 3.0 # 初始重连延迟 RECONNECT_BACKOFF_FACTOR = 1.5 # 指数退避因子 MAX_RECONNECT_DELAY = 300.0 # 最大重连延迟(5分钟) HEALTH_CHECK_INTERVAL = 30.0 # 健康检查间隔 def __init__(self): self.running = True self._shutdown_event = asyncio.Event() self._cleanup_lock = asyncio.Lock() self.shutdown_requested = asyncio.Event() self.original_sigint_handler = None self.shutdown_requested = asyncio.Event() self.server_config = {'ip': 'conn.com', 'port': 5455} self.total = 0 self.writer = None self.reader = None self._active_tasks = set() self.connections: Dict[str, Dict] = {} self.batch_tasks: List[asyncio.Task] = [] # 批次任务列表 self._read_lock = asyncio.Lock() # 读取操作专用锁 self._write_lock = asyncio.Lock() # 写入操作专用锁 self._state_lock = asyncio.Lock() # 状态变更锁 self.logger = logging.getLogger(__name__) self.setup_logging() self.connection_pool = {} # 连接池 # 连接状态统计 self.stats = { "success": 0, "failed": 0, "pending": 0, "active": 0 } self.semaphore = asyncio.Semaphore(10000) def setup_logging(self): """配置日志系统""" logging.config.dictConfig({ 'version': 1, 'disable_existing_loggers': False, 'formatters': { 'standard': { 'format': '%(asctime)s [%(levelname)s] %(name)s - %(message)s', 'datefmt': '%Y-%m-%d %H:%M:%S' }, 'detailed': { 'format': '%(asctime)s [%(levelname)s] %(name)s::%(funcName)s - %(message)s' } }, 'handlers': { 'console': { 'level': 'INFO', 'class': 'logging.StreamHandler', 'formatter': 'standard' }, 'file_handler': { 'level': 'DEBUG', 'class': 'logging.FileHandler', 'filename': '/mnt/test/socket_client.log', 'encoding': 'utf-8', 'formatter': 'detailed' }, 'error_file': { 'level': 'WARNING', 'class': 'logging.FileHandler', 'filename': '/mnt/test/socket_errors.log', 'encoding': 'utf-8', 'formatter': 'detailed' } }, 'loggers': { '': { # root logger 'handlers': ['console', 'file_handler', 'error_file'], 'level': 'DEBUG', 'propagate': True } } }) def install_signal_handlers(self): """安装信号处理器""" try: loop = asyncio.get_running_loop() # 保存原始信号处理器 self.original_sigint_handler = loop.add_signal_handler( signal.SIGINT, self._initiate_shutdown ) self.logger.info("✅ 信号处理器安装完成") except (NotImplementedError, RuntimeError): # 在某些平台上可能不支持 self.logger.warning("⚠️ 当前平台不支持异步信号处理") def _initiate_shutdown(self): """初始化关闭过程""" self.logger.info("🔄 接收到关闭信号,开始优雅关闭...") self.shutdown_requested.set() def restore_signal_handlers(self): """恢复原始信号处理器""" try: loop = asyncio.get_running_loop() if self.original_sigint_handler is not None: loop.remove_signal_handler(signal.SIGINT) # 重新安装默认处理器 loop.add_signal_handler(signal.SIGINT, signal.default_int_handler) except Exception as e: self.logger.debug(f"恢复信号处理器时发生异常: {e}") async def close_all_connections(self): """异步关闭所有Socket连接""" self.logger.info("正在关闭所有Socket连接...") self.running = False self._shutdown_event.set() # 取消所有活跃任务 for task in self._active_tasks: if not task.done(): task.cancel() # 关闭所有连接 close_tasks = [] for pileCode in list(self.connections.keys()): conn = self.connections[pileCode] if 'writer' in conn: writer = conn['writer'] close_tasks.append(self.close_writer(writer, pileCode)) if close_tasks: await asyncio.gather(*close_tasks, return_exceptions=True) self.logger.info("所有连接已关闭") async def close_writer(self, writer, pileCode: str): """安全关闭写入器""" try: if not writer.is_closing(): writer.close() await writer.wait_closed() self.logger.debug(f"关闭连接: {pileCode}") except Exception as e: self.logger.error(f"关闭连接失败: {pileCode} 错误: {e}") async def _close_single_connection(self, conn_key: str): """关闭单个Socket连接""" try: if conn_key in self.connections: conn_info = self.connections[conn_key] # 实际的Socket关闭逻辑 if 'writer' in conn_info: writer = conn_info['writer'] writer.close() await writer.wait_closed() self.logger.debug(f"🔌 关闭连接 {conn_key}") del self.connections[conn_key] except Exception as e: self.logger.error(f"❌ 关闭连接 {conn_key} 时发生异常: {e}") async def batch_create_connections(self, pile_codes: List[str]): """添加健康监控器""" # 启动健康监控器 health_task = asyncio.create_task( self.connection_health_monitor(), name="connection_health_monitor" ) self.batch_tasks.append(health_task) """批量创建Socket连接(分批处理)""" total = len(pile_codes) batches = math.ceil(total / self.BATCH_SIZE) self.total_connections = total self.logger.info(f"开始分批创建连接,共{total}个桩号,分{batches}批处理,每批{self.BATCH_SIZE}个") for batch_num in range(batches): if self.shutdown_requested.is_set(): self.logger.warning("关闭信号已触发,停止创建新批次") break start_idx = batch_num * self.BATCH_SIZE end_idx = min(start_idx + self.BATCH_SIZE, total) batch = pile_codes[start_idx:end_idx] batch_id = batch_num + 1 self.logger.info(f"处理批次 {batch_id}/{batches} ({len(batch)}个桩号)") # 创建批量连接任务 task = asyncio.create_task( self.process_batch(batch, batch_id), name=f"batch_{batch_id}" ) self.batch_tasks.append(task) # 添加批次间隔 if batch_num < batches - 1: await asyncio.sleep(self.BATCH_INTERVAL) # 等待所有批次完成 if self.batch_tasks: await asyncio.gather(*self.batch_tasks) self.logger.info(f"所有批次处理完成! 成功:{self.stats['success']} 失败:{self.stats['failed']}") async def process_batch(self, pile_codes: List[str], batch_id: int): """处理单个批次的连接""" tasks = [] for i, pileCode in enumerate(pile_codes): message_id = f"batch{batch_id}-conn{i + 1}" task = asyncio.create_task( self.socket_worker(message_id, pileCode), name=f"batch{batch_id}-{pileCode}" ) tasks.append(task) self.connection_pool[pileCode] = {"status": "pending"} self.stats["pending"] += 1 # 等待批次内所有连接完成初始化 results = await asyncio.gather(*tasks, return_exceptions=True) # 统计批次结果 for result in results: if isinstance(result, Exception): self.stats["failed"] += 1 else: self.stats["success"] += 1 self.stats["pending"] -= len(pile_codes) self.stats["active"] = len(self.connections) async def socket_worker(self, message_id: str, pileCode: str): global logger """异步Socket工作协程""" retry_count = 0 last_reconnect_time = 0 health_check_last = time.time() while self.running and not self._shutdown_event.is_set(): try: # 检查是否需要等待重连延迟 current_time = time.time() if last_reconnect_time > 0: delay = self.calculate_reconnect_delay(retry_count) if current_time - last_reconnect_time < delay: await asyncio.sleep(delay - (current_time - last_reconnect_time)) # 尝试创建连接 success = await self.create_socket_connection_with_retry(message_id, pileCode, retry_count) if not success: retry_count = min(retry_count + 1, self.MAX_RETRIES) last_reconnect_time = time.time() continue # 重置重试计数(连接成功) retry_count = 0 last_reconnect_time = 0 # 启动读写任务 await self.start_reader_writer(message_id, pileCode) # 健康检查和状态报告 current_time = time.time() list_count = 0 if current_time - health_check_last > self.HEALTH_CHECK_INTERVAL: pending_count = self.stats["pending"] self.logger.info( f"等待中={pending_count} 重试={retry_count}" ) health_check_last = current_time except asyncio.CancelledError: self.logger.info(f"⚠️ Socket工作协程被取消: {message_id}") break except Exception as e: self.logger.error(f"❌ Socket工作异常: {message_id} 桩号: {pileCode} 错误: {e}") retry_count = min(retry_count + 1, self.MAX_RETRIES) last_reconnect_time = time.time() # 清理当前连接 if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 finally: # 更新连接池状态 if pileCode in self.connection_pool: self.connection_pool[pileCode] = { "status": "retrying" if retry_count > 0 else "active", "retry_count": retry_count, "last_retry": last_reconnect_time } def calculate_reconnect_delay(self, retry_count: int) -> float: """计算指数退避的重连延迟""" delay = self.RECONNECT_INITIAL_DELAY * (self.RECONNECT_BACKOFF_FACTOR ** retry_count) return min(delay, self.MAX_RECONNECT_DELAY) def handle_task_completion(self, task: asyncio.Task): """任务完成回调处理""" try: # 首先检查任务是否被取消 if task.cancelled(): self.logger.debug(f"任务 {task.get_name()} 被取消") return if task.done() and task.exception(): self.logger.error(f"任务失败: {task.get_name()} {task.exception()}") # 标记连接需要重启 pile_code = task.get_name().split("_")[1] if pile_code in self.connections: self.connections[pile_code]["needs_restart"] = True except Exception as e: self.logger.error(f"任务回调处理错误: {e}") async def connection_health_monitor(self): """连接健康监控器""" while self.running and not self._shutdown_event.is_set(): try: current_time = time.time() connections_to_restart = [] for pile_code, conn in list(self.connections.items()): # 检查是否超过最大重试次数 if conn.get("retry_count", 0) >= self.MAX_RETRIES: self.logger.warning(f"❌ 连接超过最大重试次数: {pile_code}") await self._close_single_connection(pile_code) continue # 检查最后活动时间 last_activity = conn.get("last_time_stamp", 0) if current_time - last_activity > 60: # 1分钟无活动 self.logger.warning(f"🕒 连接无活动超时: {pile_code}") connections_to_restart.append(pile_code) # 检查是否需要重启 if conn.get("needs_restart", False): connections_to_restart.append(pile_code) # 重启需要重置的连接 for pile_code in connections_to_restart: if pile_code in self.connections: await self._close_single_connection(pile_code) # 重新启动工作协程 self.logger.info(f"🔄 重启连接: {pile_code}") task = asyncio.create_task( self.socket_worker(f"restart-{pile_code}", pile_code), name=f"restart_{pile_code}" ) self.batch_tasks.append(task) # 定期报告 active_count = len(self.connections) restart_count = len(connections_to_restart) self.logger.info( f"🔍 健康检查: 活跃连接={active_count} 待重启={restart_count}" ) await asyncio.sleep(self.HEALTH_CHECK_INTERVAL) except Exception as e: self.logger.error(f"健康监控器异常: {e}") await asyncio.sleep(10) async def create_socket_connection_with_retry(self, message_id: str, pileCode: str, attempt: int) -> bool: """带重试机制的连接创建""" try: reader, writer = await asyncio.wait_for( asyncio.open_connection(self.server_config['ip'], self.server_config['port']), timeout=30.0 + min(attempt * 5, 30) # 随重试次数增加超时 ) self.connections[pileCode] = { 'reader': reader, 'writer': writer, 'pileCode': pileCode, 'message_id': message_id, 'retry_count': 0, 'last_heartbeat1': 0, 'last_heartbeat2': 0, 'last_time_stamp': 0, 'isLogin': False, 'timeout_count_login': 0, 'timeout_count': 0, 'heart_serialNum': "00 00", 'status': 'Offline', 'charging': False, 'priceModelList': [] } self.logger.info(f"连接成功: {message_id} 桩号: {pileCode}") return True except asyncio.TimeoutError: self.logger.warning(f"⛔ 连接超时: {message_id} 桩号: {pileCode} (尝试 #{attempt + 1})") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 except ConnectionRefusedError: self.logger.warning(f"🚫 连接拒绝: {message_id} 桩号: {pileCode} (尝试 #{attempt + 1})") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 except Exception as e: self.logger.error( f"❌连接错误: {message_id} 桩号: {pileCode} 错误: {e} (尝试 #{attempt + 1})") if pileCode in self.connections: conn = self.connections[pileCode] if 'writer' in conn: writer = conn['writer'] try: if not writer.is_closing(): writer.close() await writer.wait_closed() except Exception: pass del self.connections[pileCode] await asyncio.sleep(1) # 短暂暂停 return False async def start_reader_writer(self, message_id: str, pileCode: str): """启动读写任务""" if pileCode not in self.connections: return conn = self.connections[pileCode] reader = conn['reader'] writer = conn['writer'] self.writer, self.reader = AsyncSocketWriter(writer, pileCode, message_id, self.total, self.connections, self.logger), AsyncSocketReader( reader, pileCode, message_id, self.connections, self.logger) # 创建读写任务 read_task = asyncio.create_task( self.reader.start_reading(), name=f"reader_{pileCode}_{message_id}" ) write_task = asyncio.create_task( self.writer.start_write(), name=f"writer_{pileCode}_{message_id}" ) # 添加完成回调 read_task.add_done_callback(self.handle_task_completion) write_task.add_done_callback(self.handle_task_completion) # 添加到活跃任务集合 self._active_tasks.add(read_task) self._active_tasks.add(write_task) # 并行运行读写任务 try: await asyncio.gather( read_task, write_task, return_exceptions=True ) except Exception as e: self.logger.error(f"读写任务异常: {message_id} {pileCode}: {e}") def read_to_array(file_path, remove_empty=True, encoding='utf-8'): """ 读取txt文件到数组 参数: file_path: 文件路径 remove_empty: 是否移除空行 encoding: 文件编码 """ try: with open(file_path, 'r', encoding=encoding) as file: if remove_empty: lines = [line.strip() for line in file if line.strip()] else: lines = [line.strip() for line in file] # 对每行数据进行处理 processed_lines = [] for line in lines: if len(line) == 14 and line.isdigit(): # 将14位数字分割为每2位一组 parts = [line[i:i + 2] for i in range(0, 14, 2)] processed_line = ' '.join(parts) processed_lines.append(processed_line) else: processed_lines.append(line) return processed_lines except FileNotFoundError: print(f"❌错误: 文件 '{file_path}' 不存在") return [] except UnicodeDecodeError: print(f"❌错误: 使用 {encoding} 编码读取文件失败") return [] except Exception as e: print(f"❌未知错误: {e}") return [] async def async_main(): # 读取桩号列表 piles_list_config = read_to_array('pileList.txt') if not piles_list_config: print("桩号配置文件为空") return # 创建管理器实例 manager = AsyncSocketManager() manager.install_signal_handlers() try: print("开始分批建立Socket连接...") await manager.batch_create_connections(piles_list_config) # 等待关闭信号 await manager.shutdown_requested.wait() print("接收到关闭信号...") except asyncio.CancelledError: print("异步任务被取消") except Exception as e: print(f"异步Socket客户端异常: {e}") finally: # 确保资源清理 print("开始最终资源清理...") await manager.close_all_connections() manager.restore_signal_handlers() print("异步Socket客户端已完全关闭") def main(): """主入口函数""" try: # 提升资源限制(Unix系统) try: import resource soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) resource.setrlimit(resource.RLIMIT_NOFILE, (hard, hard)) print(f"文件描述符限制提升至: {hard}") except (ImportError, ValueError): print("无法提升文件描述符限制") asyncio.run(async_main()) except KeyboardInterrupt: print("程序被用户中断") except Exception as e: print(f"程序异常退出: {e}") return 1 return 0 if __name__ == "__main__": exit(main()) 2025-11-06 23:34:56,728 [ERROR] __main__::create_socket_connection_with_retry - ❌连接错误: restart-31 01 17 00 05 65 92 桩号: 31 01 17 00 05 65 92 错误: [Errno 99] Cannot assign requested address (尝试 #1)
11-07
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值