android bionic mutex分析

本文详细解析了Mutex的构造、初始化、锁定、尝试锁定及销毁的过程。通过分析Mutex的不同类型及其内部实现原理,帮助读者深入理解Mutex的工作机制。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1. 先从mutex的构造开始

/*
 * Simple mutex class.  The implementation is system-dependent.
 *
 * The mutex must be unlocked by the thread that locked it.  Mutexes created
 * through the public constructors are not recursive, i.e. the same thread
 * can't lock one multiple times.  The private (bool recursive, ...)
 * constructor is reachable only from the friend classes declared below
 * (Condition, RecursiveMutex).
 */
class Mutex {
public:
    // Values accepted by the Mutex(int type, ...) constructor.
    enum {
        PRIVATE = 0,
        SHARED = 1
    };
    
                Mutex();
                Mutex(const char* name);
                Mutex(int type, const char* name = NULL);
#if defined(HAVE_PTHREADS)
                virtual
#endif
                ~Mutex();

    // lock or unlock the mutex
    status_t    lock();
    // gaia add +
    // lock, giving up after msecs milliseconds; returns 0 on success,
    // error otherwise
    status_t    lockTimeout(unsigned msecs);
    // gaia add -
    void        unlock();

    // lock if possible; returns 0 on success, error otherwise
    status_t    tryLock();

    // Manages the mutex automatically. It'll be locked when Autolock is
    // constructed and released when Autolock goes out of scope.
    //
    // The result of the lock attempt is kept in mStatus (see getStatus()).
    // The destructor releases the mutex only when that attempt succeeded:
    // after a failed or timed-out lock this thread does not own the mutex,
    // and unlocking it would release a lock held by some other thread.
    class Autolock {
    public:
    	// gaia add +
#if 1
        inline Autolock(Mutex& mutex) : mLock(mutex), mMsec(0)  { mStatus = mLock.lock(); }
        inline Autolock(Mutex* mutex) : mLock(*mutex), mMsec(0) { mStatus = mLock.lock(); }
        // msec == 0 means "wait without a timeout".
        inline Autolock(Mutex& mutex, unsigned msec) : mLock(mutex), mMsec(msec) {
            mStatus = msec ? mLock.lockTimeout(msec) : mLock.lock(); }
        inline ~Autolock() { if (mStatus == 0) mLock.unlock(); }
        // 0 when the constructor acquired the mutex, an error code otherwise.
        status_t getStatus() const { return mStatus; }
#else
        inline Autolock(Mutex& mutex) : mLock(mutex)  { mLock.lock(); }
        inline Autolock(Mutex* mutex) : mLock(*mutex) { mLock.lock(); }
        inline ~Autolock() { mLock.unlock(); }
#endif
   		// gaia add -
    private:
        Mutex& mLock;
	    // gaia add +
        unsigned mMsec;     // timeout requested at construction, 0 if none
        status_t mStatus;   // result of the lock attempt made by the constructor
	    // gaia add -
    };

private:
    friend class Condition;
    // GAIA ADD for RecursiveMutex
   friend class RecursiveMutex;
    
    // A mutex cannot be copied
                Mutex(const Mutex&);
    Mutex&      operator = (const Mutex&);
    
#if defined(HAVE_PTHREADS)
    pthread_mutex_t mMutex;
    //GAIA ADD
//protected:
    // Builds a mutex that is optionally recursive and/or process-shared.
    Mutex(bool recursive, int type = 0, const char* name = NULL);
#else
    void    _init();
    void*   mState;
#endif
};

这是Mutex的头文件定义, 根据系统不同而不同。

#if defined(HAVE_PTHREADS)

// Default constructor: initialises mMutex with no attribute object.
inline Mutex::Mutex() {
    const pthread_mutexattr_t* const noAttrs = NULL;
    pthread_mutex_init(&mMutex, noAttrs);
}
// Named constructor: the name is accepted but not used by this
// implementation; the mutex is initialised with no attribute object.
inline Mutex::Mutex(const char* name) {
    (void)name;
    const pthread_mutexattr_t* const noAttrs = NULL;
    pthread_mutex_init(&mMutex, noAttrs);
}
// Typed constructor: SHARED builds a process-shared mutex, any other value
// builds one with no attribute object. The name is not used.
inline Mutex::Mutex(int type, const char* name) {
    (void)name;
    if (type != SHARED) {
        pthread_mutex_init(&mMutex, NULL);
        return;
    }

    pthread_mutexattr_t sharedAttrs;
    pthread_mutexattr_init(&sharedAttrs);
    pthread_mutexattr_setpshared(&sharedAttrs, PTHREAD_PROCESS_SHARED);
    pthread_mutex_init(&mMutex, &sharedAttrs);
    pthread_mutexattr_destroy(&sharedAttrs);
}
//GAIA ADD
#if defined(HAVE_PTHREADS)
// Private constructor: always goes through an attribute object, turning on
// the recursive type and/or the process-shared flag as requested.
// The name is not used.
inline Mutex::Mutex(bool recursive, int type, const char* name) {
    (void)name;

    pthread_mutexattr_t attrs;
    pthread_mutexattr_init(&attrs);

    // The two settings are independent of each other.
    if (recursive) {
        pthread_mutexattr_settype(&attrs, PTHREAD_MUTEX_RECURSIVE);
    }
    if (type == SHARED) {
        pthread_mutexattr_setpshared(&attrs, PTHREAD_PROCESS_SHARED);
    }

    pthread_mutex_init(&mMutex, &attrs);
    pthread_mutexattr_destroy(&attrs);
}
#endif
// Destructor: hands mMutex back to pthread; the result is not checked.
inline Mutex::~Mutex() {
    (void)pthread_mutex_destroy(&mMutex);
}
// Acquires the mutex. Returns the negated pthread result, i.e. 0 on success.
inline status_t Mutex::lock() {
    const int rc = pthread_mutex_lock(&mMutex);
    return -rc;
}
// gaia add +
// Acquires the mutex, passing msec as the timeout to
// pthread_mutex_lock_timeout_np. Host builds ignore msec and use the plain
// pthread_mutex_lock instead. Returns the negated pthread result.
inline status_t Mutex::lockTimeout(unsigned msec) {
#if HOST_BUILD
    (void)msec;
    const int rc = pthread_mutex_lock(&mMutex);
#else
    const int rc = pthread_mutex_lock_timeout_np(&mMutex, msec);
#endif
    return -rc;
}
// gaia add -
// Releases the mutex; the pthread result is not reported to the caller.
inline void Mutex::unlock() {
    (void)pthread_mutex_unlock(&mMutex);
}
// Attempts to acquire the mutex via pthread_mutex_trylock.
// Returns the negated pthread result, i.e. 0 on success.
inline status_t Mutex::tryLock() {
    const int rc = pthread_mutex_trylock(&mMutex);
    return -rc;
}

#endif // HAVE_PTHREADS

这段代码不复杂,粗看基本能看懂。

那么接下来详细介绍。

从构造函数入手:

                Mutex();
                Mutex(const char* name);
                Mutex(int type, const char* name = NULL);
注意第三个构造函数:
// Typed constructor: SHARED builds a process-shared mutex, any other value
// builds one with no attribute object. The name is not used.
inline Mutex::Mutex(int type, const char* name) {
    (void)name;
    if (type != SHARED) {
        pthread_mutex_init(&mMutex, NULL);
        return;
    }

    pthread_mutexattr_t sharedAttrs;
    pthread_mutexattr_init(&sharedAttrs);
    pthread_mutexattr_setpshared(&sharedAttrs, PTHREAD_PROCESS_SHARED);
    pthread_mutex_init(&mMutex, &sharedAttrs);
    pthread_mutexattr_destroy(&sharedAttrs);
}
这里可以看到Mutex分为两种类型:

    enum {
        PRIVATE = 0,
        SHARED = 1
    };

Mutex调用的是Bionic中的pthread:bionic/libc/bionic/pthread.c.

那么接下来深入到pthread中。


pthread针对不同Mutex设置了不同的type

/* Mutex types.
 *
 * The four *_NP enumerators are always declared.  The unsuffixed names are
 * aliases for them and are only visible under the feature-test macros
 * checked below: NORMAL aliases TIMED_NP, RECURSIVE aliases RECURSIVE_NP,
 * ERRORCHECK aliases ERRORCHECK_NP, and DEFAULT aliases NORMAL.  */
enum
{
  PTHREAD_MUTEX_TIMED_NP,
  PTHREAD_MUTEX_RECURSIVE_NP,
  PTHREAD_MUTEX_ERRORCHECK_NP,
  PTHREAD_MUTEX_ADAPTIVE_NP
#if defined __USE_UNIX98 || defined __USE_XOPEN2K8
  ,
  PTHREAD_MUTEX_NORMAL = PTHREAD_MUTEX_TIMED_NP,
  PTHREAD_MUTEX_RECURSIVE = PTHREAD_MUTEX_RECURSIVE_NP,
  PTHREAD_MUTEX_ERRORCHECK = PTHREAD_MUTEX_ERRORCHECK_NP,
  PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL
#endif
#ifdef __USE_GNU
  /* For compatibility.  */
  , PTHREAD_MUTEX_FAST_NP = PTHREAD_MUTEX_TIMED_NP
#endif
};
mutex的type分为3类: Normal, Recursive, Errorcheck,Recursive就是递归锁;

相关的API罗列如下:

    //pthread mutexattr 操作  
    int pthread_mutexattr_init(pthread_mutexattr_t *attr);  
    int pthread_mutexattr_destroy(pthread_mutexattr_t *attr);  
      
    int pthread_mutexattr_gettype(const pthread_mutexattr_t *attr, int *type);  
    int pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type);  
      
    int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int  pshared);  
    int pthread_mutexattr_getpshared(pthread_mutexattr_t *attr, int *pshared);  
      
      
      
      
    //pthread mutex 操作  
    int pthread_mutex_init(pthread_mutex_t *mutex,  
                           const pthread_mutexattr_t *attr);  
    int pthread_mutex_destroy(pthread_mutex_t *mutex);  
      
    int pthread_mutex_lock(pthread_mutex_t *mutex);  
    int pthread_mutex_unlock(pthread_mutex_t *mutex);  
    int pthread_mutex_trylock(pthread_mutex_t *mutex);  
    int pthread_mutex_timedlock(pthread_mutex_t *mutex, struct timespec*  ts);  



基础结构

pthread_mutexattr_t,

pthread_mutex_t

typedef long pthread_mutexattr_t;
 a mutex attribute holds the following fields
  bits:     name       description
  0-3       type       type of mutex
  4         shared     process-shared flag
 

/* The whole mutex is a single int: the owner tid, type, process-shared
 * flag, recursion counter and lock state are packed into bit fields of
 * 'value' and updated atomically. */
typedef struct
{
    int volatile value;
} pthread_mutex_t;

 a mutex is implemented as a 32-bit integer holding the following fields
 
  bits:     name     description
  31-16     tid      owner thread's kernel id (recursive and errorcheck only)
  15-14     type     mutex type
  13        shared   process-shared flag
  12-2      counter  counter of recursive mutexes
  1-0       state    lock state (0, 1 or 2)



先把这个解释下,在后面会用到:

//tid,  表示拥有这个mutex的线程kernel id

//type 表示mutex的类型:normal / recursive / errorcheck

//process-shared flag, 是否共享锁,/private : shared

//counter主要用于递归mutex;

//state 表示锁的状态;

先解析如下这段代码:
        pthread_mutexattr_t attr;
        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
        pthread_mutex_init(&mMutex, &attr);
        pthread_mutexattr_destroy(&attr);


锁的初始化分两步:

a. 初始化mutexattr;

pthread_mutexattr_t用来描述mutex的属性:

a mutex attribute holds the following fields
 *
 * bits:     name       description
 * 0-3       type       type of mutex
 * 4         shared     process-shared flag
 

对应的掩码:

#define  MUTEXATTR_TYPE_MASK   0x000f
#define  MUTEXATTR_SHARED_MASK 0x0010

shared flag的取值为:

#define PTHREAD_PROCESS_PRIVATE  0
#define PTHREAD_PROCESS_SHARED   1

pthread_mutexattr_t对应于pthread_mutex_t

 * 15-14     type     mutex type

 * 13           shared   process-shared flag

a. 初始化attr源码

/* Reset *attr to PTHREAD_MUTEX_DEFAULT.
 * Returns 0 on success, EINVAL when attr is a null pointer. */
int pthread_mutexattr_init(pthread_mutexattr_t *attr)
{
    if (!attr)
        return EINVAL;

    *attr = PTHREAD_MUTEX_DEFAULT;
    return 0;
}

PTHREAD_MUTEX_DEFAULT其实就是前面的

PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL

接下来设置flag:

/* Set or clear the process-shared bit of *attr.
 * Returns 0 on success, EINVAL when attr is null or pshared is neither
 * PTHREAD_PROCESS_PRIVATE nor PTHREAD_PROCESS_SHARED. */
int pthread_mutexattr_setpshared(pthread_mutexattr_t *attr, int  pshared)
{
    if (!attr)
        return EINVAL;

    if (pshared == PTHREAD_PROCESS_PRIVATE) {
        *attr &= ~MUTEXATTR_SHARED_MASK;
        return 0;
    }

    if (pshared == PTHREAD_PROCESS_SHARED) {
        /* our current implementation of pthread actually supports shared
         * mutexes but won't cleanup if a process dies with the mutex held.
         * Nevertheless, it's better than nothing. Shared mutexes are used
         * by surfaceflinger and audioflinger.
         */
        *attr |= MUTEXATTR_SHARED_MASK;
        return 0;
    }

    return EINVAL;
}

b. 初始化mutex;


#define  MUTEX_TYPE_SHIFT      14
#define  MUTEX_TYPE_LEN        2
#define  MUTEX_TYPE_MASK       FIELD_MASK(MUTEX_TYPE_SHIFT,MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_NORMAL          0  /* Must be 0 to match __PTHREAD_MUTEX_INIT_VALUE */
#define  MUTEX_TYPE_RECURSIVE       1
#define  MUTEX_TYPE_ERRORCHECK      2

#define  MUTEX_TYPE_TO_BITS(t)       FIELD_TO_BITS(t, MUTEX_TYPE_SHIFT, MUTEX_TYPE_LEN)

#define  MUTEX_TYPE_BITS_NORMAL      MUTEX_TYPE_TO_BITS(MUTEX_TYPE_NORMAL)
#define  MUTEX_TYPE_BITS_RECURSIVE   MUTEX_TYPE_TO_BITS(MUTEX_TYPE_RECURSIVE)
#define  MUTEX_TYPE_BITS_ERRORCHECK  MUTEX_TYPE_TO_BITS(MUTEX_TYPE_ERRORCHECK)


/* Initialise *mutex from *attr.
 *
 * A null attr yields a normal, non-shared mutex.  Otherwise the type and the
 * process-shared flag stored in *attr are translated into the matching bit
 * fields of mutex->value.
 * Returns 0 on success, EINVAL for a null mutex or an unknown mutex type. */
int pthread_mutex_init(pthread_mutex_t *mutex,
                       const pthread_mutexattr_t *attr)
{
    int type_bits;
    int shared_bits;

    if (mutex == NULL)
        return EINVAL;

    if (__likely(attr == NULL)) {
        mutex->value = MUTEX_TYPE_BITS_NORMAL;
        return 0;
    }

    /* Map the attribute's type onto the mutex's type bits. */
    switch (*attr & MUTEXATTR_TYPE_MASK) {
    case PTHREAD_MUTEX_NORMAL:
        type_bits = MUTEX_TYPE_BITS_NORMAL;
        break;
    case PTHREAD_MUTEX_RECURSIVE:
        type_bits = MUTEX_TYPE_BITS_RECURSIVE;
        break;
    case PTHREAD_MUTEX_ERRORCHECK:
        type_bits = MUTEX_TYPE_BITS_ERRORCHECK;
        break;
    default:
        return EINVAL;
    }

    shared_bits = ((*attr & MUTEXATTR_SHARED_MASK) != 0) ? MUTEX_SHARED_MASK : 0;

    mutex->value = shared_bits | type_bits;
    return 0;
}



那么Mutex就创建好了。
2. 接下来看看如何进行lock

// Acquires the mutex. Returns the negated pthread result, i.e. 0 on success.
inline status_t Mutex::lock() {
    const int rc = pthread_mutex_lock(&mMutex);
    return -rc;
}

进入pthread

/*
 * Public lock entry point, wrapped with optional deadlock detection.
 *
 * When __g_enable_deadlock_detection is off this simply forwards to
 * pthread_mutex_lock_impl().  Otherwise it first tries a non-blocking
 * acquire; if that fails it waits with a timeout (optionally reporting
 * progress every DEADLOCK_WARNING_PERIOD) and calls __dump_me() when the
 * timeout expires.  After a successful acquire the caller's tid is OR-ed
 * into the owner bits of the mutex value with a cmpxchg loop.
 *
 * Returns 0 on success, otherwise the error reported by the failing
 * lock attempt.
 */
int pthread_mutex_lock(pthread_mutex_t *mutex) {
    if (!__g_enable_deadlock_detection) {
        int err = pthread_mutex_lock_impl(mutex);
        return err;
    }
    int64_t startTime = uptime_millis();
    int err = pthread_mutex_trylock_impl(mutex);
    int wait_tid = gettid();
    if (err) {
        // two cases:
        // 1. mutex hold by other thread
        // 2. normal mutex acquired again
        int mvalue = mutex->value;
        int mtype = mvalue & MUTEX_TYPE_MASK;// extract the type bits
        int tid = MUTEX_OWNER_FROM_BITS(mvalue);
#if NONRECURSIVE_REENTRANT_DETECT
        if (mtype == MUTEX_TYPE_BITS_NORMAL) {
            if (tid != 0 && wait_tid == tid) { // 2
                __dump_me("nonrecursive-mutex-lock-twice", tid, tid);
            }
        }
#endif
#if MUTEX_WARNING_ON
        // wait in DEADLOCK_WARNING_PERIOD slices, reporting the current
        // owner after each slice that ends without the lock
        int i = 0;
        for (i = 0; i < DEADLOCK_TIMEOUT/DEADLOCK_WARNING_PERIOD; ++i) {
            err = pthread_mutex_lock_timeout_np_impl(mutex, DEADLOCK_WARNING_PERIOD);
            if (!err)
                break;
            mvalue = mutex->value;
            tid = MUTEX_OWNER_FROM_BITS(mvalue);
            __show_mutex_info(mutex, uptime_millis() - startTime, tid);
        }
#else
        err = pthread_mutex_lock_timeout_np_impl(mutex, DEADLOCK_TIMEOUT);
#endif
        if (err) { // 1
            __dump_me("mutex-acquire-lock-timeout", tid, wait_tid);
        } else {
            // lock acquired after waiting: record wait_tid in the owner
            // bits, retrying whenever the value changed underneath us
            while (1) {
                int newval = mvalue;
                newval |= (mvalue & FIELD_MASK(0, MUTEX_OWNER_SHIFT)) | MUTEX_OWNER_TO_BITS(wait_tid);
                if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                    mvalue = mutex->value;
                    continue;
                }
                ANDROID_MEMBAR_FULL();
                return 0;
            }
        }
    } else {
        // trylock succeeded on the first attempt
        int mvalue = mutex->value;
        int mtype = mutex->value & MUTEX_TYPE_MASK;
        if (mtype == MUTEX_TYPE_BITS_NORMAL) {
            while (1) {
                int newval = mvalue;
                newval |= (mvalue & FIELD_MASK(0, MUTEX_OWNER_SHIFT)) | MUTEX_OWNER_TO_BITS(wait_tid);
                // this thread now holds the mutex, so record its tid in the owner bits
                if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                    mvalue = mutex->value;
                    continue;
                }
                ANDROID_MEMBAR_FULL();
                return 0;
            }
        }
    }
    return err;
}



/*
 * Blocking lock.
 *
 * Normal mutexes are handed to _normal_lock().  For recursive and
 * error-check mutexes the function handles re-locking by the owner, then
 * tries an uncontended fast path, and finally loops: acquire if the mutex
 * is free, otherwise mark it contended and sleep on the futex.
 *
 * Returns 0 on success, EINVAL for a null mutex, or whatever
 * _recursive_increment() returns when the caller already owns the mutex.
 */
__LIBC_HIDDEN__
int pthread_mutex_lock_impl(pthread_mutex_t *mutex)
{
    int mvalue, mtype, tid, new_lock_type, shared;

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Handle normal case first */
// Case 1: normal mutex
    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) ) {// taken for the normal type
        _normal_lock(mutex, shared);
        return 0;
    }
// Case 2: recursive / error-check mutex already owned by this thread
    /* Do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
        return _recursive_increment(mutex, mvalue, mtype);

    /* Add in shared state to avoid extra 'or' operations below */
    mtype |= shared;
// Case 3: uncontended fast path
    /* First, if the mutex is unlocked, try to quickly acquire it.
     * In the optimistic case where this works, set the state to 1 to
     * indicate locked with no contention */
    if (mvalue == mtype) {
        int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
        if (__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0) {
            ANDROID_MEMBAR_FULL();
            return 0;
        }
        /* argh, the value changed, reload before entering the loop */
        mvalue = mutex->value;
    }
// Case 4: contended slow path
    for (;;) {
        int newval;

        /* if the mutex is unlocked, its value should be 'mtype' and
         * we try to acquire it by setting its owner and state atomically.
         * NOTE: We put the state to 2 since we _know_ there is contention
         * when we are in this loop. This ensures all waiters will be
         * unlocked.
         */
        if (mvalue == mtype) {
            newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
            /* TODO: Change this to __bionic_cmpxchg_acquire when we
             *        implement it to get rid of the explicit memory
             *        barrier below.
             */
            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                mvalue = mutex->value;
                continue;
            }
            ANDROID_MEMBAR_FULL();
            return 0;
        }
// Case 5: held by another thread; promote state 1 to 2 before sleeping
        /* the mutex is already locked by another thread, if its state is 1
         * we will change it to 2 to indicate contention. */
        if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
            newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue); /* locked state 1 => state 2 */
            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                mvalue = mutex->value;
                continue;
            }
            mvalue = newval;
        }

        /* wait until the mutex is unlocked */
        __futex_wait_ex(&mutex->value, shared, mvalue, NULL);

        mvalue = mutex->value;
    }
    /* NOTREACHED */
}

情况1:Normal。调用_normal_lock 

/*
 * Lock a normal (non-recursive, non-error-check) mutex, sleeping on the
 * futex until it becomes available.  'shared' is the process-shared bit
 * taken from the mutex value.
 */
static __inline__ void
_normal_lock(pthread_mutex_t*  mutex, int shared)
{
    /* convenience shortcuts */
    const int unlocked           = shared | MUTEX_STATE_BITS_UNLOCKED;
    const int locked_uncontended = shared | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    /*
     * Article author's notes on the lock state, which takes one of three
     * values:
     *   - unlocked
     *   - locked, uncontended
     *   - locked, contended
     * When nobody holds the mutex, the first thread to take it puts it in
     * the uncontended state.  A thread that finds the mutex held by someone
     * else switches it to the contended state, whether or not it then gets
     * the lock, to record that several threads are competing for it
     * (the author marked this reading as "to be verified").
     *
     * The state values come from these macros:
     *   #define  MUTEX_STATE_BITS_UNLOCKED            MUTEX_STATE_TO_BITS(MUTEX_STATE_UNLOCKED)
     *   #define  MUTEX_STATE_BITS_LOCKED_UNCONTENDED  MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_UNCONTENDED)
     *   #define  MUTEX_STATE_BITS_LOCKED_CONTENDED    MUTEX_STATE_TO_BITS(MUTEX_STATE_LOCKED_CONTENDED)
     */

    /*
     * The common case is an unlocked mutex, so we begin by trying to
     * change the lock's state from 0 (UNLOCKED) to 1 (LOCKED).
     * __bionic_cmpxchg() returns 0 if it made the swap successfully.
     * If the result is nonzero, this lock is already held by another thread.
     *
     * In other words: mutex->value is compared with 'unlocked'; if they are
     * equal, the value becomes 'locked_uncontended'.
     */
    if (__bionic_cmpxchg(unlocked, locked_uncontended, &mutex->value) != 0) {
        const int locked_contended = shared | MUTEX_STATE_BITS_LOCKED_CONTENDED;
        /*
         * A nonzero result means mutex->value was not 'unlocked', so some
         * other thread should be holding the mutex.
         *
         * We want to go to sleep until the mutex is available, which
         * requires promoting it to state 2 (CONTENDED). We need to
         * swap in the new state value and then wait until somebody wakes us up.
         *
         * __bionic_swap() returns the previous value.  We swap 2 in and
         * see if we got zero back; if so, we have acquired the lock.  If
         * not, another thread still holds the lock and we wait again.
         *
         * The second argument to the __futex_wait() call is compared
         * against the current value.  If it doesn't match, __futex_wait()
         * returns immediately (otherwise, it sleeps for a time specified
         * by the third argument; 0 means sleep forever).  This ensures
         * that the mutex is in state 2 when we go to sleep on it, which
         * guarantees a wake-up call.
         */
        /* loop until another thread releases the mutex */
        while (__bionic_swap(locked_contended, &mutex->value) != unlocked)
            __futex_wait_ex(&mutex->value, shared, locked_contended, 0);
    }
    ANDROID_MEMBAR_FULL();
}

/*
 * Atomic compare-and-exchange on *ptr.
 *
 * If *ptr equals old_value it is replaced by new_value and 0 is returned;
 * otherwise *ptr is left alone and 1 is returned.  Note the inverted sense:
 * the builtin reports success as true, this wrapper reports it as 0.
 */
__ATOMIC_INLINE__ int
__bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr)
{
    if (__sync_bool_compare_and_swap(ptr, old_value, new_value)) {
        /* We must return 0 on success */
        return 0;
    }
    return 1;
}

/*
 * Atomically store new_value into *ptr and return the value that was there
 * before.
 *
 * __sync_val_compare_and_swap() returns the value it found in *ptr; the
 * exchange took place only if that equals the 'prev' we passed in, so the
 * loop retries until the two match.
 */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev;
    do {
        prev = *ptr;
    } while (__sync_val_compare_and_swap(ptr, prev, new_value) != prev);
    return prev;
}
情况2:当 tid == MUTEX_OWNER_FROM_BITS(mvalue) 时,说明本线程已经持有该mutex:对递归锁只是将计数+1;对errorcheck锁则直接返回EDEADLK。

/*
 * Called when the current thread already owns a recursive or error-check
 * mutex and locks it again.
 *
 * Returns EDEADLK for an error-check mutex, EAGAIN when the recursion
 * counter would overflow, and 0 once the counter has been incremented.
 */
static __inline__ __attribute__((always_inline))
int _recursive_increment(pthread_mutex_t* mutex, int mvalue, int mtype)
{
    int current = mvalue;

    /* trying to re-lock a mutex we already acquired */
    if (mtype == MUTEX_TYPE_BITS_ERRORCHECK) {
        return EDEADLK;
    }

    /* Detect recursive lock overflow and return EAGAIN.
     * This is safe because only the owner thread can modify the
     * counter bits in the mutex value. */
    if (MUTEX_COUNTER_BITS_WILL_OVERFLOW(current)) {
        return EAGAIN;
    }

    /* We own the mutex, but other threads are able to change
     * the lower bits (e.g. promoting it to "contended"), so we
     * need to use an atomic cmpxchg loop to update the counter.
     * Overflow was already checked above.
     */
    while (__bionic_cmpxchg(current, current + MUTEX_COUNTER_BITS_ONE, &mutex->value) != 0) {
        /* the value was changed, this happens when another thread changes
         * the lower state bits from 1 to 2 to indicate contention. This
         * cannot change the counter, so simply reload and try again.
         */
        current = mutex->value;
    }

    /* mutex is still locked, not need for a memory barrier */
    return 0;
}

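情况3: 快速路径(一种优化)。如果mutex当前没有被任何线程持有(mvalue == mtype|shared),就用一次cmpxchg把owner设为本线程、state设为1(无竞争的锁定状态);如果cmpxchg失败,则重新读取value,进入情况4的循环。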

mtype |= shared;

    /* First, if the mutex is unlocked, try to quickly acquire it.
     * In the optimistic case where this works, set the state to 1 to
     * indicate locked with no contention 
    *///下面的值:tid拥有的mutex value|mtype|locke_uncontended竞争锁,其实也就是表示如果

      mvalue除了type和shared字段,其他字段都是空的,即value == mtype|shared,那么表示这个mutex没有任何线程获得,
      那么立即将它设置为1,表示其为非竞争锁

 if (mvalue == mtype) { int newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;
 if (__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0) { ANDROID_MEMBAR_FULL(); return 0; } /* argh, the value changed, reload before entering the loop */ 
    mvalue = mutex->value; }
情况4: 如果情况1-2-3都没有成功,即它不是normal锁,也不是本线程持有,快速路径也没能抢到锁。 从结构分析来看
* bits: name description

* 31-16 tid owner thread's kernel id (recursive and errorcheck only)

* 15-14 type mutex type

* 13 shared process-shared flag

* 12-2 counter counter of recursive mutexes

* 1-0 state lock state (0, 1 or 2)

则可能如下情况: a. tid!=0 b. counter!=0; c. state!=0; 还是回到源码:

for (;;) {
        int newval;

        /* if the mutex is unlocked, its value should be 'mtype' and
         * we try to acquire it by setting its owner and state atomically.
         * NOTE: We put the state to 2 since we _know_ there is contention
         * when we are in this loop. This ensures all waiters will be
         * unlocked.
         */
        if (mvalue == mtype) {//之前的情况,其他都是0,mvalue == mtype|shared ,但是之前之所以失败,肯定其不是uncontended状态,因此现在将它设置为contended 状态
            newval = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_CONTENDED;
            /* TODO: Change this to __bionic_cmpxchg_acquire when we
             *        implement it to get rid of the explicit memory
             *        barrier below.
             */
            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                mvalue = mutex->value;
                continue;
            }
            ANDROID_MEMBAR_FULL();
            return 0;
        }

        /* the mutex is already locked by another thread, if its state is 1
         * we will change it to 2 to indicate contention. */表示被其他线程持有,那么将其设置为contended,表示这个锁存在竞争了。
        if (MUTEX_STATE_BITS_IS_LOCKED_UNCONTENDED(mvalue)) {
            newval = MUTEX_STATE_BITS_FLIP_CONTENTION(mvalue); /* locked state 1 => state 2 */
            if (__unlikely(__bionic_cmpxchg(mvalue, newval, &mutex->value) != 0)) {
                mvalue = mutex->value;
                continue;
            }
            mvalue = newval;
        }

        /* wait until the mutex is unlocked */
        __futex_wait_ex(&mutex->value, shared, mvalue, NULL);

        mvalue = mutex->value;
    }
    /* NOTREACHED */

__futex_wait_ex相关的东西 需要以后好好研究。应该持续等待其他线程唤醒,对应的是

 __futex_wake_ex
3. 解锁

/*
 * Unlock a mutex.
 *
 * Normal mutexes go to _normal_unlock().  For recursive / error-check
 * mutexes the caller must be the owner; a non-zero recursion counter is
 * simply decremented, otherwise the mutex is reset to 'unlocked' and one
 * waiter is woken if the previous state was contended.
 *
 * Returns 0 on success, EINVAL for a null mutex, EPERM when the calling
 * thread is not the owner.
 */
__LIBC_HIDDEN__
int pthread_mutex_unlock_impl(pthread_mutex_t *mutex)
{
    int mvalue, mtype, tid, oldv, shared;

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype  = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Case 1: handle common case first */
    if (__likely(mtype == MUTEX_TYPE_BITS_NORMAL)) {
        _normal_unlock(mutex, shared);
        return 0;
    }

    /* Case 2: do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid != MUTEX_OWNER_FROM_BITS(mvalue) )
        return EPERM;

    /* Case 3: if the counter is > 0, we can simply decrement it atomically.
     * Since other threads can mutate the lower state bits (and only the
     * lower state bits), use a cmpxchg to do it. */
    if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {
        for (;;) {
            int newval = mvalue - MUTEX_COUNTER_BITS_ONE;
            if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
                /* success: we still own the mutex, so no memory barrier */
                return 0;
            }
            /* the value changed, so reload and loop */
            mvalue = mutex->value;
        }
    }

    /* Case 4: the counter is 0, so we're going to unlock the mutex by
     * resetting its value to 'unlocked'. We need to perform a swap in order
     * to read the current state, which will be 2 if there are waiters
     * to awake.
     *
     * TODO: Change this to __bionic_swap_release when we implement it
     *        to get rid of the explicit memory barrier below.
     */
    ANDROID_MEMBAR_FULL();  /* RELEASE BARRIER */
    mvalue = __bionic_swap(mtype | shared | MUTEX_STATE_BITS_UNLOCKED, &mutex->value);

    /* Wake one waiting thread, if any */
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
        __futex_wake_ex(&mutex->value, shared, 1);
    }
    return 0;
}


情况1:表示normal type进入到_normal_unlock

/*
 * Unlock a normal (non-recursive, non-error-check) mutex and, if the lock
 * was contended, wake one waiter.  'shared' is the process-shared bit taken
 * from the mutex value.
 */
static __inline__ void
_normal_unlock(pthread_mutex_t*  mutex, int shared)
{
    ANDROID_MEMBAR_FULL();

    /*
     * The mutex state will be 1 or (rarely) 2.  We use an atomic decrement
     * to release the lock.  __bionic_atomic_dec() returns the previous value;
     * if it wasn't 1 we have to do some additional work.
     *
     * Article author's reading: the decrement fetches the state first and
     * then subtracts 1 (the article says it is built on
     * __sync_fetch_and_add; that helper is not shown here, so confirm).
     * If the previous state was not "uncontended" it was "contended", and
     * waiting threads must be woken.  If it was "uncontended", subtracting 1
     * leaves 0, i.e. the unlocked state.
     */
    if (__bionic_atomic_dec(&mutex->value) != (shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED)) {
        /*
         * Start by releasing the lock.  The decrement changed it from
         * "contended lock" to "uncontended lock", which means we still
         * hold it, and anybody who tries to sneak in will push it back
         * to state 2.
         *
         * Once we set it to zero the lock is up for grabs.  We follow
         * this with a __futex_wake() to ensure that one of the waiting
         * threads has a chance to grab it.
         *
         * This doesn't cause a race with the swap/wait pair in
         * _normal_lock(), because the __futex_wait() call there will
         * return immediately if the mutex value isn't 2.
         */
        mutex->value = shared;

        /*
         * Wake up one waiting thread.  We don't know which thread will be
         * woken or when it'll start executing -- futexes make no guarantees
         * here.  There may not even be a thread waiting.
         *
         * The newly-woken thread will replace the 0 we just set above
         * with 2, which means that when it eventually releases the mutex
         * it will also call FUTEX_WAKE.  This results in one extra wake
         * call whenever a lock is contended, but lets us avoid forgetting
         * anyone without requiring us to track the number of sleepers.
         *
         * It's possible for another thread to sneak in and grab the lock
         * between the zero assignment above and the wake call below.  If
         * the new thread is "slow" and holds the lock for a while, we'll
         * wake up a sleeper, which will swap in a 2 and then go back to
         * sleep since the lock is still held.  If the new thread is "fast",
         * running to completion before we call wake, the thread we
         * eventually wake will find an unlocked mutex and will execute.
         * Either way we have correct behavior and nobody is orphaned on
         * the wait queue.
         */
        __futex_wake_ex(&mutex->value, shared, 1);
    }
}

情况2:既然不满足1情况,同时mutex非本线程持有,因此将其作为error处理;情况3:递归mutex

 if (!MUTEX_COUNTER_BITS_IS_ZERO(mvalue)) {//递归锁,表示计数非0,那么只是减去1
        for (;;) {
            int newval = mvalue - MUTEX_COUNTER_BITS_ONE;//将其对应位减去1
            if (__likely(__bionic_cmpxchg(mvalue, newval, &mutex->value) == 0)) {
                /* success: we still own the mutex, so no memory barrier */
                return 0;
            }
            /* the value changed, so reload and loop */
            mvalue = mutex->value;
        }
    }
情况4:
    /* the counter is 0, so we're going to unlock the mutex by resetting
     * its value to 'unlocked'. We need to perform a swap in order
     * to read the current state, which will be 2 if there are waiters
     * to awake.
     *
     * TODO: Change this to __bionic_swap_release when we implement it
     *        to get rid of the explicit memory barrier below.
     */
    ANDROID_MEMBAR_FULL();  /* RELEASE BARRIER */
    mvalue = __bionic_swap(mtype | shared | MUTEX_STATE_BITS_UNLOCKED, &mutex->value);
    /* Wake one waiting thread, if any */
    if (MUTEX_STATE_BITS_IS_LOCKED_CONTENDED(mvalue)) {
        __futex_wake_ex(&mutex->value, shared, 1);
    }
    //将state设置为unlocked,然后判定mvalue是什么状态,如果是contended状态,那么唤醒其他的线程。
    return 0;

/*
 * Atomically store new_value into *ptr and return the value that was there
 * before, i.e. the value prior to the exchange.
 *
 * __sync_val_compare_and_swap() returns the value it found in *ptr; the
 * exchange took place only if that equals the 'prev' we passed in, so the
 * loop retries until the two match.
 */
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
    int32_t prev;
    do {
        prev = *ptr;
    } while (__sync_val_compare_and_swap(ptr, prev, new_value) != prev);
    return prev;
}
4. trylock
/*
 * Non-blocking lock attempt.
 *
 * Returns 0 when the mutex was acquired, EBUSY when it is held by someone
 * else, EINVAL for a null mutex, or the result of _recursive_increment()
 * when the calling thread already owns a recursive / error-check mutex.
 */
int pthread_mutex_trylock_impl(pthread_mutex_t *mutex)
{
    int mvalue, mtype, tid, oldv, shared;

    if (__unlikely(mutex == NULL))
        return EINVAL;

    mvalue = mutex->value;
    mtype  = (mvalue & MUTEX_TYPE_MASK);
    shared = (mvalue & MUTEX_SHARED_MASK);

    /* Case 1: handle common case first */
    if ( __likely(mtype == MUTEX_TYPE_BITS_NORMAL) )
    {
        if (__bionic_cmpxchg(shared|MUTEX_STATE_BITS_UNLOCKED,
                             shared|MUTEX_STATE_BITS_LOCKED_UNCONTENDED,
                             &mutex->value) != 0) {
            return EBUSY;
        }
        ANDROID_MEMBAR_FULL();
        return 0;
    }

    /* Case 2: do we already own this recursive or error-check mutex ? */
    tid = __get_thread()->kernel_id;
    if ( tid == MUTEX_OWNER_FROM_BITS(mvalue) )
        return _recursive_increment(mutex, mvalue, mtype);

    /* Case 3: same as pthread_mutex_lock, except that we don't want to wait,
     * and the only operation that can succeed is a single cmpxchg to acquire
     * the lock if it is released / not owned by anyone. No need for a
     * complex loop. */

    /* value the mutex holds when it is unlocked */
    mtype |= shared | MUTEX_STATE_BITS_UNLOCKED;
    /* value to install: owner = this thread, state = locked, uncontended */
    mvalue = MUTEX_OWNER_TO_BITS(tid) | mtype | MUTEX_STATE_BITS_LOCKED_UNCONTENDED;

    if (__likely(__bionic_cmpxchg(mtype, mvalue, &mutex->value) == 0)) {
        ANDROID_MEMBAR_FULL();
        return 0;
    }
    return EBUSY;
}
trylock其实就是说,我尝试去lock,如果没成功,我不会去持续等待。

情况1:NormalType 要么获取到了,要么返回ebusy

情况2:表示本线程已经持有,那么+1就ok

情况3:如果处于unlocked状态,那么果断地将其设置为uncontended;如果失败则返回EBUSY。

5. destroy
/*
 * Destroy 'mutex'.
 *
 * A trylock is performed first so that an invalid or currently locked mutex
 * is rejected: any non-zero result from pthread_mutex_trylock_impl() is
 * returned unchanged and the mutex is left untouched. On success the mutex
 * word is overwritten with the marker 0xdead10cc and 0 is returned.
 */
int pthread_mutex_destroy(pthread_mutex_t *mutex)
{
    const int status = pthread_mutex_trylock_impl(mutex);

    if (status == 0) {
        /* Trylock succeeded: stamp the word with the "destroyed" marker. */
        mutex->value = 0xdead10cc;
    }
    return status;
}

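先进行 trylock:如果成功(返回 0),说明该 mutex 当前没有被锁住,可以销毁;否则直接把 trylock 的错误码返回。销毁时把 value 设置为

 0xdead10cc

这有什么特殊含义?它是一种 hexspeak 写法,按字形读作 "dead lock"(dead10cc),貌似就是用来标记这个 mutex 已经被销毁、不能再使用。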
<think>首先,用户的问题是:"我想分析Android系统崩溃堆栈中的native调用问题 请问Android native crash pthread_mutex_lock mutex lock libs surfaceflinger writeToProto"。这是一个关于分析Android系统中的native崩溃堆栈的问题,特别是涉及到pthread_mutex_lock、mutex lock、libs、surfaceflinger和writeToProto。 用户提到的引用是站内引用,我应该参考它们。引用[1]、[2]、[3]是关于Android的OpenGL和mutex锁的,具体在egl_init_drivers函数中使用了pthread_mutex_lock。引用[4]是关于SurfaceFlinger的EventThread.cpp中的createEventConnection函数。这些引用不是用户直接的问题,但提供了背景信息。 用户的问题焦点是:分析native崩溃堆栈中的pthread_mutex_lock、mutex lock、libs、surfaceflinger和writeToProto。崩溃堆栈可能来自Android系统的native代码,特别是SurfaceFlinger组件。 关键元素: - pthread_mutex_lock:这是POSIX线程库中的函数,用于实现互斥锁。 - mutex lock:可能指的是同一个东西,或者是Android中的Mutex类(如引用[2]提到的)。 - libs:可能指的是库文件,如libsurfaceflinger.so或其他。 - surfaceflinger:Android的显示合成服务。 - writeToProto:可能是一个函数,用于将数据写入协议缓冲区(protobuf),在崩溃分析中可能用于日志或调试信息。 用户可能遇到了一个崩溃堆栈,其中包含这些元素,并想了解如何分析和解决。 我的回答需要: - 使用中文回答。 - 结构清晰,逐步帮助用户解决问题。 - 保证回答真实可靠。 - 遵循系统级指令:行内数学表达式用$...$,独立公式用$$...$$(但这里可能不涉及数学公式)。 - 生成相关问题。 - 在引用段落末尾添加引用标识。 步骤: 1. **理解问题**:用户想分析native崩溃堆栈,涉及mutex锁和SurfaceFlinger的writeToProto函数。 2. **提供背景**:解释Android native崩溃的基本知识。 3. **分析元素**:针对pthread_mutex_lock、mutex lock、libs、surfaceflinger和writeToProto进行解释。 4. **常见原因**:讨论可能导致崩溃的常见原因,如死锁、竞争条件。 5. **分析步骤**:给出如何分析崩溃堆栈的具体步骤。 6. **工具建议**:推荐工具如addr2line、ndk-stack、logcat等。 7. **生成相关问题**:在最后添加相关问题。 从引用中提取信息: - 引用[1]和[3]:egl_init_drivers函数使用pthread_mutex_lock来保护初始化过程。 - 引用[2]:Mutex类是对pthread_mutex的封装,有AutoLock用于自动加锁解锁。 - 引用[4]:SurfaceFlinger的EventThread::createEventConnection函数。 用户提到"writeToProto",在SurfaceFlinger中可能有这样的函数,用于序列化数据到protobuf。崩溃可能发生在SurfaceFlinger的writeToProto函数中,涉及到mutex锁。 可能的崩溃场景: - 在持有mutex锁时崩溃,导致死锁或其他问题。 - 在writeToProto函数中访问无效内存。 分析步骤: 1. **获取崩溃堆栈**:从logcat或tombstone文件中获取native崩溃堆栈。 2. **符号化堆栈**:使用addr2line或ndk-stack将地址转换为函数名和行号。 3. **识别关键点**:在堆栈中查找pthread_mutex_lock、writeToProto等。 4. **检查锁状态**:如果崩溃在pthread_mutex_lock中,可能表示锁已被其他线程持有(死锁)。 5. **查看日志**:分析logcat日志,查找相关错误消息。 6. 
**代码审查**:参考Android源码,如SurfaceFlinger的代码。 工具: - addr2line:将地址映射到源码位置。 - ndk-stack:Android NDK工具,用于符号化堆栈。 - logcat:查看系统日志。 - gdb/lldb:调试器用于深入分析。 回答结构: 1. **引言**:确认问题,并概述回答。 2. **背景解释**:简要解释Android native崩溃和mutex锁。 3. **关键元素分析**:针对pthread_mutex_lock、mutex lock、libs、surfaceflinger、writeToProto。 4. **分析步骤**:一步步指导如何分析崩溃堆栈。 5. **常见原因和解决方案**:讨论潜在问题。 6. **工具使用**:推荐工具和命令。 7. **总结**。 8. **相关问题**:生成相关问题。 生成相关问题: - 如何防止Android中的死锁? - SurfaceFlinger的工作原理是什么? - 如何调试Android native代码? - 什么是协议缓冲区在Android中的应用? - 如何优化mutex使用以避免崩溃? 确保语言自然,使用中文。</think>### Android Native Crash 分析:pthread_mutex_lock、mutex lock 与 SurfaceFlinger 的 writeToProto 您好!您的问题是关于分析 Android 系统崩溃堆栈中的 native 调用问题,特别是涉及 `pthread_mutex_lock`、`mutex lock`、`libs`、`surfaceflinger` 和 `writeToProto`。我将从背景知识入手,逐步指导您如何分析这类崩溃。Android native 崩溃通常由内存访问错误、死锁或资源竞争引起,而您提到的元素常见于 SurfaceFlinger(显示合成服务)的代码中。我会结合真实可靠的信息,基于 Android 源码和常见调试实践,帮助您定位问题。 #### 1. **背景知识:关键元素解析** - **`pthread_mutex_lock`**:这是 POSIX 线程库中的函数,用于实现互斥锁(mutex),确保多线程环境下资源的独占访问。在 Android 中,它常用于保护共享资源。如果崩溃发生在 `pthread_mutex_lock` 调用点,通常表示死锁或锁状态异常(例如,锁已被其他线程持有而未释放)[^2]。 - 示例代码(类似引用[1][^1]和[3][^3]): ```c pthread_mutex_lock(&mutex); // 加锁 // 临界区代码 pthread_mutex_unlock(&mutex); // 解锁 ``` - **`mutex lock`**:在 Android 源码中,常指 `Mutex` 类(定义于 `system/core/libutils/include/utils/Mutex.h`),它封装了 `pthread_mutex_t`,并提供自动加解锁的 `AutoLock` 辅助类。使用不当(如忘记解锁)会导致死锁[^2]。 - 示例(基于引用[2][^2]): ```cpp Mutex mLock; { AutoLock lock(mLock); // 构造时加锁,析构时自动解锁 // 安全访问共享资源 } ``` - **`libs`**:指共享库文件(如 `libsurfaceflinger.so`),在崩溃堆栈中,库名(如 `surfaceflinger`)能帮助定位问题模块。SurfaceFlinger 是 Android 显示系统的核心服务,负责合成和渲染 UI。 - **`surfaceflinger`**:Android 的显示合成服务,源码位于 `frameworks/native/services/surfaceflinger/`。崩溃常发生在多线程操作中,如事件处理或状态序列化。 - **`writeToProto`**:这是一个常见函数名,用于将数据序列化为协议缓冲区(protobuf)格式,便于日志或调试。在 SurfaceFlinger 中,它可能用于输出状态信息(例如,在 `frameworks/native/services/surfaceflinger/Scheduler/EventThread.cpp` 中)。如果崩溃涉及此函数,可能因无效内存访问(如空指针)或锁竞争引起[^4]。 #### 2. 
**崩溃分析的常见原因** - **死锁(Deadlock)**:线程 A 持有锁 L1 并等待 L2,线程 B 持有 L2 并等待 L1,导致双方永久阻塞。常见于 `pthread_mutex_lock` 或 `Mutex` 使用不当。 - **竞争条件(Race Condition)**:多个线程同时访问共享资源(如 SurfaceFlinger 的状态变量),未正确加锁,导致数据损坏。 - **无效内存访问**:在 `writeToProto` 中,可能因空指针(如未初始化的对象)或越界访问触发崩溃。 - **锁初始化失败**:如果 mutex 未正确初始化(如未调用 `pthread_mutex_init`),加锁操作会崩溃。 根据您的描述,崩溃堆栈涉及 `pthread_mutex_lock` 和 `writeToProto`,可能场景是:SurfaceFlinger 线程在 `writeToProto` 函数中尝试加锁时卡死(死锁),或在序列化过程中访问无效内存。 #### 3. **逐步分析崩溃堆栈的步骤** 以下是系统化分析流程,使用标准 Android 工具。假设您已有崩溃堆栈(从 logcat 或 tombstone 文件获取)。 **步骤 1: 获取并符号化崩溃堆栈** - 从 logcat 提取崩溃日志: ```bash adb logcat -b crash > crash.log ``` - 查找关键行,如: ``` backtrace: #00 pc 0000000000012345 /system/lib64/libsurfaceflinger.so (writeToProto+0x100) #01 pc 0000000000056789 /system/lib64/libsurfaceflinger.so (pthread_mutex_lock+0x50) ``` - 符号化堆栈:使用 `addr2line` 或 `ndk-stack` 将地址映射到源码行号。 - 示例(需 Android NDK): ```bash ndk-stack -sym /path/to/symbols/libsurfaceflinger.so -dump crash.log ``` 输出类似: ``` #00 0x12345 in writeToProto frameworks/native/services/surfaceflinger/State.cpp:100 #01 0x56789 in pthread_mutex_lock frameworks/native/opengl/libs/EGL/egl.cpp:50 ``` **步骤 2: 定位关键函数和锁状态** - **分析 `pthread_mutex_lock` 调用**: - 检查堆栈中锁的地址(如 `&sInitDriverMutex`),确认是否被多个线程争用。 - 使用 `pstack` 或 `gdb` 附加到进程,查看所有线程的堆栈,识别谁持有该锁。 ```bash adb shell gdbserver :5039 --attach $(pidof surfaceflinger) adb forward tcp:5039 tcp:5039 gdb -ex "thread apply all bt" -ex "quit" ``` - 如果堆栈显示线程阻塞在 `pthread_mutex_lock`,死锁可能性高。 - **分析 `writeToProto` 调用**: - 查看序列化的对象是否有效(如 `this` 指针为 null)。 - 在源码中检查 `writeToProto` 是否涉及共享资源加锁。例如,在 SurfaceFlinger 中,状态序列化常需要锁保护: ```cpp // 伪代码示例(基于引用[4][^4]) void SurfaceFlinger::writeToProto(ProtoOutputStream& proto) { AutoLock lock(mStateMutex); // 依赖 Mutex 加锁 proto.write(mCurrentState); // 如果 mCurrentState 无效,则崩溃 } ``` **步骤 3: 检查源码和日志** - 查看相关源码: - `pthread_mutex_lock` 用法:参考 `egl_init_drivers`(如引用[1][^1]),它使用锁保护初始化过程。 - `writeToProto` 实现:在 SurfaceFlinger 中搜索(如 `grep -r "writeToProto" 
frameworks/native/services/surfaceflinger/`)。 - 分析 logcat 日志: - 查找 `DEBUG` 或 `ERROR` 标签,如 `SurfaceFlinger`,确认崩溃前是否有警告(如 `"mutex not initialized"`)。 - 死锁时,日志可能出现 `"thread blocked indefinitely"`。 **步骤 4: 复现和调试** - **复现崩溃**:使用高负载测试(如连续旋转屏幕),触发竞争条件。 - **工具辅助**: - **Lock Tracing**:启用 `DEBUG_LOCKS` 标志,记录锁操作: ```bash adb shell setprop debug.sf.lock_trace 1 ``` - **Address Sanitizer (ASan)**:编译 SurfaceFlinger 时启用 ASan,检测内存错误: ```bash mmm frameworks/native/services/surfaceflinger SANITIZE_TARGET=address ``` - **GDB/LLDB**:实时调试,检查变量值。 #### 4. **常见解决方案** - **死锁问题**: - 确保加解锁成对出现,使用 `AutoLock` 自动管理。 - 避免嵌套锁;或使用 `pthread_mutex_trylock` 设置超时。 - **内存错误**: - 在 `writeToProto` 中添加空指针检查: ```cpp if (mCurrentState == nullptr) return; // 防御性编程 ``` - 使用 ASan 或 Valgrind 检测内存泄漏。 - **锁初始化**:确认 mutex 在全局或静态变量中正确初始化(如 `pthread_mutex_init`)。 - **更新和补丁**:检查 Android 版本;某些崩溃可能由系统 bug 引起,需更新或应用补丁。 #### 5. **总结** 分析 Android native 崩溃的关键是结合堆栈符号化、源码审查和日志检查。针对您的问题: - 如果崩溃在 `pthread_mutex_lock`,优先排查死锁(检查所有线程堆栈)。 - 如果涉及 `writeToProto`,检查内存访问和锁使用(如 `State.cpp` 中的序列化逻辑)。 - 使用工具如 `ndk-stack` 和 ASan 简化调试。 通过以上步骤,您应能定位到根本原因。如果提供具体崩溃堆栈,我可以给出更针对性的建议。 ### 相关问题 1. 如何预防 Android 中的死锁问题? 2. SurfaceFlinger 的工作原理及其常见崩溃场景有哪些? 3. 如何使用 Android NDK 工具调试 native 代码崩溃? 4. 协议缓冲区(protobuf)在 Android 系统中有哪些应用? 5. 如何优化 mutex 的使用以提高 SurfaceFlinger 的性能?
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值