一 应用接口函数
spinlock_t my_lock;
unsigned long irqflags;
spin_lock_init(&my_lock);
spin_lock_irqsave(&my_lock, irqflags);
spin_unlock_irqrestore(&my_lock, irqflags);
二 详细说明
1 数据结构
/* Top-level spinlock type: a thin wrapper around struct raw_spinlock.
 * The anonymous union lets lockdep overlay its dep_map on the same
 * storage when CONFIG_DEBUG_LOCK_ALLOC is on, without growing the
 * structure in production builds. */
typedef struct spinlock {
union {
struct raw_spinlock rlock;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Pad up to the dep_map offset inside raw_spinlock so the dep_map
 * below aliases the one embedded in rlock. */
# define LOCK_PADSIZE (offsetof(struct raw_spinlock, dep_map))
struct {
u8 __padding[LOCK_PADSIZE];
struct lockdep_map dep_map;
};
#endif
};
} spinlock_t;
/* Architecture-independent raw spinlock.  In production builds only
 * raw_lock exists; the other fields appear under the various debug
 * config options. */
typedef struct raw_spinlock {
arch_spinlock_t raw_lock; /* the arch-specific lock word */
#ifdef CONFIG_GENERIC_LOCKBREAK
unsigned int break_lock;
#endif
#ifdef CONFIG_DEBUG_SPINLOCK
unsigned int magic, owner_cpu;
void *owner;
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map; /* lockdep tracking state */
#endif
} raw_spinlock_t;
/* ARM ticket-lock word: the 32-bit slock can be viewed as a whole or
 * as two 16-bit tickets.  Field order depends on byte order so that
 * "next" always ends up in the high half of slock. */
typedef struct {
union {
u32 slock;
struct __raw_tickets {
#ifdef __ARMEB__
/* big-endian */
u16 next;
u16 owner;
#else
/* little-endian (the common case) */
u16 owner;
u16 next;
#endif
} tickets;
};
} arch_spinlock_t;
arch_spinlock_t 是架构相关的类型,其中 slock 和 __raw_tickets 两个成员共用同一存储(union)。__ARMEB__ 代表大端(Big-Endian),此时 next/owner 的声明顺序相反;在默认的小端布局下,slock 被划分成两部分,owner 占低 16 位、next 占高 16 位,如下所示:
u32 slock
-----------------------------------------------------------------------------------
| u16 next | u16 owner |
-----------------------------------------------------------------------------------
bit31 bit16 bit15 bit0
2 spin_lock_init()
/* Run-time initialisation of a spinlock.  spinlock_check() only
 * verifies at compile time that _lock really is a spinlock_t *; the
 * actual initialisation is done on the embedded rlock. */
#define spin_lock_init(_lock) \
do { \
spinlock_check(_lock); \
raw_spin_lock_init(&(_lock)->rlock); \
} while (0)
/*
 * Compile-time type check: anything other than a spinlock_t * fails
 * here, because only spinlock_t has an ->rlock member.  Returns the
 * embedded raw lock for use by the raw_* API.
 */
static inline raw_spinlock_t *spinlock_check(spinlock_t *lock)
{
raw_spinlock_t *raw = &lock->rlock;
return raw;
}
spinlock_check 通过访问 rlock 成员来检查参数是否是 spinlock_t 类型:如果不是,编译的时候就会产生 warning 或者 error(这是一个纯编译期检查,没有运行时开销)。
/* Reset *lock to the unlocked state by assigning a compound literal
 * built from the initializer below. */
# define raw_spin_lock_init(lock) \
do { *(lock) = __RAW_SPIN_LOCK_UNLOCKED(lock); } while (0)
/* Cast the initializer list to raw_spinlock_t so it can be assigned. */
#define __RAW_SPIN_LOCK_UNLOCKED(lockname) \
(raw_spinlock_t) __RAW_SPIN_LOCK_INITIALIZER(lockname)
/* Unlocked initializer; the DEBUG/DEP_MAP parts expand to nothing
 * unless the matching config options are enabled. */
#define __RAW_SPIN_LOCK_INITIALIZER(lockname) \
{ \
.raw_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
SPIN_DEBUG_INIT(lockname) \
SPIN_DEP_MAP_INIT(lockname) }
/* ARM: an all-zero lock word, i.e. tickets.next == tickets.owner == 0 */
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
因为 raw_lock 被初始化为全 0,所以 slock = 0,即 tickets.next = tickets.owner = 0,表示锁处于未使用状态。
/* (Repeated from section 1 for easy reference: the ARM ticket-lock
 * word whose next/owner halves are manipulated below.) */
typedef struct {
union {
u32 slock;
struct __raw_tickets {
#ifdef __ARMEB__
u16 next;
u16 owner;
#else
u16 owner;
u16 next;
#endif
} tickets;
};
} arch_spinlock_t;
3 spin_lock_irqsave()
/* Type-check the lock and forward to the raw variant.  flags must be
 * an unsigned long lvalue (enforced by typecheck() below), which is
 * why these are macros rather than functions: flags is assigned, not
 * passed by value. */
#define spin_lock_irqsave(lock, flags) \
do { \
raw_spin_lock_irqsave(spinlock_check(lock), flags); \
} while (0)
#define raw_spin_lock_irqsave(lock, flags) \
do { \
typecheck(unsigned long, flags); \
flags = _raw_spin_lock_irqsave(lock); \
} while (0)
/* Out-of-line entry point; returns the saved IRQ flags so the caller's
 * macro can store them into its flags variable. */
unsigned long __lockfunc _raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
return __raw_spin_lock_irqsave(lock);
}
/* Core of spin_lock_irqsave(): disable local IRQs and preemption,
 * notify lockdep, then actually take the lock. */
static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
{
unsigned long flags;
local_irq_save(flags);/* disable local interrupts, saving the old state */
preempt_disable();
/* Disable kernel preemption: if this CPU were preempted while holding
 * the lock, another task scheduled on the same CPU could try to
 * spin_lock() the same lock; since a spinlock never sleeps, that task
 * would spin forever -- a deadlock.  Hence preemption must be off for
 * the duration of the lock. */
spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
/*
 * On lockdep we dont want the hand-coded irq-enable of
 * do_raw_spin_lock_flags() code, because lockdep assumes
 * that interrupts are not re-enabled during lock-acquire:
 */
#ifdef CONFIG_LOCKDEP
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
#else
do_raw_spin_lock_flags(lock, &flags);
#endif
return flags;
}
/* lockdep instrumentation hooks.  With CONFIG_DEBUG_LOCK_ALLOC these
 * feed lock_acquire()/lock_release() for deadlock analysis (one of the
 * arguments differs under CONFIG_PROVE_LOCKING); with debugging off
 * they compile away to nothing. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
# ifdef CONFIG_PROVE_LOCKING
# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
# define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
# else
# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
# define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, NULL, i)
# endif
# define spin_release(l, n, i) lock_release(l, n, i)
#else
/* debugging off: no-ops */
# define spin_acquire(l, s, t, i) do { } while (0)
# define spin_release(l, n, i) do { } while (0)
#endif
spin_acquire()是供debug时使用,正常时定义为空。
/* Lock-statistics wrapper.  With CONFIG_LOCK_STAT, contention and
 * acquisition events are recorded; otherwise LOCK_CONTENDED degenerates
 * to a plain lock(_lock). */
#ifdef CONFIG_LOCK_STAT
extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
#define LOCK_CONTENDED(_lock, try, lock) \
do { \
if (!try(_lock)) { \
lock_contended(&(_lock)->dep_map, _RET_IP_); \
lock(_lock); \
} \
lock_acquired(&(_lock)->dep_map, _RET_IP_); \
} while (0)
/* First attempt the lock with do_raw_spin_trylock().  Only if that
 * fails is the contention recorded and do_raw_spin_lock() called,
 * which busy-waits (spins) until the lock is free -- it never sleeps.
 */
#else /* CONFIG_LOCK_STAT */
#define lock_contended(lockdep_map, ip) do {} while (0)
#define lock_acquired(lockdep_map, ip) do {} while (0)
#define LOCK_CONTENDED(_lock, try, lock) \
lock(_lock)
#endif /* CONFIG_LOCK_STAT */
3.1 spin_trylock
/* Non-blocking acquire: returns 1 if the lock was taken, 0 if it was
 * already held by someone else. */
static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
{
return arch_spin_trylock(&(lock)->raw_lock);
}
/* ARM ticket-lock trylock: atomically take a ticket only if the lock
 * is currently free (next == owner).  Returns 1 on success, 0 if busy. */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned long tmp;
u32 slock;
__asm__ __volatile__(
" ldrex %0, [%2]\n" /* slock = lock->slock, exclusive load (%N = Nth operand) */
" subs %1, %0, %0, ror #16\n" /* tmp = slock - ror(slock,16): 0 iff next == owner */
" addeq %0, %0, %3\n" /* lock free: bump tickets.next */
" strexeq %1, %0, [%2]" /* exclusive store-back; tmp = 0 on success */
: "=&r" (slock), "=&r" (tmp) /* operand list; "=" marks outputs */
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
if (tmp == 0) {
smp_mb(); /* acquire barrier: critical section must not move before the lock */
return 1;
} else {
return 0;
}
}
%0->slock %1->tmp %2->&lock->slock %3->1 << TICKET_SHIFT(1 << 16)
slock = lock->slock; // ldrex 独占读取锁字
tmp = slock - ror(slock, 16); // ror 是循环右移:右移 16 位相当于交换高低 16 位;next == owner(锁可用)时 tmp = 0,否则 tmp 为非 0
if (tmp == 0) slock = slock + (1 << 16);//相当于slock.tickets.next+1
strex和ldrex是一对指令,只有ARM core版本>=6才有。可以请bus监控从ldrex到strex之间有无其他的CPU或DMA来存取这个位址,若有的话,strex会在第一个寄存器里设定值为1(non-exclusive by this CPU)并且令store动作失败,若没有,strex会在第一个寄存器里设定值为0(exclusive access by this CPU),并且令store动作成功。
if (tmp == 0) { 尝试 lock->slock = slock;store 成功则 tmp = 0,失败则 tmp = 1; } ldrex/strex 保证了操作的原子性:如果检查到锁可用且独占 store 成功,即上锁成功,tmp = 0;否则 tmp = 1。
arch_spin_trylock()函数是一个原子操作,它测试lock是否被上锁,若未上锁,上锁;返回1;否则返回0。
3.2 spin_lock
/* CONFIG_DEBUG_SPINLOCK variant: sanity-check the lock before and
 * after; on contention fall into __spin_lock_debug(), which spins and
 * can report lockups. */
void do_raw_spin_lock(raw_spinlock_t *lock)
{
debug_spin_lock_before(lock);
if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
__spin_lock_debug(lock);
debug_spin_lock_after(lock);
}
/* Non-debug variant: go straight to the architecture lock.
 * __acquires()/__acquire() are sparse annotations with no runtime
 * effect. */
static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
{
__acquire(lock);
arch_spin_lock(&lock->raw_lock);
}
/* ARM ticket-lock acquire: atomically grab a ticket (old lock word in
 * lockval, lock->tickets.next incremented), then wait until the owner
 * field reaches our ticket number. */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned long tmp;
u32 newval;
arch_spinlock_t lockval;
__asm__ __volatile__(
"1: ldrex %0, [%3]\n"
" add %1, %0, %4\n"
" strex %2, %1, [%3]\n"
" teq %2, #0\n" /* sets the Z flag in CPSR from the strex result */
" bne 1b"
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
: "cc");
/* Our ticket is lockval.tickets.next; spin until it is being served. */
while (lockval.tickets.next != lockval.tickets.owner) {
wfe(); /* low-power wait until a sev from the unlocker wakes us */
lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
}
smp_mb(); /* acquire barrier before entering the critical section */
}
%0->lockval %1->newval %2->tmp %3->&lock->slock %4->(1 << TICKET_SHIFT)
lockval = lock->slock; // ldrex 独占读取当前锁字(读的是值,不是地址)
newval = lockval + (1 << TICKET_SHIFT);//相当于lockval.tickets.next+1
lock->slock = newval; // strex 独占写回;tmp 的值取决于该原子操作是否成功,成功为 0
if (tmp != 0) 跳转到第一个tag(标号1)处,goto 1;//store 失败就不停地跳回去重试
标号f: 表示往前跳,顺序执行的话是没有运行过的程序,front的意思。
标号b: 表示跳转到以前执行过的语句,第一个1标号处,back的意思。
if (lockval.tickets.next != lockval.tickets.owner),代表锁已被锁定,则执行wfe指令。则通过wfe指令进入suspend mode(clock停止),直到该锁被释放时发出的sev指令,CPU才会跳出suspend mode,然后检查,if (lockval.tickets.next != lockval.tickets.owner)就继续等。
4 spin_unlock_irqrestore()
/* Release the lock and restore the IRQ state saved by
 * spin_lock_irqsave(). */
static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
raw_spin_unlock_irqrestore(&lock->rlock, flags);
}
/* Macro so typecheck() can enforce that flags is an unsigned long. */
#define raw_spin_unlock_irqrestore(lock, flags) \
do { \
typecheck(unsigned long, flags); \
_raw_spin_unlock_irqrestore(lock, flags); \
} while (0)
/* Out-of-line entry point for the unlock path. */
void __lockfunc _raw_spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags)
{
__raw_spin_unlock_irqrestore(lock, flags);
}
/* Mirror image of __raw_spin_lock_irqsave(): notify lockdep, release
 * the lock, then undo the IRQ and preemption disabling in reverse
 * order of acquisition. */
static inline void __raw_spin_unlock_irqrestore(raw_spinlock_t *lock,
unsigned long flags)
{
spin_release(&lock->dep_map, 1, _RET_IP_);
do_raw_spin_unlock(lock);
local_irq_restore(flags);/* restore the saved interrupt state */
preempt_enable();/* re-enable kernel preemption */
}
/* Forward to the architecture unlock; __releases()/__release() are
 * sparse annotations with no runtime effect. */
static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
{
arch_spin_unlock(&lock->raw_lock);
__release(lock);
}
/* ARM ticket-lock release: advance owner so the next ticket holder may
 * proceed.  Only the lock holder writes owner, so a plain increment is
 * sufficient. */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb(); /* release barrier: critical section completes before unlock */
lock->tickets.owner++;
dsb_sev(); /* publish the store, then wake CPUs sleeping in wfe() */
}
/* Data synchronisation barrier followed by SEV (send event): makes the
 * owner update visible, then wakes cores waiting in wfe() inside
 * arch_spin_lock(). */
static inline void dsb_sev(void)
{
#if __LINUX_ARM_ARCH__ >= 7
__asm__ __volatile__ (
"dsb\n"
SEV
);
#else
/* pre-ARMv7: DSB is the CP15 operation c7, c10, 4 */
__asm__ __volatile__ (
"mcr p15, 0, %0, c7, c10, 4\n"
SEV
: : "r" (0)
);
#endif
}
可以看出,如果有其他的task再次调用spin_lock,就会因为lock时的lockval.tickets.next++,
从而,lockval.tickets.next != lockval.tickets.owner一直在loop中。直到持锁的task调用spin_unlock,lockval.tickets.owner++后,才会跳出loop。
这个机制是在Linux-2.6.25中加入内核,Linux-3.6中加入ARM架构的,也就是接下来介绍的,大名鼎鼎的 Ticket spinlocks排队自旋锁。
三 Ticket spinlocks
排队自旋锁仍然使用原有的raw_spinlock_t数据结构,但是赋予slock域新的含义。为了保存顺序信息,slock域被分成两部分,分别保存锁持有者和未来锁申请者的排队序号(Ticket Number),如下所示:
u32 slock
-----------------------------------------------------------------------------------
| u16 next | u16 owner |
-----------------------------------------------------------------------------------
bit31 bit16 bit15 bit0
owner和next域均为16位,其中owner域为slock的低 16 位。可见排队自旋锁最多支持 2^16=65536个处理器。只有next域与owner域相等时,才表明锁处于未使用状态(此时也无人申请该锁)。排队自旋锁初始化时slock被置为0,即owner和next置为0。内核执行线程申请自旋锁时,原子地将next域加1,并将原next值作为自己的排队序号。如果该排队序号等于申请时的owner值,说明自旋锁处于未使用状态,则直接获得锁;否则,该线程忙等待检查owner域是否等于自己持有的排队序号,一旦相等,则表明锁轮到自己获取。线程释放锁时,原子地将owner域加1即可,下一个线程将会发现这一变化,从忙等待状态中退出。线程将严格地按照申请顺序依次获取排队自旋锁,从而完全解决了“不公平”问题。