您现在的位置是：首页 > Linux OSLinux OS

深入理解Linux中断机制(下)---程磊篇

转载2022-08-07【Linux OS】人已围观

作者简介：程磊，一线码农，在某手机公司担任系统开发工程师，日常喜欢研究内核基本原理。

5.3 软中断(softirq)

软中断是把中断处理程序分成了两段：前一段叫做硬中断，执行驱动的ISR，处理与硬件密切相关的事，在此期间是禁止中断的；后一段叫做软中断，软中断中处理和硬件不太密切的事务，在此期间是开中断的，可以继续接受硬件中断。软中断的设计提高了系统对中断的响应性。下面我们先说软中断的执行时机，然后再说软中断的使用接口。

软中断也是中断处理程序的一部分，是在ISR执行完成之后运行的，在ISR中可以向软中断中添加任务，然后软中断有事要做就会运行了。有些时候当软中断过多，处理不过来的时候，也会唤醒ksoftirqd/x线程来执行软中断。

linux-src/kernel/irq/irqdesc.c

/*
 * Dispatch hardware interrupt @hwirq of @domain to its irq descriptor.
 *
 * @regs is installed as the current irq register set for the duration of
 * the call; the previous value is restored before returning.  The
 * descriptor lookup and handler invocation are bracketed by irq_enter()
 * and irq_exit().
 *
 * Returns 0 when a descriptor was found and handled, -EINVAL when @hwirq
 * does not resolve to a descriptor in @domain.
 */
int handle_domain_irq(struct irq_domain *domain,
          unsigned int hwirq, struct pt_regs *regs)
{
  struct pt_regs *saved_regs = set_irq_regs(regs);
  struct irq_desc *desc;
  int ret = -EINVAL;

  irq_enter();

  /* The irqdomain code provides boundary checks */
  desc = irq_resolve_mapping(domain, hwirq);
  if (likely(desc)) {
    handle_irq_desc(desc);
    ret = 0;
  }

  irq_exit();
  set_irq_regs(saved_regs);

  return ret;
}

linux-src/kernel/softirq.c

/*
 * Leave hard-interrupt context: run the common exit work in
 * __irq_exit_rcu() (which may process pending softirqs), then notify
 * RCU, and finally lockdep.
 */
void irq_exit(void)
{
  __irq_exit_rcu();
  rcu_irq_exit();
   /* must be last! */
  lockdep_hardirq_exit();
}

/*
 * Common tail of hard-interrupt exit.  Drops the HARDIRQ part of the
 * preempt count and, if that leaves the CPU outside any interrupt
 * context with softirqs pending, processes them via invoke_softirq().
 */
static inline void __irq_exit_rcu(void)
{
  /*
   * Interrupts must be off from here on: either disable them now or,
   * when the architecture guarantees it, just assert that they are.
   */
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
  local_irq_disable();
#else
  lockdep_assert_irqs_disabled();
#endif
  account_hardirq_exit(current);
  preempt_count_sub(HARDIRQ_OFFSET);
  /*
   * Softirqs are only run when in_interrupt() is false after the
   * HARDIRQ count was dropped, i.e. this hardirq did not interrupt
   * another interrupt/softirq or softirq-disabled code.
   */
  if (!in_interrupt() && local_softirq_pending())
    invoke_softirq();

  tick_irq_exit();
}

/*
 * Called at hard-interrupt exit when softirqs are pending: either run
 * them right away or hand them over to this CPU's ksoftirqd thread.
 *
 * Nothing is done when ksoftirqd_running() reports true for the pending
 * mask.  When irq threading is forced and this CPU has a ksoftirqd, the
 * thread is woken instead of processing the softirqs inline.
 */
static inline void invoke_softirq(void)
{
  if (ksoftirqd_running(local_softirq_pending()))
    return;

  /* Forced threading with a ksoftirqd available: defer to the thread. */
  if (force_irqthreads() && __this_cpu_read(ksoftirqd)) {
    wakeup_softirqd();
    return;
  }

#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
  /*
   * We are on the irq stack, which should be near empty at this
   * stage, so the softirqs can safely run on the current stack.
   */
  __do_softirq();
#else
  /*
   * irq_exit() was called on the task stack, which may already be
   * deep.  Run the softirqs on their own stack to prevent any
   * overrun.
   */
  do_softirq_own_stack();
#endif
}

/*
 * Run the handlers of all softirqs pending on this CPU.
 *
 * The pending mask is sampled and cleared while interrupts are still
 * disabled; interrupts are then enabled for the handler loop and
 * disabled again afterwards, so hardware interrupts can be taken while
 * softirq handlers execute.  Bits are serviced from the lowest softirq
 * number upwards.
 *
 * If new softirqs were raised in the meantime, the loop is restarted as
 * long as the time budget (MAX_SOFTIRQ_TIME) has not expired, no
 * reschedule is needed and fewer than MAX_SOFTIRQ_RESTART passes were
 * made; otherwise the remaining work is handed to ksoftirqd.
 */
asmlinkage __visible void __softirq_entry __do_softirq(void)
{
  unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
  unsigned long old_flags = current->flags;
  int max_restart = MAX_SOFTIRQ_RESTART;
  struct softirq_action *h;
  bool in_hardirq;
  __u32 pending;
  int softirq_bit;

  /*
   * Mask out PF_MEMALLOC as the current task context is borrowed for the
   * softirq. A softirq handled, such as network RX, might set PF_MEMALLOC
   * again if the socket is related to swapping.
   */
  current->flags &= ~PF_MEMALLOC;

  pending = local_softirq_pending();

  softirq_handle_begin();
  in_hardirq = lockdep_softirq_start();
  account_softirq_enter(current);

restart:
  /* Reset the pending bitmask before enabling irqs */
  set_softirq_pending(0);

  local_irq_enable();

  h = softirq_vec;

  /*
   * ffs() returns the 1-based position of the lowest set bit, so each
   * iteration advances h to the next pending vector and then shifts
   * the already-visited bits out of the mask.
   */
  while ((softirq_bit = ffs(pending))) {
    unsigned int vec_nr;
    int prev_count;

    h += softirq_bit - 1;

    vec_nr = h - softirq_vec;
    prev_count = preempt_count();

    kstat_incr_softirqs_this_cpu(vec_nr);

    trace_softirq_entry(vec_nr);
    h->action(h);
    trace_softirq_exit(vec_nr);
    /* A handler must leave the preempt count as it found it. */
    if (unlikely(prev_count != preempt_count())) {
      pr_err("huh, entered softirq %u %s %p with preempt_count %08x, exited with %08x?\n",
             vec_nr, softirq_to_name[vec_nr], h->action,
             prev_count, preempt_count());
      preempt_count_set(prev_count);
    }
    h++;
    pending >>= softirq_bit;
  }

  if (!IS_ENABLED(CONFIG_PREEMPT_RT) &&
      __this_cpu_read(ksoftirqd) == current)
    rcu_softirq_qs();

  local_irq_disable();

  /* Anything raised while the handlers ran? */
  pending = local_softirq_pending();
  if (pending) {
    if (time_before(jiffies, end) && !need_resched() &&
        --max_restart)
      goto restart;

    /* Budget exhausted: let ksoftirqd deal with the rest. */
    wakeup_softirqd();
  }

  account_softirq_exit(current);
  lockdep_softirq_end(in_hardirq);
  softirq_handle_end();
  current_restore_flags(old_flags, PF_MEMALLOC);
}

可以看到__do_softirq在执行软中断前会打开中断local_irq_enable()，在执行完软中断之后又会关闭中断local_irq_disable()。所以软中断执行期间CPU是可以接收硬件中断的。

下面我们再来看一下软中断的使用接口。软中断定义了一个softirq_action类型的数组，数组大小是NR_SOFTIRQS，代表软中断的类型，目前只有10种软中断类型。softirq_action结构体里面仅仅只有一个函数指针。当我们要设置某一类软中断的处理函数时使用接口open_softirq。当我们想要触发某一类软中断的执行时使用接口raise_softirq。

下面我们来看一下代码：

linux-src/include/linux/interrupt.h

/*
 * Softirq numbers.  Each value is both the index of the softirq in
 * softirq_vec[] and its bit position in the pending mask; __do_softirq()
 * scans that mask from the lowest bit, so smaller numbers are serviced
 * first.  NR_SOFTIRQS is the number of softirq types.
 */
enum {
  HI_SOFTIRQ       = 0,
  TIMER_SOFTIRQ    = 1,
  NET_TX_SOFTIRQ   = 2,
  NET_RX_SOFTIRQ   = 3,
  BLOCK_SOFTIRQ    = 4,
  IRQ_POLL_SOFTIRQ = 5,
  TASKLET_SOFTIRQ  = 6,
  SCHED_SOFTIRQ    = 7,
  HRTIMER_SOFTIRQ  = 8,
  RCU_SOFTIRQ      = 9,  /* Preferable RCU should always be the last softirq */

  NR_SOFTIRQS
};

/*
 * One softirq vector entry.  @action is the handler; it is invoked with
 * a pointer to its own entry.
 */
struct softirq_action {
  void (*action)(struct softirq_action *h);
};

linux-src/kernel/softirq.c

/* Handler table indexed by softirq number; entries are set by open_softirq(). */
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

/*
 * Register @action as the handler of softirq number @nr by storing it in
 * softirq_vec[nr].  @nr is not range-checked here; callers pass one of
 * the *_SOFTIRQ constants.
 */
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
  softirq_vec[nr].action = action;
}

/*
 * Mark softirq @nr as pending on the local CPU.  Wrapper around
 * raise_softirq_irqoff() for callers that may have interrupts enabled:
 * the irq state is saved, interrupts are disabled around the call, and
 * the previous state is restored afterwards.
 */
void raise_softirq(unsigned int nr)
{
  unsigned long flags;

  local_irq_save(flags);
  raise_softirq_irqoff(nr);
  local_irq_restore(flags);
}

/*
 * Mark softirq @nr as pending and make sure it gets processed soon.
 * Must be called with interrupts disabled (asserted by
 * __raise_softirq_irqoff()).
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
  __raise_softirq_irqoff(nr);

  /*
   * In interrupt or softirq context (this also catches
   * softirq-disabled code) we are done: the softirq will actually
   * run once we return from the irq or softirq.
   */
  if (in_interrupt())
    return;

  /* Otherwise wake up ksoftirqd so the softirq is scheduled soon. */
  if (should_wake_ksoftirqd())
    wakeup_softirqd();
}

/*
 * Set the pending bit of softirq @nr on the local CPU without waking
 * anything.  Interrupts must already be disabled; this is asserted via
 * lockdep.
 */
void __raise_softirq_irqoff(unsigned int nr)
{
  lockdep_assert_irqs_disabled();
  trace_softirq_raise(nr);
  /* The softirq number is the bit position in the pending mask. */
  or_softirq_pending(1UL << nr);
}

所有软中断的处理函数都是在系统启动的初始化函数里面用open_softirq接口设置的。raise_softirq一般是在硬中断或者软中断中用来往软中断上push work使得软中断可以被触发执行或者继续执行。

5.4 微任务(tasklet)

新代码要想使用softirq就必须修改内核的核心代码，添加新的softirq类型，这对于很多驱动程序来说是做不到的，于是内核在softirq的基础上开发了tasklet。使用tasklet不需要修改内核的核心代码，驱动程序直接使用tasklet的接口就可以了。

Tasklet其实是一种特殊的softirq，它是在softirq的基础上进行了扩展。它利用的就是softirq中的HI_SOFTIRQ和TASKLET_SOFTIRQ。softirq在初始化的时候会设置这两个softirq类型。然后其处理函数会去处理tasklet的链表。我们在使用tasklet的时候只需要定义一个tasklet_struct，并用我们想要执行的函数初始化它，然后再用tasklet_schedule把它放入到队列中，它就会被执行了。下面我们来看一下代码：

linux-src/kernel/softirq.c

/*
 * Boot-time setup of the tasklet machinery: make both per-CPU tasklet
 * lists empty on every possible CPU and register the two softirq
 * handlers that drain them.
 */
void __init softirq_init(void)
{
  int cpu;

  for_each_possible_cpu(cpu) {
    struct tasklet_head *normal = &per_cpu(tasklet_vec, cpu);
    struct tasklet_head *high = &per_cpu(tasklet_hi_vec, cpu);

    /* An empty list has its tail pointing at its own head slot. */
    normal->tail = &normal->head;
    high->tail = &high->head;
  }

  open_softirq(TASKLET_SOFTIRQ, tasklet_action);
  open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

/*
 * Handler registered for TASKLET_SOFTIRQ: processes this CPU's
 * tasklet_vec list.
 */
static __latent_entropy void tasklet_action(struct softirq_action *a)
{
  tasklet_action_common(a, this_cpu_ptr(&tasklet_vec), TASKLET_SOFTIRQ);
}

/*
 * Handler registered for HI_SOFTIRQ: processes this CPU's
 * tasklet_hi_vec list.
 */
static __latent_entropy void tasklet_hi_action(struct softirq_action *a)
{
  tasklet_action_common(a, this_cpu_ptr(&tasklet_hi_vec), HI_SOFTIRQ);
}

/*
 * Run the tasklets queued on @tl_head.
 *
 * @a:          softirq vector entry this handler was invoked for (unused here)
 * @tl_head:    the per-CPU tasklet list to drain
 * @softirq_nr: softirq to re-raise for tasklets that could not run now
 *
 * The whole list is detached with interrupts disabled and then walked
 * with interrupts enabled.  A tasklet that cannot be run right now is
 * appended back to @tl_head and @softirq_nr is raised again so that it is
 * retried on a later pass.
 */
static void tasklet_action_common(struct softirq_action *a,
          struct tasklet_head *tl_head,
          unsigned int softirq_nr)
{
  struct tasklet_struct *list;

  /* Take the current list and leave an empty one behind. */
  local_irq_disable();
  list = tl_head->head;
  tl_head->head = NULL;
  tl_head->tail = &tl_head->head;
  local_irq_enable();

  while (list) {
    struct tasklet_struct *t = list;

    list = list->next;

    /*
     * Run the tasklet only if its lock could be taken and its count
     * is zero (presumably non-zero means "disabled" -- confirm
     * against tasklet_disable()).
     */
    if (tasklet_trylock(t)) {
      if (!atomic_read(&t->count)) {
        if (tasklet_clear_sched(t)) {
          if (t->use_callback)
            t->callback(t);
          else
            t->func(t->data);
        }
        tasklet_unlock(t);
        continue;
      }
      tasklet_unlock(t);
    }

    /* Could not run it: requeue at the tail and raise the softirq again. */
    local_irq_disable();
    t->next = NULL;
    *tl_head->tail = t;
    tl_head->tail = &t->next;
    __raise_softirq_irqoff(softirq_nr);
    local_irq_enable();
  }
}

/*
 * Per-CPU tasklet lists: tasklet_vec is drained by the TASKLET_SOFTIRQ
 * handler, tasklet_hi_vec by the HI_SOFTIRQ handler.
 */
static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

/*
 * Queue tasklet @t at the tail of this CPU's instance of the per-CPU
 * list @headp and raise @softirq_nr so the list gets processed.  The
 * list update and the raise happen with local interrupts disabled.
 */
static void __tasklet_schedule_common(struct tasklet_struct *t,
              struct tasklet_head __percpu *headp,
              unsigned int softirq_nr)
{
  struct tasklet_head *queue;
  unsigned long irq_state;

  local_irq_save(irq_state);

  /* Append @t: it becomes the new last element. */
  queue = this_cpu_ptr(headp);
  t->next = NULL;
  *queue->tail = t;
  queue->tail = &t->next;

  raise_softirq_irqoff(softirq_nr);

  local_irq_restore(irq_state);
}

/*
 * Queue @t on this CPU's normal tasklet list (tasklet_vec) and raise
 * TASKLET_SOFTIRQ.  Does not touch TASKLET_STATE_SCHED; see
 * tasklet_schedule() for the checked entry point.
 */
void __tasklet_schedule(struct tasklet_struct *t)
{
  __tasklet_schedule_common(t, &tasklet_vec,
          TASKLET_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_schedule);

/*
 * Queue @t on this CPU's high-priority tasklet list (tasklet_hi_vec) and
 * raise HI_SOFTIRQ.  Does not touch TASKLET_STATE_SCHED; see
 * tasklet_hi_schedule() for the checked entry point.
 */
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
  __tasklet_schedule_common(t, &tasklet_hi_vec,
          HI_SOFTIRQ);
}
EXPORT_SYMBOL(__tasklet_hi_schedule);

linux-src/include/linux/interrupt.h

/*
 * Schedule tasklet @t for execution via TASKLET_SOFTIRQ.  A tasklet
 * whose TASKLET_STATE_SCHED bit is already set is not queued again.
 */
static inline void tasklet_schedule(struct tasklet_struct *t)
{
  /* Already marked as scheduled: nothing to queue. */
  if (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
    return;

  __tasklet_schedule(t);
}

/*
 * Schedule tasklet @t for execution via HI_SOFTIRQ.  A tasklet whose
 * TASKLET_STATE_SCHED bit is already set is not queued again.
 */
static inline void tasklet_hi_schedule(struct tasklet_struct *t)
{
  /* Already marked as scheduled: nothing to queue. */
  if (test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
    return;

  __tasklet_hi_schedule(t);
}

Tasklet和softirq有一个很大的区别就是，同一个softirq可以在不同的CPU上并发执行，而同一个tasklet不会在多个CPU上并发执行。所以我们在编程的时候，如果使用的是tasklet就不用考虑多CPU之间的同步问题。

还有很重要的一点，tasklet不是独立的，它是softirq的一部分，禁用软中断的同时也禁用了tasklet。

5.5 中断线程(threaded_irq)

前面讲的硬中断，它是外设中断处理中必不可少的一部分。Softirq和tasklet虽然不会禁用中断，提高了系统对中断的响应性，但是softirq的执行优先级还是比进程的优先级高，有些确实不那么重要的任务其实可以放到进程里执行，和普通进程共同竞争CPU。而且软中断里不能调用会阻塞、休眠的函数，这对软中断函数的编程是很不利的，所以综合各种因素，我们需要把中断处理任务中的与硬件无关又不太紧急的部分放到进程里面来做。为此内核开发了两种方法，中断线程和工作队列。

我们这节先讲中断线程，其接口如下：

linux-src/include/linux/interrupt.h

/*
 * Register an interrupt handler for @irq, optionally with a handler
 * thread.
 *
 * @irq:       interrupt number to request
 * @handler:   primary (hard irq) handler
 * @thread_fn: function run by the interrupt's handler thread
 * @flags:     IRQF_* flags
 * @name:      name of the requesting device
 * @dev:       cookie identifying the requester
 *
 * The return value must be checked (__must_check).
 */
extern int __must_check
request_threaded_irq(unsigned int irq, irq_handler_t handler,
         irq_handler_t thread_fn,
         unsigned long flags, const char *name, void *dev);

如果我们要为某个外设注册中断处理程序，可以使用这个接口。其中handler是硬中断，是处理与硬件密切相关的事务。其处理完成后，可以把接收到的数据、要继续处理的事情放到某个位置，然后返回是否需要唤醒对应的中断线程。如果需要的话，系统会唤醒其对应的中断线程来继续处理任务，这个线程的主函数就是第三个参数thread_fn。下面我们来看一下这个接口的实现。

linux-src/kernel/irq/manage.c

/*
 * Allocate an irqaction for @irq and install it through __setup_irq().
 *
 * @irq:       interrupt number to request
 * @handler:   primary handler; when NULL, @thread_fn must be given and
 *             irq_default_primary_handler is used as primary handler
 * @thread_fn: stored as action->thread_fn; may be NULL if @handler is set
 * @irqflags:  IRQF_* flags, sanity-checked below
 * @devname:   stored as action->name
 * @dev_id:    stored as action->dev_id; must be non-NULL with IRQF_SHARED
 *
 * Returns 0 on success, -ENOTCONN for IRQ_NOTCONNECTED, -EINVAL for an
 * invalid flag/argument combination or a line that cannot be requested,
 * -ENOMEM when the action cannot be allocated, or the error returned by
 * irq_chip_pm_get() or __setup_irq().
 */
int request_threaded_irq(unsigned int irq, irq_handler_t handler,
       irq_handler_t thread_fn, unsigned long irqflags,
       const char *devname, void *dev_id)
{
  struct irqaction *action;
  struct irq_desc *desc;
  int retval;

  if (irq == IRQ_NOTCONNECTED)
    return -ENOTCONN;

  /*
   * Sanity-check: shared interrupts must pass in a real dev-ID,
   * otherwise we'll have trouble later trying to figure out
   * which interrupt is which (messes up the interrupt freeing
   * logic etc).
   *
   * Also shared interrupts do not go well with disabling auto enable.
   * The sharing interrupt might request it while it's still disabled
   * and then wait for interrupts forever.
   *
   * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and
   * it cannot be set along with IRQF_NO_SUSPEND.
   */
  if (((irqflags & IRQF_SHARED) && !dev_id) ||
      ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) ||
      (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) ||
      ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND)))
    return -EINVAL;

  desc = irq_to_desc(irq);
  if (!desc)
    return -EINVAL;

  if (!irq_settings_can_request(desc) ||
      WARN_ON(irq_settings_is_per_cpu_devid(desc)))
    return -EINVAL;

  /* At least one of @handler and @thread_fn is required. */
  if (!handler) {
    if (!thread_fn)
      return -EINVAL;
    handler = irq_default_primary_handler;
  }

  action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
  if (!action)
    return -ENOMEM;

  action->handler = handler;
  action->thread_fn = thread_fn;
  action->flags = irqflags;
  action->name = devname;
  action->dev_id = dev_id;

  retval = irq_chip_pm_get(&desc->irq_data);
  if (retval < 0) {
    kfree(action);
    return retval;
  }

  retval = __setup_irq(irq, desc, action);

  /* Installation failed: undo the PM reference and free the action(s). */
  if (retval) {
    irq_chip_pm_put(&desc->irq_data);
    kfree(action->secondary);
    kfree(action);
  }

#ifdef CONFIG_DEBUG_SHIRQ_FIXME
  if (!retval && (irqflags & IRQF_SHARED)) {
    /*
     * It's a shared IRQ -- the driver ought to be prepared for it
     * to happen immediately, so let's make sure....
     * We disable the irq to make sure that a 'real' IRQ doesn't
     * run in parallel with our fake.
     */
    unsigned long flags;

    disable_irq(irq);
    local_irq_save(flags);

    handler(irq, dev_id);

    local_irq_restore(flags);
    enable_irq(irq);
  }
#endif
  return retval;
}

/*
 * Install @new as an action on interrupt @irq described by @desc.
 *
 * In order: takes a module reference on desc->owner, fills in the default
 * trigger type, creates the handler thread(s) when a thread_fn is
 * supplied and the interrupt is not nested, and then -- under
 * desc->request_mutex, the chip bus lock and desc->lock -- validates
 * sharing with already installed actions, assigns the ONESHOT
 * thread_mask, configures and starts the line for the first action and
 * appends @new to the action list.
 *
 * Returns 0 on success.  -EINVAL, -ENOSYS and -ENODEV are returned
 * directly by the early checks.  Later failures return a negative errno
 * after unwinding through the labels at the bottom: locks are released,
 * any threads already created are stopped and the module reference is
 * dropped.  @new itself is not freed here.
 */
static int
__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
{
  struct irqaction *old, **old_ptr;
  unsigned long flags, thread_mask = 0;
  int ret, nested, shared = 0;

  if (!desc)
    return -EINVAL;

  if (desc->irq_data.chip == &no_irq_chip)
    return -ENOSYS;
  if (!try_module_get(desc->owner))
    return -ENODEV;

  new->irq = irq;

  /*
   * If the trigger type is not specified by the caller,
   * then use the default for this interrupt.
   */
  if (!(new->flags & IRQF_TRIGGER_MASK))
    new->flags |= irqd_get_trigger_type(&desc->irq_data);

  /*
   * Check whether the interrupt nests into another interrupt
   * thread.
   */
  nested = irq_settings_is_nested_thread(desc);
  if (nested) {
    if (!new->thread_fn) {
      ret = -EINVAL;
      goto out_mput;
    }
    /*
     * Replace the primary handler which was provided from
     * the driver for non nested interrupt handling by the
     * dummy function which warns when called.
     */
    new->handler = irq_nested_primary_handler;
  } else {
    if (irq_settings_can_thread(desc)) {
      ret = irq_setup_forced_threading(new);
      if (ret)
        goto out_mput;
    }
  }

  /*
   * Create a handler thread when a thread function is supplied
   * and the interrupt does not nest into another interrupt
   * thread.
   */
  if (new->thread_fn && !nested) {
    ret = setup_irq_thread(new, irq, false);
    if (ret)
      goto out_mput;
    if (new->secondary) {
      ret = setup_irq_thread(new->secondary, irq, true);
      if (ret)
        goto out_thread;
    }
  }

  /*
   * Drivers are often written to work w/o knowledge about the
   * underlying irq chip implementation, so a request for a
   * threaded irq without a primary hard irq context handler
   * requires the ONESHOT flag to be set. Some irq chips like
   * MSI based interrupts are per se one shot safe. Check the
   * chip flags, so we can avoid the unmask dance at the end of
   * the threaded handler for those.
   */
  if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
    new->flags &= ~IRQF_ONESHOT;

  /*
   * Protects against a concurrent __free_irq() call which might wait
   * for synchronize_hardirq() to complete without holding the optional
   * chip bus lock and desc->lock. Also protects against handing out
   * a recycled oneshot thread_mask bit while it's still in use by
   * its previous owner.
   */
  mutex_lock(&desc->request_mutex);

  /*
   * Acquire bus lock as the irq_request_resources() callback below
   * might rely on the serialization or the magic power management
   * functions which are abusing the irq_bus_lock() callback.
   */
  chip_bus_lock(desc);

  /* First installed action requests resources. */
  if (!desc->action) {
    ret = irq_request_resources(desc);
    if (ret) {
      pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
             new->name, irq, desc->irq_data.chip->name);
      goto out_bus_unlock;
    }
  }

  /*
   * The following block of code has to be executed atomically
   * protected against a concurrent interrupt and any of the other
   * management calls which are not serialized via
   * desc->request_mutex or the optional bus lock.
   */
  raw_spin_lock_irqsave(&desc->lock, flags);
  old_ptr = &desc->action;
  old = *old_ptr;
  if (old) {
    /*
     * Can't share interrupts unless both agree to and are
     * the same type (level, edge, polarity). So both flag
     * fields must have IRQF_SHARED set and the bits which
     * set the trigger type must match. Also all must
     * agree on ONESHOT.
     * Interrupt lines used for NMIs cannot be shared.
     */
    unsigned int oldtype;

    if (desc->istate & IRQS_NMI) {
      pr_err("Invalid attempt to share NMI for %s (irq %d) on irqchip %s.\n",
        new->name, irq, desc->irq_data.chip->name);
      ret = -EINVAL;
      goto out_unlock;
    }

    /*
     * If nobody did set the configuration before, inherit
     * the one provided by the requester.
     */
    if (irqd_trigger_type_was_set(&desc->irq_data)) {
      oldtype = irqd_get_trigger_type(&desc->irq_data);
    } else {
      oldtype = new->flags & IRQF_TRIGGER_MASK;
      irqd_set_trigger_type(&desc->irq_data, oldtype);
    }

    if (!((old->flags & new->flags) & IRQF_SHARED) ||
        (oldtype != (new->flags & IRQF_TRIGGER_MASK)) ||
        ((old->flags ^ new->flags) & IRQF_ONESHOT))
      goto mismatch;

    /* All handlers must agree on per-cpuness */
    if ((old->flags & IRQF_PERCPU) !=
        (new->flags & IRQF_PERCPU))
      goto mismatch;

    /* add new interrupt at end of irq queue */
    do {
      /*
       * Or all existing action->thread_mask bits,
       * so we can find the next zero bit for this
       * new action.
       */
      thread_mask |= old->thread_mask;
      old_ptr = &old->next;
      old = *old_ptr;
    } while (old);
    shared = 1;
  }

  /*
   * Setup the thread mask for this irqaction for ONESHOT. For
   * !ONESHOT irqs the thread mask is 0 so we can avoid a
   * conditional in irq_wake_thread().
   */
  if (new->flags & IRQF_ONESHOT) {
    /*
     * Unlikely to have 32 resp 64 irqs sharing one line,
     * but who knows.
     */
    if (thread_mask == ~0UL) {
      ret = -EBUSY;
      goto out_unlock;
    }
    /*
     * The thread_mask for the action is or'ed to
     * desc->threads_active to indicate that the
     * IRQF_ONESHOT thread handler has been woken, but not
     * yet finished. The bit is cleared when a thread
     * completes. When all threads of a shared interrupt
     * line have completed desc->threads_active becomes
     * zero and the interrupt line is unmasked. See
     * handle.c:irq_wake_thread() for further information.
     *
     * If no thread is woken by primary (hard irq context)
     * interrupt handlers, then desc->threads_active is
     * also checked for zero to unmask the irq line in the
     * affected hard irq flow handlers
     * (handle_[fasteoi|level]_irq).
     *
     * The new action gets the first zero bit of
     * thread_mask assigned. See the loop above which or's
     * all existing action->thread_mask bits.
     */
    new->thread_mask = 1UL << ffz(thread_mask);

  } else if (new->handler == irq_default_primary_handler &&
       !(desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)) {
    /*
     * The interrupt was requested with handler = NULL, so
     * we use the default primary handler for it. But it
     * does not have the oneshot flag set. In combination
     * with level interrupts this is deadly, because the
     * default primary handler just wakes the thread, then
     * the irq line is reenabled, but the device still
     * has the level irq asserted. Rinse and repeat....
     *
     * While this works for edge type interrupts, we play
     * it safe and reject unconditionally because we can't
     * say for sure which type this interrupt really
     * has. The type flags are unreliable as the
     * underlying chip implementation can override them.
     */
    pr_err("Threaded irq requested with handler=NULL and !ONESHOT for %s (irq %d)\n",
           new->name, irq);
    ret = -EINVAL;
    goto out_unlock;
  }

  if (!shared) {
    init_waitqueue_head(&desc->wait_for_threads);

    /* Setup the type (level, edge polarity) if configured: */
    if (new->flags & IRQF_TRIGGER_MASK) {
      ret = __irq_set_trigger(desc,
            new->flags & IRQF_TRIGGER_MASK);

      if (ret)
        goto out_unlock;
    }

    /*
     * Activate the interrupt. That activation must happen
     * independently of IRQ_NOAUTOEN. request_irq() can fail
     * and the callers are supposed to handle
     * that. enable_irq() of an interrupt requested with
     * IRQ_NOAUTOEN is not supposed to fail. The activation
     * keeps it in shutdown mode, it merely associates
     * resources if necessary and if that's not possible it
     * fails. Interrupts which are in managed shutdown mode
     * will simply ignore that activation request.
     */
    ret = irq_activate(desc);
    if (ret)
      goto out_unlock;

    desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
          IRQS_ONESHOT | IRQS_WAITING);
    irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);

    if (new->flags & IRQF_PERCPU) {
      irqd_set(&desc->irq_data, IRQD_PER_CPU);
      irq_settings_set_per_cpu(desc);
      if (new->flags & IRQF_NO_DEBUG)
        irq_settings_set_no_debug(desc);
    }

    if (noirqdebug)
      irq_settings_set_no_debug(desc);

    if (new->flags & IRQF_ONESHOT)
      desc->istate |= IRQS_ONESHOT;

    /* Exclude IRQ from balancing if requested */
    if (new->flags & IRQF_NOBALANCING) {
      irq_settings_set_no_balancing(desc);
      irqd_set(&desc->irq_data, IRQD_NO_BALANCING);
    }

    if (!(new->flags & IRQF_NO_AUTOEN) &&
        irq_settings_can_autoenable(desc)) {
      irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
    } else {
      /*
       * Shared interrupts do not go well with disabling
       * auto enable. The sharing interrupt might request
       * it while it's still disabled and then wait for
       * interrupts forever.
       */
      WARN_ON_ONCE(new->flags & IRQF_SHARED);
      /* Undo nested disables: */
      desc->depth = 1;
    }

  } else if (new->flags & IRQF_TRIGGER_MASK) {
    unsigned int nmsk = new->flags & IRQF_TRIGGER_MASK;
    unsigned int omsk = irqd_get_trigger_type(&desc->irq_data);

    if (nmsk != omsk)
      /* hope the handler works with current trigger mode */
      pr_warn("irq %d uses trigger mode %u; requested %u\n",
        irq, omsk, nmsk);
  }

  /* Link the new action in: old_ptr is the tail slot of the action list. */
  *old_ptr = new;

  irq_pm_install_action(desc, new);

  /* Reset broken irq detection when installing new handler */
  desc->irq_count = 0;
  desc->irqs_unhandled = 0;

  /*
   * Check whether we disabled the irq via the spurious handler
   * before. Reenable it and give it another chance.
   */
  if (shared && (desc->istate & IRQS_SPURIOUS_DISABLED)) {
    desc->istate &= ~IRQS_SPURIOUS_DISABLED;
    __enable_irq(desc);
  }

  raw_spin_unlock_irqrestore(&desc->lock, flags);
  chip_bus_sync_unlock(desc);
  mutex_unlock(&desc->request_mutex);

  irq_setup_timings(desc, new);

  /*
   * Strictly no need to wake it up, but hung_task complains
   * when no hard interrupt wakes the thread up.
   */
  if (new->thread)
    wake_up_process(new->thread);
  if (new->secondary)
    wake_up_process(new->secondary->thread);

  register_irq_proc(irq, desc);
  new->dir = NULL;
  register_handler_proc(irq, new);
  return 0;

  /*
   * Error unwinding.  The labels are ordered so that each entry point
   * falls through the cleanup of everything acquired before it.
   */
mismatch:
  if (!(new->flags & IRQF_PROBE_SHARED)) {
    pr_err("Flags mismatch irq %d. %08x (%s) vs. %08x (%s)\n",
           irq, new->flags, new->name, old->flags, old->name);
#ifdef CONFIG_DEBUG_SHIRQ
    dump_stack();
#endif
  }
  ret = -EBUSY;

out_unlock:
  raw_spin_unlock_irqrestore(&desc->lock, flags);

  /* No action installed on the line: give the resources back. */
  if (!desc->action)
    irq_release_resources(desc);
out_bus_unlock:
  chip_bus_sync_unlock(desc);
  mutex_unlock(&desc->request_mutex);

out_thread:
  /* Stop any handler thread created above and drop its task reference. */
  if (new->thread) {
    struct task_struct *t = new->thread;

    new->thread = NULL;
    kthread_stop(t);
    put_task_struct(t);
  }
  if (new->secondary && new->secondary->thread) {
    struct task_struct *t = new->secondary->thread;

    new->secondary->thread = NULL;
    kthread_stop(t);
    put_task_struct(t);
  }
out_mput:
  module_put(desc->owner);
  return ret;
}

/*
 * Create the handler thread for action @new of interrupt @irq.
 *
 * The thread runs irq_thread() with @new as its argument and is named
 * "irq/<irq>-<name>", or "irq/<irq>-s-<name>" when @secondary is true.
 * It is switched to FIFO scheduling and flagged to set its affinity.
 * The thread is only created here, not woken.
 *
 * Returns 0 on success or the error encoded in the kthread_create()
 * result.
 */
static int
setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
{
  struct task_struct *t;

  t = kthread_create(irq_thread, new,
         secondary ? "irq/%d-s-%s" : "irq/%d-%s",
         irq, new->name);
  if (IS_ERR(t))
    return PTR_ERR(t);

  sched_set_fifo(t);

  /*
   * Hold a reference to the task struct even if the thread dies,
   * so the interrupt code never references an already freed
   * task_struct.
   */
  new->thread = get_task_struct(t);

  /*
   * Tell the thread to set its affinity. This is important for
   * shared interrupt handlers as we do not invoke setup_affinity()
   * for the secondary handlers as everything is already set up.
   * Even for interrupts marked with IRQF_NO_BALANCE this is correct
   * as we want the thread to move to the cpu(s) on which the
   * requesting code placed the interrupt.
   */
  set_bit(IRQTF_AFFINITY, &new->thread_flags);

  return 0;
}

中断线程虽然实现很复杂，但是其使用接口还是很简单的。

5.6 工作队列(workqueue)

工作队列是内核中使用最广泛的线程化中断处理机制。系统中有一些默认的工作队列，你也可以创建自己的工作队列，工作队列背后对应的是内核线程。你可以创建一个work，然后push到某个工作队列，然后这个工作队列背后的内核线程就会去执行这些work。下面我们来看一下工作队列的接口。

linux-src/include/linux/workqueue.h

/*
 * A unit of deferred work that can be queued on a workqueue.
 */
struct work_struct {
  /* Internal state word; statically initialised via WORK_DATA_STATIC_INIT(). */
  atomic_long_t data;
  /* List linkage; self-linked (empty) after static initialisation. */
  struct list_head entry;
  /* The function to run for this work item. */
  work_func_t func;
#ifdef CONFIG_LOCKDEP
  /* Lock dependency tracking, only present with CONFIG_LOCKDEP. */
  struct lockdep_map lockdep_map;
#endif
};

/*
 * Define a struct work_struct named @n, statically initialised to run
 * function @f.
 */
#define DECLARE_WORK(n, f)            \
  struct work_struct n = __WORK_INITIALIZER(n, f)

/*
 * Static initialiser for the work_struct named @n with work function @f:
 * data gets its static-init value, the list entry points at itself and
 * the lockdep map (if configured) is named after @n.
 */
#define __WORK_INITIALIZER(n, f) {          \
  .data = WORK_DATA_STATIC_INIT(),        \
  .entry  = { &(n).entry, &(n).entry },        \
  .func = (f),              \
  __WORK_INIT_LOCKDEP_MAP(#n, &(n))        \
  }

/*
 * Queue @work on the system default workqueue (system_wq).
 * Returns the result of queue_work().
 */
static inline bool schedule_work(struct work_struct *work)
{
  return queue_work(system_wq, work);
}

/*
 * Queue @work on the system default workqueue (system_wq) for CPU @cpu.
 * Returns the result of queue_work_on().
 */
static inline bool schedule_work_on(int cpu, struct work_struct *work)
{
  return queue_work_on(cpu, system_wq, work);
}

这是创建work，把work push到系统默认的工作队列上的接口，下面我们再来看一下创建自己的工作队列的接口：

linux-src/include/linux/workqueue.h

/*
 * Allocate a workqueue.
 *
 * @fmt:        printf-style format for the workqueue name
 * @flags:      WQ_* flags
 * @max_active: limit on concurrently active work items
 *              (NOTE(review): exact scope of the limit not visible here)
 * @...:        arguments for @fmt
 *
 * Returns a pointer to the new workqueue.
 */
struct workqueue_struct *
alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);

/*
 * Legacy helper: allocate a workqueue called @name with the
 * __WQ_LEGACY and WQ_MEM_RECLAIM flags and max_active set to 1.
 */
#define create_workqueue(name)            \
  alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))

工作队列还有很多很丰富的接口，这里就不一一介绍了。

关于工作队列的实现原理，推荐阅读：

http://www.wowotech.net/irq_subsystem/workqueue.html

http://www.wowotech.net/irq_subsystem/cmwq-intro.html

http://www.wowotech.net/irq_subsystem/alloc_workqueue.html

http://www.wowotech.net/irq_subsystem/queue_and_handle_work.html

六、中断与同步

在只有线程的情况下，线程之间的同步逻辑还是很好理解的，但是有了中断之后，硬中断、软中断、线程相互之间的同步就变得复杂起来。下面我们就来看一下它们在运行的时候相互之间的抢占关系。

6.1 CPU运行模型

首先我们来看一下CPU最原始的运行模型，图灵机模型，非常简单，就是一条直线一直运行下去。

在图灵机上加入中断之后，CPU的运行模型也是比较简单的。但是当我们考虑软件中断、硬件中断的区别时，CPU运行模型就开始变得复杂起来了。

不同的中断类型使得中断执行流有了不同的类型，这里一共分为三种类型，系统调用、CPU异常、硬件中断。现在这个还不算复杂，下面我们看一下它们之间的抢占情形。

在系统调用时会发生CPU异常，也可能会发生硬件中断，在CPU异常的时候也可能发生硬件中断。其实这三者也可以嵌套起来，请看下图：

系统调用时发生了CPU异常，CPU异常时发生了硬件中断。下面我们把硬件中断的处理过程分为硬中断和软中断两部分，看看它们之间的关系。

硬件中断的前半部分是硬中断，后半部分是软中断，硬中断中不能再嵌套硬中断了，但是软中断中可以嵌套硬中断。不过嵌套的硬中断在返回时发现正在执行软中断，就不会再重新执行软中断了，而是会回到原来的软中断执行流中。软中断的执行还有一种情况，如下图所示：

这是因为线程在其临界区中禁用了软中断，如果临界区中发生了硬中断还是会执行的，但是硬中断返回时不会去执行软中断，因为软中断被禁用了。当线程的临界区结束时会再打开软中断，此时发现有pending的软中断没有处理，就会去执行软中断。

还有一种比较特殊的情况，就是线程里套软中断，软中断里套硬中断，硬中断里套NMI中断，如下图所示：

首先软中断是不能独立触发的，必须是硬中断触发软中断。在图中，第一个硬中断是执行完成了的，然后在软中断的执行过程中又发生了硬中断，第二个硬中断还没执行完的时候又发生了NMI中断。这样就发生了四个不同等级的执行流一一嵌套的情况，这也是队列自旋锁的锁节点为啥要乘以4的原因。

6.2 中断相关同步方法

软中断可以抢占线程，硬中断可以抢占软中断也可以抢占线程，而反过来则不能抢占，所以如果我们的低等级执行流代码和高等级执行流代码有同步问题的话，就要考虑禁用高等级执行流。下面我们来看一下它们的接口，首先看禁用硬中断：

linux-src/include/linux/irqflags.h

/*
 * Interrupt control for the local CPU.  Each macro forwards to its
 * raw_local_irq_*() counterpart.  The save/restore pair takes a @flags
 * variable so a previously saved state can be restored instead of
 * unconditionally enabling.
 */
#define local_irq_enable()  do { raw_local_irq_enable(); } while (0)
#define local_irq_disable()  do { raw_local_irq_disable(); } while (0)
#define local_irq_save(flags)  do { raw_local_irq_save(flags); } while (0)
#define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0)

linux-src/include/linux/interrupt.h

/*
 * Enable/disable a single interrupt line, identified by its irq number.
 * NOTE(review): judging by the names, the _nosync variant does not wait
 * for running handlers and disable_hardirq() waits only for the hard irq
 * part -- confirm against kernel/irq/manage.c.
 */
extern void disable_irq_nosync(unsigned int irq);
extern bool disable_hardirq(unsigned int irq);
extern void disable_irq(unsigned int irq);
extern void disable_percpu_irq(unsigned int irq);
extern void enable_irq(unsigned int irq);
extern void enable_percpu_irq(unsigned int irq, unsigned int type);

你可以在一个CPU上禁用所有中断，也可以在所有CPU上禁用某个硬件中断，但是你不能在所有CPU上同时禁用所有硬件中断。

再来看一下禁用软中断的接口：

linux-src/include/linux/bottom_half.h

/*
 * Disable softirq (bottom half) processing on the local CPU.  Forwards
 * to __local_bh_disable_ip() with the caller's instruction pointer and
 * SOFTIRQ_DISABLE_OFFSET.
 */
static inline void local_bh_disable(void)
{
  __local_bh_disable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

/*
 * Re-enable softirq (bottom half) processing on the local CPU.  Forwards
 * to __local_bh_enable_ip() with the caller's instruction pointer and
 * SOFTIRQ_DISABLE_OFFSET, mirroring local_bh_disable().
 */
static inline void local_bh_enable(void)
{
  __local_bh_enable_ip(_THIS_IP_, SOFTIRQ_DISABLE_OFFSET);
}

我们只能禁用本地CPU的软中断，而且是整体禁用，不能只禁用某一类型的软中断。虽然在Linux中，下半部bh包括所有的下半部，但是此处的bh仅仅指软中断(包括tasklet)，不包括中断线程和工作队列。

七、总结回顾

本文我们从中断的概念开始讲起，一路上分析了中断的作用、中断的产生、中断的处理。其中内容最多的是硬件中断的处理，方法很多很繁杂。从6.1节CPU运行模型中，我们可以看到中断对于推动整个系统运行的重要性。所以说中断机制是计算机系统的神经和脉搏，一点都不为过。想要学会Linux内核，弄明白中断机制是其中必不可少的一环。最后我们再来看一下中断机制的图：