ID:fuchen1994
姓名:江军
作业要求:
-
理解Linux系统中进程调度的时机,可以在内核代码中搜索schedule()函数,看都是哪里调用了schedule(),判断我们课程内容中的总结是否准确;
-
使用gdb跟踪分析一个schedule()函数,验证您对Linux系统进程调度与进程切换过程的理解;推荐在实验楼Linux虚拟机环境下完成实验。
-
特别关注并仔细分析switch_to中的汇编代码,理解进程上下文的切换机制,以及与中断上下文切换的关系;
实验过程:
1.
进程调度的时机
-
中断处理过程(包括时钟中断、I/O中断、系统调用和异常)中,直接调用schedule(),或者返回用户态时根据need_resched标记调用schedule();
-
内核线程可以直接调用schedule()进行进程切换,也可以在中断处理过程中进行调度,也就是说内核线程作为一类特殊的进程,既可以主动调度,也可以被动调度;
-
用户态进程无法实现主动调度,仅能通过陷入内核态后的某个时机点进行调度,即在中断处理过程中进行调度。
代码分析:
1.内核抢占时进入调度的入口preempt_schedule();它调用的__schedule()一开始会先通过preempt_disable()关闭内核抢占
#ifdef CONFIG_PREEMPT
/*
 * this is the entry point to schedule() from in-kernel preemption
 * off of preempt_enable. Kernel preemptions off return from interrupt
 * occur there and call schedule directly.
 */
asmlinkage __visible void __sched notrace preempt_schedule(void)
{
	/*
	 * If there is a non-zero preempt_count or interrupts are disabled,
	 * we do not want to preempt the current task. Just return..
	 */
	if (likely(!preemptible()))
		return;

	do {
		__preempt_count_add(PREEMPT_ACTIVE);
		__schedule(); //进入这个函数
		__preempt_count_sub(PREEMPT_ACTIVE);

		/*
		 * Check again in case we missed a preemption opportunity
		 * between schedule and now.
		 */
		barrier();
	} while (need_resched());
}
static void __sched __schedule(void)
{
	struct task_struct *prev, *next;
	unsigned long *switch_count;
	struct rq *rq;
	int cpu;

need_resched:
	preempt_disable();
	cpu = smp_processor_id(); //取得当前CPU的编号,下一行据此取得该CPU的运行队列rq
	rq = cpu_rq(cpu);
	rcu_note_context_switch(cpu);
	prev = rq->curr; //保存当前进程current

	schedule_debug(prev); //进入这个检查函数,判断当前是否处于不应该进行调度的原子上下文
static inline void schedule_debug(struct task_struct *prev)
{
#ifdef CONFIG_SCHED_STACK_END_CHECK
	BUG_ON(unlikely(task_stack_end_corrupted(prev)));
#endif
	/*
	 * Test if we are atomic. Since do_exit() needs to call into
	 * schedule() atomically, we ignore that path. Otherwise whine
	 * if we are scheduling when we should not.
	 */
	if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD))
		__schedule_bug(prev);
	rcu_sleep_check();

	profile_hit(SCHED_PROFILING, __builtin_return_address(0));

	schedstat_inc(this_rq(), sched_count);
}
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { //检测prev:如果它处于不可运行状态,并且不是在内核态被抢占,就准备把它从运行队列上删除
	if (unlikely(signal_pending_state(prev->state, prev))) { //如果prev有未被阻塞的待处理信号,并且它的状态是TASK_INTERRUPTIBLE,就把其状态设置为TASK_RUNNING,并且把它留在runqueue中
		prev->state = TASK_RUNNING;
	} else {
		deactivate_task(rq, prev, DEQUEUE_SLEEP); //否则就调用deactivate_task()函数,把它从运行队列中移除
		prev->on_rq = 0;

		/*
		 * If a worker went to sleep, notify and ask workqueue
		 * whether it wants to wake up a task to maintain
		 * concurrency.
		 */
		if (prev->flags & PF_WQ_WORKER) {
			struct task_struct *to_wakeup;

			to_wakeup = wq_worker_sleeping(prev, cpu);
			if (to_wakeup)
				try_to_wake_up_local(to_wakeup);
		}
	}
	switch_count = &prev->nvcsw;
}
void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
	if (task_contributes_to_load(p))
		rq->nr_uninterruptible++; //如果该进程需要计入系统负载,就把运行队列的nr_uninterruptible计数加一(注意:这里增加的不是上下文切换次数,上下文切换次数是通过switch_count指向的prev->nvcsw来统计的)

	dequeue_task(rq, p, flags); //然后进入dequeue_task()函数
}
static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
	update_rq_clock(rq);
	sched_info_dequeued(rq, p);
	p->sched_class->dequeue_task(rq, p, flags); //调用p所属调度类(sched_class)的dequeue_task方法(不是递归调用自身),将p进程从当前运行队列上移除
}
2.选择下一个要运行的进程
pick_next_task(struct rq *rq, struct task_struct *prev)
{
	const struct sched_class *class = &fair_sched_class;
	struct task_struct *p;

	/*
	 * Optimization: we know that if all tasks are in
	 * the fair class we can call that function directly:
	 */
	if (likely(prev->sched_class == class &&
		   rq->nr_running == rq->cfs.h_nr_running)) {
		p = fair_sched_class.pick_next_task(rq, prev);
		if (unlikely(p == RETRY_TASK))
			goto again;

		/* assumes fair_sched_class->next == idle_sched_class */
		if (unlikely(!p))
			p = idle_sched_class.pick_next_task(rq, prev);

		return p;
	}
const struct sched_class idle_sched_class = {
	/* .next is NULL */
	/* no enqueue/yield_task for idle tasks */

	/* dequeue is not valid, we print a debug message there: */
	.dequeue_task = dequeue_task_idle,

	.check_preempt_curr = check_preempt_curr_idle,

	.pick_next_task = pick_next_task_idle,
	.put_prev_task = put_prev_task_idle,
2.GDB跟踪分析
这一部分遇到了问题:实验代码用git下载不下来,暂时没法进行实验,等后期在本机上搭建好环境后再补上。跟踪的过程其实和前面的代码分析差不多,区别不大。
3.switch_to中的汇编代码分析,关注进程上下文切换机制,以及中断上下文切换的关系
schedule()函数选择一个新的进程来运行,并调用context_switch()进行上下文的切换;context_switch()再通过switch_to宏来完成关键的上下文切换:
next = pick_next_task(rq, prev); //进程调度算法都封装在这个函数内部
context_switch(rq, prev, next); //进程上下文切换
switch_to利用了prev和next两个参数:prev指向当前进程,next指向被调度的进程。
#define switch_to(prev, next, last) \
do { \
	/* \
	 * Context-switching clobbers all registers, so we clobber \
	 * them explicitly, via unused output variables. \
	 * (EAX and EBP is not listed because EBP is saved/restored \
	 * explicitly for wchan access and EAX is the return value of \
	 * __switch_to()) \
	 */ \
	unsigned long ebx, ecx, edx, esi, edi; \
	\
	asm volatile("pushfl\n\t" /* save flags */ \
		     "pushl %%ebp\n\t" /* save EBP */ \ //当前进程堆栈基址压栈
		     "movl %%esp,%[prev_sp]\n\t" /* save ESP */ \ //将当前进程的栈顶保存到prev->thread.sp
		     "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ //将下一个进程的栈顶(next->thread.sp)恢复到esp中,此后使用的是next进程的内核栈
		     "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ //把标号1的地址作为当前进程的eip保存到prev->thread.ip,当前进程下次被调度回来时从标号1处继续执行
		     "pushl %[next_ip]\n\t" /* restore EIP */ \ //将下一个进程的eip(next->thread.ip)压栈,next进程栈顶的这个值就是它继续执行的起点
		     __switch_canary \
		     "jmp __switch_to\n" /* regparm call */ \
		     "1:\t" \
		     "popl %%ebp\n\t" /* restore EBP */ \
		     "popfl\n" /* restore flags */ \ //被切换回来的进程从标号1处继续执行,在这里恢复它之前压栈保存的ebp和标志寄存器
		     \
		     /* output parameters */ \
		     : [prev_sp] "=m" (prev->thread.sp), \
		       [prev_ip] "=m" (prev->thread.ip), \
		       "=a" (last), \
		       \
		       /* clobbered output registers: */ \
		       "=b" (ebx), "=c" (ecx), "=d" (edx), \
		       "=S" (esi), "=D" (edi) \
		       \
		       __switch_canary_oparam \
		       \
		       /* input parameters: */ \
		     : [next_sp] "m" (next->thread.sp), \
		       [next_ip] "m" (next->thread.ip), \
		       \
		       /* regparm parameters for __switch_to(): */ \
		       [prev] "a" (prev), \
		       [next] "d" (next) \
		       \
		       __switch_canary_iparam \
		       \
		     : /* reloaded segment registers */ \
		       "memory"); \
} while (0)
通过系统调用,用户空间的应用程序就会进入内核空间,由内核代表该进程运行于内核空间,这就涉及到上下文的切换。用户空间和内核空间具有不同的地址映射以及通用或专用的寄存器组;用户空间的进程要传递很多变量、参数给内核,内核也要保存用户进程的一些寄存器、变量等,以便系统调用结束后回到用户空间继续执行。所谓的进程上下文,就是一个进程在执行的时候,CPU的所有寄存器中的值、进程的状态以及堆栈中的内容。当内核需要切换到另一个进程时,它需要保存当前进程的所有状态,即保存当前进程的进程上下文,以便再次执行该进程时,能够恢复切换时的状态,继续执行。
同理,硬件通过触发信号,导致内核调用中断处理程序,进入内核空间。这个过程中,硬件的一些变量和参数也要传递给内核,内核通过这些参数进行中断处理,中断上下文就可以理解为硬件传递过来的这些参数和内核需要保存的一些环境,主要是被中断的进程的环境。
Linux内核工作在进程上下文或者中断上下文。提供系统调用服务的内核代码代表发起系统调用的应用程序运行在进程上下文;另一方面,中断处理程序,异步运行在中断上下文。中断上下文和特定进程无关。
运行在进程上下文的内核代码是可以被抢占的(Linux2.6支持抢占)。但是一个中断上下文,通常都会始终占有CPU(当然中断可以嵌套,但我们一般不这样做),不可以被其他进程抢占。正因为如此,运行在中断上下文的代码就要受一些限制,例如不能睡眠,也不能调用可能引起进程调度的函数。