1 diff -Narup linux-2.4.31-orig/arch/ppc/config.in linux-2.4.31/arch/ppc/config.in
2 --- linux-2.4.31-orig/arch/ppc/config.in 2004-08-07 16:26:04.000000000 -0700
3 +++ linux-2.4.31/arch/ppc/config.in 2005-08-24 22:14:47.000000000 -0700
4 @@ -167,6 +167,8 @@ if [ "$CONFIG_SMP" = "y" ]; then
5 int 'Maximum number of CPUs (2-32)' CONFIG_NR_CPUS 32
8 +bool 'kexec system call' CONFIG_KEXEC
10 if [ "$CONFIG_6xx" = "y" -a "$CONFIG_8260" = "n" ];then
11 bool 'AltiVec Support' CONFIG_ALTIVEC
12 bool 'Thermal Management Support' CONFIG_TAU
13 diff -Narup linux-2.4.31-orig/arch/ppc/kernel/Makefile linux-2.4.31/arch/ppc/kernel/Makefile
14 --- linux-2.4.31-orig/arch/ppc/kernel/Makefile 2004-04-14 06:05:27.000000000 -0700
15 +++ linux-2.4.31/arch/ppc/kernel/Makefile 2005-08-24 22:17:32.000000000 -0700
16 @@ -49,6 +49,7 @@ obj-$(CONFIG_PCI) += pci-dma.o
17 obj-$(CONFIG_KGDB) += ppc-stub.o
18 obj-$(CONFIG_PPCBUG_NVRAM) += prep_nvram.o
19 obj-$(CONFIG_SMP) += smp.o
20 +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
21 obj-$(CONFIG_TAU) += temp.o
22 ifeq ($(CONFIG_SERIAL)$(CONFIG_GEN550),yy)
23 obj-$(CONFIG_KGDB) += gen550_kgdb.o gen550_dbg.o
24 diff -Narup linux-2.4.31-orig/arch/ppc/kernel/machine_kexec.c linux-2.4.31/arch/ppc/kernel/machine_kexec.c
25 --- linux-2.4.31-orig/arch/ppc/kernel/machine_kexec.c 1969-12-31 16:00:00.000000000 -0800
26 +++ linux-2.4.31/arch/ppc/kernel/machine_kexec.c 2005-08-24 23:43:17.000000000 -0700
29 + * machine_kexec.c - handle transition of Linux booting another kernel
30 + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
32 + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
34 + * This source code is licensed under the GNU General Public License,
35 + * Version 2. See the file COPYING for more details.
38 +#include <linux/mm.h>
39 +#include <linux/kexec.h>
40 +#include <linux/delay.h>
41 +#include <linux/reboot.h>
42 +#include <asm/pgtable.h>
43 +#include <asm/pgalloc.h>
44 +#include <asm/mmu_context.h>
46 +#include <asm/hw_irq.h>
47 +//#include <asm/cacheflush.h>
48 +#include <asm/machdep.h>
50 +typedef NORET_TYPE void (*relocate_new_kernel_t)(
51 + unsigned long indirection_page, unsigned long reboot_code_buffer,
52 + unsigned long start_address) ATTRIB_NORET;
54 +const extern unsigned char relocate_new_kernel[];
55 +const extern unsigned int relocate_new_kernel_size;
57 +void machine_shutdown(void)
59 + if (ppc_md.machine_shutdown) {
60 + ppc_md.machine_shutdown();
64 +void machine_crash_shutdown(void)
66 + if (ppc_md.machine_crash_shutdown) {
67 + ppc_md.machine_crash_shutdown();
72 + * Do whatever setup is needed on image and the
73 + * reboot code buffer to allow us to avoid allocations
76 +int machine_kexec_prepare(struct kimage *image)
78 + if (ppc_md.machine_kexec_prepare) {
79 + return ppc_md.machine_kexec_prepare(image);
82 + * Fail if platform doesn't provide its own machine_kexec_prepare
88 +void machine_kexec_cleanup(struct kimage *image)
90 + if (ppc_md.machine_kexec_cleanup) {
91 + ppc_md.machine_kexec_cleanup(image);
96 + * Do not allocate memory (or fail in any way) in machine_kexec().
97 + * We are past the point of no return, committed to rebooting now.
99 +NORET_TYPE void machine_kexec(struct kimage *image)
101 + if (ppc_md.machine_kexec) {
102 + ppc_md.machine_kexec(image);
105 + * Fall back to normal restart if platform doesn't provide
106 + * its own kexec function, and user insists on kexec...
108 + machine_restart(NULL);
115 + * This is a generic machine_kexec function suitable at least for
116 + * non-OpenFirmware embedded platforms.
117 + * It merely copies the image relocation code to the control page and
119 + * A platform specific function may just call this one.
121 +void machine_kexec_simple(struct kimage *image)
123 + unsigned long page_list;
124 + unsigned long reboot_code_buffer, reboot_code_buffer_phys;
125 + relocate_new_kernel_t rnk;
127 + /* Interrupts aren't acceptable while we reboot */
128 + local_irq_disable();
130 + page_list = image->head;
132 + /* we need both effective and real address here */
133 + reboot_code_buffer =
134 + (unsigned long)page_address(image->control_code_page);
135 + reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
137 + /* copy our kernel relocation code to the control code page */
138 + memcpy((void *)reboot_code_buffer,
139 + relocate_new_kernel, relocate_new_kernel_size);
141 + flush_icache_range(reboot_code_buffer,
142 + reboot_code_buffer + KEXEC_CONTROL_CODE_SIZE);
143 + printk(KERN_INFO "Bye!\n");
146 + rnk = (relocate_new_kernel_t) reboot_code_buffer;
147 + (*rnk)(page_list, reboot_code_buffer_phys, image->start);
149 diff -Narup linux-2.4.31-orig/arch/ppc/kernel/misc.S linux-2.4.31/arch/ppc/kernel/misc.S
150 --- linux-2.4.31-orig/arch/ppc/kernel/misc.S 2004-04-14 06:05:27.000000000 -0700
151 +++ linux-2.4.31/arch/ppc/kernel/misc.S 2005-08-24 22:22:21.000000000 -0700
152 @@ -1310,6 +1310,25 @@ _GLOBAL(sys_call_table)
153 .long sys_ni_syscall /* reserved for sys_clock_getres */
154 .long sys_ni_syscall /* reserved for sys_clock_nanosleep */
155 .long sys_swapcontext
156 + .long sys_ni_syscall /* 250 */
157 + .long sys_ni_syscall
158 + .long sys_ni_syscall
159 + .long sys_ni_syscall
160 + .long sys_ni_syscall
161 + .long sys_ni_syscall /* 255 */
162 + .long sys_ni_syscall
163 + .long sys_ni_syscall
164 + .long sys_ni_syscall
165 + .long sys_ni_syscall
166 + .long sys_ni_syscall /* 260 */
167 + .long sys_ni_syscall
168 + .long sys_ni_syscall
169 + .long sys_ni_syscall
170 + .long sys_ni_syscall
171 + .long sys_ni_syscall /* 265 */
172 + .long sys_ni_syscall
173 + .long sys_ni_syscall
174 + .long sys_kexec_load /* and finally, 268 sys_kexec_load... */
176 .rept NR_syscalls-(.-sys_call_table)/4
178 diff -Narup linux-2.4.31-orig/arch/ppc/kernel/relocate_kernel.S linux-2.4.31/arch/ppc/kernel/relocate_kernel.S
179 --- linux-2.4.31-orig/arch/ppc/kernel/relocate_kernel.S 1969-12-31 16:00:00.000000000 -0800
180 +++ linux-2.4.31/arch/ppc/kernel/relocate_kernel.S 2005-08-24 11:37:15.000000000 -0700
183 + * relocate_kernel.S - put the kernel image in place to boot
184 + * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
186 + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
188 + * This source code is licensed under the GNU General Public License,
189 + * Version 2. See the file COPYING for more details.
192 +#include <asm/ppc_asm.h>
193 +#include <asm/processor.h>
195 +#include <asm/kexec.h>
197 +#define PAGE_SIZE 4096 /* must be same value as in <asm/page.h> */
200 + * Must be relocatable PIC code callable as a C function.
202 + .globl relocate_new_kernel
203 +relocate_new_kernel:
204 + /* r3 = page_list */
205 + /* r4 = reboot_code_buffer */
206 + /* r5 = start_address */
211 + * Set Machine Status Register to a known status,
212 + * switch the MMU off and jump to 1: in a single step.
216 + ori r8, r8, MSR_RI|MSR_ME
217 + mtspr SPRN_SRR1, r8
218 + addi r8, r4, 1f - relocate_new_kernel
219 + mtspr SPRN_SRR0, r8
224 + /* from this point address translation is turned off */
225 + /* and interrupts are disabled */
227 + /* set a new stack at the bottom of our page... */
228 + /* (not really needed now) */
229 + addi r1, r4, KEXEC_CONTROL_CODE_SIZE - 8 /* for LR Save+Back Chain */
232 + /* Do the copies */
233 + li r6, 0 /* checksum */
237 +0: /* top, read another word for the indirection page */
241 + /* is it a destination page? (r8) */
242 + rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
245 + rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
248 +2: /* is it an indirection page? (r3) */
249 + rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
252 + rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
256 +2: /* are we done? */
257 + rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
261 +2: /* is it a source page? (r9) */
262 + rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
265 + rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
267 + li r7, PAGE_SIZE / 4
272 + lwzu r0, 4(r9) /* do the copy */
286 + /* To be certain of avoiding problems with self-modifying code
287 + * execute a serializing instruction here.
292 + /* jump to the entry point, usually the setup routine */
298 +relocate_new_kernel_end:
300 + .globl relocate_new_kernel_size
301 +relocate_new_kernel_size:
302 + .long relocate_new_kernel_end - relocate_new_kernel
304 diff -Narup linux-2.4.31-orig/arch/ppc/platforms/redwood6.c linux-2.4.31/arch/ppc/platforms/redwood6.c
305 --- linux-2.4.31-orig/arch/ppc/platforms/redwood6.c 2004-04-14 06:05:27.000000000 -0700
306 +++ linux-2.4.31/arch/ppc/platforms/redwood6.c 2005-08-24 22:57:12.000000000 -0700
307 @@ -168,7 +168,26 @@ board_setup_irq(void)
312 +int redwood6_kexec_prepare(struct kimage *image)
314 + /* here, we can place additional preparations */
315 + return 0; /* yes, we support kexec */
319 +void redwood6_kexec(struct kimage *image)
321 + /* just call the simple kexec version... */
322 + machine_kexec_simple(image);
324 +#endif /* CONFIG_KEXEC */
330 + ppc_md.machine_kexec_prepare = redwood6_kexec_prepare;
331 + ppc_md.machine_kexec = redwood6_kexec;
334 diff -Narup linux-2.4.31-orig/include/asm-ppc/io.h linux-2.4.31/include/asm-ppc/io.h
335 --- linux-2.4.31-orig/include/asm-ppc/io.h 2003-11-28 10:26:21.000000000 -0800
336 +++ linux-2.4.31/include/asm-ppc/io.h 2005-08-24 23:28:30.000000000 -0700
337 @@ -275,6 +275,10 @@ extern inline void * phys_to_virt(unsign
338 #define page_to_phys(page) (((page - mem_map) << PAGE_SHIFT) + PPC_MEMSTART)
339 #define page_to_bus(page) (page_to_phys(page) + PCI_DRAM_OFFSET)
341 +/* added for kexec support */
342 +#define pfn_to_page(pfn) (mem_map + ((pfn) - PPC_PGSTART))
343 +#define page_to_pfn(page) ((unsigned long)((page) - mem_map) + PPC_PGSTART)
346 * Enforce In-order Execution of I/O:
347 * Acts as a barrier to ensure all previous I/O accesses have
348 diff -Narup linux-2.4.31-orig/include/asm-ppc/kexec.h linux-2.4.31/include/asm-ppc/kexec.h
349 --- linux-2.4.31-orig/include/asm-ppc/kexec.h 1969-12-31 16:00:00.000000000 -0800
350 +++ linux-2.4.31/include/asm-ppc/kexec.h 2005-08-24 22:58:28.000000000 -0700
352 +#ifndef _PPC_KEXEC_H
353 +#define _PPC_KEXEC_H
358 + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
359 + * I.e. Maximum page that is mapped directly into kernel memory,
360 + * and kmap is not required.
362 + * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct
363 + * calculation for the amount of memory directly mappable into the
364 + * kernel memory space.
367 +/* Maximum physical address we can use pages from */
368 +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
369 +/* Maximum address we can reach in physical address mode */
370 +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
371 +/* Maximum address we can use for the control code buffer */
372 +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
374 +#define KEXEC_CONTROL_CODE_SIZE 4096
376 +/* The native architecture */
377 +#define KEXEC_ARCH KEXEC_ARCH_PPC
379 +#ifndef __ASSEMBLY__
383 +extern void machine_kexec_simple(struct kimage *image);
385 +#endif /* __ASSEMBLY__ */
387 +#endif /* CONFIG_KEXEC */
389 +#endif /* _PPC_KEXEC_H */
390 diff -Narup linux-2.4.31-orig/include/asm-ppc/machdep.h linux-2.4.31/include/asm-ppc/machdep.h
391 --- linux-2.4.31-orig/include/asm-ppc/machdep.h 2005-08-24 22:05:10.000000000 -0700
392 +++ linux-2.4.31/include/asm-ppc/machdep.h 2005-08-24 23:41:01.000000000 -0700
394 #define _PPC_MACHDEP_H
396 #include <linux/config.h>
397 +#include <linux/kexec.h>
400 #include <asm-m68k/machdep.h>
401 @@ -113,6 +114,36 @@ struct machdep_calls {
402 /* functions for dealing with other cpus */
403 struct smp_ops_t *smp_ops;
404 #endif /* CONFIG_SMP */
407 + /* Called to shutdown machine specific hardware not already controlled
408 + * by other drivers.
409 + * XXX Should we move this one out of kexec scope?
411 + void (*machine_shutdown)(void);
413 + /* Called to do the minimal shutdown needed to run a kexec'd kernel
414 + * to run successfully.
415 + * XXX Should we move this one out of kexec scope?
417 + void (*machine_crash_shutdown)(void);
419 + /* Called to do whatever setup is needed on image and the
420 + * reboot code buffer. Returns 0 on success.
421 + * Provide your own (maybe dummy) implementation if your platform
422 + * claims to support kexec.
424 + int (*machine_kexec_prepare)(struct kimage *image);
426 + /* Called to handle any machine specific cleanup on image */
427 + void (*machine_kexec_cleanup)(struct kimage *image);
429 + /* Called to perform the _real_ kexec.
430 + * Do NOT allocate memory or fail here. We are past the point of
433 + void (*machine_kexec)(struct kimage *image);
434 +#endif /* CONFIG_KEXEC */
437 extern struct machdep_calls ppc_md;
438 diff -Narup linux-2.4.31-orig/include/asm-ppc/page.h linux-2.4.31/include/asm-ppc/page.h
439 --- linux-2.4.31-orig/include/asm-ppc/page.h 2003-11-28 10:26:21.000000000 -0800
440 +++ linux-2.4.31/include/asm-ppc/page.h 2005-08-24 23:27:19.000000000 -0700
441 @@ -104,6 +104,7 @@ extern unsigned long ppc_memstart;
442 extern unsigned long ppc_memoffset;
444 #define PPC_MEMSTART 0
445 +#define PPC_PGSTART 0
446 #define PPC_MEMOFFSET PAGE_OFFSET
448 #define PPC_MEMSTART ppc_memstart
449 diff -Narup linux-2.4.31-orig/include/asm-ppc/unistd.h linux-2.4.31/include/asm-ppc/unistd.h
450 --- linux-2.4.31-orig/include/asm-ppc/unistd.h 2004-11-17 03:54:22.000000000 -0800
451 +++ linux-2.4.31/include/asm-ppc/unistd.h 2005-08-24 22:49:47.000000000 -0700
454 #define __NR_swapcontext 249
456 +#define __NR_kexec_load 268
460 /* On powerpc a system call basically clobbers the same registers like a
461 diff -Narup linux-2.4.31-orig/include/linux/kexec.h linux-2.4.31/include/linux/kexec.h
462 --- linux-2.4.31-orig/include/linux/kexec.h 1969-12-31 16:00:00.000000000 -0800
463 +++ linux-2.4.31/include/linux/kexec.h 2005-08-24 23:40:30.000000000 -0700
465 +#ifndef LINUX_KEXEC_H
466 +#define LINUX_KEXEC_H
469 +#include <linux/compiler.h>
470 +#include <linux/kernel.h>
471 +#include <linux/types.h>
472 +#include <linux/list.h>
473 +#include <linux/linkage.h>
474 +#include <asm/kexec.h>
476 +/* Verify architecture specific macros are defined */
478 +#ifndef KEXEC_SOURCE_MEMORY_LIMIT
479 +#error KEXEC_SOURCE_MEMORY_LIMIT not defined
482 +#ifndef KEXEC_DESTINATION_MEMORY_LIMIT
483 +#error KEXEC_DESTINATION_MEMORY_LIMIT not defined
486 +#ifndef KEXEC_CONTROL_MEMORY_LIMIT
487 +#error KEXEC_CONTROL_MEMORY_LIMIT not defined
490 +#ifndef KEXEC_CONTROL_CODE_SIZE
491 +#error KEXEC_CONTROL_CODE_SIZE not defined
495 +#error KEXEC_ARCH not defined
499 + * This structure is used to hold the arguments that are used when loading
503 +typedef unsigned long kimage_entry_t;
504 +#define IND_DESTINATION 0x1
505 +#define IND_INDIRECTION 0x2
506 +#define IND_DONE 0x4
507 +#define IND_SOURCE 0x8
509 +#define KEXEC_SEGMENT_MAX 8
510 +struct kexec_segment {
513 + unsigned long mem; /* User space sees this as a (void *) ... */
518 + kimage_entry_t head;
519 + kimage_entry_t *entry;
520 + kimage_entry_t *last_entry;
522 + unsigned long destination;
524 + unsigned long start;
525 + struct page *control_code_page;
527 + unsigned long nr_segments;
528 + struct kexec_segment segment[KEXEC_SEGMENT_MAX];
530 + struct list_head control_pages;
531 + struct list_head dest_pages;
532 + struct list_head unuseable_pages;
534 + /* Address of next control page to allocate for crash kernels. */
535 + unsigned long control_page;
537 + /* Flags to indicate special processing */
538 + unsigned int type : 1;
539 +#define KEXEC_TYPE_DEFAULT 0
540 +#define KEXEC_TYPE_CRASH 1
545 +/* kexec interface functions */
546 +extern NORET_TYPE void machine_kexec(struct kimage *image) ATTRIB_NORET;
547 +extern int machine_kexec_prepare(struct kimage *image);
548 +extern void machine_kexec_cleanup(struct kimage *image);
549 +extern asmlinkage long sys_kexec_load(unsigned long entry,
550 + unsigned long nr_segments, struct kexec_segment __user *segments,
551 + unsigned long flags);
552 +extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order);
553 +extern void crash_kexec(void);
554 +extern struct kimage *kexec_image;
555 +extern struct kimage *kexec_crash_image;
557 +#define KEXEC_ON_CRASH 0x00000001
558 +#define KEXEC_ARCH_MASK 0xffff0000
560 +/* These values match the ELF architecture values.
561 + * Unless there is a good reason that should continue to be the case.
563 +#define KEXEC_ARCH_DEFAULT ( 0 << 16)
564 +#define KEXEC_ARCH_386 ( 3 << 16)
565 +#define KEXEC_ARCH_X86_64 (62 << 16)
566 +#define KEXEC_ARCH_PPC (20 << 16)
567 +#define KEXEC_ARCH_PPC64 (21 << 16)
568 +#define KEXEC_ARCH_IA_64 (50 << 16)
570 +#define KEXEC_FLAGS (KEXEC_ON_CRASH) /* List of defined/legal kexec flags */
572 +/* Location of a reserved region to hold the crash kernel.
574 +extern struct resource crashk_res;
576 +#else /* !CONFIG_KEXEC */
577 +static inline void crash_kexec(void) { }
578 +#endif /* CONFIG_KEXEC */
579 +#endif /* LINUX_KEXEC_H */
580 diff -Narup linux-2.4.31-orig/include/linux/mm.h linux-2.4.31/include/linux/mm.h
581 --- linux-2.4.31-orig/include/linux/mm.h 2005-01-19 06:10:12.000000000 -0800
582 +++ linux-2.4.31/include/linux/mm.h 2005-08-24 23:27:57.000000000 -0700
583 @@ -165,6 +165,8 @@ typedef struct page {
584 struct page **pprev_hash; /* Complement to *next_hash. */
585 struct buffer_head * buffers; /* Buffer maps us to a disk block. */
587 + unsigned long private; /* added for kexec */
590 * On machines where all RAM is mapped into kernel address space,
591 * we can simply calculate the virtual address. On machines with
592 diff -Narup linux-2.4.31-orig/include/linux/reboot.h linux-2.4.31/include/linux/reboot.h
593 --- linux-2.4.31-orig/include/linux/reboot.h 2001-02-09 14:46:13.000000000 -0800
594 +++ linux-2.4.31/include/linux/reboot.h 2005-08-24 23:00:54.000000000 -0700
596 * CAD_OFF Ctrl-Alt-Del sequence sends SIGINT to init task.
597 * POWER_OFF Stop OS and remove all power from system, if possible.
598 * RESTART2 Restart system using given command string.
599 + * KEXEC Restart system using a previously loaded Linux kernel
602 #define LINUX_REBOOT_CMD_RESTART 0x01234567
604 #define LINUX_REBOOT_CMD_CAD_OFF 0x00000000
605 #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC
606 #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4
607 +#define LINUX_REBOOT_CMD_KEXEC 0x45584543
611 @@ -45,6 +47,8 @@ extern int unregister_reboot_notifier(st
612 extern void machine_restart(char *cmd);
613 extern void machine_halt(void);
614 extern void machine_power_off(void);
615 +extern void machine_shutdown(void);
616 +extern void machine_crash_shutdown(void);
620 diff -Narup linux-2.4.31-orig/kernel/Makefile linux-2.4.31/kernel/Makefile
621 --- linux-2.4.31-orig/kernel/Makefile 2001-09-16 21:22:40.000000000 -0700
622 +++ linux-2.4.31/kernel/Makefile 2005-08-24 22:38:41.000000000 -0700
623 @@ -19,6 +19,7 @@ obj-y = sched.o dma.o fork.o exec_do
624 obj-$(CONFIG_UID16) += uid16.o
625 obj-$(CONFIG_MODULES) += ksyms.o
626 obj-$(CONFIG_PM) += pm.o
627 +obj-$(CONFIG_KEXEC) += kexec.o
629 ifneq ($(CONFIG_IA64),y)
630 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
631 diff -Narup linux-2.4.31-orig/kernel/kexec.c linux-2.4.31/kernel/kexec.c
632 --- linux-2.4.31-orig/kernel/kexec.c 1969-12-31 16:00:00.000000000 -0800
633 +++ linux-2.4.31/kernel/kexec.c 2005-08-24 23:30:47.000000000 -0700
636 + * kexec.c - kexec system call
637 + * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
639 + * This source code is licensed under the GNU General Public License,
640 + * Version 2. See the file COPYING for more details.
643 +#include <linux/mm.h>
644 +#include <linux/file.h>
645 +#include <linux/slab.h>
646 +#include <linux/fs.h>
647 +#include <linux/kexec.h>
648 +#include <linux/spinlock.h>
649 +#include <linux/list.h>
650 +#include <linux/highmem.h>
651 +#include <linux/reboot.h>
652 +//#include <linux/syscalls.h>
653 +#include <linux/ioport.h>
654 +#include <asm/page.h>
655 +#include <asm/uaccess.h>
657 +#include <asm/system.h>
658 +#include <asm/semaphore.h>
660 +/* Location of the reserved area for the crash kernel */
661 +struct resource crashk_res = {
662 + .name = "Crash kernel",
665 + .flags = IORESOURCE_BUSY | IORESOURCE_MEM
669 + * When kexec transitions to the new kernel there is a one-to-one
670 + * mapping between physical and virtual addresses. On processors
671 + * where you can disable the MMU this is trivial, and easy. For
672 + * others it is still a simple predictable page table to setup.
674 + * In that environment kexec copies the new kernel to its final
675 + * resting place. This means I can only support memory whose
676 + * physical address can fit in an unsigned long. In particular
677 + * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
678 + * If the assembly stub has more restrictive requirements
679 + * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
680 + * defined more restrictively in <asm/kexec.h>.
682 + * The code for the transition from the current kernel to the
683 + * the new kernel is placed in the control_code_buffer, whose size
684 + * is given by KEXEC_CONTROL_CODE_SIZE. In the best case only a single
685 + * page of memory is necessary, but some architectures require more.
686 + * Because this memory must be identity mapped in the transition from
687 + * virtual to physical addresses it must live in the range
688 + * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
691 + * The assembly stub in the control code buffer is passed a linked list
692 + * of descriptor pages detailing the source pages of the new kernel,
693 + * and the destination addresses of those source pages. As this data
694 + * structure is not used in the context of the current OS, it must
695 + * be self-contained.
697 + * The code has been made to work with highmem pages and will use a
698 + * destination page in its final resting place (if it happens
699 + * to allocate it). The end product of this is that most of the
700 + * physical address space, and most of RAM can be used.
702 + * Future directions include:
703 + * - allocating a page table with the control code buffer identity
704 + * mapped, to simplify machine_kexec and make kexec_on_panic more
709 + * KIMAGE_NO_DEST is an impossible destination address..., for
710 + * allocating pages whose destination address we do not care about.
712 +#define KIMAGE_NO_DEST (-1UL)
714 +static int kimage_is_destination_range(
715 + struct kimage *image, unsigned long start, unsigned long end);
716 +static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long dest);
718 +static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
719 + unsigned long nr_segments, struct kexec_segment __user *segments)
721 + size_t segment_bytes;
722 + struct kimage *image;
726 + /* Allocate a controlling structure */
728 + image = kmalloc(sizeof(*image), GFP_KERNEL);
732 + memset(image, 0, sizeof(*image));
734 + image->entry = &image->head;
735 + image->last_entry = &image->head;
736 + image->control_page = ~0; /* By default this does not apply */
737 + image->start = entry;
738 + image->type = KEXEC_TYPE_DEFAULT;
740 + /* Initialize the list of control pages */
741 + INIT_LIST_HEAD(&image->control_pages);
743 + /* Initialize the list of destination pages */
744 + INIT_LIST_HEAD(&image->dest_pages);
746 + /* Initialize the list of unuseable pages */
747 + INIT_LIST_HEAD(&image->unuseable_pages);
749 + /* Read in the segments */
750 + image->nr_segments = nr_segments;
751 + segment_bytes = nr_segments * sizeof(*segments);
752 + result = copy_from_user(image->segment, segments, segment_bytes);
757 + * Verify we have good destination addresses. The caller is
758 + * responsible for making certain we don't attempt to load
759 + * the new image into invalid or reserved areas of RAM. This
760 + * just verifies it is an address we can use.
762 + * Since the kernel does everything in page size chunks ensure
763 + * the destination addresses are page aligned. Too many
764 + * special cases crop up when we don't do this. The most
765 + * insidious is getting overlapping destination addresses
766 + * simply because addresses are changed to page size
769 + result = -EADDRNOTAVAIL;
770 + for (i = 0; i < nr_segments; i++) {
771 + unsigned long mstart, mend;
772 + mstart = image->segment[i].mem;
773 + mend = mstart + image->segment[i].memsz;
774 + if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
776 + if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
780 + /* Verify our destination addresses do not overlap.
781 + * If we allowed overlapping destination addresses
782 + * through, very weird things can happen with no
783 + * easy explanation as one segment stops on another.
786 + for(i = 0; i < nr_segments; i++) {
787 + unsigned long mstart, mend;
789 + mstart = image->segment[i].mem;
790 + mend = mstart + image->segment[i].memsz;
791 + for(j = 0; j < i; j++) {
792 + unsigned long pstart, pend;
793 + pstart = image->segment[j].mem;
794 + pend = pstart + image->segment[j].memsz;
795 + /* Do the segments overlap ? */
796 + if ((mend > pstart) && (mstart < pend))
801 + /* Ensure our buffer sizes are strictly less than
802 + * our memory sizes. This should always be the case,
803 + * and it is easier to check up front than to be surprised
807 + for(i = 0; i < nr_segments; i++) {
808 + if (image->segment[i].bufsz > image->segment[i].memsz)
824 +static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
825 + unsigned long nr_segments, struct kexec_segment __user *segments)
828 + struct kimage *image;
830 + /* Allocate and initialize a controlling structure */
832 + result = do_kimage_alloc(&image, entry, nr_segments, segments);
839 + * Find a location for the control code buffer, and add it
840 + * the vector of segments so that it's pages will also be
841 + * counted as destination pages.
844 + image->control_code_page = kimage_alloc_control_pages(image,
845 + get_order(KEXEC_CONTROL_CODE_SIZE));
846 + if (!image->control_code_page) {
847 + printk(KERN_ERR "Could not allocate control_code_buffer\n");
861 +static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
862 + unsigned long nr_segments, struct kexec_segment *segments)
865 + struct kimage *image;
869 + /* Verify we have a valid entry point */
870 + if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
871 + result = -EADDRNOTAVAIL;
875 + /* Allocate and initialize a controlling structure */
876 + result = do_kimage_alloc(&image, entry, nr_segments, segments);
881 + /* Enable the special crash kernel control page
882 + * allocation policy.
884 + image->control_page = crashk_res.start;
885 + image->type = KEXEC_TYPE_CRASH;
888 + * Verify we have good destination addresses. Normally
889 + * the caller is responsible for making certain we don't
890 + * attempt to load the new image into invalid or reserved
891 + * areas of RAM. But crash kernels are preloaded into a
892 + * reserved area of ram. We must ensure the addresses
893 + * are in the reserved area otherwise preloading the
894 + * kernel could corrupt things.
896 + result = -EADDRNOTAVAIL;
897 + for (i = 0; i < nr_segments; i++) {
898 + unsigned long mstart, mend;
899 + mstart = image->segment[i].mem;
900 + mend = mstart + image->segment[i].memsz;
901 + /* Ensure we are within the crash kernel limits */
902 + if ((mstart < crashk_res.start) || (mend > crashk_res.end))
908 + * Find a location for the control code buffer, and add
909 + * the vector of segments so that it's pages will also be
910 + * counted as destination pages.
913 + image->control_code_page = kimage_alloc_control_pages(image,
914 + get_order(KEXEC_CONTROL_CODE_SIZE));
915 + if (!image->control_code_page) {
916 + printk(KERN_ERR "Could not allocate control_code_buffer\n");
930 +static int kimage_is_destination_range(
931 + struct kimage *image, unsigned long start, unsigned long end)
935 + for (i = 0; i < image->nr_segments; i++) {
936 + unsigned long mstart, mend;
937 + mstart = image->segment[i].mem;
938 + mend = mstart + image->segment[i].memsz;
939 + if ((end > mstart) && (start < mend)) {
946 +static struct page *kimage_alloc_pages(unsigned int gfp_mask, unsigned int order)
948 + struct page *pages;
949 + pages = alloc_pages(gfp_mask, order);
951 + unsigned int count, i;
952 + pages->mapping = NULL;
953 + pages->private = order;
954 + count = 1 << order;
955 + for(i = 0; i < count; i++) {
956 + SetPageReserved(pages + i);
962 +static void kimage_free_pages(struct page *page)
964 + unsigned int order, count, i;
965 + order = page->private;
966 + count = 1 << order;
967 + for(i = 0; i < count; i++) {
968 + ClearPageReserved(page + i);
970 + __free_pages(page, order);
973 +static void kimage_free_page_list(struct list_head *list)
975 + struct list_head *pos, *next;
976 + list_for_each_safe(pos, next, list) {
979 + page = list_entry(pos, struct page, lru);
980 + list_del(&page->lru);
982 + kimage_free_pages(page);
986 +static struct page *kimage_alloc_normal_control_pages(
987 + struct kimage *image, unsigned int order)
989 + /* Control pages are special, they are the intermediaries
990 + * that are needed while we copy the rest of the pages
991 + * to their final resting place. As such they must
992 + * not conflict with either the destination addresses
993 + * or memory the kernel is already using.
995 + * The only case where we really need more than one of
996 + * these are for architectures where we cannot disable
997 + * the MMU and must instead generate an identity mapped
998 + * page table for all of the memory.
1000 + * At worst this runs in O(N) of the image size.
1002 + struct list_head extra_pages;
1003 + struct page *pages;
1004 + unsigned int count;
1006 + count = 1 << order;
1007 + INIT_LIST_HEAD(&extra_pages);
1009 + /* Loop while I can allocate a page and the page allocated
1010 + * is a destination page.
1013 + unsigned long pfn, epfn, addr, eaddr;
1014 + pages = kimage_alloc_pages(GFP_KERNEL, order);
1017 + pfn = page_to_pfn(pages);
1018 + epfn = pfn + count;
1019 + addr = pfn << PAGE_SHIFT;
1020 + eaddr = epfn << PAGE_SHIFT;
1021 + if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
1022 + kimage_is_destination_range(image, addr, eaddr))
1024 + list_add(&pages->lru, &extra_pages);
1029 + /* Remember the allocated page... */
1030 + list_add(&pages->lru, &image->control_pages);
1032 + /* Because the page is already in it's destination
1033 + * location we will never allocate another page at
1034 + * that address. Therefore kimage_alloc_pages
1035 + * will not return it (again) and we don't need
1036 + * to give it an entry in image->segment[].
1039 + /* Deal with the destination pages I have inadvertently allocated.
1041 + * Ideally I would convert multi-page allocations into single
1042 + * page allocations, and add everything to image->dest_pages.
1044 + * For now it is simpler to just free the pages.
1046 + kimage_free_page_list(&extra_pages);
1051 +static struct page *kimage_alloc_crash_control_pages(
1052 + struct kimage *image, unsigned int order)
1054 + /* Control pages are special, they are the intermediaries
1055 + * that are needed while we copy the rest of the pages
1056 + * to their final resting place. As such they must
1057 + * not conflict with either the destination addresses
1058 + * or memory the kernel is already using.
1060 + * Control pages are also the only pages we must allocate
1061 + * when loading a crash kernel. All of the other pages
1062 + * are specified by the segments and we just memcpy
1063 + * into them directly.
1065 + * The only case where we really need more than one of
1066 + * these are for architectures where we cannot disable
1067 + * the MMU and must instead generate an identity mapped
1068 + * page table for all of the memory.
1070 + * Given the low demand this implements a very simple
1071 + * allocator that finds the first hole of the appropriate
1072 + * size in the reserved memory region, and allocates all
1073 + * of the memory up to and including the hole.
1075 + unsigned long hole_start, hole_end, size;
1076 + struct page *pages;
1078 + size = (1 << order) << PAGE_SHIFT;
1079 + hole_start = (image->control_page + (size - 1)) & ~(size - 1);
1080 + hole_end = hole_start + size - 1;
1081 + while(hole_end <= crashk_res.end) {
1083 + if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT) {
1086 + if (hole_end > crashk_res.end) {
1089 + /* See if I overlap any of the segments */
1090 + for(i = 0; i < image->nr_segments; i++) {
1091 + unsigned long mstart, mend;
1092 + mstart = image->segment[i].mem;
1093 + mend = mstart + image->segment[i].memsz - 1;
1094 + if ((hole_end >= mstart) && (hole_start <= mend)) {
1095 + /* Advance the hole to the end of the segment */
1096 + hole_start = (mend + (size - 1)) & ~(size - 1);
1097 + hole_end = hole_start + size - 1;
1101 + /* If I don't overlap any segments I have found my hole! */
1102 + if (i == image->nr_segments) {
1103 + pages = pfn_to_page(hole_start >> PAGE_SHIFT);
1108 + image->control_page = hole_end;
1114 +struct page *kimage_alloc_control_pages(
1115 + struct kimage *image, unsigned int order)
1117 + struct page *pages = NULL;
1118 + switch(image->type) {
1119 + case KEXEC_TYPE_DEFAULT:
1120 + pages = kimage_alloc_normal_control_pages(image, order);
1122 + case KEXEC_TYPE_CRASH:
1123 + pages = kimage_alloc_crash_control_pages(image, order);
1129 +static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
1131 + if (*image->entry != 0) {
1134 + if (image->entry == image->last_entry) {
1135 + kimage_entry_t *ind_page;
1136 + struct page *page;
1137 + page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
1141 + ind_page = page_address(page);
1142 + *image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
1143 + image->entry = ind_page;
1144 + image->last_entry =
1145 + ind_page + ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
1147 + *image->entry = entry;
1149 + *image->entry = 0;
1153 +static int kimage_set_destination(
1154 + struct kimage *image, unsigned long destination)
1158 + destination &= PAGE_MASK;
1159 + result = kimage_add_entry(image, destination | IND_DESTINATION);
1160 + if (result == 0) {
1161 + image->destination = destination;
1167 +static int kimage_add_page(struct kimage *image, unsigned long page)
1171 + page &= PAGE_MASK;
1172 + result = kimage_add_entry(image, page | IND_SOURCE);
1173 + if (result == 0) {
1174 + image->destination += PAGE_SIZE;
1180 +static void kimage_free_extra_pages(struct kimage *image)
1182 + /* Walk through and free any extra destination pages I may have */
1183 + kimage_free_page_list(&image->dest_pages);
1185 + /* Walk through and free any unuseable pages I have cached */
1186 + kimage_free_page_list(&image->unuseable_pages);
1189 +static int kimage_terminate(struct kimage *image)
1191 + if (*image->entry != 0) {
1194 + *image->entry = IND_DONE;
1198 +#define for_each_kimage_entry(image, ptr, entry) \
1199 + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
1200 + ptr = (entry & IND_INDIRECTION)? \
1201 + phys_to_virt((entry & PAGE_MASK)): ptr +1)
1203 +static void kimage_free_entry(kimage_entry_t entry)
1205 + struct page *page;
1207 + page = pfn_to_page(entry >> PAGE_SHIFT);
1208 + kimage_free_pages(page);
1211 +static void kimage_free(struct kimage *image)
1213 + kimage_entry_t *ptr, entry;
1214 + kimage_entry_t ind = 0;
1218 + kimage_free_extra_pages(image);
1219 + for_each_kimage_entry(image, ptr, entry) {
1220 + if (entry & IND_INDIRECTION) {
1221 + /* Free the previous indirection page */
1222 + if (ind & IND_INDIRECTION) {
1223 + kimage_free_entry(ind);
1225 + /* Save this indirection page until we are
1230 + else if (entry & IND_SOURCE) {
1231 + kimage_free_entry(entry);
1234 + /* Free the final indirection page */
1235 + if (ind & IND_INDIRECTION) {
1236 + kimage_free_entry(ind);
1239 + /* Handle any machine specific cleanup */
1240 + machine_kexec_cleanup(image);
1242 + /* Free the kexec control pages... */
1243 + kimage_free_page_list(&image->control_pages);
1247 +static kimage_entry_t *kimage_dst_used(struct kimage *image, unsigned long page)
1249 + kimage_entry_t *ptr, entry;
1250 + unsigned long destination = 0;
1252 + for_each_kimage_entry(image, ptr, entry) {
1253 + if (entry & IND_DESTINATION) {
1254 + destination = entry & PAGE_MASK;
1256 + else if (entry & IND_SOURCE) {
1257 + if (page == destination) {
1260 + destination += PAGE_SIZE;
1266 +static struct page *kimage_alloc_page(struct kimage *image, unsigned int gfp_mask, unsigned long destination)
1269 + * Here we implement safeguards to ensure that a source page
1270 + * is not copied to its destination page before the data on
1271 + * the destination page is no longer useful.
1273 + * To do this we maintain the invariant that a source page is
1274 + * either its own destination page, or it is not a
1275 + * destination page at all.
1277 + * That is slightly stronger than required, but the proof
1278 + * that no problems will occur is trivial, and the
1279 + * implementation is simply to verify.
1281 + * When allocating all pages normally this algorithm will run
1282 + * in O(N) time, but in the worst case it will run in O(N^2)
1283 + * time. If the runtime is a problem the data structures can
1286 + struct page *page;
1287 + unsigned long addr;
1290 + * Walk through the list of destination pages, and see if I
1293 + list_for_each_entry(page, &image->dest_pages, lru) {
1294 + addr = page_to_pfn(page) << PAGE_SHIFT;
1295 + if (addr == destination) {
1296 + list_del(&page->lru);
1302 + kimage_entry_t *old;
1304 + /* Allocate a page, if we run out of memory give up */
1305 + page = kimage_alloc_pages(gfp_mask, 0);
1309 + /* If the page cannot be used file it away */
1310 + if (page_to_pfn(page) > (KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
1311 + list_add(&page->lru, &image->unuseable_pages);
1314 + addr = page_to_pfn(page) << PAGE_SHIFT;
1316 + /* If it is the destination page we want use it */
1317 + if (addr == destination)
1320 + /* If the page is not a destination page use it */
1321 + if (!kimage_is_destination_range(image, addr, addr + PAGE_SIZE))
1325 + * I know that the page is someone's destination page.
1326 + * See if there is already a source page for this
1327 + * destination page. And if so swap the source pages.
1329 + old = kimage_dst_used(image, addr);
1331 + /* If so move it */
1332 + unsigned long old_addr;
1333 + struct page *old_page;
1335 + old_addr = *old & PAGE_MASK;
1336 + old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
1337 + copy_highpage(page, old_page);
1338 + *old = addr | (*old & ~PAGE_MASK);
1340 + /* The old page I have found cannot be a
1341 + * destination page, so return it.
1348 + /* Place the page on the destination list; I
1349 + * will use it later.
1351 + list_add(&page->lru, &image->dest_pages);
1357 +static int kimage_load_normal_segment(struct kimage *image,
1358 + struct kexec_segment *segment)
1360 + unsigned long maddr;
1361 + unsigned long ubytes, mbytes;
1363 + unsigned char *buf;
1366 + buf = segment->buf;
1367 + ubytes = segment->bufsz;
1368 + mbytes = segment->memsz;
1369 + maddr = segment->mem;
1371 + result = kimage_set_destination(image, maddr);
1376 + struct page *page;
1378 + size_t uchunk, mchunk;
1379 + page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
1384 + result = kimage_add_page(image, page_to_pfn(page) << PAGE_SHIFT);
1389 + /* Start with a clear page */
1390 + memset(ptr, 0, PAGE_SIZE);
1391 + ptr += maddr & ~PAGE_MASK;
1392 + mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
1393 + if (mchunk > mbytes) {
1397 + if (uchunk > ubytes) {
1400 + result = copy_from_user(ptr, buf, uchunk);
1403 + result = (result < 0) ? result : -EIO;
1415 +static int kimage_load_crash_segment(struct kimage *image,
1416 + struct kexec_segment *segment)
1418 + /* For crash dump kernels we simply copy the data from
1419 + * user space to its destination.
1420 + * We do things a page at a time for the sake of kmap.
1422 + unsigned long maddr;
1423 + unsigned long ubytes, mbytes;
1425 + unsigned char *buf;
1428 + buf = segment->buf;
1429 + ubytes = segment->bufsz;
1430 + mbytes = segment->memsz;
1431 + maddr = segment->mem;
1433 + struct page *page;
1435 + size_t uchunk, mchunk;
1436 + page = pfn_to_page(maddr >> PAGE_SHIFT);
1442 + ptr += maddr & ~PAGE_MASK;
1443 + mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
1444 + if (mchunk > mbytes) {
1448 + if (uchunk > ubytes) {
1450 + /* Zero the trailing part of the page */
1451 + memset(ptr + uchunk, 0, mchunk - uchunk);
1453 + result = copy_from_user(ptr, buf, uchunk);
1456 + result = (result < 0) ? result : -EIO;
1468 +static int kimage_load_segment(struct kimage *image,
1469 + struct kexec_segment *segment)
1471 + int result = -ENOMEM;
1472 + switch(image->type) {
1473 + case KEXEC_TYPE_DEFAULT:
1474 + result = kimage_load_normal_segment(image, segment);
1476 + case KEXEC_TYPE_CRASH:
1477 + result = kimage_load_crash_segment(image, segment);
1484 + * Exec Kernel system call: for obvious reasons only root may call it.
1486 + * This call breaks up into three pieces.
1487 + * - A generic part which loads the new kernel from the current
1488 + * address space, and very carefully places the data in the
1489 + * allocated pages.
1491 + * - A generic part that interacts with the kernel and tells all of
1492 + * the devices to shut down. Preventing on-going dmas, and placing
1493 + * the devices in a consistent state so a later kernel can
1494 + * reinitialize them.
1496 + * - A machine specific part that includes the syscall number
1497 + * and then copies the image to its final destination. And
1498 + * jumps into the image at entry.
1500 + * kexec does not sync, or unmount filesystems so if you need
1501 + * that to happen you need to do that yourself.
1503 +struct kimage *kexec_image = NULL;
1504 +struct kimage *kexec_crash_image = NULL;
1506 + * A home grown binary mutex.
1507 + * Nothing can wait so this mutex is safe to use
1508 + * in interrupt context :)
1510 +static int kexec_lock = 0;
1512 +asmlinkage long sys_kexec_load(unsigned long entry,
1513 + unsigned long nr_segments, struct kexec_segment __user *segments,
1514 + unsigned long flags)
1516 + struct kimage **dest_image, *image;
1520 + /* We only trust the superuser with rebooting the system. */
1521 + if (!capable(CAP_SYS_BOOT))
1525 + * Verify we have a legal set of flags
1526 + * This leaves us room for future extensions.
1528 + if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
1531 + /* Verify we are on the appropriate architecture */
1532 + if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
1533 + ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
1538 + /* Put an artificial cap on the number
1539 + * of segments passed to kexec_load.
1541 + if (nr_segments > KEXEC_SEGMENT_MAX)
1547 + /* Because we write directly to the reserved memory
1548 + * region when loading crash kernels we need a mutex here to
1549 + * prevent multiple crash kernels from attempting to load
1550 + * simultaneously, and to prevent a crash kernel from loading
1551 + * over the top of an in-use crash kernel.
1553 + * KISS: always take the mutex.
1555 + locked = xchg(&kexec_lock, 1);
1559 + dest_image = &kexec_image;
1560 + if (flags & KEXEC_ON_CRASH) {
1561 + dest_image = &kexec_crash_image;
1563 + if (nr_segments > 0) {
1565 + /* Loading another kernel to reboot into */
1566 + if ((flags & KEXEC_ON_CRASH) == 0) {
1567 + result = kimage_normal_alloc(&image, entry, nr_segments, segments);
1569 + /* Loading another kernel to switch to if this one crashes */
1570 + else if (flags & KEXEC_ON_CRASH) {
1571 + /* Free any current crash dump kernel before
1574 + kimage_free(xchg(&kexec_crash_image, NULL));
1575 + result = kimage_crash_alloc(&image, entry, nr_segments, segments);
1580 + result = machine_kexec_prepare(image);
1584 + for(i = 0; i < nr_segments; i++) {
1585 + result = kimage_load_segment(image, &image->segment[i]);
1590 + result = kimage_terminate(image);
1595 + /* Install the new kernel, and Uninstall the old */
1596 + image = xchg(dest_image, image);
1599 + xchg(&kexec_lock, 0); /* Release the mutex */
1600 + kimage_free(image);
1604 +void crash_kexec(void)
1606 + struct kimage *image;
1610 + /* Take the kexec_lock here to prevent sys_kexec_load
1611 + * running on one cpu from replacing the crash kernel
1612 + * we are using after a panic on a different cpu.
1614 + * If the crash kernel was not located in a fixed area
1615 + * of memory the xchg(&kexec_crash_image) would be
1616 + * sufficient. But since I reuse the memory...
1618 + locked = xchg(&kexec_lock, 1);
1620 + image = xchg(&kexec_crash_image, NULL);
1622 + machine_crash_shutdown();
1623 + machine_kexec(image);
1625 + xchg(&kexec_lock, 0);
1628 diff -Narup linux-2.4.31-orig/kernel/panic.c linux-2.4.31/kernel/panic.c
1629 --- linux-2.4.31-orig/kernel/panic.c 2004-11-17 03:54:22.000000000 -0800
1630 +++ linux-2.4.31/kernel/panic.c 2005-08-24 22:40:47.000000000 -0700
1632 #include <linux/sysrq.h>
1633 #include <linux/interrupt.h>
1634 #include <linux/console.h>
1635 +#include <linux/kexec.h>
1637 asmlinkage void sys_sync(void); /* it's really int */
1639 @@ -70,6 +71,11 @@ NORET_TYPE void panic(const char * fmt,
1643 + /* if we crash and have a crash kernel loaded
1644 + * let it handle everything else
1651 diff -Narup linux-2.4.31-orig/kernel/sys.c linux-2.4.31/kernel/sys.c
1652 --- linux-2.4.31-orig/kernel/sys.c 2003-11-28 10:26:21.000000000 -0800
1653 +++ linux-2.4.31/kernel/sys.c 2005-08-24 23:01:24.000000000 -0700
1655 #include <linux/init.h>
1656 #include <linux/highuid.h>
1658 +#include <linux/kernel.h>
1659 +#include <linux/kexec.h>
1661 #include <asm/uaccess.h>
1664 @@ -342,6 +345,21 @@ asmlinkage long sys_reboot(int magic1, i
1665 machine_restart(buffer);
1668 + case LINUX_REBOOT_CMD_KEXEC:
1670 + struct kimage *image;
1671 + image = xchg(&kexec_image, 0);
1676 + notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
1677 + printk(KERN_EMERG "Starting new kernel\n");
1678 + machine_shutdown();
1679 + machine_kexec(image);