5a14364
diff -rup xen-unstable-16606.orig/tools/ioemu/hw/pc.c xen-unstable-16606.new/tools/ioemu/hw/pc.c
5a14364
--- xen-unstable-16606.orig/tools/ioemu/hw/pc.c	2007-12-17 17:52:29.000000000 -0500
5a14364
+++ xen-unstable-16606.new/tools/ioemu/hw/pc.c	2007-12-17 17:53:51.000000000 -0500
5a14364
@@ -31,10 +31,8 @@
5a14364
 #define VGABIOS_CIRRUS_FILENAME "vgabios-cirrus.bin"
5a14364
 #define LINUX_BOOT_FILENAME "linux_boot.bin"
5a14364
 
5a14364
-#define KERNEL_LOAD_ADDR     0x00100000
5a14364
-#define INITRD_LOAD_ADDR     0x00600000
5a14364
-#define KERNEL_PARAMS_ADDR   0x00090000
5a14364
-#define KERNEL_CMDLINE_ADDR  0x00099000
5a14364
+/* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables.  */
5a14364
+#define ACPI_DATA_SIZE        0x10000
5a14364
 
5a14364
 static fdctrl_t *floppy_controller;
5a14364
 static RTCState *rtc_state;
5a14364
@@ -363,36 +361,271 @@ void bochs_bios_init(void)
5a14364
     register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
5a14364
 }
5a14364
 
5a14364
+/* Build a boot sector for bs_table[0] that loads segs[] (except CS) and gpr[]
5a14364
+   into the CPU registers, then far-jumps to segs[1]:ip.  Exits without hda. */
5a14364
+static void generate_bootsect(uint32_t gpr[8], uint16_t segs[6], uint16_t ip)
5a14364
+{
5a14364
+    uint8_t bootsect[512], *p;
5a14364
+    int i;
5a14364
+
5a14364
+    if (bs_table[0] == NULL) {
5a14364
+        fprintf(stderr, "A disk image must be given for 'hda' when booting "
5a14364
+                "a Linux kernel\n");
5a14364
+        exit(1);
5a14364
+    }
5a14364
+
5a14364
+    memset(bootsect, 0, sizeof(bootsect)); /* stays zeroed if the read below fails */
5a14364
+
5a14364
+    /* Start from the disk's sector 0 so its MSDOS partition table is kept (read result unchecked) */
5a14364
+    bdrv_read(bs_table[0], 0, bootsect, 1);
5a14364
+
5a14364
+    /* Make sure we have a partition signature */
5a14364
+    bootsect[510] = 0x55;
5a14364
+    bootsect[511] = 0xaa;
5a14364
+
5a14364
+    /* Emit the stub from offset 0, overwriting the start of the copied sector */
5a14364
+    p = bootsect;
5a14364
+    *p++ = 0xfa;                /* CLI */
5a14364
+    *p++ = 0xfc;                /* CLD */
5a14364
+
5a14364
+    for (i = 0; i < 6; i++) {   /* i is used as the segment register number in the ModRM byte */
5a14364
+        if (i == 1)             /* Skip CS: it is loaded by the far jump at the end */
5a14364
+            continue;
5a14364
 
5a14364
-int load_kernel(const char *filename, uint8_t *addr, 
5a14364
-                uint8_t *real_addr)
5a14364
+        *p++ = 0xb8;            /* MOV AX,imm16 */
5a14364
+        *p++ = segs[i];
5a14364
+        *p++ = segs[i] >> 8;
5a14364
+        *p++ = 0x8e;            /* MOV <seg>,AX */
5a14364
+        *p++ = 0xc0 + (i << 3); /* ModRM byte: segment register i as destination */
5a14364
+    }
5a14364
+
5a14364
+    for (i = 0; i < 8; i++) {
5a14364
+        *p++ = 0x66;            /* 32-bit operand size */
5a14364
+        *p++ = 0xb8 + i;        /* MOV <reg>,imm32 */
5a14364
+        *p++ = gpr[i];          /* immediate, least significant byte first */
5a14364
+        *p++ = gpr[i] >> 8;
5a14364
+        *p++ = gpr[i] >> 16;
5a14364
+        *p++ = gpr[i] >> 24;
5a14364
+    }
5a14364
+
5a14364
+    *p++ = 0xea;                /* JMP FAR */
5a14364
+    *p++ = ip;                  /* IP */
5a14364
+    *p++ = ip >> 8;
5a14364
+    *p++ = segs[1];             /* CS */
5a14364
+    *p++ = segs[1] >> 8;
5a14364
+
5a14364
+    bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
5a14364
+}
5a14364
+
5a14364
+/* Return the size of stream f in bytes and restore its position; ftell()/fseek() errors are not checked. */
5a14364
+static long get_file_size(FILE *f)
5a14364
 {
5a14364
-    int fd, size;
5a14364
-    int setup_sects;
5a14364
+    long where, size;
5a14364
+
5a14364
+    /* XXX: on Unix systems, using fstat() probably makes more sense */
5a14364
+
5a14364
+    where = ftell(f);           /* remember the caller's position */
5a14364
+    fseek(f, 0, SEEK_END);
5a14364
+    size = ftell(f);            /* offset of end-of-file; -1 if ftell() fails */
5a14364
+    fseek(f, where, SEEK_SET);  /* put the stream back where it was */
5a14364
 
5a14364
-    fd = open(filename, O_RDONLY | O_BINARY);
5a14364
-    if (fd < 0)
5a14364
-        return -1;
5a14364
-
5a14364
-    /* load 16 bit code */
5a14364
-    if (read(fd, real_addr, 512) != 512)
5a14364
-        goto fail;
5a14364
-    setup_sects = real_addr[0x1F1];
5a14364
-    if (!setup_sects)
5a14364
-        setup_sects = 4;
5a14364
-    if (read(fd, real_addr + 512, setup_sects * 512) != 
5a14364
-        setup_sects * 512)
5a14364
-        goto fail;
5a14364
-    
5a14364
-    /* load 32 bit code */
5a14364
-    size = read(fd, addr, 16 * 1024 * 1024);
5a14364
-    if (size < 0)
5a14364
-        goto fail;
5a14364
-    close(fd);
5a14364
     return size;
5a14364
- fail:
5a14364
-    close(fd);
5a14364
-    return -1;
5a14364
+}
5a14364
+/* Stream nbytes from f to guest physical address dst_addr, 4096 bytes at a time; returns 0, or -1 on a short read. */
5a14364
+static int fread2guest(target_phys_addr_t dst_addr, size_t nbytes, FILE *f)
5a14364
+{
5a14364
+    size_t offset = 0;
5a14364
+    while (nbytes) {
5a14364
+        uint8_t buf[4096];      /* bounce buffer between the file and guest memory */
5a14364
+	size_t count = nbytes > sizeof(buf) ? sizeof(buf) : nbytes;
5a14364
+	if (fread(buf, 1, count, f) != count)
5a14364
+	    return -1;          /* short read or I/O error; earlier chunks stay written */
5a14364
+
5a14364
+	cpu_physical_memory_rw(dst_addr+offset, buf, count, 1); /* final 1: presumably "write to guest" -- confirm */
5a14364
+	offset += count;
5a14364
+	nbytes -= count;
5a14364
+    }
5a14364
+    return 0;
5a14364
+}
5a14364
+/* Load a Linux kernel, optional initrd and command line into guest memory using the kernel's setup header, then install a boot sector that jumps to it; exits on failure. */
5a14364
+static void load_linux(const char *kernel_filename,
5a14364
+                       const char *initrd_filename,
5a14364
+                       const char *kernel_cmdline)
5a14364
+{
5a14364
+    uint16_t protocol;
5a14364
+    uint32_t gpr[8];
5a14364
+    uint16_t seg[6];
5a14364
+    uint16_t real_seg;
5a14364
+    int setup_size, kernel_size, initrd_size, cmdline_size;
5a14364
+    uint32_t initrd_max;
5a14364
+    uint8_t header[1024];
5a14364
+    target_phys_addr_t real_addr, reloc_prot_addr, prot_addr, cmdline_addr, initrd_addr;
5a14364
+    size_t ncmdline;
5a14364
+    FILE *f, *fi;
5a14364
+
5a14364
+    /* Align to 16 bytes as a paranoia measure */
5a14364
+    cmdline_size = (strlen(kernel_cmdline)+16) & ~15;
5a14364
+
5a14364
+    /* load the kernel header */
5a14364
+    f = fopen(kernel_filename, "rb");
5a14364
+    if (!f || !(kernel_size = get_file_size(f)) ||
5a14364
+        fread(header, 1, 1024, f) != 1024) {
5a14364
+        fprintf(stderr, "qemu: could not load kernel '%s'\n",
5a14364
+                kernel_filename);
5a14364
+        exit(1);
5a14364
+    }
5a14364
+
5a14364
+    /* kernel protocol version */
5a14364
+    fprintf(stderr, "header magic: %#x\n", ldl_p(header+0x202));
5a14364
+    if (ldl_p(header+0x202) == 0x53726448)      /* "HdrS" */
5a14364
+        protocol = lduw_p(header+0x206);
5a14364
+    else
5a14364
+        protocol = 0;           /* no header signature: treat as the oldest protocol */
5a14364
+    fprintf(stderr, "header protocol: %x\n", protocol);
5a14364
+    if (protocol < 0x200 || !(header[0x211] & 0x01)) {
5a14364
+        /* Low kernel */
5a14364
+        real_addr    = 0x90000;
5a14364
+        cmdline_addr = 0x9a000 - cmdline_size;
5a14364
+        prot_addr    = 0x10000;
5a14364
+	reloc_prot_addr = prot_addr;
5a14364
+    } else if (protocol < 0x202) {
5a14364
+        /* High but ancient kernel */
5a14364
+        real_addr    = 0x90000;
5a14364
+        cmdline_addr = 0x9a000 - cmdline_size;
5a14364
+        prot_addr    = 0x100000;
5a14364
+	reloc_prot_addr = 0x200000;
5a14364
+    } else {
5a14364
+        /* High and recent kernel */
5a14364
+        real_addr    = 0x10000;
5a14364
+        cmdline_addr = 0x20000;
5a14364
+        prot_addr    = 0x100000;
5a14364
+	reloc_prot_addr = 0x200000;
5a14364
+    }
5a14364
+
5a14364
+    fprintf(stderr,
5a14364
+            "qemu: real_addr     = %#zx\n"
5a14364
+            "qemu: cmdline_addr  = %#zx\n"
5a14364
+            "qemu: prot_addr     = %#zx\n",
5a14364
+            real_addr,
5a14364
+            cmdline_addr,
5a14364
+            prot_addr);
5a14364
+
5a14364
+    /* highest address for loading the initrd */
5a14364
+    if (protocol >= 0x203)
5a14364
+        initrd_max = ldl_p(header+0x22c);
5a14364
+    else
5a14364
+        initrd_max = 0x37ffffff;
5a14364
+
5a14364
+    if (initrd_max >= ram_size-ACPI_DATA_SIZE)
5a14364
+        initrd_max = ram_size-ACPI_DATA_SIZE-1;
5a14364
+
5a14364
+
5a14364
+    /* kernel command line */
5a14364
+    ncmdline = strlen(kernel_cmdline);
5a14364
+    /* Limit the guest copy to 4095 bytes + NUL without writing to the caller's const string */
5a14364
+    if (ncmdline > 4095)
5a14364
+        ncmdline = 4095;
5a14364
+    fprintf(stderr, "qemu: kernel_cmdline: %#zx ('%.*s')\n", ncmdline, (int)ncmdline, kernel_cmdline);
5a14364
+    cpu_physical_memory_rw(cmdline_addr, (uint8_t*)kernel_cmdline, ncmdline, 1);
5a14364
+    cpu_physical_memory_rw(cmdline_addr+ncmdline, (uint8_t*)"", 1, 1); /* terminating NUL */
5a14364
+
5a14364
+    if (protocol >= 0x202) {
5a14364
+        stl_p(header+0x228, cmdline_addr);
5a14364
+    } else {
5a14364
+        stw_p(header+0x20, 0xA33F);
5a14364
+        stw_p(header+0x22, cmdline_addr-real_addr);
5a14364
+    }
5a14364
+
5a14364
+    /* loader type */
5a14364
+    /* High nybble = B reserved for Qemu; low nybble is revision number.
5a14364
+       If this code is substantially changed, you may want to consider
5a14364
+       incrementing the revision. */
5a14364
+    if (protocol >= 0x200)
5a14364
+        header[0x210] = 0xB0;
5a14364
+
5a14364
+    /* heap */
5a14364
+    if (protocol >= 0x201) {
5a14364
+        header[0x211] |= 0x80;  /* CAN_USE_HEAP */
5a14364
+        stw_p(header+0x224, cmdline_addr-real_addr-0x200);
5a14364
+    }
5a14364
+
5a14364
+    /* load initrd */
5a14364
+    if (initrd_filename) {
5a14364
+        if (protocol < 0x200) {
5a14364
+            fprintf(stderr, "qemu: linux kernel too old to load a ram disk\n");
5a14364
+            exit(1);
5a14364
+        }
5a14364
+
5a14364
+        fi = fopen(initrd_filename, "rb");
5a14364
+        if (!fi) {
5a14364
+            fprintf(stderr, "qemu: could not load initial ram disk '%s'\n",
5a14364
+                    initrd_filename);
5a14364
+            exit(1);
5a14364
+        }
5a14364
+
5a14364
+        initrd_size = get_file_size(fi);
5a14364
+        initrd_addr = ((initrd_max-initrd_size) & ~4095); /* as high as allowed, rounded down to 4096 */
5a14364
+
5a14364
+        fprintf(stderr, "qemu: loading initrd (%#x bytes) at %#zx\n",
5a14364
+                initrd_size, initrd_addr);
5a14364
+
5a14364
+	if (fread2guest(initrd_addr, initrd_size, fi) < 0) {
5a14364
+	    fprintf(stderr, "qemu: read error on initial ram disk '%s'\n",
5a14364
+		    initrd_filename);
5a14364
+	    exit(1);
5a14364
+	}
5a14364
+        fclose(fi);
5a14364
+
5a14364
+        stl_p(header+0x218, initrd_addr);
5a14364
+        stl_p(header+0x21c, initrd_size);
5a14364
+    }
5a14364
+
5a14364
+
5a14364
+    setup_size = header[0x1f1];
5a14364
+    if (setup_size == 0)
5a14364
+        setup_size = 4;
5a14364
+
5a14364
+    setup_size = (setup_size+1)*512;
5a14364
+    kernel_size -= setup_size;  /* Size of protected-mode code */
5a14364
+
5a14364
+    /* Urgh, Xen's HVM firmware lives at 0x100000, but that's also the
5a14364
+     * address Linux wants to start life at prior to relocatable support
5a14364
+     */
5a14364
+    if (prot_addr != reloc_prot_addr) {
5a14364
+        if (protocol >= 0x205 && (header[0x234] & 1)) {
5a14364
+	    /* Relocatable automatically */
5a14364
+	    stl_p(header+0x214, reloc_prot_addr);
5a14364
+	    fprintf(stderr, "qemu: kernel is relocatable\n");
5a14364
+	} else {
5a14364
+	    fprintf(stderr, "qemu: unable to load non-relocatable kernel\n");
5a14364
+	    exit(1);
5a14364
+	}
5a14364
+    }
5a14364
+
5a14364
+
5a14364
+    fprintf(stderr, "qemu: loading kernel real mode (%#x bytes) at %#zx\n",
5a14364
+	    setup_size-1024, real_addr);
5a14364
+    fprintf(stderr, "qemu: loading kernel protected mode (%#x bytes) at %#zx\n",
5a14364
+	    kernel_size, reloc_prot_addr);
5a14364
+
5a14364
+    /* store the finalized header and load the rest of the kernel */
5a14364
+    cpu_physical_memory_rw(real_addr, header, 1024, 1);
5a14364
+    if (fread2guest(real_addr+1024, setup_size-1024, f) < 0 ||
5a14364
+        fread2guest(reloc_prot_addr, kernel_size, f) < 0) {
5a14364
+	fprintf(stderr, "qemu: read error on kernel '%s'\n",
5a14364
+		kernel_filename);
5a14364
+	exit(1);
5a14364
+    }
5a14364
+    fclose(f);
5a14364
+
5a14364
+    /* generate bootsector to set up the initial register state */
5a14364
+    real_seg = (real_addr) >> 4;
5a14364
+    seg[0] = seg[2] = seg[3] = seg[4] = seg[5] = real_seg; /* every segment but CS, incl. seg[5] */
5a14364
+    seg[1] = real_seg+0x20;     /* CS */
5a14364
+    memset(gpr, 0, sizeof gpr);
5a14364
+    gpr[4] = cmdline_addr-real_addr-16; /* SP (-16 is paranoia) */
5a14364
+
5a14364
+    generate_bootsect(gpr, seg, 0);
5a14364
 }
5a14364
 
5a14364
 static void main_cpu_reset(void *opaque)
5a14364
@@ -577,63 +810,8 @@ static void pc_init1(uint64_t ram_size, 
5a14364
     
5a14364
     bochs_bios_init();
5a14364
 
5a14364
-#ifndef CONFIG_DM
5a14364
-    if (linux_boot) {
5a14364
-        uint8_t bootsect[512];
5a14364
-        uint8_t old_bootsect[512];
5a14364
-
5a14364
-        if (bs_table[0] == NULL) {
5a14364
-            fprintf(stderr, "A disk image must be given for 'hda' when booting a Linux kernel\n");
5a14364
-            exit(1);
5a14364
-        }
5a14364
-        snprintf(buf, sizeof(buf), "%s/%s", bios_dir, LINUX_BOOT_FILENAME);
5a14364
-        ret = load_image(buf, bootsect);
5a14364
-        if (ret != sizeof(bootsect)) {
5a14364
-            fprintf(stderr, "qemu: could not load linux boot sector '%s'\n",
5a14364
-                    buf);
5a14364
-            exit(1);
5a14364
-        }
5a14364
-
5a14364
-        if (bdrv_read(bs_table[0], 0, old_bootsect, 1) >= 0) {
5a14364
-            /* copy the MSDOS partition table */
5a14364
-            memcpy(bootsect + 0x1be, old_bootsect + 0x1be, 0x40);
5a14364
-        }
5a14364
-
5a14364
-        bdrv_set_boot_sector(bs_table[0], bootsect, sizeof(bootsect));
5a14364
-
5a14364
-        /* now we can load the kernel */
5a14364
-        ret = load_kernel(kernel_filename, 
5a14364
-                          phys_ram_base + KERNEL_LOAD_ADDR,
5a14364
-                          phys_ram_base + KERNEL_PARAMS_ADDR);
5a14364
-        if (ret < 0) {
5a14364
-            fprintf(stderr, "qemu: could not load kernel '%s'\n", 
5a14364
-                    kernel_filename);
5a14364
-            exit(1);
5a14364
-        }
5a14364
-        
5a14364
-        /* load initrd */
5a14364
-        initrd_size = 0;
5a14364
-        if (initrd_filename) {
5a14364
-            initrd_size = load_image(initrd_filename, phys_ram_base + INITRD_LOAD_ADDR);
5a14364
-            if (initrd_size < 0) {
5a14364
-                fprintf(stderr, "qemu: could not load initial ram disk '%s'\n", 
5a14364
-                        initrd_filename);
5a14364
-                exit(1);
5a14364
-            }
5a14364
-        }
5a14364
-        if (initrd_size > 0) {
5a14364
-            stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x218, INITRD_LOAD_ADDR);
5a14364
-            stl_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x21c, initrd_size);
5a14364
-        }
5a14364
-        pstrcpy(phys_ram_base + KERNEL_CMDLINE_ADDR, 4096,
5a14364
-                kernel_cmdline);
5a14364
-        stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x20, 0xA33F);
5a14364
-        stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x22,
5a14364
-                KERNEL_CMDLINE_ADDR - KERNEL_PARAMS_ADDR);
5a14364
-        /* loader type */
5a14364
-        stw_raw(phys_ram_base + KERNEL_PARAMS_ADDR + 0x210, 0x01);
5a14364
-    }
5a14364
-#endif /* !CONFIG_DM */
5a14364
+    if (linux_boot)
5a14364
+        load_linux(kernel_filename, initrd_filename, kernel_cmdline);
5a14364
 
5a14364
     if (pci_enabled) {
5a14364
         pci_bus = i440fx_init(&i440fx_state);