Blob Blame History Raw
diff --git a/jchuff.c b/jchuff.c
index d8eb1d3..447209a 100644
--- a/jchuff.c
+++ b/jchuff.c
@@ -37,7 +37,7 @@
  */
 
 /* NOTE: Both GCC and Clang define __GNUC__ */
-#if defined __GNUC__ && defined __arm__
+#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
 #if !defined __thumb__ || defined __thumb2__
 #define USE_CLZ_INTRINSIC
 #endif
diff --git a/jdcol565.c b/jdcol565.c
index a2c98f3..43d285d 100644
--- a/jdcol565.c
+++ b/jdcol565.c
@@ -13,42 +13,11 @@
 /* This file is included by jdcolor.c */
 
 
-#define PACK_SHORT_565(r, g, b)   ((((r) << 8) & 0xf800) |  \
-                                   (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_TWO_PIXELS(l, r)     ((r << 16) | l)
-#define PACK_NEED_ALIGNMENT(ptr)  (((size_t)(ptr)) & 3)
-
-#define WRITE_TWO_PIXELS(addr, pixels) {  \
-  ((INT16*)(addr))[0] = (pixels);  \
-  ((INT16*)(addr))[1] = (pixels) >> 16;  \
-}
-#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(INT32 *)(addr)) = pixels)
-
-#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
-#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
-#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
-
-
-/* Declarations for ordered dithering
- *
- * We use a 4x4 ordered dither array packed into 32 bits.  This array is
- * sufficent for dithering RGB888 to RGB565.
- */
-
-#define DITHER_MASK       0x3
-#define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
-static const INT32 dither_matrix[4] = {
-  0x0008020A,
-  0x0C040E06,
-  0x030B0109,
-  0x0F070D05
-};
-
-
-METHODDEF(void)
-ycc_rgb565_convert (j_decompress_ptr cinfo,
-                    JSAMPIMAGE input_buf, JDIMENSION input_row,
-                    JSAMPARRAY output_buf, int num_rows)
+INLINE
+LOCAL(void)
+ycc_rgb565_convert_internal (j_decompress_ptr cinfo,
+                             JSAMPIMAGE input_buf, JDIMENSION input_row,
+                             JSAMPARRAY output_buf, int num_rows)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int y, cb, cr;
@@ -123,10 +92,11 @@ ycc_rgb565_convert (j_decompress_ptr cinfo,
 }
 
 
-METHODDEF(void)
-ycc_rgb565D_convert (j_decompress_ptr cinfo,
-                     JSAMPIMAGE input_buf, JDIMENSION input_row,
-                     JSAMPARRAY output_buf, int num_rows)
+INLINE
+LOCAL(void)
+ycc_rgb565D_convert_internal (j_decompress_ptr cinfo,
+                              JSAMPIMAGE input_buf, JDIMENSION input_row,
+                              JSAMPARRAY output_buf, int num_rows)
 {
   my_cconvert_ptr cconvert = (my_cconvert_ptr) cinfo->cconvert;
   register int y, cb, cr;
@@ -208,10 +178,11 @@ ycc_rgb565D_convert (j_decompress_ptr cinfo,
 }
 
 
-METHODDEF(void)
-rgb_rgb565_convert (j_decompress_ptr cinfo,
-                    JSAMPIMAGE input_buf, JDIMENSION input_row,
-                    JSAMPARRAY output_buf, int num_rows)
+INLINE
+LOCAL(void)
+rgb_rgb565_convert_internal (j_decompress_ptr cinfo,
+                             JSAMPIMAGE input_buf, JDIMENSION input_row,
+                             JSAMPARRAY output_buf, int num_rows)
 {
   register JSAMPROW outptr;
   register JSAMPROW inptr0, inptr1, inptr2;
@@ -262,10 +233,11 @@ rgb_rgb565_convert (j_decompress_ptr cinfo,
 }
 
 
-METHODDEF(void)
-rgb_rgb565D_convert (j_decompress_ptr cinfo,
-                     JSAMPIMAGE input_buf, JDIMENSION input_row,
-                     JSAMPARRAY output_buf, int num_rows)
+INLINE
+LOCAL(void)
+rgb_rgb565D_convert_internal (j_decompress_ptr cinfo,
+                              JSAMPIMAGE input_buf, JDIMENSION input_row,
+                              JSAMPARRAY output_buf, int num_rows)
 {
   register JSAMPROW outptr;
   register JSAMPROW inptr0, inptr1, inptr2;
@@ -320,10 +292,11 @@ rgb_rgb565D_convert (j_decompress_ptr cinfo,
 }
 
 
-METHODDEF(void)
-gray_rgb565_convert (j_decompress_ptr cinfo,
-                     JSAMPIMAGE input_buf, JDIMENSION input_row,
-                     JSAMPARRAY output_buf, int num_rows)
+INLINE
+LOCAL(void)
+gray_rgb565_convert_internal (j_decompress_ptr cinfo,
+                              JSAMPIMAGE input_buf, JDIMENSION input_row,
+                              JSAMPARRAY output_buf, int num_rows)
 {
   register JSAMPROW inptr, outptr;
   register JDIMENSION col;
@@ -359,10 +332,11 @@ gray_rgb565_convert (j_decompress_ptr cinfo,
 }
 
 
-METHODDEF(void)
-gray_rgb565D_convert (j_decompress_ptr cinfo,
-                      JSAMPIMAGE input_buf, JDIMENSION input_row,
-                      JSAMPARRAY output_buf, int num_rows)
+INLINE
+LOCAL(void)
+gray_rgb565D_convert_internal (j_decompress_ptr cinfo,
+                               JSAMPIMAGE input_buf, JDIMENSION input_row,
+                               JSAMPARRAY output_buf, int num_rows)
 {
   register JSAMPROW inptr, outptr;
   register JDIMENSION col;
diff --git a/jdcolor.c b/jdcolor.c
index ffedabd..779fa51 100644
--- a/jdcolor.c
+++ b/jdcolor.c
@@ -544,7 +544,162 @@ ycck_cmyk_convert (j_decompress_ptr cinfo,
 }
 
 
+/*
+ * RGB565 conversion
+ */
+
+#define PACK_SHORT_565_LE(r, g, b)   ((((r) << 8) & 0xF800) |  \
+                                      (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b)   (((r) & 0xF8) | ((g) >> 5) |  \
+                                      (((g) << 11) & 0xE000) |  \
+                                      (((b) << 5) & 0x1F00))
+
+#define PACK_TWO_PIXELS_LE(l, r)     (((r) << 16) | (l))
+#define PACK_TWO_PIXELS_BE(l, r)     (((l) << 16) | (r))
+
+#define PACK_NEED_ALIGNMENT(ptr)     (((size_t)(ptr)) & 3)
+
+#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(int *)(addr)) = (pixels))
+
+#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
+#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
+#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
+
+
+/* Declarations for ordered dithering
+ *
+ * We use a 4x4 ordered dither array packed into 32 bits.  This array is
+ * sufficient for dithering RGB888 to RGB565.
+ */
+
+#define DITHER_MASK       0x3  /* low 2 bits: an index into dither_matrix */
+#define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
+static const INT32 dither_matrix[4] = {  /* 4 rows, 4 packed bytes each */
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+
+static INLINE boolean is_big_endian(void)
+{
+  /* Store 1 in an int and inspect its lowest-addressed byte. */
+  const int probe = 1;
+  const char *first_byte = (const char *)&probe;
+  return (*first_byte != 1) ? TRUE : FALSE;
+}
+
+
+/* Include inline routines for RGB565 conversion */
+
+#define PACK_SHORT_565 PACK_SHORT_565_LE
+#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE
+#define ycc_rgb565_convert_internal ycc_rgb565_convert_le
+#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_le
+#define rgb_rgb565_convert_internal rgb_rgb565_convert_le
+#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_le
+#define gray_rgb565_convert_internal gray_rgb565_convert_le
+#define gray_rgb565D_convert_internal gray_rgb565D_convert_le
+#include "jdcol565.c"  /* LE pass: *_internal names expand to *_le */
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef ycc_rgb565_convert_internal
+#undef ycc_rgb565D_convert_internal
+#undef rgb_rgb565_convert_internal
+#undef rgb_rgb565D_convert_internal
+#undef gray_rgb565_convert_internal
+#undef gray_rgb565D_convert_internal
+
+#define PACK_SHORT_565 PACK_SHORT_565_BE  /* BE pass: *_internal -> *_be */
+#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE
+#define ycc_rgb565_convert_internal ycc_rgb565_convert_be
+#define ycc_rgb565D_convert_internal ycc_rgb565D_convert_be
+#define rgb_rgb565_convert_internal rgb_rgb565_convert_be
+#define rgb_rgb565D_convert_internal rgb_rgb565D_convert_be
+#define gray_rgb565_convert_internal gray_rgb565_convert_be
+#define gray_rgb565D_convert_internal gray_rgb565D_convert_be
 #include "jdcol565.c"
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef ycc_rgb565_convert_internal
+#undef ycc_rgb565D_convert_internal
+#undef rgb_rgb565_convert_internal
+#undef rgb_rgb565D_convert_internal
+#undef gray_rgb565_convert_internal
+#undef gray_rgb565D_convert_internal
+
+
+METHODDEF(void)
+ycc_rgb565_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  /* Hand off to the variant built for this host's byte order. */
+  if (!is_big_endian())
+    ycc_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+ycc_rgb565D_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                     JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  /* Hand off to the dithered variant built for this host's byte order. */
+  if (!is_big_endian())
+    ycc_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+rgb_rgb565_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  /* Hand off to the variant built for this host's byte order. */
+  if (!is_big_endian())
+    rgb_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+rgb_rgb565D_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                     JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  /* Hand off to the dithered variant built for this host's byte order. */
+  if (!is_big_endian())
+    rgb_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+gray_rgb565_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                     JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  /* Hand off to the variant built for this host's byte order. */
+  if (!is_big_endian())
+    gray_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+gray_rgb565D_convert (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                      JDIMENSION input_row, JSAMPARRAY output_buf, int num_rows)
+{
+  /* Hand off to the dithered variant built for this host's byte order. */
+  if (!is_big_endian())
+    gray_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+}
 
 
 /*
diff --git a/jdmerge.c b/jdmerge.c
index f89d69f..d857183 100644
--- a/jdmerge.c
+++ b/jdmerge.c
@@ -45,38 +45,6 @@
 #ifdef UPSAMPLE_MERGING_SUPPORTED
 
 
-#define PACK_SHORT_565(r, g, b)   ((((r) << 8) & 0xf800) |  \
-                                   (((g) << 3) & 0x7E0) | ((b) >> 3))
-#define PACK_TWO_PIXELS(l, r)     ((r << 16) | l)
-#define PACK_NEED_ALIGNMENT(ptr)  (((size_t)(ptr)) & 3)
-
-#define WRITE_TWO_PIXELS(addr, pixels) {  \
-  ((INT16*)(addr))[0] = (pixels);  \
-  ((INT16*)(addr))[1] = (pixels) >> 16;  \
-}
-#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(INT32 *)(addr)) = pixels)
-
-#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
-#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
-#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
-
-
-/* Declarations for ordered dithering
- *
- * We use a 4x4 ordered dither array packed into 32 bits.  This array is
- * sufficent for dithering RGB888 to RGB565.
- */
-
-#define DITHER_MASK       0x3
-#define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
-static const INT32 dither_matrix[4] = {
-  0x0008020A,
-  0x0C040E06,
-  0x030B0109,
-  0x0F070D05
-};
-
-
 /* Private subobject */
 
 typedef struct {
@@ -450,74 +418,107 @@ h2v2_merged_upsample (j_decompress_ptr cinfo,
   }
 }
 
+/*
+ * RGB565 conversion
+ */
+
+#define PACK_SHORT_565_LE(r, g, b)   ((((r) << 8) & 0xF800) |  \
+                                      (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b)   (((r) & 0xF8) | ((g) >> 5) |  \
+                                      (((g) << 11) & 0xE000) |  \
+                                      (((b) << 5) & 0x1F00))
+
+#define PACK_TWO_PIXELS_LE(l, r)     (((r) << 16) | (l))
+#define PACK_TWO_PIXELS_BE(l, r)     (((l) << 16) | (r))
+
+#define PACK_NEED_ALIGNMENT(ptr)  (((size_t)(ptr)) & 3)
+
+#define WRITE_TWO_PIXELS_LE(addr, pixels) {  \
+  ((INT16*)(addr))[0] = (pixels);  \
+  ((INT16*)(addr))[1] = (pixels) >> 16;  \
+}
+#define WRITE_TWO_PIXELS_BE(addr, pixels) {  \
+  ((INT16*)(addr))[1] = (pixels);  \
+  ((INT16*)(addr))[0] = (pixels) >> 16;  \
+}
+
+#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
+#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
+#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
+
+
+/* Declarations for ordered dithering
+ *
+ * We use a 4x4 ordered dither array packed into 32 bits.  This array is
+ * sufficient for dithering RGB888 to RGB565.
+ */
+
+#define DITHER_MASK       0x3  /* low 2 bits: an index into dither_matrix */
+#define DITHER_ROTATE(x)  (((x) << 24) | (((x) >> 8) & 0x00FFFFFF))
+static const INT32 dither_matrix[4] = {  /* 4 rows, 4 packed bytes each */
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+
+/* Include inline routines for RGB565 conversion */
+
+#define PACK_SHORT_565 PACK_SHORT_565_LE
+#define PACK_TWO_PIXELS PACK_TWO_PIXELS_LE
+#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_LE
+#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_le
+#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_le
+#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_le
+#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_le
+#include "jdmrg565.c"  /* LE pass: *_internal names expand to *_le */
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef WRITE_TWO_PIXELS
+#undef h2v1_merged_upsample_565_internal
+#undef h2v1_merged_upsample_565D_internal
+#undef h2v2_merged_upsample_565_internal
+#undef h2v2_merged_upsample_565D_internal
+
+#define PACK_SHORT_565 PACK_SHORT_565_BE
+#define PACK_TWO_PIXELS PACK_TWO_PIXELS_BE
+#define WRITE_TWO_PIXELS WRITE_TWO_PIXELS_BE
+#define h2v1_merged_upsample_565_internal h2v1_merged_upsample_565_be
+#define h2v1_merged_upsample_565D_internal h2v1_merged_upsample_565D_be
+#define h2v2_merged_upsample_565_internal h2v2_merged_upsample_565_be
+#define h2v2_merged_upsample_565D_internal h2v2_merged_upsample_565D_be
+#include "jdmrg565.c"  /* BE pass: *_internal names expand to *_be */
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef WRITE_TWO_PIXELS
+#undef h2v1_merged_upsample_565_internal
+#undef h2v1_merged_upsample_565D_internal
+#undef h2v2_merged_upsample_565_internal
+#undef h2v2_merged_upsample_565D_internal
+
+
+static INLINE boolean is_big_endian(void)
+{
+  /* Store 1 in an int and inspect its lowest-addressed byte. */
+  const int probe = 1;
+  const char *first_byte = (const char *)&probe;
+  return (*first_byte != 1) ? TRUE : FALSE;
+}
+
 
 METHODDEF(void)
 h2v1_merged_upsample_565 (j_decompress_ptr cinfo,
                           JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                           JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  unsigned int r, g, b;
-  INT32 rgb;
-  SHIFT_TEMPS
-
-  inptr0 = input_buf[0][in_row_group_ctr];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr = output_buf[0];
-
-  /* Loop for each pair of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-
-    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_SHORT_565(r, g, b);
-
-    y  = GETJSAMPLE(*inptr0++);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
-
-    WRITE_TWO_PIXELS(outptr, rgb);
-    outptr += 4;
-  }
-
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_SHORT_565(r, g, b);
-    *(INT16*)outptr = rgb;
-   }
- }
+  if (is_big_endian())
+    h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+  else
+    h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+}
 
 
 METHODDEF(void)
@@ -525,70 +526,12 @@ h2v1_merged_upsample_565D (j_decompress_ptr cinfo,
                            JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                            JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr;
-  JSAMPROW inptr0, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
-  unsigned int r, g, b;
-  INT32 rgb;
-  SHIFT_TEMPS
-
-  inptr0 = input_buf[0][in_row_group_ctr];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr = output_buf[0];
-
-  /* Loop for each pair of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-
-    /* Fetch 2 Y values and emit 2 pixels */
-    y  = GETJSAMPLE(*inptr0++);
-    r = range_limit[DITHER_565_R(y + cred, d0)];
-    g = range_limit[DITHER_565_G(y + cgreen, d0)];
-    b = range_limit[DITHER_565_B(y + cblue, d0)];
-    d0 = DITHER_ROTATE(d0);
-    rgb = PACK_SHORT_565(r, g, b);
-
-    y  = GETJSAMPLE(*inptr0++);
-    r = range_limit[DITHER_565_R(y + cred, d0)];
-    g = range_limit[DITHER_565_G(y + cgreen, d0)];
-    b = range_limit[DITHER_565_B(y + cblue, d0)];
-    d0 = DITHER_ROTATE(d0);
-    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
-
-    WRITE_TWO_PIXELS(outptr, rgb);
-    outptr += 4;
-  }
-
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-    y  = GETJSAMPLE(*inptr0);
-    r = range_limit[DITHER_565_R(y + cred, d0)];
-    g = range_limit[DITHER_565_G(y + cgreen, d0)];
-    b = range_limit[DITHER_565_B(y + cblue, d0)];
-    rgb = PACK_SHORT_565(r, g, b);
-    *(INT16*)outptr = rgb;
-  }
+  if (is_big_endian())
+    h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
+  else
+    h2v1_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
 }
 
 
@@ -597,92 +540,12 @@ h2v2_merged_upsample_565 (j_decompress_ptr cinfo,
                           JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                           JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  unsigned int r, g, b;
-  INT32 rgb;
-  SHIFT_TEMPS
-
-  inptr00 = input_buf[0][in_row_group_ctr * 2];
-  inptr01 = input_buf[0][in_row_group_ctr * 2 + 1];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr0 = output_buf[0];
-  outptr1 = output_buf[1];
-
-  /* Loop for each group of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-
-    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_SHORT_565(r, g, b);
-
-    y  = GETJSAMPLE(*inptr00++);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
-
-    WRITE_TWO_PIXELS(outptr0, rgb);
-    outptr0 += 4;
-
-    y  = GETJSAMPLE(*inptr01++);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_SHORT_565(r, g, b);
-
-    y  = GETJSAMPLE(*inptr01++);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
-
-    WRITE_TWO_PIXELS(outptr1, rgb);
-    outptr1 += 4;
-  }
-
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-
-    y  = GETJSAMPLE(*inptr00);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_SHORT_565(r, g, b);
-    *(INT16*)outptr0 = rgb;
-
-    y  = GETJSAMPLE(*inptr01);
-    r = range_limit[y + cred];
-    g = range_limit[y + cgreen];
-    b = range_limit[y + cblue];
-    rgb = PACK_SHORT_565(r, g, b);
-    *(INT16*)outptr1 = rgb;
-  }
+  if (is_big_endian())
+    h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+  else
+    h2v2_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
 }
 
 
@@ -691,98 +554,12 @@ h2v2_merged_upsample_565D (j_decompress_ptr cinfo,
                            JSAMPIMAGE input_buf, JDIMENSION in_row_group_ctr,
                            JSAMPARRAY output_buf)
 {
-  my_upsample_ptr upsample = (my_upsample_ptr) cinfo->upsample;
-  register int y, cred, cgreen, cblue;
-  int cb, cr;
-  register JSAMPROW outptr0, outptr1;
-  JSAMPROW inptr00, inptr01, inptr1, inptr2;
-  JDIMENSION col;
-  /* copy these pointers into registers if possible */
-  register JSAMPLE * range_limit = cinfo->sample_range_limit;
-  int * Crrtab = upsample->Cr_r_tab;
-  int * Cbbtab = upsample->Cb_b_tab;
-  INT32 * Crgtab = upsample->Cr_g_tab;
-  INT32 * Cbgtab = upsample->Cb_g_tab;
-  INT32 d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
-  INT32 d1 = dither_matrix[(cinfo->output_scanline+1) & DITHER_MASK];
-  unsigned int r, g, b;
-  INT32 rgb;
-  SHIFT_TEMPS
-
-  inptr00 = input_buf[0][in_row_group_ctr*2];
-  inptr01 = input_buf[0][in_row_group_ctr*2 + 1];
-  inptr1 = input_buf[1][in_row_group_ctr];
-  inptr2 = input_buf[2][in_row_group_ctr];
-  outptr0 = output_buf[0];
-  outptr1 = output_buf[1];
-
-  /* Loop for each group of output pixels */
-  for (col = cinfo->output_width >> 1; col > 0; col--) {
-    /* Do the chroma part of the calculation */
-    cb = GETJSAMPLE(*inptr1++);
-    cr = GETJSAMPLE(*inptr2++);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-
-    /* Fetch 4 Y values and emit 4 pixels */
-    y  = GETJSAMPLE(*inptr00++);
-    r = range_limit[DITHER_565_R(y + cred, d0)];
-    g = range_limit[DITHER_565_G(y + cgreen, d0)];
-    b = range_limit[DITHER_565_B(y + cblue, d0)];
-    d0 = DITHER_ROTATE(d0);
-    rgb = PACK_SHORT_565(r, g, b);
-
-    y  = GETJSAMPLE(*inptr00++);
-    r = range_limit[DITHER_565_R(y + cred, d1)];
-    g = range_limit[DITHER_565_G(y + cgreen, d1)];
-    b = range_limit[DITHER_565_B(y + cblue, d1)];
-    d1 = DITHER_ROTATE(d1);
-    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
-
-    WRITE_TWO_PIXELS(outptr0, rgb);
-    outptr0 += 4;
-
-    y  = GETJSAMPLE(*inptr01++);
-    r = range_limit[DITHER_565_R(y + cred, d0)];
-    g = range_limit[DITHER_565_G(y + cgreen, d0)];
-    b = range_limit[DITHER_565_B(y + cblue, d0)];
-    d0 = DITHER_ROTATE(d0);
-    rgb = PACK_SHORT_565(r, g, b);
-
-    y  = GETJSAMPLE(*inptr01++);
-    r = range_limit[DITHER_565_R(y + cred, d1)];
-    g = range_limit[DITHER_565_G(y + cgreen, d1)];
-    b = range_limit[DITHER_565_B(y + cblue, d1)];
-    d1 = DITHER_ROTATE(d1);
-    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
-
-    WRITE_TWO_PIXELS(outptr1, rgb);
-    outptr1 += 4;
-  }
-
-  /* If image width is odd, do the last output column separately */
-  if (cinfo->output_width & 1) {
-    cb = GETJSAMPLE(*inptr1);
-    cr = GETJSAMPLE(*inptr2);
-    cred = Crrtab[cr];
-    cgreen = (int) RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
-    cblue = Cbbtab[cb];
-
-    y  = GETJSAMPLE(*inptr00);
-    r = range_limit[DITHER_565_R(y + cred, d0)];
-    g = range_limit[DITHER_565_G(y + cgreen, d0)];
-    b = range_limit[DITHER_565_B(y + cblue, d0)];
-    rgb = PACK_SHORT_565(r, g, b);
-    *(INT16*)outptr0 = rgb;
-
-    y  = GETJSAMPLE(*inptr01);
-    r = range_limit[DITHER_565_R(y + cred, d1)];
-    g = range_limit[DITHER_565_G(y + cgreen, d1)];
-    b = range_limit[DITHER_565_B(y + cblue, d1)];
-    rgb = PACK_SHORT_565(r, g, b);
-    *(INT16*)outptr1 = rgb;
-  }
+  if (is_big_endian())
+    h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
+  else
+    h2v2_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
 }
 
 
diff --git a/md5/md5.c b/md5/md5.c
index 7193e95..1e09880 100644
--- a/md5/md5.c
+++ b/md5/md5.c
@@ -39,6 +39,14 @@ static void MD5Transform(unsigned int [4], const unsigned char [64]);
 #else 
 
 /*
+ * OS X doesn't have le32toh() or htole32()
+ */
+#ifdef __APPLE__
+#include <libkern/OSByteOrder.h>
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#endif
+/*
  * Encodes input (unsigned int) into output (unsigned char). Assumes len is
  * a multiple of 4.
  */
diff --git a/wrbmp.c b/wrbmp.c
index 3a85441..5bcf23d 100644
--- a/wrbmp.c
+++ b/wrbmp.c
@@ -64,6 +64,15 @@ LOCAL(void) write_colormap
          int map_entry_size);
 
 
+static inline boolean is_big_endian(void)
+{
+  /* Store 1 in an int and inspect its lowest-addressed byte. */
+  const int probe = 1;
+  const char *first_byte = (const char *)&probe;
+  return (*first_byte != 1) ? TRUE : FALSE;
+}
+
+
 /*
  * Write some pixel data.
  * In this module rows_supplied will always be 1.
@@ -93,18 +102,18 @@ put_pixel_rows (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
   outptr = image_ptr[0];
 
   if(cinfo->out_color_space == JCS_RGB565) {
-    #define red_mask    0xF800
-    #define green_mask  0x7E0
-    #define blue_mask   0x1F
-    unsigned char  r, g, b;
+    boolean big_endian = is_big_endian();
     unsigned short *inptr2 = (unsigned short *)inptr;
     for (col = cinfo->output_width; col > 0; col--) {
-      r = (*inptr2 & red_mask) >> 11;
-      g = (*inptr2 & green_mask) >> 5;
-      b = (*inptr2 & blue_mask);
-      outptr[0] = b << 3;
-      outptr[1] = g << 2;
-      outptr[2] = r << 3;
+      if (big_endian) {
+        outptr[0] = (*inptr2 >> 5) & 0xF8;
+        outptr[1] = ((*inptr2 << 5) & 0xE0) | ((*inptr2 >> 11) & 0x1C);
+        outptr[2] = *inptr2 & 0xF8;
+      } else {
+        outptr[0] = (*inptr2 << 3) & 0xF8;
+        outptr[1] = (*inptr2 >> 3) & 0xFC;
+        outptr[2] = (*inptr2 >> 8) & 0xF8;
+      }
       outptr += 3;
       inptr2++;
     }