20fcaf1
diff -up lua-5.4.4/lua-5.4.4-tests/utf8.lua.bug8 lua-5.4.4/lua-5.4.4-tests/utf8.lua
20fcaf1
--- lua-5.4.4/lua-5.4.4-tests/utf8.lua.bug8	2023-02-14 14:53:22.877506883 -0500
20fcaf1
+++ lua-5.4.4/lua-5.4.4-tests/utf8.lua	2023-02-14 14:53:28.297574398 -0500
20fcaf1
@@ -97,9 +97,15 @@ do    -- error indication in utf8.len
20fcaf1
     assert(not a and b == p)
20fcaf1
   end
20fcaf1
   check("abc\xE3def", 4)
20fcaf1
-  check("汉字\x80", #("汉字") + 1)
20fcaf1
   check("\xF4\x9F\xBF", 1)
20fcaf1
   check("\xF4\x9F\xBF\xBF", 1)
20fcaf1
+  -- spurious continuation bytes
20fcaf1
+  check("汉字\x80", #("汉字") + 1)
20fcaf1
+  check("\x80hello", 1)
20fcaf1
+  check("hel\x80lo", 4)
20fcaf1
+  check("汉字\xBF", #("汉字") + 1)
20fcaf1
+  check("\xBFhello", 1)
20fcaf1
+  check("hel\xBFlo", 4)
20fcaf1
 end
20fcaf1
 
20fcaf1
 -- errors in utf8.codes
20fcaf1
@@ -112,12 +118,16 @@ do
20fcaf1
   end
20fcaf1
   errorcodes("ab\xff")
20fcaf1
   errorcodes("\u{110000}")
20fcaf1
+  errorcodes("in\x80valid")
20fcaf1
+  errorcodes("\xbfinvalid")
20fcaf1
+  errorcodes("αλφ\xBFα")
20fcaf1
 
20fcaf1
   -- calling interation function with invalid arguments
20fcaf1
   local f = utf8.codes("")
20fcaf1
   assert(f("", 2) == nil)
20fcaf1
   assert(f("", -1) == nil)
20fcaf1
   assert(f("", math.mininteger) == nil)
20fcaf1
+
20fcaf1
 end
20fcaf1
 
20fcaf1
 -- error in initial position for offset
20fcaf1
diff -up lua-5.4.4/src/lutf8lib.c.bug8 lua-5.4.4/src/lutf8lib.c
20fcaf1
--- lua-5.4.4/src/lutf8lib.c.bug8	2023-02-14 14:53:49.114833691 -0500
20fcaf1
+++ lua-5.4.4/src/lutf8lib.c	2023-02-14 14:59:12.660905056 -0500
20fcaf1
@@ -25,6 +25,9 @@
20fcaf1
 
20fcaf1
 #define MAXUTF		0x7FFFFFFFu
20fcaf1
 
20fcaf1
+
20fcaf1
+#define MSGInvalid	"invalid UTF-8 code"
20fcaf1
+
20fcaf1
 /*
20fcaf1
 ** Integer type for decoded UTF-8 values; MAXUTF needs 31 bits.
20fcaf1
 */
20fcaf1
@@ -35,7 +38,8 @@ typedef unsigned long utfint;
20fcaf1
 #endif
20fcaf1
 
20fcaf1
 
20fcaf1
-#define iscont(p)	((*(p) & 0xC0) == 0x80)
20fcaf1
+#define iscont(c)	(((c) & 0xC0) == 0x80)
20fcaf1
+#define iscontp(p)	iscont(*(p))
20fcaf1
 
20fcaf1
 
20fcaf1
 /* from strlib */
20fcaf1
@@ -65,7 +69,7 @@ static const char *utf8_decode (const ch
20fcaf1
     int count = 0;  /* to count number of continuation bytes */
20fcaf1
     for (; c & 0x40; c <<= 1) {  /* while it needs continuation bytes... */
20fcaf1
       unsigned int cc = (unsigned char)s[++count];  /* read next byte */
20fcaf1
-      if ((cc & 0xC0) != 0x80)  /* not a continuation byte? */
20fcaf1
+      if (!iscont(cc))  /* not a continuation byte? */
20fcaf1
         return NULL;  /* invalid byte sequence */
20fcaf1
       res = (res << 6) | (cc & 0x3F);  /* add lower 6 bits from cont. byte */
20fcaf1
     }
20fcaf1
@@ -140,7 +144,7 @@ static int codepoint (lua_State *L) {
20fcaf1
     utfint code;
20fcaf1
     s = utf8_decode(s, &code, !lax);
20fcaf1
     if (s == NULL)
20fcaf1
-      return luaL_error(L, "invalid UTF-8 code");
20fcaf1
+      return luaL_error(L, MSGInvalid);
20fcaf1
     lua_pushinteger(L, code);
20fcaf1
     n++;
20fcaf1
   }
20fcaf1
@@ -190,16 +194,16 @@ static int byteoffset (lua_State *L) {
20fcaf1
                    "position out of bounds");
20fcaf1
   if (n == 0) {
20fcaf1
     /* find beginning of current byte sequence */
20fcaf1
-    while (posi > 0 && iscont(s + posi)) posi--;
20fcaf1
+    while (posi > 0 && iscontp(s + posi)) posi--;
20fcaf1
   }
20fcaf1
   else {
20fcaf1
-    if (iscont(s + posi))
20fcaf1
+    if (iscontp(s + posi))
20fcaf1
       return luaL_error(L, "initial position is a continuation byte");
20fcaf1
     if (n < 0) {
20fcaf1
        while (n < 0 && posi > 0) {  /* move back */
20fcaf1
          do {  /* find beginning of previous character */
20fcaf1
            posi--;
20fcaf1
-         } while (posi > 0 && iscont(s + posi));
20fcaf1
+         } while (posi > 0 && iscontp(s + posi));
20fcaf1
          n++;
20fcaf1
        }
20fcaf1
      }
20fcaf1
@@ -208,7 +212,7 @@ static int byteoffset (lua_State *L) {
20fcaf1
        while (n > 0 && posi < (lua_Integer)len) {
20fcaf1
          do {  /* find beginning of next character */
20fcaf1
            posi++;
20fcaf1
-         } while (iscont(s + posi));  /* (cannot pass final '\0') */
20fcaf1
+         } while (iscontp(s + posi));  /* (cannot pass final '\0') */
20fcaf1
          n--;
20fcaf1
        }
20fcaf1
      }
20fcaf1
@@ -226,15 +230,15 @@ static int iter_aux (lua_State *L, int s
20fcaf1
   const char *s = luaL_checklstring(L, 1, &len);
20fcaf1
   lua_Unsigned n = (lua_Unsigned)lua_tointeger(L, 2);
20fcaf1
   if (n < len) {
20fcaf1
-    while (iscont(s + n)) n++;  /* skip continuation bytes */
20fcaf1
+    while (iscontp(s + n)) n++;  /* go to next character */
20fcaf1
   }
20fcaf1
   if (n >= len)  /* (also handles original 'n' being negative) */
20fcaf1
     return 0;  /* no more codepoints */
20fcaf1
   else {
20fcaf1
     utfint code;
20fcaf1
     const char *next = utf8_decode(s + n, &code, strict);
20fcaf1
-    if (next == NULL)
20fcaf1
-      return luaL_error(L, "invalid UTF-8 code");
20fcaf1
+    if (next == NULL || iscontp(next))
20fcaf1
+      return luaL_error(L, MSGInvalid);
20fcaf1
     lua_pushinteger(L, n + 1);
20fcaf1
     lua_pushinteger(L, code);
20fcaf1
     return 2;
20fcaf1
@@ -253,7 +257,8 @@ static int iter_auxlax (lua_State *L) {
20fcaf1
 
20fcaf1
 static int iter_codes (lua_State *L) {
20fcaf1
   int lax = lua_toboolean(L, 2);
20fcaf1
-  luaL_checkstring(L, 1);
20fcaf1
+  const char *s = luaL_checkstring(L, 1);
20fcaf1
+  luaL_argcheck(L, !iscontp(s), 1, MSGInvalid);
20fcaf1
   lua_pushcfunction(L, lax ? iter_auxlax : iter_auxstrict);
20fcaf1
   lua_pushvalue(L, 1);
20fcaf1
   lua_pushinteger(L, 0);