Blob Blame History Raw
commit 5e381e3878b5da87ee7542f7e51c3c1a7fd84b89
Author: John MacFarlane <jgm@berkeley.edu>
Date:   Tue Jun 20 13:50:13 2023 -0700

    Fix a security vulnerability in MediaBag and T.P.Class.IO.writeMedia.
    
    This vulnerability, discovered by Entroy C, allows users to write
    arbitrary files to any location by feeding pandoc a specially crafted
    URL in an image element.  The vulnerability is serious for anyone
    using pandoc to process untrusted input.  The vulnerability does
    not affect pandoc when run with the `--sandbox` flag.

diff -up pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs.orig0 pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs
--- pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs.orig0	2001-09-09 09:46:40.000000000 +0800
+++ pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs	2024-02-28 02:30:14.070207552 +0800
@@ -50,7 +50,7 @@ import Network.HTTP.Client.Internal (add
 import Network.HTTP.Client.TLS (mkManagerSettings)
 import Network.HTTP.Types.Header ( hContentType )
 import Network.Socket (withSocketsDo)
-import Network.URI (unEscapeString)
+import Network.URI (URI(..), parseURI)
 import System.Directory (createDirectoryIfMissing)
 import System.Environment (getEnv)
 import System.FilePath ((</>), takeDirectory, normalise)
@@ -122,11 +122,11 @@ newUniqueHash = hashUnique <$> liftIO Da
 
 openURL :: (PandocMonad m, MonadIO m) => Text -> m (B.ByteString, Maybe MimeType)
 openURL u
- | Just u'' <- T.stripPrefix "data:" u = do
-     let mime     = T.takeWhile (/=',') u''
-     let contents = UTF8.fromString $
-                     unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u''
-     return (decodeBase64Lenient contents, Just mime)
+ | Just (URI{ uriScheme = "data:",
+              uriPath = upath }) <- parseURI (T.unpack u) = do
+     let (mime, rest) = break (== '.') upath
+     let contents = UTF8.fromString $ drop 1 rest
+     return (decodeBase64Lenient contents, Just (T.pack mime))
  | otherwise = do
      let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v)
      customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders
@@ -224,7 +224,7 @@ writeMedia :: (PandocMonad m, MonadIO m)
            -> m ()
 writeMedia dir (fp, _mt, bs) = do
   -- we normalize to get proper path separators for the platform
-  let fullpath = normalise $ dir </> unEscapeString fp
+  let fullpath = normalise $ dir </> fp
   liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath)
   logIOError $ BL.writeFile fullpath bs
 
diff -up pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs.orig0 pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs
--- pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs.orig0	2001-09-09 09:46:40.000000000 +0800
+++ pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs	2024-02-28 02:29:19.159845432 +0800
@@ -28,6 +28,7 @@ import Data.Data (Data)
 import qualified Data.Map as M
 import Data.Maybe (fromMaybe, isNothing)
 import Data.Typeable (Typeable)
+import Network.URI (unEscapeString)
 import System.FilePath
 import qualified System.FilePath.Posix as Posix
 import qualified System.FilePath.Windows as Windows
@@ -35,7 +36,7 @@ import Text.Pandoc.MIME (MimeType, getMi
 import Data.Text (Text)
 import qualified Data.Text as T
 import Data.Digest.Pure.SHA (sha1, showDigest)
-import Network.URI (URI (..), parseURI)
+import Network.URI (URI (..), parseURI, isURI)
 
 data MediaItem =
   MediaItem
@@ -54,9 +55,12 @@ newtype MediaBag = MediaBag (M.Map Text
 instance Show MediaBag where
   show bag = "MediaBag " ++ show (mediaDirectory bag)
 
--- | We represent paths with /, in normalized form.
+-- | We represent paths with /, in normalized form.  Percent-encoding
+-- is resolved.
 canonicalize :: FilePath -> Text
-canonicalize = T.replace "\\" "/" . T.pack . normalise
+canonicalize fp
+  | isURI fp = T.pack fp
+  | otherwise = T.replace "\\" "/" . T.pack . normalise . unEscapeString $ fp
 
 -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds
 -- to the given path.
@@ -79,23 +83,23 @@ insertMedia fp mbMime contents (MediaBag
                              , mediaContents = contents
                              , mediaMimeType = mt }
         fp' = canonicalize fp
+        fp'' = T.unpack fp'
         uri = parseURI fp
-        newpath = if Posix.isRelative fp
-                       && Windows.isRelative fp
+        newpath = if Posix.isRelative fp''
+                       && Windows.isRelative fp''
                        && isNothing uri
-                       && ".." `notElem` splitDirectories fp
-                     then T.unpack fp'
+                       && not (".." `T.isInfixOf` fp')
+                     then fp''
                      else showDigest (sha1 contents) <> "." <> ext
-        fallback = case takeExtension fp of
-                        ".gz" -> getMimeTypeDef $ dropExtension fp
-                        _     -> getMimeTypeDef fp
+        fallback = case takeExtension fp'' of
+                        ".gz" -> getMimeTypeDef $ dropExtension fp''
+                        _     -> getMimeTypeDef fp''
         mt = fromMaybe fallback mbMime
-        path = maybe fp uriPath uri
+        path = maybe fp'' (unEscapeString . uriPath) uri
         ext = case takeExtension path of
                 '.':e -> e
                 _ -> maybe "" T.unpack $ extensionFromMimeType mt
 
-
 -- | Lookup a media item in a 'MediaBag', returning mime type and contents.
 lookupMedia :: FilePath
             -> MediaBag