commit 5e381e3878b5da87ee7542f7e51c3c1a7fd84b89 Author: John MacFarlane Date: Tue Jun 20 13:50:13 2023 -0700 Fix a security vulnerability in MediaBag and T.P.Class.IO.writeMedia. This vulnerability, discovered by Entroy C, allows users to write arbitrary files to any location by feeding pandoc a specially crafted URL in an image element. The vulnerability is serious for anyone using pandoc to process untrusted input. The vulnerability does not affect pandoc when run with the `--sandbox` flag. diff -up pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs.orig0 pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs --- pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs.orig0 2001-09-09 09:46:40.000000000 +0800 +++ pandoc-3.1.3/src/Text/Pandoc/Class/IO.hs 2024-02-28 02:30:14.070207552 +0800 @@ -50,7 +50,7 @@ import Network.HTTP.Client.Internal (add import Network.HTTP.Client.TLS (mkManagerSettings) import Network.HTTP.Types.Header ( hContentType ) import Network.Socket (withSocketsDo) -import Network.URI (unEscapeString) +import Network.URI (URI(..), parseURI) import System.Directory (createDirectoryIfMissing) import System.Environment (getEnv) import System.FilePath ((), takeDirectory, normalise) @@ -122,11 +122,11 @@ newUniqueHash = hashUnique <$> liftIO Da openURL :: (PandocMonad m, MonadIO m) => Text -> m (B.ByteString, Maybe MimeType) openURL u - | Just u'' <- T.stripPrefix "data:" u = do - let mime = T.takeWhile (/=',') u'' - let contents = UTF8.fromString $ - unEscapeString $ T.unpack $ T.drop 1 $ T.dropWhile (/=',') u'' - return (decodeBase64Lenient contents, Just mime) + | Just (URI{ uriScheme = "data:", + uriPath = upath }) <- parseURI (T.unpack u) = do + let (mime, rest) = break (== '.') upath + let contents = UTF8.fromString $ drop 1 rest + return (decodeBase64Lenient contents, Just (T.pack mime)) | otherwise = do let toReqHeader (n, v) = (CI.mk (UTF8.fromText n), UTF8.fromText v) customHeaders <- map toReqHeader <$> getsCommonState stRequestHeaders @@ -224,7 +224,7 @@ writeMedia :: (PandocMonad m, MonadIO m) -> m () writeMedia dir (fp, _mt, bs) = do -- we normalize to get proper path separators for the platform - let fullpath = normalise $ dir unEscapeString fp + let fullpath = normalise $ dir fp liftIOError (createDirectoryIfMissing True) (takeDirectory fullpath) logIOError $ BL.writeFile fullpath bs diff -up pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs.orig0 pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs --- pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs.orig0 2001-09-09 09:46:40.000000000 +0800 +++ pandoc-3.1.3/src/Text/Pandoc/MediaBag.hs 2024-02-28 02:29:19.159845432 +0800 @@ -28,6 +28,7 @@ import Data.Data (Data) import qualified Data.Map as M import Data.Maybe (fromMaybe, isNothing) import Data.Typeable (Typeable) +import Network.URI (unEscapeString) import System.FilePath import qualified System.FilePath.Posix as Posix import qualified System.FilePath.Windows as Windows @@ -35,7 +36,7 @@ import Text.Pandoc.MIME (MimeType, getMi import Data.Text (Text) import qualified Data.Text as T import Data.Digest.Pure.SHA (sha1, showDigest) -import Network.URI (URI (..), parseURI) +import Network.URI (URI (..), parseURI, isURI) data MediaItem = MediaItem @@ -54,9 +55,12 @@ newtype MediaBag = MediaBag (M.Map Text instance Show MediaBag where show bag = "MediaBag " ++ show (mediaDirectory bag) --- | We represent paths with /, in normalized form. +-- | We represent paths with /, in normalized form. Percent-encoding +-- is resolved. canonicalize :: FilePath -> Text -canonicalize = T.replace "\\" "/" . T.pack . normalise +canonicalize fp + | isURI fp = T.pack fp + | otherwise = T.replace "\\" "/" . T.pack . normalise . unEscapeString $ fp -- | Delete a media item from a 'MediaBag', or do nothing if no item corresponds -- to the given path. @@ -79,23 +83,23 @@ insertMedia fp mbMime contents (MediaBag , mediaContents = contents , mediaMimeType = mt } fp' = canonicalize fp + fp'' = T.unpack fp' uri = parseURI fp - newpath = if Posix.isRelative fp - && Windows.isRelative fp + newpath = if Posix.isRelative fp'' + && Windows.isRelative fp'' && isNothing uri - && ".." `notElem` splitDirectories fp - then T.unpack fp' + && not (".." `T.isInfixOf` fp') + then fp'' else showDigest (sha1 contents) <> "." <> ext - fallback = case takeExtension fp of - ".gz" -> getMimeTypeDef $ dropExtension fp - _ -> getMimeTypeDef fp + fallback = case takeExtension fp'' of + ".gz" -> getMimeTypeDef $ dropExtension fp'' + _ -> getMimeTypeDef fp'' mt = fromMaybe fallback mbMime - path = maybe fp uriPath uri + path = maybe fp'' (unEscapeString . uriPath) uri ext = case takeExtension path of '.':e -> e _ -> maybe "" T.unpack $ extensionFromMimeType mt - -- | Lookup a media item in a 'MediaBag', returning mime type and contents. lookupMedia :: FilePath -> MediaBag