From 23b2c55737ca8b368197f01cc547d5439c84b862 Mon Sep 17 00:00:00 2001 From: totaam Date: Sun, 27 Nov 2022 21:19:47 +0700 Subject: [PATCH] make sure to include cuda libs when needed (on MS Windows) --- setup.py | 187 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 81 deletions(-) diff --git a/setup.py b/setup.py index fc5831cdc..22a2256b8 100755 --- a/setup.py +++ b/setup.py @@ -1503,7 +1503,7 @@ def add_service_exe(script, icon, base_name): add_console_exe("xpra/codecs/nv_util.py", "nvidia.ico", "NVidia_info") if nvfbc_ENABLED: add_console_exe("xpra/codecs/nvfbc/capture.py", "nvidia.ico", "NvFBC_capture") - if nvfbc_ENABLED or nvenc_ENABLED: + if nvfbc_ENABLED or nvenc_ENABLED or nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED: add_console_exe("xpra/codecs/cuda_common/cuda_context.py", "cuda.ico", "CUDA_info") if ("install_exe" in sys.argv) or ("install" in sys.argv): @@ -2043,22 +2043,18 @@ def add_cython_ext(*_args, **_kwargs): toggle_packages(nvenc_ENABLED or nvfbc_ENABLED, "xpra.codecs.cuda_common") toggle_packages(nvenc_ENABLED or nvfbc_ENABLED, "xpra.codecs.nv_util") -CUDA_BIN = "%s/cuda" % share_xpra -if (nvenc_ENABLED and cuda_kernels_ENABLED) or nvjpeg_encoder_ENABLED: +nvidia_ENABLED = nvenc_ENABLED or nvfbc_ENABLED or nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED +toggle_packages(nvidia_ENABLED, "xpra.codecs.nvidia") +if nvidia_ENABLED: + CUDA_BIN = f"{share_xpra}/cuda" #find nvcc: - from xpra.util import sorted_nicely + from xpra.util import sorted_nicely # pylint: disable=import-outside-toplevel path_options = os.environ.get("PATH", "").split(os.path.pathsep) if WIN32: - external_includes += ["pycuda"] + external_includes.append("pycuda") nvcc_exe = "nvcc.exe" CUDA_DIR = os.environ.get("CUDA_DIR", "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA") - path_options += ["./cuda/bin/"]+list(reversed(sorted_nicely(glob.glob("%s\\*\\bin" % CUDA_DIR)))) - #pycuda may link against curand, find it and ship 
it: - for p in path_options: - if os.path.exists(p): - add_data_files("", glob.glob("%s\\curand64*.dll" % p)) - add_data_files("", glob.glob("%s\\cudart64*.dll" % p)) - break + path_options += ["./cuda/bin/"]+list(reversed(sorted_nicely(glob.glob(f"{CUDA_DIR}\\*\\bin")))) else: nvcc_exe = "nvcc" path_options += ["/usr/local/cuda/bin", "/opt/cuda/bin"] @@ -2066,12 +2062,9 @@ def add_cython_ext(*_args, **_kwargs): path_options += list(reversed(sorted_nicely(glob.glob("/opt/cuda*/bin")))) options = [os.path.join(x, nvcc_exe) for x in path_options] #prefer the one we find on the $PATH, if any: - try: - v = shutil.which(nvcc_exe) - if v and (v not in options): - options.insert(0, v) - except: - pass + v = shutil.which(nvcc_exe) + if v and (v not in options): + options.insert(0, v) nvcc_versions = {} def get_nvcc_version(command): if not os.path.exists(command): @@ -2082,30 +2075,94 @@ def get_nvcc_version(command): vpos = out.rfind(", V") if vpos>0: version = out[vpos+3:].split("\n")[0] - version_str = " version %s" % version + version_str = f" version {version}" else: version = "0" version_str = " unknown version!" - print("found CUDA compiler: %s%s" % (filename, version_str)) + print(f"found CUDA compiler: {filename}{version_str}") return tuple(int(x) for x in version.split(".")) for filename in options: vnum = get_nvcc_version(filename) if vnum: nvcc_versions[vnum] = filename + nvcc_version = nvcc = None if nvcc_versions: #choose the most recent one: nvcc_version, nvcc = list(reversed(sorted(nvcc_versions.items())))[0] if len(nvcc_versions)>1: - print(" using version %s from %s" % (nvcc_version, nvcc)) - else: - nvcc_version = nvcc = None - if ((nvenc_ENABLED or nvjpeg_encoder_ENABLED) and cuda_kernels_ENABLED): - assert nvcc_versions, "cannot find nvcc compiler!" 
+ print(f" using version {nvcc_version} from {nvcc}") + if cuda_kernels_ENABLED and (nvenc_ENABLED or nvjpeg_encoder_ENABLED): + def get_gcc_version(): + if CC_is_clang(): + return (0, ) + cc = os.environ.get("CC", "gcc") + r, _, err = get_status_output([cc, "-v"]) + if r==0: + V_LINE = "gcc version " + tmp_version = [] + for line in err.splitlines(): + if not line.startswith(V_LINE): + continue + v_str = line[len(V_LINE):].strip().split(" ")[0] + for p in v_str.split("."): + try: + tmp_version.append(int(p)) + except ValueError: + break + print("found gcc version: %s" % ".".join(str(x) for x in tmp_version)) + break + return tuple(tmp_version) + return (0, ) + assert nvcc, "cannot find nvcc compiler!" + def get_nvcc_args(): + nvcc_cmd = [nvcc, "-fatbin"] + gcc_version = get_gcc_version() + if gcc_version<(7, 5): + print("gcc versions older than 7.5 are not supported!") + for _ in range(5): + sleep(1) + print(".") + if (8,1)<=gcc_version<(9, ): + #GCC 8.1 has compatibility issues with CUDA 9.2, + #so revert to C++03: + nvcc_cmd.append("-std=c++03") + #GCC 6 uses C++11 by default: + else: + nvcc_cmd.append("-std=c++11") + if gcc_version>=(12, 0) or CC_is_clang(): + nvcc_cmd.append("--allow-unsupported-compiler") + if nvcc_version>=(11, 5): + nvcc_cmd += ["-arch=all", + "-Wno-deprecated-gpu-targets", + ] + if nvcc_version>=(11, 6): + nvcc_cmd += ["-Xnvlink", "-ignore-host-info"] + return nvcc_cmd + #older versions, add every arch we know about: + comp_code_options = [] + if nvcc_version>=(7, 5): + comp_code_options.append((52, 52)) + comp_code_options.append((53, 53)) + if nvcc_version>=(8, 0): + comp_code_options.append((60, 60)) + comp_code_options.append((61, 61)) + comp_code_options.append((62, 62)) + if nvcc_version>=(9, 0): + comp_code_options.append((70, 70)) + if nvcc_version>=(10, 0): + comp_code_options.append((75, 75)) + if nvcc_version>=(11, 0): + comp_code_options.append((80, 80)) + if nvcc_version>=(11, 1): + comp_code_options.append((86, 86)) + #if 
nvcc_version>=(11, 6): + # comp_code_options.append((87, 87)) + for arch, code in comp_code_options: + nvcc_cmd.append(f"-gencode=arch=compute_{arch},code=sm_{code}") + return nvcc_cmd + nvcc_args = get_nvcc_args() #first compile the cuda kernels #(using the same cuda SDK for both nvenc modules for now..) - #TODO: - # * compile directly to output directory instead of using data files? - # * detect which arches we want to build for? (does it really matter much?) kernels = [] if nvenc_ENABLED: kernels += ["XRGB_to_NV12", "XRGB_to_YUV444", "BGRX_to_NV12", "BGRX_to_YUV444"] @@ -2113,67 +2170,26 @@ def get_nvcc_version(command): kernels += ["BGRX_to_RGB", "RGBX_to_RGB", "RGBA_to_RGBAP", "BGRA_to_RGBAP"] nvcc_commands = [] for kernel in kernels: - cuda_src = "fs/share/xpra/cuda/%s.cu" % kernel - cuda_bin = "fs/share/xpra/cuda/%s.fatbin" % kernel + cuda_src = f"fs/share/xpra/cuda/{kernel}.cu" + cuda_bin = f"fs/share/xpra/cuda/{kernel}.fatbin" if os.path.exists(cuda_bin) and (cuda_rebuild_ENABLED is False): continue reason = should_rebuild(cuda_src, cuda_bin) if not reason: continue - print("rebuilding %s: %s" % (kernel, reason)) - cmd = [nvcc, - '-fatbin', - "-c", cuda_src, - "-o", cuda_bin] - gcc_version = get_gcc_version() - if (8,1)<=gcc_version<(9, ): - #GCC 8.1 has compatibility issues with CUDA 9.2, - #so revert to C++03: - cmd.append("-std=c++03") - #GCC 6 uses C++11 by default: - else: - cmd.append("-std=c++11") - if gcc_version>=(12, 0) or CC_is_clang(): - cmd.append("--allow-unsupported-compiler") - if nvcc_version>=(11, 5): - cmd += ["-arch=all", - "-Wno-deprecated-gpu-targets", - ] - if nvcc_version>=(11, 6): - cmd += ["-Xnvlink", "-ignore-host-info"] - else: - comp_code_options = [] - if nvcc_version>=(7, 5): - comp_code_options.append((52, 52)) - comp_code_options.append((53, 53)) - if nvcc_version>=(8, 0): - comp_code_options.append((60, 60)) - comp_code_options.append((61, 61)) - comp_code_options.append((62, 62)) - if nvcc_version>=(9, 0): - 
comp_code_options.append((70, 70)) - if nvcc_version>=(10, 0): - comp_code_options.append((75, 75)) - if nvcc_version>=(11, 0): - comp_code_options.append((80, 80)) - if nvcc_version>=(11, 1): - comp_code_options.append((86, 86)) - #if nvcc_version>=(11, 6): - # comp_code_options.append((87, 87)) - for arch, code in comp_code_options: - cmd.append("-gencode=arch=compute_%s,code=sm_%s" % (arch, code)) - print("CUDA compiling %s (%s)" % (kernel.ljust(16), reason)) - print(" %s" % " ".join("'%s'" % x for x in cmd)) - nvcc_commands.append(cmd) - + print(f"rebuilding {kernel}: {reason}") + kbuild_cmd = nvcc_args + ["-c", cuda_src, "-o", cuda_bin] + print(f"CUDA compiling %s ({reason})" % kernel.ljust(16)) + print(" "+" ".join(f"{x!r}" for x in kbuild_cmd)) + nvcc_commands.append(kbuild_cmd) #parallel build: nvcc_errors = [] - def nvcc_compile(cmd): - c, stdout, stderr = get_status_output(cmd) + def nvcc_compile(nvcc_cmd): + c, stdout, stderr = get_status_output(nvcc_cmd) if c!=0: nvcc_errors.append(c) - print("Error: failed to compile CUDA kernel %s" % kernel) - print(" using command: %s" % (cmd,)) + print(f"Error: failed to compile CUDA kernel {kernel}") + print(f" using command: {nvcc_cmd}") print(stdout or "") print(stderr or "") nvcc_threads = [] @@ -2186,8 +2202,17 @@ def nvcc_compile(cmd): if nvcc_errors: sys.exit(1) t.join() + add_data_files(CUDA_BIN, [f"fs/share/xpra/cuda/{x}.fatbin" for x in kernels]) + add_data_files(CUDA_BIN, ["fs/share/xpra/cuda/README.md"]) + if WIN32 and (nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED or nvenc_ENABLED): + assert nvcc_versions + CUDA_BIN_DIR = os.path.dirname(nvcc) + add_data_files("", glob.glob(f"{CUDA_BIN_DIR}/cudart64*dll")) + #if pycuda is built with curand, add this: + #add_data_files("", glob.glob(f"{CUDA_BIN_DIR}/curand64*dll")) + if nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED: + add_data_files("", glob.glob(f"{CUDA_BIN_DIR}/nvjpeg64*dll")) - add_data_files(CUDA_BIN, ["fs/share/xpra/cuda/%s.fatbin" % x for x 
in kernels]) add_data_files(CUDA_BIN, ["fs/share/xpra/cuda/README.md"]) tace(nvenc_ENABLED, "xpra.codecs.nvenc.encoder", "nvenc")