From 23b2c55737ca8b368197f01cc547d5439c84b862 Mon Sep 17 00:00:00 2001 From: totaam Date: Sun, 27 Nov 2022 21:19:47 +0700 Subject: [PATCH] make sure to include cuda libs when needed (on MS Windows) --- setup.py | 187 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 81 deletions(-) diff --git a/setup.py b/setup.py index fc5831cdc..22a2256b8 100755 --- a/setup.py +++ b/setup.py @@ -1503,7 +1503,7 @@ def add_service_exe(script, icon, base_name): add_console_exe("xpra/codecs/nv_util.py", "nvidia.ico", "NVidia_info") if nvfbc_ENABLED: add_console_exe("xpra/codecs/nvfbc/capture.py", "nvidia.ico", "NvFBC_capture") - if nvfbc_ENABLED or nvenc_ENABLED: + if nvfbc_ENABLED or nvenc_ENABLED or nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED: add_console_exe("xpra/codecs/cuda_common/cuda_context.py", "cuda.ico", "CUDA_info") if ("install_exe" in sys.argv) or ("install" in sys.argv): @@ -2043,22 +2043,18 @@ def add_cython_ext(*_args, **_kwargs): toggle_packages(nvenc_ENABLED or nvfbc_ENABLED, "xpra.codecs.cuda_common") toggle_packages(nvenc_ENABLED or nvfbc_ENABLED, "xpra.codecs.nv_util") -CUDA_BIN = "%s/cuda" % share_xpra -if (nvenc_ENABLED and cuda_kernels_ENABLED) or nvjpeg_encoder_ENABLED: +nvidia_ENABLED = nvenc_ENABLED or nvfbc_ENABLED or nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED +toggle_packages(nvidia_ENABLED, "xpra.codecs.nvidia") +if nvidia_ENABLED: + CUDA_BIN = f"{share_xpra}/cuda" #find nvcc: - from xpra.util import sorted_nicely + from xpra.util import sorted_nicely # pylint: disable=import-outside-toplevel path_options = os.environ.get("PATH", "").split(os.path.pathsep) if WIN32: - external_includes += ["pycuda"] + external_includes.append("pycuda") nvcc_exe = "nvcc.exe" CUDA_DIR = os.environ.get("CUDA_DIR", "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA") - path_options += ["./cuda/bin/"]+list(reversed(sorted_nicely(glob.glob("%s\\*\\bin" % CUDA_DIR)))) - #pycuda may link against curand, find it and ship 
it: - for p in path_options: - if os.path.exists(p): - add_data_files("", glob.glob("%s\\curand64*.dll" % p)) - add_data_files("", glob.glob("%s\\cudart64*.dll" % p)) - break + path_options += ["./cuda/bin/"]+list(reversed(sorted_nicely(glob.glob(f"{CUDA_DIR}\\*\\bin")))) else: nvcc_exe = "nvcc" path_options += ["/usr/local/cuda/bin", "/opt/cuda/bin"] @@ -2066,12 +2062,9 @@ def add_cython_ext(*_args, **_kwargs): path_options += list(reversed(sorted_nicely(glob.glob("/opt/cuda*/bin")))) options = [os.path.join(x, nvcc_exe) for x in path_options] #prefer the one we find on the $PATH, if any: - try: - v = shutil.which(nvcc_exe) - if v and (v not in options): - options.insert(0, v) - except: - pass + v = shutil.which(nvcc_exe) + if v and (v not in options): + options.insert(0, v) nvcc_versions = {} def get_nvcc_version(command): if not os.path.exists(command): @@ -2082,30 +2075,94 @@ def get_nvcc_version(command): vpos = out.rfind(", V") if vpos>0: version = out[vpos+3:].split("\n")[0] - version_str = " version %s" % version + version_str = f" version {version}" else: version = "0" version_str = " unknown version!" - print("found CUDA compiler: %s%s" % (filename, version_str)) + print(f"found CUDA compiler: {filename}{version_str}") return tuple(int(x) for x in version.split(".")) for filename in options: vnum = get_nvcc_version(filename) if vnum: nvcc_versions[vnum] = filename + nvcc_version = nvcc = None if nvcc_versions: #choose the most recent one: nvcc_version, nvcc = list(reversed(sorted(nvcc_versions.items())))[0] if len(nvcc_versions)>1: - print(" using version %s from %s" % (nvcc_version, nvcc)) - else: - nvcc_version = nvcc = None - if ((nvenc_ENABLED or nvjpeg_encoder_ENABLED) and cuda_kernels_ENABLED): - assert nvcc_versions, "cannot find nvcc compiler!" 
+ print(f" using version {nvcc_version} from {nvcc}") + if cuda_kernels_ENABLED and (nvenc_ENABLED or nvjpeg_encoder_ENABLED): + def get_gcc_version(): + if CC_is_clang(): + return (0, ) + cc = os.environ.get("CC", "gcc") + r, _, err = get_status_output([cc, "-v"]) + if r==0: + V_LINE = "gcc version " + tmp_version = [] + for line in err.splitlines(): + if not line.startswith(V_LINE): + continue + v_str = line[len(V_LINE):].strip().split(" ")[0] + for p in v_str.split("."): + try: + tmp_version.append(int(p)) + except ValueError: + break + print("found gcc version: %s" % ".".join(str(x) for x in tmp_version)) + break + return tuple(tmp_version) + return (0, ) + assert nvcc, "cannot find nvcc compiler!" + def get_nvcc_args(): + nvcc_cmd = [nvcc, "-fatbin"] + gcc_version = get_gcc_version() + if gcc_version<(7, 5): + print("gcc versions older than 7.5 are not supported!") + for _ in range(5): + sleep(1) + print(".") + if (8,1)<=gcc_version<(9, ): + #GCC 8.1 has compatibility issues with CUDA 9.2, + #so revert to C++03: + nvcc_cmd.append("-std=c++03") + #GCC 6 uses C++11 by default: + else: + nvcc_cmd.append("-std=c++11") + if gcc_version>=(12, 0) or CC_is_clang(): + nvcc_cmd.append("--allow-unsupported-compiler") + if nvcc_version>=(11, 5): + nvcc_cmd += ["-arch=all", + "-Wno-deprecated-gpu-targets", + ] + if nvcc_version>=(11, 6): + nvcc_cmd += ["-Xnvlink", "-ignore-host-info"] + return nvcc_cmd + #older versions, add every arch we know about: + comp_code_options = [] + if nvcc_version>=(7, 5): + comp_code_options.append((52, 52)) + comp_code_options.append((53, 53)) + if nvcc_version>=(8, 0): + comp_code_options.append((60, 60)) + comp_code_options.append((61, 61)) + comp_code_options.append((62, 62)) + if nvcc_version>=(9, 0): + comp_code_options.append((70, 70)) + if nvcc_version>=(10, 0): + comp_code_options.append((75, 75)) + if nvcc_version>=(11, 0): + comp_code_options.append((80, 80)) + if nvcc_version>=(11, 1): + comp_code_options.append((86, 86)) + #if 
nvcc_version>=(11, 6): + # comp_code_options.append((87, 87)) + for arch, code in comp_code_options: + nvcc_cmd.append(f"-gencode=arch=compute_{arch},code=sm_{code}") + return nvcc_cmd + nvcc_args = get_nvcc_args() #first compile the cuda kernels #(using the same cuda SDK for both nvenc modules for now..) - #TODO: - # * compile directly to output directory instead of using data files? - # * detect which arches we want to build for? (does it really matter much?) kernels = [] if nvenc_ENABLED: kernels += ["XRGB_to_NV12", "XRGB_to_YUV444", "BGRX_to_NV12", "BGRX_to_YUV444"] @@ -2113,67 +2170,26 @@ def get_nvcc_version(command): kernels += ["BGRX_to_RGB", "RGBX_to_RGB", "RGBA_to_RGBAP", "BGRA_to_RGBAP"] nvcc_commands = [] for kernel in kernels: - cuda_src = "fs/share/xpra/cuda/%s.cu" % kernel - cuda_bin = "fs/share/xpra/cuda/%s.fatbin" % kernel + cuda_src = f"fs/share/xpra/cuda/{kernel}.cu" + cuda_bin = f"fs/share/xpra/cuda/{kernel}.fatbin" if os.path.exists(cuda_bin) and (cuda_rebuild_ENABLED is False): continue reason = should_rebuild(cuda_src, cuda_bin) if not reason: continue - print("rebuilding %s: %s" % (kernel, reason)) - cmd = [nvcc, - '-fatbin', - "-c", cuda_src, - "-o", cuda_bin] - gcc_version = get_gcc_version() - if (8,1)<=gcc_version<(9, ): - #GCC 8.1 has compatibility issues with CUDA 9.2, - #so revert to C++03: - cmd.append("-std=c++03") - #GCC 6 uses C++11 by default: - else: - cmd.append("-std=c++11") - if gcc_version>=(12, 0) or CC_is_clang(): - cmd.append("--allow-unsupported-compiler") - if nvcc_version>=(11, 5): - cmd += ["-arch=all", - "-Wno-deprecated-gpu-targets", - ] - if nvcc_version>=(11, 6): - cmd += ["-Xnvlink", "-ignore-host-info"] - else: - comp_code_options = [] - if nvcc_version>=(7, 5): - comp_code_options.append((52, 52)) - comp_code_options.append((53, 53)) - if nvcc_version>=(8, 0): - comp_code_options.append((60, 60)) - comp_code_options.append((61, 61)) - comp_code_options.append((62, 62)) - if nvcc_version>=(9, 0): - 
comp_code_options.append((70, 70)) - if nvcc_version>=(10, 0): - comp_code_options.append((75, 75)) - if nvcc_version>=(11, 0): - comp_code_options.append((80, 80)) - if nvcc_version>=(11, 1): - comp_code_options.append((86, 86)) - #if nvcc_version>=(11, 6): - # comp_code_options.append((87, 87)) - for arch, code in comp_code_options: - cmd.append("-gencode=arch=compute_%s,code=sm_%s" % (arch, code)) - print("CUDA compiling %s (%s)" % (kernel.ljust(16), reason)) - print(" %s" % " ".join("'%s'" % x for x in cmd)) - nvcc_commands.append(cmd) - + print(f"rebuilding {kernel}: {reason}") + kbuild_cmd = nvcc_args + ["-c", cuda_src, "-o", cuda_bin] + print(f"CUDA compiling %s ({reason})" % kernel.ljust(16)) + print(" "+" ".join(f"{x!r}" for x in kbuild_cmd)) + nvcc_commands.append(kbuild_cmd) #parallel build: nvcc_errors = [] - def nvcc_compile(cmd): - c, stdout, stderr = get_status_output(cmd) + def nvcc_compile(nvcc_cmd): + c, stdout, stderr = get_status_output(nvcc_cmd) if c!=0: nvcc_errors.append(c) - print("Error: failed to compile CUDA kernel %s" % kernel) - print(" using command: %s" % (cmd,)) + print(f"Error: failed to compile CUDA kernel {kernel}") + print(f" using command: {nvcc_cmd}") print(stdout or "") print(stderr or "") nvcc_threads = [] @@ -2186,8 +2202,17 @@ def nvcc_compile(cmd): if nvcc_errors: sys.exit(1) t.join() + add_data_files(CUDA_BIN, [f"fs/share/xpra/cuda/{x}.fatbin" for x in kernels]) + add_data_files(CUDA_BIN, ["fs/share/xpra/cuda/README.md"]) + if WIN32 and (nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED or nvenc_ENABLED): + assert nvcc_versions + CUDA_BIN_DIR = os.path.dirname(nvcc) + add_data_files("", glob.glob(f"{CUDA_BIN_DIR}/cudart64*dll")) + #if pycuda is built with curand, add this: + #add_data_files("", glob.glob(f"{CUDA_BIN_DIR}/curand64*dll")) + if nvjpeg_encoder_ENABLED or nvjpeg_decoder_ENABLED: + add_data_files("", glob.glob(f"{CUDA_BIN_DIR}/nvjpeg64*dll")) - add_data_files(CUDA_BIN, ["fs/share/xpra/cuda/%s.fatbin" % x for x 
in kernels]) add_data_files(CUDA_BIN, ["fs/share/xpra/cuda/README.md"]) tace(nvenc_ENABLED, "xpra.codecs.nvenc.encoder", "nvenc")