1# NVIDIA NCCL 2
2# A package of optimized primitives for collective multi-GPU communication.
3
4licenses(["notice"])
5
6exports_files(["LICENSE.txt"])
7
8load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
9load(
10    "@local_config_nccl//:build_defs.bzl",
11    "cuda_rdc_library",
12    "gen_device_srcs",
13)
14
# Headers addressed relative to the src/ directory: with the "src" prefix
# stripped they are reachable as "nccl.h" and "include/collectives.h".
cc_library(
    name = "src_hdrs",
    hdrs = ["src/include/collectives.h", "src/nccl.h"],
    strip_include_prefix = "src",
)
23
# Every file below src/include, exposed with the "src/include" prefix removed
# so that sources can include them by bare name. Also pulls in the CUDA headers
# for anything that depends on this target.
cc_library(
    name = "include_hdrs",
    hdrs = glob(include = ["src/include/**"]),
    strip_include_prefix = "src/include",
    deps = ["@local_config_cuda//cuda:cuda_headers"],
)
30
# Headers that sit next to the device collectives, exposed with the
# "src/collectives/device" prefix removed.
cc_library(
    name = "device_hdrs",
    hdrs = glob(include = ["src/collectives/device/*.h"]),
    strip_include_prefix = "src/collectives/device",
)
36
# Each of the sources below is compiled several times by NCCL, once per
# NCCL_OP/NCCL_TYPE define combination. Relocatable device code (RDC)
# compilation needs a unique ID for every compiled module. NVCC derives that ID
# from the preprocessed content, so the repeated compilations never collide.
# Clang derives it from the file path alone, which is why the files are copied:
# every part of the compilation then gets its own path and thus its own ID.
gen_device_srcs(
    name = "device_srcs",
    srcs = [
        "src/collectives/device/all_gather.cu.cc",
        "src/collectives/device/all_reduce.cu.cc",
        "src/collectives/device/broadcast.cu.cc",
        "src/collectives/device/reduce.cu.cc",
        "src/collectives/device/reduce_scatter.cu.cc",
        "src/collectives/device/sendrecv.cu.cc",
    ],
)
53
# Device-side (RDC) part of NCCL: the hand-written functions.cu.cc plus the
# per-op/per-type copies produced by :device_srcs.
cuda_rdc_library(
    name = "device",
    srcs = [
        "src/collectives/device/functions.cu.cc",
        ":device_srcs",
    ] + glob([
        # These headers are listed in srcs too, to satisfy Bazel's header
        # inclusion checking (see
        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs).
        "src/collectives/device/*.h",
        "src/nccl.h",
    ]),
    deps = [
        ":device_hdrs",
        ":include_hdrs",
        ":src_hdrs",
        "@local_config_cuda//cuda:cuda_headers",
    ],
)
71
# Primary NCCL target.
#
# Built with cuda_library rather than cc_library: since
# https://reviews.llvm.org/D68578 clang only uses the correct name for kernel
# host stubs (the function pointers that initialize ncclKerns in enqueue.cc)
# when the sources are compiled as CUDA.
cuda_library(
    name = "nccl",
    srcs = glob(
        [
            "src/**/*.cc",
            # Headers listed as sources for header inclusion checking; see the
            # explanation on the explicit entries below.
            "src/graph/*.h",
        ],
        # Device-library code is built by the :device target instead.
        exclude = ["src/collectives/device/**"],
    ) + [
        # Header inclusion checking (see
        # http://docs.bazel.build/versions/master/be/c-cpp.html#hdrs) requires
        # these to be listed here: files in src/ which #include "nccl.h" pick
        # it up from src/ directly, not from the virtual includes directory.
        "src/include/collectives.h",
        "src/nccl.h",
    ],
    hdrs = ["src/nccl.h"],
    include_prefix = "third_party/nccl",
    # -lrt is added everywhere except on macOS.
    linkopts = select({
        "//conditions:default": ["-lrt"],
        "@org_tensorflow//tensorflow:macos": [],
    }),
    strip_include_prefix = "src",
    visibility = ["//visibility:public"],
    deps = [
        ":device",
        ":include_hdrs",
        ":src_hdrs",
    ],
)
109