Install build packages
(.venv) [bluesanta@localhost pytorch]$ sudo dnf install -y cmake
(.venv) [bluesanta@localhost pytorch]$ sudo dnf --enablerepo=devel install -y ninja-build
(.venv) [bluesanta@localhost pytorch]$ python -m pip install mkl-include mkl-static ninja scikit-build
(.venv) [bluesanta@localhost pytorch]$ python -m pip install -r requirements.txt
(.venv) [bluesanta@localhost pytorch]$ sudo dnf install -y libomp-devel
Install OpenMPI
mkdir /tmp/openmpi \
&& cd /tmp/openmpi \
&& wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.gz \
&& tar zxf openmpi-4.1.6.tar.gz \
&& cd openmpi-4.1.6 \
&& ./configure --prefix=/usr --enable-orterun-prefix-by-default --with-cuda=$CUDA_HOME --with-cuda-libdir=$CUDA_HOME/lib64/stubs --with-slurm > /dev/null \
&& make -j $(nproc) all \
&& sudo make -s install \
&& sudo ldconfig \
&& cd ~/ \
&& rm -rf /tmp/openmpi \
&& ompi_info | grep "MPI extensions"
(.venv) [bluesanta@localhost ~]$ mkdir /tmp/openmpi \
> && cd /tmp/openmpi \
> && wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.gz \
> && tar zxf openmpi-4.1.6.tar.gz \
> && cd openmpi-4.1.6 \
> && ./configure --prefix=/usr --enable-orterun-prefix-by-default --with-cuda=$CUDA_HOME --with-cuda-libdir=$CUDA_HOME/lib64/stubs --with-slurm > /dev/null \
> && make -j $(nproc) all \
> && sudo make -s install \
> && sudo ldconfig \
> && cd ~/ \
> && rm -rf /tmp/openmpi \
> && ompi_info | grep "MPI extensions"
Making install in tools/mpisync
Making install in test
Making install in support
Making install in asm
Making install in class
Making install in threads
Making install in datatype
Making install in util
Making install in dss
Making install in mpool
Making install in monitoring
Making install in spc
MPI extensions: affinity, cuda, pcollreq
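As a quick sanity check that the new OpenMPI installation is usable from Python, a minimal mpi4py hello-world can be launched with mpirun. Note that mpi4py is not installed anywhere in this guide, so the pip install mpi4py step and the two-process launch below are assumptions, not part of the original procedure.
# hello_mpi.py -- minimal mpi4py sanity check (assumes: pip install mpi4py)
# Launch with: mpirun -np 2 python hello_mpi.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
# Each rank prints its index, the total process count, and the host name
print(f"rank {comm.Get_rank()} of {comm.Get_size()} on {MPI.Get_processor_name()}")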
Download the PyTorch source
(.venv) [bluesanta@localhost stable-diffusion]$ git clone https://github.com/pytorch/pytorch
(.venv) [bluesanta@localhost stable-diffusion]$ cd pytorch
(.venv) [bluesanta@localhost pytorch]$ git submodule sync
(.venv) [bluesanta@localhost pytorch]$ git submodule update --init --recursive
Install PyTorch build packages
(.venv) [bluesanta@localhost pytorch]$ pip install -r requirements.txt
Install the NCCL packages
(.venv) [bluesanta@localhost ~]$ sudo dnf config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
(.venv) [bluesanta@localhost ~]$ sudo dnf install -y libnccl libnccl-devel libnccl-static
(.venv) [bluesanta@localhost ~]$ sudo dnf update -y
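Before building PyTorch against the system NCCL, it can help to confirm which NCCL version the devel package actually provides. The version macros live in nccl.h; the sketch below assumes the header was installed to /usr/include/nccl.h, which matches the NCCL_INCLUDE_DIR used in the build step.
# Print the NCCL version from the header installed by libnccl-devel.
# Assumes /usr/include/nccl.h exists (matches NCCL_INCLUDE_DIR below).
import re

header = open("/usr/include/nccl.h").read()
version = {
    key: int(re.search(rf"#define NCCL_{key}\s+(\d+)", header).group(1))
    for key in ("MAJOR", "MINOR", "PATCH")
}
print("NCCL {MAJOR}.{MINOR}.{PATCH}".format(**version))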
Build PyTorch
export CMAKE_PREFIX_PATH="/usr/include/openmpi;/usr/lib/openmpi;/usr/lib"
USE_CUDA=1 \
USE_CUDNN=1 \
USE_MPI=1 \
USE_SYSTEM_NCCL=1 \
USE_ROCM=0 \
NCCL_INCLUDE_DIR=/usr/include \
python setup.py develop
[bluesanta@localhost ~]$ cd Applications/
[bluesanta@localhost Applications]$ cd stable-diffusion/
[bluesanta@localhost stable-diffusion]$ source .venv/bin/activate
(.venv) [bluesanta@localhost stable-diffusion]$ cd pytorch/
(.venv) [bluesanta@localhost pytorch]$ export CMAKE_PREFIX_PATH="/usr/include/openmpi;/usr/lib/openmpi;/usr/lib"
(.venv) [bluesanta@localhost pytorch]$
(.venv) [bluesanta@localhost pytorch]$ USE_CUDA=1 \
> USE_CUDNN=1 \
> USE_MPI=1 \
> USE_SYSTEM_NCCL=1 \
> USE_ROCM=0 \
> NCCL_INCLUDE_DIR=/usr/include \
> python setup.py develop
copying build/lib.linux-x86_64-cpython-311/torch/_C.cpython-311-x86_64-linux-gnu.so -> torch
copying build/lib.linux-x86_64-cpython-311/functorch/_C.cpython-311-x86_64-linux-gnu.so -> functorch
Creating /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages/torch.egg-link (link to .)
Adding torch 2.8.0a0+git663bcb6 to easy-install.pth file
Installing torchfrtrace script to /home/bluesanta/Applications/stable-diffusion/.venv/bin
Installing torchrun script to /home/bluesanta/Applications/stable-diffusion/.venv/bin
Installed /home/bluesanta/Applications/stable-diffusion/pytorch
Processing dependencies for torch==2.8.0a0+git663bcb6
Searching for fsspec==2025.3.2
Best match: fsspec 2025.3.2
Adding fsspec 2025.3.2 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for jinja2==3.1.6
Best match: jinja2 3.1.6
Adding jinja2 3.1.6 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for networkx==3.4.2
Best match: networkx 3.4.2
Adding networkx 3.4.2 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for sympy==1.14.0
Best match: sympy 1.14.0
Adding sympy 1.14.0 to easy-install.pth file
Installing isympy script to /home/bluesanta/Applications/stable-diffusion/.venv/bin
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for typing-extensions==4.13.2
Best match: typing-extensions 4.13.2
Adding typing-extensions 4.13.2 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for filelock==3.18.0
Best match: filelock 3.18.0
Adding filelock 3.18.0 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for MarkupSafe==3.0.2
Best match: MarkupSafe 3.0.2
Adding MarkupSafe 3.0.2 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Searching for mpmath==1.3.0
Best match: mpmath 1.3.0
Adding mpmath 1.3.0 to easy-install.pth file
Using /home/bluesanta/Applications/stable-diffusion/.venv/lib/python3.11/site-packages
Finished processing dependencies for torch==2.8.0a0+git663bcb6
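With setup.py develop finished, it is worth confirming that the flags used above (USE_CUDA=1, USE_MPI=1, USE_SYSTEM_NCCL=1) actually made it into the build. A minimal check from the same virtualenv might look like the following; the values printed will depend on your system.
# Post-build check that the build flags took effect.
import torch
import torch.distributed as dist

print("torch:", torch.__version__)                    # e.g. 2.8.0a0+git663bcb6
print("CUDA build:", torch.version.cuda)              # CUDA toolkit used for the build
print("cuDNN:", torch.backends.cudnn.version())
print("NCCL available:", dist.is_nccl_available())    # expected True with USE_SYSTEM_NCCL=1
print("MPI available:", dist.is_mpi_available())      # expected True with USE_MPI=1
print("NCCL version:", torch.cuda.nccl.version())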
Install TensorFlow with GPU support
(.venv) [bluesanta@localhost stable-diffusion]$ pip install tensorflow[and-cuda]==2.19.0
Check the CUDA version
cuda_version_check.py source
import torch
import tensorflow as tf
from tensorflow.python.client import device_lib

# PyTorch: create a random tensor and query the CUDA runtime
x = torch.rand(5, 3)
print(x)
print("----------------------------------------")
print("torch.cuda.is_available() =", torch.cuda.is_available())
print("----------------------------------------")
print("torch.cuda.current_device() =", torch.cuda.current_device())
print("----------------------------------------")
print("torch.cuda.get_device_name(0) =", torch.cuda.get_device_name(0))
print("----------------------------------------")
print("torch.__version__ =", torch.__version__)
# Print the CUDA version that PyTorch was built against
print(f"CUDA version: {torch.version.cuda}")
# TensorFlow: check that the GPU and cuDNN are recognized
print("----------------------------------------")
print(device_lib.list_local_devices())
print("----------------------------------------")
print(tf.test.is_built_with_cuda())
print("----------------------------------------")
# Deprecated; TensorFlow recommends tf.config.list_physical_devices('GPU') instead
print(tf.test.is_gpu_available(cuda_only=False, min_cuda_compute_capability=None))
Run cuda_version_check.py
(.venv) [bluesanta@localhost stable-diffusion]$ python cuda_version_check.py
2025-04-30 23:56:43.699453: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1746025003.710231 604585 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746025003.713433 604585 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1746025003.722518 604585 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746025003.722529 604585 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746025003.722532 604585 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1746025003.722534 604585 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
2025-04-30 23:56:43.725295: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
tensor([[0.3775, 0.4729, 0.4964],
[0.0783, 0.6957, 0.6801],
[0.8738, 0.3818, 0.6314],
[0.8639, 0.7027, 0.7775],
[0.3149, 0.0923, 0.2672]])
----------------------------------------
torch.cuda.is_available() = True
----------------------------------------
torch.cuda.current_device() = 0
----------------------------------------
torch.cuda.get_device_name(0) = NVIDIA GeForce RTX 4090
----------------------------------------
torch.__version__ = 2.8.0a0+git663bcb6
CUDA version: 12.5
----------------------------------------
I0000 00:00:1746025005.014143 604585 gpu_device.cc:2019] Created device /device:GPU:0 with 21961 MB memory: -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:06:00.0, compute capability: 8.9
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 5174814723682739464
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 23028629504
locality {
bus_id: 1
links {
}
}
incarnation: 18339436096741683477
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:06:00.0, compute capability: 8.9"
xla_global_id: 416903419
]
----------------------------------------
True
----------------------------------------
WARNING:tensorflow:From /home/bluesanta/Applications/stable-diffusion/pytorch_test1.py:27: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
I0000 00:00:1746025005.019230 604585 gpu_device.cc:2019] Created device /device:GPU:0 with 21961 MB memory: -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:06:00.0, compute capability: 8.9
True
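As the warning above notes, tf.test.is_gpu_available is deprecated. The same check written with the recommended API is shown below as a small sketch; it is not part of the original cuda_version_check.py.
# Recommended replacement for tf.test.is_gpu_available (per the warning above)
import tensorflow as tf

gpus = tf.config.list_physical_devices("GPU")
print("GPUs visible to TensorFlow:", gpus)
print("GPU available:", len(gpus) > 0)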