Python实现GPU加速图像处理的代码详解

更新时间：2025年04月08日 09:14:50 作者：ak啊

这篇文章主要为大家详细介绍了 Python 实现 GPU 加速图像处理的相关知识，文中的示例代码讲解详细，感兴趣的小伙伴可以跟随小编一起学习一下。

1. 使用 PyTorch 实现 GPU 加速的卷积滤波（如边缘检测）

import torch
import torch.nn as nn
import cv2
import numpy as np

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the image and convert it to a PyTorch tensor
image = cv2.imread("input.jpg")  # BGR order; cv2.imread returns None on failure
if image is None:
    raise FileNotFoundError("input.jpg could not be read")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB
image_tensor = torch.from_numpy(image).float().permute(2, 0, 1)  # HWC -> CHW
image_tensor = image_tensor.unsqueeze(0).to(device)  # add batch dim, move to device

# Depthwise convolution: groups=3 filters every colour channel independently,
# so the layer's weight has shape (out_channels, in_channels / groups, 3, 3)
# = (3, 1, 3, 3).
conv_layer = nn.Conv2d(
    in_channels=3,
    out_channels=3,
    kernel_size=3,
    bias=False,
    padding=1,
    groups=3,
).to(device)

# Sobel-x kernel: responds to intensity changes along the horizontal axis,
# i.e. it highlights vertical edges. The same kernel is used for R, G and B.
sobel_x = torch.tensor(
    [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]],
    dtype=torch.float32,
    device=device,
)
sobel_kernel = sobel_x.repeat(3, 1, 1, 1)  # (3, 3) -> (3, 1, 3, 3)

# Run the convolution without tracking gradients
with torch.no_grad():
    # copy_ verifies that the kernel matches the layer's weight shape
    conv_layer.weight.copy_(sobel_kernel)
    output_tensor = conv_layer(image_tensor)

# Convert back to numpy and save. The gradient is signed, so take its
# magnitude first; clipping at 0 alone would drop every negative response.
output = output_tensor.squeeze(0).permute(1, 2, 0).abs().cpu().numpy()
output = np.clip(output, 0, 255).astype(np.uint8)
cv2.imwrite("edge_detection_gpu.jpg", cv2.cvtColor(output, cv2.COLOR_RGB2BGR))

2. 使用 OpenCV 的 CUDA 模块加速高斯模糊

import cv2
import time

# Check that this OpenCV build can see a CUDA device. The count is 0 when
# OpenCV was compiled without CUDA support.
cuda_device_count = cv2.cuda.getCudaEnabledDeviceCount()
print("CUDA devices:", cuda_device_count)
if cuda_device_count < 1:
    raise RuntimeError(
        "No CUDA device available to OpenCV (is this build compiled with CUDA?)"
    )

# Read the image and upload it to the GPU
image = cv2.imread("input.jpg")  # returns None on failure
if image is None:
    raise FileNotFoundError("input.jpg could not be read")
gpu_image = cv2.cuda_GpuMat()
gpu_image.upload(image)

# Create the GPU Gaussian filter
gaussian_filter = cv2.cuda.createGaussianFilter(
    cv2.CV_8UC3,  # source type (8-bit unsigned, 3 channels)
    cv2.CV_8UC3,  # destination type
    (15, 15),     # kernel size
    0             # sigma (0 = derive it from the kernel size)
)

# Warm-up call so one-time initialisation is not part of the measurement
gpu_blur = gaussian_filter.apply(gpu_image)

# Apply the filter repeatedly to get a measurable duration
start_time = time.perf_counter()
for _ in range(100):
    gpu_blur = gaussian_filter.apply(gpu_image)
end_time = time.perf_counter()

# Download the result to the CPU and save it
result = gpu_blur.download()
print(f"GPU Time: {end_time - start_time:.4f} seconds")
cv2.imwrite("blur_gpu.jpg", result)

3. 使用 CuPy 加速图像傅里叶变换

import cupy as cp
import cv2
import numpy as np
import time

# Read the image as grayscale
image = cv2.imread("input.jpg", cv2.IMREAD_GRAYSCALE)  # returns None on failure
if image is None:
    raise FileNotFoundError("input.jpg could not be read")

# Convert the numpy array to a CuPy array (uploads it to the GPU)
image_gpu = cp.asarray(image)

# 2-D FFT, shift the zero frequency to the centre, log-scaled magnitude
start_time = time.perf_counter()
fft_gpu = cp.fft.fft2(image_gpu)
fft_shift = cp.fft.fftshift(fft_gpu)
# log1p(x) = log(1 + x): stays finite where the magnitude is exactly 0,
# whereas log(0) = -inf would break the min-max normalisation below.
magnitude_spectrum = cp.log1p(cp.abs(fft_shift))
# CuPy launches GPU work asynchronously; wait for it to finish before
# stopping the timer, otherwise only the launch overhead is measured.
cp.cuda.Stream.null.synchronize()
end_time = time.perf_counter()

# Copy the result back to the CPU
magnitude_cpu = cp.asnumpy(magnitude_spectrum)
print(f"GPU FFT Time: {end_time - start_time:.4f} seconds")

# Normalise to 0-255 and save the spectrum image
magnitude_cpu = cv2.normalize(magnitude_cpu, None, 0, 255, cv2.NORM_MINMAX)
cv2.imwrite("fft_spectrum_gpu.jpg", magnitude_cpu.astype(np.uint8))

4. 使用 Numba 编写自定义 GPU 核函数（图像反色）

from numba import cuda
import numpy as np
import cv2
import time

# Read the image (cv2.imread returns None instead of raising on failure)
image = cv2.imread("input.jpg")
if image is None:
    raise FileNotFoundError("input.jpg could not be read")
height, width, channels = image.shape

# GPU kernel: one thread per pixel, inverting the image in place
@cuda.jit
def invert_colors_kernel(image):
    """Replace every value of a 3-D image array with 255 minus that value.

    Each thread handles one position along the first two axes and loops
    over the third (channel) axis. Threads that fall outside the array do
    nothing, so the launch grid may be larger than the image.
    """
    x, y = cuda.grid(2)  # absolute thread position in the 2-D launch grid
    if x < image.shape[0] and y < image.shape[1]:
        # Use the array's own channel count rather than a hard-coded 3
        for c in range(image.shape[2]):
            image[x, y, c] = 255 - image[x, y, c]

# Thread/block configuration: ceil-divide so the grid covers the whole image
threads_per_block = (16, 16)
blocks_per_grid_x = (height + threads_per_block[0] - 1) // threads_per_block[0]
blocks_per_grid_y = (width + threads_per_block[1] - 1) // threads_per_block[1]
blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

# Warm-up launch: Numba compiles the kernel on its first call, which would
# otherwise dominate the timing. The kernel works in place, so warm up on a
# scratch copy rather than on the buffer that holds the real result.
warmup_gpu = cuda.to_device(image)
invert_colors_kernel[blocks_per_grid, threads_per_block](warmup_gpu)
cuda.synchronize()

# Upload the image to the GPU
image_gpu = cuda.to_device(image)

# Run the kernel
start_time = time.perf_counter()
invert_colors_kernel[blocks_per_grid, threads_per_block](image_gpu)
cuda.synchronize()  # kernel launches are asynchronous; wait for completion
end_time = time.perf_counter()

# Download the result and save it
image_cpu = image_gpu.copy_to_host()
print(f"GPU Invert Time: {end_time - start_time:.6f} seconds")
cv2.imwrite("inverted_gpu.jpg", image_cpu)

5. 使用 PyTorch 实现实时风格迁移（GPU加速）

import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image

# Load the pretrained VGG19 feature extractor onto the GPU (CPU fallback).
# `weights=` replaces the deprecated `pretrained=True` argument.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg19(weights=models.VGG19_Weights.DEFAULT).features.to(device).eval()

# Image preprocessing: resize, convert to a tensor, apply ImageNet mean/std
preprocess = transforms.Compose([
    transforms.Resize(512),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the content and style images. Force 3-channel RGB so grayscale or
# RGBA files still match the 3-channel normalisation above.
content_image = Image.open("content.jpg").convert("RGB")
style_image = Image.open("style.jpg").convert("RGB")

# Convert to tensors, add a batch dimension and move to the device
content_tensor = preprocess(content_image).unsqueeze(0).to(device)
style_tensor = preprocess(style_image).unsqueeze(0).to(device)

def _extract_features(model, image, layer_ids):
    """Run *image* through the child layers of *model*, collecting chosen outputs.

    Returns a dict mapping each index in *layer_ids* to the output of the
    child layer at that position. Raises ValueError if an index is not
    reached.
    """
    wanted = set(layer_ids)
    last = max(wanted)
    features = {}
    x = image
    for idx, layer in enumerate(model.children()):
        # Apply ReLU out of place: an in-place ReLU would overwrite a tensor
        # that has already been captured for the loss.
        x = torch.relu(x) if isinstance(layer, torch.nn.ReLU) else layer(x)
        if idx in wanted:
            features[idx] = x
        if idx >= last:
            break
    missing = wanted - features.keys()
    if missing:
        raise ValueError(f"model has no layer(s) at index {sorted(missing)}")
    return features


def _gram_matrix(features):
    """Return the Gram matrix of a (batch, channels, H, W) map, scaled by 1/(C*H*W)."""
    batch, channels, height, width = features.shape
    flat = features.reshape(batch, channels, height * width)
    return torch.bmm(flat, flat.transpose(1, 2)) / (channels * height * width)


def style_transfer(model, content_input, style_input, iterations=500,
                   content_weight=1.0, style_weight=1e6,
                   content_layers=(21,), style_layers=(0, 5, 10, 19, 28)):
    """Optimise an image to match content features and style Gram matrices.

    Args:
        model: feature extractor whose child layers are applied in order.
        content_input: content image tensor of shape (batch, C, H, W); the
            optimised image starts as a copy of it.
        style_input: style image tensor of shape (batch, C, H', W').
        iterations: number of L-BFGS ``step`` calls. Each call may evaluate
            the loss several times.
        content_weight: multiplier for the content (feature MSE) loss.
        style_weight: multiplier for the style (Gram-matrix MSE) loss.
        content_layers: indices of child layers used for the content loss.
        style_layers: indices of child layers used for the style loss.
            The defaults are indices into ``vgg19().features``.

    Returns:
        The optimised image tensor, same shape as ``content_input``, with
        ``requires_grad`` set.

    Raises:
        ValueError: if no layers are given or an index is outside the model.
    """
    content_layers = tuple(content_layers)
    style_layers = tuple(style_layers)
    wanted = set(content_layers) | set(style_layers)
    if not wanted:
        raise ValueError("at least one content or style layer is required")

    mse = torch.nn.functional.mse_loss

    # The targets are constants, so compute them once without autograd
    with torch.no_grad():
        if content_layers:
            content_feats = _extract_features(model, content_input, content_layers)
        else:
            content_feats = {}
        if style_layers:
            style_feats = _extract_features(model, style_input, style_layers)
        else:
            style_feats = {}
        style_targets = {i: _gram_matrix(f) for i, f in style_feats.items()}

    # detach() makes the clone a leaf tensor even if the input tracks gradients
    input_image = content_input.detach().clone().requires_grad_(True)

    optimizer = torch.optim.LBFGS([input_image])

    def closure():
        optimizer.zero_grad()
        feats = _extract_features(model, input_image, wanted)
        content_loss = sum(mse(feats[i], content_feats[i]) for i in content_layers)
        style_loss = sum(
            mse(_gram_matrix(feats[i]), style_targets[i]) for i in style_layers
        )
        total_loss = content_weight * content_loss + style_weight * style_loss
        # Differentiate with respect to the image only, so the model's
        # parameters do not accumulate gradients.
        (grad,) = torch.autograd.grad(total_loss, input_image)
        input_image.grad = grad
        return total_loss

    for _ in range(iterations):
        optimizer.step(closure)

    return input_image

# Run the style transfer
output_image = style_transfer(model, content_tensor, style_tensor)

# Post-process and save. The tensor is still in ImageNet-normalised space,
# so undo the normalisation and clamp to [0, 1] before converting to an image.
output_image = output_image.detach().squeeze(0).cpu()
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
output_image = (output_image * norm_std + norm_mean).clamp(0, 1)
output_image = transforms.ToPILImage()(output_image)
output_image.save("style_transfer_gpu.jpg")

关键说明

1.硬件依赖：需 NVIDIA GPU 并安装正确版本的 CUDA 和 cuDNN。

2.库安装：

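pip install torch torchvision opencv-python-headless cupy numba

注意：pip 提供的 opencv-python / opencv-python-headless 预编译包不包含 CUDA 模块，示例 2 中的 cv2.cuda 需要自行从源码编译带 CUDA 支持的 OpenCV；CuPy 建议安装与本机 CUDA 版本匹配的预编译包（例如 cupy-cuda12x）。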

3.性能对比：在数据量较大、计算密集的任务上，GPU 加速通常比 CPU 版本快 10-100 倍（取决于任务复杂度）；对于小图像或简单运算，CPU 与 GPU 之间的数据传输开销可能抵消加速效果。

4.适用场景：

PyTorch：适合深度学习相关的图像处理（如 GAN、超分辨率）。
OpenCV CUDA：适合传统图像处理加速（滤波、特征提取）。
CuPy/Numba：适合自定义数值计算或科研算法。

到此这篇关于 Python 实现 GPU 加速图像处理的代码详解的文章就介绍到这了，更多相关 Python GPU 加速图像处理内容请搜索脚本之家以前的文章或继续浏览下面的相关文章，希望大家以后多多支持脚本之家！

您可能感兴趣的文章:

Python3并发写文件与Python对比
这篇文章主要介绍了Python3并发写文件原理解析,文中通过示例代码介绍的非常详细，对大家的学习或者工作具有一定的参考学习价值,需要的朋友可以参考下
2019-11-11
Python+django实现文件上传
本系列以可操作性为主，介绍如何通过 django web 框架来实现一些简单的功能。每一篇文章都具有完整性和独立性，使新手在动手做的过程中体会 web 开发的过程，过程中的细节请参考相关文档。
2016-01-01
python 绘制3D图案例分享
这篇文章主要介绍了python 绘制3D图案例分享，文章围绕主题展开详细的内容介绍，具有一定的参考价值，需要的小伙伴可以参考一下，希望对你的学习有所帮助
2022-07-07
使用Python编写一个沙箱隔离功能
这篇文章主要为大家详细介绍了如何通过 Docker 实现一个简单的沙箱隔离应用,然后提供一个通过 Python 模拟沙箱的例子,感兴趣的可以了解下
2024-12-12
Python线性分类介绍
这篇文章主要介绍了Python线性分类，线性分类指在机器学习领域，分类的目标是指将具有相似特征的对象聚集。而一个线性分类器则透过特征的线性组合来做出分类决定，以达到此种目的。对象的特征通常被描述为特征值，而在向量中则描述为特征向量,需要的朋友可以参考下
2022-02-02
django框架两个使用模板实例
这篇文章主要介绍了django框架使用模板方法,结合两个具体实例形式详细分析了Django框架模板的相关使用技巧与操作注意事项,需要的朋友可以参考下
2019-12-12
Python实现获取当前目录下文件名代码详解
这篇文章主要介绍了Python实现获取当前目录下文件名，本文通过实例代码给大家介绍的非常详细，对大家的学习或工作具有一定的参考借鉴价值,需要的朋友可以参考下
2020-03-03
Python自省及反射原理实例详解
这篇文章主要介绍了Python自省及反射原理实例详解,文中通过示例代码介绍的非常详细，对大家的学习或者工作具有一定的参考学习价值,需要的朋友可以参考下
2020-07-07
python读取浮点数和读取文本文件示例
这篇文章主要介绍了python读取浮点数和读取文本文件示例,需要的朋友可以参考下
2014-05-05
python爬虫之爬取谷歌趋势数据
这篇文章主要介绍了python爬虫之爬取谷歌趋势数据,文中有非常详细的代码示例,对正在学习python爬虫的小伙伴们有非常好的帮助,需要的朋友可以参考下
2021-04-04