-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcuzfft1d.f
109 lines (109 loc) · 2.87 KB
/
cuzfft1d.f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
C
C FFTE: A FAST FOURIER TRANSFORM PACKAGE
C
C (C) COPYRIGHT SOFTWARE, 2000-2004, 2008-2014, ALL RIGHTS RESERVED
C BY
C DAISUKE TAKAHASHI
C FACULTY OF ENGINEERING, INFORMATION AND SYSTEMS
C UNIVERSITY OF TSUKUBA
C 1-1-1 TENNODAI, TSUKUBA, IBARAKI 305-8573, JAPAN
C E-MAIL: [email protected]
C
C
C 1-D COMPLEX FFT ROUTINE (FOR NVIDIA GPUS)
C
C CUDA FORTRAN SOURCE PROGRAM
C
C CALL ZFFT1D(A,N,IOPT,B)
C
C A(N) IS COMPLEX INPUT/OUTPUT VECTOR (COMPLEX*16)
C B(N*2) IS WORK/COEFFICIENT VECTOR (COMPLEX*16)
C N IS THE LENGTH OF THE TRANSFORMS (INTEGER*4)
C -----------------------------------
C N = (2**IP) * (3**IQ) * (5**IR)
C -----------------------------------
C IOPT = 0 CREATE AN FFT PLAN (INTEGER*4)
C = -1 FOR FORWARD TRANSFORM
C = +1 FOR INVERSE TRANSFORM
C = 3 DESTROY THE FFT PLAN
C
C WRITTEN BY DAISUKE TAKAHASHI
C
SUBROUTINE ZFFT1D(A,N,IOPT,B)
use cudafor
use cufft
IMPLICIT REAL*8 (A-H,O-Z)
COMPLEX*16 A(*),B(*)
complex(8),device,allocatable :: A_d(:),B_d(:)
INTEGER*4 PLANX,PLANY
SAVE PLANX,PLANY
C
CALL GETNXNY(N,NX,NY)
C
IF (IOPT .EQ. 0) THEN
istat=cufftPlan1D(PLANX,NX,CUFFT_Z2Z,NY)
istat=cufftPlan1D(PLANY,NY,CUFFT_Z2Z,NX)
RETURN
END IF
C
IF (IOPT .EQ. 3) THEN
istat=cufftDestroy(PLANX)
istat=cufftDestroy(PLANY)
RETURN
END IF
C
ALLOCATE(A_d(N),B_d(N))
A_d=A(1:N)
C
IF (IOPT .EQ. 1) THEN
!$cuf kernel do <<<*,*>>>
DO 10 I=1,N
A_d(I)=DCONJG(A_d(I))
10 CONTINUE
END IF
C
CALL ZTRANS(A_d,B_d,NX,NY)
istat=cufftExecZ2Z(PLANY,B_d,B_d,CUFFT_FORWARD)
CALL ZTRANSMUL(B_d,A_d,NY,NX)
istat=cufftExecZ2Z(PLANX,A_d,A_d,CUFFT_FORWARD)
CALL ZTRANS(A_d,B_d,NX,NY)
C
IF (IOPT .EQ. 1) THEN
DN=1.0D0/DBLE(N)
!$cuf kernel do <<<*,*>>>
DO 20 I=1,N
B_d(I)=DCONJG(B_d(I))*DN
20 CONTINUE
END IF
C
A(1:N)=B_d
DEALLOCATE(A_d,B_d)
RETURN
END
SUBROUTINE ZTRANSMUL(A_d,B_d,NX,NY)
use cudafor
IMPLICIT REAL*8 (A-H,O-Z)
INCLUDE 'param.h'
complex(8),device :: A_d(NX,*),B_d(NY,*),C_d(NB,NB)
C
PI2=8.0D0*DATAN(1.0D0)
PX=-PI2/(DBLE(NX)*DBLE(NY))
DO 60 II=1,NX,NB
DO 50 JJ=1,NY,NB
!$cuf kernel do(2) <<<*,*>>>
DO 20 I=II,MIN0(II+NB-1,NX)
DO 10 J=JJ,MIN0(JJ+NB-1,NY)
C_d(J-JJ+1,I-II+1)=A_d(I,J)
10 CONTINUE
20 CONTINUE
!$cuf kernel do(2) <<<*,*>>>
DO 40 I=II,MIN0(II+NB-1,NX)
DO 30 J=JJ,MIN0(JJ+NB-1,NY)
TEMP=PX*DBLE(I-1)*DBLE(J-1)
B_d(J,I)=C_d(J-JJ+1,I-II+1)*DCMPLX(DCOS(TEMP),DSIN(TEMP))
30 CONTINUE
40 CONTINUE
50 CONTINUE
60 CONTINUE
RETURN
END