-
Notifications
You must be signed in to change notification settings - Fork 13
/
Makevars
303 lines (252 loc) · 8.99 KB
/
Makevars
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
# Linker flags for the package library. Starts empty; the optional backend
# sections later in this file append to it (or WHISPER_LIBS replaces it).
PKG_LIBS =
# C++ standard requested for compiling the package's C++ sources.
CXX_STD = CXX11
# Disabled reference flags -- presumably carried over from the upstream
# whisper.cpp Makefile (TODO confirm); they have no effect here.
#CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
#CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
#LDFLAGS =
# Enable O3 optimisations
# The same switch is added to both flag variables; every later section of
# this file follows that pattern of extending PKG_CFLAGS and PKG_CPPFLAGS.
PKG_CFLAGS += -O3
PKG_CPPFLAGS += -O3
# Identify the build host. Each value is queried from `uname` only when the
# caller has not already provided it (e.g. for cross-compilation).

# Kernel / operating system name, e.g. Linux or Darwin.
ifndef UNAME_S
  UNAME_S := $(shell uname -s)
endif

# Processor type as reported by `uname -p` (only printed in the build info).
ifndef UNAME_P
  UNAME_P := $(shell uname -p)
endif

# Machine hardware name, e.g. x86_64 or aarch64; drives the architecture
# specific sections of this file.
ifndef UNAME_M
  UNAME_M := $(shell uname -m)
endif
# Detect the installed CUDA compiler version (for example 12.2.140) unless the
# caller already supplied NVCC_VERSION. The value is only consumed by the
# WHISPER_CUBLAS section of this file.
ifndef NVCC_VERSION
  # The $(call,...) wrapper discards the output of `which nvcc` and expands to
  # nothing, so the comparison below effectively tests .SHELLSTATUS, i.e.
  # whether nvcc was found on PATH. .SHELLSTATUS needs GNU make >= 4.2; with
  # older versions it is empty and NVCC_VERSION simply stays unset.
  ifeq ($(call,$(shell which nvcc))$(.SHELLSTATUS),0)
    # `grep -E` replaces the obsolescent `egrep`, and the dots are escaped so
    # that only a literal "V<major>.<minor>.<patch>" token is extracted;
    # `cut -c2-` then drops the leading "V".
    NVCC_VERSION := $(shell nvcc --version | grep -oE 'V[0-9]+\.[0-9]+\.[0-9]+' | cut -c2-)
  endif
endif
# Architecture specific: x86 SIMD extensions
#
# On x86 hosts the CPU feature list of the *build* machine is inspected and the
# matching -m<feature> switch is added to both PKG_CFLAGS and PKG_CPPFLAGS.
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))

  # Pick the command that prints the CPU feature list on this operating system.
  ifeq (Darwin,$(UNAME_S))
    CPUINFO_CMD := sysctl machdep.cpu.features machdep.cpu.leaf7_features
  else ifeq (Linux,$(UNAME_S))
    CPUINFO_CMD := cat /proc/cpuinfo
  else ifneq ($(filter MINGW32_NT% MINGW64_NT% MSYS_NT%,$(UNAME_S)),)
    CPUINFO_CMD := cat /proc/cpuinfo
  else ifneq ($(filter DragonFly FreeBSD,$(UNAME_S)),)
    CPUINFO_CMD := grep Features /var/run/dmesg.boot
  else ifeq (Haiku,$(UNAME_S))
    CPUINFO_CMD := sysinfo -cpu
  endif

  # When no command is known for the OS, no SIMD switch is added at all.
  ifdef CPUINFO_CMD

    # AVX (macOS reports it as AVX1.0)
    AVX_M := $(shell $(CPUINFO_CMD) | grep -iwE 'AVX|AVX1.0')
    ifneq ($(AVX_M),)
      PKG_CFLAGS   += -mavx
      PKG_CPPFLAGS += -mavx
    endif

    # AVX2
    AVX2_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX2')
    ifneq ($(AVX2_M),)
      PKG_CFLAGS   += -mavx2
      PKG_CPPFLAGS += -mavx2
    endif

    # AVX-512 foundation
    AVX512F_M := $(shell $(CPUINFO_CMD) | grep -iw 'AVX512F')
    ifneq ($(AVX512F_M),)
      PKG_CFLAGS   += -mavx512f
      PKG_CPPFLAGS += -mavx512f
    endif

    # Fused multiply-add
    FMA_M := $(shell $(CPUINFO_CMD) | grep -iw 'FMA')
    ifneq ($(FMA_M),)
      PKG_CFLAGS   += -mfma
      PKG_CPPFLAGS += -mfma
    endif

    # Half-precision conversion
    F16C_M := $(shell $(CPUINFO_CMD) | grep -iw 'F16C')
    ifneq ($(F16C_M),)
      PKG_CFLAGS   += -mf16c
      PKG_CPPFLAGS += -mf16c
    endif

    # SSE3 (listed as PNI in /proc/cpuinfo)
    SSE3_M := $(shell $(CPUINFO_CMD) | grep -iwE 'PNI|SSE3')
    ifneq ($(SSE3_M),)
      PKG_CFLAGS   += -msse3
      PKG_CPPFLAGS += -msse3
    endif

    # SSSE3
    SSSE3_M := $(shell $(CPUINFO_CMD) | grep -iw 'SSSE3')
    ifneq ($(SSSE3_M),)
      PKG_CFLAGS   += -mssse3
      PKG_CPPFLAGS += -mssse3
    endif

  endif
endif
# POWER (ppc64 / ppc64le) specific flags.
ifneq (,$(filter ppc64%,$(UNAME_M)))
  # Enable the POWER9 vector extension when /proc/cpuinfo mentions POWER9.
  POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
  ifneq ($(findstring POWER9,$(POWER9_M)),)
    PKG_CFLAGS += -mpower9-vector
  endif

  # Require c++23's std::byteswap for big-endian support.
  # Only the exact machine name ppc64 is treated as big-endian here.
  # NOTE(review): this puts a C++-only -std switch into PKG_CPPFLAGS while
  # CXX_STD is set to CXX11 elsewhere in this file -- confirm this is intended.
  ifeq (ppc64,$(UNAME_M))
    PKG_CPPFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
  endif
endif
# Optional Apple Accelerate backend: requested by defining WHISPER_ACCELERATE
# and only honoured on Darwin (original note: "Mac M1 - include Accelerate
# framework").
ifdef WHISPER_ACCELERATE
  ifeq (Darwin,$(UNAME_S))
    PKG_CFLAGS += -DGGML_USE_ACCELERATE
    PKG_LIBS   += -framework Accelerate
  endif
endif
# Optional Core ML encoder, requested by defining WHISPER_COREML.
# SOURCES_COREML / OBJECTS_COREML are always assigned (empty when the backend
# is off) because they are appended to SOURCES / OBJECTS further down.
ifdef WHISPER_COREML
  SOURCES_COREML = whisper_cpp/coreml/whisper-encoder.mm whisper_cpp/coreml/whisper-encoder-impl.m
  OBJECTS_COREML = whisper_cpp/coreml/whisper-encoder.o whisper_cpp/coreml/whisper-encoder-impl.o
  PKG_LIBS     += -framework Foundation -framework CoreML
  PKG_CPPFLAGS += -DWHISPER_USE_COREML
  # Pass the fallback switch through as a preprocessor define when requested.
  ifdef WHISPER_COREML_ALLOW_FALLBACK
    PKG_CPPFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
  endif
  # Compile the Objective-C(++) sources with automatic reference counting.
  PKG_CPPFLAGS += -fobjc-arc
else
  SOURCES_COREML =
  OBJECTS_COREML =
endif
# Optional Metal GPU backend, requested by defining WHISPER_METAL and only
# honoured on Darwin. The two lists default to empty so that they can be
# appended to SOURCES / OBJECTS unconditionally later on.
SOURCES_METAL =
OBJECTS_METAL =
ifdef WHISPER_METAL
  ifeq (Darwin,$(UNAME_S))
    # Normalise the switch to 1 once the backend is actually enabled.
    WHISPER_METAL := 1
    PKG_CFLAGS    += -DGGML_USE_METAL
    PKG_CPPFLAGS  += -DGGML_USE_METAL
    PKG_LIBS      += -framework Foundation -framework Metal -framework MetalKit
    SOURCES_METAL = whisper_cpp/ggml-metal.m
    OBJECTS_METAL = whisper_cpp/ggml-metal.o
  endif
endif
# Optional OpenBLAS backend, requested by defining WHISPER_OPENBLAS.
# The OpenBLAS installation is located through pkg-config:
#   --cflags yields the include switches and belongs with the compiler flags,
#   --libs   yields the -L/-l switches and belongs with the linker flags.
# (The two queries were previously swapped, so the include path ended up on the
# link line and the libraries on the compile line.)
ifdef WHISPER_OPENBLAS
  PKG_CFLAGS += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags openblas)
  PKG_LIBS   += $(shell pkg-config --libs openblas)
endif
# Optional NVIDIA cuBLAS backend, requested by defining WHISPER_CUBLAS.
#
# OBJECTS_CUDA is always assigned (empty when the backend is off), matching
# the *_METAL / *_COREML variables, so that a stray OBJECTS_CUDA from the
# environment cannot leak into OBJECTS.
OBJECTS_CUDA =
ifdef WHISPER_CUBLAS
  # Target architecture passed to nvcc via -arch; callers may preset
  # CUDA_ARCH_FLAG. "native" is chosen when the version test below passes.
  # NOTE(review): `expr` compares non-integer operands as strings, and an empty
  # NVCC_VERSION makes the expression fail (falling back to "all") -- confirm
  # this is acceptable for the CUDA versions that must be supported.
  ifeq ($(shell expr $(NVCC_VERSION) \>= 11.6), 1)
    CUDA_ARCH_FLAG ?= native
  else
    CUDA_ARCH_FLAG ?= all
  endif
  #CUDA_ARCH_FLAG = all

  # Root of the CUDA toolkit; may be preset by the caller.
  ifndef CUDA_PATH
    CUDA_PATH := /usr/local/cuda
  endif

  NVCC = nvcc
  PKG_CFLAGS   += -DGGML_USE_CUBLAS -I"$(CUDA_PATH)/include" -I"$(CUDA_PATH)/targets/$(UNAME_M)-linux/include"
  PKG_CPPFLAGS += -DGGML_USE_CUBLAS -I"$(CUDA_PATH)/include" -I"$(CUDA_PATH)/targets/$(UNAME_M)-linux/include"
  PKG_LIBS     += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L"$(CUDA_PATH)/lib64" -L/opt/cuda/lib64 -L"$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib" -L/usr/lib/wsl/lib
  # Flags for the explicit ggml-cuda.o rule at the end of this file.
  NVCCFLAGS = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
  OBJECTS_CUDA = whisper_cpp/ggml-cuda.o
endif
## Note/TODO: removed sections on WHISPER_HIPBLAS / WHISPER_CLBLAST
# Optional profiling build: defining WHISPER_GPROF adds -pg to both flag sets.
ifdef WHISPER_GPROF
  PKG_CFLAGS   += -pg
  PKG_CPPFLAGS += -pg
endif
# ARM specific flags, selected by the machine name reported by `uname -m`.

# 64-bit ARM: tune for the CPU of the build host.
ifneq (,$(filter aarch64%,$(UNAME_M)))
  PKG_CFLAGS   += -mcpu=native
  PKG_CPPFLAGS += -mcpu=native
endif

# 32-bit Raspberry Pi 1, 2, 3 (only the C flags are extended here)
ifneq (,$(filter armv6%,$(UNAME_M)))
  PKG_CFLAGS += -mfpu=neon -mfp16-format=ieee -mno-unaligned-access
endif

# 32-bit ARM, for example on Armbian or possibly raspbian
ifneq (,$(filter armv7%,$(UNAME_M)))
  # Disabled variant for genuine 32-bit hardware:
  #PKG_CFLAGS += -mfpu=neon -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
  #PKG_CPPFLAGS += -mfpu=neon -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
  # 64-bit ARM on 32-bit OS, use these (TODO: auto-detect 64-bit)
  PKG_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
  PKG_CPPFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
endif

# Raspberry Pi 4
ifneq (,$(filter armv8%,$(UNAME_M)))
  PKG_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
  PKG_CPPFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -funsafe-math-optimizations -mno-unaligned-access
endif
# -----------------------------------------------------------------------------
# Expert overrides
#
# A user who knows exactly which flags are wanted can define WHISPER_CFLAGS,
# WHISPER_CPPFLAGS and/or WHISPER_LIBS. Each one *replaces* (not extends)
# everything accumulated in the corresponding PKG_* variable up to this point.
# Flags appended after this section are still added on top of the override.
# -----------------------------------------------------------------------------
ifdef WHISPER_CFLAGS
  PKG_CFLAGS = $(WHISPER_CFLAGS)
endif

ifdef WHISPER_CPPFLAGS
  PKG_CPPFLAGS = $(WHISPER_CPPFLAGS)
endif

ifdef WHISPER_LIBS
  PKG_LIBS = $(WHISPER_LIBS)
endif
# Mandatory flags: appended after the WHISPER_* override section so that they
# are present even when the user replaced the PKG_* variables.
PKG_CPPFLAGS += -DSTRICT_R_HEADERS -I./dr_libs -I./whisper_cpp

# Default POSIX/XSI feature level requested from the system headers.
PKG_CFLAGS   += -D_XOPEN_SOURCE=600
PKG_CPPFLAGS += -D_XOPEN_SOURCE=600

# Somehow in OpenBSD whenever POSIX conformance is specified
# some string functions rely on locale_t availability,
# which was introduced in POSIX.1-2008, forcing us to go higher
ifeq (OpenBSD,$(UNAME_S))
  PKG_CFLAGS   += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
  PKG_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
endif

# Data types, macros and functions related to controlling CPU affinity
# are available on Linux through GNU extensions in libc
ifeq (Linux,$(UNAME_S))
  PKG_CFLAGS   += -D_GNU_SOURCE
  PKG_CPPFLAGS += -D_GNU_SOURCE
endif

# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
# and on macOS its availability depends on enabling Darwin extensions
# similarly on DragonFly, enabling BSD extensions is necessary
ifeq (Darwin,$(UNAME_S))
  PKG_CFLAGS   += -D_DARWIN_C_SOURCE
  PKG_CPPFLAGS += -D_DARWIN_C_SOURCE
endif
ifeq (DragonFly,$(UNAME_S))
  PKG_CFLAGS   += -D__BSD_VISIBLE
  PKG_CPPFLAGS += -D__BSD_VISIBLE
endif

# alloca is a non-standard interface that is not visible on BSDs when
# POSIX conformance is specified, but not all of them provide a clean way
# to enable it in such cases
ifeq (FreeBSD,$(UNAME_S))
  PKG_CFLAGS   += -D__BSD_VISIBLE
  PKG_CPPFLAGS += -D__BSD_VISIBLE
endif
ifeq (NetBSD,$(UNAME_S))
  PKG_CFLAGS   += -D_NETBSD_SOURCE
  PKG_CPPFLAGS += -D_NETBSD_SOURCE
endif
ifeq (OpenBSD,$(UNAME_S))
  PKG_CFLAGS   += -D_BSD_SOURCE
  PKG_CPPFLAGS += -D_BSD_SOURCE
endif

# OS specific
# TODO: support Windows
# Compile with POSIX threads on every operating system in the list below.
ifeq ($(filter $(UNAME_S),Linux Darwin DragonFly FreeBSD NetBSD OpenBSD Haiku),$(UNAME_S))
  PKG_CFLAGS   += -pthread
  PKG_CPPFLAGS += -pthread
endif
#
# Print build information
#
# Emitted while make parses this file, i.e. before any compilation starts:
# a header line, one "I <NAME>: <value>" line per variable, then a blank line.
$(info I whisper.cpp build info: )
$(foreach v,UNAME_S UNAME_P UNAME_M PKG_CFLAGS PKG_CPPFLAGS PKG_LIBS,$(info I $(v): $($(v))))
#$(info I CC: $(CCV))
#$(info I CXX: $(CXXV))
$(info )
#PKG_CFLAGS = -mavx -mavx2 -mfma -mf16c
#PKG_CFLAGS = -msse3
#PKG_CFLAGS = -mcpu=native
#PKG_CFLAGS = -O3
#PKG_CPPFLAGS += -mcpu=native
# Translation units of the package: the bundled whisper.cpp / ggml sources plus
# the Rcpp glue code, followed by the optional backend sources (Metal, Core ML).
SOURCES = \
  whisper_cpp/ggml-quants.c \
  whisper_cpp/ggml-backend.c \
  whisper_cpp/ggml-alloc.c \
  whisper_cpp/ggml.c \
  whisper_cpp/whisper.cpp \
  whisper_cpp/common-ggml.cpp \
  whisper_cpp/common.cpp \
  rcpp_whisper.cpp \
  RcppExports.cpp \
  $(SOURCES_METAL) \
  $(SOURCES_COREML)

# Object files linked into the shared library, in the same order as SOURCES,
# followed by the optional CUDA object (which has its own explicit rule).
OBJECTS = \
  whisper_cpp/ggml-quants.o \
  whisper_cpp/ggml-backend.o \
  whisper_cpp/ggml-alloc.o \
  whisper_cpp/ggml.o \
  whisper_cpp/whisper.o \
  whisper_cpp/common-ggml.o \
  whisper_cpp/common.o \
  rcpp_whisper.o \
  RcppExports.o \
  $(OBJECTS_METAL) \
  $(OBJECTS_COREML) \
  $(OBJECTS_CUDA)
# Default goal: build the package shared library. `all` is a command name, not
# a file, so it is declared phony to keep a stray file called "all" from
# masking it.
.PHONY: all
all: $(SHLIB)

# The shared library depends on every object listed in OBJECTS; no recipe is
# given here, so the link step is left to rules defined outside this file.
$(SHLIB): $(OBJECTS)

# CUDA translation unit: compiled with nvcc rather than the regular compiler.
# Only reached when OBJECTS contains whisper_cpp/ggml-cuda.o, which this file
# arranges solely in the WHISPER_CUBLAS section (where NVCC and NVCCFLAGS are
# also defined). The recipe line must start with a TAB character.
whisper_cpp/ggml-cuda.o: whisper_cpp/ggml-cuda.cu whisper_cpp/ggml-cuda.h
	$(NVCC) $(NVCCFLAGS) $(PKG_CPPFLAGS) -I"$(R_INCLUDE_DIR)" -fPIC -c $< -o $@