Skip to content

Commit 15cb124

Browse files
authored
Merge pull request #2100 from xianyi/develop
Merge develop in preparation of 0.3.6 release
2 parents eebc189 + 97d5034 commit 15cb124

File tree

386 files changed

+37196
-9621
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

386 files changed

+37196
-9621
lines changed

.travis.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ matrix:
149149

150150
- &test-macos
151151
os: osx
152-
osx_image: xcode8
152+
osx_image: xcode10.1
153153
before_script:
154154
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
155155
- brew update
@@ -160,6 +160,7 @@ matrix:
160160
- BTYPE="BINARY=64 INTERFACE64=1"
161161

162162
- <<: *test-macos
163+
osx_image: xcode8.3
163164
env:
164165
- BTYPE="BINARY=32"
165166

CMakeLists.txt

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
66
project(OpenBLAS C ASM)
77
set(OpenBLAS_MAJOR_VERSION 0)
88
set(OpenBLAS_MINOR_VERSION 3)
9-
set(OpenBLAS_PATCH_VERSION 5)
9+
set(OpenBLAS_PATCH_VERSION 6)
1010
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
1111

1212
# Adhere to GNU filesystem layout conventions
@@ -42,6 +42,19 @@ endif()
4242

4343
#######
4444

45+
if(MSVC AND MSVC_STATIC_CRT)
46+
set(CompilerFlags
47+
CMAKE_CXX_FLAGS
48+
CMAKE_CXX_FLAGS_DEBUG
49+
CMAKE_CXX_FLAGS_RELEASE
50+
CMAKE_C_FLAGS
51+
CMAKE_C_FLAGS_DEBUG
52+
CMAKE_C_FLAGS_RELEASE
53+
)
54+
foreach(CompilerFlag ${CompilerFlags})
55+
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
56+
endforeach()
57+
endif()
4558

4659
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
4760

@@ -62,10 +75,10 @@ endif ()
6275

6376
set(SUBDIRS ${BLASDIRS})
6477
if (NOT NO_LAPACK)
65-
list(APPEND SUBDIRS lapack)
6678
if(BUILD_RELAPACK)
6779
list(APPEND SUBDIRS relapack/src)
6880
endif()
81+
list(APPEND SUBDIRS lapack)
6982
endif ()
7083

7184
# set which float types we want to build for
@@ -134,7 +147,7 @@ endif ()
134147

135148
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
136149
if(MSVC)
137-
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
150+
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
138151
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
139152
endif()
140153
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
@@ -149,15 +162,9 @@ if (${DYNAMIC_ARCH})
149162
endforeach()
150163
endif ()
151164

152-
# Only build shared libs for MSVC
153-
if (MSVC)
154-
set(BUILD_SHARED_LIBS ON)
155-
endif()
156-
157-
158165
# add objects to the openblas lib
159166
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
160-
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
167+
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
161168

162169
# Android needs to explicitly link against libm
163170
if(ANDROID)
@@ -166,7 +173,7 @@ endif()
166173

167174
# Handle MSVC exports
168175
if(MSVC AND BUILD_SHARED_LIBS)
169-
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
176+
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
170177
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
171178
else()
172179
# Creates verbose .def file (51KB vs 18KB)
@@ -217,6 +224,14 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
217224
SOVERSION ${OpenBLAS_MAJOR_VERSION}
218225
)
219226

227+
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
228+
if (NOT MSVC)
229+
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
230+
else()
231+
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
232+
endif()
233+
endif()
234+
220235
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
221236
if (NOT DEFINED ARCH)
222237
set(ARCH_IN "x86_64")
@@ -314,7 +329,7 @@ install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
314329
if(NOT NOFORTRAN)
315330
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
316331

317-
set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h)
332+
set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
318333
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
319334
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
320335
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
@@ -327,10 +342,11 @@ endif()
327342
if(NOT NO_CBLAS)
328343
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
329344

345+
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
330346
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
331347
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
332-
file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}")
333-
install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h)
348+
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
349+
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
334350
endif()
335351

336352
if(NOT NO_LAPACKE)

Changelog.txt

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,82 @@
11
OpenBLAS ChangeLog
2+
====================================================================
3+
Version 0.3.6
4+
29-Apr-2019
5+
6+
common:
7+
* the build tools now check that a given cpu TARGET is actually valid
8+
* the build-time check of system features (c_check) has been made
9+
less dependent on particular perl features (this should mainly
10+
benefit building on Windows)
11+
* several problems with the ReLAPACK integration were fixed,
12+
including INTERFACE64 support and building a shared library
13+
* building with CMAKE on BSD systems was improved
14+
* a non-absolute SUM function was added based on the
15+
existing optimized code for ASUM
16+
* CBLAS interfaces to the IxMIN and IxMAX functions were added
17+
* a name clash between LAPACKE and BOOST headers was resolved
18+
* CMAKE builds with OpenMP failed to include the appropriate getrf_parallel
19+
kernels
20+
* a crash on thread (key) deletion with the USE_TLS=1 memory management
21+
option was fixed
22+
* restored several earlier fixes, in particular for OpenMP performance,
23+
building on BSD, and calling fork on CYGWIN, which had inadvertently
24+
been dropped in the 0.3.3 rewrite of the memory management code.
25+
26+
x86_64:
27+
* the AVX512 DGEMM kernel has been disabled again due to unsolved problems
28+
* building with old versions of MSVC was fixed
29+
* it is now possible to build a static library on Windows with CMAKE
30+
* accessing environment variables on CYGWIN at run time was fixed
31+
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
32+
* Intel "Denverton" atom and Hygon "Dhyana" zen CPUs are now autodetected
33+
* building for DYNAMIC_ARCH with a DYNAMIC_LIST of targets is now supported
34+
with CMAKE as well
35+
* building for DYNAMIC_ARCH with GENERIC as the default target is now supported
36+
* a buffer overflow in the SSE GEMM kernel for Intel Nano targets was fixed
37+
* assembly bugs involving undeclared modification of input operands were fixed
38+
in the AXPY, DOT, GEMV, GER, SCAL, SYMV and TRSM microkernels for Nehalem,
39+
Sandybridge, Haswell, Bulldozer and Piledriver. These would typically cause
40+
test failures or segfaults when compiled with recent versions of gcc from 8 onward.
41+
* a similar bug was fixed in the blas_quickdivide code used to split workloads
42+
in most functions
43+
* a bug in the IxMIN implementation for the GENERIC target made it return the result of IxMAX
44+
* fixed building on SkylakeX systems when either the compiler or the (emulated) operating
45+
environment does not support AVX512
46+
* improved GEMM performance on ZEN targets
47+
48+
x86:
49+
* build failures caused by the recently added checks for AVX512 were fixed
50+
* an inline assembly bug involving undeclared modification of an input argument was
51+
fixed in the blas_quickdivide code used to split workloads in most functions
52+
* a bug in the IMIN implementation for the GENERIC target made it return the result of IMAX
53+
54+
MIPS32:
55+
* a bug in the IMIN implementation made it return the result of IMAX
56+
57+
POWER:
58+
* single precision BLAS1/2 functions have received optimized POWER8 kernels
59+
* POWER9 is now a separate target, with an optimized DGEMM/DTRMM kernel
60+
* building on PPC970 systems under OSX Leopard or Tiger is now supported
61+
* out-of-bounds memory accesses in the gemm_beta microkernels were fixed
62+
* building a shared library on AIX is now supported for POWER6
63+
* DYNAMIC_ARCH support has been added for POWER6 and newer
64+
65+
ARMv7:
66+
* corrected xDOT behaviour with zero INC_X or INC_Y
67+
* a bug in the IMIN implementation made it return the result of IMAX
68+
69+
ARMv8:
70+
* added support for HiSilicon TSV110 cpus
71+
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
72+
* cross-compilation with CMAKE now works again
73+
* a bug in the IMIN implementation made it return the result of IMAX
74+
* ARMV8 builds with the BINARY=32 option are now automatically handled as ARMV7
75+
76+
IBM Z:
77+
* optimized microkernels for single precision BLAS1/2 functions have been added
78+
for both Z13 and Z14
79+
280
====================================================================
381
Version 0.3.5
482
31-Dec-2018

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ endif
9696
@echo
9797

9898
shared :
99-
ifndef NO_SHARED
99+
ifneq ($(NO_SHARED), 1)
100100
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
101101
@$(MAKE) -C exports so
102102
@ln -fs $(LIBSONAME) $(LIBPREFIX).so

Makefile.arm64

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,8 @@ ifeq ($(CORE), THUNDERX2T99)
3838
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
3939
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
4040
endif
41+
42+
ifeq ($(CORE), TSV110)
43+
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
44+
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
45+
endif

Makefile.install

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,14 @@ ifndef NO_LAPACKE
5858
endif
5959

6060
#for install static library
61-
ifndef NO_STATIC
61+
ifneq ($(NO_STATIC),1)
6262
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
6363
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
6464
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
6565
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
6666
endif
6767
#for install shared library
68-
ifndef NO_SHARED
68+
ifneq ($(NO_SHARED),1)
6969
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
7070
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
7171
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@@ -106,14 +106,14 @@ ifndef NO_LAPACKE
106106
endif
107107

108108
#for install static library
109-
ifndef NO_STATIC
109+
ifneq ($(NO_STATIC),1)
110110
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
111111
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
112112
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
113113
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
114114
endif
115115
#for install shared library
116-
ifndef NO_SHARED
116+
ifneq ($(NO_SHARED),1)
117117
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
118118
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
119119
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
@@ -138,7 +138,7 @@ endif
138138
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
139139
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
140140

141-
ifndef NO_SHARED
141+
ifneq ($(NO_SHARED),1)
142142
#ifeq logical or
143143
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
144144
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"

Makefile.power

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,15 @@ else
99
USE_OPENMP = 1
1010
endif
1111

12-
12+
ifeq ($(CORE), POWER9)
13+
ifeq ($(USE_OPENMP), 1)
14+
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
15+
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
16+
else
17+
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
18+
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
19+
endif
20+
endif
1321

1422
ifeq ($(CORE), POWER8)
1523
ifeq ($(USE_OPENMP), 1)

0 commit comments

Comments
 (0)