Skip to content

Commit

Permalink
Support Arm64 (and apple M1) (#609)
Browse files Browse the repository at this point in the history
* Progress towards ARM64 support

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* Re-introduce Ofast flag

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* unit_signal: fix test

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* Update architecture detection

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* ocamlformat

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* fixup! Update architecture detection

* Improve architecture and os detection

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* config: cleanup unused open

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* reintroduce -mfpmath=sse on x86_64

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* owl_macros.h: remove empty if

* owl/configure: remove devmode cflags

They break the compilation with some c compilers. See #609

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* owl_core_utils: Fix empty elif

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* Cleanup arch detection code

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* configure: update to support new lapacke packages

Debian now ships lapacke separately, requiring new set of flags for the linking to work.
This uses pkg-config to improve the detection of these flags.

Furthermore this commit adds a workaround to solve the issue with libgcc11_s not found,
which will not be addressed upstream in homebrew for the time being.

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* ocamlformat the code

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* Workaround configurator problem with windows

And remove march=native from arm, since it breaks on a number of arm systems

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* configurator: make more robust wrt not officially supported platforms (e.g. bsds)

* Update src/owl/config/configure.ml

Co-authored-by: Thomas Gazagnaire <thomas@gazagnaire.org>

* configurator: drop unnecessary dependency on stdio

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* add @jcreinhold patch for nonstandard setups

See #636

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>

* Update src/aeos/config/configure.ml

* Update src/owl/config/configure.ml

---------

Signed-off-by: Marcello Seri <marcello.seri@gmail.com>
Co-authored-by: Thomas Gazagnaire <thomas@gazagnaire.org>
  • Loading branch information
mseri and samoht authored May 1, 2024
1 parent cd382bb commit 5d2f6d1
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 90 deletions.
8 changes: 4 additions & 4 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,18 @@ before compilation:
- `OWL_CFLAGS` allows you to change the default flags passed to the C targets;
it defaults to
```
OWL_CFLAGS="-g -O3 -Ofast -march=native -mfpmath=sse -funroll-loops -ffast-math -DSFMT_MEXP=19937 -msse2 -fno-strict-aliasing -Wno-tautological-constant-out-of-range-compare"`
OWL_CFLAGS="-g -O3 -Ofast -funroll-loops -ffast-math -DSFMT_MEXP=19937 -msse2 -fno-strict-aliasing"
```

- `OWL_AEOS_CFLAGS` allows you to change the default flags passed to the C
targets when compiling AEOS. It defaults to
```
OWL_AEOS_CFLAGS="-g -O3 -Ofast -march=native -funroll-loops -ffast-math -DSFMT_MEXP=19937 -fno-strict-aliasing"
OWL_AEOS_CFLAGS="-g -O3 -Ofast -funroll-loops -ffast-math -DSFMT_MEXP=19937 -fno-strict-aliasing"
```

- `OWL_DISABLE_LAPACKE_LINKING_FLAG=1` disables the `-llapacke` flag in the
linking options. This is useful when you have lapacke installed on
non-standard location.
linking options. This may be useful when you have certain unusual lapacke
setups; do not use it unless you know what you are doing.

If you are not using `opam`, you should run `make clean` before recompiling
the library after having changed any of those environment variables.
Expand Down
1 change: 0 additions & 1 deletion owl.opam
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,5 @@ depends: [
"dune" {>= "2.0.0"}
"dune-configurator"
"owl-base" {= version}
"stdio" {build}
"npy"
]
220 changes: 147 additions & 73 deletions src/owl/config/configure.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,45 @@

module C = Configurator.V1

(** C preprocessor prelude that defines [PLATFORM_NAME] as one of ["ios"],
    ["mac"], ["android"], ["linux"], ["windows"] or ["unknown"].

    It is handed to [C.C_define.import] as [~prelude] so that the value of
    [PLATFORM_NAME] can be read back from the C compiler in use.

    Platform macros are tested with [defined(...)] so that the prelude stays
    clean under [-Wundef], and Windows is recognised through [_WIN32] (the
    macro predefined by the compiler itself) in addition to [WIN32], which is
    only set by some toolchains or headers. [TARGET_OS_IPHONE] is still tested
    by value because the prelude reads it right after including
    [TargetConditionals.h]. *)
let detect_system_os =
  {|
#if defined(__APPLE__)
  #include <TargetConditionals.h>
  #if TARGET_OS_IPHONE
    #define PLATFORM_NAME "ios"
  #else
    #define PLATFORM_NAME "mac"
  #endif
#elif defined(__linux__)
  #if defined(__ANDROID__)
    #define PLATFORM_NAME "android"
  #else
    #define PLATFORM_NAME "linux"
  #endif
#elif defined(_WIN32) || defined(WIN32)
  #define PLATFORM_NAME "windows"
#else
  #define PLATFORM_NAME "unknown"
#endif
|}


(** C preprocessor prelude that defines [PLATFORM_ARCH] as one of ["x86_64"],
    ["x86"], ["arm64"], ["arm"] or ["unknown"], depending on which of the
    [__x86_64__], [__i386__], [__aarch64__] and [__arm__] macros the C
    compiler reports. It is handed to [C.C_define.import] as [~prelude] so
    that the value can be read back by the configurator. *)
let detect_system_arch =
{|
#if __x86_64__
#define PLATFORM_ARCH "x86_64"
#elif __i386__
#define PLATFORM_ARCH "x86"
#elif __aarch64__
#define PLATFORM_ARCH "arm64"
#elif __arm__
#define PLATFORM_ARCH "arm"
#else
#define PLATFORM_ARCH "unknown"
#endif
|}


let bgetenv v =
let v' =
try Sys.getenv v |> int_of_string with
Expand Down Expand Up @@ -71,51 +110,83 @@ let get_os_type c =

(** [get_ocaml_default_flags c] is the list of OCaml compiler flags that are
    always emitted; it is currently empty and ignores the configurator
    handle. *)
let get_ocaml_default_flags _c = []


(** [get_ocaml_devmode_flags c] is the extra OCaml compiler flags used when
    [OWL_ENABLE_DEVMODE] is switched on: a [-w] specification that disables
    warnings 32, 27, 6, 37 and 3. It is empty otherwise. The configurator
    handle is not used. *)
let get_ocaml_devmode_flags _c =
  match bgetenv "OWL_ENABLE_DEVMODE" with
  | true -> [ "-w"; "-32-27-6-37-3" ]
  | false -> []


let default_cflags =
try
Sys.getenv "OWL_CFLAGS"
(** [clean_env_var env_var] reads the environment variable [env_var], strips
    surrounding whitespace, splits the result on single spaces and drops the
    pieces that are empty or made only of whitespace.

    @raise Not_found if [env_var] is not set. *)
let clean_env_var env_var =
  let is_blank token = String.trim token = "" in
  let raw = String.trim (Sys.getenv env_var) in
  (* Runs of spaces produce empty pieces; those are discarded here. *)
  List.filter (fun token -> not (is_blank token)) (String.split_on_char ' ' raw)
with
| Not_found ->
[ (* Basic optimisation *)
"-g"
; "-O3"
; "-Ofast"
; (* FIXME: experimental switches *)
(* "-mavx2"; "-mfma"; "-ffp-contract=fast"; *)
(* Experimental switches, -ffast-math may break IEEE754 semantics*)
"-march=native"
; "-mfpmath=sse"
; "-funroll-loops"
; "-ffast-math"
; (* Configure Mersenne Twister RNG *)
"-DSFMT_MEXP=19937"
; "-msse2"
; "-fno-strict-aliasing"
]


(* Linker flags that are unconditionally part of the link options: only the
   C math library. *)
let default_libs = [ "-lm" ]


(** [get_default_config c] asks the C compiler behind [c] which operating
    system and CPU architecture it targets and derives the baseline
    configuration from the answer.

    The resulting [cflags] are the contents of [OWL_CFLAGS] when that
    variable is set; otherwise they are the optimisation defaults plus the
    architecture-specific switches selected below. The resulting [libs] only
    ever holds the Homebrew GCC search path, and only on arm64 macOS when
    that directory exists. *)
let get_default_config c =
  (* Read back one string-valued macro defined by [prelude]; [None] when the
     configurator does not return exactly one string value. *)
  let string_define ~prelude name =
    match
      C.C_define.import c ~includes:[] ~prelude [ name, C.C_define.Type.String ]
    with
    | [ (_, C.C_define.Value.String value) ] -> Some value
    | _ -> None
  in
  let os =
    match string_define ~prelude:detect_system_os "PLATFORM_NAME" with
    | Some "android" -> `android
    | Some "ios" -> `ios
    | Some "linux" -> `linux
    | Some "mac" -> `mac
    | Some "windows" -> `windows
    | Some _ | None -> `unknown
  in
  let arch =
    match string_define ~prelude:detect_system_arch "PLATFORM_ARCH" with
    | Some "x86_64" -> `x86_64
    | Some "x86" -> `x86
    | Some "arm64" -> `arm64
    | Some "arm" -> `arm
    | Some _ | None -> `unknown
  in
  (* Switches that only make sense on specific targets. *)
  let arch_cflags =
    match arch, os with
    | `arm64, `mac -> [ "-march=native" ]
    | `x86_64, _ -> [ "-march=native"; "-mfpmath=sse"; "-msse2" ]
    | _ -> []
  in
  let cflags =
    match clean_env_var "OWL_CFLAGS" with
    | user_flags -> user_flags
    | exception Not_found ->
      List.concat
        [ (* Basic optimisation *)
          [ "-g"; "-O3"; "-Ofast" ]
        ; arch_cflags
        ; (* Experimental switches, -ffast-math may break IEEE754 semantics *)
          [ "-funroll-loops"; "-ffast-math" ]
        ; (* Configure Mersenne Twister RNG *)
          [ "-DSFMT_MEXP=19937" ]
        ; [ "-fno-strict-aliasing" ]
        ]
  in
  (* homebrew M1 issue workaround, works only if users use the default
     homebrew path *)
  let libs =
    let homebrew_gcc_dir = "/opt/homebrew/opt/gcc/lib/gcc/current/" in
    let on_arm64_mac = os = `mac && arch = `arm64 in
    if on_arm64_mac && Sys.file_exists homebrew_gcc_dir
    then [ "-L" ^ homebrew_gcc_dir ]
    else []
  in
  { C.Pkg_config.cflags; libs }


(** Extra preprocessor flags taken from [OWL_CPPFLAGS]; empty when the
    variable is not set. *)
let default_cppflags =
  match clean_env_var "OWL_CPPFLAGS" with
  | flags -> flags
  | exception Not_found -> []


(** Extra linker flags taken from [OWL_LDFLAGS]; empty when the variable is
    not set. *)
let default_ldflags =
  match clean_env_var "OWL_LDFLAGS" with
  | flags -> flags
  | exception Not_found -> []


(** Libraries to link against, taken from [OWL_LDLIBS]; falls back to the C
    math library alone when the variable is not set. *)
let default_ldlibs =
  match clean_env_var "OWL_LDLIBS" with
  | libs -> libs
  | exception Not_found -> [ "-lm" ]


(** [get_expmode_cflags c] is [[ "-flto" ]] when [OWL_ENABLE_EXPMODE] is
    switched on and the empty list otherwise. The configurator handle is not
    used. *)
let get_expmode_cflags _c =
  match bgetenv "OWL_ENABLE_EXPMODE" with
  | true -> [ "-flto" ]
  | false -> []


(** [get_devmode_cflags c] selects the C warning flags: the strict set
    ([-Wall -pedantic -Wextra -Wunused]) when [OWL_ENABLE_DEVMODE] is switched
    on, and only [-Wno-logical-op-parentheses] otherwise. The configurator
    handle is not used. *)
let get_devmode_cflags _c =
  if bgetenv "OWL_ENABLE_DEVMODE"
  then [ "-Wall"; "-pedantic"; "-Wextra"; "-Wunused" ]
  else [ "-Wno-logical-op-parentheses" ]


(** Library search path for a GCC 7 installation under [/usr/local]: a single
    [-L] flag when that directory exists, nothing otherwise. The check is
    performed once, when this value is evaluated. *)
let default_gcc_path =
  let gcc_lib_dir = "/usr/local/lib/gcc/7" in
  match Sys.file_exists gcc_lib_dir with
  | true -> [ "-L" ^ gcc_lib_dir ]
  | false -> []
Expand Down Expand Up @@ -166,7 +237,7 @@ let get_openmp_config c =
{|
You have set OWL_ENABLE_OPENMP = 1 however I am unable to link
against openmp: the current values for cflags and libs are respectively
%s and %s.
(%s) and (%s).

You can disable openmp/aeos by unsetting OWL_ENABLE_OPEN or by setting
it to 0.
Expand All @@ -176,38 +247,55 @@ or `dune clean` before rebuilding the project with a modified flag.
If you think this is our mistake please open an issue reporting
the output of `src/owl/config/configure.exe --verbose`.
|}
Base.(string_of_sexp @@ sexp_of_list sexp_of_string cflags)
Base.(string_of_sexp @@ sexp_of_list sexp_of_string libs);
(String.concat " " cflags)
(String.concat " " libs);
failwith "Unable to link against openmp");
C.Pkg_config.{ cflags; libs })


let () =
C.main ~name:"owl" (fun c ->
let (>>=) = Option.bind in
let default_config = get_default_config c in
let default_pkg_config = { C.Pkg_config.cflags = []; libs = [] } in
let cblas_conf =
let default = { C.Pkg_config.cflags = []; libs = [] } in
let open Base.Option.Monad_infix in
Base.Option.value
~default
Option.value ~default:default_pkg_config
(C.Pkg_config.get c >>= C.Pkg_config.query ~package:"cblas")
in
let openblas_conf =
let open Base.Option.Monad_infix in
Base.Option.value
Option.value
~default:openblas_default
(C.Pkg_config.get c >>= C.Pkg_config.query ~package:"openblas")
in
if not
@@ C.c_test
c
test_linking
~c_flags:openblas_conf.cflags
~link_flags:openblas_conf.libs
let openmp_config = get_openmp_config c in
(* configure link options *)
let libs =
[]
@ default_ldflags
@ default_ldlibs
@ default_config.libs
@ openblas_conf.libs
@ cblas_conf.libs
@ default_gcc_path
@ get_accelerate_libs c
@ openmp_config.libs
in
(* configure compile options *)
let cflags =
[]
@ default_config.cflags
@ default_cppflags
@ openblas_conf.cflags
@ cblas_conf.cflags
@ get_expmode_cflags c
@ openmp_config.cflags
in
if not @@ C.c_test c test_linking ~c_flags:cflags ~link_flags:libs
then (
Printf.printf
{|
Unable to link against openblas: the current values for cflags and libs
are respectively %s and %s.
are respectively (%s) and (%s).

Usually this is due to missing paths for pkg-config. Try to re-install
or re-compile owl by prefixing the command with (or exporting)
Expand All @@ -218,10 +306,10 @@ If this does not work please open an issue in the owl repository, adding
some details on how your openblas has been installed and the output of
`src/owl/config/configure.exe --verbose`.
|}
Base.(Sexp.to_string @@ sexp_of_list sexp_of_string openblas_conf.cflags)
Base.(Sexp.to_string @@ sexp_of_list sexp_of_string openblas_conf.libs);
(String.concat " " openblas_conf.cflags)
(String.concat " " openblas_conf.libs);
failwith "Unable to link against openblas.");
let lapacke_lib =
let lapacke_conf =
let disable_linking_flag = bgetenv "OWL_DISABLE_LAPACKE_LINKING_FLAG" in
let needs_lapacke_flag =
if disable_linking_flag
Expand All @@ -231,33 +319,19 @@ some details on how your openblas has been installed and the output of
c
test_lapacke_working_code
~c_flags:openblas_conf.cflags
~link_flags:(openblas_conf.libs @ [ "-lm" ])
~link_flags:(default_config.libs @ openblas_conf.libs @ [ "-lm" ])
|> not
in
if needs_lapacke_flag then [ "-llapacke" ] else []
(* When the probe above shows that LAPACKE must be linked explicitly, prefer
   the flags reported by pkg-config and fall back to a bare [-llapacke] when
   pkg-config is unavailable or does not know the package. The pkg-config
   module is named [lapacke]; querying ["llapacke"] (the linker-flag
   spelling) never matches, which silently disabled this detection. *)
if needs_lapacke_flag
then
  Option.value
    ~default:C.Pkg_config.{ cflags = []; libs = [ "-llapacke" ] }
    (C.Pkg_config.get c >>= C.Pkg_config.query ~package:"lapacke")
else default_pkg_config
in
let openmp_config = get_openmp_config c in
(* configure link options *)
let libs =
[]
@ lapacke_lib
@ openblas_conf.libs
@ cblas_conf.libs
@ default_libs
@ default_gcc_path
@ get_accelerate_libs c
@ openmp_config.libs
in
(* configure compile options *)
let cflags =
[]
@ openblas_conf.cflags
@ cblas_conf.cflags
@ default_cflags
@ get_devmode_cflags c
@ get_expmode_cflags c
@ openmp_config.cflags
in
let libs = lapacke_conf.libs @ libs in
let cflags = lapacke_conf.cflags @ cflags in
(* configure ocaml options *)
let ocaml_flags = [] @ get_ocaml_default_flags c @ get_ocaml_devmode_flags c in
(* assemble default config *)
Expand Down
2 changes: 1 addition & 1 deletion src/owl/config/dune
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
(executable
(name configure)
(libraries dune.configurator stdio))
(libraries dune.configurator))
9 changes: 8 additions & 1 deletion src/owl/core/owl_core_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,13 @@ void c_slicing_offset (struct caml_ba_array *X, int64_t *slice, int64_t *offset)
* Code heavily inspired by Eigen (http://eigen.tuxfamily.org/).
*/


#if defined(__arm__) || defined(__aarch64__)
/*
 * ARM / AArch64 variant of query_cache_sizes: no hardware query is made;
 * fixed values are written to the three output pointers instead.
 * NOTE(review): the values are presumably byte counts (16 KiB for L1,
 * 512 KiB for L2 and L3) -- confirm against the callers.
 */
void query_cache_sizes(int* l1p, int* l2p, int* l3p) {
  enum {
    ARM_L1_SIZE = 16 * 1024,
    ARM_L2_SIZE = 512 * 1024,
    ARM_L3_SIZE = 512 * 1024
  };

  *l1p = ARM_L1_SIZE;
  *l2p = ARM_L2_SIZE;
  *l3p = ARM_L3_SIZE;
}
#else
OWL_INLINE void query_cache_sizes_intel(int* l1p, int* l2p, int* l3p) {
int cpuinfo[4];
int l1 = 0, l2 = 0, l3 = 0;
Expand Down Expand Up @@ -295,3 +301,4 @@ void query_cache_sizes(int* l1p, int* l2p, int* l3p) {
*l3p = 512 * 1024;
}
}
#endif
21 changes: 11 additions & 10 deletions src/owl/core/owl_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,19 @@ typedef struct { double r, i; } complex_double;
#define OWL_ARCH_x86_64 0
#endif

#if defined(__PIC__) && OWL_ARCH_i386
#define CPUID(cpuinfo,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %k1; cpuid; xchgl %%ebx,%k1": "=a" (cpuinfo[0]), "=&r" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) : "a" (func), "c" (id));
#elif defined(__PIC__) && OWL_ARCH_x86_64
#define CPUID(cpuinfo,func,id) \
__asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (cpuinfo[0]), "=&r" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) : "0" (func), "2" (id));
#else
#define CPUID(cpuinfo,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (cpuinfo[0]), "=b" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) : "0" (func), "2" (id) );
/*
 * CPUID(cpuinfo, func, id): execute the x86 `cpuid` instruction with `func`
 * in eax and `id` in ecx, and store the resulting eax, ebx, ecx and edx in
 * cpuinfo[0], cpuinfo[1], cpuinfo[2] and cpuinfo[3] respectively.
 *
 * The macro is not defined at all when targeting ARM (__arm__ or
 * __aarch64__), so code using it must be guarded the same way.
 */
#if !defined(__arm__) && !defined(__aarch64__)
/* 32-bit position-independent code: ebx is exchanged with a scratch register
 * before and after `cpuid`, so ebx holds its original value again once the
 * macro finishes. Presumably this is because ebx is reserved under PIC. */
#if defined(__PIC__) && OWL_ARCH_i386
#define CPUID(cpuinfo,func,id) \
__asm__ __volatile__ ("xchgl %%ebx, %k1; cpuid; xchgl %%ebx,%k1": "=a" (cpuinfo[0]), "=&r" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) : "a" (func), "c" (id));
/* 64-bit position-independent code: same exchange trick, applied to rbx. */
#elif defined(__PIC__) && OWL_ARCH_x86_64
#define CPUID(cpuinfo,func,id) \
__asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (cpuinfo[0]), "=&r" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) : "0" (func), "2" (id));
/* Everything else: ebx is used directly as an output of `cpuid`. */
#else
#define CPUID(cpuinfo,func,id) \
__asm__ __volatile__ ("cpuid": "=a" (cpuinfo[0]), "=b" (cpuinfo[1]), "=c" (cpuinfo[2]), "=d" (cpuinfo[3]) : "0" (func), "2" (id) );
#endif
#endif


// Other

#if defined(_MSC_VER)
Expand Down

0 comments on commit 5d2f6d1

Please sign in to comment.