diff --git a/.github/workflows/Example.yml b/.github/workflows/Example.yml
index 372efc9f..95ab37ad 100644
--- a/.github/workflows/Example.yml
+++ b/.github/workflows/Example.yml
@@ -53,7 +53,7 @@ jobs:
         echo "starting gif fixing"
         mv examples/src/gif_*.gif examples/src/${{ matrix.file-name }}_files
         $env:Path += ";C:\Program Files\Git\usr\bin"
-        awk '{if($0~//) {sub(//,\"![gif](${{ matrix.file-name }}_files\/gif_\"++i\".gif)\")}}1' examples/src/${{ matrix.file-name }}.md > examples/src/tmp_${{ matrix.file-name }}.md
+        awk '{if($0~//) {sub(//,"![gif](${{ matrix.file-name }}_files\/gif_"++i".gif)")}}1' examples/src/${{ matrix.file-name }}.md > examples/src/tmp_${{ matrix.file-name }}.md
         mv -Force examples/src/tmp_${{ matrix.file-name }}.md examples/src/${{ matrix.file-name }}.md
         echo "gifs should be fixed"
diff --git a/examples/src/growing_horizon_ME.ipynb b/examples/src/growing_horizon_ME.ipynb
index a0ddc17d..9394cd8b 100644
--- a/examples/src/growing_horizon_ME.ipynb
+++ b/examples/src/growing_horizon_ME.ipynb
@@ -641,10 +641,10 @@
    "outputs": [],
    "source": [
     "# train\n",
-    "paramsNet = FMIFlux.params(neuralFMU)\n",
+    "paramsNet = Flux.params(neuralFMU)\n",
     "\n",
     "optim = Adam()\n",
-    "FMIFlux.train!(lossSum, paramsNet, Iterators.repeated((), 1000), optim; cb=()->callb(paramsNet)) "
+    "FMIFlux.train!(lossSum, neuralFMU, Iterators.repeated((), 1000), optim; cb=()->callb(paramsNet)) "
    ]
   },
   {
diff --git a/examples/src/juliacon_2023.ipynb b/examples/src/juliacon_2023.ipynb
index 0628c79f..8ff62e9d 100644
--- a/examples/src/juliacon_2023.ipynb
+++ b/examples/src/juliacon_2023.ipynb
@@ -465,10 +465,13 @@
     "             gates, # compute resulting dx from ANN + FMU\n",
     "             dx -> cacheRetrieve(1:4, dx)) # stack together: dx[1,2,3,4] from cache + dx[5,6] from gates\n",
     "\n",
+    "    solver = Tsit5()\n",
+    "    \n",
     "    # new NeuralFMU \n",
     "    neuralFMU = ME_NeuralFMU(f, # the FMU used in the NeuralFMU \n",
     "                 model, # the model we specified above \n",
     "                 (tStart, tStop), # a default start ad stop time for solving the NeuralFMU\n",
+    "                 solver; # the solver we defined above\n",
     "                 saveat=tSave) # the time points to save the solution at\n",
     "    neuralFMU.modifiedState = false # speed optimization (NeuralFMU state equals FMU state)\n",
     "    \n",
@@ -740,7 +743,7 @@
     "    \n",
     "    # the actual training\n",
     "    FMIFlux.train!(loss, # the loss function for training\n",
-    "                   params, # the parameters to train\n",
+    "                   neuralFMU, # the NeuralFMU holding the parameters to train\n",
     "                   Iterators.repeated((), steps), # an iterator repeating `steps` times\n",
     "                   optim; # the optimizer to train\n",
     "                   gradient=:ForwardDiff, # currently, only ForwarDiff leads to good results for multi-event systems\n",
diff --git a/examples/src/juliacon_2023_helpers.jl b/examples/src/juliacon_2023_helpers.jl
index 163ee60e..184c0431 100644
--- a/examples/src/juliacon_2023_helpers.jl
+++ b/examples/src/juliacon_2023_helpers.jl
@@ -8,6 +8,7 @@ import FMIFlux: roundToLength
 import FMIZoo: movavg
 
 import FMI: FMU2Solution
+import FMI.DifferentialEquations: Tsit5
 import FMIZoo: VLDM, VLDM_Data
 
 function fmiSingleInstanceMode(fmu::FMU2, mode::Bool)
diff --git a/examples/src/mdpi_2022.ipynb b/examples/src/mdpi_2022.ipynb
index 5f963aea..e3d4948b 100644
--- a/examples/src/mdpi_2022.ipynb
+++ b/examples/src/mdpi_2022.ipynb
@@ -108,6 +108,7 @@
     "using FMIFlux.Flux # Machine Learning in Julia\n",
     "\n",
     "import FMI.DifferentialEquations: Tsit5 # import the Tsit5-solver\n",
+    "import FMI: FMU2Solution\n",
     "using JLD2 # data format for saving/loading parameters\n",
     "\n",
     "# plotting\n",
@@ -611,14 +612,14 @@
     "\n",
     "# we use ForwardDiff for gradinet determination, because the FMU throws multiple events per time instant (this is not supported by reverse mode AD)\n",
     "# the chunk_size controls the nuber of forward evaluations of the model (the bigger, the less evaluations)\n",
-    "FMIFlux.train!(loss, params, Iterators.repeated((), batchLen), optim; gradient=:ForwardDiff, chunk_size=32, cb=updateScheduler) \n",
+    "FMIFlux.train!(loss, neuralFMU, Iterators.repeated((), batchLen), optim; gradient=:ForwardDiff, chunk_size=32, cb=updateScheduler) \n",
     "loss_after = batch_loss(params[1])"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {},
    "source": [
     "The batch loss (\"AVG\" and \"MAX\") is only updated every 5 steps, as defined in the scheduler. Every 25 steps, we plot the current batch element losses. Please note, that we only did around 100 training steps, so training has not converged for now. But we are curious and want to have a look on the intermediate results. \n",
     "\n",
diff --git a/examples/src/modelica_conference_2021.ipynb b/examples/src/modelica_conference_2021.ipynb
index e7f72003..66bc565d 100644
--- a/examples/src/modelica_conference_2021.ipynb
+++ b/examples/src/modelica_conference_2021.ipynb
@@ -891,7 +891,7 @@
    "outputs": [],
    "source": [
     "optim = Adam()\n",
-    "FMIFlux.train!(lossSum, paramsNet, Iterators.repeated((), 1), optim; cb=()->callb(paramsNet)) "
+    "FMIFlux.train!(lossSum, neuralFMU, Iterators.repeated((), 1), optim; cb=()->callb(paramsNet)) "
    ]
   },
   {
@@ -950,7 +950,7 @@
     "for run in 1:numRuns\n",
     "    @time for epoch in 1:numEpochs\n",
     "        @info \"Run: $(run)/$(numRuns) Epoch: $(epoch)/$(numEpochs)\"\n",
-    "        FMIFlux.train!(lossSum, paramsNet, Iterators.repeated((), numIterations), optim; cb=()->callb(paramsNet))\n",
+    "        FMIFlux.train!(lossSum, neuralFMU, Iterators.repeated((), numIterations), optim; cb=()->callb(paramsNet))\n",
     "    end\n",
     "    flush(stderr)\n",
     "    flush(stdout)\n",
diff --git a/examples/src/simple_hybrid_CS.ipynb b/examples/src/simple_hybrid_CS.ipynb
index 298af2fe..45962aec 100644
--- a/examples/src/simple_hybrid_CS.ipynb
+++ b/examples/src/simple_hybrid_CS.ipynb
@@ -530,7 +530,7 @@
     "paramsNet = FMIFlux.params(csNeuralFMU)\n",
     "\n",
     "optim = Adam()\n",
-    "FMIFlux.train!(lossSum, paramsNet, Iterators.repeated((), 250), optim; cb=()->callb(paramsNet))"
+    "FMIFlux.train!(lossSum, csNeuralFMU, Iterators.repeated((), 250), optim; cb=()->callb(paramsNet))"
    ]
   },
   {
diff --git a/examples/src/simple_hybrid_ME.ipynb b/examples/src/simple_hybrid_ME.ipynb
index 2d902798..1986087e 100644
--- a/examples/src/simple_hybrid_ME.ipynb
+++ b/examples/src/simple_hybrid_ME.ipynb
@@ -500,7 +500,7 @@
     "paramsNet = FMIFlux.params(neuralFMU)\n",
     "\n",
     "optim = Adam()\n",
-    "FMIFlux.train!(lossSum, paramsNet, Iterators.repeated((), 300), optim; cb=()->callb(paramsNet)) "
+    "FMIFlux.train!(lossSum, neuralFMU, Iterators.repeated((), 300), optim; cb=()->callb(paramsNet)) "
    ]
   },
   {
@@ -563,7 +563,7 @@
   },
   "outputs": [],
   "source": [
-    "FMIFlux.train!(lossSum, paramsNet, Iterators.repeated((), 1200), optim; cb=()->callb(paramsNet)) \n",
+    "FMIFlux.train!(lossSum, neuralFMU, Iterators.repeated((), 1200), optim; cb=()->callb(paramsNet)) \n",
     "# plot results mass.s\n",
     "solutionAfter = neuralFMU(x₀)\n",
     "Plots.plot!(fig, solutionAfter; stateIndices=1:1, values=false, label=\"NeuralFMU (1500 epochs)\", linewidth=2)\n",
diff --git a/src/neural.jl b/src/neural.jl
index 2db47697..26bc4dbd 100644
--- a/src/neural.jl
+++ b/src/neural.jl
@@ -1030,6 +1030,7 @@ function getComponent(nfmu::NeuralFMU)
     return hasCurrentComponent(nfmu.fmu) ? getCurrentComponent(nfmu.fmu) : nothing
 end
 
+# ToDo: Separate this: NeuralFMU creation and solving!
 """
 TODO: Signature, Arguments and Keyword-Arguments descriptions.
 
@@ -1287,7 +1288,11 @@ function (nfmu::ME_NeuralFMU)(x_start::Union{Array{<:Real}, Nothing} = nfmu.x0,
     prob = ODEProblem{true}(ff, nfmu.x0, nfmu.tspan, p)
 
     if isnothing(sensealg)
-        if isimplicit(solver)
+        if isnothing(solver)
+
+            logWarning(nfmu.fmu, "No solver keyword detected for NeuralFMU.\nContinuous adjoint method is applied, which requires solving backward in time.\nThis might be not supported by every FMU.", 1)
+            sensealg = InterpolatingAdjoint(; autojacvec=ReverseDiffVJP(true), checkpointing=true)
+        elseif isimplicit(solver)
             @assert !(alg_autodiff(solver) isa AutoForwardDiff) "Implicit solver using `autodiff=true` detected for NeuralFMU.\nThis is currently not supported, please use `autodiff=false` as solver keyword.\nExample: `Rosenbrock23(autodiff=false)` instead of `Rosenbrock23()`."
 
             logWarning(nfmu.fmu, "Implicit solver detected for NeuralFMU.\nContinuous adjoint method is applied, which requires solving backward in time.\nThis might be not supported by every FMU.", 1)
@@ -1677,23 +1682,23 @@
 end
 
 """
-    train!(loss, params::Union{Flux.Params, Zygote.Params}, data, optim::Flux.Optimise.AbstractOptimiser; gradient::Symbol=:Zygote, cb=nothing, chunk_size::Integer=64, printStep::Bool=false)
+    train!(loss, neuralFMU::Union{ME_NeuralFMU, CS_NeuralFMU}, data, optim; gradient::Symbol=:ReverseDiff, kwargs...)
 
 A function analogous to Flux.train! but with additional features and explicit parameters (faster).
 
 # Arguments
 - `loss` a loss function in the format `loss(p)`
-- `params` a object holding the parameters
+- `neuralFMU` an object holding the NeuralFMU with its parameters
 - `data` the training data (or often an iterator)
 - `optim` the optimizer used for training
 
 # Keywords
 - `gradient` a symbol determining the AD-library for gradient computation, available are `:ForwardDiff`, `:Zygote` and :ReverseDiff (default)
-- `cb` a custom callback function that is called after every training step
-- `chunk_size` the chunk size for AD using ForwardDiff (ignored for other AD-methods)
-- `printStep` a boolean determining wheater the gradient min/max is printed after every step (for gradient debugging)
-- `proceed_on_assert` a boolean that determins wheater to throw an ecxeption on error or proceed training and just print the error
-- `numThreads` [WIP]: an integer determining how many threads are used for training (how many gradients are generated in parallel)
+- `cb` a custom callback function that is called after every training step (default `nothing`)
+- `chunk_size` the chunk size for AD using ForwardDiff (ignored for other AD-methods) (default `:auto_fmiflux`)
+- `printStep` a boolean determining whether the gradient min/max is printed after every step (for gradient debugging) (default `false`)
+- `proceed_on_assert` a boolean that determines whether to throw an exception on error or proceed training and just print the error (default `false`)
+- `multiThreading`: a boolean that determines if multiple gradients are generated in parallel (default `false`)
 - `multiObjective`: set this if the loss function returns multiple values (multi objective optimization), currently gradients are fired to the optimizer one after another (default `false`)
 """
 function train!(loss, neuralFMU::Union{ME_NeuralFMU, CS_NeuralFMU}, data, optim; gradient::Symbol=:ReverseDiff, kwargs...)
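
Usage sketch (reviewer note, not part of the patch): the hunks above change the second positional argument of FMIFlux.train! from a parameter collection to the NeuralFMU itself, and pass an explicit Tsit5() solver when constructing a NeuralFMU. Reusing the placeholder names from the touched example notebooks (f, model, tStart, tStop, tSave, lossSum, callb), a call against the new signature would look roughly like this:

    import FMI.DifferentialEquations: Tsit5      # solver import, as added to juliacon_2023_helpers.jl

    # build the NeuralFMU with an explicit solver (juliacon_2023.ipynb after this patch)
    solver = Tsit5()
    neuralFMU = ME_NeuralFMU(f, model, (tStart, tStop), solver; saveat=tSave)

    # train: pass the NeuralFMU itself (new signature); parameters are only needed for the callback
    paramsNet = FMIFlux.params(neuralFMU)
    optim = Adam()
    FMIFlux.train!(lossSum, neuralFMU, Iterators.repeated((), 300), optim;
                   gradient=:ForwardDiff,        # keyword documented in the src/neural.jl docstring
                   cb=()->callb(paramsNet))      # optional callback, as used in the notebooks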