diff --git a/.gitignore b/.gitignore
index 2cad792..0808c4a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,10 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
-## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
+## Get latest from `dotnet new gitignore`
-Simple/
+# dotenv files
+.env
# User-specific files
*.rsuser
@@ -31,7 +32,6 @@ x86/
bld/
[Bb]in/
[Oo]bj/
-[Oo]ut/
[Ll]og/
[Ll]ogs/
@@ -60,11 +60,14 @@ dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
-# .NET Core
+# .NET
project.lock.json
project.fragment.lock.json
artifacts/
+# Tye
+.tye/
+
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
@@ -85,6 +88,8 @@ StyleCopReport.xml
*.pgc
*.pgd
*.rsp
+# but not Directory.Build.rsp, as it configures directory-level build defaults
+!Directory.Build.rsp
*.sbr
*.tlb
*.tli
@@ -93,6 +98,7 @@ StyleCopReport.xml
*.tmp_proj
*_wpftmp.csproj
*.log
+*.tlog
*.vspscc
*.vssscc
.builds
@@ -296,6 +302,17 @@ node_modules/
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
+# Visual Studio 6 auto-generated project file (contains which files were open etc.)
+*.vbp
+
+# Visual Studio 6 workspace and project file (working project files containing files to include in project)
+*.dsw
+*.dsp
+
+# Visual Studio 6 technical files
+*.ncb
+*.aps
+
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
@@ -352,6 +369,9 @@ ASALocalRun/
# Local History for Visual Studio
.localhistory/
+# Visual Studio History (VSHistory) files
+.vshistory/
+
# BeatPulse healthcheck temp database
healthchecksdb
@@ -363,4 +383,100 @@ MigrationBackup/
# Fody - auto-generated XML schema
FodyWeavers.xsd
-/Simple/Simple.csproj
+
+# VS Code files for those working on multiple tools
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+# Local History for Visual Studio Code
+.history/
+
+# Windows Installer files from build outputs
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# JetBrains Rider
+*.sln.iml
+.idea/
+
+##
+## Visual Studio for Mac
+##
+
+
+# globs
+Makefile.in
+*.userprefs
+*.usertasks
+config.make
+config.status
+aclocal.m4
+install-sh
+autom4te.cache/
+*.tar.gz
+tarballs/
+test-results/
+
+# content below from: https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# content below from: https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
+# Windows thumbnail cache files
+Thumbs.db
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# Vim temporary swap files
+*.swp
diff --git a/Directory.Build.props b/Directory.Build.props
index 623b3e5..e6f56a8 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -2,6 +2,7 @@
net10.0
enable
+ true
diff --git a/MachineLearning.Benchmarks/MachineLearning.Benchmarks.csproj b/ML.Benchy/ML.Benchy.csproj
similarity index 66%
rename from MachineLearning.Benchmarks/MachineLearning.Benchmarks.csproj
rename to ML.Benchy/ML.Benchy.csproj
index d8cd377..375fbfe 100644
--- a/MachineLearning.Benchmarks/MachineLearning.Benchmarks.csproj
+++ b/ML.Benchy/ML.Benchy.csproj
@@ -8,12 +8,11 @@
-
+
-
-
+
diff --git a/ML.Benchy/Program.cs b/ML.Benchy/Program.cs
new file mode 100644
index 0000000..032bb8d
--- /dev/null
+++ b/ML.Benchy/Program.cs
@@ -0,0 +1,44 @@
+using System.Buffers;
+using Ametrin.Numerics;
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Running;
+using ML.Core.Training;
+using Weight = float;
+
+BenchmarkRunner.Run();
+
+[MemoryDiagnoser(false)]
+public class Benchmarks
+{
+ [Params(512)]
+ public int Size { get; set; }
+ private Vector logits;
+ private Vector expected;
+ private Vector destination;
+
+ private AdamOptimizer optimizer = new() { LearningRate = 0.01f };
+
+
+ [GlobalSetup]
+ public void Setup()
+ {
+ logits = Vector.Create(Size);
+ logits.Uniform(-1, 1, new Random(43));
+ expected = Vector.Create(Size);
+ expected.Uniform(-1, 1, new Random(68));
+ destination = Vector.Create(Size);
+ optimizer.Init();
+ }
+
+ [Benchmark]
+ public void Delegates()
+ {
+ // SpanOperations.MapTo(logits.AsSpan(), expected.AsSpan(), destination.AsSpan(), optimizer.WeightReduction, optimizer.WeightReduction);
+ }
+
+ [Benchmark]
+ public void Static()
+ {
+ // SpanOperations.MapTo(optimizer.WeightReductionOperation, logits.AsSpan(), expected.AsSpan(), destination.AsSpan());
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Attributes.cs b/ML.Core/Attributes.cs
new file mode 100644
index 0000000..0f9de7a
--- /dev/null
+++ b/ML.Core/Attributes.cs
@@ -0,0 +1,18 @@
+namespace ML.Core.Attributes;
+
+#pragma warning disable CS9113 // Parameter is unread. only required by sourcegen
+[AttributeUsage(AttributeTargets.Property)]
+public sealed class SubModuleAttribute : Attribute;
+
+[AttributeUsage(AttributeTargets.Property)]
+public sealed class WeightsAttribute : Attribute;
+
+[AttributeUsage(AttributeTargets.Property)]
+public sealed class PropertyAttribute : Attribute;
+
+[AttributeUsage(AttributeTargets.Class)]
+public sealed class GeneratedModuleAttribute(bool IncludeSerializer = false) : Attribute;
+
+[AttributeUsage(AttributeTargets.Class)]
+public sealed class GeneratedAdamAttribute(Type module) : Attribute;
+#pragma warning restore CS9113 // Parameter is unread.
diff --git a/ML.Core/Converters/MatrixConverter.cs b/ML.Core/Converters/MatrixConverter.cs
new file mode 100644
index 0000000..0ebd23b
--- /dev/null
+++ b/ML.Core/Converters/MatrixConverter.cs
@@ -0,0 +1,32 @@
+using Ametrin.Serializer;
+
+namespace ML.Core.Converters;
+
+public sealed class MatrixConverter : ISerializationConverter
+{
+ static MatrixConverter()
+ {
+ AmetrinSerializer.RegisterSerializer();
+ }
+
+ public static Result TryReadValue(IAmetrinReader reader)
+ {
+ using var objectReader = reader.ReadStartObject();
+ var rowCount = objectReader.ReadInt32Property("RowCount");
+ objectReader.ReadPropertyName("Storage");
+ var storage = VectorConverter.ReadValue(objectReader);
+ reader.ReadEndObject();
+ Debug.Assert(storage.Count % rowCount == 0);
+ var columnCount = storage.Count / rowCount;
+ return Matrix.Of(rowCount, columnCount, storage);
+ }
+
+ public static void WriteValue(IAmetrinWriter writer, Matrix value)
+ {
+ using var objectWriter = writer.WriteStartObject();
+ objectWriter.WriteInt32Property("RowCount", value.RowCount);
+ objectWriter.WritePropertyName("Storage");
+ VectorConverter.WriteValue(objectWriter, value.Storage);
+ writer.WriteEndObject();
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Converters/ModuleSerializer.cs b/ML.Core/Converters/ModuleSerializer.cs
new file mode 100644
index 0000000..fa9cfe4
--- /dev/null
+++ b/ML.Core/Converters/ModuleSerializer.cs
@@ -0,0 +1,49 @@
+using System.IO;
+using System.Runtime.CompilerServices;
+using Ametrin.Serializer;
+using Ametrin.Serializer.Readers;
+using Ametrin.Serializer.Writers;
+using ML.Core.Modules;
+
+namespace ML.Core.Converters;
+
+public static class ModuleSerializer
+{
+ public const string FILE_EXTENSION = ".gmw";
+ public const uint FORMAT_VERSION = 3;
+
+#pragma warning disable CA2255
+ [ModuleInitializer]
+#pragma warning restore
+ internal static void Init()
+ {
+ AmetrinSerializer.RegisterSerializer, SequenceModule>();
+ AmetrinSerializer.RegisterSerializer, SequenceModule>();
+ AmetrinSerializer.RegisterSerializer, SequenceModule>();
+ AmetrinSerializer.RegisterSerializer, EmbeddedModule>();
+ }
+
+ public static void Write(IModule module, FileInfo file)
+ {
+ using var stream = file.Create();
+ using var writer = new AmetrinBinaryWriter(stream);
+
+ writer.WriteStringProperty("$format", FILE_EXTENSION);
+ writer.WriteUInt32Property("$version", FORMAT_VERSION);
+
+ AmetrinSerializer.WriteDynamic(writer, module);
+ }
+
+ public static T Read(FileInfo file)
+ {
+ using var stream = file.OpenRead();
+ using var reader = new AmetrinBinaryReader(stream);
+
+ var format = reader.ReadStringProperty("$format");
+ if (format is not FILE_EXTENSION) throw new InvalidOperationException();
+ var version = reader.ReadUInt32Property("$version");
+ if (version is not FORMAT_VERSION) throw new InvalidOperationException();
+
+ return AmetrinSerializer.TryReadDynamic(reader).Or(e => e.Throw());
+ }
+}
diff --git a/ML.Core/Converters/VectorConverter.cs b/ML.Core/Converters/VectorConverter.cs
new file mode 100644
index 0000000..c003c61
--- /dev/null
+++ b/ML.Core/Converters/VectorConverter.cs
@@ -0,0 +1,21 @@
+using Ametrin.Serializer;
+
+namespace ML.Core.Converters;
+
+public sealed class VectorConverter : ISerializationConverter
+{
+ static VectorConverter()
+ {
+ AmetrinSerializer.RegisterSerializer();
+ }
+
+ public static Result TryReadValue(IAmetrinReader reader)
+ {
+ return reader.TryReadArrayValue(static reader => reader.TryReadSingleValue()).Map(Vector.Of);
+ }
+
+ public static void WriteValue(IAmetrinWriter writer, Vector value)
+ {
+ writer.WriteArrayValue(value.AsSpan(), static (writer, v) => writer.WriteSingleValue(v));
+ }
+}
\ No newline at end of file
diff --git a/MachineLearning.Data/ITokenizer.cs b/ML.Core/Data/ITokenizer.cs
similarity index 69%
rename from MachineLearning.Data/ITokenizer.cs
rename to ML.Core/Data/ITokenizer.cs
index ca2cfcb..702f96b 100644
--- a/MachineLearning.Data/ITokenizer.cs
+++ b/ML.Core/Data/ITokenizer.cs
@@ -1,4 +1,4 @@
-namespace MachineLearning.Data;
+namespace ML.Core.Data;
public interface ITokenizer
{
@@ -6,6 +6,5 @@ public interface ITokenizer
public IEnumerable Tokenize(TData data);
public int TokenizeSingle(TData data);
public TData GetToken(int data);
- public string Decode(IEnumerable tokens);
-
-}
+ public TData Decode(IEnumerable tokens);
+}
\ No newline at end of file
diff --git a/ML.Core/Data/Noise/IDataNoise.cs b/ML.Core/Data/Noise/IDataNoise.cs
new file mode 100644
index 0000000..f20311c
--- /dev/null
+++ b/ML.Core/Data/Noise/IDataNoise.cs
@@ -0,0 +1,13 @@
+namespace ML.Core.Data.Noise;
+
+public interface IDataNoise
+{
+ public TData Apply(TData data);
+}
+
+
+public sealed class NoDataNoise : IDataNoise
+{
+ public static NoDataNoise Instance => field ??= new();
+ public TData Apply(TData data) => data;
+}
diff --git a/MachineLearning.Data/Noise/ImageInputNoise.cs b/ML.Core/Data/Noise/ImageNoise.cs
similarity index 81%
rename from MachineLearning.Data/Noise/ImageInputNoise.cs
rename to ML.Core/Data/Noise/ImageNoise.cs
index 790b9b9..1a86066 100644
--- a/MachineLearning.Data/Noise/ImageInputNoise.cs
+++ b/ML.Core/Data/Noise/ImageNoise.cs
@@ -1,12 +1,12 @@
using Ametrin.Utils.Transformation;
-namespace MachineLearning.Data.Noise;
+namespace ML.Core.Data.Noise;
-public sealed class ImageInputNoise : IInputDataNoise
+public sealed class ImageNoise : IDataNoise
{
public required int Size { get; init; }
public double NoiseStrength { get; init; } = 0;
- public double NoiseProbability { get; init; } = 0;
+ // public double NoiseProbability { get; init; } = 0;
public int MaxShift { get; init; } = 0;
public double MaxAngle { get; init; } = 0;
public double MinScale { get; init; } = 1;
@@ -23,10 +23,10 @@ public double[] Apply(double[] data)
};
var transformed = transform.ApplySmooth(data, Size);
- foreach(var i in ..transformed.Length)
+ foreach (var i in ..transformed.Length)
{
transformed[i] += (Random.NextDouble() - 0.5) * 2 * NoiseStrength;
}
return transformed;
}
-}
+}
\ No newline at end of file
diff --git a/ML.Core/Data/Training/BatchHelper.cs b/ML.Core/Data/Training/BatchHelper.cs
new file mode 100644
index 0000000..67b6d5f
--- /dev/null
+++ b/ML.Core/Data/Training/BatchHelper.cs
@@ -0,0 +1,13 @@
+namespace ML.Core.Data.Training;
+
+public static class BatchHelper
+{
+ public static IEnumerable Create(IEnumerable source, int startIndex, int batchSize)
+ => Create(source.Skip(startIndex), batchSize);
+
+ public static IEnumerable Create(IEnumerable source, int batchSize)
+ => source.Take(batchSize);
+
+ public static IEnumerable CreateRandom(ICollection source, int batchSize, Random? random = null)
+ => source.GetRandomElements(batchSize, random);
+}
\ No newline at end of file
diff --git a/ML.Core/Data/Training/ITrainingDataSource.cs b/ML.Core/Data/Training/ITrainingDataSource.cs
new file mode 100644
index 0000000..066fd70
--- /dev/null
+++ b/ML.Core/Data/Training/ITrainingDataSource.cs
@@ -0,0 +1,37 @@
+namespace ML.Core.Data.Training;
+
+public interface ITrainingDataSource
+{
+ public int BatchCount { get; }
+ public int BatchSize { get; }
+ public IEnumerable> GetBatches();
+ public void Reset();
+}
+
+
+public sealed class TrainingDataSource(IEnumerable data) : ITrainingDataSource
+{
+ public bool ShuffleOnReset { get; init; } = true;
+ public Random Random { get; init; } = Random.Shared;
+ public required int BatchCount { get; init; }
+ public int BatchSize => data.Length / BatchCount;
+
+ private readonly T[] data = [.. data];
+
+ public IEnumerable> GetBatches()
+ {
+ var batchSize = BatchSize;
+ foreach (var i in ..BatchCount)
+ {
+ yield return BatchHelper.Create(data, i * batchSize, batchSize);
+ }
+ }
+
+ public void Reset()
+ {
+ if (ShuffleOnReset)
+ {
+ Random.Shuffle(data);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Data/Training/TrainingEntry.cs b/ML.Core/Data/Training/TrainingEntry.cs
new file mode 100644
index 0000000..33ebf47
--- /dev/null
+++ b/ML.Core/Data/Training/TrainingEntry.cs
@@ -0,0 +1,3 @@
+namespace ML.Core.Data.Training;
+
+public sealed record TrainingEntry(TInput InputValue, TArch ExpectedWeights, TExpected ExpectedValue);
diff --git a/ML.Core/Evaluation/Cost/BinaryCrossEntropyCost.cs b/ML.Core/Evaluation/Cost/BinaryCrossEntropyCost.cs
new file mode 100644
index 0000000..c758ae8
--- /dev/null
+++ b/ML.Core/Evaluation/Cost/BinaryCrossEntropyCost.cs
@@ -0,0 +1,26 @@
+namespace ML.Core.Evaluation.Cost;
+
+///
+/// Binary-Cross-Entropy Cost Function
+/// for classification tasks, particularly binary classification
+/// requires outputs in range 0..1
+/// Cons: Numerically unstable (e.g., log(0) issues); this impl clamps outputs into [EPSILON, 1 - EPSILON]
+///
+// TODO: FromLogits version see CrossEntropyCostFromLogits but with Sigmoid
+public sealed class BinaryCrossEntropyCost : ICostFunction
+{
+ public static BinaryCrossEntropyCost Instance => field ??= new();
+ public const Weight EPSILON = 1e-7f;
+
+ public Weight Cost(Weight output, Weight expected)
+ {
+ output = Weight.Clamp(output, EPSILON, 1 - EPSILON); // just return 0 or 1?
+ return -(expected * Weight.Log(output) + (1 - expected) * Weight.Log(1 - output));
+ }
+
+ public Weight Derivative(Weight output, Weight expected)
+ {
+ output = Weight.Clamp(output, EPSILON, 1 - EPSILON); // just return 0 or 1?
+ return (output - expected) / (output * (1 - output));
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Evaluation/Cost/CrossEntropyCostFromLogits.cs b/ML.Core/Evaluation/Cost/CrossEntropyCostFromLogits.cs
new file mode 100644
index 0000000..8822aad
--- /dev/null
+++ b/ML.Core/Evaluation/Cost/CrossEntropyCostFromLogits.cs
@@ -0,0 +1,39 @@
+using System.Buffers;
+
+namespace ML.Core.Evaluation.Cost;
+
+///
+/// Cross-Entropy Cost Function using SoftMax
+/// requires a linear output
+/// requires expected.Sum() == 1
+/// parts of softmax and cross entropy cancel out in the backwards pass reducing operations, also stabilizes gradients because less divisions
+///
+public sealed class CrossEntropyCostFromLogits : ICostFunction
+{
+ public static readonly CrossEntropyCostFromLogits Instance = new();
+
+ public Weight TotalCost(Vector logits, Vector expected)
+ {
+ NumericsDebug.AssertSameDimensions(logits, expected);
+
+ using var destinationStorage = ArrayPool.Shared.RentNumerics(logits.FlatCount);
+ var destination = Vector.OfSize(logits, destinationStorage);
+
+ var maxLogit = logits.Max();
+ logits.SubtractPointwiseTo(maxLogit, destination);
+ destination.PointwiseExpToSelf();
+ var expSum = destination.Sum();
+
+ var logSumExp = maxLogit + Weight.Log(expSum);
+ var expectedDotLogits = expected.Dot(logits);
+ return -expectedDotLogits + logSumExp;
+ }
+
+ public void DerivativeTo(Vector logits, Vector expected, Vector destination)
+ {
+ NumericsDebug.AssertSameDimensions(logits, expected, destination);
+
+ logits.SoftMaxTo(destination);
+ destination.SubtractToSelf(expected);
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Evaluation/Cost/CrossEntropyCostFromProbabilities.cs b/ML.Core/Evaluation/Cost/CrossEntropyCostFromProbabilities.cs
new file mode 100644
index 0000000..97f35d9
--- /dev/null
+++ b/ML.Core/Evaluation/Cost/CrossEntropyCostFromProbabilities.cs
@@ -0,0 +1,25 @@
+namespace ML.Core.Evaluation.Cost;
+
+///
+/// Cross-Entropy Cost Function
+/// requires outputs in range 0..1
+/// prefer CrossEntropyCostFromLogits for numerical stability
+/// Cons: Numerically unstable (e.g., log(0) issues); this impl clamps outputs into [EPSILON, 1 - EPSILON]
+///
+public sealed class CrossEntropyCostFromProbabilities : ICostFunction
+{
+ public static CrossEntropyCostFromProbabilities Instance => field ??= new();
+ const Weight EPSILON = 1e-7f;
+
+ public Weight Cost(Weight output, Weight expected)
+ {
+ output = Weight.Clamp(output, EPSILON, 1 - EPSILON);
+ return -expected * Weight.Log(output);
+ }
+
+ public Weight Derivative(Weight output, Weight expected)
+ {
+ return -expected / Weight.Clamp(output, EPSILON, 1 - EPSILON);
+ // return output - expected;
+ }
+}
diff --git a/ML.Core/Evaluation/Cost/ICostFunction.cs b/ML.Core/Evaluation/Cost/ICostFunction.cs
new file mode 100644
index 0000000..d1e0891
--- /dev/null
+++ b/ML.Core/Evaluation/Cost/ICostFunction.cs
@@ -0,0 +1,44 @@
+namespace ML.Core.Evaluation.Cost;
+
+public interface ICostFunction
+ where TArch : ITensorLike
+{
+ public Weight TotalCost(TArch output, TArch expected);
+ public void DerivativeTo(TArch output, TArch expected, TArch destination);
+
+ public TArch Derivative(TArch output, TArch expected)
+ {
+ var result = TArch.OfSize(output);
+ DerivativeTo(output, expected, result);
+ NumericsDebug.AssertValidNumbers(result.AsSpan());
+ return result;
+ }
+}
+
+public interface ICostFunction : ICostFunction
+{
+ internal Weight Cost(Weight output, Weight expected);
+ Weight ICostFunction.TotalCost(Vector output, Vector expected)
+ {
+ NumericsDebug.AssertSameDimensions(output, expected);
+ var totalCost = 0.0f;
+
+ foreach (var i in ..output.Count)
+ {
+ totalCost += Cost(output[i], expected[i]);
+ }
+
+ return totalCost;
+ }
+
+ internal Weight Derivative(Weight output, Weight expected);
+
+ void ICostFunction.DerivativeTo(Vector output, Vector expected, Vector destination)
+ {
+ NumericsDebug.AssertSameDimensions(output, expected, destination);
+ for (int i = 0; i < destination.Count; i++)
+ {
+ destination[i] = Derivative(output[i], expected[i]);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Evaluation/EvaluationResult.cs b/ML.Core/Evaluation/EvaluationResult.cs
new file mode 100644
index 0000000..d96283e
--- /dev/null
+++ b/ML.Core/Evaluation/EvaluationResult.cs
@@ -0,0 +1,44 @@
+namespace ML.Core.Evaluation;
+
+public sealed class EvaluationResult
+{
+ public static readonly EvaluationResult ZERO = new() { TotalCount = 0, CorrectCount = 0, CorrectConfidenceSum = 0, WrongConfidenceSum = 0, TotalCost = 0, TotalElapsedTime = TimeSpan.Zero, stackCount = 0 };
+ public required int TotalCount { get; init; }
+ public int AverageCount => TotalCount / stackCount;
+ public required int CorrectCount { get; init; }
+ public float CorrectPercentage => (float)CorrectCount / TotalCount;
+ public int WrongCount => TotalCount - CorrectCount;
+ public float WrongPercentage => (float)WrongCount / TotalCount;
+
+ public required float CorrectConfidenceSum { get; init; }
+ public float CorrectConfidence => CorrectConfidenceSum / CorrectCount;
+
+ public required float WrongConfidenceSum { get; init; }
+ public float WrongConfidence => WrongConfidenceSum / WrongCount;
+
+ public required double TotalCost { get; init; }
+ public double AverageCost => TotalCost / TotalCount;
+
+ public TimeSpan TotalElapsedTime { get; init; } = TimeSpan.Zero;
+ public TimeSpan AverageElapsedTime => TotalElapsedTime / stackCount;
+ private int stackCount = 1;
+
+ public static EvaluationResult operator +(EvaluationResult left, EvaluationResult right) => new()
+ {
+ TotalCount = left.TotalCount + right.TotalCount,
+ CorrectCount = left.CorrectCount + right.CorrectCount,
+ CorrectConfidenceSum = left.CorrectConfidenceSum + right.CorrectConfidenceSum,
+ WrongConfidenceSum = left.WrongConfidenceSum + right.WrongConfidenceSum,
+ TotalCost = left.TotalCost + right.TotalCost,
+ TotalElapsedTime = left.TotalElapsedTime + right.TotalElapsedTime,
+ stackCount = left.stackCount + right.stackCount
+ };
+
+ public override string ToString() => $"{CorrectPercentage * 100,5:F1}% | {CorrectConfidence:F2} {WrongConfidence:F2}";
+ public string ToColoredString() => $"{ConfidenceToTextColor(CorrectPercentage)}{CorrectPercentage * 100,5:F1}%{RESET_COLOR} | {CorrectConfidence:F2} {WrongConfidence:F2}";
+
+ public static string GetHeader() => " ✅ | Conf. | Cost";
+
+ const string RESET_COLOR = "\u001b[0m";
+ static string ConfidenceToTextColor(Weight confidence) => $"\u001b[38;2;{(1 - confidence) * 255:F0};{confidence * 255:F0};60m";
+}
diff --git a/MachineLearning.Training/Evaluation/TrainingEvaluationContext.cs b/ML.Core/Evaluation/TrainingEvaluationContext.cs
similarity index 56%
rename from MachineLearning.Training/Evaluation/TrainingEvaluationContext.cs
rename to ML.Core/Evaluation/TrainingEvaluationContext.cs
index e59d65c..1f10f4c 100644
--- a/MachineLearning.Training/Evaluation/TrainingEvaluationContext.cs
+++ b/ML.Core/Evaluation/TrainingEvaluationContext.cs
@@ -1,4 +1,4 @@
-namespace MachineLearning.Training.Evaluation;
+namespace ML.Core.Evaluation;
public sealed class TrainingEvaluationContext
{
@@ -6,6 +6,6 @@ public sealed class TrainingEvaluationContext
public required int MaxEpoch { get; init; }
public required int CurrentBatch { get; init; }
public required int MaxBatch { get; init; }
- public required double LearnRate { get; init; }
- public string Dump() => $"epoch {CurrentEpoch}/{MaxEpoch}\tbatch {CurrentBatch}/{MaxBatch}";
+ public required double LearningRate { get; init; }
+ public override string ToString() => $"{CurrentEpoch,2}/{MaxEpoch,-2} {CurrentBatch,4}/{MaxBatch,-4}";
}
diff --git a/ML.Core/Evaluation/TrainingEvaluationResult.cs b/ML.Core/Evaluation/TrainingEvaluationResult.cs
new file mode 100644
index 0000000..f9496f0
--- /dev/null
+++ b/ML.Core/Evaluation/TrainingEvaluationResult.cs
@@ -0,0 +1,12 @@
+namespace ML.Core.Evaluation;
+
+public sealed class TrainingEvaluationResult
+{
+ public required TrainingEvaluationContext Context { get; init; }
+ public required EvaluationResult Result { get; init; }
+ public TimeSpan Duration { get; init; }
+ public override string ToString() => $"{Result.ToColoredString()} | {Result.AverageCost:F4} | {Result.TotalElapsedTime:ss\\.ff}s ({Result.AverageElapsedTime:ss\\.ff}s) | {Context} | {Result.AverageCount}";
+
+ // Emoji helps quickly finding the start of the current training run
+ public static string GetHeader() => $"{EvaluationResult.GetHeader()} | Time (/batch) | epoch batch | entries";
+}
diff --git a/ML.Core/ML.Core.csproj b/ML.Core/ML.Core.csproj
new file mode 100644
index 0000000..b6abd76
--- /dev/null
+++ b/ML.Core/ML.Core.csproj
@@ -0,0 +1,16 @@
+
+
+
+ $(DotNetVersion)
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/ML.Core/Modules/Activations/IActivationModule.cs b/ML.Core/Modules/Activations/IActivationModule.cs
new file mode 100644
index 0000000..309085f
--- /dev/null
+++ b/ML.Core/Modules/Activations/IActivationModule.cs
@@ -0,0 +1,5 @@
+namespace ML.Core.Modules.Activations;
+
+public interface IActivationModule : IModule;
+public interface IActivationModule : IActivationModule, IHiddenModule;
+public interface IActivationModule : IActivationModule, IHiddenModule where TSnapshot : IModuleSnapshot;
diff --git a/ML.Core/Modules/Activations/LeakyReLUActivation.cs b/ML.Core/Modules/Activations/LeakyReLUActivation.cs
new file mode 100644
index 0000000..5f1ecfa
--- /dev/null
+++ b/ML.Core/Modules/Activations/LeakyReLUActivation.cs
@@ -0,0 +1,75 @@
+using ML.Core.Attributes;
+
+namespace ML.Core.Modules.Activations;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class LeakyReLUActivation(Weight alpha = 0.01f) : IActivationModule
+{
+ public static LeakyReLUActivation Instance => field ??= new();
+
+ public Weight Alpha { get; } = alpha;
+ private readonly LeakyReLUOperation forwardOp = new(alpha);
+ private readonly LeakyReLUDerivativeOperation derivativeOp = new(alpha);
+
+ public Vector Forward(Vector input, Snapshot snapshot)
+ {
+ snapshot.Input = input;
+ snapshot.Input.MapTo(forwardOp, snapshot.Output);
+ return snapshot.Output;
+ }
+
+ public Vector Backward(Vector outputGradient, Snapshot snapshot, EmptyModuleData gradients)
+ {
+ snapshot.Input.MapTo(derivativeOp, snapshot.InputGradient);
+ snapshot.InputGradient.PointwiseMultiplyToSelf(outputGradient);
+ NumericsDebug.AssertValidNumbers(snapshot.InputGradient);
+ return snapshot.InputGradient;
+ }
+
+ public sealed class Snapshot() : IModuleSnapshot
+ {
+ public Vector Input
+ {
+ get;
+ set
+ {
+ field = value;
+ outputHandle.SetCount(field.Count);
+ inputGradientHandle.SetCount(field.Count);
+ }
+ }
+ public Vector Output => outputHandle.Vector;
+ public Vector InputGradient => inputGradientHandle.Vector;
+
+ private DynamicVector outputHandle = new();
+ private DynamicVector inputGradientHandle = new();
+
+ internal Snapshot(LeakyReLUActivation _) : this() { }
+
+ public void Dispose()
+ {
+ outputHandle.Dispose();
+ inputGradientHandle.Dispose();
+ }
+ }
+
+ public readonly struct LeakyReLUOperation(Weight alpha) : IUnaryOperator
+ {
+ private readonly Weight alpha = alpha;
+ // constructing an alpha vector once and reusing seems to be slower
+
+ public static Weight Invoke(in LeakyReLUOperation info, Weight input) => input > 0 ? input : info.alpha * input;
+ public static SimdVector Invoke(in LeakyReLUOperation info, SimdVector input)
+ => SimdVectorHelper.ConditionalSelect(SimdVectorHelper.GreaterThan(input, SimdVector.Zero), input, input * info.alpha);
+ }
+
+ private readonly struct LeakyReLUDerivativeOperation(Weight alpha) : IUnaryOperator
+ {
+ private readonly Weight alpha = alpha;
+ // constructing an alpha vector once and reusing seems to be slower
+
+ public static Weight Invoke(in LeakyReLUDerivativeOperation info, Weight input) => input > 0 ? 1 : info.alpha;
+ public static SimdVector Invoke(in LeakyReLUDerivativeOperation info, SimdVector input)
+ => SimdVectorHelper.ConditionalSelect(SimdVectorHelper.GreaterThan(input, SimdVector.Zero), SimdVector.One, SimdVectorHelper.Create(info.alpha));
+ }
+}
diff --git a/ML.Core/Modules/Activations/SoftMaxActivation.cs b/ML.Core/Modules/Activations/SoftMaxActivation.cs
new file mode 100644
index 0000000..925952f
--- /dev/null
+++ b/ML.Core/Modules/Activations/SoftMaxActivation.cs
@@ -0,0 +1,51 @@
+using ML.Core.Attributes;
+
+namespace ML.Core.Modules.Activations;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class SoftMaxActivation : IActivationModule
+{
+ public static SoftMaxActivation Instance => field ??= new();
+ public Vector Forward(Vector input, Snapshot snapshot)
+ {
+ snapshot.Input = input;
+ snapshot.Input.SoftMaxTo(snapshot.Output);
+ return snapshot.Output;
+ }
+
+ public Vector Backward(Vector outputGradient, Snapshot snapshot, EmptyModuleData gradients)
+ {
+ var dot = snapshot.Output.Dot(outputGradient);
+ outputGradient.SubtractPointwiseTo(dot, snapshot.InputGradient);
+ snapshot.InputGradient.PointwiseMultiplyToSelf(snapshot.Output);
+ NumericsDebug.AssertValidNumbers(snapshot.InputGradient);
+ return snapshot.InputGradient;
+ }
+
+ public sealed class Snapshot() : IModuleSnapshot
+ {
+ public Vector Input
+ {
+ get;
+ set
+ {
+ field = value;
+ outputHandle.SetCount(field.Count);
+ inputGradientHandle.SetCount(field.Count);
+ }
+ }
+ public Vector Output => outputHandle.Vector;
+ public Vector InputGradient => inputGradientHandle.Vector;
+
+ private DynamicVector outputHandle = new();
+ private DynamicVector inputGradientHandle = new();
+
+ internal Snapshot(SoftMaxActivation _) : this() { }
+
+ public void Dispose()
+ {
+ outputHandle.Dispose();
+ inputGradientHandle.Dispose();
+ }
+ }
+}
diff --git a/ML.Core/Modules/Builder/MultiLayerPerceptronBuilder.cs b/ML.Core/Modules/Builder/MultiLayerPerceptronBuilder.cs
new file mode 100644
index 0000000..23eb82a
--- /dev/null
+++ b/ML.Core/Modules/Builder/MultiLayerPerceptronBuilder.cs
@@ -0,0 +1,49 @@
+using ML.Core.Modules.Activations;
+using ML.Core.Modules.Initialization;
+
+namespace ML.Core.Modules.Builder;
+
+public sealed class MultiLayerPerceptronBuilder
+{
+ private readonly List<(int input, int output, IActivationModule activation)> layers = [];
+ private int nextInput;
+ public static MultiLayerPerceptronBuilder Create(int inputNodes) => new() { nextInput = inputNodes };
+
+ public MultiLayerPerceptronBuilder AddLayer(int outputNodes, IActivationModule activation)
+ {
+ layers.Add((nextInput, outputNodes, activation));
+ nextInput = outputNodes;
+ return this;
+ }
+
+ public MultiLayerPerceptronBuilder AddLayer(int outputNodes, Func> activation)
+ {
+ layers.Add((nextInput, outputNodes, activation.Invoke(nextInput, outputNodes)));
+ nextInput = outputNodes;
+ return this;
+ }
+
+ public SequenceModule Build() => new()
+ {
+ Inner = [.. layers.Select(d => new PerceptronModule(d.input, d.output) { Activation = d.activation })],
+ };
+
+ public SequenceModule BuildAndInit(Random random)
+ {
+ var module = Build();
+
+ var initializer = new SequenceModule.Initializer
+ {
+ Inner = [.. module.Inner.Cast().Select(inner => (IModuleInitializer)(inner.Activation switch
+ {
+ SoftMaxActivation or EmptyModule => new PerceptronModule.XavierInitializer() { Random = random },
+ LeakyReLUActivation => new PerceptronModule.KaimingInitializer(inner.Activation) { Random = random },
+ _ => throw new NotImplementedException(),
+ }))],
+ };
+
+ initializer.Init(module);
+
+ return module;
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/EmbeddedModule.cs b/ML.Core/Modules/EmbeddedModule.cs
new file mode 100644
index 0000000..530f381
--- /dev/null
+++ b/ML.Core/Modules/EmbeddedModule.cs
@@ -0,0 +1,88 @@
+using System.Diagnostics.CodeAnalysis;
+using Ametrin.Serializer;
+using ML.Core.Attributes;
+using ML.Core.Modules.Initialization;
+using ML.Core.Training;
+
+namespace ML.Core.Modules;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class EmbeddedModule : IModule, IEmbeddedModule
+{
+ [SubModule] public required IInputModule Input { get; init; }
+ [SubModule] public required IHiddenModule Hidden { get; init; }
+ [SubModule] public required IOutputModule Output { get; init; }
+
+ public EmbeddedModule() { }
+
+ [SetsRequiredMembers]
+ public EmbeddedModule(IInputModule input, IHiddenModule hidden, IOutputModule output)
+ {
+ Input = input;
+ Hidden = hidden;
+ Output = output;
+ }
+
+ public (TOut Output, Weight Confidence, TArch Weights) Forward(TIn input, Snapshot snapshot)
+ {
+ return Output.Forward(Hidden.Forward(Input.Forward(input, snapshot.Input), snapshot.Hidden), snapshot.Output);
+ }
+
+ public TArch Backward(TArch outputGradient, Snapshot snapshot, Gradients gradients)
+ {
+ return Input.Backward(Hidden.Backward(Output.Backward(outputGradient, snapshot.Output, gradients.Output), snapshot.Hidden, gradients.Hidden), snapshot.Input, gradients.Input);
+ }
+
+ (TOut Output, float Confidence) IEmbeddedModule.Forward(TIn input, IModuleSnapshot snapshot)
+ {
+ var (output, confidence, _) = Forward(input, (Snapshot)snapshot);
+ return (output, confidence);
+ }
+
+ static EmbeddedModule()
+ {
+ AdamOptimizer.Registry.Register>(static (o, module) => new Adam(o, module));
+ }
+
+ public sealed class Adam(AdamOptimizer optimizer, EmbeddedModule module) : IModuleOptimizer
+ {
+ public IModuleOptimizer Input { get; } = optimizer.CreateModuleOptimizer(module.Input);
+ public IModuleOptimizer Hidden { get; } = optimizer.CreateModuleOptimizer(module.Hidden);
+ public IModuleOptimizer Output { get; } = optimizer.CreateModuleOptimizer(module.Output);
+
+ public void Apply(Gradients gradients)
+ {
+ Input.Apply(gradients.Input);
+ Hidden.Apply(gradients.Hidden);
+ Output.Apply(gradients.Output);
+ }
+
+ public void FullReset()
+ {
+ Input.FullReset();
+ Hidden.FullReset();
+ Output.FullReset();
+ }
+ }
+
+ public sealed class Initializer : IModuleInitializer>
+ {
+ public IModuleInitializer Input { get; init; } = EmptyModuleInitializer.Instance;
+ public IModuleInitializer Hidden { get; init; } = EmptyModuleInitializer.Instance;
+ public IModuleInitializer Output { get; init; } = EmptyModuleInitializer.Instance;
+
+ public EmbeddedModule Init(EmbeddedModule module)
+ {
+ Input.Init(module.Input);
+ Hidden.Init(module.Hidden);
+ Output.Init(module.Output);
+
+ return module;
+ }
+ }
+}
+
+public interface IEmbeddedModule : IModule
+{
+ public (TOut Output, Weight Confidence) Forward(TIn input, IModuleSnapshot snapshot);
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/EmptyModule.cs b/ML.Core/Modules/EmptyModule.cs
new file mode 100644
index 0000000..3af395e
--- /dev/null
+++ b/ML.Core/Modules/EmptyModule.cs
@@ -0,0 +1,12 @@
+using ML.Core.Attributes;
+using ML.Core.Modules.Activations;
+
+namespace ML.Core.Modules;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class EmptyModule : IActivationModule
+{
+ public static EmptyModule Instance => field ??= new();
+ public Vector Forward(Vector input, EmptyModuleData snapshot) => input;
+ public Vector Backward(Vector outputGradient, EmptyModuleData snapshot, EmptyModuleData gradients) => outputGradient;
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/IHiddenModule.cs b/ML.Core/Modules/IHiddenModule.cs
new file mode 100644
index 0000000..2587c20
--- /dev/null
+++ b/ML.Core/Modules/IHiddenModule.cs
@@ -0,0 +1,12 @@
+namespace ML.Core.Modules;
+
+public interface IHiddenModule : IInputModule;
+
+public interface IHiddenModule : IHiddenModule, IModule
+ where TSnapshot : IModuleSnapshot
+ where TGradients : IModuleGradients
+{
+ public TArch Forward(TArch input, TSnapshot snapshot);
+ TArch IInputModule.Forward(TArch input, IModuleSnapshot snapshot)
+ => Forward(input, Guard.Is(snapshot));
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/IInputModule.cs b/ML.Core/Modules/IInputModule.cs
new file mode 100644
index 0000000..619c595
--- /dev/null
+++ b/ML.Core/Modules/IInputModule.cs
@@ -0,0 +1,15 @@
+namespace ML.Core.Modules;
+
+public interface IInputModule : IModule
+{
+ public TArch Forward(TIn input, IModuleSnapshot snapshot);
+}
+
+public interface IInputModule : IInputModule, IModule
+ where TSnapshot : IModuleSnapshot
+ where TGradients : IModuleGradients
+{
+ public TArch Forward(TIn input, TSnapshot snapshot);
+ TArch IInputModule.Forward(TIn input, IModuleSnapshot snapshot)
+ => Forward(input, Guard.Is(snapshot));
+}
diff --git a/ML.Core/Modules/IModule.cs b/ML.Core/Modules/IModule.cs
new file mode 100644
index 0000000..7bd6fbb
--- /dev/null
+++ b/ML.Core/Modules/IModule.cs
@@ -0,0 +1,53 @@
+namespace ML.Core.Modules;
+
+public interface IModule
+{
+ public ulong ParameterCount { get; }
+
+ public IModuleSnapshot CreateSnapshot();
+ public IModuleGradients CreateGradients();
+}
+
+public interface IModule : IModule
+{
+ public TArch Backward(TArch outputGradient, IModuleSnapshot snapshot, IModuleGradients gradients);
+}
+
+public interface IModule : IModule
+ where TSnapshot : IModuleSnapshot
+ where TGradients : IModuleGradients
+{
+ public TArch Backward(TArch outputGradient, TSnapshot snapshot, TGradients gradients);
+ TArch IModule.Backward(TArch outputGradient, IModuleSnapshot snapshot, IModuleGradients gradients)
+ => Backward(outputGradient, Guard.Is(snapshot), Guard.Is(gradients));
+
+ public new TSnapshot CreateSnapshot();
+ public new TGradients CreateGradients();
+
+ IModuleSnapshot IModule.CreateSnapshot() => CreateSnapshot();
+ IModuleGradients IModule.CreateGradients() => CreateGradients();
+}
+
+public interface IModuleSnapshot : IDisposable;
+
+public interface IModuleGradients
+{
+ public void Add(IModuleGradients other);
+ public void Reset();
+}
+
+public interface IModuleGradients : IModuleGradients where TSelf : IModuleGradients
+{
+ public void Add(TSelf other);
+ void IModuleGradients.Add(IModuleGradients other) => Add(Guard.Is(other));
+}
+
+public sealed class EmptyModuleData() : IModuleGradients, IModuleSnapshot
+{
+ public static EmptyModuleData Instance => field ??= new();
+ public EmptyModuleData(object? _) : this() { }
+
+ public void Add(EmptyModuleData other) { }
+ public void Reset() { }
+ public void Dispose() { }
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/IOutputModule.cs b/ML.Core/Modules/IOutputModule.cs
new file mode 100644
index 0000000..dbe9389
--- /dev/null
+++ b/ML.Core/Modules/IOutputModule.cs
@@ -0,0 +1,15 @@
+namespace ML.Core.Modules;
+
+public interface IOutputModule : IModule
+{
+ public (TOut Output, Weight Confidence, TArch Weights) Forward(TArch input, IModuleSnapshot snapshot);
+}
+
+public interface IOutputModule : IOutputModule, IModule
+ where TSnapshot : IModuleSnapshot
+ where TGradients : IModuleGradients
+{
+ public (TOut Output, Weight Confidence, TArch Weights) Forward(TArch input, TSnapshot snapshot);
+ (TOut Output, Weight Confidence, TArch Weights) IOutputModule.Forward(TArch input, IModuleSnapshot snapshot)
+ => Forward(input, Guard.Is(snapshot));
+}
diff --git a/ML.Core/Modules/IndexEmbeddingModule.cs b/ML.Core/Modules/IndexEmbeddingModule.cs
new file mode 100644
index 0000000..33e442b
--- /dev/null
+++ b/ML.Core/Modules/IndexEmbeddingModule.cs
@@ -0,0 +1,118 @@
+using System.Numerics.Tensors;
+using ML.Core.Attributes;
+using ML.Core.Training;
+
+namespace ML.Core.Modules;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class IndexEmbeddingModule(Matrix embeddingMatrix) : IInputModule
+{
+ [Weights] public Matrix EmbeddingMatrix { get; } = embeddingMatrix;
+
+ public int TokenCount => EmbeddingMatrix.RowCount;
+ public int EmbeddingSize => EmbeddingMatrix.ColumnCount;
+
+ public IndexEmbeddingModule(int tokenCount, int embeddingSize)
+ : this(Matrix.Create(tokenCount, embeddingSize)) { }
+
+ public Matrix Forward(int[] input, Snapshot snapshot)
+ {
+ snapshot.Input = input;
+
+ foreach (var i in ..input.Length)
+ {
+ GetEmbedding(input[i]).CopyTo(snapshot.Output.RowSpan(i));
+ }
+
+ return snapshot.Output;
+ }
+
+ public Matrix Backward(Matrix outputGradients, Snapshot snapshot, Gradients gradients)
+ {
+ foreach (var i in ..snapshot.Input.Length)
+ {
+ var token = snapshot.Input[i];
+ gradients.TouchedTokens.Add(token);
+ var embeddingGradient = gradients.EmbeddingMatrix.RowSpan(token);
+ TensorPrimitives.Add(embeddingGradient, outputGradients.RowSpan(i), embeddingGradient);
+ }
+
+ return Matrix.Empty;
+ }
+
+ private Span GetEmbedding(int index)
+ {
+ if (index < 0 || index >= EmbeddingMatrix.RowCount)
+ {
+ throw new ArgumentException($"Unknown token: {index}");
+ }
+
+ return EmbeddingMatrix.RowSpan(index);
+ }
+
+ static IndexEmbeddingModule()
+ {
+ AdamOptimizer.Registry.Register(static (op, module) => new Adam(op, module));
+ }
+
+
+ partial class Snapshot
+ {
+ public int[] Input
+ {
+ get;
+ set
+ {
+ field = value;
+ OutputStorage.SetCount(field.Length * module.EmbeddingSize);
+ Output = Matrix.Of(field.Length, module.EmbeddingSize, OutputStorage.Vector);
+ }
+ } = [];
+
+ // TODO: dispose hook to set Output to Empty
+ public Matrix Output { get; private set; }
+
+ private DynamicVector OutputStorage { get; } = new();
+ }
+
+ partial class Gradients
+ {
+ // TODO: clear in reset (remove clear call from Adam.Apply)
+ public HashSet TouchedTokens { get; } = [];
+ }
+
+ public partial class Adam(AdamOptimizer optimizer, IndexEmbeddingModule module) : IModuleOptimizer
+ {
+ public IndexEmbeddingModule Module { get; } = module;
+ public AdamOptimizer Optimizer { get; } = optimizer;
+
+ public Matrix FirstMomentEmbeddingMatrix { get; } = Matrix.OfSize(module.EmbeddingMatrix);
+ public Matrix SecondMomentEmbeddingMatrix { get; } = Matrix.OfSize(module.EmbeddingMatrix);
+
+ public void Apply(Gradients gradients)
+ {
+ foreach (var token in gradients.TouchedTokens)
+ {
+ var gradient = gradients.EmbeddingMatrix.RowSpan(token);
+ var firstMoment = FirstMomentEmbeddingMatrix.RowSpan(token);
+ var secondMoment = SecondMomentEmbeddingMatrix.RowSpan(token);
+ var weights = Module.EmbeddingMatrix.RowSpan(token);
+
+ SpanOperations.MapTo(Optimizer.FirstMomentEstimateOperation, firstMoment, gradient, firstMoment);
+ SpanOperations.MapTo(Optimizer.SecondMomentEstimateOperation, secondMoment, gradient, secondMoment);
+ SpanOperations.MapTo(Optimizer.WeightReductionOperation, weights, firstMoment, secondMoment, weights);
+ }
+
+ NumericsDebug.AssertValidNumbers(FirstMomentEmbeddingMatrix);
+ NumericsDebug.AssertValidNumbers(SecondMomentEmbeddingMatrix);
+
+ gradients.TouchedTokens.Clear(); // TODO: this should happen in Gradients.FullReset();
+ }
+
+ public void FullReset()
+ {
+ FirstMomentEmbeddingMatrix.ResetZero();
+ SecondMomentEmbeddingMatrix.ResetZero();
+ }
+ }
+}
diff --git a/ML.Core/Modules/IndexOutputLayer.cs b/ML.Core/Modules/IndexOutputLayer.cs
new file mode 100644
index 0000000..945eb63
--- /dev/null
+++ b/ML.Core/Modules/IndexOutputLayer.cs
@@ -0,0 +1,33 @@
+using ML.Core.Attributes;
+
+namespace ML.Core.Modules;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class IndexOutputLayer(int tokenCount, bool weightedRandom, Random? random = null) : IOutputModule
+{
+ [Property] public int TokenCount { get; } = tokenCount;
+ [Property] public bool WeightedRandom { get; } = weightedRandom;
+ public Random Random { get; } = random ?? Random.Shared;
+
+ public (int Output, float Confidence, Vector Weights) Forward(Vector input, EmptyModuleData snapshot)
+ {
+ Debug.Assert(input.Count == TokenCount);
+
+ var index = WeightedRandom ? GetWeightedRandomIndex(input, Random) : input.MaximumIndex();
+ return (index, input[index], input);
+ }
+
+ public Vector Backward(Vector outputGradient, EmptyModuleData snapshot, EmptyModuleData gradients) => outputGradient;
+
+ private static int GetWeightedRandomIndex(Vector weights, Random random)
+ {
+ var value = random.NextDouble();
+ for (int i = 0; i < weights.Count; i++)
+ {
+ value -= weights[i];
+ if (value < 0)
+ return i;
+ }
+ return weights.Count - 1;
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/Initialization/IModuleInitializer.cs b/ML.Core/Modules/Initialization/IModuleInitializer.cs
new file mode 100644
index 0000000..8d72f1d
--- /dev/null
+++ b/ML.Core/Modules/Initialization/IModuleInitializer.cs
@@ -0,0 +1,36 @@
+using ML.Core.Modules.Activations;
+
+namespace ML.Core.Modules.Initialization;
+
+public interface IModuleInitializer
+{
+ public IModule Init(IModule module);
+}
+
+public interface IModuleInitializer : IModuleInitializer
+ where TModule : IModule
+{
+ public TModule Init(TModule module);
+ IModule IModuleInitializer.Init(IModule module)
+ => Init(Guard.Is(module));
+}
+
+public sealed class EmptyModuleInitializer : IModuleInitializer
+{
+ public static EmptyModuleInitializer Instance => field ??= new();
+ public IModule Init(IModule module) => module;
+}
+
+public static class InitializationHelper
+{
+ public static Weight GetKaimingGain(IActivationModule n) => n switch
+ {
+ // SigmoidActivation => 1,
+ // TanhActivation => 5 / 3,
+ // ReLUActivation => Weight.Sqrt(2f),
+ LeakyReLUActivation l => Weight.Sqrt(2 / (1 + l.Alpha * l.Alpha)),
+ // Nonlinearity.GELU => Weight.Sqrt(2.0), // common approx
+ // Nonlinearity.Swish => Weight.Sqrt(2.0), // reasonable default
+ _ => throw new NotImplementedException(),
+ };
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/ModuleDataPool.cs b/ML.Core/Modules/ModuleDataPool.cs
new file mode 100644
index 0000000..d1adeff
--- /dev/null
+++ b/ML.Core/Modules/ModuleDataPool.cs
@@ -0,0 +1,68 @@
+using System.Collections.Concurrent;
+
+namespace ML.Core.Modules;
+
+public sealed class ModuleDataPool(Func snapshotGetter, Func gradientGetter)
+{
+ private readonly ConcurrentStack<IModuleGradients> gradientCache = [];
+ private readonly ConcurrentStack<IModuleSnapshot> snapshotCache = [];
+
+ public int UnusedItems => gradientCache.Count;
+
+ public ModuleDataPool(IModule module)
+ : this(module.CreateSnapshot, module.CreateGradients)
+ {
+
+ }
+
+ public RentedSnapshotsMarker RentSnapshot()
+ {
+ var rented = snapshotCache.TryPop(out var snapshots) ? snapshots : snapshotGetter();
+ return new(this, rented);
+ }
+
+ public IModuleGradients RentGradients()
+ {
+ if (gradientCache.TryPop(out var gradients))
+ {
+ return gradients;
+ }
+
+ return gradientGetter();
+ }
+
+
+ public void Return(IModuleGradients gradients)
+ {
+ Debug.Assert(!gradientCache.Contains(gradients));
+ gradients.Reset();
+ gradientCache.Push(gradients);
+ }
+
+ public void Return(IModuleSnapshot snapshots)
+ {
+ Debug.Assert(!snapshotCache.Contains(snapshots));
+ // snapshots are always overwritten, so no reset
+ snapshotCache.Push(snapshots);
+ }
+
+ public void Clear()
+ {
+ foreach(var snapshot in snapshotCache)
+ {
+ snapshot.Dispose();
+ }
+ snapshotCache.Clear();
+ gradientCache.Clear();
+ }
+
+ public readonly ref struct RentedSnapshotsMarker(ModuleDataPool pool, IModuleSnapshot snapshot)
+ {
+ public IModuleSnapshot Snapshot { get; } = snapshot;
+
+ public readonly void Dispose()
+ {
+ pool.Return(Snapshot);
+ }
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Modules/PerceptronModule.cs b/ML.Core/Modules/PerceptronModule.cs
new file mode 100644
index 0000000..28a0dc8
--- /dev/null
+++ b/ML.Core/Modules/PerceptronModule.cs
@@ -0,0 +1,95 @@
+using System.Diagnostics.CodeAnalysis;
+using ML.Core.Attributes;
+using ML.Core.Modules.Activations;
+using ML.Core.Modules.Initialization;
+
+namespace ML.Core.Modules;
+
+[GeneratedModule(IncludeSerializer: true)]
+public sealed partial class PerceptronModule : IHiddenModule
+{
+ public int InputNodes => Weights.ColumnCount;
+ public int OutputNodes => Weights.RowCount;
+ [SubModule] public required IActivationModule Activation { get; init; }
+ [Weights] public Matrix Weights { get; }
+ [Weights] public Vector Biases { get; }
+
+ public PerceptronModule(int inputNodes, int outputNodes)
+ {
+ Weights = Matrix.Create(outputNodes, inputNodes);
+ Biases = Vector.Create(outputNodes);
+ }
+
+ [SetsRequiredMembers]
+ public PerceptronModule(IActivationModule activation, Matrix weights, Vector biases)
+ {
+ Debug.Assert(weights.RowCount == biases.Count);
+ Weights = weights;
+ Biases = biases;
+ Activation = activation;
+ }
+
+ public Vector Forward(Vector input, Snapshot snapshot)
+ {
+ Debug.Assert(input.Count == InputNodes);
+ snapshot.Input = input;
+ Weights.MultiplyTo(snapshot.Input, snapshot.Weighted);
+ snapshot.Weighted.AddTo(Biases, snapshot.Biased);
+ return Activation.Forward(snapshot.Biased, snapshot.Activation);
+ }
+
+ public Vector Backward(Vector outputGradient, Snapshot snapshot, Gradients gradients)
+ {
+ var biasedGradient = Activation.Backward(outputGradient, snapshot.Activation, gradients.Activation);
+ gradients.Biases.AddToSelf(biasedGradient);
+ VectorHelper.MultiplyToMatrixAddTo(biasedGradient, snapshot.Input, gradients.Weights);
+ Weights.MultiplyTransposedTo(biasedGradient, snapshot.InputGradient);
+ NumericsDebug.AssertValidNumbers(snapshot.InputGradient);
+ return snapshot.InputGradient;
+ }
+
+ partial class Snapshot
+ {
+ public Vector Input { get; set; }
+ public Vector Weighted { get; } = Vector.OfSize(module.Biases);
+ public Vector Biased { get; } = Vector.OfSize(module.Biases);
+ public Vector InputGradient { get; } = Vector.Create(module.InputNodes);
+ }
+
+ [GeneratedAdam(typeof(PerceptronModule))]
+ public sealed partial class Adam;
+
+ ///
+ /// suited for (Leaky)ReLU
+ /// not suited for SoftMax/Sigmoid
+ ///
+ public sealed class KaimingInitializer(IActivationModule activation) : IModuleInitializer
+ {
+ public Random Random { get; init; } = Random.Shared;
+ private readonly Weight gain = InitializationHelper.GetKaimingGain(activation);
+ public PerceptronModule Init(PerceptronModule module)
+ {
+ Debug.Assert(module.Activation is not SoftMaxActivation);
+ module.Weights.KaimingNormal(gain, Random);
+ module.Biases.Normal(0, 0.1f, Random);
+ return module;
+ }
+ }
+
+ ///
+ /// suited for SoftMax/Sigmoid
+ /// not suited for (Leaky)ReLU
+ ///
+ public sealed class XavierInitializer : IModuleInitializer
+ {
+ public static XavierInitializer Instance => field ??= new();
+ public Random Random { get; init; } = Random.Shared;
+ public PerceptronModule Init(PerceptronModule module)
+ {
+ Debug.Assert(module.Activation is not LeakyReLUActivation);
+ module.Weights.XavierUniform(Random);
+ module.Biases.Normal(0, 0.1f, Random);
+ return module;
+ }
+ }
+}
diff --git a/ML.Core/Modules/SequenceModule.cs b/ML.Core/Modules/SequenceModule.cs
new file mode 100644
index 0000000..3d1fe0b
--- /dev/null
+++ b/ML.Core/Modules/SequenceModule.cs
@@ -0,0 +1,120 @@
+using System.Runtime.InteropServices;
+using Ametrin.Serializer;
+using ML.Core.Modules.Initialization;
+
+namespace ML.Core.Modules;
+
+public sealed class SequenceModule : IHiddenModule.Snapshot, SequenceModule.Gradients>
+{
+ public required ImmutableArray> Inner { get; init; }
+
+ public TArch Forward(TArch input, Snapshot snapshot)
+ {
+ Debug.Assert(Inner.Length == snapshot.Inner.Length);
+ return Inner.Zip(snapshot.Inner).Aggregate(input, static (input, m) => m.First.Forward(input, m.Second));
+ }
+
+ public TArch Backward(TArch outputGradient, Snapshot snapshot, Gradients gradients)
+ {
+ Debug.Assert(Inner.Length == snapshot.Inner.Length);
+ Debug.Assert(Inner.Length == gradients.Inner.Length);
+
+ foreach (var i in Inner.IndexRange.Reversed())
+ {
+ outputGradient = Inner[i].Backward(outputGradient, snapshot.Inner[i], gradients.Inner[i]);
+ }
+
+ return outputGradient;
+ }
+
+ public ulong ParameterCount => Inner.Sum(static m => m.ParameterCount);
+ public Snapshot CreateSnapshot() => new(this);
+ public Gradients CreateGradients() => new(this);
+
+ public sealed class Snapshot(SequenceModule module) : IModuleSnapshot
+ {
+ public ImmutableArray Inner { get; } = [.. module.Inner.Select(static m => m.CreateSnapshot())];
+
+ public void Dispose()
+ {
+ Inner.ForEach(static i => i.Dispose());
+ }
+ }
+
+ public sealed class Gradients(SequenceModule module) : IModuleGradients
+ {
+ public ImmutableArray Inner { get; } = [.. module.Inner.Select(static m => m.CreateGradients())];
+
+ public void Add(Gradients other)
+ {
+ Debug.Assert(Inner.Length == other.Inner.Length);
+ foreach (var (left, right) in Inner.Zip(other.Inner))
+ {
+ left.Add(right);
+ }
+ }
+
+ public void Reset()
+ {
+ Inner.ForEach(static m => m.Reset());
+ }
+ }
+
+ static SequenceModule()
+ {
+ Training.AdamOptimizer.Registry.Register>(static (o, module) => new Adam(o, module));
+ }
+
+ public sealed class Adam(Training.AdamOptimizer optimizer, SequenceModule module) : Training.IModuleOptimizer
+ {
+ public ImmutableArray SubOptimizers { get; } = [.. module.Inner.Select(optimizer.CreateModuleOptimizer)];
+ public Training.AdamOptimizer Optimizer { get; } = optimizer;
+
+ public void Apply(Gradients gradients)
+ {
+ Debug.Assert(gradients.Inner.Length == SubOptimizers.Length);
+ SubOptimizers.Zip(gradients.Inner).ForEach(static p => p.First.Apply(p.Second));
+ }
+
+ public void FullReset()
+ {
+ SubOptimizers.ForEach(static sub => sub.FullReset());
+ }
+ }
+
+ public sealed class SharedInitializer : IModuleInitializer>
+ {
+ public IModuleInitializer Inner { get; init; } = EmptyModuleInitializer.Instance;
+
+ public SequenceModule Init(SequenceModule module)
+ {
+ module.Inner.ForEach(m => Inner.Init(m));
+ return module;
+ }
+ }
+
+ public sealed class Initializer : IModuleInitializer>
+ {
+ public required ImmutableArray Inner { get; init; }
+
+ public SequenceModule Init(SequenceModule module)
+ {
+ module.Inner.Zip(Inner).ForEach(static p => p.Second.Init(p.First));
+ return module;
+ }
+ }
+}
+
+public sealed class SequenceModuleConverter : ISerializationConverter>
+{
+ public static Result, DeserializationError> TryReadValue(IAmetrinReader reader)
+ {
+ var modules = reader.TryReadArrayValue(AmetrinSerializer.TryReadDynamic>);
+ return modules.Map(static modules => new SequenceModule { Inner = ImmutableCollectionsMarshal.AsImmutableArray(modules) });
+ }
+
+ public static void WriteValue(IAmetrinWriter writer, SequenceModule value)
+ {
+ writer.WriteArrayValue(value.Inner.AsSpan(), AmetrinSerializer.WriteDynamic);
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/ThreadingMode.cs b/ML.Core/ThreadingMode.cs
new file mode 100644
index 0000000..6cf2985
--- /dev/null
+++ b/ML.Core/ThreadingMode.cs
@@ -0,0 +1,3 @@
+namespace ML.Core;
+
+public enum ThreadingMode { Single, Half, AlmostFull, Full }
\ No newline at end of file
diff --git a/ML.Core/Training/AdamOptimizer.cs b/ML.Core/Training/AdamOptimizer.cs
new file mode 100644
index 0000000..f28c5c2
--- /dev/null
+++ b/ML.Core/Training/AdamOptimizer.cs
@@ -0,0 +1,78 @@
+namespace ML.Core.Training;
+
+public sealed class AdamOptimizer : Optimizer
+{
+ public static ModuleOptimizerRegistry Registry { get; } = [];
+ protected override ModuleOptimizerRegistry RegistryGetter => Registry;
+ public Weight FirstDecayRate { get; init; } = 0.9f;
+ public Weight SecondDecayRate { get; init; } = 0.99f; // or 0.999f
+ public Weight Epsilon { get; init; } = 1e-8f;
+
+ public Weight Iteration
+ {
+ get;
+ set
+ {
+ field = value;
+ CurrentFirstCorrection = 1 - Weight.Pow(FirstDecayRate, Iteration);
+ CurrentSecondCorrection = 1 - Weight.Pow(SecondDecayRate, Iteration);
+ WeightReductionOperation = new(this);
+ }
+ }
+
+ public AdamFirstMomentEstimateOperation FirstMomentEstimateOperation { get; private set; }
+ public AdamSecondMomentEstimateOperation SecondMomentEstimateOperation { get; private set; }
+ public AdamWeightReductionOperation WeightReductionOperation { get; private set; }
+
+ public Weight CurrentFirstCorrection { get; private set; }
+ public Weight CurrentSecondCorrection { get; private set; }
+
+
+ public override void Init()
+ {
+ Iteration = 1; // even when retraining!
+ FirstMomentEstimateOperation = new(FirstDecayRate);
+ SecondMomentEstimateOperation = new(SecondDecayRate);
+ }
+
+ public override void OnBatchCompleted()
+ {
+ Iteration++;
+ }
+
+ public readonly struct AdamFirstMomentEstimateOperation(Weight decayRate) : IBinaryOperator
+ {
+ private readonly Weight decayRate = decayRate;
+
+ public static Weight Invoke(in AdamFirstMomentEstimateOperation state, Weight lastMoment, Weight gradient) => state.decayRate * lastMoment + (1 - state.decayRate) * gradient;
+ public static SimdVector Invoke(in AdamFirstMomentEstimateOperation state, SimdVector lastMoment, SimdVector gradient) => state.decayRate * lastMoment + (1 - state.decayRate) * gradient;
+ }
+
+ public readonly struct AdamSecondMomentEstimateOperation(Weight decayRate) : IBinaryOperator
+ {
+ private readonly Weight decayRate = decayRate;
+ public static Weight Invoke(in AdamSecondMomentEstimateOperation state, Weight lastMoment, Weight gradient) => state.decayRate * lastMoment + (1 - state.decayRate) * gradient * gradient;
+ public static SimdVector Invoke(in AdamSecondMomentEstimateOperation state, SimdVector lastMoment, SimdVector gradient) => state.decayRate * lastMoment + (1 - state.decayRate) * gradient * gradient;
+ }
+
+ public readonly struct AdamWeightReductionOperation(AdamOptimizer context) : ITernaryOperator
+ {
+ private readonly Weight learningRate = context.LearningRate;
+ private readonly Weight firstMomentCorrection = context.CurrentFirstCorrection;
+ private readonly Weight secondMomentCorrection = context.CurrentSecondCorrection;
+ private readonly Weight epsilon = context.Epsilon;
+
+ public static Weight Invoke(in AdamWeightReductionOperation state, Weight currentWeight, Weight firstMoment, Weight secondMoment)
+ {
+ var mHat = firstMoment / state.firstMomentCorrection;
+ var vHat = secondMoment / state.secondMomentCorrection;
+ return currentWeight - (state.learningRate * mHat / (Weight.Sqrt(vHat) + state.epsilon));
+ }
+ public static SimdVector Invoke(in AdamWeightReductionOperation state, SimdVector currentWeight, SimdVector firstMoment, SimdVector secondMoment)
+ {
+ var mHat = firstMoment / state.firstMomentCorrection;
+ var vHat = secondMoment / state.secondMomentCorrection;
+ return currentWeight - (state.learningRate * mHat / (SimdVectorHelper.SquareRoot(vHat) + SimdVectorHelper.Create(state.epsilon)));
+ }
+ }
+}
diff --git a/ML.Core/Training/IModuleOptimizer.cs b/ML.Core/Training/IModuleOptimizer.cs
new file mode 100644
index 0000000..b6af1f3
--- /dev/null
+++ b/ML.Core/Training/IModuleOptimizer.cs
@@ -0,0 +1,25 @@
+using ML.Core.Modules;
+
+namespace ML.Core.Training;
+
+public interface IModuleOptimizer
+{
+ public void Apply(IModuleGradients gradients);
+ public void FullReset();
+};
+
+public interface IModuleOptimizer : IModuleOptimizer
+ where TGradients : IModuleGradients
+{
+ public void Apply(TGradients gradients);
+ void IModuleOptimizer.Apply(IModuleGradients gradients)
+ => Apply(Guard.Is(gradients));
+}
+
+public sealed class EmptyModuleOptimizer : IModuleOptimizer
+{
+ public static EmptyModuleOptimizer Instance { get; } = new();
+
+ public void Apply(EmptyModuleData gradients) { }
+ public void FullReset() { }
+}
\ No newline at end of file
diff --git a/ML.Core/Training/ModuleTrainer.cs b/ML.Core/Training/ModuleTrainer.cs
new file mode 100644
index 0000000..845a15d
--- /dev/null
+++ b/ML.Core/Training/ModuleTrainer.cs
@@ -0,0 +1,193 @@
+using System.Buffers;
+using System.Text;
+using System.Threading;
+using ML.Core.Evaluation;
+using ML.Core.Evaluation.Cost;
+using ML.Core.Modules;
+using ML.Core.Data.Training;
+
+namespace ML.Core.Training;
+
+public sealed class EmbeddedModuleTrainer
+ where TArch : ITensorLike
+{
+ public EmbeddedModule Module { get; }
+ public TrainingConfig Config { get; }
+ public required ITrainingDataSource> TrainingData { get; init; }
+ public required ICostFunction CostFunction { get; init; }
+ public ModuleDataPool DataPool { get; }
+
+ private Optimizer Optimizer => Config.Optimizer;
+ private readonly IModuleOptimizer moduleOptimizer;
+
+ public EmbeddedModuleTrainer(EmbeddedModule module, TrainingConfig config)
+ {
+ Module = module;
+ Config = config;
+ moduleOptimizer = Optimizer.CreateModuleOptimizer(module);
+ DataPool = new(module);
+ }
+
+ public void TrainConsole(bool cancelable = true)
+ {
+ using var cts = new CancellationTokenSource();
+ if (cancelable)
+ {
+ Task.Run(async () =>
+ {
+ while (!cts.IsCancellationRequested)
+ {
+ if (Console.KeyAvailable && Console.ReadKey(intercept: true).Key == ConsoleKey.C)
+ {
+ Console.WriteLine("Canceling...");
+ cts.Cancel();
+ break;
+ }
+ await Task.Delay(500);
+ }
+ });
+ }
+
+ Console.WriteLine($"Training {Module} ({Module.ParameterCount})");
+ Console.WriteLine(GenerateTrainingOverview(Config, TrainingData.BatchCount, TrainingData.BatchSize));
+ Console.WriteLine("Starting Training...");
+ Console.WriteLine(TrainingEvaluationResult.GetHeader());
+ Train(cts.Token);
+ cts.Cancel();
+ Console.WriteLine("Training Done!");
+ }
+
+
+ public void Train(CancellationToken token = default)
+ {
+ Optimizer.Init();
+ var runningEvaluation = EvaluationResult.ZERO;
+
+ foreach (var epochIndex in ..Config.EpochCount)
+ {
+ TrainingData.Reset();
+
+ foreach (var (batchIndex, batch) in TrainingData.GetBatches().Index())
+ {
+ runningEvaluation += RunBatch(batch);
+
+ if ((Config.BatchEvaluationEnabled && batchIndex % Config.EvaluationCallbackAfterBatches is 0)
+ || (batchIndex + 1 == TrainingData.BatchCount && Config.EpochEvaluationEnabled))
+ {
+ Config.EvaluationCallback!.Invoke(new TrainingEvaluationResult { Context = GetContext(), Result = runningEvaluation });
+ runningEvaluation = EvaluationResult.ZERO;
+ }
+
+ Optimizer.OnBatchCompleted();
+
+ if (token.IsCancellationRequested)
+ {
+ Optimizer.OnEpochCompleted();
+ return;
+ }
+
+ TrainingEvaluationContext GetContext() => new()
+ {
+ CurrentBatch = batchIndex + 1,
+ MaxBatch = TrainingData.BatchCount,
+ CurrentEpoch = epochIndex + 1,
+ MaxEpoch = Config.EpochCount,
+ LearningRate = Optimizer.LearningRate,
+ };
+ }
+
+ Optimizer.OnEpochCompleted();
+ }
+ }
+
+ public EvaluationResult RunBatch(IEnumerable> batch)
+ {
+ var timeStamp = Stopwatch.GetTimestamp();
+
+ using var context = ThreadedTrainer.Train(batch, DataPool, Config.Threading, (entry, context) =>
+ {
+ var (output, confidence, cost) = RunEntry(entry, (EmbeddedModule.Gradients)context.Gradients);
+ if (EqualityComparer.Default.Equals(output, entry.ExpectedValue))
+ {
+ context.CorrectCount++;
+ context.CorrectConfidenceSum += confidence;
+ }
+ else
+ {
+ context.WrongConfidenceSum += confidence;
+ }
+
+ context.TotalCount++;
+ context.TotalCost += cost;
+ });
+
+
+ moduleOptimizer.Apply(context.Gradients);
+
+ return new()
+ {
+ TotalCount = context.TotalCount,
+ CorrectCount = context.CorrectCount,
+ CorrectConfidenceSum = context.CorrectConfidenceSum,
+ WrongConfidenceSum = context.WrongConfidenceSum,
+ TotalCost = context.TotalCost,
+ TotalElapsedTime = Stopwatch.GetElapsedTime(timeStamp),
+ };
+ }
+
+ private (TOut output, Weight confidence, Weight cost) RunEntry(TrainingEntry entry, EmbeddedModule.Gradients gradients)
+ {
+ using var marker = DataPool.RentSnapshot();
+ var snapshot = (EmbeddedModule.Snapshot)marker.Snapshot;
+
+ var (output, confidence, outputWeights) = Module.Forward(entry.InputValue, snapshot);
+
+ NumericsDebug.AssertSameDimensions(outputWeights, entry.ExpectedWeights);
+ using var outputGradientStorage = ArrayPool.Shared.RentNumerics(outputWeights.FlatCount);
+ var outputGradient = TArch.OfSize(outputWeights, outputGradientStorage);
+ CostFunction.DerivativeTo(outputWeights, entry.ExpectedWeights, outputGradient);
+
+ var inputGradient = Module.Backward(outputGradient, snapshot, gradients);
+
+ return (output, confidence, CostFunction.TotalCost(outputWeights, entry.ExpectedWeights));
+ }
+
+ public void FullReset()
+ {
+ moduleOptimizer.FullReset();
+ }
+
+ public static string GenerateTrainingOverview(TrainingConfig config, int batchCount, int batchSize)
+ {
+ var sb = new StringBuilder();
+ sb.AppendLine();
+ sb.AppendLine("Training Info:");
+ sb.AppendLine($"using {config.Optimizer.GetType().Name} ({config.Threading})");
+ sb.AppendLine("Training for");
+ sb.AppendLine($" - {config.EpochCount} epochs");
+ sb.AppendLine($" - {batchCount} batches");
+ sb.AppendLine($" - {batchSize} entries");
+
+ if (config.EvaluationCallbackEnabled)
+ {
+ if (config.BatchEvaluationEnabled)
+ {
+ if (config.EvaluationCallbackAfterBatches == 1)
+ {
+ sb.AppendLine("Dumping every batch");
+ }
+ else
+ {
+ sb.AppendLine($"Dumping every {config.EvaluationCallbackAfterBatches} batches");
+ }
+ }
+ else
+ {
+ sb.AppendLine($"Dumping every epoch");
+ }
+ }
+
+ sb.AppendLine();
+ return sb.ToString();
+ }
+}
\ No newline at end of file
diff --git a/ML.Core/Training/Optimizer.cs b/ML.Core/Training/Optimizer.cs
new file mode 100644
index 0000000..82279e7
--- /dev/null
+++ b/ML.Core/Training/Optimizer.cs
@@ -0,0 +1,33 @@
+using ML.Core.Modules;
+
+namespace ML.Core.Training;
+
+public abstract class Optimizer
+{
+ public required Weight LearningRate { get; set; }
+
+ public virtual void Init() { }
+ public virtual void OnBatchCompleted() { }
+ public virtual void OnEpochCompleted() { }
+
+ protected abstract ModuleOptimizerRegistry RegistryGetter { get; }
+ public IModuleOptimizer CreateModuleOptimizer(IModule module)
+ {
+ if (RegistryGetter.TryGetValue(module.GetType(), out var factory))
+ {
+ return factory(this, module);
+ }
+
+ throw new NotImplementedException($"No known {GetType().Name} for {module.GetType().Name}");
+ }
+}
+
+public class ModuleOptimizerRegistry : Dictionary<Type, Func<Optimizer, IModule, IModuleOptimizer>>;
+public sealed class ModuleOptimizerRegistry<TOptimizer> : ModuleOptimizerRegistry where TOptimizer : Optimizer
+{
+ public void Register<TModule>(Func<TOptimizer, TModule, IModuleOptimizer> factory) where TModule : IModule
+ => Add(typeof(TModule), (op, layer) => factory(Guard.Is<TOptimizer>(op), Guard.Is<TModule>(layer)));
+
+ public void RegisterEmpty<TModule>() where TModule : IModule
+ => Add(typeof(TModule), static (_, _) => EmptyModuleOptimizer.Instance);
+}
\ No newline at end of file
diff --git a/MachineLearning.Training/ThreadedTrainer.cs b/ML.Core/Training/ThreadedTrainer.cs
similarity index 59%
rename from MachineLearning.Training/ThreadedTrainer.cs
rename to ML.Core/Training/ThreadedTrainer.cs
index a2a5f0e..744d2c0 100644
--- a/MachineLearning.Training/ThreadedTrainer.cs
+++ b/ML.Core/Training/ThreadedTrainer.cs
@@ -1,15 +1,14 @@
using System.Collections.Concurrent;
-using System.Collections.Immutable;
-using MachineLearning.Data.Entry;
-using MachineLearning.Model.Layer.Snapshot;
+using System.Threading;
+using ML.Core.Modules;
-namespace MachineLearning.Training;
+namespace ML.Core.Training;
public sealed class ThreadedTrainer
{
- public static TrainingContext Train(IEnumerable trainingSet, ModelCachePool contextPool, ThreadingMode threading, Action action)
+ public static TrainingContext Train<TData>(IEnumerable<TData> trainingSet, ModuleDataPool contextPool, ThreadingMode threading, Action<TData, TrainingContext> action)
{
- using var contexts = new ThreadLocal(() => new() { Gradients = contextPool.RentGradients() }, trackAllValues: true);
+ using var contexts = new ThreadLocal<TrainingContext>(() => new() { Pool = contextPool }, trackAllValues: true);
var options = new ParallelOptions
{
MaxDegreeOfParallelism = threading switch
@@ -21,8 +20,7 @@ public static TrainingContext Train(IEnumerable trainingSet, Model
_ => throw new UnreachableException()
},
};
- var partitioner = Partitioner.Create(trainingSet);
- var result = Parallel.ForEach(partitioner, options, (item, state) =>
+ var result = Parallel.ForEach(trainingSet, options, (item, state) =>
{
action(item, contexts.Value!);
});
@@ -34,43 +32,44 @@ public static TrainingContext Train(IEnumerable trainingSet, Model
foreach (var other in contexts.Values.Skip(1))
{
context.Add(other);
- contextPool.Return(other.Gradients);
+ other.Dispose();
}
return context;
}
}
-public enum ThreadingMode { Single, Half, Full, AlmostFull }
-
-public sealed class TrainingContext
+public sealed class TrainingContext : IDisposable
{
public int TotalCount { get; set; }
public int CorrectCount { get; set; }
+ public float CorrectConfidenceSum { get; set; }
+ public float WrongConfidenceSum { get; set; }
public float TotalCost { get; set; }
- public required ImmutableArray Gradients { get; init; }
+ public required ModuleDataPool Pool { get; init; }
+ private IModuleGradients? _gradients;
+ public IModuleGradients Gradients => _gradients ??= Pool.RentGradients();
public void Add(TrainingContext other)
{
TotalCount += other.TotalCount;
CorrectCount += other.CorrectCount;
+ CorrectConfidenceSum += other.CorrectConfidenceSum;
+ WrongConfidenceSum += other.WrongConfidenceSum;
TotalCost += other.TotalCost;
- foreach (var (g, o) in Gradients.Zip(other.Gradients))
- {
- g.Add(o);
- }
+ Gradients.Add(other.Gradients);
}
- public void Reset()
+ public void Dispose()
{
TotalCount = 0;
CorrectCount = 0;
TotalCost = 0;
- foreach (var gradient in Gradients)
+ if (_gradients is not null)
{
- gradient.Reset();
+ Pool.Return(_gradients);
}
}
-}
+}
\ No newline at end of file
diff --git a/ML.Core/Training/TrainingConfig.cs b/ML.Core/Training/TrainingConfig.cs
new file mode 100644
index 0000000..47ed337
--- /dev/null
+++ b/ML.Core/Training/TrainingConfig.cs
@@ -0,0 +1,17 @@
+using ML.Core.Evaluation;
+
+namespace ML.Core.Training;
+
+public sealed record TrainingConfig
+{
+ public required int EpochCount { get; init; }
+
+ public required Optimizer Optimizer { get; init; }
+ public ThreadingMode Threading { get; init; } = ThreadingMode.Full;
+
+ public Action? EvaluationCallback { get; init; } = null;
+ public bool EvaluationCallbackEnabled => EvaluationCallback is not null;
+ public bool EpochEvaluationEnabled => EvaluationCallbackEnabled && !BatchEvaluationEnabled;
+ public int EvaluationCallbackAfterBatches { get; init; } = -1;
+ public bool BatchEvaluationEnabled => EvaluationCallbackEnabled && EvaluationCallbackAfterBatches > 0;
+}
diff --git a/MachineLearning.Samples/AssetManager.cs b/ML.Runner/AssetManager.cs
similarity index 75%
rename from MachineLearning.Samples/AssetManager.cs
rename to ML.Runner/AssetManager.cs
index 1b81bf8..4ac28db 100644
--- a/MachineLearning.Samples/AssetManager.cs
+++ b/ML.Runner/AssetManager.cs
@@ -1,8 +1,11 @@
-namespace MachineLearning.Samples;
+using System.IO;
+using ML.Core.Converters;
+
+namespace ML.Runner;
public static class AssetManager
{
- public static readonly DirectoryInfo Directory = new DirectoryInfo(@"I:\Coding\TestChamber\MachineLearning");
+ public static readonly DirectoryInfo Directory = new(@"I:/Coding/TestChamber/MachineLearning");
public static readonly DirectoryInfo ModelDirectory = Directory.Directory("Model");
public static readonly DirectoryInfo WeightMapsDirectory = Directory.Directory("Maps");
public static readonly DirectoryInfo DataDirectory = Directory.Directory("Data");
@@ -11,7 +14,7 @@ public static class AssetManager
public static readonly FileInfo Sentences = GetDataFile("sentences.txt");
public static readonly FileInfo Speech = GetDataFile("speech.txt");
- public static FileInfo GetModelFile(string fileName) => ModelDirectory.File(fileName.EndsWith(ModelSerializer.FILE_EXTENSION) ? fileName : $"{fileName}{ModelSerializer.FILE_EXTENSION}");
+ public static FileInfo GetModelFile(string fileName) => ModelDirectory.File(fileName.EndsWith(ModuleSerializer.FILE_EXTENSION) ? fileName : $"{fileName}{ModuleSerializer.FILE_EXTENSION}");
public static FileInfo GetDataFile(string fileName) => DataDirectory.File(fileName);
public static DirectoryInfo GetWeightMapFolder(string modelName) => WeightMapsDirectory.Directory(modelName);
-}
+}
\ No newline at end of file
diff --git a/ML.Runner/ML.Runner.csproj b/ML.Runner/ML.Runner.csproj
new file mode 100644
index 0000000..04a4b2d
--- /dev/null
+++ b/ML.Runner/ML.Runner.csproj
@@ -0,0 +1,13 @@
+
+
+
+ Exe
+ $(DotNetVersion)
+
+
+
+
+
+
+
+
diff --git a/ML.Runner/Program.cs b/ML.Runner/Program.cs
new file mode 100644
index 0000000..ac60a80
--- /dev/null
+++ b/ML.Runner/Program.cs
@@ -0,0 +1,15 @@
+using System.Globalization;
+using ML.Runner.Samples.Language;
+using ML.Runner.Samples.Mnist;
+
+CultureInfo.CurrentCulture = CultureInfo.InvariantCulture;
+
+// var random = Random.Shared;
+var random = new Random(69);
+
+MnistModel.Run(random);
+
+#if DEBUG
+// forces all remaining finalizers to be called to make sure all have been disposed
+GC.Collect();
+#endif
\ No newline at end of file
diff --git a/MachineLearning.Samples/Language/C4DataSet.cs b/ML.Runner/Samples/Language/C4DataSet.cs
similarity index 86%
rename from MachineLearning.Samples/Language/C4DataSet.cs
rename to ML.Runner/Samples/Language/C4DataSet.cs
index fbba569..a564078 100644
--- a/MachineLearning.Samples/Language/C4DataSet.cs
+++ b/ML.Runner/Samples/Language/C4DataSet.cs
@@ -1,10 +1,13 @@
+using System.IO;
using System.IO.Compression;
+using System.Net.Http;
using System.Text.Json;
-using MachineLearning.Data;
+using ML.Core.Data;
+using ML.Core.Data.Training;
-namespace MachineLearning.Samples.Language;
+namespace ML.Runner.Samples.Language;
-public sealed class C4DataSet(ITokenizer tokenizer, int contextSize, int initalFile = 0) : ITrainingSet, IDisposable
+public sealed class C4DataSet(ITokenizer tokenizer, int contextSize, int initalFile = 0) : ITrainingDataSource>, IDisposable
{
public int BatchCount { get; } = int.MaxValue;
public required int BatchSize { get; init; }
@@ -18,16 +21,16 @@ public sealed class C4DataSet(ITokenizer tokenizer, int contextSize, int
private Task downloadTask = Download(initalFile);
- public IEnumerable GetBatches()
+ public IEnumerable>> GetBatches()
{
while (true)
{
- yield return new Batch(GetTrainingData().Take(BatchSize));
+ yield return GetTrainingData().Take(BatchSize);
}
}
- private IEnumerator? dataEnumerator;
- public IEnumerable GetTrainingData()
+ private IEnumerator>? dataEnumerator;
+ public IEnumerable> GetTrainingData()
{
while (true)
{
@@ -35,7 +38,7 @@ public IEnumerable GetTrainingData()
{
try
{
- dataEnumerator = tokenizer.Tokenize(NextLine()).ToArray().SlidingWindow(tokenizer.TokenizeSingle("\0"), contextSize).ToTrainingDataMatrix(tokenizer.TokenCount, contextSize, null).GetEnumerator();
+ dataEnumerator = tokenizer.Tokenize(NextLine()).ToArray().SlidingWindow(tokenizer.TokenizeSingle("\0"), contextSize).ToTrainingData(tokenizer.TokenCount).GetEnumerator();
}
catch (Exception) { /* Console.WriteLine(e.Message); */ }
}
@@ -112,6 +115,11 @@ public static async Task Download(int fileIndex)
return file;
}
+ public void Reset()
+ {
+
+ }
+
private sealed class C4FileReader : IDisposable
{
private readonly Stream stream;
diff --git a/MachineLearning.Samples/Language/CharTokenizer.cs b/ML.Runner/Samples/Language/CharTokenizer.cs
similarity index 90%
rename from MachineLearning.Samples/Language/CharTokenizer.cs
rename to ML.Runner/Samples/Language/CharTokenizer.cs
index a7791ef..67a8b2c 100644
--- a/MachineLearning.Samples/Language/CharTokenizer.cs
+++ b/ML.Runner/Samples/Language/CharTokenizer.cs
@@ -1,7 +1,6 @@
-using MachineLearning.Data;
-using System.Diagnostics;
+using ML.Core.Data;
-namespace MachineLearning.Samples.Language;
+namespace ML.Runner.Samples.Language;
public sealed class CharTokenizer(string tokens) : ITokenizer
{
diff --git a/MachineLearning.Samples/Language/LMHelper.cs b/ML.Runner/Samples/Language/LMHelper.cs
similarity index 72%
rename from MachineLearning.Samples/Language/LMHelper.cs
rename to ML.Runner/Samples/Language/LMHelper.cs
index 8501e2c..50b18f8 100644
--- a/MachineLearning.Samples/Language/LMHelper.cs
+++ b/ML.Runner/Samples/Language/LMHelper.cs
@@ -1,11 +1,12 @@
-using MachineLearning.Data;
+using ML.Core.Data;
+using ML.Core.Modules;
-namespace MachineLearning.Samples.Language;
+namespace ML.Runner.Samples.Language;
public static class LMHelper
{
private static readonly HashSet EndTokens = ["\0"];
- public static void StartChat(IEmbeddedModel model, int contextSize, ITokenizer tokenizer)
+ public static void StartChat(IEmbeddedModule model, int contextSize, ITokenizer tokenizer)
{
var fillerToken = tokenizer.TokenizeSingle("\0");
string input;
@@ -18,14 +19,14 @@ public static void StartChat(IEmbeddedModel model, int contextSize,
}
if (!Console.IsOutputRedirected)
{
- Console.SetCursorPosition(0, Console.CursorTop - 1);
+ Console.SetCursorPosition(0, Console.CursorTop - 1);
}
Console.Write(input);
Generate([.. tokenizer.Tokenize(input)], model, contextSize, tokenizer, fillerToken);
} while (true);
}
- public static void Generate(int[] input, IEmbeddedModel model, int contextSize, ITokenizer tokenizer, int fillerToken)
+ public static void Generate(int[] input, IEmbeddedModule model, int contextSize, ITokenizer tokenizer, int fillerToken)
{
if (input.Contains(-1))
{
@@ -36,9 +37,10 @@ public static void Generate(int[] input, IEmbeddedModel model, int c
int prediction;
string token;
Weight confidence;
+ using var snapshot = model.CreateSnapshot();
do
{
- (prediction, confidence) = model.Process(input);
+ (prediction, confidence) = model.Forward(input, snapshot);
token = tokenizer.GetToken(prediction);
input = input[0] == fillerToken ? [.. input[1..], prediction] : [.. input, prediction];
SetConsoleTextColor(confidence);
@@ -48,9 +50,9 @@ public static void Generate(int[] input, IEmbeddedModel model, int c
Console.Write("\u001b[0m"); // reset color
Console.WriteLine();
- static void SetConsoleTextColor(double confidence)
+ static void SetConsoleTextColor(Weight confidence)
{
Console.Write($"\u001b[38;2;{(1 - confidence) * 255:F0};{confidence * 255:F0};60m");
}
}
-}
+}
\ No newline at end of file
diff --git a/MachineLearning.Samples/Language/LanguageDataHelper.cs b/ML.Runner/Samples/Language/LanguageDataHelper.cs
similarity index 63%
rename from MachineLearning.Samples/Language/LanguageDataHelper.cs
rename to ML.Runner/Samples/Language/LanguageDataHelper.cs
index 29ccf5a..20c999a 100644
--- a/MachineLearning.Samples/Language/LanguageDataHelper.cs
+++ b/ML.Runner/Samples/Language/LanguageDataHelper.cs
@@ -1,34 +1,14 @@
-using System.Collections.Frozen;
-using System.Diagnostics;
+using System;
+using System.IO;
using System.Text;
-using MachineLearning.Data;
+using ML.Core.Data;
+using ML.Core.Data.Training;
-namespace MachineLearning.Samples.Language;
+namespace ML.Runner.Samples.Language;
public static class LanguageDataHelper
{
- public static IEnumerable ToTrainingData(this IEnumerable> source, ITokenizer tokenizer)
- {
- var cache = Enumerable.Range(0, tokenizer.TokenCount).Select(i =>
- {
- var vector = Vector.Create(tokenizer.TokenCount);
- vector[i] = 1;
- return new KeyValuePair(i, vector);
- }).ToFrozenDictionary();
-
- return source.Select(MapData);
-
-
- TrainingData MapData(DataEntry e)
- {
- var input = tokenizer.Tokenize(e.Input).ToArray();
- var expectedToken = tokenizer.TokenizeSingle(e.Expected.ToString());
-
- return new TrainingData(input, expectedToken, cache[expectedToken]);
- }
- }
-
- public static IEnumerable ToTrainingData(this IEnumerable> source, int tokenCount)
+ public static IEnumerable> ToTrainingData(this IEnumerable<(int[] Input, int Expected)> source, int tokenCount)
{
var cache = Enumerable.Range(0, tokenCount).Select(i =>
{
@@ -39,14 +19,14 @@ public static IEnumerable ToTrainingData(this IEnumerable e)
+ TrainingEntry MapData((int[] Input, int Expected) e)
{
- return new TrainingData(e.Input, e.Expected, cache[e.Expected]);
+ return new (e.Input, cache[e.Expected], e.Expected);
}
}
- public static IEnumerable ToTrainingDataMatrix(this IEnumerable<(int[] Input, int Expected)> source, int tokenCount, int contextSize, int? fillerToken)
+ public static IEnumerable> ToTrainingDataMatrix(this IEnumerable<(int[] Input, int Expected)> source, int tokenCount, int contextSize, int? fillerToken)
{
var cache = Enumerable.Range(0, tokenCount).Select(i =>
{
@@ -55,13 +35,13 @@ public static IEnumerable ToTrainingDataMatrix(this IEnumerable<(i
return new KeyValuePair(i, vector);
}).ToFrozenDictionary();
- return source.Where(e => e.Input.Length > 0).Select(MapData);
+ return source.Where(static e => e.Input.Length > 0).Select(MapData);
- TrainingData MapData((int[] Input, int Expected) e)
+ TrainingEntry MapData((int[] Input, int Expected) e)
{
return fillerToken.HasValue ? ImplFiller(fillerToken.Value) : Impl();
- TrainingData Impl()
+ TrainingEntry Impl()
{
var length = int.Min(e.Input.Length, contextSize);
var expected = Matrix.Create(length, tokenCount);
@@ -75,11 +55,11 @@ TrainingData Impl()
cache[e.Expected].CopyTo(expected.RowRef(length - 1));
- return new TrainingData(fillerToken.HasValue ? e.Input.PadLeft(contextSize, fillerToken.Value) : e.Input, e.Expected, expected.Storage);
+ return new(fillerToken.HasValue ? e.Input.PadLeft(contextSize, fillerToken.Value) : e.Input, expected, e.Expected);
}
// filling with a filler in this way is probably bad but i'll use dynamic input size anyway
- TrainingData ImplFiller(int filler)
+ TrainingEntry ImplFiller(int filler)
{
var length = contextSize;
var expected = Matrix.Create(length, tokenCount);
@@ -101,7 +81,7 @@ TrainingData ImplFiller(int filler)
cache[e.Expected].CopyTo(expected.RowRef(length - 1));
- return new TrainingData(fillerToken.HasValue ? e.Input.PadLeft(contextSize, fillerToken.Value) : e.Input, e.Expected, expected.Storage);
+ return new(fillerToken.HasValue ? e.Input.PadLeft(contextSize, fillerToken.Value) : e.Input, expected, e.Expected);
}
}
@@ -132,30 +112,24 @@ public static IEnumerable TokenizeSkipInvalid(this IEnumerable so
}
}
- public static IEnumerable> SentencesData(int contextSize)
- => GetLines(AssetManager.Sentences.FullName).InContextSize(contextSize).ExpandPerChar();
-
- public static IEnumerable> SpeechData(int contextSize)
- => GetLines(AssetManager.Speech.FullName).SlidingWindow(contextSize);
-
public static IEnumerable InContextSize(this IEnumerable data, int contextSize)
=> data.Where(s => s.Length <= contextSize);
- public static IEnumerable