Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
ae6dd29
wipe
BarionLP Mar 2, 2026
a3add5f
core module work
BarionLP Mar 2, 2026
4616dcc
SoftMax module
BarionLP Mar 2, 2026
b7f8a06
evaluation types
BarionLP Mar 3, 2026
57e34a7
basic learning
BarionLP Mar 3, 2026
a6329fb
register adam impls
BarionLP Mar 3, 2026
75838cb
initializers & fixes
BarionLP Mar 3, 2026
21c1fbd
mnist test
BarionLP Mar 3, 2026
152fb3d
fix perceptron module
BarionLP Mar 4, 2026
2f77019
improve TrainingEvaluationResult tostring
BarionLP Mar 4, 2026
d1eee72
track confidence
BarionLP Mar 4, 2026
8b0e492
fix activation layers backward and perceptron module being tangled
BarionLP Mar 5, 2026
ff11585
make IModuleInitializer.Init return
BarionLP Mar 6, 2026
931fb52
move NumericsInitializer to Ametrin.Numerics
BarionLP Mar 6, 2026
00f2e6c
add serializer projects
BarionLP Mar 6, 2026
bf009f9
made IModuleSnapshot disposable
BarionLP Mar 8, 2026
a8a9dd1
proper VectorConverter and MatrixConverter
BarionLP Mar 9, 2026
119e8c9
vectorize adam
BarionLP Mar 9, 2026
23455da
pooling more vectors
BarionLP Mar 10, 2026
06e5915
make SoftMaxActivation dynamic
BarionLP Mar 10, 2026
c9137ae
reuse output vector of MnistInput
BarionLP Mar 10, 2026
1791cce
use new operator api
BarionLP Mar 10, 2026
a085953
implement AdamWeightReduction as ITernaryOperator
BarionLP Mar 10, 2026
eaeac7e
disable serialization code
BarionLP Mar 11, 2026
744c886
serialization
BarionLP Mar 28, 2026
a14f4b1
custom module serializer generator
BarionLP Mar 28, 2026
f7d65ec
readd text and embedding specific helpers
BarionLP Mar 28, 2026
48e77be
IndexEmbeddingModule
BarionLP Mar 29, 2026
e3b3222
readd SLM3
BarionLP Mar 29, 2026
fe7acde
add c4 data set
BarionLP Mar 29, 2026
37d831d
improvements
BarionLP Mar 29, 2026
0350360
Merge branch 'main' into rewrite
BarionLP Mar 29, 2026
a994524
updates
BarionLP Mar 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 121 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
## Get latest from `dotnet new gitignore`

Simple/
# dotenv files
.env

# User-specific files
*.rsuser
Expand Down Expand Up @@ -31,7 +32,6 @@ x86/
bld/
[Bb]in/
[Oo]bj/
[Oo]ut/
[Ll]og/
[Ll]ogs/

Expand Down Expand Up @@ -60,11 +60,14 @@ dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/

# .NET Core
# .NET
project.lock.json
project.fragment.lock.json
artifacts/

# Tye
.tye/

# ASP.NET Scaffolding
ScaffoldingReadMe.txt

Expand All @@ -85,6 +88,8 @@ StyleCopReport.xml
*.pgc
*.pgd
*.rsp
# but not Directory.Build.rsp, as it configures directory-level build defaults
!Directory.Build.rsp
*.sbr
*.tlb
*.tli
Expand All @@ -93,6 +98,7 @@ StyleCopReport.xml
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
Expand Down Expand Up @@ -296,6 +302,17 @@ node_modules/
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw

# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp

# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp

# Visual Studio 6 technical files
*.ncb
*.aps

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
Expand Down Expand Up @@ -352,6 +369,9 @@ ASALocalRun/
# Local History for Visual Studio
.localhistory/

# Visual Studio History (VSHistory) files
.vshistory/

# BeatPulse healthcheck temp database
healthchecksdb

Expand All @@ -363,4 +383,100 @@ MigrationBackup/

# Fody - auto-generated XML schema
FodyWeavers.xsd
/Simple/Simple.csproj

# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace

# Local History for Visual Studio Code
.history/

# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp

# JetBrains Rider
*.sln.iml
.idea/

##
## Visual Studio for Mac
##


# globs
Makefile.in
*.userprefs
*.usertasks
config.make
config.status
aclocal.m4
install-sh
autom4te.cache/
*.tar.gz
tarballs/
test-results/

# content below from: https://github.com/github/gitignore/blob/main/Global/macOS.gitignore
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon


# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

# content below from: https://github.com/github/gitignore/blob/main/Global/Windows.gitignore
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db

# Dump file
*.stackdump

# Folder config file
[Dd]esktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp

# Windows shortcuts
*.lnk

# Vim temporary swap files
*.swp
1 change: 1 addition & 0 deletions Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
<PropertyGroup>
<DotNetVersion>net10.0</DotNetVersion>
<Nullable>enable</Nullable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
</PropertyGroup>

<ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.15.8" />
<ProjectReference Include="..\ML.Runner\ML.Runner.csproj" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\MachineLearning.Training\MachineLearning.Training.csproj" />
<ProjectReference Include="..\MachineLearning.Samples\MachineLearning.Samples.csproj" />
<PackageReference Include="BenchmarkDotNet" Version="0.15.8" />
</ItemGroup>

</Project>
44 changes: 44 additions & 0 deletions ML.Benchy/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
using System.Buffers;
using Ametrin.Numerics;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using ML.Core.Training;
using Weight = float;

BenchmarkRunner.Run<Benchmarks>();

/// <summary>
/// BenchmarkDotNet harness intended to compare two ways of invoking the Adam weight reduction
/// over vectors of <see cref="Size"/> elements.
/// NOTE(review): both benchmark bodies are currently commented out, so the two benchmarks
/// measure an empty method until the calls are restored.
/// </summary>
[MemoryDiagnoser(false)]
public class Benchmarks
{
    private AdamOptimizer _optimizer = new() { LearningRate = 0.01f };

    // Buffers are allocated once in Setup and would be reused by every benchmark invocation.
    private Vector _logits;
    private Vector _expected;
    private Vector _destination;

    /// <summary>Number of elements in each benchmark vector.</summary>
    [Params(512)]
    public int Size { get; set; }

    /// <summary>
    /// Allocates the three vectors, fills the two inputs with uniform values in [-1, 1] from
    /// fixed seeds (so runs are repeatable), and initializes the optimizer.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        var length = Size;

        _logits = Vector.Create(length);
        _logits.Uniform(-1, 1, new Random(43));

        _expected = Vector.Create(length);
        _expected.Uniform(-1, 1, new Random(68));

        _destination = Vector.Create(length);

        _optimizer.Init();
    }

    /// <summary>Delegate-based variant (call currently disabled).</summary>
    [Benchmark]
    public void Delegates()
    {
        // SpanOperations.MapTo(_logits.AsSpan(), _expected.AsSpan(), _destination.AsSpan(), _optimizer.WeightReduction, _optimizer.WeightReduction);
    }

    /// <summary>Static operator variant (call currently disabled).</summary>
    [Benchmark]
    public void Static()
    {
        // SpanOperations.MapTo(_optimizer.WeightReductionOperation, _logits.AsSpan(), _expected.AsSpan(), _destination.AsSpan());
    }
}
18 changes: 18 additions & 0 deletions ML.Core/Attributes.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
namespace ML.Core.Attributes;

#pragma warning disable CS9113 // Parameter is unread. only required by sourcegen
/// <summary>
/// Marker attribute that can be applied to properties.
/// NOTE(review): presumably flags the property as a nested module for the source generator - confirm.
/// </summary>
[AttributeUsage(AttributeTargets.Property)]
public sealed class SubModuleAttribute : Attribute;

/// <summary>
/// Marker attribute that can be applied to properties.
/// NOTE(review): presumably flags the property as trainable weights for the source generator - confirm.
/// </summary>
[AttributeUsage(AttributeTargets.Property)]
public sealed class WeightsAttribute : Attribute;

/// <summary>
/// Marker attribute that can be applied to properties.
/// NOTE(review): exact meaning for the source generator is not visible here - confirm.
/// </summary>
[AttributeUsage(AttributeTargets.Property)]
public sealed class PropertyAttribute : Attribute;

/// <summary>
/// Class-level attribute. The <c>IncludeSerializer</c> argument (default <c>false</c>) is never
/// read by the attribute itself; per the surrounding CS9113 suppression it is only required by
/// the source generator.
/// </summary>
[AttributeUsage(AttributeTargets.Class)]
public sealed class GeneratedModuleAttribute(bool IncludeSerializer = false) : Attribute;

/// <summary>
/// Class-level attribute taking a <c>module</c> type. The argument is never read by the
/// attribute itself; per the surrounding CS9113 suppression it is only required by the
/// source generator.
/// </summary>
[AttributeUsage(AttributeTargets.Class)]
public sealed class GeneratedAdamAttribute(Type module) : Attribute;
#pragma warning restore CS9113 // Parameter is unread.
32 changes: 32 additions & 0 deletions ML.Core/Converters/MatrixConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using Ametrin.Serializer;

namespace ML.Core.Converters;

/// <summary>
/// Serialization converter for <see cref="Matrix"/>. A matrix is stored as an object with an
/// Int32 "RowCount" property followed by a "Storage" property holding the backing vector;
/// the column count is not stored and is derived from the storage length when reading.
/// </summary>
public sealed class MatrixConverter : ISerializationConverter<Matrix>
{
    // Registers this converter with the serializer when the type is first initialized.
    static MatrixConverter() => AmetrinSerializer.RegisterSerializer<MatrixConverter, Matrix>();

    /// <summary>Reads a matrix written by <see cref="WriteValue"/>.</summary>
    /// <param name="reader">Reader positioned at the start of the matrix object.</param>
    /// <returns>The matrix built from the stored row count and storage vector.</returns>
    public static Result<Matrix, DeserializationError> TryReadValue(IAmetrinReader reader)
    {
        using var scope = reader.ReadStartObject();
        var rows = scope.ReadInt32Property("RowCount");
        scope.ReadPropertyName("Storage");
        var values = VectorConverter.ReadValue<VectorConverter, Vector>(scope);
        reader.ReadEndObject();

        // NOTE(review): this shape check only runs in debug builds; a row count that is zero
        // or does not divide the storage length is not reported as a DeserializationError.
        Debug.Assert(values.Count % rows == 0);
        return Matrix.Of(rows, values.Count / rows, values);
    }

    /// <summary>Writes <paramref name="value"/> as a "RowCount" / "Storage" object.</summary>
    /// <param name="writer">Writer that receives the matrix object.</param>
    /// <param name="value">Matrix to serialize.</param>
    public static void WriteValue(IAmetrinWriter writer, Matrix value)
    {
        using var scope = writer.WriteStartObject();
        scope.WriteInt32Property("RowCount", value.RowCount);
        scope.WritePropertyName("Storage");
        VectorConverter.WriteValue(scope, value.Storage);
        writer.WriteEndObject();
    }
}
49 changes: 49 additions & 0 deletions ML.Core/Converters/ModuleSerializer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using System.IO;
using System.Runtime.CompilerServices;
using Ametrin.Serializer;
using Ametrin.Serializer.Readers;
using Ametrin.Serializer.Writers;
using ML.Core.Modules;

namespace ML.Core.Converters;

/// <summary>
/// Reads and writes modules to binary files. Every file starts with a "$format" marker and a
/// "$version" number, followed by the dynamically serialized module.
/// </summary>
public static class ModuleSerializer
{
    /// <summary>File extension of module files; the same value is written as the "$format" header marker.</summary>
    public const string FILE_EXTENSION = ".gmw";

    /// <summary>Version written into the "$version" header; <c>Read</c> rejects any other value.</summary>
    public const uint FORMAT_VERSION = 3;

    // Runs automatically when the assembly is loaded and registers the serializers
    // for the module types listed below.
#pragma warning disable CA2255
    [ModuleInitializer]
#pragma warning restore
    internal static void Init()
    {
        AmetrinSerializer.RegisterSerializer<SequenceModuleConverter<Vector>, SequenceModule<Vector>>();
        AmetrinSerializer.RegisterSerializer<SequenceModuleConverter<Matrix>, SequenceModule<Matrix>>();
        AmetrinSerializer.RegisterSerializer<SequenceModuleConverter<Tensor>, SequenceModule<Tensor>>();
        AmetrinSerializer.RegisterSerializer<EmbeddedModule<int[], Vector, int>, EmbeddedModule<int[], Vector, int>>();
    }

    /// <summary>
    /// Writes the format header followed by <paramref name="module"/> to <paramref name="file"/>,
    /// creating or overwriting the file.
    /// </summary>
    /// <param name="module">Module to serialize.</param>
    /// <param name="file">Destination file.</param>
    /// <exception cref="ArgumentNullException">An argument is <c>null</c>.</exception>
    public static void Write(IModule module, FileInfo file)
    {
        // Validate before file.Create() so a bad call never truncates an existing file.
        ArgumentNullException.ThrowIfNull(module);
        ArgumentNullException.ThrowIfNull(file);

        using var stream = file.Create();
        using var writer = new AmetrinBinaryWriter(stream);

        writer.WriteStringProperty("$format", FILE_EXTENSION);
        writer.WriteUInt32Property("$version", FORMAT_VERSION);

        AmetrinSerializer.WriteDynamic(writer, module);
    }

    /// <summary>
    /// Reads a module of type <typeparamref name="T"/> from <paramref name="file"/> after
    /// validating the format header.
    /// </summary>
    /// <typeparam name="T">Expected type of the stored module.</typeparam>
    /// <param name="file">File previously produced by <c>Write</c>.</param>
    /// <returns>The deserialized module.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is <c>null</c>.</exception>
    /// <exception cref="InvalidOperationException">
    /// The "$format" marker or the "$version" number does not match what this serializer writes.
    /// </exception>
    public static T Read<T>(FileInfo file)
    {
        ArgumentNullException.ThrowIfNull(file);

        using var stream = file.OpenRead();
        using var reader = new AmetrinBinaryReader(stream);

        var format = reader.ReadStringProperty("$format");
        if (format is not FILE_EXTENSION)
        {
            throw new InvalidOperationException(
                $"'{file.FullName}' is not a {FILE_EXTENSION} module file (found format marker '{format}').");
        }

        var version = reader.ReadUInt32Property("$version");
        if (version is not FORMAT_VERSION)
        {
            throw new InvalidOperationException(
                $"'{file.FullName}' uses format version {version}, but only version {FORMAT_VERSION} is supported.");
        }

        // A deserialization error is surfaced by throwing from the error value itself.
        return AmetrinSerializer.TryReadDynamic<T>(reader).Or(e => e.Throw<T>());
    }
}
21 changes: 21 additions & 0 deletions ML.Core/Converters/VectorConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
using Ametrin.Serializer;

namespace ML.Core.Converters;

/// <summary>
/// Serialization converter for <see cref="Vector"/>. A vector is stored as an array whose
/// elements are written with <c>WriteSingleValue</c> and read back with <c>TryReadSingleValue</c>.
/// </summary>
public sealed class VectorConverter : ISerializationConverter<Vector>
{
    // Registers this converter with the serializer when the type is first initialized.
    static VectorConverter() => AmetrinSerializer.RegisterSerializer<VectorConverter, Vector>();

    /// <summary>Reads an array of values and wraps it via <c>Vector.Of</c>.</summary>
    /// <param name="reader">Reader positioned at the array value.</param>
    /// <returns>The vector, or the error produced while reading the array.</returns>
    public static Result<Vector, DeserializationError> TryReadValue(IAmetrinReader reader)
        => reader
            .TryReadArrayValue(static r => r.TryReadSingleValue())
            .Map(Vector.Of);

    /// <summary>Writes every element of <paramref name="value"/> as one array value.</summary>
    /// <param name="writer">Writer that receives the array.</param>
    /// <param name="value">Vector to serialize.</param>
    public static void WriteValue(IAmetrinWriter writer, Vector value)
        => writer.WriteArrayValue(
            value.AsSpan(),
            static (w, component) => w.WriteSingleValue(component));
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
namespace MachineLearning.Data;
namespace ML.Core.Data;

public interface ITokenizer<TData>
{
public int TokenCount { get; }
public IEnumerable<int> Tokenize(TData data);
public int TokenizeSingle(TData data);
public TData GetToken(int data);
public string Decode(IEnumerable<int> tokens);

}
public TData Decode(IEnumerable<int> tokens);
}
Loading