Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
7360f91
Add pytorch style dataloader
dayo05 Nov 28, 2021
3de29c3
Remove GetDataEnumerable from interface
dayo05 Nov 30, 2021
857f9e6
Resolve review except get random value
dayo05 Nov 30, 2021
954fa8c
Rename method and create reset method
dayo05 Nov 30, 2021
63568f9
Add copyright string
dayo05 Nov 30, 2021
d8d249f
Use new shuffle algorithm
dayo05 Nov 30, 2021
ab2eb09
Add summery
dayo05 Nov 30, 2021
ca49340
Make able to create non-shuffle dataloader
dayo05 Nov 30, 2021
2cc2300
Make able to create non-shuffle dataset
dayo05 Nov 30, 2021
4cff460
Change tensor tuple to dictionary
dayo05 Nov 30, 2021
124790d
Merge branch 'main' into main
dayo05 Nov 30, 2021
17a9022
Replace files and make dataset abstract class
dayo05 Nov 30, 2021
d9dddb8
Merge remote-tracking branch 'origin/main'
dayo05 Nov 30, 2021
07d2689
Merge branch 'dotnet:main' into main
dayo05 Nov 30, 2021
2315e66
Merge branch 'main' into main
dayo05 Nov 30, 2021
ab6bd3e
Make dataloader disposable
dayo05 Dec 1, 2021
9631ed6
Make count priority abstract
dayo05 Dec 1, 2021
43dfcc1
Make dataloader to stack data as end of tensor
dayo05 Dec 1, 2021
225b9f8
Create simple test for dataset and dataloader
dayo05 Dec 1, 2021
a44ba7d
Merge remote-tracking branch 'origin/main'
dayo05 Dec 1, 2021
6e336ca
Make dispose enumerator
dayo05 Dec 1, 2021
5b5d9d5
Rename methods and add copyright notice
dayo05 Dec 3, 2021
49d7afd
Rename reset to Reset
dayo05 Dec 3, 2021
b250fe0
Make Count of dataset to long type
dayo05 Dec 3, 2021
703cb01
Make type of Count to long
dayo05 Dec 3, 2021
65f06cf
Make Count to long
dayo05 Dec 3, 2021
5f1707f
Rename methods
dayo05 Dec 3, 2021
3bda582
Make move tensor automatically to device
dayo05 Dec 3, 2021
22d0556
Make able to use custom seed
dayo05 Dec 3, 2021
eb53d61
Edit test for long
dayo05 Dec 3, 2021
921bb3d
Create test for custom seed
dayo05 Dec 3, 2021
8733f10
Merge branch 'main' into main
dayo05 Dec 3, 2021
168f87c
Make dataloader tensor dispose on MoveNext or Reset
dayo05 Dec 3, 2021
4db9b64
Change GCD algorithm
dayo05 Dec 3, 2021
5315175
Merge branch 'dotnet:main' into main
dayo05 Dec 25, 2021
eab28eb
Added document comments
dayo05 Dec 25, 2021
0deaddb
Add document comment for classes
dayo05 Dec 25, 2021
e9c20a4
Make catenate every tensor once
dayo05 Dec 25, 2021
cc5dfe0
Update doc comment
dayo05 Dec 25, 2021
dfff08a
Make able to set custom shuffler
dayo05 Jan 6, 2022
9dffab6
Fix mistake on creating custom shuffler
dayo05 Jan 8, 2022
991c377
Add fisher yates shuffler and make that as default
dayo05 Jan 8, 2022
2efce39
Fix mistake on shuffler
dayo05 Jan 7, 2022
00e16ad
Make dispose dataset once
dayo05 Jan 10, 2022
9291dc4
Undo changes on global.json
dayo05 Jan 12, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions src/TorchSharp/Data/DataLoader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;

using static TorchSharp.torch;

namespace TorchSharp.Data
{
/// <summary>
/// Iterates over a <see cref="Dataset"/> in mini-batches. Every batch is a
/// (data, label) tensor pair placed on the configured device.
/// </summary>
/// <remarks>
/// The loader does not own the dataset: enumerating it never disposes the
/// dataset, so the same loader can be enumerated once per epoch. Whoever
/// created the dataset remains responsible for disposing it.
/// </remarks>
public class DataLoader : IEnumerable<(Tensor, Tensor)>
{
    private readonly Dataset dataset;
    private readonly int batchSize;
    private readonly bool shuffle;
    private readonly Device device;

    /// <summary>
    /// Creates a loader over <paramref name="dataset"/>.
    /// </summary>
    /// <param name="dataset">Source of the samples.</param>
    /// <param name="batchSize">Maximum number of samples per batch; must be at least 1.</param>
    /// <param name="shuffle">When true, samples are visited in a random order that is redrawn for every enumeration.</param>
    /// <param name="device">Device the batch tensors are moved to; the CPU device is used when null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is less than 1.</exception>
    public DataLoader(Dataset dataset, int batchSize, bool shuffle = false, Device device = null)
    {
        if (dataset == null)
        {
            throw new ArgumentNullException(nameof(dataset));
        }
        if (batchSize < 1)
        {
            // A non-positive batch size would make Count divide by zero or go negative.
            throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be at least 1.");
        }

        this.dataset = dataset;
        this.batchSize = batchSize;
        this.shuffle = shuffle;
        this.device = device ?? CPU;
    }

    /// <summary>
    /// Starts a new pass over the dataset.
    /// </summary>
    /// <returns>An enumerator that yields one (data, label) pair per batch.</returns>
    public IEnumerator<(Tensor, Tensor)> GetEnumerator()
    {
        return new DataLoaderEnumerator(dataset, batchSize, shuffle, device);
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>
    /// Number of batches produced by one full enumeration, i.e. the dataset
    /// size divided by the batch size, rounded up. An empty dataset yields 0.
    /// </summary>
    public long Count => (dataset.Count() + batchSize - 1) / batchSize;

    /// <summary>
    /// Enumerator that pulls up to <c>batchSize</c> items from the dataset
    /// per step and assembles them into a single (data, label) pair.
    /// </summary>
    private sealed class DataLoaderEnumerator : IEnumerator<(Tensor, Tensor)>
    {
        private readonly Dataset dataset;
        private readonly int batchSize;
        private readonly Device device;
        // Null when shuffling is disabled.
        private readonly Random random;
        private IEnumerator<object> load;
        private (Tensor, Tensor) current;

        public DataLoaderEnumerator(Dataset dataset, int batchSize, bool shuffle, Device device)
        {
            this.dataset = dataset;
            this.batchSize = batchSize;
            this.device = device;
            random = shuffle ? new Random() : null;
            load = CreateItemEnumerator();
        }

        // Builds a fresh item enumerator, in random order when shuffling is enabled.
        private IEnumerator<object> CreateItemEnumerator()
        {
            var items = dataset.GetDataEnumerable();
            if (random != null)
            {
                // OrderBy buffers the sequence and draws one random key per item.
                items = items.OrderBy(_ => random.Next());
            }
            return items.GetEnumerator();
        }

        /// <summary>
        /// Assembles the next batch from up to <c>batchSize</c> items.
        /// </summary>
        /// <returns>False when the dataset is exhausted, true otherwise.</returns>
        public bool MoveNext()
        {
            if (!load.MoveNext())
            {
                return false;
            }

            var dataParts = new List<Tensor>();
            var labelParts = new List<Tensor>();
            do
            {
                var (data, label) = dataset.GetTensor(load.Current);
                // Out-of-place unsqueeze: the tensor handed out by the dataset is never mutated.
                dataParts.Add(data.unsqueeze(0));
                labelParts.Add(label);
            }
            // The count is checked first so no item beyond the batch is consumed.
            while (dataParts.Count < batchSize && load.MoveNext());

            // Concatenate once per batch instead of once per sample.
            var dataBatch = cat(dataParts, 0);
            foreach (var part in dataParts)
            {
                // These are temporaries created above; the concatenated batch no longer needs them.
                part.Dispose();
            }

            // A lone label is passed through unchanged; several are joined along dimension 0.
            var labelBatch = labelParts.Count == 1 ? labelParts[0] : cat(labelParts, 0);

            // Move to the target device once here rather than on every read of Current.
            current = (dataBatch.to(device), labelBatch.to(device));
            return true;
        }

        /// <summary>
        /// Restarts the enumeration from the beginning. The item enumerator is
        /// rebuilt rather than reset, because LINQ-based enumerators do not
        /// support <c>Reset</c>; a shuffled order is redrawn.
        /// </summary>
        public void Reset()
        {
            load.Dispose();
            load = CreateItemEnumerator();
            current = default;
        }

        /// <summary>
        /// The batch produced by the most recent successful <see cref="MoveNext"/>.
        /// </summary>
        public (Tensor, Tensor) Current => current;

        object IEnumerator.Current => Current;

        /// <summary>
        /// Releases the item enumerator. The dataset is shared with the loader
        /// and is deliberately left untouched so later enumerations still work.
        /// </summary>
        public void Dispose()
        {
            load.Dispose();
        }
    }
}
}
14 changes: 14 additions & 0 deletions src/TorchSharp/Data/Dataset.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using System;
using System.Collections.Generic;

using static TorchSharp.torch;

namespace TorchSharp.Data
Comment thread
dayo05 marked this conversation as resolved.
Outdated
{
/// <summary>
/// A disposable collection of samples that can be enumerated and converted
/// to tensors one item at a time.
/// </summary>
public interface Dataset : IDisposable
{
    /// <summary>
    /// Gets the number of items in the dataset.
    /// </summary>
    /// <returns>The item count.</returns>
    long Count();

    /// <summary>
    /// Produces the tensor pair for one item.
    /// </summary>
    /// <param name="index">
    /// An item taken from <see cref="GetDataEnumerable"/>; its concrete type is
    /// chosen by the implementation.
    /// </param>
    /// <returns>
    /// A pair of tensors. <c>DataLoader</c> treats the first element as the
    /// sample data and the second as its label.
    /// </returns>
    (Tensor, Tensor) GetTensor(object index);

    /// <summary>
    /// Enumerates the items of the dataset. Each yielded object is suitable
    /// for passing to <see cref="GetTensor"/>.
    /// </summary>
    /// <returns>The sequence of items.</returns>
    IEnumerable<object> GetDataEnumerable();
}
}