-
Notifications
You must be signed in to change notification settings - Fork 219
Add pytorch style dataloader #463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit
Hold shift + click to select a range
7360f91
Add pytorch style dataloader
dayo05 3de29c3
Remove GetDataEnumerable from interface
dayo05 857f9e6
Resolve review except get random value
dayo05 954fa8c
Rename method and create reset method
dayo05 63568f9
Add copyright string
dayo05 d8d249f
Use new shuffle algorithm
dayo05 ab2eb09
Add summery
dayo05 ca49340
Make able to create non-shuffle dataloader
dayo05 2cc2300
Make able to create non-shuffle dataset
dayo05 4cff460
Change tensor tuple to dictionary
dayo05 124790d
Merge branch 'main' into main
dayo05 17a9022
Replace files and make dataset abstract class
dayo05 d9dddb8
Merge remote-tracking branch 'origin/main'
dayo05 07d2689
Merge branch 'dotnet:main' into main
dayo05 2315e66
Merge branch 'main' into main
dayo05 ab6bd3e
Make dataloader disposable
dayo05 9631ed6
Make count priority abstract
dayo05 43dfcc1
Make dataloader to stack data as end of tensor
dayo05 225b9f8
Create simple test for dataset and dataloader
dayo05 a44ba7d
Merge remote-tracking branch 'origin/main'
dayo05 6e336ca
Make dispose enumerator
dayo05 5b5d9d5
Rename methods and add copyright notice
dayo05 49d7afd
Rename reset to Reset
dayo05 b250fe0
Make Count of dataset to long type
dayo05 703cb01
Make type of Count to long
dayo05 65f06cf
Make Count to long
dayo05 5f1707f
Rename methods
dayo05 3bda582
Make move tensor automatically to device
dayo05 22d0556
Make able to use custom seed
dayo05 eb53d61
Edit test for long
dayo05 921bb3d
Create test for custom seed
dayo05 8733f10
Merge branch 'main' into main
dayo05 168f87c
Make dataloader tensor dispose on MoveNext or Reset
dayo05 4db9b64
Change GCD algorithm
dayo05 5315175
Merge branch 'dotnet:main' into main
dayo05 eab28eb
Added document comments
dayo05 0deaddb
Add document comment for classes
dayo05 e9c20a4
Make catenate every tensor once
dayo05 cc5dfe0
Update doc comment
dayo05 dfff08a
Make able to set custom shuffler
dayo05 9dffab6
Fix mistake on creating custom shuffler
dayo05 991c377
Add fisher yates shuffler and make that as default
dayo05 2efce39
Fix mistake on shuffler
dayo05 00e16ad
Make dispose dataset once
dayo05 9291dc4
Undo changes on global.json
dayo05 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| using System; | ||
| using System.Linq; | ||
| using System.Collections; | ||
| using System.Collections.Generic; | ||
|
|
||
| using static TorchSharp.torch; | ||
|
|
||
| namespace TorchSharp.Data | ||
| { | ||
| public class DataLoader: IEnumerable<(Tensor, Tensor)> | ||
| { | ||
| private Dataset dataset; | ||
| private int batchSize; | ||
| private bool shuffle; | ||
| private Device device; | ||
|
|
||
| /// <summary> | ||
| /// Creates a loader that groups the samples of <paramref name="dataset"/> into batches. | ||
| /// </summary> | ||
| /// <param name="dataset">Source of the samples; must not be null.</param> | ||
| /// <param name="batchSize">Maximum number of samples per batch; must be positive.</param> | ||
| /// <param name="shuffle">When true, every enumeration visits the samples in a random order.</param> | ||
| /// <param name="device">Device the batch tensors are moved to; CPU is used when null.</param> | ||
| /// <exception cref="ArgumentNullException"><paramref name="dataset"/> is null.</exception> | ||
| /// <exception cref="ArgumentOutOfRangeException"><paramref name="batchSize"/> is zero or negative.</exception> | ||
| public DataLoader(Dataset dataset, int batchSize, bool shuffle = false, Device device = null) | ||
| { | ||
| // Fail fast: without these checks a bad argument only surfaces later, as a | ||
| // NullReferenceException or as a DivideByZeroException when Count is read. | ||
| if (dataset is null) | ||
| { | ||
| throw new ArgumentNullException(nameof(dataset)); | ||
| } | ||
| if (batchSize <= 0) | ||
| { | ||
| throw new ArgumentOutOfRangeException(nameof(batchSize), batchSize, "Batch size must be positive."); | ||
| } | ||
| this.dataset = dataset; | ||
| this.batchSize = batchSize; | ||
| this.shuffle = shuffle; | ||
| this.device = device ?? CPU; | ||
| } | ||
| /// <summary> | ||
| /// Starts a new pass over the dataset using the batch size, shuffle flag and device given at construction. | ||
| /// </summary> | ||
| /// <returns>A fresh enumerator yielding one (data, label) tensor pair per batch.</returns> | ||
| public IEnumerator<(Tensor, Tensor)> GetEnumerator() | ||
| => new DataLoaderEnumerator(dataset, batchSize, shuffle, device); | ||
|
|
||
| /// <summary>Non-generic entry point; forwards to the generic <c>GetEnumerator</c>.</summary> | ||
| IEnumerator IEnumerable.GetEnumerator() => GetEnumerator(); | ||
|
|
||
| /// <summary> | ||
| /// Number of batches one full enumeration produces: the dataset size divided by the batch size, rounded up. | ||
| /// </summary> | ||
| public long Count | ||
| { | ||
| get | ||
| { | ||
| var sampleCount = dataset.Count(); | ||
| // Integer division truncates toward zero, so (0 - 1) / batchSize + 1 would report | ||
| // one batch for an empty dataset; an empty dataset has no batches. | ||
| return sampleCount <= 0 ? 0 : (sampleCount - 1) / batchSize + 1; | ||
| } | ||
| } | ||
|
|
||
| private class DataLoaderEnumerator : IEnumerator<(Tensor, Tensor)> | ||
| { | ||
| private Dataset dataset; | ||
| private IEnumerator<object> load; | ||
| private int batchSize; | ||
| private Device device; | ||
| /// <summary> | ||
| /// Captures the dataset, batch size and target device, and opens the enumerator over the | ||
| /// dataset's items that MoveNext will consume. | ||
| /// </summary> | ||
| /// <param name="dataset">Dataset whose items are enumerated and converted with GetTensor.</param> | ||
| /// <param name="batchSize">Upper bound on the number of samples gathered per batch.</param> | ||
| /// <param name="shuffle">When true, the items are visited in a random order.</param> | ||
| /// <param name="device">Device the tensors returned by Current are moved to.</param> | ||
| public DataLoaderEnumerator(Dataset dataset, int batchSize, bool shuffle, Device device) | ||
| { | ||
| this.dataset = dataset; | ||
| this.batchSize = batchSize; | ||
| this.device = device; | ||
|
|
||
| // Shuffle by pairing every item with a random integer and sorting on that integer; | ||
| // without shuffle the items are taken in the order GetDataEnumerable yields them. | ||
| if (shuffle) | ||
| { | ||
| var r = new Random(); | ||
| load = dataset.GetDataEnumerable() | ||
| .Select(x => new {Number = r.Next(), Item = x}) | ||
| .OrderBy(x => x.Number) | ||
| .Select(x => x.Item) | ||
| .GetEnumerator(); | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
| } | ||
| else load = dataset.GetDataEnumerable().GetEnumerator(); | ||
| } | ||
|
|
||
| private Tensor dataTensor; | ||
| private Tensor labelTensor; | ||
| private (Tensor, Tensor) tmp; | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
|
|
||
| /// <summary> | ||
| /// Assembles the next batch from up to batchSize samples of the dataset. | ||
| /// Each data tensor gets a new leading dimension and the results are concatenated along it; | ||
| /// label tensors are concatenated along their existing first dimension. | ||
| /// </summary> | ||
| /// <returns> | ||
| /// false when no samples remain; otherwise true, with the batch available through Current. | ||
| /// The final batch may hold fewer than batchSize samples. | ||
| /// </returns> | ||
| public bool MoveNext() | ||
| { | ||
| if (!load.MoveNext()) return false; | ||
| var dataParts = new List<Tensor>(); | ||
| var labelParts = new List<Tensor>(); | ||
| try | ||
| { | ||
| do | ||
| { | ||
| var (data, label) = dataset.GetTensor(load.Current); | ||
| // Use the out-of-place unsqueeze: the in-place variant would reshape the | ||
| // tensor object handed out by the dataset, which the dataset may still hold. | ||
| dataParts.Add(data.unsqueeze(0)); | ||
| labelParts.Add(label); | ||
| } | ||
| while (dataParts.Count < batchSize && load.MoveNext()); | ||
| // Concatenate once per batch. Concatenating after every sample copied the growing | ||
| // batch each time and left every intermediate tensor undisposed. | ||
| dataTensor = cat(dataParts, 0); | ||
| labelTensor = cat(labelParts, 0); | ||
| } | ||
| finally | ||
| { | ||
| // The unsqueezed views were created here, so they are released here. | ||
| // Tensors returned by the dataset are left untouched. | ||
| foreach (var part in dataParts) | ||
| { | ||
| part.Dispose(); | ||
| } | ||
| } | ||
| return true; | ||
| } | ||
|
|
||
| /// <summary>Forwards the reset request to the underlying item enumerator.</summary> | ||
| // NOTE(review): enumerators produced by LINQ operators or iterator methods usually throw | ||
| // NotSupportedException from Reset - confirm this path is usable for the shuffled case. | ||
| public void Reset() => load.Reset(); | ||
|
|
||
| /// <summary> | ||
| /// The batch assembled by the last MoveNext, as a (data, label) pair. | ||
| /// Every access calls to(device) on both tensors. | ||
| /// </summary> | ||
| public (Tensor, Tensor) Current | ||
| { | ||
| get | ||
| { | ||
| var data = dataTensor.to(device); | ||
| var labels = labelTensor.to(device); | ||
| return (data, labels); | ||
| } | ||
| } | ||
|
|
||
| /// <summary>Non-generic view of Current; returns the same batch tuple as an object.</summary> | ||
| object IEnumerator.Current | ||
| { | ||
| get { return Current; } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Releases the item enumerator opened by this batch enumerator. | ||
| /// </summary> | ||
| public void Dispose() | ||
| { | ||
| // Only the item enumerator belongs to this object. The dataset was passed in and is | ||
| // reused by every enumerator the loader creates; because foreach disposes its | ||
| // enumerator, disposing the dataset here would break any second pass over the loader. | ||
| load.Dispose(); | ||
| } | ||
| } | ||
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| using System; | ||
| using System.Collections.Generic; | ||
|
|
||
| using static TorchSharp.torch; | ||
|
|
||
| namespace TorchSharp.Data | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
| { | ||
| /// <summary> | ||
| /// Source of samples for a DataLoader: exposes the items to iterate, converts one item | ||
| /// into a pair of tensors, and reports how many samples exist. | ||
| /// </summary> | ||
| public interface Dataset: IDisposable | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
| { | ||
| /// <summary>Total number of samples; DataLoader uses it to compute its batch count.</summary> | ||
| public long Count(); | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
| /// <summary>Converts one item into a tensor pair; DataLoader treats the first as data and the second as label.</summary> | ||
| /// <param name="index">An element previously yielded by GetDataEnumerable.</param> | ||
| public (Tensor, Tensor) GetTensor(object index); | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
| /// <summary>Enumerates the items that identify the samples; each one is later passed to GetTensor.</summary> | ||
| public IEnumerable<object> GetDataEnumerable(); | ||
|
dayo05 marked this conversation as resolved.
Outdated
|
||
| } | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.