66using System . IO ;
77using System . Threading ;
88using System . Threading . Tasks ;
9+ using Microsoft . Extensions . AI ;
910using Microsoft . Shared . Diagnostics ;
1011using ModelContextProtocol . Client ;
1112using ModelContextProtocol . Protocol ;
@@ -42,21 +43,23 @@ public override async Task<IngestionDocument> ReadAsync(FileInfo source, string
4243 throw new FileNotFoundException ( "The specified file does not exist." , source . FullName ) ;
4344 }
4445
45- // Read file content as base64 data URI
46+ // Read file content and create DataContent
4647#if NET
47- byte [ ] fileBytes = await File . ReadAllBytesAsync ( source . FullName , cancellationToken ) . ConfigureAwait ( false ) ;
48+ ReadOnlyMemory < byte > fileBytes = await File . ReadAllBytesAsync ( source . FullName , cancellationToken ) . ConfigureAwait ( false ) ;
4849#else
49- byte [ ] fileBytes ;
50+ ReadOnlyMemory < byte > fileBytes ;
5051 using ( FileStream fs = new ( source . FullName , FileMode . Open , FileAccess . Read , FileShare . Read , 1 , FileOptions . Asynchronous ) )
5152 {
52- using MemoryStream ms = new ( ) ;
53+ using MemoryStream ms = new ( ( int ) Math . Min ( int . MaxValue , fs . Length ) ) ;
5354 await fs . CopyToAsync ( ms ) . ConfigureAwait ( false ) ;
54- fileBytes = ms . ToArray ( ) ;
55+ fileBytes = ms . GetBuffer ( ) . AsMemory ( 0 , ( int ) ms . Length ) ;
5556 }
5657#endif
57- string dataUri = CreateDataUri ( fileBytes , mediaType ) ;
58+ DataContent dataContent = new (
59+ fileBytes ,
60+ string . IsNullOrEmpty ( mediaType ) ? "application/octet-stream" : mediaType ! ) ;
5861
59- string markdown = await ConvertToMarkdownAsync ( dataUri , cancellationToken ) . ConfigureAwait ( false ) ;
62+ string markdown = await ConvertToMarkdownAsync ( dataContent , cancellationToken ) . ConfigureAwait ( false ) ;
6063
6164 return MarkdownParser . Parse ( markdown , identifier ) ;
6265 }
@@ -67,31 +70,23 @@ public override async Task<IngestionDocument> ReadAsync(Stream source, string id
6770 _ = Throw . IfNull ( source ) ;
6871 _ = Throw . IfNullOrEmpty ( identifier ) ;
6972
70- // Read stream content as base64 data URI
71- using MemoryStream ms = new ( ) ;
73+ // Read stream content and create DataContent
74+ using MemoryStream ms = source . CanSeek ? new ( ( int ) Math . Min ( int . MaxValue , source . Length ) ) : new ( ) ;
7275#if NET
7376 await source . CopyToAsync ( ms , cancellationToken ) . ConfigureAwait ( false ) ;
7477#else
7578 await source . CopyToAsync ( ms ) . ConfigureAwait ( false ) ;
7679#endif
77- byte [ ] fileBytes = ms . ToArray ( ) ;
78- string dataUri = CreateDataUri ( fileBytes , mediaType ) ;
80+ DataContent dataContent = new (
81+ ms . GetBuffer ( ) . AsMemory ( 0 , ( int ) ms . Length ) ,
82+ string . IsNullOrEmpty ( mediaType ) ? "application/octet-stream" : mediaType ) ;
7983
80- string markdown = await ConvertToMarkdownAsync ( dataUri , cancellationToken ) . ConfigureAwait ( false ) ;
84+ string markdown = await ConvertToMarkdownAsync ( dataContent , cancellationToken ) . ConfigureAwait ( false ) ;
8185
8286 return MarkdownParser . Parse ( markdown , identifier ) ;
8387 }
8488
85- #pragma warning disable S3995 // URI return values should not be strings
86- private static string CreateDataUri ( byte [ ] fileBytes , string ? mediaType )
87- #pragma warning restore S3995 // URI return values should not be strings
88- {
89- string base64Content = Convert . ToBase64String ( fileBytes ) ;
90- string mimeType = string . IsNullOrEmpty ( mediaType ) ? "application/octet-stream" : mediaType ! ;
91- return $ "data:{ mimeType } ;base64,{ base64Content } ";
92- }
93-
94- private async Task < string > ConvertToMarkdownAsync ( string dataUri , CancellationToken cancellationToken )
89+ private async Task < string > ConvertToMarkdownAsync ( DataContent dataContent , CancellationToken cancellationToken )
9590 {
9691 // Create HTTP client transport for MCP
9792 HttpClientTransport transport = new ( new HttpClientTransportOptions
@@ -109,7 +104,7 @@ private async Task<string> ConvertToMarkdownAsync(string dataUri, CancellationTo
109104 // Build parameters for convert_to_markdown tool
110105 Dictionary < string , object ? > parameters = new ( )
111106 {
112- [ "uri" ] = dataUri
107+ [ "uri" ] = dataContent . Uri
113108 } ;
114109
115110 // Call the convert_to_markdown tool
0 commit comments