Skip to content

Commit e3e0a6f

Browse files
Copilot, stephentoub, and adamsitnik
authored and committed
Use DataContent from Microsoft.Extensions.AI for data URI generation (#7027)
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: stephentoub <2642209+stephentoub@users.noreply.github.com>
Co-authored-by: adamsitnik <6011991+adamsitnik@users.noreply.github.com>
1 parent 1fce40d commit e3e0a6f

1 file changed

Lines changed: 18 additions & 23 deletions

File tree

src/Libraries/Microsoft.Extensions.DataIngestion.MarkItDown/MarkItDownMcpReader.cs

Lines changed: 18 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
using System.IO;
77
using System.Threading;
88
using System.Threading.Tasks;
9+
using Microsoft.Extensions.AI;
910
using Microsoft.Shared.Diagnostics;
1011
using ModelContextProtocol.Client;
1112
using ModelContextProtocol.Protocol;
@@ -42,21 +43,23 @@ public override async Task<IngestionDocument> ReadAsync(FileInfo source, string
4243
throw new FileNotFoundException("The specified file does not exist.", source.FullName);
4344
}
4445

45-
// Read file content as base64 data URI
46+
// Read file content and create DataContent
4647
#if NET
47-
byte[] fileBytes = await File.ReadAllBytesAsync(source.FullName, cancellationToken).ConfigureAwait(false);
48+
ReadOnlyMemory<byte> fileBytes = await File.ReadAllBytesAsync(source.FullName, cancellationToken).ConfigureAwait(false);
4849
#else
49-
byte[] fileBytes;
50+
ReadOnlyMemory<byte> fileBytes;
5051
using (FileStream fs = new(source.FullName, FileMode.Open, FileAccess.Read, FileShare.Read, 1, FileOptions.Asynchronous))
5152
{
52-
using MemoryStream ms = new();
53+
using MemoryStream ms = new((int)Math.Min(int.MaxValue, fs.Length));
5354
await fs.CopyToAsync(ms).ConfigureAwait(false);
54-
fileBytes = ms.ToArray();
55+
fileBytes = ms.GetBuffer().AsMemory(0, (int)ms.Length);
5556
}
5657
#endif
57-
string dataUri = CreateDataUri(fileBytes, mediaType);
58+
DataContent dataContent = new(
59+
fileBytes,
60+
string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!);
5861

59-
string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false);
62+
string markdown = await ConvertToMarkdownAsync(dataContent, cancellationToken).ConfigureAwait(false);
6063

6164
return MarkdownParser.Parse(markdown, identifier);
6265
}
@@ -67,31 +70,23 @@ public override async Task<IngestionDocument> ReadAsync(Stream source, string id
6770
_ = Throw.IfNull(source);
6871
_ = Throw.IfNullOrEmpty(identifier);
6972

70-
// Read stream content as base64 data URI
71-
using MemoryStream ms = new();
73+
// Read stream content and create DataContent
74+
using MemoryStream ms = source.CanSeek ? new((int)Math.Min(int.MaxValue, source.Length)) : new();
7275
#if NET
7376
await source.CopyToAsync(ms, cancellationToken).ConfigureAwait(false);
7477
#else
7578
await source.CopyToAsync(ms).ConfigureAwait(false);
7679
#endif
77-
byte[] fileBytes = ms.ToArray();
78-
string dataUri = CreateDataUri(fileBytes, mediaType);
80+
DataContent dataContent = new(
81+
ms.GetBuffer().AsMemory(0, (int)ms.Length),
82+
string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType);
7983

80-
string markdown = await ConvertToMarkdownAsync(dataUri, cancellationToken).ConfigureAwait(false);
84+
string markdown = await ConvertToMarkdownAsync(dataContent, cancellationToken).ConfigureAwait(false);
8185

8286
return MarkdownParser.Parse(markdown, identifier);
8387
}
8488

85-
#pragma warning disable S3995 // URI return values should not be strings
86-
private static string CreateDataUri(byte[] fileBytes, string? mediaType)
87-
#pragma warning restore S3995 // URI return values should not be strings
88-
{
89-
string base64Content = Convert.ToBase64String(fileBytes);
90-
string mimeType = string.IsNullOrEmpty(mediaType) ? "application/octet-stream" : mediaType!;
91-
return $"data:{mimeType};base64,{base64Content}";
92-
}
93-
94-
private async Task<string> ConvertToMarkdownAsync(string dataUri, CancellationToken cancellationToken)
89+
private async Task<string> ConvertToMarkdownAsync(DataContent dataContent, CancellationToken cancellationToken)
9590
{
9691
// Create HTTP client transport for MCP
9792
HttpClientTransport transport = new(new HttpClientTransportOptions
@@ -109,7 +104,7 @@ private async Task<string> ConvertToMarkdownAsync(string dataUri, CancellationTo
109104
// Build parameters for convert_to_markdown tool
110105
Dictionary<string, object?> parameters = new()
111106
{
112-
["uri"] = dataUri
107+
["uri"] = dataContent.Uri
113108
};
114109

115110
// Call the convert_to_markdown tool

0 commit comments

Comments
 (0)