Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions src/llm/src/bedrock/mapper.lua
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,27 @@ local function convert_image_to_converse(content_part)
return nil
end

local function convert_document_to_converse(content_part)
if content_part.type == "document" and content_part.source then
if content_part.source.type == "base64" then
local format = "pdf"
if content_part.source.mime_type then
format = content_part.source.mime_type:match("/(.+)") or "pdf"
end
return {
document = {
format = format,
name = "document",
source = {
bytes = content_part.source.data
}
}
}
end
end
return nil
end

local function normalize_tool_arguments(raw_arguments)
local arguments = raw_arguments
if type(arguments) == "string" then
Expand Down Expand Up @@ -235,6 +256,11 @@ function mapper.map_messages(contract_messages)
if img then
table.insert(content_blocks, img)
end
elseif part.type == "document" then
local doc = convert_document_to_converse(part)
if doc then
table.insert(content_blocks, doc)
end
end
end
end
Expand All @@ -260,6 +286,11 @@ function mapper.map_messages(contract_messages)
if img then
table.insert(content_blocks, img)
end
elseif part.type == "document" then
local doc = convert_document_to_converse(part)
if doc then
table.insert(content_blocks, doc)
end
end
end
end
Expand Down
95 changes: 95 additions & 0 deletions src/llm/src/bedrock/mapper_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,101 @@ local function define_tests()
end)
end)

describe("Document Content Conversion", function()
it("should convert base64 PDF in user message to Converse document block", function()
local result = mapper.map_messages({
{
role = "user",
content = {
{
type = "document",
source = {
type = "base64",
mime_type = "application/pdf",
data = "JVBERi0xLjQ..."
}
},
{ type = "text", text = "Extract invoice data." }
}
}
})

test.eq(#result.messages, 1)
local doc_block = result.messages[1].content[1] :: any
test.not_nil(doc_block.document)
test.eq(doc_block.document.format, "pdf")
test.eq(doc_block.document.name, "document")
test.eq(doc_block.document.source.bytes, "JVBERi0xLjQ...")
local text_block = result.messages[1].content[2] :: any
test.eq(text_block.text, "Extract invoice data.")
end)

it("should convert base64 PDF in assistant message to Converse document block", function()
local result = mapper.map_messages({
{ role = "user", content = { { type = "text", text = "Review this." } } },
{
role = "assistant",
content = {
{
type = "document",
source = {
type = "base64",
mime_type = "application/pdf",
data = "JVBERi0xLjQ..."
}
}
}
}
})

test.eq(#result.messages, 2)
local doc_block = result.messages[2].content[1] :: any
test.not_nil(doc_block.document)
test.eq(doc_block.document.format, "pdf")
test.eq(doc_block.document.source.bytes, "JVBERi0xLjQ...")
end)

it("should extract format from mime_type subtype", function()
local result = mapper.map_messages({
{
role = "user",
content = {
{
type = "document",
source = {
type = "base64",
mime_type = "text/plain",
data = "aGVsbG8="
}
}
}
}
})

local doc_block = result.messages[1].content[1] :: any
test.not_nil(doc_block.document)
test.eq(doc_block.document.format, "plain")
end)

it("should default format to pdf when mime_type is absent", function()
local result = mapper.map_messages({
{
role = "user",
content = {
{
type = "document",
source = { type = "base64", data = "JVBERi0xLjQ..." }
}
}
}
})

local doc_block = result.messages[1].content[1] :: any
test.not_nil(doc_block.document)
test.eq(doc_block.document.format, "pdf")
end)
end)

describe("map_tools", function()
it("should map contract tools to Converse toolConfig format", function()
local tools, name_map = mapper.map_tools({
Expand Down
11 changes: 11 additions & 0 deletions src/llm/src/claude/mapper.lua
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,17 @@ local function convert_image_content(content_part)
}
}
end
elseif content_part.type == "document" and content_part.source then
if content_part.source.type == "base64" then
return {
type = "document",
source = {
type = "base64",
media_type = content_part.source.mime_type,
data = content_part.source.data
}
}
end
end
return content_part
end
Expand Down
56 changes: 56 additions & 0 deletions src/llm/src/claude/mapper_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,62 @@ local function define_tests()
end)
end)

describe("Document Content Conversion", function()
it("should convert base64 PDF document in user message to Anthropic format", function()
local result = mapper.map_messages({
{
role = prompt.ROLE.USER,
content = {
{
type = "document",
source = {
type = "base64",
mime_type = "application/pdf",
data = "JVBERi0xLjQ..."
}
},
{ type = "text", text = "Summarize this." }
}
}
})

test.eq(#result.messages, 1)
local doc_block = result.messages[1].content[1] :: any
test.eq(doc_block.type, "document")
test.not_nil(doc_block.source)
test.eq(doc_block.source.type, "base64")
test.eq(doc_block.source.media_type, "application/pdf")
test.eq(doc_block.source.data, "JVBERi0xLjQ...")
local text_block = result.messages[1].content[2] :: any
test.eq(text_block.type, "text")
end)

it("should convert base64 PDF document in assistant message to Anthropic format", function()
local result = mapper.map_messages({
{ role = prompt.ROLE.USER, content = { { type = "text", text = "Review this." } } },
{
role = prompt.ROLE.ASSISTANT,
content = {
{
type = "document",
source = {
type = "base64",
mime_type = "application/pdf",
data = "JVBERi0xLjQ..."
}
}
}
}
})

test.eq(#result.messages, 2)
local doc_block = result.messages[2].content[1] :: any
test.eq(doc_block.type, "document")
test.eq(doc_block.source.media_type, "application/pdf")
test.eq(doc_block.source.data, "JVBERi0xLjQ...")
end)
end)

describe("Streaming Finish Reason Preservation", function()
it("should preserve LENGTH finish_reason when streaming response has tool_calls", function()
local client_result = {
Expand Down
16 changes: 15 additions & 1 deletion src/llm/src/prompt.lua
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ prompt.ROLE = {
-- Content types
prompt.CONTENT_TYPE = {
TEXT = "text",
IMAGE = "image"
IMAGE = "image",
DOCUMENT = "document"
}

---------------------------
Expand Down Expand Up @@ -113,6 +114,19 @@ function prompt.image_base64(mime_type: string, data: string): ContentPart
}
end

-- Create a document content part from base64 data (PDF, etc.)
-- Works with both Anthropic direct API and AWS Bedrock Converse API.
function prompt.document_base64(mime_type: string, data: string): ContentPart
return {
type = prompt.CONTENT_TYPE.DOCUMENT,
source = {
type = "base64",
mime_type = mime_type,
data = data
}
}
end

---------------------------
-- Helper Functions
---------------------------
Expand Down
39 changes: 39 additions & 0 deletions src/llm/src/prompt_test.lua
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,45 @@ local function define_tests()
test.eq(c1.type, "text")
test.eq(c2.type, "image")
end)

it("should expose DOCUMENT in CONTENT_TYPE constants", function()
test.eq(prompt.CONTENT_TYPE.DOCUMENT, "document")
end)

it("should create a document content part from base64 data", function()
local part = prompt.document_base64("application/pdf", "JVBERi0xLjQ...")

test.eq(part.type, "document")
test.not_nil(part.source)
local src = assert(part.source)
test.eq(src.type, "base64")
test.eq(src.mime_type, "application/pdf")
test.eq(src.data, "JVBERi0xLjQ...")
end)

it("should include document parts in user messages", function()
local builder = prompt.new()

builder:add_message(prompt.ROLE.USER, {
prompt.document_base64("application/pdf", "JVBERi0xLjQ..."),
prompt.text("Extract the invoice data from this PDF."),
})

local messages = builder:get_messages()
test.eq(#messages, 1)

local msg = assert(messages[1])
test.eq(#msg.content, 2)

local doc_part = assert(msg.content[1])
test.eq(doc_part.type, "document")
test.eq(doc_part.source.mime_type, "application/pdf")
test.eq(doc_part.source.data, "JVBERi0xLjQ...")

local text_part = assert(msg.content[2])
test.eq(text_part.type, "text")
test.eq(text_part.text, "Extract the invoice data from this PDF.")
end)
end)
end

Expand Down
Loading