Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## Unreleased
- Types: Added support for BLOB type, via base64 encoding

## 2026/06/17 0.43.0
- Types: Improved support for FLOAT type, converging to FLOAT vs. DOUBLE
- Types: Added method `ObjectArray.as_generic` for better reverse type lookups
Expand Down
3 changes: 3 additions & 0 deletions src/sqlalchemy_cratedb/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ def visit_TIMESTAMP(self, type_, **kw):
"""
return "TIMESTAMP %s" % ((type_.timezone and "WITH" or "WITHOUT") + " TIME ZONE",)

def visit_BLOB(self, type_, **kw):
    """
    Render the BLOB type as ``STRING``.

    The type argument and any keyword arguments are ignored; the
    returned type name is always the same.
    NOTE(review): presumably chosen because binary values are transported
    as base64-encoded text by the `LargeBinary` type -- confirm.
    """
    return "STRING"

def visit_FLOAT(self, type_, **kw):
"""
From `sqlalchemy.sql.sqltypes.Float`.
Expand Down
2 changes: 2 additions & 0 deletions src/sqlalchemy_cratedb/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from .sa_version import SA_1_4, SA_2_0, SA_VERSION
from .type import FloatVector, ObjectArray, ObjectType
from .type.binary import LargeBinary
from .util import SSLMode

# For SQLAlchemy >= 1.1.
Expand Down Expand Up @@ -171,6 +172,7 @@ def process(value):
sqltypes.Date: Date,
sqltypes.DateTime: DateTime,
sqltypes.TIMESTAMP: DateTime,
sqltypes.LargeBinary: LargeBinary,
}

if SA_VERSION >= SA_2_0:
Expand Down
2 changes: 2 additions & 0 deletions src/sqlalchemy_cratedb/type/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .array import ObjectArray
from .binary import LargeBinary
from .geo import Geopoint, Geoshape
from .object import ObjectType
from .vector import FloatVector, knn_match

__all__ = [
Geopoint,
Geoshape,
LargeBinary,
ObjectArray,
ObjectType,
FloatVector,
Expand Down
35 changes: 35 additions & 0 deletions src/sqlalchemy_cratedb/type/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import base64

from sqlalchemy import String


class LargeBinary(String):
"""A type for large binary byte data.

The :class:`.LargeBinary` type corresponds to a large and/or unlengthed
Comment thread
amotl marked this conversation as resolved.
binary type for the target platform, such as BLOB on MySQL and BYTEA for
PostgreSQL. It also handles the necessary conversions for the DBAPI.

"""

__visit_name__ = "large_binary"

def bind_processor(self, dialect):
    """Build the converter applied to values bound to this type.

    Returns ``None`` (meaning: no conversion) when ``dialect.dbapi`` is
    ``None``. Otherwise returns a callable that maps ``None`` to ``None``
    and any other value to the text form of its base64 encoding.
    """
    if dialect.dbapi is None:
        return None

    def to_base64_text(raw):
        # NULL values pass through untouched.
        if raw is None:
            return None
        # TODO: Review whether this should return `DBAPIBinary(value)` instead.
        return base64.b64encode(raw).decode()

    return to_base64_text
Comment on lines +17 to +27

@amotl amotl Dec 24, 2023

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self, or others who want to pick this up:

Review that detail about returning a DBAPIBinary, or not.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@amotl Did you review it?

@amotl amotl Jun 4, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. I just staged the patch directly from its origin, where the code is in use and works well. However, it can still be wrong from a generic perspective.

Let's toggle this patch back into draft mode: Better safe than sorry, thanks! The other commit 465b55b has been removed here and diverged to a separate patch.

@amotl amotl Jun 18, 2026

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @bgunebakan,

Review that detail about returning a DBAPIBinary, or not.

No matter how the downstream supertask package currently consumes this API, I think we should aim to make it standards-compliant and then adjust supertask accordingly, when possible. In this spirit, I think this comment is the most prominent example of what we are referring to:

# TODO: return DBAPIBinary(value)

Just because we "unlocked" this feature the other day by harmonizing sqlalchemy-cratedb with supertask in a bare-bones way, to satisfy some basic explorations, doesn't automatically mean that the currently implemented interface is correct yet.

With kind regards,
Andreas.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi. I can see that this comment was removed from this patch. Unless it has been positively verified, I can't recommend merging the patch.

# TODO: return DBAPIBinary(value)


def result_processor(self, dialect, coltype):
    """Build the converter applied to values fetched for this type.

    The returned callable base64-decodes any non-``None`` value and
    hands ``None`` back unchanged. Neither ``dialect`` nor ``coltype``
    is consulted.
    """

    def from_base64(stored):
        return stored if stored is None else base64.b64decode(stored)

    return from_base64
75 changes: 75 additions & 0 deletions tests/test_type_binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# -*- coding: utf-8; -*-
#
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
# license agreements. See the NOTICE file distributed with this work for
# additional information regarding copyright ownership. Crate licenses
# this file to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. You may
# obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# However, if you have executed another commercial license agreement
# with Crate these terms will supersede the license and you may use the
# software solely pursuant to the terms of the relevant commercial agreement.

import base64
from unittest import TestCase
from unittest.mock import MagicMock

from sqlalchemy_cratedb.type.binary import LargeBinary


class LargeBinaryBindProcessorTest(TestCase):
    """Exercise `LargeBinary.bind_processor` against a mocked dialect."""

    def setUp(self):
        # The mocked dialect carries a non-None `dbapi` attribute by default.
        dialect = MagicMock()
        dialect.dbapi = MagicMock()
        self.dialect = dialect
        self.type = LargeBinary()

    def test_encodes_bytes_to_base64_string(self):
        payload = b"hello world"
        expected = base64.b64encode(payload).decode()
        encode = self.type.bind_processor(self.dialect)
        self.assertEqual(encode(payload), expected)

    def test_returns_none_for_none_input(self):
        encode = self.type.bind_processor(self.dialect)
        self.assertIsNone(encode(None))

    def test_returns_none_processor_when_dbapi_is_none(self):
        # Without a DBAPI module, no processor is handed out at all.
        self.dialect.dbapi = None
        self.assertIsNone(self.type.bind_processor(self.dialect))

    def test_encodes_arbitrary_binary_data(self):
        # Every possible byte value, once.
        payload = bytes(range(256))
        expected = base64.b64encode(payload).decode()
        encode = self.type.bind_processor(self.dialect)
        self.assertEqual(encode(payload), expected)


class LargeBinaryResultProcessorTest(TestCase):
    """Exercise `LargeBinary.result_processor` against a mocked dialect."""

    def setUp(self):
        self.dialect = MagicMock()
        self.type = LargeBinary()

    def test_decodes_base64_string_to_bytes(self):
        payload = b"hello world"
        stored = base64.b64encode(payload).decode()
        decode = self.type.result_processor(self.dialect, None)
        self.assertEqual(decode(stored), payload)

    def test_returns_none_for_none_input(self):
        decode = self.type.result_processor(self.dialect, None)
        self.assertIsNone(decode(None))

    def test_round_trip(self):
        # Encoding for the bind side and decoding on the result side
        # must yield the original bytes.
        payload = b"\x00\x01\x02\xff\xfe\xfd"
        encode = self.type.bind_processor(self.dialect)
        decode = self.type.result_processor(self.dialect, None)
        self.assertEqual(decode(encode(payload)), payload)