From b50434392717393dbbcab52ff453eb2ea186ae09 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Sat, 23 Dec 2023 13:46:28 +0100 Subject: [PATCH 1/3] Types: Add support for BLOB type, per base64 encoding --- CHANGES.md | 1 + src/sqlalchemy_cratedb/compiler.py | 3 ++ src/sqlalchemy_cratedb/dialect.py | 2 ++ src/sqlalchemy_cratedb/type/__init__.py | 2 ++ src/sqlalchemy_cratedb/type/binary.py | 44 +++++++++++++++++++++++++ 5 files changed, 52 insertions(+) create mode 100644 src/sqlalchemy_cratedb/type/binary.py diff --git a/CHANGES.md b/CHANGES.md index c6c88028..8998f4da 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,4 +1,5 @@ # Changelog +- Types: Added support for BLOB type, per base64 encoding ## 2026/05/28 0.42.0 - Added support for SQL Alchemy 2.1 diff --git a/src/sqlalchemy_cratedb/compiler.py b/src/sqlalchemy_cratedb/compiler.py index 851e8ebb..d257b718 100644 --- a/src/sqlalchemy_cratedb/compiler.py +++ b/src/sqlalchemy_cratedb/compiler.py @@ -257,6 +257,9 @@ def visit_TIMESTAMP(self, type_, **kw): """ return "TIMESTAMP %s" % ((type_.timezone and "WITH" or "WITHOUT") + " TIME ZONE",) + def visit_BLOB(self, type_, **kw): + return "STRING" + class CrateCompiler(compiler.SQLCompiler): def visit_getitem_binary(self, binary, operator, **kw): diff --git a/src/sqlalchemy_cratedb/dialect.py b/src/sqlalchemy_cratedb/dialect.py index d78306c0..818a7125 100644 --- a/src/sqlalchemy_cratedb/dialect.py +++ b/src/sqlalchemy_cratedb/dialect.py @@ -36,6 +36,7 @@ ) from .sa_version import SA_1_4, SA_2_0, SA_VERSION from .type import FloatVector, ObjectArray, ObjectType +from .type.binary import LargeBinary from .util import SSLMode TYPES_MAP = { @@ -161,6 +162,7 @@ def process(value): sqltypes.Date: Date, sqltypes.DateTime: DateTime, sqltypes.TIMESTAMP: DateTime, + sqltypes.LargeBinary: LargeBinary, } if SA_VERSION >= SA_2_0: diff --git a/src/sqlalchemy_cratedb/type/__init__.py b/src/sqlalchemy_cratedb/type/__init__.py index b524bb39..6d92e0e2 100644 --- a/src/sqlalchemy_cratedb/type/__init__.py +++ b/src/sqlalchemy_cratedb/type/__init__.py @@ -1,4 +1,5 @@ from .array import ObjectArray +from .binary import LargeBinary from .geo import Geopoint, Geoshape from .object import ObjectType from .vector import FloatVector, knn_match @@ -6,6 +7,7 @@ __all__ = [ Geopoint, Geoshape, + LargeBinary, ObjectArray, ObjectType, FloatVector, diff --git a/src/sqlalchemy_cratedb/type/binary.py b/src/sqlalchemy_cratedb/type/binary.py new file mode 100644 index 00000000..04b04073 --- /dev/null +++ b/src/sqlalchemy_cratedb/type/binary.py @@ -0,0 +1,44 @@ +import base64 + +import sqlalchemy as sa + + +class LargeBinary(sa.String): + """A type for large binary byte data. + + The :class:`.LargeBinary` type corresponds to a large and/or unlengthed + binary type for the target platform, such as BLOB on MySQL and BYTEA for + PostgreSQL. It also handles the necessary conversions for the DBAPI. + + """ + + __visit_name__ = "large_binary" + + def bind_processor(self, dialect): + if dialect.dbapi is None: + return None + + # TODO: DBAPIBinary = dialect.dbapi.Binary + + def process(value): + if value is not None: + # TODO: return DBAPIBinary(value) + return base64.b64encode(value).decode() + else: + return None + + return process + + # Python 3 has native bytes() type + # both sqlite3 and pg8000 seem to return it, + # psycopg2 as of 2.5 returns 'memoryview' + def result_processor(self, dialect, coltype): + if dialect.returns_native_bytes: + return None + + def process(value): + if value is not None: + return base64.b64decode(value) + return value + + return process From 08157ed8939689262c16cdd3f6d72c162f8841be Mon Sep 17 00:00:00 2001 From: Bilal Tonga Date: Thu, 18 Jun 2026 15:24:49 +0200 Subject: [PATCH 2/3] Fix formatting by removing unnecessary whitespace --- src/sqlalchemy_cratedb/compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlalchemy_cratedb/compiler.py b/src/sqlalchemy_cratedb/compiler.py index bf9c8a20..fe88d8e7 100644 --- a/src/sqlalchemy_cratedb/compiler.py +++ b/src/sqlalchemy_cratedb/compiler.py @@ -273,7 +273,7 @@ def visit_TIMESTAMP(self, type_, **kw): def visit_BLOB(self, type_, **kw): return "STRING" - + def visit_FLOAT(self, type_, **kw): """ From `sqlalchemy.sql.sqltypes.Float`. From dcdd4a925de0d38d90027fc155d7a03da75dc7ee Mon Sep 17 00:00:00 2001 From: Bilal Tonga Date: Fri, 19 Jun 2026 13:16:57 +0200 Subject: [PATCH 3/3] Add unit tests for bind and result processors --- src/sqlalchemy_cratedb/type/binary.py | 13 +---- tests/test_type_binary.py | 75 +++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 11 deletions(-) create mode 100644 tests/test_type_binary.py diff --git a/src/sqlalchemy_cratedb/type/binary.py b/src/sqlalchemy_cratedb/type/binary.py index 04b04073..4f67dd47 100644 --- a/src/sqlalchemy_cratedb/type/binary.py +++ b/src/sqlalchemy_cratedb/type/binary.py @@ -1,9 +1,9 @@ import base64 -import sqlalchemy as sa +from sqlalchemy import String -class LargeBinary(sa.String): +class LargeBinary(String): """A type for large binary byte data. The :class:`.LargeBinary` type corresponds to a large and/or unlengthed @@ -18,24 +18,15 @@ def bind_processor(self, dialect): if dialect.dbapi is None: return None - # TODO: DBAPIBinary = dialect.dbapi.Binary - def process(value): if value is not None: - # TODO: return DBAPIBinary(value) return base64.b64encode(value).decode() else: return None return process - # Python 3 has native bytes() type - # both sqlite3 and pg8000 seem to return it, - # psycopg2 as of 2.5 returns 'memoryview' def result_processor(self, dialect, coltype): - if dialect.returns_native_bytes: - return None - def process(value): if value is not None: return base64.b64decode(value) diff --git a/tests/test_type_binary.py b/tests/test_type_binary.py new file mode 100644 index 00000000..ff798b61 --- /dev/null +++ b/tests/test_type_binary.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8; -*- +# +# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor +# license agreements. See the NOTICE file distributed with this work for +# additional information regarding copyright ownership. Crate licenses +# this file to you under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. You may +# obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. +# +# However, if you have executed another commercial license agreement +# with Crate these terms will supersede the license and you may use the +# software solely pursuant to the terms of the relevant commercial agreement. + +import base64 +from unittest import TestCase +from unittest.mock import MagicMock + +from sqlalchemy_cratedb.type.binary import LargeBinary + + +class LargeBinaryBindProcessorTest(TestCase): + def setUp(self): + self.type = LargeBinary() + self.dialect = MagicMock() + self.dialect.dbapi = MagicMock() + + def test_encodes_bytes_to_base64_string(self): + process = self.type.bind_processor(self.dialect) + result = process(b"hello world") + self.assertEqual(result, base64.b64encode(b"hello world").decode()) + + def test_returns_none_for_none_input(self): + process = self.type.bind_processor(self.dialect) + self.assertIsNone(process(None)) + + def test_returns_none_processor_when_dbapi_is_none(self): + self.dialect.dbapi = None + processor = self.type.bind_processor(self.dialect) + self.assertIsNone(processor) + + def test_encodes_arbitrary_binary_data(self): + process = self.type.bind_processor(self.dialect) + data = bytes(range(256)) + result = process(data) + self.assertEqual(result, base64.b64encode(data).decode()) + + +class LargeBinaryResultProcessorTest(TestCase): + def setUp(self): + self.type = LargeBinary() + self.dialect = MagicMock() + + def test_decodes_base64_string_to_bytes(self): + process = self.type.result_processor(self.dialect, None) + encoded = base64.b64encode(b"hello world").decode() + result = process(encoded) + self.assertEqual(result, b"hello world") + + def test_returns_none_for_none_input(self): + process = self.type.result_processor(self.dialect, None) + self.assertIsNone(process(None)) + + def test_round_trip(self): + bind = self.type.bind_processor(self.dialect) + result = self.type.result_processor(self.dialect, None) + data = b"\x00\x01\x02\xff\xfe\xfd" + self.assertEqual(result(bind(data)), data)