microsoft · ffelixg · May 13, 2026 · May 13, 2026 · May 14, 2026 · May 26, 2026
diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp
@@ -4780,9 +4780,8 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
     ColumnBuffers buffers(numCols, fetchSize);
 
     if (!hasLobColumns && fetchSize > 0) {
-        // Bind columns — Arrow always uses SQL_C_CHAR for VARCHAR because
-        // it processes raw byte buffers directly, not via Python codecs.
-        ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, SQL_C_CHAR);
+        // Always bind as SQL_C_WCHAR so we never handle collation-dependent CHAR encodings
+        ret = SQLBindColums(hStmt, buffers, columnNames, numCols, fetchSize, SQL_C_WCHAR);
         if (!SQL_SUCCEEDED(ret)) {
             LOG("Error when binding columns");
             return ret;
@@ -4841,16 +4840,7 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
                         }
                         case SQL_CHAR:
                         case SQL_VARCHAR:
-                        case SQL_LONGVARCHAR: {
-                            ret = GetDataVar(hStmt, idxCol + 1, SQL_C_CHAR,
-                                             buffers.charBuffers[idxCol],
-                                             buffers.indicators[idxCol].data());
-                            if (!SQL_SUCCEEDED(ret)) {
-                                LOG("Error fetching CHAR LOB for column %d", idxCol + 1);
-                                return ret;
-                            }
-                            break;
-                        }
+                        case SQL_LONGVARCHAR:
                         case SQL_SS_XML:
                         case SQL_WCHAR:
                         case SQL_WVARCHAR:
@@ -5093,24 +5083,7 @@ SQLRETURN FetchArrowBatch_wrap(SqlHandlePtr StatementHandle, py::list& capsules,
                     }
                     case SQL_CHAR:
                     case SQL_VARCHAR:
-                    case SQL_LONGVARCHAR: {
-#if defined(__APPLE__) || defined(__linux__)
-                        uint64_t fetchBufferSize = columnSize * 4 + 1 /*null-terminator*/;
-#else
-                        uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/;
-#endif
-                        auto target_vec = &arrowColumnProducer->varData;
-                        auto start = arrowColumnProducer->varVal[idxRowArrow];
-                        while (target_vec->size() < start + dataLen) {
-                            target_vec->resize(target_vec->size() * 2);
-                        }
-
-                        std::memcpy(&(*target_vec)[start],
-                                    &buffers.charBuffers[idxCol][idxRowSql * fetchBufferSize],
-                                    dataLen);
-                        arrowColumnProducer->varVal[idxRowArrow + 1] = start + dataLen;
-                        break;
-                    }
+                    case SQL_LONGVARCHAR:
                     case SQL_SS_XML:
                     case SQL_WCHAR:
                     case SQL_WVARCHAR:

diff --git a/tests/test_004_cursor_arrow.py b/tests/test_004_cursor_arrow.py
@@ -313,6 +313,63 @@ def test_arrow_long_string(cursor: mssql_python.Cursor):
     assert batch.column(0).to_pylist() == [long_string]
 
 
+def test_arrow_varchar_utf8_collation_unicode(cursor: mssql_python.Cursor):
+    table = "#t_arrow_utf8_varchar"  # used by every SQL statement below
+    collation = "Latin1_General_100_CI_AS_SC_UTF8"  # test is skipped if the server rejects this
+    expected = [  # one row per entry, inserted and read back in this order
+        "Grüße",  # accented Latin letters
+        "你好😀",  # CJK followed by an emoji
+        "こんにちは",  # Japanese hiragana
+        "Привет",  # Cyrillic
+        "Hello 世界",  # ASCII mixed with CJK
+        "😀😃😄😁",  # emoji only
+        "",  # empty string must come back as "" rather than None
+        None,  # None must come back as None
+    ]
+
+    try:
+        cursor.execute(
+            f"create table {table} (id int primary key, v varchar(32) collate {collation})"
+        )
+    except Exception as exc:  # any failure to create the table is reported as a skip
+        pytest.skip(f"UTF-8 collation '{collation}' not supported: {exc}")
+
+    try:
+        for index, value in enumerate(expected, start=1):  # ids are 1-based list positions
+            cursor.execute(f"insert into {table} (id, v) values (?, ?)", index, value)
+        tbl = cursor.execute(f"select v from {table} order by id").arrow()  # rows in insert order
+        assert tbl.column(0).type.equals(pa.large_string())  # column type must be large_string
+        assert tbl.column(0).to_pylist() == expected  # values must round-trip unchanged
+    finally:
+        cursor.execute(f"drop table if exists {table}")  # runs whether or not the asserts passed
+
+
+def test_arrow_varchar_utf8_collation_cp1252(cursor: mssql_python.Cursor):
+    table = "#t_arrow_cp1252_varchar"  # used by every SQL statement below
+    collation = "SQL_Latin1_General_CP1_CI_AS"  # NOTE(review): no UTF8 suffix despite the test name
+    expected = [  # one row per entry, inserted and read back in this order
+        "Grüße",  # the non-empty values below are Latin text with accented letters
+        "café René!",
+        "naïve café",
+        "Español",
+        "Müller-Öztürk",
+        "Françoise",
+        "",  # empty string must come back as "" rather than None
+        None,  # None must come back as None
+    ]
+
+    cursor.execute(f"create table {table} (id int primary key, v varchar(32) collate {collation})")
+
+    try:
+        for index, value in enumerate(expected, start=1):  # ids are 1-based list positions
+            cursor.execute(f"insert into {table} (id, v) values (?, ?)", index, value)
+        tbl = cursor.execute(f"select v from {table} order by id").arrow()  # rows in insert order
+        assert tbl.column(0).type.equals(pa.large_string())  # column type must be large_string
+        assert tbl.column(0).to_pylist() == expected  # values must round-trip unchanged
+    finally:
+        cursor.execute(f"drop table if exists {table}")  # runs whether or not the asserts passed
+
+
 def test_rownumber_arrow_batch_interleaved_fetchmany(cursor: mssql_python.Cursor):
     """Verify that arrow_batch and fetchmany can be interleaved
     on the same result set with correct rownumber tracking and values."""