From 59fcad450e4ca4711f37fa365fc0253cb40dd6a3 Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Tue, 30 Dec 2025 23:03:35 +0100 Subject: [PATCH 01/10] Document original behavior in test_encoding_buffersize --- tests/test_004_cursor.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index a54cffda..713126d8 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15018,3 +15018,24 @@ def test_close(db_connection): pytest.fail(f"Cursor close test failed: {e}") finally: cursor = db_connection.cursor() + + +def test_encoding_buffersize(cursor): + from mssql_python.helpers import check_error + from mssql_python.constants import ConstantsDDBC as ddbc_sql_const + cursor.execute( + "drop table if exists #t1;\n" + + "create table #t1 (a varchar(2) collate SQL_Latin1_General_CP1_CI_AS)\n" + + "insert into #t1 values (N'ßl')\n" + ) + with pytest.raises(Exception, match=".*GetData.*"): + cursor.execute("select * from #t1").fetchall() + with pytest.raises(Exception, match=".*Invalid cursor position.*"): + check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, cursor.hstmt, -1) + with pytest.raises(Exception, match=".*GetData.*"): + cursor.execute("select * from #t1").fetchmany(1) + with pytest.raises(Exception, match=".*Invalid Descriptor Index.*"): + check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, cursor.hstmt, -1) + assert cursor.execute("select * from #t1").fetchone()[0] == "l" + assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß".encode("utf-8")[1:] == b'\x9f' + assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" From c14964f023f94cfeb37b2308fb4c513ee33c033d Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Tue, 30 Dec 2025 23:31:40 +0100 Subject: [PATCH 02/10] Remove trivially false isLob --- mssql_python/pybind/ddbc_bindings.cpp | 11 +++-------- mssql_python/pybind/ddbc_bindings.h | 7 +++---- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 63696f91..fd1f1c2c 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -3587,8 +3587,7 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column // Fetch rows in batches // TODO: Move to anonymous namespace, since it is not used outside this file SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& columnNames, - py::list& rows, SQLUSMALLINT numCols, SQLULEN& numRowsFetched, - const std::vector& lobColumns) { + py::list& rows, SQLUSMALLINT numCols, SQLULEN& numRowsFetched) { LOG("FetchBatchData: Fetching data in batches"); SQLRETURN ret = SQLFetchScroll_ptr(hStmt, SQL_FETCH_NEXT, 0); if (ret == SQL_NO_DATA) { @@ -3607,15 +3606,12 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum SQLULEN columnSize; SQLULEN processedColumnSize; uint64_t fetchBufferSize; - bool isLob; }; std::vector columnInfos(numCols); for (SQLUSMALLINT col = 0; col < numCols; col++) { const auto& columnMeta = columnNames[col].cast(); columnInfos[col].dataType = columnMeta["DataType"].cast(); columnInfos[col].columnSize = columnMeta["ColumnSize"].cast(); - columnInfos[col].isLob = - std::find(lobColumns.begin(), lobColumns.end(), col + 1) != lobColumns.end(); columnInfos[col].processedColumnSize = columnInfos[col].columnSize; HandleZeroColumnSizeAtFetch(columnInfos[col].processedColumnSize); columnInfos[col].fetchBufferSize = @@ -3637,7 +3633,6 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum columnInfosExt[col].columnSize = columnInfos[col].columnSize; columnInfosExt[col].processedColumnSize = columnInfos[col].processedColumnSize; columnInfosExt[col].fetchBufferSize = columnInfos[col].fetchBufferSize; - columnInfosExt[col].isLob = columnInfos[col].isLob; // Map data type to processor function (switch executed once per column, // not per cell) @@ -4075,7 +4070,7 @@ SQLRETURN FetchMany_wrap(SqlHandlePtr StatementHandle, py::list& rows, int fetch SQLSetStmtAttr_ptr(hStmt, SQL_ATTR_ROW_ARRAY_SIZE, (SQLPOINTER)(intptr_t)fetchSize, 0); SQLSetStmtAttr_ptr(hStmt, SQL_ATTR_ROWS_FETCHED_PTR, &numRowsFetched, 0); - ret = FetchBatchData(hStmt, buffers, columnNames, rows, numCols, numRowsFetched, lobColumns); + ret = FetchBatchData(hStmt, buffers, columnNames, rows, numCols, numRowsFetched); if (!SQL_SUCCEEDED(ret) && ret != SQL_NO_DATA) { LOG("FetchMany_wrap: Error when fetching data - SQLRETURN=%d", ret); return ret; @@ -4208,7 +4203,7 @@ SQLRETURN FetchAll_wrap(SqlHandlePtr StatementHandle, py::list& rows, while (ret != SQL_NO_DATA) { ret = - FetchBatchData(hStmt, buffers, columnNames, rows, numCols, numRowsFetched, lobColumns); + FetchBatchData(hStmt, buffers, columnNames, rows, numCols, numRowsFetched); if (!SQL_SUCCEEDED(ret) && ret != SQL_NO_DATA) { LOG("FetchAll_wrap: Error when fetching data - SQLRETURN=%d", ret); return ret; diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index 391903ef..4419acef 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -651,7 +651,6 @@ struct ColumnInfoExt { SQLULEN columnSize; SQLULEN processedColumnSize; uint64_t fetchBufferSize; - bool isLob; }; // Forward declare FetchLobColumnData (defined in ddbc_bindings.cpp) - MUST be @@ -795,7 +794,7 @@ inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colIn // Fast path: Data fits in buffer (not LOB or truncated) // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence // '<' - if (!colInfo->isLob && numCharsInData < colInfo->fetchBufferSize) { + if (numCharsInData < colInfo->fetchBufferSize) { // Performance: Direct Python C API call - create string from buffer PyObject* pyStr = PyUnicode_FromStringAndSize( reinterpret_cast( @@ -838,7 +837,7 @@ inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colI // Fast path: Data fits in buffer (not LOB or truncated) // fetchBufferSize includes null-terminator, numCharsInData doesn't. Hence // '<' - if (!colInfo->isLob && numCharsInData < colInfo->fetchBufferSize) { + if (numCharsInData < colInfo->fetchBufferSize) { #if defined(__APPLE__) || defined(__linux__) // Performance: Direct UTF-16 decode (SQLWCHAR is 2 bytes on // Linux/macOS) @@ -902,7 +901,7 @@ inline void ProcessBinary(PyObject* row, ColumnBuffers& buffers, const void* col } // Fast path: Data fits in buffer (not LOB or truncated) - if (!colInfo->isLob && static_cast(dataLen) <= colInfo->processedColumnSize) { + if (static_cast(dataLen) <= colInfo->processedColumnSize) { // Performance: Direct Python C API call - create bytes from buffer PyObject* pyBytes = PyBytes_FromStringAndSize( reinterpret_cast( From 459868ae47ee5df411849fa7eabbf761866c4e5b Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Tue, 30 Dec 2025 23:47:47 +0100 Subject: [PATCH 03/10] Explain the buffer size issue in exception instead of attempting hopeless SQLGetData fallback --- mssql_python/pybind/ddbc_bindings.h | 33 ++++++++++++++++------------- tests/test_004_cursor.py | 10 ++------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index 4419acef..5ef74b09 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -653,11 +653,6 @@ struct ColumnInfoExt { uint64_t fetchBufferSize; }; -// Forward declare FetchLobColumnData (defined in ddbc_bindings.cpp) - MUST be -// outside namespace -py::object FetchLobColumnData(SQLHSTMT hStmt, SQLUSMALLINT col, SQLSMALLINT cType, bool isWideChar, - bool isBinary, const std::string& charEncoding = "utf-8"); - // Specialized column processors for each data type (eliminates switch in hot // loop) namespace ColumnProcessors { @@ -807,9 +802,12 @@ inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colIn PyList_SET_ITEM(row, col - 1, pyStr); } } else { - // Slow path: LOB data requires separate fetch call - PyList_SET_ITEM(row, col - 1, - FetchLobColumnData(hStmt, col, SQL_C_CHAR, false, false).release().ptr()); + // Reaching this case indicates an error in the code. + // This function is only called on columns bound by SQLBindCol. + // For such columns, the ODBC Driver does not allow us to compensate by + // fetching the remaining data using SQLGetData / FetchLobColumnData. + ThrowStdException( + "Internal error: CHAR/VARCHAR column data exceeds buffer size."); } } @@ -874,9 +872,12 @@ inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colI } #endif } else { - // Slow path: LOB data requires separate fetch call - PyList_SET_ITEM(row, col - 1, - FetchLobColumnData(hStmt, col, SQL_C_WCHAR, true, false).release().ptr()); + // Reaching this case indicates an error in the code. + // This function is only called on columns bound by SQLBindCol. + // For such columns, the ODBC Driver does not allow us to compensate by + // fetching the remaining data using SQLGetData / FetchLobColumnData. + ThrowStdException( + "Internal error: NCHAR/NVARCHAR column data exceeds buffer size."); } } @@ -914,10 +915,12 @@ inline void ProcessBinary(PyObject* row, ColumnBuffers& buffers, const void* col PyList_SET_ITEM(row, col - 1, pyBytes); } } else { - // Slow path: LOB data requires separate fetch call - PyList_SET_ITEM( - row, col - 1, - FetchLobColumnData(hStmt, col, SQL_C_BINARY, false, true, "").release().ptr()); + // Reaching this case indicates an error in the code. + // This function is only called on columns bound by SQLBindCol. + // For such columns, the ODBC Driver does not allow us to compensate by + // fetching the remaining data using SQLGetData / FetchLobColumnData. + ThrowStdException( + "Internal error: BINARY/VARBINARY column data exceeds buffer size."); } } diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 713126d8..1aad9952 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15021,21 +15021,15 @@ def test_close(db_connection): def test_encoding_buffersize(cursor): - from mssql_python.helpers import check_error - from mssql_python.constants import ConstantsDDBC as ddbc_sql_const cursor.execute( "drop table if exists #t1;\n" + "create table #t1 (a varchar(2) collate SQL_Latin1_General_CP1_CI_AS)\n" + "insert into #t1 values (N'ßl')\n" ) - with pytest.raises(Exception, match=".*GetData.*"): + with pytest.raises(Exception, match=".*Internal error: CHAR/VARCHAR column data exceeds buffer size.*"): cursor.execute("select * from #t1").fetchall() - with pytest.raises(Exception, match=".*Invalid cursor position.*"): - check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, cursor.hstmt, -1) - with pytest.raises(Exception, match=".*GetData.*"): + with pytest.raises(Exception, match=".*Internal error: CHAR/VARCHAR column data exceeds buffer size.*"): cursor.execute("select * from #t1").fetchmany(1) - with pytest.raises(Exception, match=".*Invalid Descriptor Index.*"): - check_error(ddbc_sql_const.SQL_HANDLE_STMT.value, cursor.hstmt, -1) assert cursor.execute("select * from #t1").fetchone()[0] == "l" assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß".encode("utf-8")[1:] == b'\x9f' assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" From a7e43dce300c85a4d04337cddea9e798b9871a95 Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Wed, 31 Dec 2025 01:06:41 +0100 Subject: [PATCH 04/10] Raise exception instead of returning corrupted data for fetchone Also add columnSize == 0 check to FetchLobColumnData which was likely missed. That way FetchLobColumnData gets called directly instead of going through a SQLGetData call with an empty buffer first --- mssql_python/pybind/ddbc_bindings.cpp | 37 ++++++++++++++++----------- tests/test_004_cursor.py | 6 +++-- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index fd1f1c2c..f6da107e 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -2960,12 +2960,13 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p row.append(raw_bytes); } } else { - // Buffer too small, fallback to streaming - LOG("SQLGetData: CHAR column %d data truncated " - "(buffer_size=%zu), using streaming LOB", - i, dataBuffer.size()); - row.append(FetchLobColumnData(hStmt, i, SQL_C_CHAR, false, false, - charEncoding)); + // Reaching this case indicates an error in the code. + // Theoretically, we could still compensate by calling SQLGetData or + // FetchLobColumnData more often, but then we would still have to process + // the data we already got from the above call to SQLGetData. + // Better to throw an exception and fix the code than to risk returning corrupted data. + ThrowStdException("SQLGetData returned data larger than " + "expected for CHAR column"); } } else if (dataLen == SQL_NULL_DATA) { LOG("SQLGetData: Column %d is NULL (CHAR)", i); @@ -3002,7 +3003,7 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p case SQL_WCHAR: case SQL_WVARCHAR: case SQL_WLONGVARCHAR: { - if (columnSize == SQL_NO_TOTAL || columnSize > 4000) { + if (columnSize == SQL_NO_TOTAL || columnSize == 0 || columnSize > 4000) { LOG("SQLGetData: Streaming LOB for column %d (SQL_C_WCHAR) " "- columnSize=%lu", i, (unsigned long)columnSize); @@ -3031,12 +3032,13 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p "length=%lu for column %d", (unsigned long)numCharsInData, i); } else { - // Buffer too small, fallback to streaming - LOG("SQLGetData: NVARCHAR column %d data " - "truncated, using streaming LOB", - i); - row.append(FetchLobColumnData(hStmt, i, SQL_C_WCHAR, true, false, - "utf-16le")); + // Reaching this case indicates an error in the code. + // Theoretically, we could still compensate by calling SQLGetData or + // FetchLobColumnData more often, but then we would still have to process + // the data we already got from the above call to SQLGetData. + // Better to throw an exception and fix the code than to risk returning corrupted data. + ThrowStdException("SQLGetData returned data larger than " + "expected for WCHAR column"); } } else if (dataLen == SQL_NULL_DATA) { LOG("SQLGetData: Column %d is NULL (NVARCHAR)", i); @@ -3298,8 +3300,13 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p row.append(py::bytes( reinterpret_cast(dataBuffer.data()), dataLen)); } else { - row.append( - FetchLobColumnData(hStmt, i, SQL_C_BINARY, false, true, "")); + // Reaching this case indicates an error in the code. + // Theoretically, we could still compensate by calling SQLGetData or + // FetchLobColumnData more often, but then we would still have to process + // the data we already got from the above call to SQLGetData. + // Better to throw an exception and fix the code than to risk returning corrupted data. + ThrowStdException("SQLGetData returned data larger than " + "expected for BINARY column"); } } else if (dataLen == SQL_NULL_DATA) { row.append(py::none()); diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 1aad9952..893bf014 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15030,6 +15030,8 @@ def test_encoding_buffersize(cursor): cursor.execute("select * from #t1").fetchall() with pytest.raises(Exception, match=".*Internal error: CHAR/VARCHAR column data exceeds buffer size.*"): cursor.execute("select * from #t1").fetchmany(1) - assert cursor.execute("select * from #t1").fetchone()[0] == "l" - assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß".encode("utf-8")[1:] == b'\x9f' + with pytest.raises(Exception, match=".*SQLGetData returned data larger than expected for CHAR column.*"): + cursor.execute("select * from #t1").fetchone() + with pytest.raises(Exception, match=".*SQLGetData returned data larger than expected for CHAR column.*"): + cursor.execute("select LEFT(a, 1) from #t1").fetchone() assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" From c2112534e6ccec89254627265ec9c6522e1ab8c3 Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Wed, 31 Dec 2025 01:11:40 +0100 Subject: [PATCH 05/10] Actually fix the buffer size --- mssql_python/pybind/ddbc_bindings.cpp | 28 ++++++++++++++++++++++----- tests/test_004_cursor.py | 12 ++++-------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index f6da107e..56ae1b5d 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -2931,7 +2931,9 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p row.append( FetchLobColumnData(hStmt, i, SQL_C_CHAR, false, false, charEncoding)); } else { - uint64_t fetchBufferSize = columnSize + 1 /* null-termination */; + // Multiply by 2 because utf8 conversion by the driver might + // turn varchar(x) into up to 2*x bytes. + uint64_t fetchBufferSize = 2 * columnSize + 1 /* null-termination */; std::vector dataBuffer(fetchBufferSize); SQLLEN dataLen; ret = SQLGetData_ptr(hStmt, i, SQL_C_CHAR, dataBuffer.data(), dataBuffer.size(), @@ -3448,7 +3450,9 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column // TODO: handle variable length data correctly. This logic wont // suffice HandleZeroColumnSizeAtFetch(columnSize); - uint64_t fetchBufferSize = columnSize + 1 /*null-terminator*/; + // Multiply by 2 because utf8 conversion by the driver might + // turn varchar(x) into up to 2*x bytes. + uint64_t fetchBufferSize = 2 * columnSize + 1 /*null-terminator*/; // TODO: For LONGVARCHAR/BINARY types, columnSize is returned as // 2GB-1 by SQLDescribeCol. So fetchBufferSize = 2GB. // fetchSize=1 if columnSize>1GB. So we'll allocate a vector of @@ -3621,8 +3625,20 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum columnInfos[col].columnSize = columnMeta["ColumnSize"].cast(); columnInfos[col].processedColumnSize = columnInfos[col].columnSize; HandleZeroColumnSizeAtFetch(columnInfos[col].processedColumnSize); - columnInfos[col].fetchBufferSize = - columnInfos[col].processedColumnSize + 1; // +1 for null terminator + switch (columnInfos[col].dataType) { + case SQL_CHAR: + case SQL_VARCHAR: + case SQL_LONGVARCHAR: + // Multiply by 2 because utf8 conversion by the driver might + // turn varchar(x) into up to 2*x bytes. + columnInfos[col].fetchBufferSize = + 2 * columnInfos[col].processedColumnSize + 1; // +1 for null terminator + break; + default: + columnInfos[col].fetchBufferSize = + columnInfos[col].processedColumnSize + 1; // +1 for null terminator + break; + } } std::string decimalSeparator = GetDecimalSeparator(); // Cache decimal separator @@ -3925,7 +3941,9 @@ size_t calculateRowSize(py::list& columnNames, SQLUSMALLINT numCols) { case SQL_CHAR: case SQL_VARCHAR: case SQL_LONGVARCHAR: - rowSize += columnSize; + // Multiply by 2 because utf8 conversion by the driver might + // turn varchar(x) into up to 2*x bytes. + rowSize += 2 * columnSize; break; case SQL_SS_XML: case SQL_WCHAR: diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 893bf014..14fb5d3b 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15026,12 +15026,8 @@ def test_encoding_buffersize(cursor): + "create table #t1 (a varchar(2) collate SQL_Latin1_General_CP1_CI_AS)\n" + "insert into #t1 values (N'ßl')\n" ) - with pytest.raises(Exception, match=".*Internal error: CHAR/VARCHAR column data exceeds buffer size.*"): - cursor.execute("select * from #t1").fetchall() - with pytest.raises(Exception, match=".*Internal error: CHAR/VARCHAR column data exceeds buffer size.*"): - cursor.execute("select * from #t1").fetchmany(1) - with pytest.raises(Exception, match=".*SQLGetData returned data larger than expected for CHAR column.*"): - cursor.execute("select * from #t1").fetchone() - with pytest.raises(Exception, match=".*SQLGetData returned data larger than expected for CHAR column.*"): - cursor.execute("select LEFT(a, 1) from #t1").fetchone() + assert cursor.execute("select * from #t1").fetchall()[0][0] == "ßl" + assert cursor.execute("select * from #t1").fetchmany(1)[0][0] == "ßl" + assert cursor.execute("select * from #t1").fetchone()[0] == "ßl" + assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß" assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" From c6063caaa0fcfd71f264a291a5ddb3ea86a87e28 Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Wed, 31 Dec 2025 02:10:13 +0100 Subject: [PATCH 06/10] Rename test --- tests/test_004_cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 14fb5d3b..5af0ccc9 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15020,7 +15020,7 @@ def test_close(db_connection): cursor = db_connection.cursor() -def test_encoding_buffersize(cursor): +def test_varchar_buffersize_special_character(cursor): cursor.execute( "drop table if exists #t1;\n" + "create table #t1 (a varchar(2) collate SQL_Latin1_General_CP1_CI_AS)\n" From 11c94c4caf5f4b0d8dea7362bd1ce60fe16f447a Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Sat, 3 Jan 2026 02:00:37 +0100 Subject: [PATCH 07/10] Change multiplier to 4, add test which covers all of latin1 --- mssql_python/pybind/ddbc_bindings.cpp | 48 +++++++++++++++------------ mssql_python/pybind/ddbc_bindings.h | 6 ++-- tests/test_004_cursor.py | 40 ++++++++++++++++++++++ 3 files changed, 70 insertions(+), 24 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 56ae1b5d..e2cceb05 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -2931,9 +2931,9 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p row.append( FetchLobColumnData(hStmt, i, SQL_C_CHAR, false, false, charEncoding)); } else { - // Multiply by 2 because utf8 conversion by the driver might - // turn varchar(x) into up to 2*x bytes. - uint64_t fetchBufferSize = 2 * columnSize + 1 /* null-termination */; + // Multiply by 4 because utf8 conversion by the driver might + // turn varchar(x) into up to 3*x (maybe 4*x?) bytes. + uint64_t fetchBufferSize = 4 * columnSize + 1 /* null-termination */; std::vector dataBuffer(fetchBufferSize); SQLLEN dataLen; ret = SQLGetData_ptr(hStmt, i, SQL_C_CHAR, dataBuffer.data(), dataBuffer.size(), @@ -2962,13 +2962,15 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p row.append(raw_bytes); } } else { - // Reaching this case indicates an error in the code. + // Reaching this case indicates an error in mssql_python. // Theoretically, we could still compensate by calling SQLGetData or // FetchLobColumnData more often, but then we would still have to process // the data we already got from the above call to SQLGetData. // Better to throw an exception and fix the code than to risk returning corrupted data. - ThrowStdException("SQLGetData returned data larger than " - "expected for CHAR column"); + ThrowStdException( + "Internal error: SQLGetData returned data " + "larger than expected for CHAR column" + ); } } else if (dataLen == SQL_NULL_DATA) { LOG("SQLGetData: Column %d is NULL (CHAR)", i); @@ -3034,13 +3036,15 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p "length=%lu for column %d", (unsigned long)numCharsInData, i); } else { - // Reaching this case indicates an error in the code. + // Reaching this case indicates an error in mssql_python. // Theoretically, we could still compensate by calling SQLGetData or // FetchLobColumnData more often, but then we would still have to process // the data we already got from the above call to SQLGetData. // Better to throw an exception and fix the code than to risk returning corrupted data. - ThrowStdException("SQLGetData returned data larger than " - "expected for WCHAR column"); + ThrowStdException( + "Internal error: SQLGetData returned data " + "larger than expected for WCHAR column" + ); } } else if (dataLen == SQL_NULL_DATA) { LOG("SQLGetData: Column %d is NULL (NVARCHAR)", i); @@ -3302,13 +3306,15 @@ SQLRETURN SQLGetData_wrap(SqlHandlePtr StatementHandle, SQLUSMALLINT colCount, p row.append(py::bytes( reinterpret_cast(dataBuffer.data()), dataLen)); } else { - // Reaching this case indicates an error in the code. + // Reaching this case indicates an error in mssql_python. // Theoretically, we could still compensate by calling SQLGetData or // FetchLobColumnData more often, but then we would still have to process // the data we already got from the above call to SQLGetData. // Better to throw an exception and fix the code than to risk returning corrupted data. - ThrowStdException("SQLGetData returned data larger than " - "expected for BINARY column"); + ThrowStdException( + "Internal error: SQLGetData returned data " + "larger than expected for BINARY column" + ); } } else if (dataLen == SQL_NULL_DATA) { row.append(py::none()); @@ -3450,9 +3456,9 @@ SQLRETURN SQLBindColums(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& column // TODO: handle variable length data correctly. This logic wont // suffice HandleZeroColumnSizeAtFetch(columnSize); - // Multiply by 2 because utf8 conversion by the driver might - // turn varchar(x) into up to 2*x bytes. - uint64_t fetchBufferSize = 2 * columnSize + 1 /*null-terminator*/; + // Multiply by 4 because utf8 conversion by the driver might + // turn varchar(x) into up to 3*x (maybe 4*x?) bytes. + uint64_t fetchBufferSize = 4 * columnSize + 1 /*null-terminator*/; // TODO: For LONGVARCHAR/BINARY types, columnSize is returned as // 2GB-1 by SQLDescribeCol. So fetchBufferSize = 2GB. // fetchSize=1 if columnSize>1GB. So we'll allocate a vector of @@ -3629,10 +3635,10 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum case SQL_CHAR: case SQL_VARCHAR: case SQL_LONGVARCHAR: - // Multiply by 2 because utf8 conversion by the driver might - // turn varchar(x) into up to 2*x bytes. + // Multiply by 4 because utf8 conversion by the driver might + // turn varchar(x) into up to 3*x (maybe 4*x?) bytes. columnInfos[col].fetchBufferSize = - 2 * columnInfos[col].processedColumnSize + 1; // +1 for null terminator + 4 * columnInfos[col].processedColumnSize + 1; // +1 for null terminator break; default: columnInfos[col].fetchBufferSize = @@ -3941,9 +3947,9 @@ size_t calculateRowSize(py::list& columnNames, SQLUSMALLINT numCols) { case SQL_CHAR: case SQL_VARCHAR: case SQL_LONGVARCHAR: - // Multiply by 2 because utf8 conversion by the driver might - // turn varchar(x) into up to 2*x bytes. - rowSize += 2 * columnSize; + // Multiply by 4 because utf8 conversion by the driver might + // turn varchar(x) into up to 3*x (maybe 4*x?) bytes. + rowSize += 4 * columnSize; break; case SQL_SS_XML: case SQL_WCHAR: diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index 5ef74b09..cc94b137 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -802,7 +802,7 @@ inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colIn PyList_SET_ITEM(row, col - 1, pyStr); } } else { - // Reaching this case indicates an error in the code. + // Reaching this case indicates an error in mssql_python. // This function is only called on columns bound by SQLBindCol. // For such columns, the ODBC Driver does not allow us to compensate by // fetching the remaining data using SQLGetData / FetchLobColumnData. @@ -872,7 +872,7 @@ inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colI } #endif } else { - // Reaching this case indicates an error in the code. + // Reaching this case indicates an error in mssql_python. // This function is only called on columns bound by SQLBindCol. // For such columns, the ODBC Driver does not allow us to compensate by // fetching the remaining data using SQLGetData / FetchLobColumnData. @@ -915,7 +915,7 @@ inline void ProcessBinary(PyObject* row, ColumnBuffers& buffers, const void* col PyList_SET_ITEM(row, col - 1, pyBytes); } } else { - // Reaching this case indicates an error in the code. + // Reaching this case indicates an error in mssql_python. // This function is only called on columns bound by SQLBindCol. // For such columns, the ODBC Driver does not allow us to compensate by // fetching the remaining data using SQLGetData / FetchLobColumnData. diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 5af0ccc9..225e6788 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15031,3 +15031,43 @@ def test_varchar_buffersize_special_character(cursor): assert cursor.execute("select * from #t1").fetchone()[0] == "ßl" assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß" assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" + + +def test_varchar_latin1_fetch(cursor): + def query(): + cursor.execute(""" + declare @t1 as table( + row_nr int, + latin1 varchar(1) collate SQL_Latin1_General_CP1_CI_AS, + utf8 varchar(3) collate Latin1_General_100_CI_AI_SC_UTF8 + ) + + insert into @t1 (row_nr, latin1) + select top 256 + row_number() over(order by (select 1)) - 1, + cast(row_number() over(order by (select 1)) - 1 as binary(1)) + from sys.objects + + update @t1 set utf8 = latin1 + + select * from @t1 + """) + cursor.nextset() + cursor.nextset() + + def validate(result): + assert len(result) == 256 + for (row_nr, latin1, utf8) in result: + assert utf8 == latin1 or ( + # small difference in how sql server and msodbcsql18 handle unmapped characters + row_nr in [129, 141, 143, 144, 157] + and utf8 == chr(row_nr) + and latin1 == '?' + ), (row_nr, utf8, latin1, chr(row_nr)) + + query() + validate(cursor.fetchall()) + query() + validate(cursor.fetchmany(500)) + query() + validate([cursor.fetchone() for _ in range(256)]) From bc2c7678c5b5792f4249f66a061e291dcccf663b Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Tue, 6 Jan 2026 13:09:58 +0100 Subject: [PATCH 08/10] Use recursive CTE instead of sys.objects in test --- tests/test_004_cursor.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 225e6788..d906f892 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15042,11 +15042,15 @@ def query(): utf8 varchar(3) collate Latin1_General_100_CI_AI_SC_UTF8 ) + ;with nums as ( + select 0 as n + union all + select n + 1 from nums where n < 255 + ) insert into @t1 (row_nr, latin1) - select top 256 - row_number() over(order by (select 1)) - 1, - cast(row_number() over(order by (select 1)) - 1 as binary(1)) - from sys.objects + select n, cast(n as binary(1)) + from nums + option (maxrecursion 256) update @t1 set utf8 = latin1 From 572516ec11acc2b4ed419f790efb33f52a9c779f Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Tue, 6 Jan 2026 13:18:52 +0100 Subject: [PATCH 09/10] Remove now redundant hstmt arg from ColumnProcessors --- mssql_python/pybind/ddbc_bindings.cpp | 2 +- mssql_python/pybind/ddbc_bindings.h | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/mssql_python/pybind/ddbc_bindings.cpp b/mssql_python/pybind/ddbc_bindings.cpp index 4772123b..e5e74f9c 100644 --- a/mssql_python/pybind/ddbc_bindings.cpp +++ b/mssql_python/pybind/ddbc_bindings.cpp @@ -3763,7 +3763,7 @@ SQLRETURN FetchBatchData(SQLHSTMT hStmt, ColumnBuffers& buffers, py::list& colum // types) to just 10 (setup only) Note: Processor functions no // longer need to check for NULL since we do it above if (columnProcessors[col - 1] != nullptr) { - columnProcessors[col - 1](row, buffers, &columnInfosExt[col - 1], col, i, hStmt); + columnProcessors[col - 1](row, buffers, &columnInfosExt[col - 1], col, i); continue; } diff --git a/mssql_python/pybind/ddbc_bindings.h b/mssql_python/pybind/ddbc_bindings.h index cc94b137..2b3c9e5e 100644 --- a/mssql_python/pybind/ddbc_bindings.h +++ b/mssql_python/pybind/ddbc_bindings.h @@ -643,7 +643,7 @@ struct ColumnBuffers { // Performance: Column processor function type for fast type conversion // Using function pointers eliminates switch statement overhead in the hot loop typedef void (*ColumnProcessor)(PyObject* row, ColumnBuffers& buffers, const void* colInfo, - SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt); + SQLUSMALLINT col, SQLULEN rowIdx); // Extended column info struct for processor functions struct ColumnInfoExt { @@ -664,7 +664,7 @@ namespace ColumnProcessors { // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessInteger(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call (bypasses pybind11 overhead) PyObject* pyInt = PyLong_FromLong(buffers.intBuffers[col - 1][rowIdx]); if (!pyInt) { // Handle memory allocation failure @@ -679,7 +679,7 @@ inline void ProcessInteger(PyObject* row, ColumnBuffers& buffers, const void*, S // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessSmallInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call PyObject* pyInt = PyLong_FromLong(buffers.smallIntBuffers[col - 1][rowIdx]); if (!pyInt) { // Handle memory allocation failure @@ -694,7 +694,7 @@ inline void ProcessSmallInt(PyObject* row, ColumnBuffers& buffers, const void*, // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessBigInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call PyObject* pyInt = PyLong_FromLongLong(buffers.bigIntBuffers[col - 1][rowIdx]); if (!pyInt) { // Handle memory allocation failure @@ -709,7 +709,7 @@ inline void ProcessBigInt(PyObject* row, ColumnBuffers& buffers, const void*, SQ // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessTinyInt(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call PyObject* pyInt = PyLong_FromLong(buffers.charBuffers[col - 1][rowIdx]); if (!pyInt) { // Handle memory allocation failure @@ -724,7 +724,7 @@ inline void ProcessTinyInt(PyObject* row, ColumnBuffers& buffers, const void*, S // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessBit(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call (converts 0/1 to True/False) PyObject* pyBool = PyBool_FromLong(buffers.charBuffers[col - 1][rowIdx]); if (!pyBool) { // Handle memory allocation failure @@ -739,7 +739,7 @@ inline void ProcessBit(PyObject* row, ColumnBuffers& buffers, const void*, SQLUS // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessReal(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call PyObject* pyFloat = PyFloat_FromDouble(buffers.realBuffers[col - 1][rowIdx]); if (!pyFloat) { // Handle memory allocation failure @@ -754,7 +754,7 @@ inline void ProcessReal(PyObject* row, ColumnBuffers& buffers, const void*, SQLU // Performance: NULL check removed - handled centrally before processor is // called inline void ProcessDouble(PyObject* row, ColumnBuffers& buffers, const void*, SQLUSMALLINT col, - SQLULEN rowIdx, SQLHSTMT) { + SQLULEN rowIdx) { // Performance: Direct Python C API call PyObject* pyFloat = PyFloat_FromDouble(buffers.doubleBuffers[col - 1][rowIdx]); if (!pyFloat) { // Handle memory allocation failure @@ -769,7 +769,7 @@ inline void ProcessDouble(PyObject* row, ColumnBuffers& buffers, const void*, SQ // Performance: NULL/NO_TOTAL checks removed - handled centrally before // processor is called inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr, - SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) { + SQLUSMALLINT col, SQLULEN rowIdx) { const ColumnInfoExt* colInfo = static_cast(colInfoPtr); SQLLEN dataLen = buffers.indicators[col - 1][rowIdx]; @@ -815,7 +815,7 @@ inline void ProcessChar(PyObject* row, ColumnBuffers& buffers, const void* colIn // Performance: NULL/NO_TOTAL checks removed - handled centrally before // processor is called inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr, - SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) { + SQLUSMALLINT col, SQLULEN rowIdx) { const ColumnInfoExt* colInfo = static_cast(colInfoPtr); SQLLEN dataLen = buffers.indicators[col - 1][rowIdx]; @@ -885,7 +885,7 @@ inline void ProcessWChar(PyObject* row, ColumnBuffers& buffers, const void* colI // Performance: NULL/NO_TOTAL checks removed - handled centrally before // processor is called inline void ProcessBinary(PyObject* row, ColumnBuffers& buffers, const void* colInfoPtr, - SQLUSMALLINT col, SQLULEN rowIdx, SQLHSTMT hStmt) { + SQLUSMALLINT col, SQLULEN rowIdx) { const ColumnInfoExt* colInfo = static_cast(colInfoPtr); SQLLEN dataLen = buffers.indicators[col - 1][rowIdx]; From 06d83a2538a9cb6ae8dc988b77cf0ec61e8ab2e9 Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Thu, 8 Jan 2026 20:17:19 +0100 Subject: [PATCH 10/10] Windows tests assert that an exception/data corruption happens instead of failing --- tests/test_004_cursor.py | 98 ++++++++++++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 18 deletions(-) diff --git a/tests/test_004_cursor.py b/tests/test_004_cursor.py index 87ecfb96..b975af2f 100644 --- a/tests/test_004_cursor.py +++ b/tests/test_004_cursor.py @@ -15285,16 +15285,37 @@ def test_varchar_buffersize_special_character(cursor): + "create table #t1 (a varchar(2) collate SQL_Latin1_General_CP1_CI_AS)\n" + "insert into #t1 values (N'ßl')\n" ) - assert cursor.execute("select * from #t1").fetchall()[0][0] == "ßl" - assert cursor.execute("select * from #t1").fetchmany(1)[0][0] == "ßl" - assert cursor.execute("select * from #t1").fetchone()[0] == "ßl" - assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß" - assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" + import platform + + if platform.system() != "Windows": + # works fine with default settings + cursor.connection.setdecoding(mssql_python.SQL_CHAR) + assert cursor.execute("select * from #t1").fetchone()[0] == "ßl" + assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß" + assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" + assert cursor.execute("select * from #t1").fetchmany(1)[0][0] == "ßl" + assert cursor.execute("select * from #t1").fetchall()[0][0] == "ßl" + else: + # fetchone respects setdecoding + cursor.connection.setdecoding(mssql_python.SQL_CHAR) + assert cursor.execute("select * from #t1").fetchone()[0] == b"\xdfl" + cursor.connection.setdecoding(mssql_python.SQL_CHAR, "cp1252") + assert cursor.execute("select * from #t1").fetchone()[0] == "ßl" + assert cursor.execute("select LEFT(a, 1) from #t1").fetchone()[0] == "ß" + assert cursor.execute("select cast(a as varchar(3)) from #t1").fetchone()[0] == "ßl" + + # fetchmany/fetchall do not respect setdecoding + with pytest.raises(SystemError, match=".*returned a result with an exception set"): + cursor.execute("select * from #t1").fetchmany(1) + with pytest.raises(SystemError, match=".*returned a result with an exception set"): + cursor.execute("select * from #t1").fetchall() def test_varchar_latin1_fetch(cursor): def query(): - cursor.execute(""" + cursor.execute( + """ + set nocount on declare @t1 as table( row_nr int, latin1 varchar(1) collate SQL_Latin1_General_CP1_CI_AS, @@ -15314,23 +15335,64 @@ def query(): update @t1 set utf8 = latin1 select * from @t1 - """) - cursor.nextset() - cursor.nextset() - + """ + ) + def validate(result): assert len(result) == 256 - for (row_nr, latin1, utf8) in result: + for row_nr, latin1, utf8 in result: assert utf8 == latin1 or ( # small difference in how sql server and msodbcsql18 handle unmapped characters row_nr in [129, 141, 143, 144, 157] and utf8 == chr(row_nr) - and latin1 == '?' + and latin1 == "?" ), (row_nr, utf8, latin1, chr(row_nr)) - query() - validate(cursor.fetchall()) - query() - validate(cursor.fetchmany(500)) - query() - validate([cursor.fetchone() for _ in range(256)]) + import platform + + if platform.system() != "Windows": + # works fine with defaults + cursor.connection.setdecoding(mssql_python.SQL_CHAR) + query() + validate([cursor.fetchone() for _ in range(256)]) + query() + validate(cursor.fetchall()) + query() + validate(cursor.fetchmany(500)) + else: + # works fine if correctly configured by user for fetchone (SQLGetData) + cursor.connection.setdecoding(mssql_python.SQL_CHAR, "cp1252") + query() + validate([cursor.fetchone() for _ in range(256)]) + # broken for SQLBindCol + query() + with pytest.raises(SystemError, match=".*returned a result with an exception set"): + cursor.fetchall() + query() + with pytest.raises(SystemError, match=".*returned a result with an exception set"): + cursor.fetchmany(500) + + +def test_varchar_emoji(cursor): + cursor.connection.setdecoding(mssql_python.SQL_CHAR) # default + cursor.execute( + """ + set nocount on + declare @t1 as table( + a nvarchar(20), + b varchar(20) collate Latin1_General_100_CI_AI_SC_UTF8 + ) + insert into @t1 values (N'😄', N'😄') + select a, b from @t1 + """ + ) + ret = cursor.fetchone() + + import platform + + if platform.system() == "Windows": + # impossible to fetch varchar emojis on windows currently + assert tuple(ret) == ("😄", "??") + else: + # works fine on other platforms + assert tuple(ret) == ("😄", "😄")