diff --git a/AUTHORS.md b/AUTHORS.md index 6cfa216b1..912831836 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -55,6 +55,7 @@ - Meinrad Recheis ([@henon](https://github.com/henon)) - Mohamed Koubaa ([@koubaa](https://github.com/koubaa)) - Patrick Stewart ([@patstew](https://github.com/patstew)) +- Peter Kese ([@pkese](https://github.com/pkese)) - Raphael Nestler ([@rnestler](https://github.com/rnestler)) - Rickard Holmberg ([@rickardraysearch](https://github.com/rickardraysearch)) - Sam Winstanley ([@swinstanley](https://github.com/swinstanley)) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5871e7ffb..8aba9e9b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,7 @@ One must now either use enum members (e.g. `MyEnum.Option`), or use enum constru - Exception stacktraces on `PythonException.StackTrace` are now properly formatted - Providing an invalid type parameter to a generic type or method produces a helpful Python error - Empty parameter names (as can be generated from F#) do not cause crashes +- Unicode strings with surrogates were truncated when converting from Python ### Removed diff --git a/src/embed_tests/TestCustomMarshal.cs b/src/embed_tests/TestCustomMarshal.cs index 5860857a3..99911bdb0 100644 --- a/src/embed_tests/TestCustomMarshal.cs +++ b/src/embed_tests/TestCustomMarshal.cs @@ -23,7 +23,7 @@ public static void GetManagedStringTwice() { const string expected = "FooBar"; - IntPtr op = Runtime.Runtime.PyUnicode_FromString(expected); + IntPtr op = Runtime.Runtime.PyString_FromString(expected); string s1 = Runtime.Runtime.GetManagedString(op); string s2 = Runtime.Runtime.GetManagedString(op); diff --git a/src/embed_tests/TestPyString.cs b/src/embed_tests/TestPyString.cs index 0de436e35..669ecde0d 100644 --- a/src/embed_tests/TestPyString.cs +++ b/src/embed_tests/TestPyString.cs @@ -94,5 +94,24 @@ public void TestUnicode() PyObject actual = new PyString(expected); Assert.AreEqual(expected, actual.ToString()); } + + [Test] + public void TestUnicodeSurrogateToString() + { + var expected = "foo\ud83d\udc3c"; + var actual = PythonEngine.Eval("'foo\ud83d\udc3c'"); + Assert.AreEqual(4, actual.Length()); + Assert.AreEqual(expected, actual.ToString()); + } + + [Test] + public void TestUnicodeSurrogate() + { + const string expected = "foo\ud83d\udc3c"; // "foo🐼" + PyObject actual = new PyString(expected); + // python treats "foo🐼" as 4 characters, dotnet as 5 + Assert.AreEqual(4, actual.Length()); + Assert.AreEqual(expected, actual.ToString()); + } } } diff --git a/src/embed_tests/TestRuntime.cs b/src/embed_tests/TestRuntime.cs index 9ca6cf139..9fb2e8b22 100644 --- a/src/embed_tests/TestRuntime.cs +++ b/src/embed_tests/TestRuntime.cs @@ -36,7 +36,7 @@ public static void Py_IsInitializedValue() public static void RefCountTest() { Runtime.Runtime.Py_Initialize(); - IntPtr op = Runtime.Runtime.PyUnicode_FromString("FooBar"); + IntPtr op = Runtime.Runtime.PyString_FromString("FooBar"); // New object RefCount should be one Assert.AreEqual(1, Runtime.Runtime.Refcount(op)); diff --git a/src/runtime/converter.cs b/src/runtime/converter.cs index 47263e8c4..80f31f058 100644 --- a/src/runtime/converter.cs +++ b/src/runtime/converter.cs @@ -221,7 +221,7 @@ internal static IntPtr ToPython(object value, Type type) return CLRObject.GetInstHandle(value, type); case TypeCode.String: - return Runtime.PyUnicode_FromString((string)value); + return Runtime.PyString_FromString((string)value); case TypeCode.Int32: return Runtime.PyInt_FromInt32((int)value); diff --git a/src/runtime/exceptions.cs b/src/runtime/exceptions.cs index bbdcdad30..a612e34e3 100644 --- a/src/runtime/exceptions.cs +++ b/src/runtime/exceptions.cs @@ -50,7 +50,7 @@ internal static Exception ToException(BorrowedReference ob) { message = String.Format("{0}()", name); } - return Runtime.PyUnicode_FromString(message); + return Runtime.PyString_FromString(message); } /// @@ -75,7 +75,7 @@ internal static Exception ToException(BorrowedReference ob) { message = message.Substring(fullTypeName.Length); } - return Runtime.PyUnicode_FromString(message); + return Runtime.PyString_FromString(message); } } @@ -153,7 +153,7 @@ internal static void SetArgsAndCause(BorrowedReference ob, Exception e) if (!string.IsNullOrEmpty(e.Message)) { args = Runtime.PyTuple_New(1); - IntPtr msg = Runtime.PyUnicode_FromString(e.Message); + IntPtr msg = Runtime.PyString_FromString(e.Message); Runtime.PyTuple_SetItem(args, 0, msg); } else diff --git a/src/runtime/pystring.cs b/src/runtime/pystring.cs index 07eabba14..172c09ebd 100644 --- a/src/runtime/pystring.cs +++ b/src/runtime/pystring.cs @@ -51,7 +51,7 @@ public PyString(PyObject o) : base(FromObject(o)) private static IntPtr FromString(string s) { - IntPtr val = Runtime.PyUnicode_FromUnicode(s, s.Length); + IntPtr val = Runtime.PyString_FromString(s); PythonException.ThrowIfIsNull(val); return val; } diff --git a/src/runtime/runtime.cs b/src/runtime/runtime.cs index 789b71f3e..537e5348f 100644 --- a/src/runtime/runtime.cs +++ b/src/runtime/runtime.cs @@ -230,7 +230,7 @@ private static void InitPyMembers() () => PyStringType = IntPtr.Zero); XDecref(op); - op = PyUnicode_FromString("unicode"); + op = PyString_FromString("unicode"); SetPyMemberTypeOf(ref PyUnicodeType, op, () => PyUnicodeType = IntPtr.Zero); XDecref(op); @@ -1527,7 +1527,12 @@ internal static bool PyString_Check(IntPtr ob) internal static IntPtr PyString_FromString(string value) { fixed(char* ptr = value) - return PyUnicode_FromKindAndData(2, (IntPtr)ptr, value.Length); + return Delegates.PyUnicode_DecodeUTF16( + (IntPtr)ptr, + value.Length * sizeof(Char), + IntPtr.Zero, + IntPtr.Zero + ).DangerousMoveToPointerOrNull(); } @@ -1553,16 +1558,6 @@ internal static long PyBytes_Size(IntPtr op) private static IntPtr _PyBytes_Size(IntPtr op) => Delegates._PyBytes_Size(op); - - internal static IntPtr PyUnicode_FromStringAndSize(IntPtr value, long size) - { - return PyUnicode_FromStringAndSize(value, new IntPtr(size)); - } - - - private static IntPtr PyUnicode_FromStringAndSize(IntPtr value, IntPtr size) => Delegates.PyUnicode_FromStringAndSize(value, size); - - internal static IntPtr PyUnicode_AsUTF8(IntPtr unicode) => Delegates.PyUnicode_AsUTF8(unicode); internal static bool PyUnicode_Check(IntPtr ob) @@ -1576,22 +1571,6 @@ internal static bool PyUnicode_Check(IntPtr ob) internal static IntPtr PyUnicode_FromEncodedObject(IntPtr ob, IntPtr enc, IntPtr err) => Delegates.PyUnicode_FromEncodedObject(ob, enc, err); - internal static IntPtr PyUnicode_FromKindAndData(int kind, IntPtr s, long size) - { - return PyUnicode_FromKindAndData(kind, s, new IntPtr(size)); - } - - - private static IntPtr PyUnicode_FromKindAndData(int kind, IntPtr s, IntPtr size) - => Delegates.PyUnicode_FromKindAndData(kind, s, size); - - internal static IntPtr PyUnicode_FromUnicode(string s, long size) - { - fixed(char* ptr = s) - return PyUnicode_FromKindAndData(2, (IntPtr)ptr, size); - } - - internal static int PyUnicode_GetMax() => Delegates.PyUnicode_GetMax(); internal static long PyUnicode_GetSize(IntPtr ob) @@ -1610,12 +1589,6 @@ internal static long PyUnicode_GetSize(IntPtr ob) internal static IntPtr PyUnicode_FromOrdinal(int c) => Delegates.PyUnicode_FromOrdinal(c); - internal static IntPtr PyUnicode_FromString(string s) - { - return PyUnicode_FromUnicode(s, s.Length); - } - - internal static IntPtr PyUnicode_InternFromString(string s) { using var ptr = new StrPtr(s, Encoding.UTF8); @@ -1646,11 +1619,12 @@ internal static string GetManagedString(IntPtr op) if (type == PyUnicodeType) { using var p = PyUnicode_AsUTF16String(new BorrowedReference(op)); - int length = (int)PyUnicode_GetSize(op); - char* codePoints = (char*)PyBytes_AsString(p.DangerousGetAddress()); + var bytesPtr = p.DangerousGetAddress(); + int bytesLength = (int)Runtime.PyBytes_Size(bytesPtr); + char* codePoints = (char*)PyBytes_AsString(bytesPtr); return new string(codePoints, startIndex: 1, // skip BOM - length: length); + length: bytesLength/2-1); // utf16 - BOM } return null; @@ -2442,11 +2416,10 @@ static Delegates() PyBytes_AsString = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyBytes_AsString), GetUnmanagedDll(_PythonDll)); PyBytes_FromString = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyBytes_FromString), GetUnmanagedDll(_PythonDll)); _PyBytes_Size = (delegate* unmanaged[Cdecl])GetFunctionByName("PyBytes_Size", GetUnmanagedDll(_PythonDll)); - PyUnicode_FromStringAndSize = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_FromStringAndSize), GetUnmanagedDll(_PythonDll)); PyUnicode_AsUTF8 = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_AsUTF8), GetUnmanagedDll(_PythonDll)); PyUnicode_FromObject = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_FromObject), GetUnmanagedDll(_PythonDll)); + PyUnicode_DecodeUTF16 = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_DecodeUTF16), GetUnmanagedDll(_PythonDll)); PyUnicode_FromEncodedObject = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_FromEncodedObject), GetUnmanagedDll(_PythonDll)); - PyUnicode_FromKindAndData = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_FromKindAndData), GetUnmanagedDll(_PythonDll)); PyUnicode_GetMax = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_GetMax), GetUnmanagedDll(_PythonDll)); _PyUnicode_GetSize = (delegate* unmanaged[Cdecl])GetFunctionByName("PyUnicode_GetSize", GetUnmanagedDll(_PythonDll)); PyUnicode_AsUnicode = (delegate* unmanaged[Cdecl])GetFunctionByName(nameof(PyUnicode_AsUnicode), GetUnmanagedDll(_PythonDll)); @@ -2738,11 +2711,10 @@ static Delegates() internal static delegate* unmanaged[Cdecl] PyBytes_AsString { get; } internal static delegate* unmanaged[Cdecl] PyBytes_FromString { get; } internal static delegate* unmanaged[Cdecl] _PyBytes_Size { get; } - internal static delegate* unmanaged[Cdecl] PyUnicode_FromStringAndSize { get; } internal static delegate* unmanaged[Cdecl] PyUnicode_AsUTF8 { get; } internal static delegate* unmanaged[Cdecl] PyUnicode_FromObject { get; } internal static delegate* unmanaged[Cdecl] PyUnicode_FromEncodedObject { get; } - internal static delegate* unmanaged[Cdecl] PyUnicode_FromKindAndData { get; } + internal static delegate* unmanaged[Cdecl] PyUnicode_DecodeUTF16 { get; } internal static delegate* unmanaged[Cdecl] PyUnicode_GetMax { get; } internal static delegate* unmanaged[Cdecl] _PyUnicode_GetSize { get; } internal static delegate* unmanaged[Cdecl] PyUnicode_AsUnicode { get; } diff --git a/src/runtime/typemanager.cs b/src/runtime/typemanager.cs index 13d822c09..e1bfe6aef 100644 --- a/src/runtime/typemanager.cs +++ b/src/runtime/typemanager.cs @@ -580,7 +580,7 @@ internal static IntPtr AllocateTypeObject(string name, IntPtr metatype) // Cheat a little: we'll set tp_name to the internal char * of // the Python version of the type name - otherwise we'd have to // allocate the tp_name and would have no way to free it. - IntPtr temp = Runtime.PyUnicode_FromString(name); + IntPtr temp = Runtime.PyString_FromString(name); IntPtr raw = Runtime.PyUnicode_AsUTF8(temp); Marshal.WriteIntPtr(type, TypeOffset.tp_name, raw); Marshal.WriteIntPtr(type, TypeOffset.name, temp); pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy