Skip to content

Commit

Permalink
Simplify UTF8 StrPtr usage (pythonnet#2374)
Browse files Browse the repository at this point in the history
* Use non-BOM encodings

* Copy potential BOM to the output of PyString_FromString

The documentation of `PyUnicode_DecodeUTF16` states that when `*byteorder`
is not passed, or is passed as 0, the first two bytes of the input are
interpreted as a byte order mark and skipped if they are the BOM (U+FEFF,
zero-width no-break space). That behaviour is incorrect when converting a
string that is known to carry no BOM, which is the case for all strings
coming from C#.

* Default to UTF8 for StrPtr
  • Loading branch information
filmor authored Jun 8, 2024
1 parent b112885 commit f82aeea
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 23 deletions.
3 changes: 2 additions & 1 deletion src/embed_tests/TestPyType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ public void CanCreateHeapType()
const string name = "nÁmæ";
const string docStr = "dÁcæ";

using var doc = new StrPtr(docStr, Encodings.UTF8);
using var doc = new StrPtr(docStr);

var spec = new TypeSpec(
name: name,
basicSize: Util.ReadInt32(Runtime.Runtime.PyBaseObjectType, TypeOffset.tp_basicsize),
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/Native/NativeTypeSpec.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public NativeTypeSpec(TypeSpec spec)
{
if (spec is null) throw new ArgumentNullException(nameof(spec));

this.Name = new StrPtr(spec.Name, Encodings.UTF8);
this.Name = new StrPtr(spec.Name);
this.BasicSize = spec.BasicSize;
this.ItemSize = spec.ItemSize;
this.Flags = (int)spec.Flags;
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/Native/StrPtr.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ struct StrPtr : IDisposable
public IntPtr RawPointer { get; set; }
unsafe byte* Bytes => (byte*)this.RawPointer;

public unsafe StrPtr(string value) : this(value, Encodings.UTF8) {}

public unsafe StrPtr(string value, Encoding encoding)
{
if (value is null) throw new ArgumentNullException(nameof(value));
Expand Down
43 changes: 22 additions & 21 deletions src/runtime/Runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -795,13 +795,13 @@ public static int Py_Main(int argc, string[] argv)

internal static int PyRun_SimpleString(string code)
{
using var codePtr = new StrPtr(code, Encodings.UTF8);
using var codePtr = new StrPtr(code);
return Delegates.PyRun_SimpleStringFlags(codePtr, Utf8String);
}

internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedReference globals, BorrowedReference locals)
{
using var codePtr = new StrPtr(code, Encodings.UTF8);
using var codePtr = new StrPtr(code);
return Delegates.PyRun_StringFlags(codePtr, st, globals, locals, Utf8String);
}

Expand All @@ -813,14 +813,15 @@ internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedR
/// </summary>
internal static NewReference Py_CompileString(string str, string file, int start)
{
using var strPtr = new StrPtr(str, Encodings.UTF8);
using var strPtr = new StrPtr(str);

using var fileObj = new PyString(file);
return Delegates.Py_CompileStringObject(strPtr, fileObj, start, Utf8String, -1);
}

internal static NewReference PyImport_ExecCodeModule(string name, BorrowedReference code)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyImport_ExecCodeModule(namePtr, code);
}

Expand Down Expand Up @@ -867,13 +868,13 @@ internal static bool PyObject_IsIterable(BorrowedReference ob)

internal static int PyObject_HasAttrString(BorrowedReference pointer, string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyObject_HasAttrString(pointer, namePtr);
}

internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyObject_GetAttrString(pointer, namePtr);
}

Expand All @@ -884,12 +885,12 @@ internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, S
internal static int PyObject_DelAttr(BorrowedReference @object, BorrowedReference name) => Delegates.PyObject_SetAttr(@object, name, null);
internal static int PyObject_DelAttrString(BorrowedReference @object, string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyObject_SetAttrString(@object, namePtr, null);
}
internal static int PyObject_SetAttrString(BorrowedReference @object, string name, BorrowedReference value)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyObject_SetAttrString(@object, namePtr, value);
}

Expand Down Expand Up @@ -1071,7 +1072,7 @@ internal static bool PyBool_CheckExact(BorrowedReference ob)

internal static NewReference PyLong_FromString(string value, int radix)
{
using var valPtr = new StrPtr(value, Encodings.UTF8);
using var valPtr = new StrPtr(value);
return Delegates.PyLong_FromString(valPtr, IntPtr.Zero, radix);
}

Expand Down Expand Up @@ -1274,7 +1275,7 @@ internal static NewReference EmptyPyBytes()
internal static NewReference PyByteArray_FromStringAndSize(IntPtr strPtr, nint len) => Delegates.PyByteArray_FromStringAndSize(strPtr, len);
internal static NewReference PyByteArray_FromStringAndSize(string s)
{
using var ptr = new StrPtr(s, Encodings.UTF8);
using var ptr = new StrPtr(s);
return PyByteArray_FromStringAndSize(ptr.RawPointer, checked((nint)ptr.ByteCount));
}

Expand Down Expand Up @@ -1302,7 +1303,7 @@ internal static IntPtr PyBytes_AsString(BorrowedReference ob)

internal static NewReference PyUnicode_InternFromString(string s)
{
using var ptr = new StrPtr(s, Encodings.UTF8);
using var ptr = new StrPtr(s);
return Delegates.PyUnicode_InternFromString(ptr);
}

Expand Down Expand Up @@ -1377,7 +1378,7 @@ internal static bool PyDict_Check(BorrowedReference ob)

internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer, string key)
{
using var keyStr = new StrPtr(key, Encodings.UTF8);
using var keyStr = new StrPtr(key);
return Delegates.PyDict_GetItemString(pointer, keyStr);
}

Expand All @@ -1393,7 +1394,7 @@ internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer
/// </summary>
internal static int PyDict_SetItemString(BorrowedReference dict, string key, BorrowedReference value)
{
using var keyPtr = new StrPtr(key, Encodings.UTF8);
using var keyPtr = new StrPtr(key);
return Delegates.PyDict_SetItemString(dict, keyPtr, value);
}

Expand All @@ -1402,7 +1403,7 @@ internal static int PyDict_SetItemString(BorrowedReference dict, string key, Bor

internal static int PyDict_DelItemString(BorrowedReference pointer, string key)
{
using var keyPtr = new StrPtr(key, Encodings.UTF8);
using var keyPtr = new StrPtr(key);
return Delegates.PyDict_DelItemString(pointer, keyPtr);
}

Expand Down Expand Up @@ -1517,7 +1518,7 @@ internal static bool PyIter_Check(BorrowedReference ob)

internal static NewReference PyModule_New(string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyModule_New(namePtr);
}

Expand All @@ -1531,7 +1532,7 @@ internal static NewReference PyModule_New(string name)
/// <returns>Return -1 on error, 0 on success.</returns>
internal static int PyModule_AddObject(BorrowedReference module, string name, StolenReference value)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
IntPtr valueAddr = value.DangerousGetAddressOrNull();
int res = Delegates.PyModule_AddObject(module, namePtr, valueAddr);
// We can't just exit here because the reference is stolen only on success.
Expand All @@ -1549,7 +1550,7 @@ internal static int PyModule_AddObject(BorrowedReference module, string name, St

internal static NewReference PyImport_ImportModule(string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyImport_ImportModule(namePtr);
}

Expand All @@ -1558,7 +1559,7 @@ internal static NewReference PyImport_ImportModule(string name)

internal static BorrowedReference PyImport_AddModule(string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PyImport_AddModule(namePtr);
}

Expand Down Expand Up @@ -1586,13 +1587,13 @@ internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)

internal static BorrowedReference PySys_GetObject(string name)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PySys_GetObject(namePtr);
}

internal static int PySys_SetObject(string name, BorrowedReference ob)
{
using var namePtr = new StrPtr(name, Encodings.UTF8);
using var namePtr = new StrPtr(name);
return Delegates.PySys_SetObject(namePtr, ob);
}

Expand Down Expand Up @@ -1691,7 +1692,7 @@ internal static IntPtr PyMem_Malloc(long size)

internal static void PyErr_SetString(BorrowedReference ob, string message)
{
using var msgPtr = new StrPtr(message, Encodings.UTF8);
using var msgPtr = new StrPtr(message);
Delegates.PyErr_SetString(ob, msgPtr);
}

Expand Down

0 comments on commit f82aeea

Please sign in to comment.