Skip to content
9 changes: 9 additions & 0 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1795,6 +1795,8 @@ unicode_char(Py_UCS4 ch)
assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
PyUnicode_4BYTE_DATA(unicode)[0] = ch;
}
// ch >= 256 and so cannot be 0
_PyUnicode_STATE(unicode).embed_null = 0;
assert(_PyUnicode_CheckConsistency(unicode, 1));
return unicode;
}
Expand All @@ -1811,8 +1813,13 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
return NULL;
}

unsigned int embed_null;
if (size == -1) {
size = wcslen(u);
embed_null = 0;
}
else {
embed_null = EMBED_NULL_UNKNOWN;
}

/* If the Unicode data is known at construction time, we can apply
Expand Down Expand Up @@ -1877,6 +1884,7 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
default:
Py_UNREACHABLE();
}
_PyUnicode_STATE(unicode).embed_null = embed_null;

return unicode_result(unicode);
}
Expand Down Expand Up @@ -2232,6 +2240,7 @@ _PyUnicode_Copy(PyObject *unicode)

memcpy(PyUnicode_DATA(copy), PyUnicode_DATA(unicode),
length * PyUnicode_KIND(unicode));
_PyUnicode_STATE(copy).embed_null = _PyUnicode_STATE(unicode).embed_null;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_PyUnicode_Copy() makes a modifiable unicode object. It is legal to embed a null character in it after creation, or to replace an embedded null character with a non-null character. In particular, it creates a new copy even from Latin1 character singletons.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, technically, any string can be modified at any time through the C API. People "should not do that", but since it is possible, I'm not sure it is safe to assume that nobody will mutate a string long after its creation, that is, after the cache is initialized.

assert(_PyUnicode_CheckConsistency(copy, 1));
return copy;
}
Expand Down