advanced-python-homework-2023/time_execution/pypy3.9-v7.3.13-linux64/include/pypy3.9/cpyext_unicodeobject.h

/* --- Internal Unicode Format -------------------------------------------- */


/* Py_UNICODE is the legacy native storage unit (one code unit) that Python
   used for a single Unicode element of the Unicode type.  Since PEP 393 it
   is deprecated and is merely a typedef to wchar_t. */

#define PY_UNICODE_TYPE wchar_t
typedef PY_UNICODE_TYPE Py_UNICODE;

/* Code unit types for the respective Unicode representations:
   Py_UCS1, Py_UCS2 and Py_UCS4. */
typedef unsigned char Py_UCS1;
typedef unsigned short Py_UCS2;
typedef unsigned int Py_UCS4;

/* --- Unicode Type ------------------------------------------------------- */

typedef struct {
    /* Interning state, one of:

         SSTATE_NOT_INTERNED (0)
         SSTATE_INTERNED_MORTAL (1)
         SSTATE_INTERNED_IMMORTAL (2)

       Whenever interned != SSTATE_NOT_INTERNED, the two references that
       the dictionary holds to this object are *not* counted in
       ob_refcnt. */
    unsigned char interned;

    /* Character size, one of:

       PyUnicode_WCHAR_KIND (0)
         - character type = wchar_t (16 or 32 bits, depending on the
           platform)

       PyUnicode_1BYTE_KIND (1)
         - character type = Py_UCS1 (8 bits, unsigned)
         - all characters are in the range U+0000-U+00FF (latin1)
         - if ascii is set, all characters are in the range
           U+0000-U+007F (ASCII); otherwise at least one character is
           in the range U+0080-U+00FF

       PyUnicode_2BYTE_KIND (2)
         - character type = Py_UCS2 (16 bits, unsigned)
         - all characters are in the range U+0000-U+FFFF (BMP)
         - at least one character is in the range U+0100-U+FFFF

       PyUnicode_4BYTE_KIND (4)
         - character type = Py_UCS4 (32 bits, unsigned)
         - all characters are in the range U+0000-U+10FFFF
         - at least one character is in the range U+10000-U+10FFFF */
    unsigned char kind;

    /* Allocation scheme.  A compact unicode object needs a single memory
       block; a non-compact object uses one block for the PyUnicodeObject
       struct and a second one for its data buffer. */
    unsigned char compact;

    /* Set when the string holds only characters in the range
       U+0000-U+007F (ASCII) and the kind is PyUnicode_1BYTE_KIND.  When
       both ascii and compact are set, use the PyASCIIObject structure. */
    unsigned char ascii;

    /* Set once the object layout is completely initialized, i.e. the
       object is compact or its data pointer is filled out.  The flag is
       redundant and helps to minimize the test in PyUnicode_IS_READY(). */
    unsigned char ready;

    /* A padding member that would keep PyUnicode_DATA() aligned to
       4 bytes (see issue #19537 on m68k) is not present on PyPy. */
} _PyASCIIObject_state_t;

/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
   structure. state.ascii and state.compact are set, and the data
   immediately follow the structure. utf8_length and wstr_length can be found
   in the length field; the utf8 pointer is equal to the data pointer. */
typedef struct {
    /* There are 4 forms of Unicode strings:

       - compact ascii:

         * structure = PyASCIIObject
         * test: PyUnicode_IS_COMPACT_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND
         * compact = 1
         * ascii = 1
         * ready = 1
         * (length is the length of the utf8 and wstr strings)
         * (data starts just after the structure)
         * (since ASCII is decoded from UTF-8, the utf8 string is the data)

       - compact:

         * structure = PyCompactUnicodeObject
         * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 1
         * ready = 1
         * ascii = 0
         * utf8 is not shared with data
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data and wstr_length=length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL
         * (data starts just after the structure)

       - legacy string, not ready:

         * structure = PyUnicodeObject
         * test: kind == PyUnicode_WCHAR_KIND
         * length = 0 (use wstr_length)
         * hash = -1
         * kind = PyUnicode_WCHAR_KIND
         * compact = 0
         * ascii = 0
         * ready = 0
         * interned = SSTATE_NOT_INTERNED
         * wstr is not NULL
         * data.any is NULL
         * utf8 is NULL
         * utf8_length = 0

       - legacy string, ready:

         * structure = PyUnicodeObject
         * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
         * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
           PyUnicode_4BYTE_KIND
         * compact = 0
         * ready = 1
         * data.any is not NULL
         * utf8 is shared and utf8_length = length with data.any if ascii = 1
         * utf8_length = 0 if utf8 is NULL
         * wstr is shared with data.any and wstr_length = length
           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
         * wstr_length = 0 if wstr is NULL

       Compact strings use only one memory block (structure + characters),
       whereas legacy strings use one block for the structure and one block
       for characters.

       Legacy strings are created by PyUnicode_FromUnicode() and
       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
       when PyUnicode_READY() is called.

       See also _PyUnicode_CheckConsistency().

       NOTE(review): this header comments out the hash member below and
       declares PyUnicodeObject.data as a plain void pointer, so the
       "hash = -1" and "data.any" items above presumably describe the
       upstream layout rather than this one -- confirm.
    */
    PyObject_HEAD
    Py_ssize_t length;          /* Number of code points in the string */
    /* NOTE: the hash member on the next line is commented out, so
       PyASCIIObject as declared here has no hash field. */
    //Py_hash_t hash;             /* Hash value; -1 if not set */
    _PyASCIIObject_state_t state;
    wchar_t *wstr;              /* wchar_t representation (null-terminated) */
} PyASCIIObject;

/* PyUnicode_New allocates non-ASCII strings as PyCompactUnicodeObject:
   state.compact is set, and the data immediately follow the structure. */
typedef struct {
    PyASCIIObject _base;

    /* Number of bytes in utf8, excluding the terminating \0. */
    Py_ssize_t utf8_length;

    /* UTF-8 representation (null-terminated). */
    char *utf8;

    /* Number of code points in wstr; possible surrogates count as two
       code points. */
    Py_ssize_t wstr_length;
} PyCompactUnicodeObject;

/* PyUnicode_FromUnicode(NULL, len) allocates strings as PyUnicodeObject.
   Their contents start out in the wstr block and are copied into the data
   block using _PyUnicode_Ready. */
typedef struct {
    PyCompactUnicodeObject _base;

    /* Canonical, smallest-form Unicode buffer. */
    void *data;
} PyUnicodeObject;


/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */

/* Values for PyASCIIObject.state -- interning state: */
#define SSTATE_NOT_INTERNED      0
#define SSTATE_INTERNED_MORTAL   1
#define SSTATE_INTERNED_IMMORTAL 2

/* --- Constants ---------------------------------------------------------- */

/* Replacement character that is substituted during decoding when the
   errors argument is set to "replace".  Note: U+FFFD is the official
   REPLACEMENT CHARACTER in Unicode 3.0. */

#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4)0xFFFD)
README.md 2023-10-12 17:14:36 +03:00			`/* --- Internal Unicode Format -------------------------------------------- */`


			`/* Py_UNICODE was the native Unicode storage format (code unit) used by`
			`Python and represents a single Unicode element in the Unicode type.`
			`With PEP 393, Py_UNICODE is deprecated and replaced with a`
			`typedef to wchar_t. */`

			`#define PY_UNICODE_TYPE wchar_t`
			`typedef wchar_t Py_UNICODE;`

			`/* Py_UCS4 and Py_UCS2 are typedefs for the respective`
			`unicode representations. */`
			`typedef unsigned int Py_UCS4;`
			`typedef unsigned short Py_UCS2;`
			`typedef unsigned char Py_UCS1;`

			`/* --- Unicode Type ------------------------------------------------------- */`

			`typedef struct {`
			`/*`
			`SSTATE_NOT_INTERNED (0)`
			`SSTATE_INTERNED_MORTAL (1)`
			`SSTATE_INTERNED_IMMORTAL (2)`

			`If interned != SSTATE_NOT_INTERNED, the two references from the`
			`dictionary to this object are *not* counted in ob_refcnt.`
			`*/`
			`unsigned char interned;`
			`/* Character size:`

			`- PyUnicode_WCHAR_KIND (0):`

			`* character type = wchar_t (16 or 32 bits, depending on the`
			`platform)`

			`- PyUnicode_1BYTE_KIND (1):`

			`* character type = Py_UCS1 (8 bits, unsigned)`
			`* all characters are in the range U+0000-U+00FF (latin1)`
			`* if ascii is set, all characters are in the range U+0000-U+007F`
			`(ASCII), otherwise at least one character is in the range`
			`U+0080-U+00FF`

			`- PyUnicode_2BYTE_KIND (2):`

			`* character type = Py_UCS2 (16 bits, unsigned)`
			`* all characters are in the range U+0000-U+FFFF (BMP)`
			`* at least one character is in the range U+0100-U+FFFF`

			`- PyUnicode_4BYTE_KIND (4):`

			`* character type = Py_UCS4 (32 bits, unsigned)`
			`* all characters are in the range U+0000-U+10FFFF`
			`* at least one character is in the range U+10000-U+10FFFF`
			`*/`
			`unsigned char kind;`
			`/* Compact is with respect to the allocation scheme. Compact unicode`
			`objects only require one memory block while non-compact objects use`
			`one block for the PyUnicodeObject struct and another for its data`
			`buffer. */`
			`unsigned char compact;`
			`/* The string only contains characters in the range U+0000-U+007F (ASCII)`
			`and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is`
			`set, use the PyASCIIObject structure. */`
			`unsigned char ascii;`
			`/* The ready flag indicates whether the object layout is initialized`
			`completely. This means that this is either a compact object, or`
			`the data pointer is filled out. The bit is redundant, and helps`
			`to minimize the test in PyUnicode_IS_READY(). */`
			`unsigned char ready;`
			`/* Padding to ensure that PyUnicode_DATA() is always aligned to`
			`4 bytes (see issue #19537 on m68k). */`
			`/* not on PyPy */`
			`} _PyASCIIObject_state_t;`

			`/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject`
			`structure. state.ascii and state.compact are set, and the data`
			`immediately follow the structure. utf8_length and wstr_length can be found`
			`in the length field; the utf8 pointer is equal to the data pointer. */`
			`typedef struct {`
			`/* There are 4 forms of Unicode strings:`

			`- compact ascii:`

			`* structure = PyASCIIObject`
			`* test: PyUnicode_IS_COMPACT_ASCII(op)`
			`* kind = PyUnicode_1BYTE_KIND`
			`* compact = 1`
			`* ascii = 1`
			`* ready = 1`
			`* (length is the length of the utf8 and wstr strings)`
			`* (data starts just after the structure)`
			`* (since ASCII is decoded from UTF-8, the utf8 string are the data)`

			`- compact:`

			`* structure = PyCompactUnicodeObject`
			`* test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)`
			`* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or`
			`PyUnicode_4BYTE_KIND`
			`* compact = 1`
			`* ready = 1`
			`* ascii = 0`
			`* utf8 is not shared with data`
			`* utf8_length = 0 if utf8 is NULL`
			`* wstr is shared with data and wstr_length=length`
			`if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2`
			`or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4`
			`* wstr_length = 0 if wstr is NULL`
			`* (data starts just after the structure)`

			`- legacy string, not ready:`

			`* structure = PyUnicodeObject`
			`* test: kind == PyUnicode_WCHAR_KIND`
			`* length = 0 (use wstr_length)`
			`* hash = -1`
			`* kind = PyUnicode_WCHAR_KIND`
			`* compact = 0`
			`* ascii = 0`
			`* ready = 0`
			`* interned = SSTATE_NOT_INTERNED`
			`* wstr is not NULL`
			`* data.any is NULL`
			`* utf8 is NULL`
			`* utf8_length = 0`

			`- legacy string, ready:`

			`* structure = PyUnicodeObject structure`
			`* test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND`
			`* kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or`
			`PyUnicode_4BYTE_KIND`
			`* compact = 0`
			`* ready = 1`
			`* data.any is not NULL`
			`* utf8 is shared and utf8_length = length with data.any if ascii = 1`
			`* utf8_length = 0 if utf8 is NULL`
			`* wstr is shared with data.any and wstr_length = length`
			`if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2`
			`or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4`
			`* wstr_length = 0 if wstr is NULL`

			`Compact strings use only one memory block (structure + characters),`
			`whereas legacy strings use one block for the structure and one block`
			`for characters.`

			`Legacy strings are created by PyUnicode_FromUnicode() and`
			`PyUnicode_FromStringAndSize(NULL, size) functions. They become ready`
			`when PyUnicode_READY() is called.`

			`See also _PyUnicode_CheckConsistency().`
			`*/`
			`PyObject_HEAD`
			`Py_ssize_t length; /* Number of code points in the string */`
			`//Py_hash_t hash; /* Hash value; -1 if not set */`
			`_PyASCIIObject_state_t state;`
			`wchar_t *wstr; /* wchar_t representation (null-terminated) */`
			`} PyASCIIObject;`

			`/* Non-ASCII strings allocated through PyUnicode_New use the`
			`PyCompactUnicodeObject structure. state.compact is set, and the data`
			`immediately follow the structure. */`
			`typedef struct {`
			`PyASCIIObject _base;`
			`Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the`
			`* terminating \0. */`
			`char *utf8; /* UTF-8 representation (null-terminated) */`
			`Py_ssize_t wstr_length; /* Number of code points in wstr, possible`
			`* surrogates count as two code points. */`
			`} PyCompactUnicodeObject;`

			`/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the`
			`PyUnicodeObject structure. The actual string data is initially in the wstr`
			`block, and copied into the data block using _PyUnicode_Ready. */`
			`typedef struct {`
			`PyCompactUnicodeObject _base;`
			`void* data; /* Canonical, smallest-form Unicode buffer */`
			`} PyUnicodeObject;`


			`/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */`

			`/* Values for PyASCIIObject.state: */`

			`/* Interning state. */`
			`#define SSTATE_NOT_INTERNED 0`
			`#define SSTATE_INTERNED_MORTAL 1`
			`#define SSTATE_INTERNED_IMMORTAL 2`

			`/* --- Constants ---------------------------------------------------------- */`

			`/* This Unicode character will be used as replacement character during`
			`decoding if the errors argument is set to "replace". Note: the`
			`Unicode character U+FFFD is the official REPLACEMENT CHARACTER in`
			`Unicode 3.0. */`

			`#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)`