/***
*wcsrchr.c - find last occurrence of wchar_t character in wide string
*
*       Copyright (c) Microsoft Corporation. All rights reserved.
*
*Purpose:
*       defines wcsrchr() - find the last occurrence of a given character
*       in a string (wide-characters).
*
*******************************************************************************/

// NOTE(review): the header name of this #include was missing in the copy
// under review; <string.h> is presumed -- confirm against the build.
#include <string.h>

/***
*wchar_t *wcsrchr(string, ch) - find last occurrence of ch in wide string
*
*Purpose:
*       Finds the last occurrence of ch in string.  The terminating
*       null character is used as part of the search (wide-characters).
*
*Entry:
*       wchar_t *string - string to search in
*       wchar_t ch - character to search for
*
*Exit:
*       returns a pointer to the last occurrence of ch in the given
*       string
*       returns NULL if ch does not occur in the string
*
*Exceptions:
*
*******************************************************************************/

// Portable character-by-character implementation.  Walks to the end of the
// string, then scans backwards towards the first character.
static wchar_t * __cdecl wcsrchr_simple (
        const wchar_t * string,
        wchar_t ch
        )
{
        wchar_t *start = (wchar_t *)string;

        while (*string++)                       /* find end of string */
                ;

        /* search towards front; the first step lands on the terminator, */
        /* so ch == 0 matches the terminating null character itself      */
        while (--string != start && *string != ch)
                ;

        /* the loop may also have stopped at start without a match */
        if (*string == ch)                      /* wchar_t found ? */
                return( (wchar_t *)string );

        return(NULL);
}

#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC)

// ARM64 Neon Intrinsics variant
// For long strings, this is faster than the naive version.
// But for short strings there is overhead.

// NOTE(review): the header name of this #include was missing in the copy
// under review; <arm64string.h> is presumed to be the header providing
// vector_t, N128_ALIGN, N128_OFFSET and wcschr_zero_internal -- confirm.
#include <arm64string.h>

// Traverse the string forwards, only once.
// Collect possible matches along the way.
//
// Returns a pointer to the last occurrence of ch in string, or NULL if ch
// does not occur.  A search for the null character returns a pointer to
// the terminator.

wchar_t * __cdecl wcsrchr (
        const wchar_t * string,
        wchar_t ch
        )
{
        vector_t *src_a, characters, match;
        vector_t chmatch, zeromatch, orrmatches;
        __n64 uaddlvq32, maskv;
        unsigned __int64 chmask, zeromask, mask;
        unsigned long offset, ch_bitoffset, zero_bitoffset;

        // Last match seen so far; NULL until a match is recorded.
        wchar_t *found = NULL;

        // If the string is not 2-byte-aligned, which should be
        // rare, fall back to a wchar-by-wchar loop.
        if ((intptr_t)(string) & 0x1) {
                return wcsrchr_simple(string, ch);
        }

        // Searching for the terminator: the vector loop below treats a zero
        // as end-of-string, so this case is handled separately.
        if (ch == 0) {
                if (*string == 0) {
                        return (wchar_t *)string;
                } else {
                        return wcschr_zero_internal(string);
                }
        }

        // Start by getting the aligned XMMWORD containing the first
        // characters of the string. This is done first to partially
        // cover any memory access latency.
        // Use 16 byte alignment throughout, to guarantee page-safe loads.
        src_a = (vector_t*)N128_ALIGN(string);

        // Now create patterns to check for a terminating zero or match.
        // These characters are copied to every position of a XMMWORD.
        match = neon_dupqr16(ch);

        // prepare to mask off any bits before the beginning of the string.
        // (presumably the byte offset of string within its aligned block;
        // it is scaled to a bit count with << 2 below)
        offset = N128_OFFSET(string);

        {
                // Check initial full or partial XMMWORD
                characters = *src_a;

                // Compare against each pattern to get flags for each match
                chmatch = neon_cmeqq16(characters, match);
                zeromatch = neon_cmeqzq16(characters);

                // Reduce each comparison result to a 64-bit scalar mask.
                maskv = neon_shrn_16(chmatch, 4);
                chmask = neon_umov64(maskv, 0);
                maskv = neon_shrn_16(zeromatch, 4);
                zeromask = neon_umov64(maskv, 0);

                // For the initial XMMWORD mask off any bits before the beginning
                // of the string.  After the shift, bit 0 corresponds to the
                // first character of string, so offsets below are relative
                // to string rather than to src_a.
                chmask = (chmask >> (offset << 2));
                zeromask = (zeromask >> (offset << 2));

                if ((chmask != 0) || (zeromask != 0)) {
                        if (zeromask == 0) {
                                // There is no zero match in this vector.
                                // Record the offset of the LAST character match,
                                // and advance to the next vector.
                                // (chmask is non-zero here, so the scan succeeds.)
                                _BitScanReverse64(&ch_bitoffset, chmask);
                                // bit index -> character index -> byte offset
                                ch_bitoffset = (ch_bitoffset >> 3);
                                found = (wchar_t*)((ch_bitoffset << 1) + (intptr_t)(string));
                        } else {
                                if (chmask == 0) {
                                        // The next match is the end of the string.
                                        return found;
                                }

                                // Search the FIRST zero match
                                _BitScanForward64(&zero_bitoffset, zeromask);

                                // Mask off all character matches at or after the FIRST
                                // zero match (those lie beyond the end of the string),
                                // then RETURN the position of the LAST remaining
                                // character match, if any.
                                // zero_bitoffset is a bit index into a 64-bit mask and
                                // therefore at most 63, so the shift is well defined.
                                chmask = chmask & ((1ull << zero_bitoffset) - 1);

                                if (_BitScanReverse64(&ch_bitoffset, chmask)) {
                                        ch_bitoffset = (ch_bitoffset >> 3);
                                        found = (wchar_t*)((ch_bitoffset << 1) + (intptr_t)(string));
                                }

                                return found;
                        }
                }
        }

        for (;;) {
                // Check each XMMWORD until the end of the string is found.
                characters = *(++src_a);

                // Compare against each pattern to get flags for each match
                chmatch = neon_cmeqq16(characters, match);
                zeromatch = neon_cmeqzq16(characters);

                // Quick test: is there any character match or zero at all
                // in this vector?
                orrmatches = neon_orrq(chmatch, zeromatch);
                uaddlvq32 = neon_uaddlvq32(orrmatches);
                mask = neon_umov64(uaddlvq32, 0);

                if (mask != 0) {
                        maskv = neon_shrn_16(chmatch, 4);
                        chmask = neon_umov64(maskv, 0);
                        maskv = neon_shrn_16(zeromatch, 4);
                        zeromask = neon_umov64(maskv, 0);

                        if (zeromask == 0) {
                                // There is no zero match in this vector.
                                // Record the offset of the LAST character match,
                                // and advance to the next vector.
                                // (mask != 0 with no zero match means chmask != 0.)
                                _BitScanReverse64(&ch_bitoffset, chmask);
                                // bit index -> character index -> byte offset
                                ch_bitoffset = (ch_bitoffset >> 3);
                                found = (wchar_t*)((ch_bitoffset << 1) + (intptr_t)(src_a));
                        } else {
                                if (chmask == 0) {
                                        // The next match is the end of the string.
                                        return found;
                                }

                                // Search the FIRST zero match
                                _BitScanForward64(&zero_bitoffset, zeromask);

                                // Mask off all character matches at or after the FIRST
                                // zero match, then RETURN the position of the LAST
                                // remaining character match.  If none remains, the
                                // match recorded from an earlier vector (or NULL)
                                // is returned.
                                chmask = chmask & ((1ull << zero_bitoffset) - 1);

                                if (_BitScanReverse64(&ch_bitoffset, chmask)) {
                                        ch_bitoffset = (ch_bitoffset >> 3);
                                        found = (wchar_t*)((ch_bitoffset << 1) + (intptr_t)(src_a));
                                }

                                return found;
                        }
                }
        }
}

#else

// Generic targets: use the portable character-by-character search.
wchar_t * __cdecl wcsrchr (
        const wchar_t * string,
        wchar_t ch
        )
{
        return wcsrchr_simple(string, ch);
}

#endif