LCOV - unnamed - glib/glib/gunicollate.c

LCOV - code coverage report

Current view:	top level - glib/glib - gunicollate.c (source / functions)		Hit	Total	Coverage
Test:	unnamed	Lines:	96	96	100.0 %
Date:	2024-04-23 05:16:05	Functions:	4	4	100.0 %
		Branches:	48	55	87.3 %

           Branch data     Line data    Source code

       1                 :            : /* gunicollate.c - Collation
       2                 :            :  *
       3                 :            :  *  Copyright 2001,2005 Red Hat, Inc.
       4                 :            :  *
       5                 :            :  * SPDX-License-Identifier: LGPL-2.1-or-later
       6                 :            :  *
       7                 :            :  * This library is free software; you can redistribute it and/or
       8                 :            :  * modify it under the terms of the GNU Lesser General Public
       9                 :            :  * License as published by the Free Software Foundation; either
      10                 :            :  * version 2.1 of the License, or (at your option) any later version.
      11                 :            :  *
      12                 :            :  * This library is distributed in the hope that it will be useful,
      13                 :            :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      14                 :            :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      15                 :            :  * Lesser General Public License for more details.
      16                 :            :  *
      17                 :            :  * You should have received a copy of the GNU Lesser General Public License
      18                 :            :  * along with this library; if not, see <http://www.gnu.org/licenses/>.
      19                 :            :  */
      20                 :            : 
      21                 :            : #include "config.h"
      22                 :            : 
      23                 :            : #include <locale.h>
      24                 :            : #include <string.h>
      25                 :            : #ifdef HAVE_WCHAR_H
      26                 :            : #include <wchar.h>
      27                 :            : #endif
      28                 :            : 
      29                 :            : #ifdef HAVE_CARBON
      30                 :            : #include <CoreServices/CoreServices.h>
      31                 :            : #endif
      32                 :            : 
      33                 :            : #include "gmem.h"
      34                 :            : #include "gunicode.h"
      35                 :            : #include "gunicodeprivate.h"
      36                 :            : #include "gstring.h"
      37                 :            : #include "gstrfuncs.h"
      38                 :            : #include "gtestutils.h"
      39                 :            : #include "gcharset.h"
      40                 :            : #include "gconvert.h"
      41                 :            : 
      42                 :            : #if SIZEOF_WCHAR_T == 4 && defined(__STDC_ISO_10646__)
      43                 :            : #define GUNICHAR_EQUALS_WCHAR_T 1
      44                 :            : #endif
      45                 :            : 
      46                 :            : #ifdef _MSC_VER
      47                 :            : /* Workaround for bug in MSVCR80.DLL */
      48                 :            : static gsize
      49                 :            : msc_strxfrm_wrapper (char       *string1,
      50                 :            :                      const char *string2,
      51                 :            :                      gsize       count)
      52                 :            : {
      53                 :            :   if (!string1 || count <= 0)
      54                 :            :     {
      55                 :            :       char tmp;
      56                 :            : 
      57                 :            :       return strxfrm (&tmp, string2, 1);
      58                 :            :     }
      59                 :            :   return strxfrm (string1, string2, count);
      60                 :            : }
      61                 :            : #define strxfrm msc_strxfrm_wrapper
      62                 :            : #endif
      63                 :            : 
      64                 :            : /**
      65                 :            :  * g_utf8_collate:
      66                 :            :  * @str1: a UTF-8 encoded string
      67                 :            :  * @str2: a UTF-8 encoded string
      68                 :            :  * 
      69                 :            :  * Compares two strings for ordering using the linguistically
      70                 :            :  * correct rules for the [current locale][setlocale].
      71                 :            :  * When sorting a large number of strings, it will be significantly 
      72                 :            :  * faster to obtain collation keys with g_utf8_collate_key() and 
      73                 :            :  * compare the keys with strcmp() when sorting instead of sorting 
      74                 :            :  * the original strings.
      75                 :            :  * 
      76                 :            :  * If the two strings are not comparable due to being in different collation
      77                 :            :  * sequences, the result is undefined. This can happen if the strings are in
      78                 :            :  * different language scripts, for example.
      79                 :            :  *
      80                 :            :  * Returns: < 0 if @str1 compares before @str2, 
      81                 :            :  *   0 if they compare equal, > 0 if @str1 compares after @str2.
      82                 :            :  **/
      83                 :            : gint
      84                 :         89 : g_utf8_collate (const gchar *str1,
      85                 :            :                 const gchar *str2)
      86                 :            : {
      87                 :            :   gint result;
      88                 :            : 
      89                 :            : #ifdef HAVE_CARBON
      90                 :            : 
      91                 :            :   UniChar *str1_utf16;
      92                 :            :   UniChar *str2_utf16;
      93                 :            :   glong len1;
      94                 :            :   glong len2;
      95                 :            :   SInt32 retval = 0;
      96                 :            : 
      97                 :            :   g_return_val_if_fail (str1 != NULL, 0);
      98                 :            :   g_return_val_if_fail (str2 != NULL, 0);
      99                 :            : 
     100                 :            :   str1_utf16 = g_utf8_to_utf16 (str1, -1, NULL, &len1, NULL);
     101                 :            :   str2_utf16 = g_utf8_to_utf16 (str2, -1, NULL, &len2, NULL);
     102                 :            : 
     103                 :            :   UCCompareTextDefault (kUCCollateStandardOptions,
     104                 :            :                         str1_utf16, len1, str2_utf16, len2,
     105                 :            :                         NULL, &retval);
     106                 :            :   result = retval;
     107                 :            : 
     108                 :            :   g_free (str2_utf16);
     109                 :            :   g_free (str1_utf16);
     110                 :            : 
     111                 :            : #elif defined(HAVE_WCHAR_H) && defined(GUNICHAR_EQUALS_WCHAR_T)
     112                 :            : 
     113                 :            :   gunichar *str1_norm;
     114                 :            :   gunichar *str2_norm;
     115                 :            : 
     116                 :         89 :   g_return_val_if_fail (str1 != NULL, 0);
     117                 :         89 :   g_return_val_if_fail (str2 != NULL, 0);
     118                 :            : 
     119                 :         89 :   str1_norm = _g_utf8_normalize_wc (str1, -1, G_NORMALIZE_ALL_COMPOSE);
     120                 :         89 :   str2_norm = _g_utf8_normalize_wc (str2, -1, G_NORMALIZE_ALL_COMPOSE);
     121                 :            : 
     122                 :         89 :   result = wcscoll ((wchar_t *)str1_norm, (wchar_t *)str2_norm);
     123                 :            : 
     124                 :         89 :   g_free (str1_norm);
     125                 :         89 :   g_free (str2_norm);
     126                 :            : 
     127                 :            : #else
     128                 :            : 
     129                 :            :   const gchar *charset;
     130                 :            :   gchar *str1_norm;
     131                 :            :   gchar *str2_norm;
     132                 :            : 
     133                 :            :   g_return_val_if_fail (str1 != NULL, 0);
     134                 :            :   g_return_val_if_fail (str2 != NULL, 0);
     135                 :            : 
     136                 :            :   str1_norm = g_utf8_normalize (str1, -1, G_NORMALIZE_ALL_COMPOSE);
     137                 :            :   str2_norm = g_utf8_normalize (str2, -1, G_NORMALIZE_ALL_COMPOSE);
     138                 :            : 
     139                 :            :   if (g_get_charset (&charset))
     140                 :            :     {
     141                 :            :       result = strcoll (str1_norm, str2_norm);
     142                 :            :     }
     143                 :            :   else
     144                 :            :     {
     145                 :            :       gchar *str1_locale = g_convert (str1_norm, -1, charset, "UTF-8", NULL, NULL, NULL);
     146                 :            :       gchar *str2_locale = g_convert (str2_norm, -1, charset, "UTF-8", NULL, NULL, NULL);
     147                 :            : 
     148                 :            :       if (str1_locale && str2_locale)
     149                 :            :         result =  strcoll (str1_locale, str2_locale);
     150                 :            :       else if (str1_locale)
     151                 :            :         result = -1;
     152                 :            :       else if (str2_locale)
     153                 :            :         result = 1;
     154                 :            :       else
     155                 :            :         result = strcmp (str1_norm, str2_norm);
     156                 :            : 
     157                 :            :       g_free (str1_locale);
     158                 :            :       g_free (str2_locale);
     159                 :            :     }
     160                 :            : 
     161                 :            :   g_free (str1_norm);
     162                 :            :   g_free (str2_norm);
     163                 :            : 
     164                 :            : #endif
     165                 :            : 
     166                 :         89 :   return result;
     167                 :            : }
     168                 :            : 
     169                 :            : #if defined(HAVE_WCHAR_H) && defined(GUNICHAR_EQUALS_WCHAR_T)
     170                 :            : /* We need UTF-8 encoding of numbers to encode the weights if
     171                 :            :  * we are using wcsxfrm. However, we aren't encoding Unicode
     172                 :            :  * characters, so we can't simply use g_unichar_to_utf8.
     173                 :            :  *
     174                 :            :  * The following routine is taken (with modification) from GNU
     175                 :            :  * libc's strxfrm routine:
     176                 :            :  *
     177                 :            :  * Copyright (C) 1995-1999,2000,2001 Free Software Foundation, Inc.
     178                 :            :  * Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
     179                 :            :  */
     180                 :            : static inline int
     181                 :       9072 : utf8_encode (char *buf, wchar_t val)
     182                 :            : {
     183                 :            :   int retval;
     184                 :            : 
     185         [ +  + ]:       9072 :   if (val < 0x80)
     186                 :            :     {
     187         [ +  + ]:       7956 :       if (buf)
     188                 :       3978 :         *buf++ = (char) val;
     189                 :       7956 :       retval = 1;
     190                 :            :     }
     191                 :            :   else
     192                 :            :     {
     193                 :            :       int step;
     194                 :            : 
     195         [ +  - ]:       1152 :       for (step = 2; step < 6; ++step)
     196         [ +  + ]:       1152 :         if ((val & (~(guint32)0 << (5 * step + 1))) == 0)
     197                 :       1116 :           break;
     198                 :       1116 :       retval = step;
     199                 :            : 
     200         [ +  + ]:       1116 :       if (buf)
     201                 :            :         {
     202                 :        558 :           *buf = (unsigned char) (~0xff >> step);
     203                 :        558 :           --step;
     204                 :            :           do
     205                 :            :             {
     206                 :        576 :               buf[step] = 0x80 | (val & 0x3f);
     207                 :        576 :               val >>= 6;
     208                 :            :             }
     209         [ +  + ]:        576 :           while (--step > 0);
     210                 :        558 :           *buf |= val;
     211                 :            :         }
     212                 :            :     }
     213                 :            : 
     214                 :       9072 :   return retval;
     215                 :            : }
     216                 :            : #endif
     217                 :            : 
     218                 :            : #ifdef HAVE_CARBON
     219                 :            : 
     220                 :            : static gchar *
     221                 :            : collate_key_to_string (UCCollationValue *key,
     222                 :            :                        gsize             key_len)
     223                 :            : {
     224                 :            :   gchar *result;
     225                 :            :   gsize result_len;
     226                 :            :   long *lkey = (long *) key;
     227                 :            : 
     228                 :            :   /* UCCollationValue format:
     229                 :            :    *
     230                 :            :    * UCCollateOptions (32/64 bits)
     231                 :            :    * SizeInBytes      (32/64 bits)
     232                 :            :    * Value            (8 bits arrey)
     233                 :            :    *
     234                 :            :    * UCCollateOptions: ordering option mask of the collator
     235                 :            :    * used to create the key. Size changes on 32-bit / 64-bit
     236                 :            :    * hosts. On 64-bits also the extra half-word seems to have
     237                 :            :    * some extra (unknown) meaning.
     238                 :            :    * SizeInBytes: size of the whole structure, in bytes
     239                 :            :    * (including UCCollateOptions and SizeInBytes fields). Size
     240                 :            :    * changes on 32-bit & 64-bit hosts.
     241                 :            :    * Value: array of bytes containing the comparison weights.
     242                 :            :    * Seems to have several sub-strings separated by \001 and \002
     243                 :            :    * chars. Also, experience shows this is directly strcmp-able.
     244                 :            :    */
     245                 :            : 
     246                 :            :   result_len = lkey[1];
     247                 :            :   result = g_malloc (result_len + 1);
     248                 :            :   memcpy (result, &lkey[2], result_len);
     249                 :            :   result[result_len] = '\0';
     250                 :            : 
     251                 :            :   return result;
     252                 :            : }
     253                 :            : 
     254                 :            : static gchar *
     255                 :            : carbon_collate_key_with_collator (const gchar *str,
     256                 :            :                                   gssize       len,
     257                 :            :                                   CollatorRef  collator)
     258                 :            : {
     259                 :            :   UniChar *str_utf16 = NULL;
     260                 :            :   glong len_utf16;
     261                 :            :   OSStatus ret;
     262                 :            :   UCCollationValue staticbuf[512];
     263                 :            :   UCCollationValue *freeme = NULL;
     264                 :            :   UCCollationValue *buf;
     265                 :            :   ItemCount buf_len;
     266                 :            :   ItemCount key_len;
     267                 :            :   ItemCount try_len;
     268                 :            :   gchar *result = NULL;
     269                 :            : 
     270                 :            :   str_utf16 = g_utf8_to_utf16 (str, len, NULL, &len_utf16, NULL);
     271                 :            :   try_len = len_utf16 * 5 + 2;
     272                 :            : 
     273                 :            :   if (try_len <= sizeof staticbuf)
     274                 :            :     {
     275                 :            :       buf = staticbuf;
     276                 :            :       buf_len = sizeof staticbuf;
     277                 :            :     }
     278                 :            :   else
     279                 :            :     {
     280                 :            :       freeme = g_new (UCCollationValue, try_len);
     281                 :            :       buf = freeme;
     282                 :            :       buf_len = try_len;
     283                 :            :     }
     284                 :            : 
     285                 :            :   ret = UCGetCollationKey (collator, str_utf16, len_utf16,
     286                 :            :                            buf_len, &key_len, buf);
     287                 :            : 
     288                 :            :   if (ret == kCollateBufferTooSmall)
     289                 :            :     {
     290                 :            :       freeme = g_renew (UCCollationValue, freeme, try_len * 2);
     291                 :            :       buf = freeme;
     292                 :            :       buf_len = try_len * 2;
     293                 :            :       ret = UCGetCollationKey (collator, str_utf16, len_utf16,
     294                 :            :                                buf_len, &key_len, buf);
     295                 :            :     }
     296                 :            : 
     297                 :            :   if (ret == 0)
     298                 :            :     result = collate_key_to_string (buf, key_len);
     299                 :            :   else
     300                 :            :     result = g_strdup ("");
     301                 :            : 
     302                 :            :   g_free (freeme);
     303                 :            :   g_free (str_utf16);
     304                 :            :   return result;
     305                 :            : }
     306                 :            : 
     307                 :            : static gchar *
     308                 :            : carbon_collate_key (const gchar *str,
     309                 :            :                     gssize       len)
     310                 :            : {
     311                 :            :   static CollatorRef collator;
     312                 :            : 
     313                 :            :   if (G_UNLIKELY (!collator))
     314                 :            :     {
     315                 :            :       UCCreateCollator (NULL, 0, kUCCollateStandardOptions, &collator);
     316                 :            : 
     317                 :            :       if (!collator)
     318                 :            :         {
     319                 :            :           static gboolean been_here;
     320                 :            :           if (!been_here)
     321                 :            :             g_warning ("%s: UCCreateCollator failed", G_STRLOC);
     322                 :            :           been_here = TRUE;
     323                 :            :           return g_strdup ("");
     324                 :            :         }
     325                 :            :     }
     326                 :            : 
     327                 :            :   return carbon_collate_key_with_collator (str, len, collator);
     328                 :            : }
     329                 :            : 
     330                 :            : static gchar *
     331                 :            : carbon_collate_key_for_filename (const gchar *str,
     332                 :            :                                  gssize       len)
     333                 :            : {
     334                 :            :   static CollatorRef collator;
     335                 :            : 
     336                 :            :   if (G_UNLIKELY (!collator))
     337                 :            :     {
     338                 :            :       /* http://developer.apple.com/qa/qa2004/qa1159.html */
     339                 :            :       UCCreateCollator (NULL, 0,
     340                 :            :                         kUCCollateComposeInsensitiveMask
     341                 :            :                          | kUCCollateWidthInsensitiveMask
     342                 :            :                          | kUCCollateCaseInsensitiveMask
     343                 :            :                          | kUCCollateDigitsOverrideMask
     344                 :            :                          | kUCCollateDigitsAsNumberMask
     345                 :            :                          | kUCCollatePunctuationSignificantMask, 
     346                 :            :                         &collator);
     347                 :            : 
     348                 :            :       if (!collator)
     349                 :            :         {
     350                 :            :           static gboolean been_here;
     351                 :            :           if (!been_here)
     352                 :            :             g_warning ("%s: UCCreateCollator failed", G_STRLOC);
     353                 :            :           been_here = TRUE;
     354                 :            :           return g_strdup ("");
     355                 :            :         }
     356                 :            :     }
     357                 :            : 
     358                 :            :   return carbon_collate_key_with_collator (str, len, collator);
     359                 :            : }
     360                 :            : 
     361                 :            : #endif /* HAVE_CARBON */
     362                 :            : 
     363                 :            : /**
     364                 :            :  * g_utf8_collate_key:
     365                 :            :  * @str: a UTF-8 encoded string.
     366                 :            :  * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
     367                 :            :  *
     368                 :            :  * Converts a string into a collation key that can be compared
     369                 :            :  * with other collation keys produced by the same function using 
     370                 :            :  * strcmp(). 
     371                 :            :  *
     372                 :            :  * The results of comparing the collation keys of two strings 
     373                 :            :  * with strcmp() will always be the same as comparing the two 
     374                 :            :  * original keys with g_utf8_collate().
     375                 :            :  * 
     376                 :            :  * Note that this function depends on the [current locale][setlocale].
     377                 :            :  * 
     378                 :            :  * Returns: a newly allocated string. This string should
     379                 :            :  *   be freed with g_free() when you are done with it.
     380                 :            :  **/
     381                 :            : gchar *
     382                 :        299 : g_utf8_collate_key (const gchar *str,
     383                 :            :                     gssize       len)
     384                 :            : {
     385                 :            :   gchar *result;
     386                 :            : 
     387                 :            : #ifdef HAVE_CARBON
     388                 :            : 
     389                 :            :   g_return_val_if_fail (str != NULL, NULL);
     390                 :            :   result = carbon_collate_key (str, len);
     391                 :            : 
     392                 :            : #elif defined(HAVE_WCHAR_H) && defined(GUNICHAR_EQUALS_WCHAR_T)
     393                 :            : 
     394                 :            :   gsize xfrm_len;
     395                 :            :   gunichar *str_norm;
     396                 :            :   wchar_t *result_wc;
     397                 :            :   gsize i;
     398                 :        299 :   gsize result_len = 0;
     399                 :            : 
     400                 :        299 :   g_return_val_if_fail (str != NULL, NULL);
     401                 :            : 
     402                 :        299 :   str_norm = _g_utf8_normalize_wc (str, len, G_NORMALIZE_ALL_COMPOSE);
     403                 :            : 
     404                 :        299 :   g_return_val_if_fail (str_norm != NULL, NULL);
     405                 :            : 
     406                 :        299 :   xfrm_len = wcsxfrm (NULL, (wchar_t *)str_norm, 0);
     407                 :        299 :   result_wc = g_new (wchar_t, xfrm_len + 1);
     408                 :        299 :   wcsxfrm (result_wc, (wchar_t *)str_norm, xfrm_len + 1);
     409                 :            : 
     410         [ +  + ]:       4835 :   for (i = 0; i < xfrm_len; i++)
     411                 :       4536 :     result_len += utf8_encode (NULL, result_wc[i]);
     412                 :            : 
     413                 :        299 :   result = g_malloc (result_len + 1);
     414                 :        299 :   result_len = 0;
     415         [ +  + ]:       4835 :   for (i = 0; i < xfrm_len; i++)
     416                 :       4536 :     result_len += utf8_encode (result + result_len, result_wc[i]);
     417                 :            : 
     418                 :        299 :   result[result_len] = '\0';
     419                 :            : 
     420                 :        299 :   g_free (result_wc);
     421                 :        299 :   g_free (str_norm);
     422                 :            : 
     423                 :        299 :   return result;
     424                 :            : #else
     425                 :            : 
     426                 :            :   gsize xfrm_len = 0;
     427                 :            :   const gchar *charset;
     428                 :            :   gchar *str_norm;
     429                 :            : 
     430                 :            :   g_return_val_if_fail (str != NULL, NULL);
     431                 :            : 
     432                 :            :   str_norm = g_utf8_normalize (str, len, G_NORMALIZE_ALL_COMPOSE);
     433                 :            : 
     434                 :            :   result = NULL;
     435                 :            : 
     436                 :            :   if (g_get_charset (&charset))
     437                 :            :     {
     438                 :            :       xfrm_len = strxfrm (NULL, str_norm, 0);
     439                 :            :       if (xfrm_len < G_MAXINT - 2)
     440                 :            :         {
     441                 :            :           result = g_malloc (xfrm_len + 1);
     442                 :            :           strxfrm (result, str_norm, xfrm_len + 1);
     443                 :            :         }
     444                 :            :     }
     445                 :            :   else
     446                 :            :     {
     447                 :            :       gchar *str_locale = g_convert (str_norm, -1, charset, "UTF-8", NULL, NULL, NULL);
     448                 :            : 
     449                 :            :       if (str_locale)
     450                 :            :         {
     451                 :            :           xfrm_len = strxfrm (NULL, str_locale, 0);
     452                 :            :           if (xfrm_len >= G_MAXINT - 2)
     453                 :            :             {
     454                 :            :               g_free (str_locale);
     455                 :            :               str_locale = NULL;
     456                 :            :             }
     457                 :            :         }
     458                 :            :       if (str_locale)
     459                 :            :         {
     460                 :            :           result = g_malloc (xfrm_len + 2);
     461                 :            :           result[0] = 'A';
     462                 :            :           strxfrm (result + 1, str_locale, xfrm_len + 1);
     463                 :            :           
     464                 :            :           g_free (str_locale);
     465                 :            :         }
     466                 :            :     }
     467                 :            :     
     468                 :            :   if (!result) 
     469                 :            :     {
     470                 :            :       xfrm_len = strlen (str_norm);
     471                 :            :       result = g_malloc (xfrm_len + 2);
     472                 :            :       result[0] = 'B';
     473                 :            :       memcpy (result + 1, str_norm, xfrm_len);
     474                 :            :       result[xfrm_len+1] = '\0';
     475                 :            :     }
     476                 :            : 
     477                 :            :   g_free (str_norm);
     478                 :            : #endif
     479                 :            : 
     480                 :            :   return result;
     481                 :            : }
     482                 :            : 
     483                 :            : /* This is a collation key that is very very likely to sort before any
     484                 :            :  * collation key that libc strxfrm generates. We use this before any
     485                 :            :  * special case (dot or number) to make sure that its sorted before
     486                 :            :  * anything else.
     487                 :            :  */
     488                 :            : #define COLLATION_SENTINEL "\1\1\1"
     489                 :            : 
     490                 :            : /**
     491                 :            :  * g_utf8_collate_key_for_filename:
     492                 :            :  * @str: a UTF-8 encoded string.
     493                 :            :  * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
     494                 :            :  *
     495                 :            :  * Converts a string into a collation key that can be compared
     496                 :            :  * with other collation keys produced by the same function using strcmp(). 
     497                 :            :  * 
     498                 :            :  * In order to sort filenames correctly, this function treats the dot '.' 
     499                 :            :  * as a special case. Most dictionary orderings seem to consider it
     500                 :            :  * insignificant, thus producing the ordering "event.c" "eventgenerator.c"
     501                 :            :  * "event.h" instead of "event.c" "event.h" "eventgenerator.c". Also, we
     502                 :            :  * would like to treat numbers intelligently so that "file1" "file10" "file5"
     503                 :            :  * is sorted as "file1" "file5" "file10".
     504                 :            :  * 
     505                 :            :  * Note that this function depends on the [current locale][setlocale].
     506                 :            :  *
     507                 :            :  * Returns: a newly allocated string. This string should
     508                 :            :  *   be freed with g_free() when you are done with it.
     509                 :            :  *
     510                 :            :  * Since: 2.8
     511                 :            :  */
     512                 :            : gchar *
     513                 :         36 : g_utf8_collate_key_for_filename (const gchar *str,
     514                 :            :                                  gssize       len)
     515                 :            : {
     516                 :            : #ifndef HAVE_CARBON
     517                 :            :   GString *result;
     518                 :            :   GString *append;
     519                 :            :   const gchar *p;
     520                 :            :   const gchar *prev;
     521                 :            :   const gchar *end;
     522                 :            :   gchar *collate_key;
     523                 :            :   gint digits;
     524                 :            :   gint leading_zeros;
     525                 :            : 
     526                 :            :   /*
     527                 :            :    * How it works:
     528                 :            :    *
     529                 :            :    * Split the filename into collatable substrings which do
     530                 :            :    * not contain [.0-9] and special-cased substrings. The collatable 
     531                 :            :    * substrings are run through the normal g_utf8_collate_key() and the 
     532                 :            :    * resulting keys are concatenated with keys generated from the 
     533                 :            :    * special-cased substrings.
     534                 :            :    *
     535                 :            :    * Special cases: Dots are handled by replacing them with '\1' which 
     536                 :            :    * implies that short dot-delimited substrings are before long ones, 
     537                 :            :    * e.g.
     538                 :            :    * 
     539                 :            :    *   a\1a   (a.a)
     540                 :            :    *   a-\1a  (a-.a)
     541                 :            :    *   aa\1a  (aa.a)
     542                 :            :    * 
     543                 :            :    * Numbers are handled by prepending to each number d-1 superdigits 
     544                 :            :    * where d = number of digits in the number and SUPERDIGIT is a 
     545                 :            :    * character with an integer value higher than any digit (for instance 
     546                 :            :    * ':'). This ensures that single-digit numbers are sorted before 
     547                 :            :    * double-digit numbers which in turn are sorted separately from 
     548                 :            :    * triple-digit numbers, etc. To avoid strange side-effects when 
     549                 :            :    * sorting strings that already contain SUPERDIGITs, a '\2'
     550                 :            :    * is also prepended, like this
     551                 :            :    *
     552                 :            :    *   file\21      (file1)
     553                 :            :    *   file\25      (file5)
     554                 :            :    *   file\2:10    (file10)
     555                 :            :    *   file\2:26    (file26)
     556                 :            :    *   file\2::100  (file100)
     557                 :            :    *   file:foo     (file:foo)
     558                 :            :    * 
     559                 :            :    * This has the side-effect of sorting numbers before everything else (except
     560                 :            :    * dots), but this is probably OK.
     561                 :            :    *
     562                 :            :    * Leading digits are ignored when doing the above. To discriminate
     563                 :            :    * numbers which differ only in the number of leading digits, we append
     564                 :            :    * the number of leading digits as a byte at the very end of the collation
     565                 :            :    * key.
     566                 :            :    *
     567                 :            :    * To try avoid conflict with any collation key sequence generated by libc we
     568                 :            :    * start each switch to a special cased part with a sentinel that hopefully
     569                 :            :    * will sort before anything libc will generate.
     570                 :            :    */
     571                 :            : 
     572         [ +  - ]:         36 :   if (len < 0)
     573                 :         36 :     len = strlen (str);
     574                 :            : 
     575                 :         36 :   result = g_string_sized_new (len * 2);
     576                 :         36 :   append = g_string_sized_new (0);
     577                 :            : 
     578                 :         36 :   end = str + len;
     579                 :            : 
     580                 :            :   /* No need to use utf8 functions, since we're only looking for ascii chars */
     581         [ +  + ]:        216 :   for (prev = p = str; p < end; p++)
     582                 :            :     {
     583      [ +  +  + ]:        180 :       switch (*p)
     584                 :            :         {
     585                 :         10 :         case '.':
     586         [ +  + ]:         10 :           if (prev != p) 
     587                 :            :             {
     588                 :          8 :               collate_key = g_utf8_collate_key (prev, p - prev);
     589                 :            :               g_string_append (result, collate_key);
     590                 :          8 :               g_free (collate_key);
     591                 :            :             }
     592                 :            :           
     593         [ +  - ]:         10 :           g_string_append (result, COLLATION_SENTINEL "\1");
     594                 :            :           
     595                 :            :           /* skip the dot */
     596                 :         10 :           prev = p + 1;
     597                 :         10 :           break;
     598                 :            :           
     599                 :         20 :         case '0':
     600                 :            :         case '1':
     601                 :            :         case '2':
     602                 :            :         case '3':
     603                 :            :         case '4':
     604                 :            :         case '5':
     605                 :            :         case '6':
     606                 :            :         case '7':
     607                 :            :         case '8':
     608                 :            :         case '9':
     609         [ +  + ]:         20 :           if (prev != p) 
     610                 :            :             {
     611                 :         19 :               collate_key = g_utf8_collate_key (prev, p - prev);
     612                 :            :               g_string_append (result, collate_key);
     613                 :         19 :               g_free (collate_key);
     614                 :            :             }
     615                 :            :           
     616         [ +  - ]:         20 :           g_string_append (result, COLLATION_SENTINEL "\2");
     617                 :            :           
     618                 :         20 :           prev = p;
     619                 :            :           
     620                 :            :           /* write d-1 colons */
     621         [ +  + ]:         20 :           if (*p == '0')
     622                 :            :             {
     623                 :          7 :               leading_zeros = 1;
     624                 :          7 :               digits = 0;
     625                 :            :             }
     626                 :            :           else
     627                 :            :             {
     628                 :         13 :               leading_zeros = 0;
     629                 :         13 :               digits = 1;
     630                 :            :             }
     631                 :            :           
     632         [ +  + ]:         44 :           while (++p < end)
     633                 :            :             {
     634   [ +  +  +  + ]:         27 :               if (*p == '0' && !digits)
     635                 :          7 :                 ++leading_zeros;
     636         [ +  + ]:         20 :               else if (g_ascii_isdigit(*p))
     637                 :         17 :                 ++digits;
     638                 :            :               else
     639                 :            :                 {
     640                 :            :                   /* count an all-zero sequence as
     641                 :            :                    * one digit plus leading zeros
     642                 :            :                    */
     643         [ +  + ]:          3 :                   if (!digits)
     644                 :            :                     {
     645                 :          1 :                       ++digits;
     646                 :          1 :                       --leading_zeros;
     647                 :            :                     }        
     648                 :          3 :                   break;
     649                 :            :                 }
     650                 :            :             }
     651                 :            : 
     652         [ +  + ]:         32 :           while (digits > 1)
     653                 :            :             {
     654                 :            :               g_string_append_c (result, ':');
     655                 :         12 :               --digits;
     656                 :            :             }
     657                 :            : 
     658         [ +  + ]:         20 :           if (leading_zeros > 0)
     659                 :            :             {
     660         [ +  - ]:          7 :               g_string_append_c (append, (char)leading_zeros);
     661                 :          7 :               prev += leading_zeros;
     662                 :            :             }
     663                 :            :           
     664                 :            :           /* write the number itself */
     665         [ -  + ]:         20 :           g_string_append_len (result, prev, p - prev);
     666                 :            :           
     667                 :         20 :           prev = p;
     668                 :         20 :           --p;    /* go one step back to avoid disturbing outer loop */
     669                 :         20 :           break;
     670                 :            :           
     671                 :        150 :         default:
     672                 :            :           /* other characters just accumulate */
     673                 :        150 :           break;
     674                 :            :         }
     675                 :            :     }
     676                 :            :   
     677         [ +  + ]:         36 :   if (prev != p) 
     678                 :            :     {
     679                 :         19 :       collate_key = g_utf8_collate_key (prev, p - prev);
     680                 :            :       g_string_append (result, collate_key);
     681                 :         19 :       g_free (collate_key);
     682                 :            :     }
     683                 :            :   
     684         [ -  + ]:         36 :   g_string_append (result, append->str);
     685                 :         36 :   g_string_free (append, TRUE);
     686                 :            : 
     687                 :         36 :   return g_string_free (result, FALSE);
     688                 :            : #else /* HAVE_CARBON */
     689                 :            :   return carbon_collate_key_for_filename (str, len);
     690                 :            : #endif
     691                 :            : }

Generated by: LCOV version 1.14