zl程序教程

您现在的位置是:首页 >  数据库

当前栏目

PostgreSQL的 initdb 源代码分析之十七

postgresql 分析 源代码 十七
2023-09-11 14:20:28 时间

继续分析:

    setup_collation()

 展开:

/*
 * setup_collation
 *
 * Populate pg_collation from the locales reported by "locale -a".
 *
 * Each usable locale name is first loaded into a temporary table
 * (tmp_pg_collation) together with its encoding, optionally accompanied by
 * a normalized alias.  A single INSERT ... SELECT DISTINCT ON then copies
 * the rows into pg_collation, dropping duplicates.
 *
 * On builds without HAVE_LOCALE_T, or on WIN32, nothing is loaded and a
 * "not supported" notice is printed instead.
 */
static void
setup_collation(void)
{
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
    FILE       *locale_list;
    char        line[NAMEDATALEN];
    int            nfound = 0;

    PG_CMD_DECL;
#endif

    fputs(_("creating collations ... "), stdout);
    fflush(stdout);

#if defined(HAVE_LOCALE_T) && !defined(WIN32)
    /* Command line for a backend on template1, its output sent to DEVNULL. */
    snprintf(cmd, sizeof(cmd),
             "\"%s\" %s template1 >%s",
             backend_exec, backend_options,
             DEVNULL);

    /* Ask the operating system for its list of locales. */
    locale_list = popen_check("locale -a", "r");
    if (locale_list == NULL)
        return;                    /* failure was already reported */

    PG_CMD_OPEN;

    /* Staging table; the final copy into pg_collation happens below. */
    PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
                "    collname name, "
                "    locale name, "
                "    encoding int) WITHOUT OIDS;\n");

    /* One locale name per line of "locale -a" output. */
    while (fgets(line, sizeof(line), locale_list))
    {
        char        short_name[NAMEDATALEN];
        char       *locale_lit;
        size_t        linelen = strlen(line);
        int            encoding;
        int            pos;

        /*
         * A line that does not end in a newline did not fit into the buffer,
         * so the name is longer than we can store.
         */
        if (linelen == 0 || line[linelen - 1] != '\n')
        {
            if (debug)
                fprintf(stderr, _("%s: locale name too long, skipped: %s\n"),
                        progname, line);
            continue;
        }
        line[linelen - 1] = '\0';

        /*
         * Reject names containing non-ASCII bytes: interpreting them would
         * require the very locale they name.  The scan stops at the first
         * high-bit byte, so stopping short of the end means one was found.
         */
        for (pos = 0; pos < linelen; pos++)
        {
            if (IS_HIGHBIT_SET(line[pos]))
                break;
        }
        if (pos < linelen)
        {
            if (debug)
                fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
                        progname, line);
            continue;
        }

        /*
         * Skip the locale when its encoding cannot be determined (the lookup
         * prints its own message), when the encoding is client-only, or when
         * it is SQL_ASCII (C/POSIX are already in the catalog).
         */
        encoding = pg_get_encoding_from_locale(line, debug);
        if (encoding < 0 ||
            !PG_VALID_BE_ENCODING(encoding) ||
            encoding == PG_SQL_ASCII)
            continue;

        nfound++;

        locale_lit = escape_quotes(line);

        /* The locale under its own OS name. */
        PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n",
                       locale_lit, locale_lit, encoding);

        /*
         * Also offer a shortened alias (e.g. "en_US" for "en_US.utf8").
         * Collation names only need to be unique within one encoding, so
         * the alias cannot collide with "en_US" for another encoding.
         */
        if (normalize_locale_name(short_name, line))
        {
            char       *alias_lit = escape_quotes(short_name);

            PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n",
                           alias_lit, locale_lit, encoding);
        }
    }

    /* The SQL-standard collation name, mapped onto "C" for UTF8. */
    PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n", PG_UTF8);

    /*
     * Copy the staged rows into pg_collation.
     *
     * DISTINCT ON (collname, encoding) keeps one row per name and encoding.
     * An alias may collide with a real locale name of the same encoding:
     * "br_FR.iso88591" normalizes to "br_FR", which already exists for
     * LATIN1.  The ORDER BY prefers the row whose name equals the OS locale
     * name, and otherwise the first locale by sort order, which is an
     * arbitrary but deterministic choice.
     *
     * The NOT EXISTS clause drops names that clash with entries already in
     * pg_collation, such as the hard-wired "C".
     */
    PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
                " SELECT DISTINCT ON (collname, encoding)"
                "   collname, "
                "   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
                "   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
                "   encoding, locale, locale "
                "  FROM tmp_pg_collation"
                "  WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
                "  ORDER BY collname, encoding, (collname = locale) DESC, locale;\n");

    pclose(locale_list);
    PG_CMD_CLOSE;

    check_ok();

    /* With --debug every skipped locale was already explained above. */
    if (!debug && nfound == 0)
    {
        printf(_("No usable system locales were found.\n"));
        printf(_("Use the option \"--debug\" to see details.\n"));
    }
#else                            /* not HAVE_LOCALE_T && not WIN32 */
    printf(_("not supported on this platform\n"));
    fflush(stdout);
#endif   /* not HAVE_LOCALE_T  && not WIN32 */
}

其实质就是:读取 `locale -a` 的输出,把可用的 locale 名称(以及规范化后的别名)先写入临时表 tmp_pg_collation,最后去重并插入 pg_collation 系统表。

补充一点,pg_collation 的数据大概是这样的(collencoding 为 -1 表示适用于任何编码,6 对应 UTF8,8 对应 LATIN1;同一个 collname 可以在不同编码下各出现一次):

pgsql=# \x
Expanded display is on.
pgsql=# select * from pg_collation limit 10;
-[ RECORD 1 ]-+-----------------
collname      | default
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | 
collctype     | 
-[ RECORD 2 ]-+-----------------
collname      | C
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | C
collctype     | C
-[ RECORD 3 ]-+-----------------
collname      | POSIX
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | POSIX
collctype     | POSIX
-[ RECORD 4 ]-+-----------------
collname      | aa_DJ
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_DJ.utf8
collctype     | aa_DJ.utf8
-[ RECORD 5 ]-+-----------------
collname      | aa_DJ
collnamespace | 11
collowner     | 10
collencoding  | 8
collcollate   | aa_DJ
collctype     | aa_DJ
-[ RECORD 6 ]-+-----------------
collname      | aa_DJ.iso88591
collnamespace | 11
collowner     | 10
collencoding  | 8
collcollate   | aa_DJ.iso88591
collctype     | aa_DJ.iso88591
-[ RECORD 7 ]-+-----------------
collname      | aa_DJ.utf8
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_DJ.utf8
collctype     | aa_DJ.utf8
-[ RECORD 8 ]-+-----------------
collname      | aa_ER
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER
collctype     | aa_ER
-[ RECORD 9 ]-+-----------------
collname      | aa_ER.utf8
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER.utf8
collctype     | aa_ER.utf8
-[ RECORD 10 ]+-----------------
collname      | aa_ER.utf8@saaho
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER.utf8@saaho
collctype     | aa_ER.utf8@saaho

pgsql=#