Branch data Line data Source code
1 : : /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*-
2 : : * GObject introspection: Typelib hashing
3 : : *
4 : : * Copyright (C) 2010 Red Hat, Inc.
5 : : *
6 : : * SPDX-License-Identifier: LGPL-2.1-or-later
7 : : *
8 : : * This library is free software; you can redistribute it and/or
9 : : * modify it under the terms of the GNU Lesser General Public
10 : : * License as published by the Free Software Foundation; either
11 : : * version 2 of the License, or (at your option) any later version.
12 : : *
13 : : * This library is distributed in the hope that it will be useful,
14 : : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : : * Lesser General Public License for more details.
17 : : *
18 : : * You should have received a copy of the GNU Lesser General Public
19 : : * License along with this library; if not, write to the
20 : : * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 : : * Boston, MA 02111-1307, USA.
22 : : */
23 : :
24 : : #include <glib.h>
25 : : #include <glib-object.h>
26 : : #include <string.h>
27 : :
28 : : #include "cmph/cmph.h"
29 : : #include "gitypelib-internal.h"
30 : :
31 : : #define ALIGN_VALUE(this, boundary) \
32 : : (( ((unsigned long)(this)) + (((unsigned long)(boundary)) -1)) & (~(((unsigned long)(boundary))-1)))
33 : :
34 : : /*
35 : : * String hashing in the typelib. We have a set of static (fixed) strings,
36 : : * and given one, we need to find its index number. This problem is perfect
37 : : * hashing: http://en.wikipedia.org/wiki/Perfect_hashing
38 : : *
39 : : * I chose CMPH (http://cmph.sourceforge.net/) as it seemed high
40 : : * quality, well documented, and easy to embed.
41 : : *
42 : : * CMPH provides a number of algorithms; I chose BDZ, because while CHD
43 : : * appears to be the "best", the simplicity of BDZ appealed, and really,
44 : : * we're only talking about thousands of strings here, not millions, so
45 : : * a few microseconds is no big deal.
46 : : *
47 : : * In memory, the format is:
48 : : * INT32 mph_size
49 : : * MPH (mph_size bytes)
50 : : * (padding for alignment to uint32 if necessary)
51 : : * INDEX (array of uint16_t)
52 : : *
53 : : * Because BDZ is not order preserving, we need a lookaside table which
54 : : * maps the hash value into the directory index.
55 : : */
56 : :
57 : : struct _GITypelibHashBuilder {
58 : : gboolean prepared;
59 : : gboolean buildable;
60 : : cmph_t *c;
61 : : GHashTable *strings;
62 : : uint32_t dirmap_offset;
63 : : uint32_t packed_size;
64 : : };
65 : :
66 : : GITypelibHashBuilder *
67 : 8 : gi_typelib_hash_builder_new (void)
68 : : {
69 : 8 : GITypelibHashBuilder *builder = g_slice_new0 (GITypelibHashBuilder);
70 : 8 : builder->c = NULL;
71 : 8 : builder->strings = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL);
72 : 8 : return builder;
73 : : }
74 : :
75 : : void
76 : 2227 : gi_typelib_hash_builder_add_string (GITypelibHashBuilder *builder,
77 : : const char *str,
78 : : uint16_t value)
79 : : {
80 : 2227 : g_return_if_fail (builder->c == NULL);
81 : 4454 : g_hash_table_insert (builder->strings, g_strdup (str), GUINT_TO_POINTER (value));
82 : : }
83 : :
84 : : gboolean
85 : 8 : gi_typelib_hash_builder_prepare (GITypelibHashBuilder *builder)
86 : : {
87 : : char **strs;
88 : : GHashTableIter hashiter;
89 : : void *key, *value;
90 : : cmph_io_adapter_t *io;
91 : : cmph_config_t *config;
92 : : uint32_t num_elts;
93 : : uint32_t offset;
94 : : unsigned i;
95 : :
96 : 8 : if (builder->prepared)
97 : 0 : return builder->buildable;
98 : 8 : g_assert (builder->c == NULL);
99 : :
100 : 8 : num_elts = g_hash_table_size (builder->strings);
101 : 8 : g_assert (num_elts <= 65536);
102 : :
103 : 8 : strs = (char**) g_new (char *, num_elts + 1);
104 : :
105 : 8 : i = 0;
106 : 8 : g_hash_table_iter_init (&hashiter, builder->strings);
107 : 2235 : while (g_hash_table_iter_next (&hashiter, &key, &value))
108 : : {
109 : 2227 : const char *str = key;
110 : :
111 : 4454 : strs[i++] = g_strdup (str);
112 : : }
113 : 8 : strs[i++] = NULL;
114 : :
115 : 8 : io = cmph_io_vector_adapter (strs, num_elts);
116 : 8 : config = cmph_config_new (io);
117 : 8 : cmph_config_set_algo (config, CMPH_BDZ);
118 : :
119 : 8 : builder->c = cmph_new (config);
120 : 8 : builder->prepared = TRUE;
121 : 8 : if (!builder->c)
122 : : {
123 : 0 : builder->buildable = FALSE;
124 : 0 : goto out;
125 : : }
126 : 8 : builder->buildable = TRUE;
127 : 8 : g_assert (cmph_size (builder->c) == num_elts);
128 : :
129 : : /* Pack a size counter at front */
130 : 8 : offset = sizeof (uint32_t) + cmph_packed_size (builder->c);
131 : 8 : builder->dirmap_offset = ALIGN_VALUE (offset, 4);
132 : 8 : builder->packed_size = builder->dirmap_offset + (num_elts * sizeof (uint16_t));
133 : 8 : out:
134 : 8 : g_strfreev (strs);
135 : 8 : cmph_config_destroy (config);
136 : 8 : cmph_io_vector_adapter_destroy (io);
137 : 8 : return builder->buildable;
138 : : }
139 : :
140 : : uint32_t
141 : 8 : gi_typelib_hash_builder_get_buffer_size (GITypelibHashBuilder *builder)
142 : : {
143 : 8 : g_return_val_if_fail (builder != NULL, 0);
144 : 8 : g_return_val_if_fail (builder->prepared, 0);
145 : 8 : g_return_val_if_fail (builder->buildable, 0 );
146 : :
147 : 8 : return builder->packed_size;
148 : : }
149 : :
150 : : void
151 : 8 : gi_typelib_hash_builder_pack (GITypelibHashBuilder *builder, uint8_t* mem, uint32_t len)
152 : : {
153 : : uint16_t *table;
154 : : GHashTableIter hashiter;
155 : : void *key, *value;
156 : : #ifndef G_DISABLE_ASSERT
157 : : uint32_t num_elts;
158 : : #endif
159 : : uint8_t *packed_mem;
160 : :
161 : 8 : g_return_if_fail (builder != NULL);
162 : 8 : g_return_if_fail (builder->prepared);
163 : 8 : g_return_if_fail (builder->buildable);
164 : :
165 : 8 : g_assert (len >= builder->packed_size);
166 : 8 : g_assert ((((size_t)mem) & 0x3) == 0);
167 : :
168 : 8 : memset (mem, 0, len);
169 : :
170 : 8 : *((uint32_t*) mem) = builder->dirmap_offset;
171 : 8 : packed_mem = (uint8_t*)(mem + sizeof (uint32_t));
172 : 8 : cmph_pack (builder->c, packed_mem);
173 : :
174 : 8 : table = (uint16_t*) (mem + builder->dirmap_offset);
175 : :
176 : : #ifndef G_DISABLE_ASSERT
177 : 8 : num_elts = g_hash_table_size (builder->strings);
178 : : #endif
179 : 8 : g_hash_table_iter_init (&hashiter, builder->strings);
180 : 2235 : while (g_hash_table_iter_next (&hashiter, &key, &value))
181 : : {
182 : 2227 : const char *str = key;
183 : 2227 : uint16_t strval = (uint16_t)GPOINTER_TO_UINT(value);
184 : : uint32_t hashv;
185 : :
186 : 2227 : hashv = cmph_search_packed (packed_mem, str, strlen (str));
187 : 2227 : g_assert (hashv < num_elts);
188 : 2227 : table[hashv] = strval;
189 : : }
190 : : }
191 : :
192 : : void
193 : 8 : gi_typelib_hash_builder_destroy (GITypelibHashBuilder *builder)
194 : : {
195 : 8 : if (builder->c)
196 : : {
197 : 8 : cmph_destroy (builder->c);
198 : 8 : builder->c = NULL;
199 : : }
200 : 8 : g_hash_table_destroy (builder->strings);
201 : 8 : g_slice_free (GITypelibHashBuilder, builder);
202 : 8 : }
203 : :
204 : : uint16_t
205 : 63 : gi_typelib_hash_search (uint8_t* memory, const char *str, uint32_t n_entries)
206 : : {
207 : : uint32_t *mph;
208 : : uint16_t *table;
209 : : uint32_t dirmap_offset;
210 : : uint32_t offset;
211 : :
212 : 63 : g_assert ((((size_t)memory) & 0x3) == 0);
213 : 63 : mph = ((uint32_t*)memory)+1;
214 : :
215 : 63 : offset = cmph_search_packed (mph, str, strlen (str));
216 : :
217 : : /* Make sure that offset always lies in the entries array. cmph
218 : : cometimes generates offset larger than number of entries (for
219 : : 'str' argument which is not in the hashed list). In this case,
220 : : fake the correct result and depend on caller's final check that
221 : : the entry is really the one that the caller wanted. */
222 : 63 : if (offset >= n_entries)
223 : 0 : offset = 0;
224 : :
225 : 63 : dirmap_offset = *((uint32_t*)memory);
226 : 63 : table = (uint16_t*) (memory + dirmap_offset);
227 : :
228 : 63 : return table[offset];
229 : : }
230 : :
|