libxml2
|
Character encoding conversion functions. More...
Typedefs | |
typedef int(* | xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Convert characters to UTF-8. | |
typedef int(* | xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Convert characters from UTF-8. | |
typedef xmlCharEncError(* | xmlCharEncConvFunc) (void *vctxt, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush) |
Convert between character encodings. | |
typedef void(* | xmlCharEncConvCtxtDtor) (void *vctxt) |
Free a conversion context. | |
typedef xmlParserErrors(* | xmlCharEncConvImpl) (void *vctxt, const char *name, xmlCharEncFlags flags, xmlCharEncodingHandler **out) |
If this function returns XML_ERR_OK, it must fill the out pointer with an encoding handler. | |
Enumerations | |
enum | xmlCharEncError |
Encoding conversion errors. More... | |
enum | xmlCharEncoding |
Predefined values for some standard encodings. More... | |
enum | xmlCharEncFlags |
Encoding conversion flags. More... | |
Functions | |
void | xmlInitCharEncodingHandlers (void) |
void | xmlCleanupCharEncodingHandlers (void) |
Clean up the memory allocated for the char encoding support. It unregisters all the encoding handlers and the aliases. | |
void | xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler) |
Register the char encoding handler. | |
xmlCharEncodingHandlerPtr | xmlGetCharEncodingHandler (xmlCharEncoding enc) |
xmlCharEncodingHandlerPtr | xmlFindCharEncodingHandler (const char *name) |
If the encoding is UTF-8, this will return a no-op handler that shouldn't be used. | |
xmlCharEncodingHandlerPtr | xmlNewCharEncodingHandler (const char *name, xmlCharEncodingInputFunc input, xmlCharEncodingOutputFunc output) |
Creates and registers an xmlCharEncodingHandler. | |
xmlParserErrors | xmlCharEncNewCustomHandler (const char *name, xmlCharEncConvFunc input, xmlCharEncConvFunc output, xmlCharEncConvCtxtDtor ctxtDtor, void *inputCtxt, void *outputCtxt, xmlCharEncodingHandler **out) |
Create a custom xmlCharEncodingHandler. | |
int | xmlAddEncodingAlias (const char *name, const char *alias) |
Registers an alias `alias` for an encoding named `name`. | |
int | xmlDelEncodingAlias (const char *alias) |
Unregisters an encoding alias. | |
const char * | xmlGetEncodingAlias (const char *alias) |
Lookup an encoding name for the given alias. | |
void | xmlCleanupEncodingAliases (void) |
Unregisters all aliases. | |
xmlCharEncoding | xmlParseCharEncoding (const char *name) |
Compare the string to the encoding schemes already known. | |
const char * | xmlGetCharEncodingName (xmlCharEncoding enc) |
The "canonical" name for XML encoding. | |
xmlCharEncoding | xmlDetectCharEncoding (const unsigned char *in, int len) |
Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation. | |
int | xmlCharEncCloseFunc (xmlCharEncodingHandler *handler) |
Releases an xmlCharEncodingHandler. | |
int | xmlUTF8ToIsolat1 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 block of chars out. | |
int | xmlIsolat1ToUTF8 (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Take a block of ISO Latin 1 chars in and try to convert it to a UTF-8 block of chars out. | |
Character encoding conversion functions.
typedef void(* xmlCharEncConvCtxtDtor) (void *vctxt) |
Free a conversion context.
vctxt | conversion context |
typedef xmlCharEncError(* xmlCharEncConvFunc) (void *vctxt, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush) |
Convert between character encodings.
The value of inlen after return is the number of bytes consumed and outlen is the number of bytes produced.
If the converter can consume partial multi-byte sequences, the flush
flag can be used to detect truncated sequences at EOF. Otherwise, the flag can be ignored.
vctxt | conversion context |
out | a pointer to an array of bytes to store the result |
outlen | the length of out |
in | a pointer to an array of input bytes |
inlen | the length of in |
flush | end of input |
typedef xmlParserErrors(* xmlCharEncConvImpl) (void *vctxt, const char *name, xmlCharEncFlags flags, xmlCharEncodingHandler **out) |
If this function returns XML_ERR_OK, it must fill the out
pointer with an encoding handler.
The handler can be obtained from xmlCharEncNewCustomHandler().
flags
can contain XML_ENC_INPUT, XML_ENC_OUTPUT or both.
vctxt | user data |
name | encoding name |
flags | bit mask of flags |
out | pointer to resulting handler |
typedef int(* xmlCharEncodingInputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Convert characters to UTF-8.
On success, the value of inlen
after return is the number of bytes consumed and outlen
is the number of bytes produced.
out | a pointer to an array of bytes to store the UTF-8 result |
outlen | the length of out |
in | a pointer to an array of chars in the original encoding |
inlen | the length of in |
typedef int(* xmlCharEncodingOutputFunc) (unsigned char *out, int *outlen, const unsigned char *in, int *inlen) |
Convert characters from UTF-8.
On success, the value of inlen
after return is the number of bytes consumed and outlen
is the number of bytes produced.
out | a pointer to an array of bytes to store the result |
outlen | the length of out |
in | a pointer to an array of UTF-8 chars |
inlen | the length of in |
enum xmlCharEncError |
enum xmlCharEncFlags |
enum xmlCharEncoding |
Predefined values for some standard encodings.
int xmlAddEncodingAlias | ( | const char * | name, |
const char * | alias | ||
) |
Registers an alias `alias` for an encoding named `name`.
Existing aliases will be overwritten.
name | the encoding name as parsed, in UTF-8 format (ASCII actually) |
alias | the alias name as parsed, in UTF-8 format (ASCII actually) |
int xmlCharEncCloseFunc | ( | xmlCharEncodingHandler * | handler | ) |
Releases an xmlCharEncodingHandler.
Must be called after a handler is no longer in use.
handler | encoding handler |
xmlParserErrors xmlCharEncNewCustomHandler | ( | const char * | name, |
xmlCharEncConvFunc | input, | ||
xmlCharEncConvFunc | output, | ||
xmlCharEncConvCtxtDtor | ctxtDtor, | ||
void * | inputCtxt, | ||
void * | outputCtxt, | ||
xmlCharEncodingHandler ** | out | ||
) |
Create a custom xmlCharEncodingHandler.
name | the encoding name |
input | input callback which converts to UTF-8 |
output | output callback which converts from UTF-8 |
ctxtDtor | context destructor |
inputCtxt | context for input callback |
outputCtxt | context for output callback |
out | pointer to resulting handler |
void xmlCleanupCharEncodingHandlers | ( | void | ) |
Clean up the memory allocated for the char encoding support. It unregisters all the encoding handlers and the aliases.
void xmlCleanupEncodingAliases | ( | void | ) |
Unregisters all aliases.
int xmlDelEncodingAlias | ( | const char * | alias | ) |
Unregisters an encoding alias.
alias | the alias name as parsed, in UTF-8 format (ASCII actually) |
xmlCharEncoding xmlDetectCharEncoding | ( | const unsigned char * | in, |
int | len | ||
) |
Guess the encoding of the entity using the first bytes of the entity content according to the non-normative appendix F of the XML-1.0 recommendation.
in | a pointer to the first bytes of the XML entity, must be at least 2 bytes long (at least 4 if the encoding is a UCS-4 variant). |
len | the length of the buffer |
xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler | ( | const char * | name | ) |
If the encoding is UTF-8, this will return a no-op handler that shouldn't be used.
name | a string describing the char encoding. |
xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler | ( | xmlCharEncoding | enc | ) |
enc | an xmlCharEncoding value. |
const char * xmlGetCharEncodingName | ( | xmlCharEncoding | enc | ) |
The "canonical" name for XML encoding.
Cf. http://www.w3.org/TR/REC-xml#charencoding Section 4.3.3 Character Encoding in Entities
enc | the encoding |
const char * xmlGetEncodingAlias | ( | const char * | alias | ) |
Lookup an encoding name for the given alias.
alias | the alias name as parsed, in UTF-8 format (ASCII actually) |
void xmlInitCharEncodingHandlers | ( | void | ) |
int xmlIsolat1ToUTF8 | ( | unsigned char * | out, |
int * | outlen, | ||
const unsigned char * | in, | ||
int * | inlen | ||
) |
Take a block of ISO Latin 1 chars in and try to convert it to a UTF-8 block of chars out.
The value of inlen
after return is the number of bytes consumed. The value of outlen
after return is the number of bytes produced.
out | a pointer to an array of bytes to store the result |
outlen | the length of out |
in | a pointer to an array of ISO Latin 1 chars |
inlen | the length of in |
xmlCharEncodingHandlerPtr xmlNewCharEncodingHandler | ( | const char * | name, |
xmlCharEncodingInputFunc | input, | ||
xmlCharEncodingOutputFunc | output | ||
) |
Creates and registers an xmlCharEncodingHandler.
name | the encoding name, in UTF-8 format (ASCII actually) |
input | the xmlCharEncodingInputFunc to read that encoding |
output | the xmlCharEncodingOutputFunc to write that encoding |
xmlCharEncoding xmlParseCharEncoding | ( | const char * | name | ) |
Compare the string to the encoding schemes already known.
Note that the comparison is case-insensitive, in accordance with section [XML] 4.3.3 Character Encoding in Entities.
name | the encoding name as parsed, in UTF-8 format (ASCII actually) |
void xmlRegisterCharEncodingHandler | ( | xmlCharEncodingHandlerPtr | handler | ) |
Register the char encoding handler.
handler | the xmlCharEncodingHandlerPtr handler block |
int xmlUTF8ToIsolat1 | ( | unsigned char * | out, |
int * | outlen, | ||
const unsigned char * | in, | ||
int * | inlen | ||
) |
Take a block of UTF-8 chars in and try to convert it to an ISO Latin 1 block of chars out.
The value of inlen
after return is the number of bytes consumed. The value of outlen
after return is the number of bytes produced.
out | a pointer to an array of bytes to store the result |
outlen | the length of out |
in | a pointer to an array of UTF-8 chars |
inlen | the length of in |