ENCA_STMT_START #define ENCA_STMT_START do ENCA_STMT_END #define ENCA_STMT_END while (0) EncaAnalyserOptions EncaAnalyserState EncaCharsetInfo EncaLanguageInfo EncaLanguageHookData1CS EncaLanguageHookDataEOL EncaUTFCheckData EncaCharsetInfo struct _EncaCharsetInfo { int enca; int rfc1345; int cstocs; int iconv; int mime; const char *human; unsigned int flags; unsigned int nsurface; }; EncaHookFunc int EncaAnalyserState *analyser EncaGuessFunc int EncaAnalyserState *analyser EncaLanguageInfo struct _EncaLanguageInfo { const char *name; const char *humanname; size_t ncharsets; const char *const *csnames; const unsigned short int *const *weights; const unsigned short int *significant; const unsigned char *const *letters; const unsigned char **const *pairs; long int weight_sum; EncaHookFunc hook; EncaHookFunc eolhook; EncaHookFunc lcuchook; EncaHookFunc ratinghook; }; EncaAnalyserOptions struct _EncaAnalyserOptions { int const_buffer; size_t min_chars; double threshold; int multibyte_enabled; int interpreted_surfaces; int ambiguous_mode; int filtering; int test_garbageness; int termination_strictness; }; EncaAnalyserState struct _EncaAnalyserState { /* Language data. */ const EncaLanguageInfo *lang; size_t ncharsets; int *charsets; /* Analyser state. */ EncaErrno gerrno; size_t size; unsigned char *buffer; EncaEncoding result; size_t *counts; size_t bin; size_t up; double *ratings; size_t *order; size_t size2; unsigned char *buffer2; /* Double-UTF-8 data. */ EncaUTFCheckData *utfch; int *utfbuf; /* Pair frequency data */ unsigned char *pair2bits; size_t *bitcounts; size_t *pairratings; /* LCUC data XXX: unused (yet) */ size_t *lcbits; size_t *ucbits; /* Options. */ EncaAnalyserOptions options; }; EncaLanguageHookData1CS struct _EncaLanguageHookData1CS { const char *name; size_t size; const unsigned char *list; size_t cs; }; EncaLanguageHookDataEOL struct _EncaLanguageHookDataEOL { const char *name; EncaSurface eol; size_t cs; }; EncaUTFCheckData struct _EncaUTFCheckData { double rating; size_t size; int result; int *ucs2; int *weights; }; FILL_NONLETTER #define FILL_NONLETTER '.' EPSILON #define EPSILON 0.000001 LF #define LF ((unsigned char)'\n') CR #define CR ((unsigned char)'\r') enca_ctype_test #define enca_ctype_test(c, t) ((enca_ctype_data[(unsigned char)c] & t) != 0) enca_isalnum #define enca_isalnum(c) enca_ctype_test((c), ENCA_CTYPE_ALNUM) enca_isalpha #define enca_isalpha(c) enca_ctype_test((c), ENCA_CTYPE_ALPHA) enca_iscntrl #define enca_iscntrl(c) enca_ctype_test((c), ENCA_CTYPE_CNTRL) enca_isdigit #define enca_isdigit(c) enca_ctype_test((c), ENCA_CTYPE_DIGIT) enca_isgraph #define enca_isgraph(c) enca_ctype_test((c), ENCA_CTYPE_GRAPH) enca_islower #define enca_islower(c) enca_ctype_test((c), ENCA_CTYPE_LOWER) enca_isprint #define enca_isprint(c) enca_ctype_test((c), ENCA_CTYPE_PRINT) enca_ispunct #define enca_ispunct(c) enca_ctype_test((c), ENCA_CTYPE_PUNCT) enca_isspace #define enca_isspace(c) enca_ctype_test((c), ENCA_CTYPE_SPACE) enca_isupper #define enca_isupper(c) enca_ctype_test((c), ENCA_CTYPE_UPPER) enca_isxdigit #define enca_isxdigit(c) enca_ctype_test((c), ENCA_CTYPE_XDIGIT) enca_isname #define enca_isname(c) enca_ctype_test((c), ENCA_CTYPE_NAME) enca_isbinary #define enca_isbinary(c) enca_ctype_test((c), ENCA_CTYPE_BINARY) enca_istext #define enca_istext(c) enca_ctype_test((c), ENCA_CTYPE_TEXT) ELEMENTS #define ELEMENTS(array) (sizeof(array)/sizeof((array)[0])) enca_malloc void* size_t size enca_realloc void* void *ptr,size_t size enca_free #define enca_free(ptr) \ ENCA_STMT_START{ if (ptr) free(ptr); ptr=NULL; }ENCA_STMT_END NEW #define NEW(type,n) ((type*)enca_malloc((n)*sizeof(type))) RENEW #define RENEW(ptr,type,n) ((type*)enca_realloc((ptr),(n)*sizeof(type))) MAKE_HOOK_LINE #define MAKE_HOOK_LINE(name) \ { #name, ELEMENTS(list_##name), list_##name, (size_t)-1 } enca_strdup char* const char *s enca_strstr const char* const char *haystack,const char* needle enca_strstr # define enca_strstr strstr enca_stpcpy char* char *dest,const char *src enca_stpcpy # define enca_stpcpy stpcpy enca_csname #define enca_csname(cs) enca_charset_name((cs), ENCA_NAME_STYLE_ENCA) enca_strconcat char* const char *str,... enca_strappend char* char *str,... enca_name_to_charset int const char *csname enca_name_to_surface EncaSurface const char *sname enca_language_init int EncaAnalyserState *analyser,const char *langname enca_language_destroy void EncaAnalyserState *analyser enca_get_charset_similarity_matrix double* const EncaLanguageInfo *lang enca_charsets_subset_identical int int charset1,int charset2,const size_t *counts enca_filter_boxdraw size_t EncaAnalyserState *analyser,unsigned char fill_char enca_language_hook_ncs int EncaAnalyserState *analyser,size_t ncs,EncaLanguageHookData1CS *hookdata enca_language_hook_eol int EncaAnalyserState *analyser,size_t ncs,EncaLanguageHookDataEOL *hookdata enca_guess_init void EncaAnalyserState *analyser enca_guess_destroy void EncaAnalyserState *analyser enca_eol_surface EncaSurface const unsigned char *buffer,size_t size,const size_t *counts enca_find_max_sec void EncaAnalyserState *analyser enca_double_utf8_init void EncaAnalyserState *analyser enca_double_utf8_destroy void EncaAnalyserState *analyser enca_pair_init void EncaAnalyserState *analyser enca_pair_destroy void EncaAnalyserState *analyser enca_pair_analyse int EncaAnalyserState *analyser ENCA_LANGUAGE_BE extern const EncaLanguageInfo ENCA_LANGUAGE_BE; ENCA_LANGUAGE_BG extern const EncaLanguageInfo ENCA_LANGUAGE_BG; ENCA_LANGUAGE_CS extern const EncaLanguageInfo ENCA_LANGUAGE_CS; ENCA_LANGUAGE_ET extern const EncaLanguageInfo ENCA_LANGUAGE_ET; ENCA_LANGUAGE_HR extern const EncaLanguageInfo ENCA_LANGUAGE_HR; ENCA_LANGUAGE_HU extern const EncaLanguageInfo ENCA_LANGUAGE_HU; ENCA_LANGUAGE_LT extern const EncaLanguageInfo ENCA_LANGUAGE_LT; ENCA_LANGUAGE_LV extern const EncaLanguageInfo ENCA_LANGUAGE_LV; ENCA_LANGUAGE_PL extern const EncaLanguageInfo ENCA_LANGUAGE_PL; ENCA_LANGUAGE_RU extern const EncaLanguageInfo ENCA_LANGUAGE_RU; ENCA_LANGUAGE_SK extern const EncaLanguageInfo ENCA_LANGUAGE_SK; ENCA_LANGUAGE_SL extern const EncaLanguageInfo ENCA_LANGUAGE_SL; ENCA_LANGUAGE_UK extern const EncaLanguageInfo ENCA_LANGUAGE_UK; ENCA_LANGUAGE_ZH extern const EncaLanguageInfo ENCA_LANGUAGE_ZH; EncaSurface typedef enum { /*< flags >*/ ENCA_SURFACE_EOL_CR = 1 << 0, ENCA_SURFACE_EOL_LF = 1 << 1, ENCA_SURFACE_EOL_CRLF = 1 << 2, ENCA_SURFACE_EOL_MIX = 1 << 3, ENCA_SURFACE_EOL_BIN = 1 << 4, ENCA_SURFACE_MASK_EOL = (ENCA_SURFACE_EOL_CR | ENCA_SURFACE_EOL_LF | ENCA_SURFACE_EOL_CRLF | ENCA_SURFACE_EOL_MIX | ENCA_SURFACE_EOL_BIN), ENCA_SURFACE_PERM_21 = 1 << 5, ENCA_SURFACE_PERM_4321 = 1 << 6, ENCA_SURFACE_PERM_MIX = 1 << 7, ENCA_SURFACE_MASK_PERM = (ENCA_SURFACE_PERM_21 | ENCA_SURFACE_PERM_4321 | ENCA_SURFACE_PERM_MIX), ENCA_SURFACE_QP = 1 << 8, ENCA_SURFACE_REMOVE = 1 << 13, ENCA_SURFACE_UNKNOWN = 1 << 14, ENCA_SURFACE_MASK_ALL = (ENCA_SURFACE_MASK_EOL | ENCA_SURFACE_MASK_PERM | ENCA_SURFACE_QP | ENCA_SURFACE_REMOVE) } EncaSurface; EncaNameStyle typedef enum { ENCA_NAME_STYLE_ENCA, ENCA_NAME_STYLE_RFC1345, ENCA_NAME_STYLE_CSTOCS, ENCA_NAME_STYLE_ICONV, ENCA_NAME_STYLE_HUMAN, ENCA_NAME_STYLE_MIME } EncaNameStyle; EncaCharsetFlags typedef enum { /*< flags >*/ ENCA_CHARSET_7BIT = 1 << 0, ENCA_CHARSET_8BIT = 1 << 1, ENCA_CHARSET_16BIT = 1 << 2, ENCA_CHARSET_32BIT = 1 << 3, ENCA_CHARSET_FIXED = 1 << 4, ENCA_CHARSET_VARIABLE = 1 << 5, ENCA_CHARSET_BINARY = 1 << 6, ENCA_CHARSET_REGULAR = 1 << 7, ENCA_CHARSET_MULTIBYTE = 1 << 8 } EncaCharsetFlags; EncaErrno typedef enum { ENCA_EOK = 0, ENCA_EINVALUE, ENCA_EEMPTY, ENCA_EFILTERED, ENCA_ENOCS8, ENCA_ESIGNIF, ENCA_EWINNER, ENCA_EGARBAGE } EncaErrno; ENCA_CS_UNKNOWN #define ENCA_CS_UNKNOWN (-1) ENCA_NOT_A_CHAR #define ENCA_NOT_A_CHAR 0xffff EncaAnalyser typedef struct _EncaAnalyserState *EncaAnalyser; EncaEncoding EncaEncoding struct _EncaEncoding { int charset; EncaSurface surface; }; enca_analyser_alloc EncaAnalyser const char *langname enca_analyser_free void EncaAnalyser analyser enca_analyse EncaEncoding EncaAnalyser analyser,unsigned char *buffer,size_t size enca_analyse_const EncaEncoding EncaAnalyser analyser,const unsigned char *buffer,size_t size enca_double_utf8_check int EncaAnalyser analyser,const unsigned char *buffer,size_t size enca_double_utf8_get_candidates int* EncaAnalyser analyser enca_errno int EncaAnalyser analyser enca_strerror const char* EncaAnalyser analyser,int errnum enca_set_multibyte void EncaAnalyser analyser,int multibyte enca_get_multibyte int EncaAnalyser analyser enca_set_interpreted_surfaces void EncaAnalyser analyser,int interpreted_surfaces enca_get_interpreted_surfaces int EncaAnalyser analyser enca_set_ambiguity void EncaAnalyser analyser,int ambiguity enca_get_ambiguity int EncaAnalyser analyser enca_set_filtering void EncaAnalyser analyser,int filtering enca_get_filtering int EncaAnalyser analyser enca_set_garbage_test void EncaAnalyser analyser,int garabage_test enca_get_garbage_test int EncaAnalyser analyser enca_set_termination_strictness void EncaAnalyser analyser,int termination_strictness enca_get_termination_strictness int EncaAnalyser analyser enca_set_significant int EncaAnalyser analyser,size_t significant enca_get_significant size_t EncaAnalyser analyser enca_set_threshold int EncaAnalyser analyser,double threshold enca_get_threshold double EncaAnalyser analyser enca_charset_name const char* int charset,EncaNameStyle whatname enca_get_charset_aliases const char** int charset,size_t *n enca_get_surface_name char* EncaSurface surface,EncaNameStyle whatname enca_parse_encoding_name EncaEncoding const char *name enca_charset_natural_surface EncaSurface int charset enca_charset_properties EncaCharsetFlags int charset enca_charset_is_known #define enca_charset_is_known(cs) \ ((cs) != ENCA_CS_UNKNOWN) enca_charset_is_7bit #define enca_charset_is_7bit(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_7BIT) enca_charset_is_8bit #define enca_charset_is_8bit(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_8BIT) enca_charset_is_16bit #define enca_charset_is_16bit(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_16BIT) enca_charset_is_32bit #define enca_charset_is_32bit(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_32BIT) enca_charset_is_fixed #define enca_charset_is_fixed(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_FIXED) enca_charset_is_variable #define enca_charset_is_variable(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_VARIABLE) enca_charset_is_binary #define enca_charset_is_binary(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_BINARY) enca_charset_is_regular #define enca_charset_is_regular(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_REGULAR) enca_charset_is_multibyte #define enca_charset_is_multibyte(cs) \ (enca_charset_properties(cs) & ENCA_CHARSET_MULTIBYTE) enca_charset_has_ucs2_map int int charset enca_charset_ucs2_map int int charset,unsigned int *buffer enca_number_of_charsets size_t void enca_analyser_language const char* EncaAnalyser analyser enca_language_english_name const char* const char *lang enca_get_languages const char** size_t *n enca_get_language_charsets int* const char *langname,size_t *n