ENCA_STMT_START
/* Opening half of the statement-wrapper pair. Expands to `do`, so that
 * ENCA_STMT_START { ... } ENCA_STMT_END reads as one `do { ... } while (0)`
 * statement inside multi-statement macros. */
#define ENCA_STMT_START do
ENCA_STMT_END
/* Closing half of the statement-wrapper pair. Expands to `while (0)`; the
 * terminating semicolon is supplied by the code that uses the macro. */
#define ENCA_STMT_END while (0)
EncaAnalyserOptions
EncaAnalyserState
EncaCharsetInfo
EncaLanguageInfo
EncaLanguageHookData1CS
EncaLanguageHookDataEOL
EncaUTFCheckData
EncaCharsetInfo
/* Per-charset record: one int per naming scheme, a human-readable string,
 * and two unsigned property fields.
 * NOTE(review): the int members share their names with EncaNameStyle values
 * (ENCA, RFC1345, CSTOCS, ICONV, MIME); presumably each is an index into a
 * table of names -- confirm against the table that instantiates this struct. */
struct _EncaCharsetInfo {
/* Name reference for the ENCA naming style (see note above). */
int enca;
/* Name reference for the RFC 1345 naming style (see note above). */
int rfc1345;
/* Name reference for the cstocs naming style (see note above). */
int cstocs;
/* Name reference for the iconv naming style (see note above). */
int iconv;
/* Name reference for the MIME naming style (see note above). */
int mime;
/* Human-readable charset description, stored directly as a string. */
const char *human;
/* NOTE(review): presumably a combination of EncaCharsetFlags bits -- confirm. */
unsigned int flags;
/* NOTE(review): presumably the natural surface (EncaSurface bits) -- confirm. */
unsigned int nsurface;
};
EncaHookFunc
int
EncaAnalyserState *analyser
EncaGuessFunc
int
EncaAnalyserState *analyser
EncaLanguageInfo
/* Static description of one language: its names, a set of ncharsets
 * charsets with associated data tables, and optional hook functions.
 * All table pointers are const-qualified, i.e. the data is read-only
 * through this structure. */
struct _EncaLanguageInfo {
/* Language name. NOTE(review): presumably the short code passed as
 * `langname` to the functions that take a language name -- confirm. */
const char *name;
/* Human-readable language name. */
const char *humanname;
/* Number of charsets. NOTE(review): presumably the length of csnames and
 * of the per-charset tables below -- confirm. */
size_t ncharsets;
/* Array of charset name strings. */
const char *const *csnames;
/* Array of pointers to unsigned short weight tables.
 * NOTE(review): presumably one table per charset -- confirm. */
const unsigned short int *const *weights;
/* Single unsigned short table. NOTE(review): meaning not visible here. */
const unsigned short int *significant;
/* Array of pointers to byte tables.
 * NOTE(review): presumably one table per charset -- confirm. */
const unsigned char *const *letters;
/* Array of pointers to arrays of byte-string pointers (pair data).
 * NOTE(review): exact layout not visible here. */
const unsigned char **const *pairs;
/* NOTE(review): presumably the sum of the weights -- confirm. */
long int weight_sum;
/* Hook functions of type EncaHookFunc. NOTE(review): when each is invoked,
 * and whether NULL is permitted, is not visible here. */
EncaHookFunc hook;
EncaHookFunc eolhook;
EncaHookFunc lcuchook;
EncaHookFunc ratinghook;
};
EncaAnalyserOptions
/* Tunable analyser settings; an instance is embedded in
 * struct _EncaAnalyserState as its `options` member.
 * NOTE(review): several members have similarly named enca_set_*() and
 * enca_get_*() accessors listed in this file; the mapping is presumed from
 * the names, not shown here. */
struct _EncaAnalyserOptions {
/* NOTE(review): presumably nonzero when the input buffer must not be
 * modified (cf. enca_analyse_const) -- confirm. */
int const_buffer;
/* NOTE(review): presumably the value handled by enca_set_significant(). */
size_t min_chars;
/* NOTE(review): presumably the value handled by enca_set_threshold(). */
double threshold;
/* NOTE(review): presumably the value handled by enca_set_multibyte(). */
int multibyte_enabled;
/* NOTE(review): presumably handled by enca_set_interpreted_surfaces(). */
int interpreted_surfaces;
/* NOTE(review): presumably the value handled by enca_set_ambiguity(). */
int ambiguous_mode;
/* NOTE(review): presumably the value handled by enca_set_filtering(). */
int filtering;
/* NOTE(review): presumably the value handled by enca_set_garbage_test(). */
int test_garbageness;
/* NOTE(review): presumably handled by enca_set_termination_strictness(). */
int termination_strictness;
};
EncaAnalyserState
/* Complete analyser state. The public handle type EncaAnalyser is a pointer
 * to this structure. It groups the language data, working buffers and
 * results, double-UTF-8 and pair-frequency data, and the options.
 * NOTE(review): which of the pointers are owned by the analyser is not
 * visible here -- confirm against the init/destroy functions. */
struct _EncaAnalyserState {
/* Language data. */
const EncaLanguageInfo *lang;
size_t ncharsets;
int *charsets;
/* Analyser state. */
/* Error code of type EncaErrno. NOTE(review): presumably what enca_errno()
 * reports -- confirm. */
EncaErrno gerrno;
/* Buffer pointer with its size. NOTE(review): presumably the sample that
 * is being analysed -- confirm. */
size_t size;
unsigned char *buffer;
/* Encoding result (charset plus surface). */
EncaEncoding result;
/* NOTE(review): presumably per-byte-value occurrence counts -- confirm. */
size_t *counts;
size_t bin;
size_t up;
/* NOTE(review): presumably one rating per charset, with `order` holding
 * charset indices sorted by rating -- confirm. */
double *ratings;
size_t *order;
/* Second buffer pointer with its size. NOTE(review): purpose not visible. */
size_t size2;
unsigned char *buffer2;
/* Double-UTF-8 data. */
EncaUTFCheckData *utfch;
int *utfbuf;
/* Pair frequency data */
unsigned char *pair2bits;
size_t *bitcounts;
size_t *pairratings;
/* LCUC data XXX: unused (yet) */
size_t *lcbits;
size_t *ucbits;
/* Options. */
EncaAnalyserOptions options;
};
EncaLanguageHookData1CS
/* One row of hook data pairing a name with a byte list. The member order
 * (name, size, list, cs) matches the initializer produced by the
 * MAKE_HOOK_LINE() macro in this file. */
struct _EncaLanguageHookData1CS {
/* Name string. MAKE_HOOK_LINE() fills it with the stringified macro
 * argument. NOTE(review): presumably a charset name -- confirm. */
const char *name;
/* Number of elements in `list`. */
size_t size;
/* Byte list associated with `name`. */
const unsigned char *list;
/* MAKE_HOOK_LINE() initializes this to (size_t)-1.
 * NOTE(review): presumably a charset index resolved later -- confirm. */
size_t cs;
};
EncaLanguageHookDataEOL
/* One row of hook data pairing a name with an EncaSurface value.
 * Rows of this type are passed to enca_language_hook_eol(). */
struct _EncaLanguageHookDataEOL {
/* Name string. NOTE(review): presumably a charset name -- confirm. */
const char *name;
/* Surface value. NOTE(review): presumably one of the ENCA_SURFACE_EOL_*
 * bits -- confirm. */
EncaSurface eol;
/* NOTE(review): presumably a charset index resolved at run time -- confirm. */
size_t cs;
};
EncaUTFCheckData
/* Working data for the double-UTF-8 check; struct _EncaAnalyserState holds
 * a pointer to this type in its `utfch` member. */
struct _EncaUTFCheckData {
/* Rating value. NOTE(review): scale and meaning not visible here. */
double rating;
/* NOTE(review): presumably the element count of ucs2 and weights -- confirm. */
size_t size;
/* Integer result of the check. NOTE(review): encoding of the value is not
 * visible here. */
int result;
/* NOTE(review): presumably UCS-2 code points -- confirm. */
int *ucs2;
/* NOTE(review): presumably weights parallel to ucs2 -- confirm. */
int *weights;
};
FILL_NONLETTER
/* Fill character '.'. NOTE(review): presumably written in place of
 * non-letter bytes when filtering -- confirm at the use sites. */
#define FILL_NONLETTER '.'
EPSILON
/* Small positive double constant (1e-6). NOTE(review): presumably a
 * tolerance for floating-point comparisons -- confirm at the use sites. */
#define EPSILON 0.000001
LF
/* Line feed ('\n') as an unsigned char, so it compares cleanly against
 * bytes read from unsigned char buffers. */
#define LF ((unsigned char)'\n')
CR
/* Carriage return ('\r') as an unsigned char, so it compares cleanly
 * against bytes read from unsigned char buffers. */
#define CR ((unsigned char)'\r')
enca_ctype_test
/* Test whether character c belongs to the ctype class(es) in mask t.
 * Indexes enca_ctype_data[] with c converted to unsigned char and evaluates
 * to 1 when any bit of t is set in that entry, 0 otherwise.
 * Both arguments are parenthesized so that expression arguments are taken
 * as a whole: a mask such as `A | B` is applied in full, and a character
 * expression such as `p[0] + 1` is converted after it is evaluated. */
#define enca_ctype_test(c, t) ((enca_ctype_data[(unsigned char)(c)] & (t)) != 0)
enca_isalnum
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_ALNUM bit, else 0. */
#define enca_isalnum(c) enca_ctype_test((c), ENCA_CTYPE_ALNUM)
enca_isalpha
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_ALPHA bit, else 0. */
#define enca_isalpha(c) enca_ctype_test((c), ENCA_CTYPE_ALPHA)
enca_iscntrl
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_CNTRL bit, else 0. */
#define enca_iscntrl(c) enca_ctype_test((c), ENCA_CTYPE_CNTRL)
enca_isdigit
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_DIGIT bit, else 0. */
#define enca_isdigit(c) enca_ctype_test((c), ENCA_CTYPE_DIGIT)
enca_isgraph
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_GRAPH bit, else 0. */
#define enca_isgraph(c) enca_ctype_test((c), ENCA_CTYPE_GRAPH)
enca_islower
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_LOWER bit, else 0. */
#define enca_islower(c) enca_ctype_test((c), ENCA_CTYPE_LOWER)
enca_isprint
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_PRINT bit, else 0. */
#define enca_isprint(c) enca_ctype_test((c), ENCA_CTYPE_PRINT)
enca_ispunct
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_PUNCT bit, else 0. */
#define enca_ispunct(c) enca_ctype_test((c), ENCA_CTYPE_PUNCT)
enca_isspace
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_SPACE bit, else 0. */
#define enca_isspace(c) enca_ctype_test((c), ENCA_CTYPE_SPACE)
enca_isupper
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_UPPER bit, else 0. */
#define enca_isupper(c) enca_ctype_test((c), ENCA_CTYPE_UPPER)
enca_isxdigit
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_XDIGIT bit, else 0. */
#define enca_isxdigit(c) enca_ctype_test((c), ENCA_CTYPE_XDIGIT)
enca_isname
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_NAME bit, else 0.
 * NOTE(review): presumably marks characters allowed in charset names. */
#define enca_isname(c) enca_ctype_test((c), ENCA_CTYPE_NAME)
enca_isbinary
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_BINARY bit, else 0.
 * NOTE(review): presumably marks bytes that indicate binary data. */
#define enca_isbinary(c) enca_ctype_test((c), ENCA_CTYPE_BINARY)
enca_istext
/* 1 if the enca_ctype_data entry for c has the ENCA_CTYPE_TEXT bit, else 0.
 * NOTE(review): presumably marks bytes acceptable in plain text. */
#define enca_istext(c) enca_ctype_test((c), ENCA_CTYPE_TEXT)
ELEMENTS
/* Element count of a true array: the size of the whole array divided by
 * the size of one element. Only meaningful for array objects; a pointer
 * argument gives an unrelated value. */
#define ELEMENTS(array) (sizeof(array) / sizeof(*(array)))
enca_malloc
void*
size_t size
enca_realloc
void*
void *ptr,size_t size
enca_free
/* Release the memory that ptr refers to and reset ptr to NULL.
 * ptr must be a modifiable pointer lvalue. It is evaluated twice, so it
 * must not have side effects. A NULL ptr is fine: free(NULL) is a no-op,
 * so no explicit guard is needed. Wrapped in ENCA_STMT_START/ENCA_STMT_END
 * so that the expansion behaves as a single statement. */
#define enca_free(ptr) \
ENCA_STMT_START{ free(ptr); (ptr)=NULL; }ENCA_STMT_END
NEW
/* Allocate room for n objects of `type` through enca_malloc() and yield
 * the result as a `type *`. The byte count is n * sizeof(type), computed
 * without an overflow check. */
#define NEW(type,n) ((type *) enca_malloc(sizeof(type) * (n)))
RENEW
/* Resize the block at ptr to hold n objects of `type` through
 * enca_realloc() and yield the result as a `type *`. The byte count is
 * n * sizeof(type), computed without an overflow check. */
#define RENEW(ptr,type,n) ((type *) enca_realloc((ptr), sizeof(type) * (n)))
MAKE_HOOK_LINE
/* Build one brace initializer for a hook-data row from an array named
 * list_<name>. The four elements, in order, are the stringified name, the
 * element count of list_<name>, list_<name> itself, and (size_t)-1.
 * The order matches the members of struct _EncaLanguageHookData1CS. */
#define MAKE_HOOK_LINE(name) \
{ \
  #name, \
  ELEMENTS(list_##name), \
  list_##name, \
  (size_t)-1 \
}
enca_strdup
char*
const char *s
enca_strstr
const char*
const char *haystack,const char* needle
enca_strstr
/* Alias enca_strstr to the C library strstr().
 * NOTE(review): an enca_strstr prototype is also listed in this file;
 * presumably a configuration check not visible here picks one of the two. */
# define enca_strstr strstr
enca_stpcpy
char*
char *dest,const char *src
enca_stpcpy
/* Alias enca_stpcpy to the platform's stpcpy().
 * NOTE(review): an enca_stpcpy prototype is also listed in this file;
 * presumably a configuration check not visible here picks one of the two. */
# define enca_stpcpy stpcpy
enca_csname
/* Shorthand for enca_charset_name() with the ENCA_NAME_STYLE_ENCA style:
 * yields the ENCA-style name of charset cs. */
#define enca_csname(cs) enca_charset_name((cs), ENCA_NAME_STYLE_ENCA)
enca_strconcat
char*
const char *str,...
enca_strappend
char*
char *str,...
enca_name_to_charset
int
const char *csname
enca_name_to_surface
EncaSurface
const char *sname
enca_language_init
int
EncaAnalyserState *analyser,const char *langname
enca_language_destroy
void
EncaAnalyserState *analyser
enca_get_charset_similarity_matrix
double*
const EncaLanguageInfo *lang
enca_charsets_subset_identical
int
int charset1,int charset2,const size_t *counts
enca_filter_boxdraw
size_t
EncaAnalyserState *analyser,unsigned char fill_char
enca_language_hook_ncs
int
EncaAnalyserState *analyser,size_t ncs,EncaLanguageHookData1CS *hookdata
enca_language_hook_eol
int
EncaAnalyserState *analyser,size_t ncs,EncaLanguageHookDataEOL *hookdata
enca_guess_init
void
EncaAnalyserState *analyser
enca_guess_destroy
void
EncaAnalyserState *analyser
enca_eol_surface
EncaSurface
const unsigned char *buffer,size_t size,const size_t *counts
enca_find_max_sec
void
EncaAnalyserState *analyser
enca_double_utf8_init
void
EncaAnalyserState *analyser
enca_double_utf8_destroy
void
EncaAnalyserState *analyser
enca_pair_init
void
EncaAnalyserState *analyser
enca_pair_destroy
void
EncaAnalyserState *analyser
enca_pair_analyse
int
EncaAnalyserState *analyser
ENCA_LANGUAGE_BE
/* Read-only language description tagged BE; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_BE;
ENCA_LANGUAGE_BG
/* Read-only language description tagged BG; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_BG;
ENCA_LANGUAGE_CS
/* Read-only language description tagged CS; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_CS;
ENCA_LANGUAGE_ET
/* Read-only language description tagged ET; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_ET;
ENCA_LANGUAGE_HR
/* Read-only language description tagged HR; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_HR;
ENCA_LANGUAGE_HU
/* Read-only language description tagged HU; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_HU;
ENCA_LANGUAGE_LT
/* Read-only language description tagged LT; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_LT;
ENCA_LANGUAGE_LV
/* Read-only language description tagged LV; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_LV;
ENCA_LANGUAGE_PL
/* Read-only language description tagged PL; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_PL;
ENCA_LANGUAGE_RU
/* Read-only language description tagged RU; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_RU;
ENCA_LANGUAGE_SK
/* Read-only language description tagged SK; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_SK;
ENCA_LANGUAGE_SL
/* Read-only language description tagged SL; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_SL;
ENCA_LANGUAGE_UK
/* Read-only language description tagged UK; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_UK;
ENCA_LANGUAGE_ZH
/* Read-only language description tagged ZH; only declared here.
 * NOTE(review): the suffix is presumably a two-letter language code. */
extern const EncaLanguageInfo ENCA_LANGUAGE_ZH;
EncaSurface
/* Surface flags. Each plain enumerator is a distinct single bit, so values
 * can be OR-ed together; the MASK_* enumerators are unions of a group.
 * Bits 0-4 form the EOL group, bits 5-7 the PERM group, bit 8 is QP,
 * bit 13 is REMOVE and bit 14 is UNKNOWN. Bits 9-12 are not assigned. */
typedef enum { /*< flags >*/
/* EOL group (bits 0-4). NOTE(review): presumably describes the end-of-line
 * convention found in the sample -- confirm. */
ENCA_SURFACE_EOL_CR = 1 << 0,
ENCA_SURFACE_EOL_LF = 1 << 1,
ENCA_SURFACE_EOL_CRLF = 1 << 2,
ENCA_SURFACE_EOL_MIX = 1 << 3,
ENCA_SURFACE_EOL_BIN = 1 << 4,
/* Union of all five EOL bits. */
ENCA_SURFACE_MASK_EOL = (ENCA_SURFACE_EOL_CR
| ENCA_SURFACE_EOL_LF
| ENCA_SURFACE_EOL_CRLF
| ENCA_SURFACE_EOL_MIX
| ENCA_SURFACE_EOL_BIN),
/* PERM group (bits 5-7). NOTE(review): presumably byte-order
 * permutations -- confirm. */
ENCA_SURFACE_PERM_21 = 1 << 5,
ENCA_SURFACE_PERM_4321 = 1 << 6,
ENCA_SURFACE_PERM_MIX = 1 << 7,
/* Union of all three PERM bits. */
ENCA_SURFACE_MASK_PERM = (ENCA_SURFACE_PERM_21
| ENCA_SURFACE_PERM_4321
| ENCA_SURFACE_PERM_MIX),
/* Stand-alone bits. */
ENCA_SURFACE_QP = 1 << 8,
ENCA_SURFACE_REMOVE = 1 << 13,
ENCA_SURFACE_UNKNOWN = 1 << 14,
/* Union of the EOL group, the PERM group, QP and REMOVE.
 * ENCA_SURFACE_UNKNOWN is not part of this mask. */
ENCA_SURFACE_MASK_ALL = (ENCA_SURFACE_MASK_EOL
| ENCA_SURFACE_MASK_PERM
| ENCA_SURFACE_QP
| ENCA_SURFACE_REMOVE)
} EncaSurface;
EncaNameStyle
/* Selects which naming convention a name-returning function should use
 * (passed as `whatname` to enca_charset_name() and
 * enca_get_surface_name()). No explicit values are given, so the
 * enumerators are numbered 0 through 5 in declaration order. */
typedef enum {
ENCA_NAME_STYLE_ENCA,
ENCA_NAME_STYLE_RFC1345,
ENCA_NAME_STYLE_CSTOCS,
ENCA_NAME_STYLE_ICONV,
ENCA_NAME_STYLE_HUMAN,
ENCA_NAME_STYLE_MIME
} EncaNameStyle;
EncaCharsetFlags
/* Charset property flags, one bit each (bits 0-8), combinable with OR.
 * This is the return type of enca_charset_properties(), and the
 * enca_charset_is_*() macros test individual bits of that result. */
typedef enum { /*< flags >*/
ENCA_CHARSET_7BIT = 1 << 0,
ENCA_CHARSET_8BIT = 1 << 1,
ENCA_CHARSET_16BIT = 1 << 2,
ENCA_CHARSET_32BIT = 1 << 3,
ENCA_CHARSET_FIXED = 1 << 4,
ENCA_CHARSET_VARIABLE = 1 << 5,
ENCA_CHARSET_BINARY = 1 << 6,
ENCA_CHARSET_REGULAR = 1 << 7,
ENCA_CHARSET_MULTIBYTE = 1 << 8
} EncaCharsetFlags;
EncaErrno
/* Analyser error codes. ENCA_EOK is explicitly 0; the remaining
 * enumerators take the values 1 through 7 in declaration order.
 * NOTE(review): the condition behind each code is not visible here;
 * presumably enca_strerror() gives a description for each. */
typedef enum {
ENCA_EOK = 0,
ENCA_EINVALUE,
ENCA_EEMPTY,
ENCA_EFILTERED,
ENCA_ENOCS8,
ENCA_ESIGNIF,
ENCA_EWINNER,
ENCA_EGARBAGE
} EncaErrno;
ENCA_CS_UNKNOWN
/* Charset id meaning "unknown charset" (-1). enca_charset_is_known()
 * compares a charset id against this value. */
#define ENCA_CS_UNKNOWN (-1)
ENCA_NOT_A_CHAR
/* Sentinel value 0xffff. NOTE(review): presumably marks positions without
 * a mapping in UCS-2 tables (cf. enca_charset_ucs2_map) -- confirm. */
#define ENCA_NOT_A_CHAR 0xffff
EncaAnalyser
/* Analyser handle used by the enca_*() functions that take an
 * EncaAnalyser. It is a pointer to struct _EncaAnalyserState, so the
 * typedef hides one level of indirection. */
typedef struct _EncaAnalyserState *EncaAnalyser;
EncaEncoding
EncaEncoding
/* An encoding: a charset id (int) together with EncaSurface flags.
 * NOTE(review): charset presumably holds ENCA_CS_UNKNOWN when no charset
 * was determined -- confirm. */
struct _EncaEncoding { int charset; EncaSurface surface; };
enca_analyser_alloc
EncaAnalyser
const char *langname
enca_analyser_free
void
EncaAnalyser analyser
enca_analyse
EncaEncoding
EncaAnalyser analyser,unsigned char *buffer,size_t size
enca_analyse_const
EncaEncoding
EncaAnalyser analyser,const unsigned char *buffer,size_t size
enca_double_utf8_check
int
EncaAnalyser analyser,const unsigned char *buffer,size_t size
enca_double_utf8_get_candidates
int*
EncaAnalyser analyser
enca_errno
int
EncaAnalyser analyser
enca_strerror
const char*
EncaAnalyser analyser,int errnum
enca_set_multibyte
void
EncaAnalyser analyser,int multibyte
enca_get_multibyte
int
EncaAnalyser analyser
enca_set_interpreted_surfaces
void
EncaAnalyser analyser,int interpreted_surfaces
enca_get_interpreted_surfaces
int
EncaAnalyser analyser
enca_set_ambiguity
void
EncaAnalyser analyser,int ambiguity
enca_get_ambiguity
int
EncaAnalyser analyser
enca_set_filtering
void
EncaAnalyser analyser,int filtering
enca_get_filtering
int
EncaAnalyser analyser
enca_set_garbage_test
void
EncaAnalyser analyser,int garbage_test
enca_get_garbage_test
int
EncaAnalyser analyser
enca_set_termination_strictness
void
EncaAnalyser analyser,int termination_strictness
enca_get_termination_strictness
int
EncaAnalyser analyser
enca_set_significant
int
EncaAnalyser analyser,size_t significant
enca_get_significant
size_t
EncaAnalyser analyser
enca_set_threshold
int
EncaAnalyser analyser,double threshold
enca_get_threshold
double
EncaAnalyser analyser
enca_charset_name
const char*
int charset,EncaNameStyle whatname
enca_get_charset_aliases
const char**
int charset,size_t *n
enca_get_surface_name
char*
EncaSurface surface,EncaNameStyle whatname
enca_parse_encoding_name
EncaEncoding
const char *name
enca_charset_natural_surface
EncaSurface
int charset
enca_charset_properties
EncaCharsetFlags
int charset
enca_charset_is_known
/* 1 when charset id cs differs from ENCA_CS_UNKNOWN, 0 otherwise. */
#define enca_charset_is_known(cs) \
((cs) != ENCA_CS_UNKNOWN)
enca_charset_is_7bit
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_7BIT set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_7bit(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_7BIT)
enca_charset_is_8bit
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_8BIT set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_8bit(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_8BIT)
enca_charset_is_16bit
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_16BIT set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_16bit(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_16BIT)
enca_charset_is_32bit
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_32BIT set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_32bit(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_32BIT)
enca_charset_is_fixed
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_FIXED set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_fixed(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_FIXED)
enca_charset_is_variable
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_VARIABLE set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_variable(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_VARIABLE)
enca_charset_is_binary
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_BINARY set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_binary(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_BINARY)
enca_charset_is_regular
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_REGULAR set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_regular(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_REGULAR)
enca_charset_is_multibyte
/* Nonzero when enca_charset_properties(cs) has ENCA_CHARSET_MULTIBYTE set.
 * The result is the masked flag value, not normalized to 1. */
#define enca_charset_is_multibyte(cs) \
(enca_charset_properties(cs) & ENCA_CHARSET_MULTIBYTE)
enca_charset_has_ucs2_map
int
int charset
enca_charset_ucs2_map
int
int charset,unsigned int *buffer
enca_number_of_charsets
size_t
void
enca_analyser_language
const char*
EncaAnalyser analyser
enca_language_english_name
const char*
const char *lang
enca_get_languages
const char**
size_t *n
enca_get_language_charsets
int*
const char *langname,size_t *n