/* This file is part of Mailfromd. -*- c -*- Copyright (C) 2006-2020 Sergey Poznyakoff This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ MF_BUILTIN_MODULE MF_COND(WITH_DSPAM) #include "srvcfg.h" #undef HAVE_CONFIG_H #define CONFIG_DEFAULT "/dev/null" #define LOGDIR "/dev/null" #include <libdspam.h> #include "mflib/dspam.h" #include "msg.h" /* User parameters */ MF_VAR(dspam_user, STRING, SYM_PRECIOUS); MF_VAR(dspam_group, STRING, SYM_PRECIOUS); MF_VAR(dspam_config, STRING, SYM_PRECIOUS); MF_VAR(dspam_profile, STRING, SYM_PRECIOUS); /* Output variables */ MF_VAR(dspam_signature, STRING, SYM_PRECIOUS); MF_VAR(dspam_probability, NUMBER); MF_VAR(dspam_confidence, NUMBER); MF_VAR(dspam_prec, NUMBER); #define DEFAULT_DSPAM_PREC 3 static int _dspam_initialized; static void _dspam_shutdown() { dspam_shutdown_driver(NULL); } struct transtab { int trans_from; int trans_to; }; static struct builtin_const_trans mode_trans[] = { MF_TRANS(DSM_PROCESS), MF_TRANS(DSM_CLASSIFY) }; static struct builtin_const_trans flag_trans[] = { MF_TRANS(DSF_SIGNATURE), MF_TRANS(DSF_NOISE), MF_TRANS(DSF_WHITELIST) }; static struct builtin_const_trans tokenizer_trans[] = { MF_TRANS(DSZ_WORD), MF_TRANS(DSZ_CHAIN), MF_TRANS(DSZ_SBPH), MF_TRANS(DSZ_OSB), }; static struct builtin_const_trans tmod_trans[] = { MF_TRANS(DST_TEFT), MF_TRANS(DST_TOE), MF_TRANS(DST_TUM) }; static struct builtin_const_trans class_trans[] = { MF_TRANS(DSR_ISSPAM), MF_TRANS(DSR_ISINNOCENT), MF_TRANS(DSR_NONE) }; static struct builtin_const_trans source_trans[] = { MF_TRANS(DSS_ERROR), MF_TRANS(DSS_CORPUS), MF_TRANS(DSS_INOCULATION), MF_TRANS(DSS_NONE) }; static void ctx_cleanup(void *ptr) { DSPAM_CTX *ctx = ptr; dspam_destroy(ctx); } struct config_entry { int argc; char **argv; # define config_keyword argv[0] # define config_value argv[1] }; static void free_config_entry(void *data) { struct config_entry *entry = data; mu_argcv_free(entry->argc, entry->argv); } static int compare_config_entry(const void *a, const void *b) { struct config_entry const *ent_a = a; struct config_entry const *ent_b = b; return strcasecmp(ent_a->config_keyword, ent_b->config_keyword); } struct config_entry * config_find(mu_list_t config, const char *kw) { if (config) { struct config_entry key, *ret; key.argc = 1; key.argv = (char **)&kw; if (mu_list_locate(config, &key, (void **)&ret) == 0) return ret; } return NULL; } const char * config_find_value(mu_list_t config, const char *kw) { struct config_entry *ent = config_find(config, kw); if (ent) return ent->config_value; return NULL; } static int read_config(mu_list_t config, const char *file_name) { int rc; mu_stream_t str, flt; char *buf = NULL; size_t size = 0, n; static const char *args[] = { "INLINE-COMMENT", "#", "-r" }; if ((rc = mu_file_stream_create(&str, file_name, MU_STREAM_READ))) { mu_error(_("cannot open configuration file `%s': %s"), file_name, mu_strerror(rc)); return rc; } rc = mu_filter_create_args(&flt, str, "INLINE-COMMENT", MU_ARRAY_SIZE(args), args, MU_FILTER_DECODE, MU_STREAM_READ); mu_stream_unref(str); if (rc) { mu_error (_("cannot open filter stream for `%s': %s"), file_name, mu_strerror (rc)); return rc; } str = flt; while (mu_stream_getline(str, &buf, &size, &n) == 0 && n > 0) { struct config_entry *ent; struct mu_wordsplit ws; if (mu_wordsplit(buf, &ws, MU_WRDSF_DEFFLAGS)) { mu_error("mu_wordsplit: %s", mu_wordsplit_strerror(&ws)); break; } if (ws.ws_wordc) { ent = mu_alloc(sizeof(*ent)); ent->argc = ws.ws_wordc; ent->argv = ws.ws_wordv; mu_list_append(config, ent); ws.ws_wordc = 0; ws.ws_wordv = NULL; } /* FIXME: diagnostics */ mu_wordsplit_free(&ws); } free(buf); mu_stream_close(str); mu_stream_destroy(&str); return 0; } static void * alloc_config() { mu_list_t config; mu_list_create(&config); mu_list_set_destroy_item(config, free_config_entry); mu_list_set_comparator(config, compare_config_entry); return config; } static void destroy_config(void *data) { mu_list_t config = data; mu_list_destroy(&config); } MF_DECLARE_DATA(DSPAM_CONFIG, alloc_config, destroy_config) MF_DSEXP_SUPPRESS([<get_config>],[< static mu_list_t get_config(eval_environ_t env, mu_list_t config) { /* Initialize dspam library and set up global variables, if needed */ if (!_dspam_initialized) { const char *config_file = MF_VAR_STRING(dspam_config); if (config_file && config_file[0]) read_config(config, config_file); MF_ASSERT(libdspam_init(config_find_value(config, "StorageDriver")) == 0, mfe_failure, "libdspam_init failed"); dspam_init_driver(NULL); atexit(_dspam_shutdown); _dspam_initialized = 1; if (MF_VAR_STRING(dspam_user) == NULL) MF_VAR_SET_STRING(dspam_user, mf_server_user); if (MF_VAR_REF(dspam_prec, uint) == 0) MF_VAR_REF(dspam_prec, uint, DEFAULT_DSPAM_PREC); } return config; } >]) struct keyword_prop { char *name; int len; int flag; }; #define PROP_ATTACH 1 #define PROP_ALGORITHM 2 #define PROP_TOKENIZER 3 #define PROP_PVALUE 4 static struct keyword_prop keyword_prop[] = { { "IgnoreHeader", 0, PROP_ATTACH }, { "MySQL", 5, PROP_ATTACH }, { "PgSQL", 5, PROP_ATTACH }, { "SQLite", 6, PROP_ATTACH }, { "LocalMX", 0, PROP_ATTACH }, { "Storage", 7, PROP_ATTACH }, { "Processor", 9, PROP_ATTACH }, { "Hash", 4, PROP_ATTACH }, { "Algorithm", 0, PROP_ALGORITHM }, { "PValue", 0, PROP_PVALUE }, { "Tokenizer", 0, PROP_TOKENIZER }, { NULL } }; static struct mu_kwd algorithm_kwd[] = { { "graham", DSA_GRAHAM }, { "burton", DSA_BURTON }, { "robinson", DSA_ROBINSON }, { "naive", DSA_NAIVE }, { "chi-square", DSA_CHI_SQUARE }, { NULL } }; static struct mu_kwd pvalue_kwd[] = { { "robinson", DSP_ROBINSON }, { "markov", DSP_MARKOV }, { NULL } }; static struct mu_kwd tokenizer_kwd[] = { { "word", DSZ_WORD }, { "chain", DSZ_CHAIN }, { "chained", DSZ_CHAIN }, { "sbph", DSZ_SBPH }, { "osb", DSZ_OSB }, { NULL } }; static void set_context_attributes(DSPAM_CTX *ctx, mu_list_t config, const char *profile, int ignore_tokenizer) { mu_iterator_t itr; int algo = 0; int algo_set = 0; int pvalue = 0; int pvalue_set = 0; int tokenizer = 0; int tokenizer_set = 0; int n; if (!profile || !profile[0]) profile = config_find_value(config, "DefaultProfile"); mu_list_get_iterator(config, &itr); for (mu_iterator_first(itr); !mu_iterator_is_done(itr); mu_iterator_next(itr)) { struct config_entry *ent; struct keyword_prop *prop; mu_iterator_current (itr, (void **)&ent); for (prop = keyword_prop; prop->name; prop++) { char *p; if ((prop->len ? strncasecmp(ent->config_keyword, prop->name, prop->len) : strcasecmp(ent->config_keyword, prop->name)) == 0) { switch (prop->flag) { case PROP_ATTACH: dspam_addattribute(ctx, ent->config_keyword, ent->config_value); break; case PROP_ALGORITHM: algo_set = 1; if (mu_kwd_xlat_name_ci(algorithm_kwd, ent->config_value, &n) == 0) algo |= n; break; case PROP_PVALUE: if (pvalue_set) continue; if (mu_kwd_xlat_name_ci(pvalue_kwd, ent->config_value, &n) == 0) { pvalue = n; pvalue_set = 1; } break; case PROP_TOKENIZER: tokenizer_set = 1; if (mu_kwd_xlat_name_ci(tokenizer_kwd, ent->config_value, &n) == 0) tokenizer |= n; break; } } else if (profile && (p = strchr(ent->config_keyword, '.')) && strcasecmp(p + 1, profile) == 0) { size_t len = p - ent->config_keyword; char *key = mu_alloc(len + 1); memcpy(key, ent->config_keyword, len); key[len] = 0; dspam_addattribute(ctx, key, ent->config_value); free(key); } } } mu_iterator_destroy(&itr); if (algo_set) ctx->algorithms = algo | (pvalue_set ? pvalue : DSP_GRAHAM); if (!ignore_tokenizer && tokenizer_set) ctx->tokenizer = tokenizer; if ((ctx->algorithms & DSA_CHI_SQUARE) && !(ctx->algorithms & DSP_ROBINSON)) mu_diag_output(MU_DIAG_WARNING, "Chi-Square algorithm enabled with other " "algorithms: false positives may ensue"); } /* number dspam(number msg, number flags; number class_source) */ MF_DSEXP MF_DEFUN(dspam, NUMBER, NUMBER nmsg, NUMBER mode_flags, OPTIONAL, NUMBER class_src) { int rc; DSPAM_CTX *ctx; /* DSPAM Context */ int mode; int flags; mu_message_t msg; mu_stream_t msgstr, instr; const char *msgbuf; size_t msgsize; unsigned prec; mu_transport_t trans[2]; mu_list_t config = get_config(env, MF_GET_DATA); int tokenizer; /* Prepare message buffer */ msg = bi_message_from_descr(env, nmsg); rc = mu_message_size(msg, &msgsize); MF_ASSERT(rc == 0, mfe_failure, "mu_message_size: %s", mu_strerror(rc)); rc = mu_memory_stream_create(&msgstr, MU_STREAM_RDWR); MF_ASSERT(rc == 0, mfe_failure, "mu_static_memory_stream_create: %s", mu_strerror(rc)); MF_DCL_CLEANUP(msgstr, _builtin_stream_cleanup); rc = mu_message_get_streamref(msg, &instr); MF_ASSERT(rc == 0, mfe_failure, "mu_message_get_streamref: %s", mu_strerror(rc)); MF_DCL_CLEANUP(instr, _builtin_stream_cleanup); rc = mu_stream_copy(msgstr, instr, msgsize, NULL); MF_ASSERT(rc == 0, mfe_failure, "mu_stream_copy: %s", mu_strerror(rc)); MF_CLEANUP(instr); mu_stream_ioctl(msgstr, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET, trans); msgbuf = (const char*)trans[0]; /* Prepare DSPAM context */ MF_ASSERT(_builtin_const_to_c(mode_trans, MU_ARRAY_SIZE(mode_trans), mode_flags & _MFL__DSM_MASK, &mode) == 0, mfe_failure, "bad dspam mode"); flags = _builtin_const_to_bitmap(flag_trans, MU_ARRAY_SIZE(flag_trans), mode_flags); /* Create the DSPAM context */ ctx = dspam_create(MF_VAR_STRING(dspam_user), MF_VAR_STRING(dspam_group), config_find_value(config, "Home"), mode, flags); MF_ASSERT(ctx != NULL, mfe_failure, "dspam_create failed"); MF_DCL_CLEANUP(ctx, ctx_cleanup); /* Use graham and robinson algorithms, graham's p-values */ ctx->algorithms = DSA_GRAHAM | DSA_BURTON | DSP_GRAHAM; tokenizer = mode_flags & _MFL__DSZ_MASK; set_context_attributes(ctx, config, MF_VAR_STRING(dspam_profile), tokenizer); MF_ASSERT(dspam_attach(ctx, NULL) == 0, mfe_failure, "dspam_attach failed"); /* Configure tokenizer */ if (tokenizer) MF_ASSERT(_builtin_const_to_c(tokenizer_trans, MU_ARRAY_SIZE(tokenizer_trans), tokenizer, &ctx->tokenizer) == 0, mfe_failure, "bad dspam tokenizer"); /* Set training mode */ MF_ASSERT(_builtin_const_to_c(tmod_trans, MU_ARRAY_SIZE(tmod_trans), mode_flags & _MFL__DST_MASK, &ctx->training_mode) == 0, mfe_failure, "bad dspam training mode"); /* Set up classification and source */ if (MF_DEFINED(class_src)) { MF_ASSERT(_builtin_const_to_c(class_trans, MU_ARRAY_SIZE(class_trans), class_src & _MFL__DSR_MASK, &ctx->classification) == 0, mfe_failure, "bad dspam classification flag"); MF_ASSERT(_builtin_const_to_c(source_trans, MU_ARRAY_SIZE(source_trans), class_src & _MFL__DSS_MASK, &ctx->source) == 0, mfe_failure, "bad dspam source flag"); } /* Process the message */ MF_ASSERT(dspam_process(ctx, msgbuf) == 0, mfe_failure, "dspam_process failed"); rc = MF_VAR_REF(dspam_prec, uint); prec = 1; while (rc--) prec *= 10; MF_VAR_REF(dspam_probability, ulong, ctx->probability * prec); MF_VAR_REF(dspam_confidence, ulong, ctx->confidence * prec); if (flags & DSF_SIGNATURE) { char signame[128]; _ds_create_signature_id(ctx, signame, sizeof(signame)); _ds_set_signature(ctx, ctx->signature, signame); MF_VAR_SET_STRING(dspam_signature, signame); } MF_ASSERT(_builtin_c_to_const(class_trans, MU_ARRAY_SIZE(class_trans), ctx->result, &rc) == 0, mfe_failure, "unrecognized dspam result"); MF_CLEANUP(ctx); /* FIXME: Any additional processing? */ MF_RETURN(rc); } END