candidates);
if (r == rs_detect_none) {
- msg_debug_lang_det ("no trigramms found, switch to nothing");
- } else if (r == rs_detect_multiple) {
+ msg_debug_lang_det ("no trigramms found, fallback to english");
+ rspamd_language_detector_set_language (task, part, "en");
+ }
+ else if (r == rs_detect_multiple) {
/* Check our guess */
mean = 0.0;
}
/* Now, convert hash to array and sort it */
- result = g_ptr_array_sized_new (kh_size (candidates));
+ if (r != rs_detect_none && kh_size (candidates) > 0) {
+ result = g_ptr_array_sized_new (kh_size (candidates));
- kh_foreach_value (candidates, cand, {
- if (!isnan (cand->prob)) {
- msg_debug_lang_det ("final probability %s -> %.2f", cand->lang,
- cand->prob);
- g_ptr_array_add (result, cand);
- }
- });
+ kh_foreach_value (candidates, cand, {
+ if (!isnan (cand->prob)) {
+ msg_debug_lang_det ("final probability %s -> %.2f", cand->lang,
+ cand->prob);
+ g_ptr_array_add (result, cand);
+ }
+ });
- if (frequency_heuristic_applied) {
- g_ptr_array_sort_with_data (result,
- rspamd_language_detector_cmp_heuristic, (gpointer) &cbd);
- } else {
- g_ptr_array_sort (result, rspamd_language_detector_cmp);
- }
+ if (frequency_heuristic_applied) {
+ g_ptr_array_sort_with_data (result,
+ rspamd_language_detector_cmp_heuristic, (gpointer) &cbd);
+ } else {
+ g_ptr_array_sort (result, rspamd_language_detector_cmp);
+ }
- kh_destroy (rspamd_candidates_hash, candidates);
+ if (result->len > 0 && !frequency_heuristic_applied) {
+ cand = g_ptr_array_index (result, 0);
+ cand->elt->occurencies++;
+ d->total_occurencies++;
+ }
- if (result->len > 0 && !frequency_heuristic_applied) {
- cand = g_ptr_array_index (result, 0);
- cand->elt->occurencies++;
- d->total_occurencies++;
+ part->languages = result;
+ ret = TRUE;
+ }
+ else if (part->languages == NULL) {
+ rspamd_language_detector_set_language (task, part, "en");
}
- part->languages = result;
-
- ret = TRUE;
+ kh_destroy (rspamd_candidates_hash, candidates);
}
end_ticks = rspamd_get_ticks (TRUE);
{
struct rspamd_lang_detector_res *lang;
- if (part->utf_words && task->lang_det) {
+ if (!IS_PART_EMPTY (part) && part->utf_words && part->utf_words->len > 0 &&
+ task->lang_det) {
if (rspamd_language_detector_detect (task, task->lang_det, part)) {
lang = g_ptr_array_index (part->languages, 0);
part->language = lang->lang;
msg_info_task ("detected part language: %s", part->language);
}
+ else {
+ part->language = "en"; /* Safe fallback */
+ }
}
}