aboutsummaryrefslogtreecommitdiff
path: root/vkext_flex.c
diff options
context:
space:
mode:
Diffstat (limited to 'vkext_flex.c')
-rw-r--r-- vkext_flex.c 178
1 files changed, 178 insertions, 0 deletions
diff --git a/vkext_flex.c b/vkext_flex.c
new file mode 100644
index 0000000..ade09c0
--- /dev/null
+++ b/vkext_flex.c
@@ -0,0 +1,178 @@
+/*
+ This file is part of VK/KittenPHP-DB-Engine.
+
+ VK/KittenPHP-DB-Engine is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 2 of the License, or
+ (at your option) any later version.
+
+ VK/KittenPHP-DB-Engine is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
+
+ This program is released under the GPL with the additional exemption
+ that compiling, linking, and/or using OpenSSL is allowed.
+ You are free to remove this exemption from derived works.
+
+ Copyright 2011-2013 Vkontakte Ltd
+ 2011-2013 Vitaliy Valtman
+*/
+
+#include <string.h>
+#include <assert.h>
+#include "vkext_flex.h"
+#include "vkext_flex_auto.c"
+
+#define BUFF_LEN (1 << 16) /* size of buff in bytes (65536) */
+static char buff[BUFF_LEN]; /* file-scope scratch buffer in which do_flex builds its result before duplicating it */
+
/*
 * Returns a newly malloc'ed copy of the NUL-terminated string s.
 * Returns NULL when s is NULL or when the allocation fails.
 * The caller owns the returned memory.
 */
char *estrdup (const char *s) {
  if (s == NULL) {
    return NULL;
  }
  /* Measure once and reuse the length for both allocation and copy. */
  size_t len = strlen (s) + 1;
  char *d = malloc (len);
  if (d == NULL) {
    return NULL;
  }
  memcpy (d, s, len);
  return d;
}
+
+char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int type_len, int lang_id) {
+ if (name_len > (1 << 10)) {
+ return estrdup (name);
+ }
+ struct lang *cur_lang;
+ if (lang_id < 0 || lang_id >= LANG_NUM || !langs[lang_id]) {
+ return estrdup (name);
+ }
+ cur_lang = langs[lang_id];
+ assert (cur_lang);
+ int t = -1;
+ if (!strcmp (type, "names")) {
+ if (cur_lang->names_start < 0) {
+ return estrdup (name);
+ }
+ t = cur_lang->names_start;
+ } else if (!strcmp (type, "surnames")) {
+ if (cur_lang->surnames_start < 0) {
+ return estrdup (name);
+ }
+ t = cur_lang->surnames_start;
+ } else {
+ return estrdup (name);
+ }
+ assert (t >= 0);
+ if (sex != 1) {
+ sex = 0;
+ }
+ int ca = -1;
+ int i;
+ for (i = 0; i < CASES_NUM; i++) if (!strcmp (cases_names[i], case_name)) {
+ ca = i;
+ break;
+ }
+ if (ca == -1 || ca >= cur_lang->cases_num) {
+ return estrdup (name);
+ }
+ assert (ca >= 0 && ca < cur_lang->cases_num);
+
+ int p = 0;
+ int wp = 0;
+ while (p < name_len) {
+ int pp = p;
+ while (pp < name_len && name[pp] != '-') {
+ pp++;
+ }
+ int hyphen = (name[pp] == '-');
+ int tt = t;
+ int best = -1;
+ int save_pp = pp;
+ int new_tt;
+ int isf = 0;
+ if (pp - p > 0) {
+ const char *fle = cur_lang->flexible_symbols;
+ while (*fle) {
+ if (*fle == name[pp - 1]) {
+ isf = 1;
+ break;
+ }
+ fle ++;
+ }
+ }
+ while (1 && isf) {
+ assert (tt >= 0);
+ if (cur_lang->nodes[tt].tail_len >= 0 && (!cur_lang->nodes[tt].hyphen || hyphen)) {
+ best = tt;
+ }
+ unsigned char c;
+ if (pp == p - 1) {
+ break;
+ }
+ pp --;
+ if (pp < p) {
+ c = 0;
+ } else {
+ c = name[pp];
+ }
+ new_tt = -1;
+ int l = cur_lang->nodes[tt].children_start;
+ int r = cur_lang->nodes[tt].children_end;
+ if (r - l <= 4) {
+ for (i = l; i < r; i++) if (cur_lang->children[2 * i] == c) {
+ new_tt = cur_lang->children[2 * i + 1] ;
+ break;
+ }
+ } else {
+ int x;
+ while (r - l > 1) {
+ x = (r + l) >> 1;
+ if (cur_lang->children[2 * x] <= c) {
+ l = x;
+ } else {
+ r = x;
+ }
+ }
+ if (cur_lang->children[2 * l] == c) {
+ new_tt = cur_lang->children[2 * l + 1];
+ }
+ }
+ if (new_tt == -1) {
+ break;
+ } else {
+ tt = new_tt;
+ }
+ }
+ if (best == -1) {
+ memcpy (buff + wp, name + p, save_pp - p);
+ wp += (save_pp - p);
+ } else {
+ int r = -1;
+ if (!sex) {
+ r = cur_lang->nodes[best].male_endings;
+ } else {
+ r = cur_lang->nodes[best].female_endings;
+ }
+ if (r < 0 || !cur_lang->endings[r * cur_lang->cases_num + ca]) {
+ memcpy (buff + wp, name + p, save_pp - p);
+ wp += (save_pp - p);
+ } else {
+ int ml = save_pp - p - cur_lang->nodes[best].tail_len;
+ if (ml < 0) {
+ ml = 0;
+ }
+ memcpy (buff + wp, name + p, ml);
+ wp += ml;
+ strcpy (buff + wp, cur_lang->endings[r * cur_lang->cases_num + ca]);
+ wp += strlen (cur_lang->endings[r * cur_lang->cases_num + ca]);
+ }
+ }
+ if (hyphen) {
+ buff[wp++] = '-';
+ } else {
+ buff[wp++] = 0;
+ }
+ p = save_pp + 1;
+ }
+
+ return estrdup (buff);
+}