aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--iconvlite.cpp189
-rw-r--r--iconvlite.h78
-rwxr-xr-xindex.cpp71
-rw-r--r--test.js9
-rw-r--r--vkext_flex.c2
-rw-r--r--vkext_flex.h12
6 files changed, 175 insertions, 186 deletions
diff --git a/iconvlite.cpp b/iconvlite.cpp
index a81ab70..af4bb4a 100644
--- a/iconvlite.cpp
+++ b/iconvlite.cpp
@@ -3,7 +3,76 @@
using namespace std;
-static void cp2utf1(char *out, const char *in) {
+typedef struct ConvLetter { // one Windows-1251 <-> Unicode mapping entry
+ unsigned char win1251; // byte value in the Windows-1251 code page
+ int unicode; // Unicode code point that byte stands for
+} Letter;
+
+static Letter g_letters[] = { // Windows-1251 bytes 0x82..0xBF with their Unicode code points; searched linearly by utf8_to_cp1251
+ {0x82, 0x201A}, // SINGLE LOW-9 QUOTATION MARK
+ {0x83, 0x0453}, // CYRILLIC SMALL LETTER GJE
+ {0x84, 0x201E}, // DOUBLE LOW-9 QUOTATION MARK
+ {0x85, 0x2026}, // HORIZONTAL ELLIPSIS
+ {0x86, 0x2020}, // DAGGER
+ {0x87, 0x2021}, // DOUBLE DAGGER
+ {0x88, 0x20AC}, // EURO SIGN
+ {0x89, 0x2030}, // PER MILLE SIGN
+ {0x8A, 0x0409}, // CYRILLIC CAPITAL LETTER LJE
+ {0x8B, 0x2039}, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ {0x8C, 0x040A}, // CYRILLIC CAPITAL LETTER NJE
+ {0x8D, 0x040C}, // CYRILLIC CAPITAL LETTER KJE
+ {0x8E, 0x040B}, // CYRILLIC CAPITAL LETTER TSHE
+ {0x8F, 0x040F}, // CYRILLIC CAPITAL LETTER DZHE
+ {0x90, 0x0452}, // CYRILLIC SMALL LETTER DJE
+ {0x91, 0x2018}, // LEFT SINGLE QUOTATION MARK
+ {0x92, 0x2019}, // RIGHT SINGLE QUOTATION MARK
+ {0x93, 0x201C}, // LEFT DOUBLE QUOTATION MARK
+ {0x94, 0x201D}, // RIGHT DOUBLE QUOTATION MARK
+ {0x95, 0x2022}, // BULLET
+ {0x96, 0x2013}, // EN DASH
+ {0x97, 0x2014}, // EM DASH
+ {0x99, 0x2122}, // TRADE MARK SIGN (the table has no entry for byte 0x98)
+ {0x9A, 0x0459}, // CYRILLIC SMALL LETTER LJE
+ {0x9B, 0x203A}, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ {0x9C, 0x045A}, // CYRILLIC SMALL LETTER NJE
+ {0x9D, 0x045C}, // CYRILLIC SMALL LETTER KJE
+ {0x9E, 0x045B}, // CYRILLIC SMALL LETTER TSHE
+ {0x9F, 0x045F}, // CYRILLIC SMALL LETTER DZHE
+ {0xA0, 0x00A0}, // NO-BREAK SPACE
+ {0xA1, 0x040E}, // CYRILLIC CAPITAL LETTER SHORT U
+ {0xA2, 0x045E}, // CYRILLIC SMALL LETTER SHORT U
+ {0xA3, 0x0408}, // CYRILLIC CAPITAL LETTER JE
+ {0xA4, 0x00A4}, // CURRENCY SIGN
+ {0xA5, 0x0490}, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+ {0xA6, 0x00A6}, // BROKEN BAR
+ {0xA7, 0x00A7}, // SECTION SIGN
+ {0xA8, 0x0401}, // CYRILLIC CAPITAL LETTER IO
+ {0xA9, 0x00A9}, // COPYRIGHT SIGN
+ {0xAA, 0x0404}, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
+ {0xAB, 0x00AB}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+ {0xAC, 0x00AC}, // NOT SIGN
+ {0xAD, 0x00AD}, // SOFT HYPHEN
+ {0xAE, 0x00AE}, // REGISTERED SIGN
+ {0xAF, 0x0407}, // CYRILLIC CAPITAL LETTER YI
+ {0xB0, 0x00B0}, // DEGREE SIGN
+ {0xB1, 0x00B1}, // PLUS-MINUS SIGN
+ {0xB2, 0x0406}, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+ {0xB3, 0x0456}, // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+ {0xB4, 0x0491}, // CYRILLIC SMALL LETTER GHE WITH UPTURN
+ {0xB5, 0x00B5}, // MICRO SIGN
+ {0xB6, 0x00B6}, // PILCROW SIGN
+ {0xB7, 0x00B7}, // MIDDLE DOT
+ {0xB8, 0x0451}, // CYRILLIC SMALL LETTER IO
+ {0xB9, 0x2116}, // NUMERO SIGN
+ {0xBA, 0x0454}, // CYRILLIC SMALL LETTER UKRAINIAN IE
+ {0xBB, 0x00BB}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+ {0xBC, 0x0458}, // CYRILLIC SMALL LETTER JE
+ {0xBD, 0x0405}, // CYRILLIC CAPITAL LETTER DZE
+ {0xBE, 0x0455}, // CYRILLIC SMALL LETTER DZE
+ {0xBF, 0x0457} // CYRILLIC SMALL LETTER YI
+};
+
+static void cp1251_to_utf8(char *out, const char *in, size_t len) {
static const int table[128] = {
0x82D0,0x83D0,0x9A80E2,0x93D1,0x9E80E2,0xA680E2,0xA080E2,0xA180E2,
0xAC82E2,0xB080E2,0x89D0,0xB980E2,0x8AD0,0x8CD0,0x8BD0,0x8FD0,
@@ -22,7 +91,7 @@ static void cp2utf1(char *out, const char *in) {
0x80D1,0x81D1,0x82D1,0x83D1,0x84D1,0x85D1,0x86D1,0x87D1,
0x88D1,0x89D1,0x8AD1,0x8BD1,0x8CD1,0x8DD1,0x8ED1,0x8FD1
};
- while (*in)
+ while (*in) {
if (*in & 0x80) {
int v = table[(int)(0x7f & *in++)];
if (!v)
@@ -31,75 +100,71 @@ static void cp2utf1(char *out, const char *in) {
*out++ = (char)(v >> 8);
if (v >>= 16)
*out++ = (char)v;
- }
- else
+ } else {
*out++ = *in++;
+ }
+ }
*out = 0;
}
-string cp2utf(string s) {
- int c,i;
- int len = s.size();
- string ns;
- for(i=0; i<len; i++) {
- c=s[i];
- char buf[4], in[2] = {0, 0};
- *in = c;
- cp2utf1(buf, in);
- ns+=string(buf);
+
+static int utf8_to_cp1251(const char* utf8, char* windows1251, size_t n) { // converts at most n bytes of UTF-8 into NUL-terminated Windows-1251; returns 1 on success, 0 on an unconvertible sequence (output is then left unterminated)
+ int i = 0;
+ int j = 0;
+ for(; i < (int)n && utf8[i] != 0; ++i) {
+ char prefix = utf8[i];
+ char suffix = utf8[i+1]; // read unconditionally; at i == n-1 this is utf8[n], so that byte must be readable
+ if ((prefix & 0x80) == 0) { // high bit clear: ASCII, copied through unchanged
+ windows1251[j] = (char)prefix;
+ ++j;
+ } else if ((~prefix) & 0x20) { // bit 5 clear: treated as a two-byte sequence; NOTE(review): stray continuation bytes 0x80..0x9F also land here
+ int first5bit = prefix & 0x1F;
+ first5bit <<= 6;
+ int sec6bit = suffix & 0x3F;
+ int unicode_char = first5bit + sec6bit; // 5 + 6 payload bits, so the value never exceeds 0x7FF
+
+ if ( unicode_char >= 0x410 && unicode_char <= 0x44F ) { // U+0410..U+044F map to bytes 0xC0..0xFF
+ windows1251[j] = (char)(unicode_char - 0x350);
+ } else if (unicode_char >= 0x80 && unicode_char <= 0xFF) { // NOTE(review): U+0080..U+00FF is emitted as the same byte value and never reaches the g_letters lookup — confirm this is intended
+ windows1251[j] = (char)(unicode_char);
+ } else if (unicode_char >= 0x402 && unicode_char <= 0x403) { // U+0402, U+0403 map to bytes 0x80, 0x81
+ windows1251[j] = (char)(unicode_char - 0x382);
+ } else {
+ int count = sizeof(g_letters) / sizeof(Letter);
+ for (int k = 0; k < count; ++k) { // linear search; NOTE(review): table entries above 0x7FF (e.g. 0x2014) can never equal a two-byte code point
+ if (unicode_char == g_letters[k].unicode) {
+ windows1251[j] = g_letters[k].win1251;
+ goto NEXT_LETTER;
+ }
+ }
+ // no mapping for this code point: give up
+ return 0;
+ }
+NEXT_LETTER:
+ ++i; // step over the second byte, already consumed as suffix
+ ++j;
+ } else {
+ // bytes with bits 7 and 5 both set (e.g. three-byte lead bytes) are not supported
+ return 0;
+ }
}
- return ns;
+ windows1251[j] = 0;
+ return 1;
}
string utf2cp(string s) {
size_t len = s.size();
- const char *buff = s.c_str();
- char *output = new char[len];
- convert_utf8_to_windows1251(buff, output, len);
+ char* output = new char[len+1](); // UTF-8 -> Windows-1251 scratch buffer; value-initialised (all zero) so it is NUL-terminated even when conversion stops early
+ utf8_to_cp1251(s.c_str(), output, len); // returns 0 without writing a terminator on an unconvertible character; the text converted so far is kept
string ns(output);
+ delete[] output;
return ns;
}
-int convert_utf8_to_windows1251(const char* utf8, char* windows1251, size_t n)
-{
- int i = 0;
- int j = 0;
- for(; i < (int)n && utf8[i] != 0; ++i) {
- char prefix = utf8[i];
- char suffix = utf8[i+1];
- if ((prefix & 0x80) == 0) {
- windows1251[j] = (char)prefix;
- ++j;
- } else if ((~prefix) & 0x20) {
- int first5bit = prefix & 0x1F;
- first5bit <<= 6;
- int sec6bit = suffix & 0x3F;
- int unicode_char = first5bit + sec6bit;
-
- if ( unicode_char >= 0x410 && unicode_char <= 0x44F ) {
- windows1251[j] = (char)(unicode_char - 0x350);
- } else if (unicode_char >= 0x80 && unicode_char <= 0xFF) {
- windows1251[j] = (char)(unicode_char);
- } else if (unicode_char >= 0x402 && unicode_char <= 0x403) {
- windows1251[j] = (char)(unicode_char - 0x382);
- } else {
- int count = sizeof(g_letters) / sizeof(Letter);
- for (int k = 0; k < count; ++k) {
- if (unicode_char == g_letters[k].unicode) {
- windows1251[j] = g_letters[k].win1251;
- goto NEXT_LETTER;
- }
- }
- // can't convert this char
- return 0;
- }
-NEXT_LETTER:
- ++i;
- ++j;
- } else {
- // can't convert this chars
- return 0;
- }
- }
- windows1251[j] = 0;
- return 1;
-} \ No newline at end of file
+string cp2utf(string s) { // converts a Windows-1251 string to UTF-8
+ size_t len = s.size();
+ char* output = new char[len*3+1]; // entries in the cp1251_to_utf8 table pack at most three UTF-8 bytes per input byte, plus one for the terminator
+ cp1251_to_utf8(output, s.c_str(), len); // NOTE(review): the visible converter loop runs until the first NUL byte; len appears unused — confirm
+ string ns(output);
+ delete[] output;
+ return ns;
+}
diff --git a/iconvlite.h b/iconvlite.h
index c2bb6a3..3194eaf 100644
--- a/iconvlite.h
+++ b/iconvlite.h
@@ -1,85 +1,9 @@
-/*
-iconvlite.h
-Iconv Lite
-Simple cpp functions to convert strings from cp1251 to utf8 and ftom utf8 to cp1251
-*/
-
#ifndef ICONVLITE_H
#define ICONVLITE_H
using namespace std;
string cp2utf(string s);
-int convert_utf8_to_windows1251(const char* utf8, char* windows1251, size_t n);
string utf2cp(string s);
-typedef struct ConvLetter {
- unsigned char win1251;
- int unicode;
-} Letter;
-
-static Letter g_letters[] = {
- {0x82, 0x201A}, // SINGLE LOW-9 QUOTATION MARK
- {0x83, 0x0453}, // CYRILLIC SMALL LETTER GJE
- {0x84, 0x201E}, // DOUBLE LOW-9 QUOTATION MARK
- {0x85, 0x2026}, // HORIZONTAL ELLIPSIS
- {0x86, 0x2020}, // DAGGER
- {0x87, 0x2021}, // DOUBLE DAGGER
- {0x88, 0x20AC}, // EURO SIGN
- {0x89, 0x2030}, // PER MILLE SIGN
- {0x8A, 0x0409}, // CYRILLIC CAPITAL LETTER LJE
- {0x8B, 0x2039}, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- {0x8C, 0x040A}, // CYRILLIC CAPITAL LETTER NJE
- {0x8D, 0x040C}, // CYRILLIC CAPITAL LETTER KJE
- {0x8E, 0x040B}, // CYRILLIC CAPITAL LETTER TSHE
- {0x8F, 0x040F}, // CYRILLIC CAPITAL LETTER DZHE
- {0x90, 0x0452}, // CYRILLIC SMALL LETTER DJE
- {0x91, 0x2018}, // LEFT SINGLE QUOTATION MARK
- {0x92, 0x2019}, // RIGHT SINGLE QUOTATION MARK
- {0x93, 0x201C}, // LEFT DOUBLE QUOTATION MARK
- {0x94, 0x201D}, // RIGHT DOUBLE QUOTATION MARK
- {0x95, 0x2022}, // BULLET
- {0x96, 0x2013}, // EN DASH
- {0x97, 0x2014}, // EM DASH
- {0x99, 0x2122}, // TRADE MARK SIGN
- {0x9A, 0x0459}, // CYRILLIC SMALL LETTER LJE
- {0x9B, 0x203A}, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- {0x9C, 0x045A}, // CYRILLIC SMALL LETTER NJE
- {0x9D, 0x045C}, // CYRILLIC SMALL LETTER KJE
- {0x9E, 0x045B}, // CYRILLIC SMALL LETTER TSHE
- {0x9F, 0x045F}, // CYRILLIC SMALL LETTER DZHE
- {0xA0, 0x00A0}, // NO-BREAK SPACE
- {0xA1, 0x040E}, // CYRILLIC CAPITAL LETTER SHORT U
- {0xA2, 0x045E}, // CYRILLIC SMALL LETTER SHORT U
- {0xA3, 0x0408}, // CYRILLIC CAPITAL LETTER JE
- {0xA4, 0x00A4}, // CURRENCY SIGN
- {0xA5, 0x0490}, // CYRILLIC CAPITAL LETTER GHE WITH UPTURN
- {0xA6, 0x00A6}, // BROKEN BAR
- {0xA7, 0x00A7}, // SECTION SIGN
- {0xA8, 0x0401}, // CYRILLIC CAPITAL LETTER IO
- {0xA9, 0x00A9}, // COPYRIGHT SIGN
- {0xAA, 0x0404}, // CYRILLIC CAPITAL LETTER UKRAINIAN IE
- {0xAB, 0x00AB}, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- {0xAC, 0x00AC}, // NOT SIGN
- {0xAD, 0x00AD}, // SOFT HYPHEN
- {0xAE, 0x00AE}, // REGISTERED SIGN
- {0xAF, 0x0407}, // CYRILLIC CAPITAL LETTER YI
- {0xB0, 0x00B0}, // DEGREE SIGN
- {0xB1, 0x00B1}, // PLUS-MINUS SIGN
- {0xB2, 0x0406}, // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
- {0xB3, 0x0456}, // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
- {0xB4, 0x0491}, // CYRILLIC SMALL LETTER GHE WITH UPTURN
- {0xB5, 0x00B5}, // MICRO SIGN
- {0xB6, 0x00B6}, // PILCROW SIGN
- {0xB7, 0x00B7}, // MIDDLE DOT
- {0xB8, 0x0451}, // CYRILLIC SMALL LETTER IO
- {0xB9, 0x2116}, // NUMERO SIGN
- {0xBA, 0x0454}, // CYRILLIC SMALL LETTER UKRAINIAN IE
- {0xBB, 0x00BB}, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- {0xBC, 0x0458}, // CYRILLIC SMALL LETTER JE
- {0xBD, 0x0405}, // CYRILLIC CAPITAL LETTER DZE
- {0xBE, 0x0455}, // CYRILLIC SMALL LETTER DZE
- {0xBF, 0x0457} // CYRILLIC SMALL LETTER YI
-};
-
-#endif \ No newline at end of file
+#endif
diff --git a/index.cpp b/index.cpp
index ac7a705..25a359d 100755
--- a/index.cpp
+++ b/index.cpp
@@ -2,52 +2,49 @@
#include <v8.h>
#include <iostream>
+#include <stdlib.h>
#include "iconvlite.h"
#include "vkext_flex.h"
using namespace v8;
-using namespace std;
-
-const char* ToCString(const String::Utf8Value& value) {
- return *value ? *value : "<string conversion failed>";
-}
void flex(const FunctionCallbackInfo<Value>& args) {
- Isolate* isolate = args.GetIsolate();
-
- String::Utf8Value nameArg(args[0]);
- string nameString(*nameArg);
- string nameStringWindows1251 = utf2cp(nameString);
-
- String::Utf8Value caseArg(args[2]);
- string caseString(*caseArg);
- string caseStringWindows1251 = utf2cp(caseString);
-
- String::Utf8Value typeArg(args[3]);
- string typeString(*typeArg);
- string ctypeStringWindows1251 = utf2cp(typeString);
-
- int sex = (int)args[1]->NumberValue();
- int lang = (int)args[4]->NumberValue();
-
- char *result = do_flex(
- nameStringWindows1251.c_str(),
- nameStringWindows1251.length(),
- caseStringWindows1251.c_str(),
- caseStringWindows1251.length(),
- sex,
- ctypeStringWindows1251.c_str(),
- ctypeStringWindows1251.length(),
- lang);
-
- string resultStringWindows1251(result);
- string resultString = cp2utf(resultStringWindows1251);
-
- args.GetReturnValue().Set(String::NewFromUtf8(isolate, resultString.c_str()));
+ Isolate* isolate = args.GetIsolate();
+
+ String::Utf8Value nameArg(args[0]);
+ std::string nameString(*nameArg);
+ std::string nameStringWindows1251 = utf2cp(nameString);
+
+ String::Utf8Value caseArg(args[2]);
+ std::string caseString(*caseArg);
+ std::string caseStringWindows1251 = utf2cp(caseString);
+
+ String::Utf8Value typeArg(args[3]);
+ std::string typeString(*typeArg);
+ std::string ctypeStringWindows1251 = utf2cp(typeString);
+
+ int sex = (int)args[1]->NumberValue();
+ int lang = (int)args[4]->NumberValue();
+
+ char *result = do_flex(
+ nameStringWindows1251.c_str(),
+ nameStringWindows1251.length(),
+ caseStringWindows1251.c_str(),
+ caseStringWindows1251.length(),
+ sex,
+ ctypeStringWindows1251.c_str(),
+ lang);
+
+ std::string resultStringWindows1251(result);
+ free(result);
+
+ std::string resultString = cp2utf(resultStringWindows1251);
+
+ args.GetReturnValue().Set(String::NewFromUtf8(isolate, resultString.c_str()));
}
void Init(Handle<Object> exports) {
NODE_SET_METHOD(exports, "flex", flex);
}
-NODE_MODULE(hello, Init);
+NODE_MODULE(vkext_flex, Init)
diff --git a/test.js b/test.js
index b1a6c13..dcb2ca0 100644
--- a/test.js
+++ b/test.js
@@ -3,12 +3,14 @@ const vkflex = require('./index.js')
let nameCases = ['Gen', 'Dat', 'Acc', 'Ins', 'Abl']
let names = [
['Евгений', 'Зиновьев', 0],
- ['Павел', 'Дуров', 0],
- ['Анастасия', 'Семенюк', 1],
- ['Катя', 'Лебедева', 1]
+ //['Павел', 'Дуров', 0],
+ //['Анастасия', 'Семенюк', 1],
+ //['Катя', 'Лебедева', 1]
+ //['Denis', 'Komissarov', 0]
]
console.time('flex')
+for (let i = 0; i < 100; i++) {
for (let [name, surname, sex] of names) {
console.log('Testing "'+name+' '+surname+'"...')
@@ -18,4 +20,5 @@ for (let [name, surname, sex] of names) {
console.log('')
}
+}
console.timeEnd('flex')
diff --git a/vkext_flex.c b/vkext_flex.c
index 244ac37..b78de49 100644
--- a/vkext_flex.c
+++ b/vkext_flex.c
@@ -38,7 +38,7 @@ char *estrdup (const char *s) {
return d;
}
-char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int type_len, int lang_id) {
+char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id) {
if (name_len > (1 << 10)) {
return estrdup (name);
}
diff --git a/vkext_flex.h b/vkext_flex.h
index 48de3c2..0799748 100644
--- a/vkext_flex.h
+++ b/vkext_flex.h
@@ -14,7 +14,7 @@
You should have received a copy of the GNU General Public License
along with VK/KittenPHP-DB-Engine. If not, see <http://www.gnu.org/licenses/>.
- This program is released under the GPL with the additional exemption
+ This program is released under the GPL with the additional exemption
that compiling, linking, and/or using OpenSSL is allowed.
You are free to remove this exemption from derived works.
@@ -30,10 +30,10 @@
#if defined __cplusplus
extern "C" {
#endif
-
+
#include <stdio.h>
-
+
struct vk_node {
short tail_len;
short hyphen;
@@ -52,9 +52,9 @@ struct lang {
const char **endings;
struct vk_node nodes[];
};
-
-char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int type_len, int lang_id);
-
+
+char *do_flex (const char *name, int name_len, const char *case_name, int case_name_len, int sex, const char *type, int lang_id);
+
#if defined __cplusplus
};
#endif