Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
81145 views
1
// Copyright Joyent, Inc. and other Node contributors.
2
//
3
// Permission is hereby granted, free of charge, to any person obtaining a
4
// copy of this software and associated documentation files (the
5
// "Software"), to deal in the Software without restriction, including
6
// without limitation the rights to use, copy, modify, merge, publish,
7
// distribute, sublicense, and/or sell copies of the Software, and to permit
8
// persons to whom the Software is furnished to do so, subject to the
9
// following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included
12
// in all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17
// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18
// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22
#ifndef NAN_STRING_BYTES_H_
23
#define NAN_STRING_BYTES_H_
24
25
// Decodes a v8::Handle<v8::String> or Buffer to a raw char*
26
27
#include <node.h>
28
#include <node_buffer.h>
29
#include <assert.h>
30
#include <string.h> // memcpy
31
#include <limits.h>
32
33
namespace NanIntern {
34
35
using v8::Local;
36
using v8::Handle;
37
using v8::Object;
38
using v8::String;
39
using v8::Value;
40
41
42
//// Base 64 ////
43
44
#define base64_encoded_size(size) ((size + 2 - ((size + 2) % 3)) / 3 * 4)
45
46
47
48
//// Nan::HEX ////
49
50
// Byte-at-a-time scan: returns true as soon as one of the first |len| bytes
// of |buf| has its high bit (0x80) set, false if none does.
static bool contains_non_ascii_slow(const char* buf, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    if (buf[i] & 0x80) return true;
  }
  return false;
}


// Returns true if any of the first |len| bytes of |src| has its high bit set.
//
// Inputs shorter than 16 bytes are scanned byte by byte. Longer inputs are
// scanned in three steps: a byte-wise prologue up to the next word boundary,
// a word-at-a-time loop over the aligned middle, and a byte-wise epilogue for
// whatever is left over.
static bool contains_non_ascii(const char* src, size_t len) {
  if (len < 16) {
    return contains_non_ascii_slow(src, len);
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;

  if (unaligned > 0) {
    // Consume the bytes in front of the first word boundary.
    const unsigned n = bytes_per_word - unaligned;
    if (contains_non_ascii_slow(src, n)) return true;
    src += n;
    len -= n;
  }

  // 0x80 replicated into every byte of a word. Deriving it from the width of
  // uintptr_t (all-ones / 0xff == 0x0101...01) keeps the mask correct on every
  // 64-bit target instead of only on the ones named by a preprocessor check;
  // a 32-bit mask on a 64-bit word would ignore the upper four bytes.
  const uintptr_t mask = ~static_cast<uintptr_t>(0) / 0xff * 0x80;

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    if (srcw[i] & mask) return true;
  }

  // Bytes after the last whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    if (contains_non_ascii_slow(src + offset, remainder)) return true;
  }

  return false;
}
95
96
97
// Byte-at-a-time copy of |len| bytes from |src| to |dst| with the high bit of
// every byte cleared (each byte is ANDed with 0x7f).
static void force_ascii_slow(const char* src, char* dst, size_t len) {
  for (size_t i = 0; i < len; ++i) {
    dst[i] = src[i] & 0x7f;
  }
}


// Copies |len| bytes from |src| to |dst|, clearing the high bit of each byte.
// Exactly |len| bytes of |dst| are written.
//
// Inputs shorter than 16 bytes, and inputs whose source and destination do
// not share the same offset from a word boundary, are handled byte by byte.
// Otherwise the copy runs a byte-wise prologue up to the word boundary, a
// word-at-a-time loop, and a byte-wise epilogue.
static void force_ascii(const char* src, char* dst, size_t len) {
  if (len < 16) {
    force_ascii_slow(src, dst, len);
    return;
  }

  const unsigned bytes_per_word = sizeof(uintptr_t);
  const unsigned align_mask = bytes_per_word - 1;
  const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
  const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;

  // Word-sized loads and stores need both pointers aligned at the same time.
  // That is only reachable when they start at the same offset within a word;
  // this also covers an aligned |src| paired with a misaligned |dst|.
  if (src_unalign != dst_unalign) {
    force_ascii_slow(src, dst, len);
    return;
  }

  if (src_unalign > 0) {
    const unsigned unalign = bytes_per_word - src_unalign;
    force_ascii_slow(src, dst, unalign);
    src += unalign;
    dst += unalign;
    // Subtract the number of bytes actually consumed by the prologue, not the
    // misalignment itself; otherwise the tail is over- or under-processed.
    len -= unalign;
  }

  // 0x7f replicated into every byte of a word, derived from the width of
  // uintptr_t so that it is correct on every 32- and 64-bit target.
  const uintptr_t mask = ~(~static_cast<uintptr_t>(0) / 0xff * 0x80);

  const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
  uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);

  for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
    dstw[i] = srcw[i] & mask;
  }

  // Bytes after the last whole word.
  const unsigned remainder = len & align_mask;
  if (remainder > 0) {
    const size_t offset = len - remainder;
    force_ascii_slow(src + offset, dst + offset, remainder);
  }
}
147
148
149
// Base64-encodes |slen| bytes from |src| into |dst| using the standard
// alphabet (A-Z, a-z, 0-9, '+', '/') with '=' padding.
//
// |dlen| is the capacity of |dst| and must be at least the encoded size; this
// is only checked with assert(). No terminating NUL is written.
//
// Returns the number of characters written, which is always
// ceil(slen / 3) * 4.
static size_t base64_encode(const char* src,
                            size_t slen,
                            char* dst,
                            size_t dlen) {
  // Four output characters per (possibly partial) group of three input bytes;
  // this is the same value the base64_encoded_size() macro produces.
  const size_t encoded_len = (slen + 2) / 3 * 4;

  // We know how much we'll write, just make sure that there's space.
  assert(dlen >= encoded_len &&
         "not enough space provided for base64 encode");
  (void) dlen;  // only inspected by the assert above

  static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                              "abcdefghijklmnopqrstuvwxyz"
                              "0123456789+/";

  // Indices are size_t so they cannot truncate when size_t is wider than
  // unsigned.
  const size_t whole = slen / 3 * 3;  // bytes that form complete 3-byte groups
  size_t i = 0;                       // read position in src
  size_t k = 0;                       // write position in dst

  for (; i < whole; i += 3, k += 4) {
    const unsigned a = src[i + 0] & 0xff;
    const unsigned b = src[i + 1] & 0xff;
    const unsigned c = src[i + 2] & 0xff;

    dst[k + 0] = table[a >> 2];
    dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
    dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
    dst[k + 3] = table[c & 0x3f];
  }

  // One or two trailing bytes form a final, '='-padded group.
  switch (slen - whole) {
    case 1: {
      const unsigned a = src[i + 0] & 0xff;
      dst[k + 0] = table[a >> 2];
      dst[k + 1] = table[(a & 3) << 4];
      dst[k + 2] = '=';
      dst[k + 3] = '=';
      break;
    }

    case 2: {
      const unsigned a = src[i + 0] & 0xff;
      const unsigned b = src[i + 1] & 0xff;
      dst[k + 0] = table[a >> 2];
      dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
      dst[k + 2] = table[(b & 0x0f) << 2];
      dst[k + 3] = '=';
      break;
    }

    default:
      break;
  }

  return encoded_len;
}
211
212
213
// Writes the lowercase hexadecimal form of |slen| bytes from |src| into |dst|,
// two characters per input byte.
//
// |dlen| is the capacity of |dst| and must be at least slen * 2; this is only
// checked with assert(). No terminating NUL is written.
//
// Returns the number of characters written (slen * 2).
static size_t hex_encode(const char* src, size_t slen, char* dst, size_t dlen) {
  // We know how much we'll write, just make sure that there's space.
  assert(dlen >= slen * 2 &&
         "not enough space provided for hex encode");
  (void) dlen;  // only inspected by the assert above

  static const char hex[] = "0123456789abcdef";

  // size_t counters: a 32-bit counter compared against a size_t bound would
  // wrap around and never terminate for very large inputs.
  for (size_t i = 0; i < slen; ++i) {
    const uint8_t val = static_cast<uint8_t>(src[i]);
    dst[2 * i + 0] = hex[val >> 4];
    dst[2 * i + 1] = hex[val & 15];
  }

  return slen * 2;
}
228
229
230
231
// Converts |buflen| bytes at |buf| into a JavaScript value according to
// |encoding|.
//
// Nan::BUFFER passes the bytes to NanNewBufferHandle(). Every other encoding
// builds a string with NanNew<String>():
//   ASCII  - bytes with the high bit cleared (a masked copy is made only when
//            the input actually contains such bytes)
//   UTF8   - the bytes as given
//   BINARY - each byte widened to one 16-bit code unit
//   BASE64 - output of base64_encode()
//   UCS2   - the bytes viewed as buflen / 2 16-bit code units
//   HEX    - output of hex_encode()
//
// An empty input with any string encoding returns NanNew(""). An unknown
// encoding trips an assert; in builds without asserts the default-constructed
// handle is returned.
static Local<Value> Encode(const char* buf,
                           size_t buflen,
                           enum Nan::Encoding encoding) {
  assert(buflen <= node::Buffer::kMaxLength);
  if (!buflen && encoding != Nan::BUFFER)
    return NanNew("");

  Local<String> val;
  switch (encoding) {
    case Nan::BUFFER:
      return NanNewBufferHandle(buf, buflen);

    case Nan::ASCII:
      if (contains_non_ascii(buf, buflen)) {
        // Strip the high bits into a scratch copy; |buf| itself is const.
        char* out = new char[buflen];
        force_ascii(buf, out, buflen);
        val = NanNew<String>(out, buflen);
        delete[] out;
      } else {
        val = NanNew<String>(buf, buflen);
      }
      break;

    case Nan::UTF8:
      val = NanNew<String>(buf, buflen);
      break;

    case Nan::BINARY: {
      // TODO(isaacs) use ExternalTwoByteString?
      const unsigned char *cbuf = reinterpret_cast<const unsigned char*>(buf);
      uint16_t * twobytebuf = new uint16_t[buflen];
      for (size_t i = 0; i < buflen; i++) {
        // XXX is the following line platform independent?
        twobytebuf[i] = cbuf[i];
      }
      val = NanNew<String>(twobytebuf, buflen);
      delete[] twobytebuf;
      break;
    }

    case Nan::BASE64: {
      size_t dlen = base64_encoded_size(buflen);
      char* dst = new char[dlen];

      size_t written = base64_encode(buf, buflen, dst, dlen);
      assert(written == dlen);

      val = NanNew<String>(dst, dlen);
      delete[] dst;
      break;
    }

    case Nan::UCS2: {
      const size_t nchars = buflen / 2;
      if (reinterpret_cast<uintptr_t>(buf) % sizeof(uint16_t) == 0) {
        const uint16_t* data = reinterpret_cast<const uint16_t*>(buf);
        val = NanNew<String>(data, nchars);
      } else {
        // |buf| may start at an odd address (e.g. a slice of a larger
        // buffer). Reading it through a uint16_t* would be a misaligned
        // access, so copy the code units into aligned storage first.
        uint16_t* aligned = new uint16_t[nchars];
        memcpy(aligned, buf, nchars * sizeof(uint16_t));
        val = NanNew<String>(aligned, nchars);
        delete[] aligned;
      }
      break;
    }

    case Nan::HEX: {
      size_t dlen = buflen * 2;
      char* dst = new char[dlen];
      size_t written = hex_encode(buf, buflen, dst, dlen);
      assert(written == dlen);

      val = NanNew<String>(dst, dlen);
      delete[] dst;
      break;
    }

    default:
      assert(0 && "unknown encoding");
      break;
  }

  return val;
}
307
308
#undef base64_encoded_size
309
310
} // namespace NanIntern
311
312
#endif // NAN_STRING_BYTES_H_
313
314