Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download

open-axiom repository from github

24005 views
1
// Copyright (C) 2010-2013, Gabriel Dos Reis.
2
// All rights reserved.
3
// Written by Gabriel Dos Reis.
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are
7
// met:
8
//
9
// - Redistributions of source code must retain the above copyright
10
// notice, this list of conditions and the following disclaimer.
11
//
12
// - Redistributions in binary form must reproduce the above copyright
13
// notice, this list of conditions and the following disclaimer in
14
// the documentation and/or other materials provided with the
15
// distribution.
16
//
17
// - Neither the name of The Numerical Algorithms Group Ltd. nor the
18
// names of its contributors may be used to endorse or promote products
19
// derived from this software without specific prior written permission.
20
//
21
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
33
// --% Author: Gabriel Dos Reis.
34
35
#include <ctype.h>
36
#include <string.h>
37
#include <iostream>
38
#include <iterator>
39
#include <open-axiom/sexpr>
40
#include <open-axiom/FileMapping>
41
#include <open-axiom/diagnostics>
42
43
namespace OpenAxiom {
44
namespace Sexpr {
45
static void
46
invalid_character(Reader::State& s) {
47
auto line = std::to_string(s.bytes.lineno);
48
auto column = std::to_string(s.cur - s.line);
49
auto msg = "invalid character on line " + line +
50
" and column " + column;
51
if (isprint(*s.cur))
52
throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur));
53
throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur));
54
}
55
56
static void
57
syntax_error(const std::string& s) {
58
throw Diagnostics::BasicError(s);
59
}
60
61
// Return true if character `c' introduces a blank.
62
static bool
63
is_blank(char c) {
64
return c == ' ' or c == '\t' or c == '\v'
65
or c == '\n' or c == '\f' or c == '\r';
66
}
67
68
// Return true if the character `c' introduces a delimiter.
69
static bool
70
is_delimiter(char c) {
71
return is_blank(c)
72
or c == '(' or c == ')' or c == '\''
73
or c == '`' or c == '#';
74
}
75
76
// Move the cursor past all consecutive blank characters, and
77
// return true if there are more input characters to consider.
78
static bool
79
skip_blank(Reader::State& s) {
80
for (bool done = false; s.cur < s.bytes.end and not done; )
81
switch (*s.cur) {
82
case '\n':
83
++s.bytes.lineno;
84
s.line = ++s.cur;
85
break;
86
case ' ': case '\t': case '\v': case '\r': case '\f':
87
++s.cur;
88
break;
89
default: done = true; break;
90
}
91
return s.cur < s.bytes.end;
92
}
93
94
// Move `cur' to end-of-line marker.
95
static void
96
skip_to_eol(Reader::State& s) {
97
// FIXME: properly handle CR+LF.
98
while (s.cur < s.bytes.end and *s.cur != '\n')
99
++s.cur;
100
}
101
102
// Move `cur' one-past a non-esacaped character `c'.
103
// Return true if the character was seen.
104
static bool
105
skip_to_nonescaped_char(Reader::State& s, char c) {
106
for (bool saw_escape = false; s.cur < s.bytes.end; ++s.cur)
107
if (saw_escape)
108
saw_escape = false;
109
else if (*s.cur == '\\')
110
saw_escape = true;
111
else if (*s.cur == c) {
112
++s.cur;
113
return true;
114
}
115
return false;
116
}
117
118
// Move the cursor past the closing quote of string literal.
119
// Return true if the closing quote was effectively seen.
120
static inline bool
121
skip_to_quote(Reader::State& s) {
122
return skip_to_nonescaped_char(s, '"');
123
}
124
125
template<typename Pred>
126
static bool
127
advance_while(Reader::State& s, Pred p) {
128
while (s.cur < s.bytes.end and p(*s.cur))
129
++s.cur;
130
return s.cur < s.bytes.end;
131
}
132
133
// Return true if the character `c' be part of a non-absolute
134
// identifier.
135
static bool
136
identifier_part(Byte c) {
137
switch (c) {
138
case '+': case '-': case '*': case '/': case '%': case '^':
139
case '~': case '@': case '$': case '&': case '=':
140
case '<': case '>': case '?': case '!': case '_':
141
case '[': case ']': case '{': case '}':
142
return true;
143
default:
144
return isalnum(c);
145
}
146
}
147
148
// -- AtomSyntax --
149
AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
150
151
// -- IntegerSyntax --
152
IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
153
154
void
155
IntegerSyntax::accept(Visitor& v) const {
156
v.visit(*this);
157
}
158
159
// -- CharacterSyntax --
160
CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
161
162
void
163
CharacterSyntax::accept(Visitor& v) const {
164
v.visit(*this);
165
}
166
167
// -- StringSyntax --
168
StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
169
170
void
171
StringSyntax::accept(Visitor& v) const {
172
v.visit(*this);
173
}
174
175
// -- SymbolSyntax --
176
SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
177
: AtomSyntax(t), sort(k)
178
{ }
179
180
void
181
SymbolSyntax::accept(Visitor& v) const {
182
v.visit(*this);
183
}
184
185
// -- AnchorSyntax --
186
AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
187
188
void
189
AnchorSyntax::accept(Visitor& v) const {
190
v.visit(*this);
191
}
192
193
// -- ReferenceSyntax --
194
ReferenceSyntax::ReferenceSyntax(const Lexeme& t, Ordinal n)
195
: AtomSyntax(t), pos(n)
196
{ }
197
198
void
199
ReferenceSyntax::accept(Visitor& v) const {
200
v.visit(*this);
201
}
202
203
// -- QuoteSyntax --
204
QuoteSyntax::QuoteSyntax(const Syntax* s)
205
: unary_form<QuoteSyntax>(s)
206
{ }
207
208
// -- AntiquoteSyntax --
209
AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
210
: unary_form<AntiquoteSyntax>(s)
211
{ }
212
213
// -- Expand --
214
Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
215
216
// -- Eval --
217
Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
218
219
// -- Splice --
220
Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
221
222
// -- Function --
223
Function::Function(const Syntax* s) : unary_form<Function>(s) { }
224
225
// -- Include --
226
Include::Include(const Syntax* c, const Syntax* s)
227
: binary_form<Include>(c, s)
228
{ }
229
230
// -- Exclude --
231
Exclude::Exclude(const Syntax* c, const Syntax* s)
232
: binary_form<Exclude>(c, s)
233
{ }
234
235
// -- ListSyntax --
236
ListSyntax::ListSyntax() : dot(false) { }
237
238
ListSyntax::ListSyntax(const base& elts, bool d)
239
: base(elts), dot(d)
240
{ }
241
242
ListSyntax::~ListSyntax() { }
243
244
void
245
ListSyntax::accept(Visitor& v) const {
246
v.visit(*this);
247
}
248
249
// -- VectorSyntax --
250
VectorSyntax::VectorSyntax() { }
251
252
VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
253
254
VectorSyntax::~VectorSyntax() { }
255
256
void
257
VectorSyntax::accept(Visitor& v) const {
258
v.visit(*this);
259
}
260
261
// ---------------
262
// -- Allocator --
263
// ---------------
264
Allocator::Allocator() { }
265
266
// This destructor is defined here so that it provides
267
// a single instantiation point for destructors of all
268
// used templates floating around.
269
Allocator::~Allocator() { }
270
271
const CharacterSyntax*
272
Allocator::make_character(const Lexeme& t) {
273
return chars.make(t);
274
}
275
276
const IntegerSyntax*
277
Allocator::make_integer(const Lexeme& t) {
278
return ints.make(t);
279
}
280
281
const StringSyntax*
282
Allocator::make_string(const Lexeme& t) {
283
return strs.make(t);
284
}
285
286
const SymbolSyntax*
287
Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) {
288
return syms.make(t, k);
289
}
290
291
const ReferenceSyntax*
292
Allocator::make_reference(size_t i, const Lexeme& t) {
293
return refs.make(t, i);
294
}
295
296
const AnchorSyntax*
297
Allocator::make_anchor(size_t t, const Syntax* s) {
298
return ancs.make(t, s);
299
}
300
301
const QuoteSyntax*
302
Allocator::make_quote(const Syntax* s) {
303
return quotes.make(s);
304
}
305
306
const AntiquoteSyntax*
307
Allocator::make_antiquote(const Syntax* s) {
308
return antis.make(s);
309
}
310
311
const Expand*
312
Allocator::make_expand(const Syntax* s) {
313
return exps.make(s);
314
}
315
316
const Eval*
317
Allocator::make_eval(const Syntax* s) {
318
return evls.make(s);
319
}
320
321
const Splice*
322
Allocator::make_splice(const Syntax* s) {
323
return spls.make(s);
324
}
325
326
const Function*
327
Allocator::make_function(const Syntax* s) {
328
return funs.make(s);
329
}
330
331
const Include*
332
Allocator::make_include(const Syntax* c, const Syntax* s) {
333
return incs.make(c, s);
334
}
335
336
const Exclude*
337
Allocator::make_exclude(const Syntax* c, const Syntax* s) {
338
return excs.make(c, s);
339
}
340
341
const ListSyntax*
342
Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) {
343
if (elts.empty())
344
return &empty_list;
345
return lists.make(elts, dot);
346
}
347
348
const VectorSyntax*
349
Allocator::make_vector(const std::vector<const Syntax*>& elts) {
350
if (elts.empty())
351
return &empty_vector;
352
return vectors.make(elts);
353
}
354
355
// The sequence of characters in [cur, last) consists
356
// entirely of digits. Return the corresponding natural value.
357
static size_t
358
natural_value(const Byte* cur, const Byte* last) {
359
size_t n = 0;
360
for (; cur < last; ++cur)
361
// FIXME: check for overflow.
362
n = 10 * n + (*cur - '0');
363
return n;
364
}
365
366
// -- Reader --
367
Reader::Reader(const Byte* f, const Byte* l)
368
: st{ { f, l, 1 }, f, f }
369
{ }
370
371
Reader::Reader(const RawInput& ri)
372
: st { ri, ri.start, ri.start }
373
{ }
374
375
static const Syntax* read_sexpr(Reader::State&);
376
377
// Parse a string literal
378
static const Syntax*
379
read_string(Reader::State& s) {
380
auto start = s.cur++;
381
if (not skip_to_quote(s))
382
syntax_error("missing closing quote sign for string literal");
383
Lexeme t = { { start, s.cur }, s.bytes.lineno };
384
return s.alloc.make_string(t);
385
}
386
387
// Parse an absolute identifier.
388
static const Syntax*
389
read_absolute_symbol(Reader::State& s) {
390
auto start = ++s.cur;
391
if (not skip_to_nonescaped_char(s, '|'))
392
syntax_error("missing closing bar sign for an absolute symbol");
393
Lexeme t = { { start, s.cur - 1 }, s.bytes.lineno };
394
return s.alloc.make_symbol(SymbolSyntax::absolute, t);
395
}
396
397
// Read an atom starting with digits.
398
static const Syntax*
399
read_maybe_natural(Reader::State& s) {
400
auto start = s.cur;
401
advance_while (s, isdigit);
402
if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
403
Lexeme t = { { start, s.cur }, s.bytes.lineno };
404
return s.alloc.make_integer(t);
405
}
406
advance_while(s, identifier_part);
407
Lexeme t = { { start, s.cur }, s.bytes.lineno };
408
return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
409
}
410
411
// Read an identifier.
412
static const Syntax*
413
read_identifier(Reader::State& s) {
414
auto start = s.cur;
415
advance_while(s, identifier_part);
416
Lexeme t = { { start, s.cur }, s.bytes.lineno };
417
return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
418
}
419
420
// Read an atom starting with a '+' or '-' sign; this
421
// should be identifier, or a signed integer.
422
static const Syntax*
423
read_maybe_signed_number(Reader::State& s) {
424
auto start = s.cur++;
425
if (s.cur < s.bytes.end and isdigit(*s.cur)) {
426
advance_while(s, isdigit);
427
if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
428
Lexeme t = { { start, s.cur }, s.bytes.lineno };
429
return s.alloc.make_integer(t);
430
}
431
}
432
advance_while(s, identifier_part);
433
Lexeme t = { { start, s.cur }, s.bytes.lineno };
434
return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
435
}
436
437
static const Syntax*
438
read_keyword(Reader::State& s) {
439
auto start = s.cur++;
440
advance_while(s, identifier_part);
441
Lexeme t = { { start, s.cur }, s.bytes.lineno };
442
return s.alloc.make_symbol(SymbolSyntax::keyword, t);
443
}
444
445
// Read an atom.
446
static const Syntax*
447
read_atom(Reader::State& s) {
448
switch (*s.cur) {
449
case '"': return read_string(s);
450
case ':': return read_keyword(s);
451
case '-': case '+': return read_maybe_signed_number(s);
452
453
case '0': case '1': case '2': case '3': case '4':
454
case '5': case '6': case '7': case '8': case '9':
455
return read_maybe_natural(s);
456
457
default:
458
if (identifier_part(*s.cur))
459
return read_identifier(s);
460
invalid_character(s);
461
++s.cur;
462
return nullptr;
463
}
464
}
465
466
// Parse a quote expression.
467
static const Syntax*
468
read_quote(Reader::State& s) {
469
++s.cur; // skip the quote character
470
auto x = read_sexpr(s);
471
if (x == nullptr)
472
syntax_error("end of input reached after quote sign");
473
return s.alloc.make_quote(x);
474
}
475
476
// Parse a backquote expression.
477
static const Syntax*
478
read_backquote(Reader::State& s) {
479
++s.cur; // skip the backquote character
480
auto x = read_sexpr(s);
481
if (x == nullptr)
482
syntax_error("end of input reached after backquote sign");
483
return s.alloc.make_antiquote(x);
484
}
485
486
// We've just seen "#(" indicating the start of a literal
487
// vector. Read the elements and return the corresponding form.
488
static const Syntax*
489
finish_literal_vector(Reader::State& s) {
490
++s.cur; // Skip the open paren.
491
std::vector<const Syntax*> elts { };
492
while (skip_blank(s) and *s.cur != ')') {
493
if (auto x = read_sexpr(s))
494
elts.push_back(x);
495
else
496
syntax_error("syntax error while reading vector elements");
497
}
498
if (s.cur >= s.bytes.end)
499
syntax_error("unfinished literal vector");
500
else
501
++s.cur;
502
return s.alloc.make_vector(elts);
503
}
504
505
// We've just seen the sharp sign followed by a digit. We assume
506
// we are about to read an anchor or a back reference.
507
static const Syntax*
508
finish_anchor_or_reference(Reader::State& s) {
509
auto start = s.cur;
510
advance_while(s, isdigit);
511
if (s.cur >= s.bytes.end)
512
syntax_error("end-of-input after sharp-number sign");
513
const Byte c = *s.cur;
514
if (c != '#' and c != '=')
515
syntax_error("syntax error after sharp-number-equal sign");
516
Lexeme t = { { start, s.cur }, s.bytes.lineno };
517
auto n = natural_value(start, s.cur);
518
++s.cur;
519
if (c == '#')
520
return s.alloc.make_reference(n, t);
521
auto x = read_sexpr(s);
522
if (x == nullptr)
523
syntax_error("syntax error after sharp-number-equal sign");
524
return s.alloc.make_anchor(n, x);
525
}
526
527
static const Syntax*
528
finish_function(Reader::State& s) {
529
++s.cur; // skip quote sign.
530
auto x = read_sexpr(s);
531
if (x == nullptr)
532
syntax_error("missing function designator after sharp-quote sign");
533
return s.alloc.make_function(x);
534
}
535
536
static const Syntax*
537
finish_uninterned_symbol(Reader::State& s) {
538
++s.cur; // skip colon sign.
539
auto start = s.cur;
540
advance_while(s, identifier_part);
541
Lexeme t = { { start, s.cur }, s.bytes.lineno };
542
return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
543
}
544
545
static const Syntax*
546
finish_readtime_eval(Reader::State& s) {
547
++s.cur; // skip dot sign.
548
auto x = read_sexpr(s);
549
if (x == nullptr)
550
syntax_error("parse error after sharp-dot sign");
551
return s.alloc.make_eval(x);
552
}
553
554
static const Syntax*
555
finish_character(Reader::State& s) {
556
++s.cur; // skip backslash sign
557
auto start = s.cur;
558
advance_while(s, identifier_part);
559
Lexeme t = { { start, s.cur }, s.bytes.lineno };
560
return s.alloc.make_character(t);
561
}
562
563
static const Syntax*
564
finish_include(Reader::State& s) {
565
++s.cur;
566
auto cond = read_sexpr(s);
567
auto form = read_sexpr(s);
568
return s.alloc.make_include(cond, form);
569
}
570
571
static const Syntax*
572
finish_exclude(Reader::State& s) {
573
++s.cur;
574
auto cond = read_sexpr(s);
575
auto form = read_sexpr(s);
576
return s.alloc.make_exclude(cond, form);
577
}
578
579
static const Syntax*
580
read_sharp_et_al(Reader::State& s) {
581
if (++s.cur >= s.bytes.end)
582
syntax_error("end-of-input reached after sharp sign");
583
switch (*s.cur) {
584
case '(': return finish_literal_vector(s);
585
case '\'': return finish_function(s);
586
case ':': return finish_uninterned_symbol(s);
587
case '.': return finish_readtime_eval(s);
588
case '\\': return finish_character(s);
589
case '+': return finish_include(s);
590
case '-': return finish_exclude(s);
591
592
default:
593
if (isdigit(*s.cur))
594
return finish_anchor_or_reference(s);
595
syntax_error("syntax error after sharp-sign");
596
}
597
return nullptr;
598
}
599
600
// We have just seen a dot; read the tail and the closing parenthesis.
601
static const Syntax*
602
finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
603
++s.cur; // Skip dot sign.
604
auto x = read_sexpr(s);
605
if (x == nullptr)
606
syntax_error("missing expression after dot sign");
607
if (not skip_blank(s) or *s.cur != ')')
608
syntax_error("missing closing parenthesis");
609
++s.cur;
610
elts.push_back(x);
611
return s.alloc.make_list(elts, true);
612
}
613
614
static const Syntax*
615
read_pair(Reader::State& s) {
616
++s.cur; // skip opening parenthesis
617
std::vector<const Syntax*> elts { };
618
while (skip_blank(s))
619
switch (*s.cur) {
620
case '.':
621
if (elts.empty())
622
syntax_error("missing expression before dot sign.");
623
return finish_dotted_list(s, elts);
624
625
case ')':
626
++s.cur;
627
return s.alloc.make_list(elts);
628
629
default:
630
if (auto x = read_sexpr(s))
631
elts.push_back(x);
632
else
633
syntax_error("unfinished pair expression");
634
break;
635
}
636
syntax_error("end-of-input while looking for closing parenthesis");
637
return nullptr;
638
}
639
640
static const Syntax*
641
read_sexpr(Reader::State& s) {
642
while (skip_blank(s))
643
switch (*s.cur) {
644
case ';': skip_to_eol(s); break;
645
case '\'': return read_quote(s);
646
case '`': return read_backquote(s);
647
case '|': return read_absolute_symbol(s);
648
case '#': return read_sharp_et_al(s);
649
case '(': return read_pair(s);
650
default: return read_atom(s);
651
}
652
return nullptr;
653
}
654
655
const Syntax*
656
Reader::read() {
657
return read_sexpr(st);
658
}
659
660
const Byte*
661
Reader::position(Ordinal p) {
662
st.cur = st.bytes.start + p;
663
st.line = st.cur;
664
// while (st.line > st.start and st.line[-1] != '\n')
665
// --st.line;
666
return st.cur;
667
}
668
669
}
670
}
671
672