CoCalc -- sexpr.cc

open-axiom repository from github
²⁴⁰⁰⁵ views
1
// Copyright (C) 2010-2013, Gabriel Dos Reis.
2
// All rights reserved.
3
// Written by Gabriel Dos Reis.
4
//
5
// Redistribution and use in source and binary forms, with or without
6
// modification, are permitted provided that the following conditions are
7
// met:
8
//
9
//     - Redistributions of source code must retain the above copyright
10
//       notice, this list of conditions and the following disclaimer.
11
//
12
//     - Redistributions in binary form must reproduce the above copyright
13
//       notice, this list of conditions and the following disclaimer in
14
//       the documentation and/or other materials provided with the
15
//       distribution.
16
//
17
//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
18
//       names of its contributors may be used to endorse or promote products
19
//       derived from this software without specific prior written permission.
20
//
21
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32

33
// --% Author: Gabriel Dos Reis.
34

35
#include <ctype.h>
36
#include <string.h>
37
#include <iostream>
38
#include <iterator>
39
#include <open-axiom/sexpr>
40
#include <open-axiom/FileMapping>
41
#include <open-axiom/diagnostics>
42

43
namespace OpenAxiom {
44
   namespace Sexpr {
45
      static void
46
      invalid_character(Reader::State& s) {
47
         auto line = std::to_string(s.bytes.lineno);
48
         auto column = std::to_string(s.cur - s.line);
49
         auto msg = "invalid character on line " + line +
50
            " and column " + column;
51
         if (isprint(*s.cur))
52
            throw Diagnostics::BasicError(msg + ": " + std::string(1, *s.cur));
53
         throw Diagnostics::BasicError(msg + " with code " + std::to_string(*s.cur));
54
      }
55
      
56
      static void
57
      syntax_error(const std::string& s) {
58
         throw Diagnostics::BasicError(s);
59
      }
60

61
      // Return true if character `c' introduces a blank.
62
      static bool
63
      is_blank(char c) {
64
         return c == ' ' or c == '\t' or c == '\v'
65
            or c == '\n' or c == '\f' or c == '\r';
66
      }
67
      
68
      // Return true if the character `c' introduces a delimiter.
69
      static bool
70
      is_delimiter(char c) {
71
         return is_blank(c)
72
            or c == '(' or c == ')' or c == '\''
73
            or c == '`' or c == '#';
74
      }
75

76
      // Move the cursor past all consecutive blank characters, and
77
      // return true if there are more input characters to consider.
78
      static bool
79
      skip_blank(Reader::State& s) {
80
         for (bool done = false; s.cur < s.bytes.end and not done; )
81
            switch (*s.cur) {
82
            case '\n':
83
               ++s.bytes.lineno;
84
               s.line = ++s.cur;
85
               break;
86
            case ' ': case '\t': case '\v': case '\r': case '\f':
87
               ++s.cur;
88
               break;
89
            default: done = true; break;
90
            }
91
         return s.cur < s.bytes.end;
92
      }
93

94
      // Move `cur' to end-of-line marker.
95
      static void
96
      skip_to_eol(Reader::State& s) {
97
         // FIXME: properly handle CR+LF.
98
         while (s.cur < s.bytes.end and *s.cur != '\n')
99
            ++s.cur;
100
      }
101

102
      // Move `cur' one-past a non-esacaped character `c'.
103
      // Return true if the character was seen.
104
      static bool
105
      skip_to_nonescaped_char(Reader::State& s, char c) {
106
         for (bool saw_escape = false; s.cur < s.bytes.end; ++s.cur)
107
            if (saw_escape)
108
               saw_escape = false;
109
            else if (*s.cur == '\\')
110
               saw_escape = true;
111
            else if (*s.cur == c) {
112
               ++s.cur;
113
               return true;
114
            }
115
         return false;
116
      }
117

118
      // Move the cursor past the closing quote of string literal.
119
      // Return true if the closing quote was effectively seen.
120
      static inline bool
121
      skip_to_quote(Reader::State& s) {
122
         return skip_to_nonescaped_char(s, '"');
123
      }
124

125
      template<typename Pred>
126
      static bool
127
      advance_while(Reader::State& s, Pred p) {
128
         while (s.cur < s.bytes.end and p(*s.cur))
129
            ++s.cur;
130
         return s.cur < s.bytes.end;
131
      }
132

133
      // Return true if the character `c' be part of a non-absolute
134
      // identifier.
135
      static bool
136
      identifier_part(Byte c) {
137
         switch (c) {
138
         case '+': case '-': case '*': case '/': case '%': case '^':
139
         case '~': case '@': case '$': case '&': case '=':
140
         case '<': case '>': case '?': case '!': case '_':
141
         case '[': case ']': case '{': case '}':
142
            return true;
143
         default:
144
            return isalnum(c);
145
         }
146
      }
147

148
      // -- AtomSyntax --
149
      AtomSyntax::AtomSyntax(const Lexeme& t) : lex(t) { }
150

151
      // -- IntegerSyntax --
152
      IntegerSyntax::IntegerSyntax(const Lexeme& t) : AtomSyntax(t) { }
153

154
      void
155
      IntegerSyntax::accept(Visitor& v) const {
156
         v.visit(*this);
157
      }
158

159
      // -- CharacterSyntax --
160
      CharacterSyntax::CharacterSyntax(const Lexeme& t) : AtomSyntax(t) { }
161

162
      void
163
      CharacterSyntax::accept(Visitor& v) const {
164
         v.visit(*this);
165
      }
166

167
      // -- StringSyntax --
168
      StringSyntax::StringSyntax(const Lexeme& t) : AtomSyntax(t) { }
169

170
      void
171
      StringSyntax::accept(Visitor& v) const {
172
         v.visit(*this);
173
      }
174

175
      // -- SymbolSyntax --
176
      SymbolSyntax::SymbolSyntax(const Lexeme& t, Kind k)
177
            : AtomSyntax(t), sort(k)
178
      { }
179

180
      void
181
      SymbolSyntax::accept(Visitor& v) const {
182
         v.visit(*this);
183
      }
184

185
      // -- AnchorSyntax --
186
      AnchorSyntax::AnchorSyntax(size_t t, const Syntax* s) : tag(t), val(s) { }
187

188
      void
189
      AnchorSyntax::accept(Visitor& v) const {
190
         v.visit(*this);
191
      }
192

193
      // -- ReferenceSyntax --
194
       ReferenceSyntax::ReferenceSyntax(const Lexeme& t, Ordinal n)
195
             : AtomSyntax(t), pos(n)
196
      { }
197

198
      void
199
      ReferenceSyntax::accept(Visitor& v) const {
200
         v.visit(*this);
201
      }
202

203
      // -- QuoteSyntax --
204
      QuoteSyntax::QuoteSyntax(const Syntax* s)
205
            : unary_form<QuoteSyntax>(s)
206
      { }
207

208
      // -- AntiquoteSyntax --
209
      AntiquoteSyntax::AntiquoteSyntax(const Syntax* s)
210
            : unary_form<AntiquoteSyntax>(s)
211
      { }
212

213
      // -- Expand --
214
      Expand::Expand(const Syntax* s) : unary_form<Expand>(s) { }
215

216
      // -- Eval --
217
      Eval::Eval(const Syntax* s) : unary_form<Eval>(s) { }
218

219
      // -- Splice --
220
      Splice::Splice(const Syntax* s) : unary_form<Splice>(s) { }
221

222
      // -- Function --
223
      Function::Function(const Syntax* s) : unary_form<Function>(s) { }
224

225
      // -- Include --
226
      Include::Include(const Syntax* c, const Syntax* s)
227
            : binary_form<Include>(c, s)
228
      { }
229

230
      // -- Exclude --
231
      Exclude::Exclude(const Syntax* c, const Syntax* s)
232
            : binary_form<Exclude>(c, s)
233
      { }
234

235
      // -- ListSyntax --
236
      ListSyntax::ListSyntax() : dot(false) { }
237

238
      ListSyntax::ListSyntax(const base& elts, bool d)
239
            : base(elts), dot(d)
240
      { }
241

242
      ListSyntax::~ListSyntax() { }
243

244
      void
245
      ListSyntax::accept(Visitor& v) const {
246
         v.visit(*this);
247
      }
248

249
      // -- VectorSyntax --
250
      VectorSyntax::VectorSyntax() { }
251

252
      VectorSyntax::VectorSyntax(const base& elts) : base(elts) { }
253

254
      VectorSyntax::~VectorSyntax() { }
255
      
256
      void
257
      VectorSyntax::accept(Visitor& v) const {
258
         v.visit(*this);
259
      }
260

261
      // ---------------
262
      // -- Allocator --
263
      // ---------------
264
      Allocator::Allocator() { }
265

266
      // This destructor is defined here so that it provides
267
      // a single instantiation point for destructors of all
268
      // used templates floating around.
269
      Allocator::~Allocator() { }
270

271
      const CharacterSyntax*
272
      Allocator::make_character(const Lexeme& t) {
273
         return chars.make(t);
274
      }
275

276
      const IntegerSyntax*
277
      Allocator::make_integer(const Lexeme& t) {
278
         return ints.make(t);
279
      }
280

281
      const StringSyntax*
282
      Allocator::make_string(const Lexeme& t) {
283
         return strs.make(t);
284
      }
285

286
      const SymbolSyntax*
287
      Allocator::make_symbol(SymbolSyntax::Kind k, const Lexeme& t) {
288
         return syms.make(t, k);
289
      }
290

291
      const ReferenceSyntax*
292
      Allocator::make_reference(size_t i, const Lexeme& t) {
293
         return refs.make(t, i);
294
      }
295

296
      const AnchorSyntax*
297
      Allocator::make_anchor(size_t t, const Syntax* s) {
298
         return ancs.make(t, s);
299
      }
300

301
      const QuoteSyntax*
302
      Allocator::make_quote(const Syntax* s) {
303
         return quotes.make(s);
304
      }
305

306
      const AntiquoteSyntax*
307
      Allocator::make_antiquote(const Syntax* s) {
308
         return antis.make(s);
309
      }
310

311
      const Expand*
312
      Allocator::make_expand(const Syntax* s) {
313
         return exps.make(s);
314
      }
315

316
      const Eval*
317
      Allocator::make_eval(const Syntax* s) {
318
         return evls.make(s);
319
      }
320

321
      const Splice*
322
      Allocator::make_splice(const Syntax* s) {
323
         return spls.make(s);
324
      }
325

326
      const Function*
327
      Allocator::make_function(const Syntax* s) {
328
         return funs.make(s);
329
      }
330

331
      const Include*
332
      Allocator::make_include(const Syntax* c, const Syntax* s) {
333
         return incs.make(c, s);
334
      }
335

336
      const Exclude*
337
      Allocator::make_exclude(const Syntax* c, const Syntax* s) {
338
         return excs.make(c, s);
339
      }
340

341
      const ListSyntax*
342
      Allocator::make_list(const std::vector<const Syntax*>& elts, bool dot) {
343
         if (elts.empty())
344
            return &empty_list;
345
         return lists.make(elts, dot);
346
      }
347

348
      const VectorSyntax*
349
      Allocator::make_vector(const std::vector<const Syntax*>& elts) {
350
         if (elts.empty())
351
            return &empty_vector;
352
         return vectors.make(elts);
353
      }
354

355
      // The sequence of characters in [cur, last) consists
356
      // entirely of digits.  Return the corresponding natural value.
357
      static size_t
358
      natural_value(const Byte* cur, const Byte* last) {
359
         size_t n = 0;
360
         for (; cur < last; ++cur)
361
            // FIXME: check for overflow.
362
            n = 10 * n + (*cur - '0');
363
         return n;
364
      }
365

366
      // -- Reader --
367
      Reader::Reader(const Byte* f, const Byte* l)
368
            : st{ { f, l, 1 }, f, f }
369
      { }
370

371
      Reader::Reader(const RawInput& ri)
372
            : st { ri, ri.start, ri.start }
373
      { }
374

375
      static const Syntax* read_sexpr(Reader::State&);
376

377
      // Parse a string literal
378
      static const Syntax*
379
      read_string(Reader::State& s) {
380
         auto start = s.cur++;
381
         if (not skip_to_quote(s))
382
            syntax_error("missing closing quote sign for string literal");
383
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
384
         return s.alloc.make_string(t);
385
      }
386

387
      // Parse an absolute identifier.
388
      static const Syntax*
389
      read_absolute_symbol(Reader::State& s) {
390
         auto start = ++s.cur;
391
         if (not skip_to_nonescaped_char(s, '|'))
392
            syntax_error("missing closing bar sign for an absolute symbol");
393
         Lexeme t = { { start, s.cur - 1 }, s.bytes.lineno };
394
         return s.alloc.make_symbol(SymbolSyntax::absolute, t);
395
      }
396

397
      // Read an atom starting with digits.
398
      static const Syntax*
399
      read_maybe_natural(Reader::State& s) {
400
         auto start = s.cur;
401
         advance_while (s, isdigit);
402
         if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
403
            Lexeme t = { { start, s.cur }, s.bytes.lineno };
404
            return s.alloc.make_integer(t);
405
         }
406
         advance_while(s, identifier_part);
407
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
408
         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
409
      }
410

411
      // Read an identifier.
412
      static const Syntax*
413
      read_identifier(Reader::State& s) {
414
         auto start = s.cur;
415
         advance_while(s, identifier_part);
416
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
417
         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
418
      }
419

420
      // Read an atom starting with a '+' or '-' sign; this
421
      // should be identifier, or a signed integer.
422
      static const Syntax*
423
      read_maybe_signed_number(Reader::State& s) {
424
         auto start = s.cur++;
425
         if (s.cur < s.bytes.end and isdigit(*s.cur)) {
426
            advance_while(s, isdigit);
427
            if (s.cur >= s.bytes.end or is_delimiter(*s.cur)) {
428
               Lexeme t = { { start, s.cur }, s.bytes.lineno };
429
               return s.alloc.make_integer(t);
430
            }
431
         }
432
         advance_while(s, identifier_part);
433
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
434
         return s.alloc.make_symbol(SymbolSyntax::ordinary, t);
435
      }
436

437
      static const Syntax*
438
      read_keyword(Reader::State& s) {
439
         auto start = s.cur++;
440
         advance_while(s, identifier_part);
441
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
442
         return s.alloc.make_symbol(SymbolSyntax::keyword, t);
443
      }
444

445
      // Read an atom.
446
      static const Syntax*
447
      read_atom(Reader::State& s) {
448
         switch (*s.cur) {
449
         case '"': return read_string(s);
450
         case ':': return read_keyword(s);
451
         case '-': case '+': return read_maybe_signed_number(s);
452

453
         case '0': case '1': case '2': case '3': case '4':
454
         case '5': case '6': case '7': case '8': case '9':
455
            return read_maybe_natural(s);
456

457
         default:
458
            if (identifier_part(*s.cur))
459
               return read_identifier(s);
460
            invalid_character(s);
461
            ++s.cur;
462
            return nullptr;
463
         }
464
      }
465

466
      // Parse a quote expression.
467
      static const Syntax*
468
      read_quote(Reader::State& s) {
469
         ++s.cur;               // skip the quote character
470
         auto x = read_sexpr(s);
471
         if (x == nullptr)
472
            syntax_error("end of input reached after quote sign");
473
         return s.alloc.make_quote(x);
474
      }
475

476
      // Parse a backquote expression.
477
      static const Syntax*
478
      read_backquote(Reader::State& s) {
479
         ++s.cur;               // skip the backquote character
480
         auto x = read_sexpr(s);
481
         if (x == nullptr)
482
            syntax_error("end of input reached after backquote sign");
483
         return s.alloc.make_antiquote(x);
484
      }
485

486
      // We've just seen "#(" indicating the start of a literal
487
      // vector.  Read the elements and return the corresponding form.
488
      static const Syntax*
489
      finish_literal_vector(Reader::State& s) {
490
         ++s.cur;               // Skip the open paren.
491
         std::vector<const Syntax*> elts { };
492
         while (skip_blank(s) and *s.cur != ')') {
493
            if (auto x = read_sexpr(s))
494
               elts.push_back(x);
495
            else
496
               syntax_error("syntax error while reading vector elements");
497
         }
498
         if (s.cur >= s.bytes.end)
499
            syntax_error("unfinished literal vector");
500
         else
501
            ++s.cur;
502
         return s.alloc.make_vector(elts);
503
      }
504

505
      // We've just seen the sharp sign followed by a digit.  We assume
506
      // we are about to read an anchor or a back reference.
507
      static const Syntax*
508
      finish_anchor_or_reference(Reader::State& s) {
509
         auto start = s.cur;
510
         advance_while(s, isdigit);
511
         if (s.cur >= s.bytes.end)
512
            syntax_error("end-of-input after sharp-number sign");
513
         const Byte c = *s.cur;
514
         if (c != '#' and c != '=')
515
            syntax_error("syntax error after sharp-number-equal sign");
516
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
517
         auto n = natural_value(start, s.cur);
518
         ++s.cur;
519
         if (c == '#')
520
            return s.alloc.make_reference(n, t);
521
         auto x = read_sexpr(s);
522
         if (x == nullptr)
523
            syntax_error("syntax error after sharp-number-equal sign");
524
         return s.alloc.make_anchor(n, x);
525
      }
526

527
      static const Syntax*
528
      finish_function(Reader::State& s) {
529
         ++s.cur;               // skip quote sign.
530
         auto x = read_sexpr(s);
531
         if (x == nullptr)
532
            syntax_error("missing function designator after sharp-quote sign");
533
         return s.alloc.make_function(x);
534
      }
535

536
      static const Syntax*
537
      finish_uninterned_symbol(Reader::State& s) {
538
         ++s.cur;               // skip colon sign.
539
         auto start = s.cur;
540
         advance_while(s, identifier_part);
541
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
542
         return s.alloc.make_symbol(SymbolSyntax::uninterned, t);
543
      }
544

545
      static const Syntax*
546
      finish_readtime_eval(Reader::State& s) {
547
         ++s.cur;               // skip dot sign.
548
         auto x = read_sexpr(s);
549
         if (x == nullptr)
550
            syntax_error("parse error after sharp-dot sign");
551
         return s.alloc.make_eval(x);
552
      }
553

554
      static const Syntax*
555
      finish_character(Reader::State& s) {
556
         ++s.cur;               // skip backslash sign
557
         auto start = s.cur;
558
         advance_while(s, identifier_part);
559
         Lexeme t = { { start, s.cur }, s.bytes.lineno };
560
         return s.alloc.make_character(t);
561
      }
562

563
      static const Syntax*
564
      finish_include(Reader::State& s) {
565
         ++s.cur;
566
         auto cond = read_sexpr(s);
567
         auto form = read_sexpr(s);
568
         return s.alloc.make_include(cond, form);
569
      }
570

571
      static const Syntax*
572
      finish_exclude(Reader::State& s) {
573
         ++s.cur;
574
         auto cond = read_sexpr(s);
575
         auto form = read_sexpr(s);
576
         return s.alloc.make_exclude(cond, form);
577
      }
578

579
      static const Syntax*
580
      read_sharp_et_al(Reader::State& s) {
581
         if (++s.cur >= s.bytes.end)
582
            syntax_error("end-of-input reached after sharp sign");
583
         switch (*s.cur) {
584
         case '(':  return finish_literal_vector(s);
585
         case '\'': return finish_function(s);
586
         case ':': return finish_uninterned_symbol(s);
587
         case '.': return finish_readtime_eval(s);
588
         case '\\': return finish_character(s);
589
         case '+': return finish_include(s);
590
         case '-': return finish_exclude(s);
591

592
         default:
593
            if (isdigit(*s.cur))
594
               return finish_anchor_or_reference(s);
595
            syntax_error("syntax error after sharp-sign");
596
         }
597
         return nullptr;
598
      }
599

600
      // We have just seen a dot; read the tail and the closing parenthesis.
601
      static const Syntax*
602
      finish_dotted_list(Reader::State& s, std::vector<const Syntax*>& elts) {
603
         ++s.cur;               // Skip dot sign.
604
         auto x = read_sexpr(s);
605
         if (x == nullptr)
606
            syntax_error("missing expression after dot sign");
607
         if (not skip_blank(s) or *s.cur != ')')
608
            syntax_error("missing closing parenthesis");
609
         ++s.cur;
610
         elts.push_back(x);
611
         return s.alloc.make_list(elts, true);
612
      }
613

614
      static const Syntax*
615
      read_pair(Reader::State& s) {
616
         ++s.cur;               // skip opening parenthesis
617
         std::vector<const Syntax*> elts { };
618
         while (skip_blank(s))
619
            switch (*s.cur) {
620
            case '.':
621
               if (elts.empty())
622
                  syntax_error("missing expression before dot sign.");
623
               return finish_dotted_list(s, elts);
624

625
            case ')':
626
               ++s.cur;
627
               return s.alloc.make_list(elts);
628

629
            default:
630
               if (auto x = read_sexpr(s))
631
                  elts.push_back(x);
632
               else
633
                  syntax_error("unfinished pair expression");
634
               break;
635
            }
636
         syntax_error("end-of-input while looking for closing parenthesis");
637
         return nullptr;
638
      }
639

640
      static const Syntax*
641
      read_sexpr(Reader::State& s) {
642
         while (skip_blank(s))
643
            switch (*s.cur) {
644
            case ';': skip_to_eol(s); break;
645
            case '\'': return read_quote(s);
646
            case '`': return read_backquote(s);
647
            case '|': return read_absolute_symbol(s);
648
            case '#': return read_sharp_et_al(s);
649
            case '(': return read_pair(s);
650
            default: return read_atom(s);
651
            }
652
         return nullptr;
653
      }
654

655
      const Syntax*
656
      Reader::read() {
657
         return read_sexpr(st);
658
      }
659

660
      const Byte*
661
      Reader::position(Ordinal p) {
662
         st.cur = st.bytes.start + p;
663
         st.line = st.cur;
664
         // while (st.line > st.start and st.line[-1] != '\n')
665
         //    --st.line;
666
         return st.cur;
667
      }
668

669
   }
670
}
671

672
Product

Resources

Company