// Copyright (C) 2013, Gabriel Dos Reis.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     - Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//
//     - Redistributions in binary form must reproduce the above copyright
//       notice, this list of conditions and the following disclaimer in
//       the documentation and/or other materials provided with the
//       distribution.
//
//     - Neither the name of The Numerical Algorithms Group Ltd. nor the
//       names of its contributors may be used to endorse or promote products
//       derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// --% Author: Gabriel Dos Reis.
// --% Description:
// --%   This program implements basic functionalities for untangling
// --%   algebra source code from the pamphlets.  The syntax is that
// --%   of `noweb'.  A chunk definition starts with a pattern
// --%   <<name>>= on a line by itself, and ends with `@' by itself
// --%   on a line.
A chunk can refer to another chunk through38// --% a pattern of the form `<<name>>'.3940#include <string.h>41#include <stdlib.h>42#include <utility>43#include <string>44#include <iostream>45#include <fstream>46#include <iterator>47#include <list>48#include <vector>49#include <map>50#include <open-axiom/storage>51#include <open-axiom/FileMapping>5253namespace OpenAxiom {54namespace Hammer {55// -------------56// -- Element --57// -------------58// Base class of document elements.59struct Element {60virtual ~Element() { }61};6263// ---------------64// -- BasicText --65// ---------------66// Plain text, with no reference to any chunk.67struct BasicText : Element {68BasicText(const Byte* f, const Byte* l) : span(f, l) { }69// Pointer to the start of this basic text element70const Byte* begin() const { return span.first; }71// One-past-the-end of the this basic text element.72const Byte* end() const { return span.second; }73private:74std::pair<const Byte*, const Byte*> span;75};7677// ---------------78// -- Reference --79// ---------------80// Reference to a a chunk by name.81struct Reference : Element {82explicit Reference(const std::string& s) : label(s) { }83// Naame of the chunk referenced.84const std::string& name() const { return label; }85private:86const std::string label;87};8889// -------------------90// -- CompositeText --91// -------------------92// Sequence of basic elements and reference to chunks.93struct CompositeText: private std::vector<const Element*> {94typedef std::vector<const Element*> base;95using base::iterator;96using base::begin;97using base::end;98using base::size;99using base::operator[];100101// Augment this chunk with a basic text in the open interval102// [f,l).103CompositeText& add_text(const Byte* f, const Byte* l) {104texts.push_back(BasicText(f, l));105push_back(&texts.back());106return *this;107}108109// Augment this chunk with a reference to another chunk110// named `n'. 
Note that we don't attempt to check for111// possible circularities.112CompositeText reference_chunk(const Byte* f, const Byte* l) {113refs.push_back(Reference(std::string(f, l)));114push_back(&refs.back());115return *this;116}117118private:119std::list<BasicText> texts;120std::list<Reference> refs;121};122123// --------------124// -- Document --125// --------------126// A whole document; a sequence of chunks.127struct Document : std::list<CompositeText> {128Document(const Memory::FileMapping& file)129: active_chunk(&prose), text_start(file.begin()) {130parse(file);131}132133// Return a pointer to a document chunk name `n'.134// Otherwise, return null.135CompositeText* lookup_chunk(const std::string& n) const {136ChunkTable::const_iterator i = defs.find(n);137return i == defs.end() ? 0 : i->second;138}139140private:141typedef std::map<std::string, CompositeText*> ChunkTable;142CompositeText prose; // the prose around the chunks.143ChunkTable defs; // chunk definition table.144CompositeText* active_chunk; // chunk under construction.145const Byte* text_start; // begining of current basic text.146147// Append basic text in the range `[text_start,last)'148// to the current chunk.149void finish_chunk(const Byte* last) {150if (text_start != last)151active_chunk->add_text(text_start, last);152active_chunk = &prose;153text_start = last;154}155156// Start a new chunk or extend an existing chunk.157void begin_chunk(const std::string& name, const Byte* start) {158if (CompositeText* chunk = lookup_chunk(name))159active_chunk = chunk;160else {161push_back(CompositeText());162defs[name] = active_chunk = &back();163}164text_start = start;165}166167// Parse a file mapping into this document.168void parse(const Memory::FileMapping&);169};170171// Return true if the character `c' introduces a newline.172static inline bool173looking_at_newline(char c) {174return c == '\n' or c == '\r';175}176177// Attempt to advance the cursor past newline marker.178// Return true on 
sucess.179static bool180saw_newline(const Byte*& cur, const Byte* end) {181if (*cur == '\n') {182++cur;183return true;184}185else if (*cur == '\r') {186if (++cur < end and *cur == '\n')187++cur;188return true;189}190return false;191}192193// Move `cur' to end of line or `end', whichever comes first.194// Return true if the area swept consisted only of blank characters.195static inline bool196trailing_blank(const Byte*& cur, const Byte* end) {197bool result = true;198for (; cur < end and not saw_newline(cur, end); ++cur)199result = isspace(*cur);200return result;201}202203// Attempt to advance `cur' past the double left angle brackets204// starting a chunk name. Returm true on success.205static bool206chunk_name_began(const Byte*& cur, const Byte* end) {207if (cur[0] == '<' and cur + 1 < end and cur[1] == '<') {208cur += 2;209return true;210}211return false;212}213214// Attempt to move `cur' past the double right angle brackets215// terminating a chunk name. Returm true on success.216static bool217chunk_name_ended(const Byte*& cur, const Byte* end) {218if (cur[0] == '>' and cur + 1 < end and cur[1] == '>') {219cur += 2;220return true;221}222return false;223}224225// We've just seen the start of a chunk reference; skip226// characters till we seen of the chunk's name.227static void228skip_to_end_of_chunk_name(const Byte*& cur, const Byte* end) {229while (cur < end) {230if (looking_at_newline(*cur)231or (cur + 1 < end and cur[0] == '>' and cur[1] == '>'))232return;233++cur;234}235}236237// Move the cursor until end of line.238static void239skip_to_end_of_line(const Byte*& cur, const Byte* end) {240while (cur < end) {241if (saw_newline(cur, end))242break;243++cur;244}245}246247void248Document::parse(const Memory::FileMapping& file) {249auto cur = text_start;250auto last = file.end();251// Process one line at a time.252while (cur < last) {253// 1. 
`@' ends previous chunk254if (*cur == '@') {255auto p = cur;256if (trailing_blank(++cur, last))257finish_chunk(p);258}259// 2. `<<' introduces a chunk reference or a chunk definition.260else if (chunk_name_began(cur, last)) {261auto label_start = cur;262skip_to_end_of_chunk_name(cur, last);263if (chunk_name_ended(cur, last)) {264auto label_end = cur - 2;265if (cur < last and *cur == '=') {266if (trailing_blank(++cur, last)) {267// chunk definition or extension268finish_chunk(label_start - 2);269begin_chunk(std::string(label_start, label_end), cur);270}271}272else if (trailing_blank(cur, last)) {273// This is just a reference to a chunk.274active_chunk->add_text(text_start, label_start - 2);275active_chunk->reference_chunk(label_start, label_end);276text_start = cur;277}278else279skip_to_end_of_line(cur, last);280}281}282else283skip_to_end_of_line(cur, last);284}285finish_chunk(cur);286}287288// Capture chunk resolution in a document.289struct resolve_chunk {290resolve_chunk(const std::string& s, const Document& f)291: name(s), doc(f) { }292const std::string name; // name of the chunk293const Document& doc; // document containing the chunk.294};295296// Print the resolution of a chunk name onto an output stream.297std::ostream&298operator<<(std::ostream& os, const resolve_chunk& rc) {299// FIXME: no attempt at detecting circularities.300const CompositeText* doc = rc.doc.lookup_chunk(rc.name);301if (doc == 0) {302std::cerr << "chunk " << rc.name << " is undefined" << std::endl;303exit(1);304}305for (std::size_t i = 0; i < doc->size(); ++i) {306const Element* elt = (*doc)[i];307if (const BasicText* t = dynamic_cast<const BasicText*>(elt))308std::copy(t->begin(), t->end(),309std::ostream_iterator<char>(os));310else if (const Reference* r = dynamic_cast<const Reference*>(elt))311os << resolve_chunk(r->name(), rc.doc);312else {313std::cerr << "unknown document element" << std::endl;314exit(1);315}316}317318return os;319}320321// Return true if the `arg' is the option 
named`opt'.322static inline bool323is_option(const char* arg, const char* opt) {324return strcmp(arg, opt) == 0;325}326327// `arg' is a argument on the command line. If `arg'328// does not match option name `opt', return null. Otherwise,329// return a pointer to the terminating NUL character if there330// is no specified value for that option, or a pointer to the331// start of the value.332static const char*333is_named_arg(const char* arg, const char* opt) {334const int n = strlen(opt);335int i = 0;336// Get out if argion name does not match.337// Note: Ideally, we could use strncmp(). However, that338// function is not available in C++98, so we cannot depend on it.339for (; i < n ; ++i)340if (arg[i] != opt[i])341return 0;342343if (arg[i] == '\0')344return arg + i; // no value for the option.345return arg + n + 1; // being of the value.346}347}348}349350351int352main(int argc, char* argv[]) {353using namespace OpenAxiom::Hammer;354int error_count = 0;355const char* chunk = 0; // chunck to tangle356const char* output_path = 0; // path to the output file357const char* input_path = 0; // path to the input file.358// 1. Process command line arguments.359for (int pos = 1; error_count == 0 and pos < argc; ++pos) {360if (const char* val = is_named_arg(argv[pos], "--tangle")) {361if (chunk != 0) {362std::cerr << "cannot tangle more than one chunk";363++error_count;364}365else366chunk = *val == 0 ? "*" : val;367}368else if (const char* val = is_named_arg(argv[pos], "--output")) {369if (*val == 0) {370std::cerr << "missing output file name" << std::endl;371++error_count;372}373else374output_path = val;375}376else if (argv[pos][0] == '-' and argv[pos][1] == '-') {377std::cerr << "unknown option " << argv[pos] << std::endl;378++error_count;379}380else if (input_path != 0) {381std::cerr << "there must be exactly one input file" << std::endl;382++error_count;383}384else385input_path = argv[pos];386}387388// 2. 
Basic sanity check.389if (input_path == 0) {390std::cerr << "missing input file" << std::endl;391return 1;392}393if (output_path == 0) {394std::cerr << "missing output file" << std::endl;395return 1;396}397if (chunk == 0) {398std::cerr << "missing chunk name" << std::endl;399return 1;400}401402if (error_count != 0)403return 1;404405// 3. Attempt to extract the chunk.406try {407OpenAxiom::Memory::FileMapping file(input_path);408std::ofstream os(output_path);409os << resolve_chunk(chunk, Document(file));410}411catch(const OpenAxiom::SystemError& e) {412std::cerr << e.message() << std::endl;413exit(1);414}415return 0;416}417418419