📚 The CoCalc Library - books, templates and other resources
License: OTHER
#!/usr/bin/env python3
# Copyright: Harald Schilly <[email protected]>
# License: Apache 2.0
'''
Build `index.json` from `index.yaml` and the index files it references.

Each YAML index file holds several documents: the first one is the metadata
(tags, licenses, categories and optional references to further index files),
all following ones describe one library entry each.
'''
import itertools as it
import json
import os
from pprint import pprint

import yaml

# to make all "src" absolute paths!
ROOT = os.path.dirname(os.path.abspath(__file__))
os.chdir(ROOT)


# TODO this is silly code, please fix it ...

def _check(condition, message=''):
    '''
    Raise AssertionError(message) unless `condition` is truthy.

    Used instead of a bare `assert` so the validation still runs under
    `python -O`, while keeping the exception type of the original asserts.
    '''
    if not condition:
        raise AssertionError(message)


def _load_yaml_documents(path):
    '''
    Return all YAML documents stored in the file at `path` as a list.

    The file is closed before returning, and `safe_load_all` is used because
    the index files are plain data (and `load_all` without an explicit Loader
    is rejected by current PyYAML releases).
    '''
    with open(path, encoding='utf-8') as stream:
        # materialize inside the `with`: safe_load_all is a lazy generator
        return list(yaml.safe_load_all(stream))


def update_meta(meta, new_meta):
    '''
    Merge the sections of `new_meta` into `meta` in place.

    A simple dict update would overwrite/remove entries, hence 'tags',
    'licenses' and 'categories' are merged entry by entry.

    Raises AssertionError if `new_meta` introduces a category name that
    already exists in `meta`.
    '''
    for section in ('tags', 'licenses'):
        if section in new_meta:
            meta.setdefault(section, {}).update(new_meta[section])
    if 'categories' in new_meta:
        categories = meta.setdefault('categories', {})
        # if you introduce a new category, it must be unique
        duplicates = [nm for nm in new_meta['categories'] if nm in categories]
        _check(not duplicates,
               'categories defined more than once: {}'.format(duplicates))
        categories.update(new_meta['categories'])


# TODO this is just for a unique id for each document. maybe make it stable?
ID = it.count(0)
all_ids = set()


def init_doc(docs, prefix):
    '''
    Prepare the documents in `docs` in place.

    'src' and (if present) 'thumbnail' are joined onto `prefix`; documents
    without an 'id' get a generated one of the form 'doc-<n>'. Every id is
    recorded in the module-level `all_ids` set.

    Raises AssertionError if an id has been seen before.
    '''
    for doc in docs:
        doc['src'] = os.path.join(prefix, doc['src'])
        if 'thumbnail' in doc:
            doc['thumbnail'] = os.path.join(prefix, doc['thumbnail'])
        if 'id' not in doc:
            doc['id'] = 'doc-{}'.format(next(ID))
        _check(doc['id'] not in all_ids,
               'duplicate document id: {}'.format(doc['id']))
        all_ids.add(doc['id'])


# prefix is the path to prefix
def resolve_references(meta, docs, prefix=''):
    '''
    Recursively load the index files listed in meta['references'].

    The metadata of each referenced file is merged into `meta`, its documents
    are appended to `docs`, and the 'references' key is removed afterwards.
    Both arguments are modified in place and also returned as a tuple
    `(meta, docs)`.
    '''
    # append new documents and merge meta
    if 'references' in meta:
        for ref in meta['references']:
            # Per-reference prefix: reassigning `prefix` itself would leak the
            # directory of one reference into the prefix of the next one.
            ref_prefix = os.path.join(prefix, os.path.dirname(ref))
            print("resolve_references prefix={}".format(ref_prefix))
            # NOTE(review): `ref` is opened relative to the current working
            # directory, not relative to `prefix` -- confirm this is intended
            # for nested references.
            new_meta, *new_docs = _load_yaml_documents(ref)
            init_doc(new_docs, ref_prefix)
            resolve_references(new_meta, new_docs, prefix=ref_prefix)
            update_meta(meta, new_meta)
            docs.extend(new_docs)
        del meta['references']
    return meta, docs


def consistency_checks(meta, docs):
    '''
    Validate all documents against the metadata.

    Checks that every document only uses known keys, has a title, a known
    category and an existing 'src' directory, that its tags are declared in
    meta['tags'], that its thumbnail exists, and that every category and tag
    has a 'name'.

    Raises AssertionError on the first violation.
    '''
    print('done. running consistency checks ...')
    cats = meta['categories']
    tags = meta['tags']
    allowed_keys = {'id', 'src', 'title', 'description', 'website', 'author',
                    'license', 'category', 'tags', 'thumbnail', 'subdir'}
    for doc in docs:
        # the title check comes first: the progress message and the later
        # error messages all read doc['title']
        _check('title' in doc,
               "doc {} misses a title".format(doc.get('id')))
        print('checking {0[id]}: {0[title]}'.format(doc))
        _check(all(k in allowed_keys for k in doc),
               "keys: {}".format(list(doc)))
        _check('category' in doc,
               "doc {} misses category".format(doc['title']))
        _check('src' in doc, "doc {} misses src".format(doc['title']))
        _check(doc['src'].endswith('/'),
               'doc "{}" src must end with a slash to signal it is a '
               'directory. single files will be supported later ...'
               .format(doc['title']))
        _check(os.path.exists(os.path.join(ROOT, doc['src'])),
               'doc "{}" src path does not exist!'.format(doc['title']))
        _check(doc['category'] in cats,
               'Category {} of document {} not in meta.categories'
               .format(doc['category'], doc['id']))
        for t in doc.get('tags', ()):
            _check(t in tags,
                   'Tag {} of document {} not in meta.tags'
                   .format(t, doc['id']))
        if 'thumbnail' in doc:
            _check(os.path.exists(doc['thumbnail']),
                   'Thumbnail {0[thumbnail]} for {0[id]} does not exist'
                   .format(doc))
    for key, entry in cats.items():
        _check('name' in entry, 'category {} has no name'.format(key))
    for key, entry in tags.items():
        _check('name' in entry, 'tag {} has no name'.format(key))


def debug(meta, docs):
    '''
    Pretty-print the metadata and every document to stdout.
    '''
    print("META:")
    pprint(meta)
    print("DOCS:")
    for doc in docs:
        pprint(doc)


def export_json(meta, docs):
    '''
    Write `meta` and `docs` to `index.json` in the current working directory.
    '''
    with open('index.json', 'w') as out:
        json.dump({'metadata': meta, 'documents': docs}, out, indent=1)


def main(index_fn):
    '''
    Load the index file `index_fn`, resolve its references, validate the
    result and export it as `index.json`.
    '''
    meta, *docs = _load_yaml_documents(index_fn)
    init_doc(docs, ROOT)
    resolve_references(meta, docs, prefix=ROOT)
    # debug(meta, docs)
    consistency_checks(meta, docs)
    export_json(meta, docs)
    print('all done.')


if __name__ == '__main__':
    main(index_fn='index.yaml')