% LaTeX source for ``Think Complexity, 2nd edition''
% Copyright (c) 2016 Allen B. Downey.

% Permission is granted to copy, distribute, transmit and adapt
% this work under a Creative Commons
% Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
% https://creativecommons.org/licenses/by-nc-sa/4.0/

% If you are interested in distributing a commercial version of this
% work, please contact Allen Downey.

% The LaTeX source for this book is available from
% https://github.com/AllenDowney/ThinkComplexity2

% TODO: Fix PDFLATEX options.

\documentclass[12pt]{book}

\title{Think Complexity}
\author{Allen B. Downey}

\newcommand{\thetitle}{Think Complexity}
\newcommand{\thesubtitle}{Exploring Complexity Science in Python}
\newcommand{\theauthors}{Allen B. Downey}
\newcommand{\theversion}{2.6.2}

%%%% Both LATEX and PLASTEX

\usepackage{graphicx}
\usepackage{hevea}
\usepackage{makeidx}
\usepackage{setspace}

\makeindex

% automatically index glossary terms
\newcommand{\term}[1]{%
\item[#1:]\index{#1}}

\usepackage{amsmath}
\usepackage{amsthm}

% format end of chapter exercises
\newtheoremstyle{exercise}
{12pt}       % space above
{12pt}       % space below
{}           % body font
{}           % indent amount
{\bfseries}  % head font
{}           % punctuation
{12pt}       % head space
{}           % custom head
\theoremstyle{exercise}
\newtheorem{exercise}{Exercise}[chapter]

\usepackage{afterpage}

\newcommand\blankpage{%
\null
\thispagestyle{empty}%
\addtocounter{page}{-1}%
\newpage}

\newif\ifplastex
\plastexfalse

%%%% PLASTEX ONLY
\ifplastex

\usepackage{localdef}

\usepackage{url}

\newcount\anchorcnt
\newcommand*{\Anchor}[1]{%
\@bsphack%
\Hy@GlobalStepCount\anchorcnt%
\edef\@currentHref{anchor.\the\anchorcnt}%
\Hy@raisedlink{\hyper@anchorstart{\@currentHref}\hyper@anchorend}%
\M@gettitle{}\label{#1}%
\@esphack%
}

% code listing environments:
% we don't need these for plastex because they get replaced
% by
% preprocess.py
%\newenvironment{code}{\begin{code}}{\end{code}}
%\newenvironment{stdout}{\begin{code}}{\end{code}}

% inline syntax formatting
\newcommand{\py}{\verb}%}

%%%% LATEX ONLY
\else

\input{latexonly}

\newcommand{\myrightarrow}{\ensuremath \rightarrow}

\fi

%%%% END OF PREAMBLE
\begin{document}

\frontmatter

%%%% PLASTEX ONLY
\ifplastex

\maketitle

%%%% LATEX ONLY
\else

\begin{latexonly}

% half title --- --- --- --- --- --- --- --- --- --- ---

\thispagestyle{empty}

\begin{flushright}
\vspace*{2.0in}

\begin{spacing}{3}
{\huge \thetitle}
\end{spacing}

\vspace{0.25in}

Version \theversion

\vfill

\end{flushright}

% verso --- --- --- --- --- --- --- --- --- --- --- --- ---

\afterpage{\blankpage}

%\clearemptydoublepage
%\pagebreak
%\thispagestyle{empty}
%\vspace*{6in}

% title page --- --- --- --- --- --- --- --- --- --- --- ---

\pagebreak
\thispagestyle{empty}

\begin{flushright}
\vspace*{2.0in}

\begin{spacing}{3}
{\huge \thetitle}
\end{spacing}

\vspace{0.25in}

Version \theversion

\vspace{1in}

{\Large
\theauthors \\
}

\vspace{0.5in}

{\Large Green Tea Press}

{\small Needham, Massachusetts}

%\includegraphics[width=1in]{figs/logo1.eps}
\vfill

\end{flushright}

% copyright --- --- --- --- --- --- --- --- --- --- --- --- ---

\pagebreak
\thispagestyle{empty}

Copyright \copyright ~2016 \theauthors.

\vspace{0.2in}

\begin{flushleft}
Green Tea Press \\
9 Washburn Ave \\
Needham MA 02492
\end{flushleft}

Permission is granted to copy, distribute, transmit and adapt
this work under a Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International License:
\url{https://thinkcomplex.com/license}.

If you are interested in distributing a commercial version of this
work, please contact the author.

The \LaTeX\
source for this book is available from

\begin{code}
https://github.com/AllenDowney/ThinkComplexity2
\end{code}

% table of contents --- --- --- --- --- --- --- --- ---

\cleardoublepage
\setcounter{tocdepth}{1}
\tableofcontents

\end{latexonly}


% HTML title page --- --- --- --- --- --- --- --- --- --- ---

\begin{htmlonly}

\vspace{1em}

{\Large \thetitle}

{\large \theauthors}

Version \theversion

\vspace{1em}

Copyright \copyright ~2016 \theauthors.

Permission is granted to copy, distribute, and/or modify this work
under the terms of the Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International License, which is
available at \url{https://thinkcomplex.com/license}.

\vspace{1em}

\setcounter{chapter}{-1}

\end{htmlonly}

% END OF THE PART WE SKIP FOR PLASTEX
\fi

\chapter{Preface}
\label{preface}

Complexity science is an interdisciplinary
field --- at the intersection of mathematics, computer science and
natural science --- that focuses on {\bf complex systems},
which are systems with many interacting components.

One of the core tools of complexity science is discrete models,
including networks and graphs, cellular automatons, and agent-based
simulations.  These tools are useful in the natural and social sciences,
and sometimes in arts and humanities.

For an overview of complexity science, see
\url{https://thinkcomplex.com/complex}.

\index{complexity science}
\index{complex systems}

Why should you learn about complexity science?  Here are a few reasons:

\begin{itemize}

\item Complexity science is useful, especially for explaining why natural and social systems behave the way they do.  Since Newton, math-based physics has focused on systems with small numbers of components and simple interactions.
These models are effective for some applications, like celestial mechanics, and less useful for others, like economics.  Complexity science provides a diverse and adaptable modeling toolkit.

\item Many of the central results of complexity science are surprising; a recurring theme of this book is that simple models can produce complicated behavior, with the corollary that we can sometimes explain complicated behavior in the real world using simple models.

\item As I explain in Chapter~\ref{overview}, complexity science is at the center of a slow shift in the practice of science and a change in what we consider science to be.

\item Studying complexity science provides an opportunity to learn about diverse physical and social systems, to develop and apply programming skills, and to think about fundamental questions in the philosophy of science.

\end{itemize}

By reading this book and working on the exercises you will have a chance to explore topics and ideas you might not encounter otherwise, practice programming in Python, and learn more about data structures and algorithms.

Features of this book include:

\begin{description}

\item[Technical details] Most books about complexity science
are written for a popular audience.  They leave out
technical details, which is frustrating for people who can handle
them.
This book presents the code, the math, and the explanations
you need to understand how the models work.

\item[Further reading] Throughout the book, I include pointers to
further reading, including original papers (most of which are
available electronically) and related articles from Wikipedia and
other sources.

\item[Jupyter notebooks] For each chapter I provide a Jupyter notebook
that includes the code from the chapter, additional examples, and
animations that let you see the models in action.

\item[Exercises and solutions] At the end of each chapter I suggest
exercises you might want to work on, with solutions.

\end{description}

For most of the links in this book I use URL redirection.  This mechanism has the drawback of hiding the link destination, but it makes the URLs shorter and less obtrusive.  Also, and more importantly, it allows me to update the links without updating the book.  If you find a broken link, please let me know and I will change the redirection.


\section{Who is this book for?}

The examples and supporting code for this book are in Python.  You
should know core Python and be familiar with its object-oriented features,
specifically using and defining classes.

\index{Python}

If you are not already familiar with Python, you might want to start
with {\it Think Python}, which is appropriate for people who have never programmed before.  If you have programming experience in another language, there are many good Python books to choose from, as well as online resources.

I use NumPy, SciPy, and NetworkX throughout the book.  If you are familiar with these libraries already, that's great, but I will also explain them when they appear.

\index{NumPy}
\index{SciPy}
\index{NetworkX}

I assume that the reader knows some mathematics: I use logarithms in several places, and vectors in one example.
But that's about it.


\section{Changes from the first edition}

For the second edition, I added two chapters, one on evolution, the other on the evolution of cooperation.

In the first edition, each chapter presented background on a topic and suggested experiments the reader could run.  For the second edition, I have done those experiments.  Each chapter presents the implementation and results as a worked example, then suggests additional experiments for the reader.

For the second edition, I replaced some of my own code with standard libraries like NumPy and NetworkX.  The result is more concise and more efficient, and it gives readers a chance to learn these libraries.

Also, the Jupyter notebooks are new.  For every chapter there are two notebooks: one contains the code from the chapter, explanatory text, and exercises; the other contains solutions to the exercises.

\index{Jupyter}

Finally, all supporting software has been updated to Python 3 (but most of it runs unmodified in Python 2).


\section{Using the code}
\label{code}

All code used in this book is available from a Git repository on GitHub:
\url{https://thinkcomplex.com/repo}.
If you are not familiar with Git, it is a
version control system that allows you to keep track of the files that
make up a project.  A collection of files under Git's control is
called a ``repository''.  GitHub is a hosting service that provides
storage for Git repositories and a convenient web interface.

\index{repository}
\index{Git}
\index{GitHub}

The GitHub homepage for my repository provides several ways to
work with the code:

\begin{itemize}

\item You can create a copy of my repository by pressing the {\sf
Fork} button in the upper right.  If you don't already have a GitHub
account, you'll need to create one.
After forking, you'll have your
own repository on GitHub that you can use to keep track of code you
write while working on this book.  Then you can clone the repo,
which means that you copy the files to your computer.

\index{fork}

\item Or you can clone my repository without forking; that is, you can
make a copy of my repo on your computer.  You don't need a GitHub
account to do this, but you won't be able to write your changes back
to GitHub.

\index{clone}

\item If you don't want to use Git at all, you can download the files
in a Zip file using the green button that says ``Clone or download''.

\end{itemize}

I developed this book using Anaconda from Continuum Analytics, which
is a free Python distribution that includes all the packages you'll
need to run the code (and lots more).  I found Anaconda easy to
install.  By default it does a user-level installation, not
system-level, so you don't need administrative privileges.  And it
supports both Python 2 and Python 3.  You can download Anaconda from
\url{https://continuum.io/downloads}.

\index{Anaconda}

The repository includes both Python scripts and Jupyter
notebooks.
If you have not used Jupyter before, you can read about
it at \url{https://jupyter.org}.

\index{Jupyter}

There are three ways you can work with the Jupyter notebooks:

\begin{description}

\item[Run Jupyter on your computer]

If you installed Anaconda, you can install Jupyter by running the following command in a terminal or Command Window:

\begin{verbatim}
$ conda install jupyter
\end{verbatim}

Before you launch Jupyter, you should \py{cd} into the directory that contains the code:

\begin{verbatim}
$ cd ThinkComplexity2/code
\end{verbatim}

And then start the Jupyter server:

\begin{verbatim}
$ jupyter notebook
\end{verbatim}

When you start the server, it should launch your default web browser
or create a new tab in an open browser window.  Then you can open
and run the notebooks.

\item[Run Jupyter on Binder]

Binder is a service that runs Jupyter in a virtual machine.  If you
follow this link, \url{https://thinkcomplex.com/binder},
you should get a Jupyter home page with the notebooks for this book
and the supporting data and scripts.

\index{Binder}

You can run the scripts and modify them to run your own code, but the
virtual machine you run them in is temporary.
If you leave it idle, the virtual machine disappears along with any changes you made.

\item[View notebooks on GitHub]

GitHub provides a view of the notebooks you can
use to read the notebooks and see the results I
generated, but you won't be able to modify or run the code.

\end{description}

Good luck, and have fun!


\begin{flushleft}
Allen B. Downey \\

Professor of Computer Science \\

Olin College of Engineering \\

Needham, MA
\end{flushleft}


\section*{Contributor List}

\index{contributors}

If you have a suggestion or correction, please send email to
{\tt downey@allendowney.com}.
If I make a change based on your
feedback, I will add you to the contributor list
(unless you ask to be omitted).
\index{contributors}

Let me know what version of the book you are working with, and
what format.  If you include at least part of the sentence the
error appears in, that makes it easy for me to search.  Page and
section numbers are fine, too, but not quite as easy to work with.
Thanks!

\small

\begin{itemize}

\item John Harley, Jeff Stanton, Colden Rouleau and
Keerthik Omanakuttan are Computational Modeling students who
pointed out typos.

\item Jose Oscar Mur-Miranda found several typos.

\item Phillip Loh, Corey Dolphin, Noam Rubin and Julian Ceipek
found typos and made helpful suggestions.

%\item I am grateful to the program committee that read and selected
%the case studies included in this book:
%Sarah Spence Adams,
%John Geddes,
%Stephen Holt,
%Vincent Manno,
%Robert Martello,
%Amon Millner,
%Jos\'{e} Oscar Mur-Miranda,
%Mark Somerville, and
%Ursula Wolz.

\item Sebastian Sch\"{o}ner sent two pages of corrections!

\item Philipp Marek sent a number of corrections.

\item Jason Woodard co-taught Complexity Science with me at Olin College, introduced me to NK models, and made many helpful suggestions and corrections.

\item Davi Post sent several corrections and suggestions.

\item Graham Taylor sent a pull request on GitHub that fixed many typos.

% ENDCONTRIB

\end{itemize}

I would especially like to thank the technical reviewers, Vincent Knight and Eric Ma, who made many helpful suggestions, and the copy editor, Charles Roumeliotis, who caught many errors and inconsistencies.

Other people who reported errors include
Richard Hollands,
Muhammad Najmi bin Ahmad Zabidi,
Alex Hantman, and
Jonathan Harford.


\normalsize


\mainmatter

\chapter{Complexity Science}
\label{overview}

Complexity science is
relatively new; it became recognizable as a
field, and was given a name, in the 1980s.  But its newness is not because it
applies the tools of science to a new subject, but because it uses
different tools, allows different kinds of work, and ultimately
changes what we mean by ``science''.

\index{complexity science}

To demonstrate the difference, I'll start with an example of classical
science: suppose someone asks you why planetary orbits are
elliptical.  You might invoke Newton's law of universal
gravitation and use it to write a differential equation that describes
planetary motion.  Then you can solve the differential equation and
show that the solution is an ellipse.  QED!

\index{gravitation}
\index{planetary motion}

Most people find this kind of explanation satisfying.  It includes a
mathematical derivation --- so it has some of the rigor of a proof --- and
it explains a specific observation, elliptical orbits, by appealing to
a general principle, gravitation.

\index{proof}
\index{natural law}

Let me contrast that with a different kind of explanation.  Suppose
you move to a city like Detroit that is racially segregated, and you
want to know why it's like that.  If you do some research, you might
find a paper by Thomas Schelling called ``Dynamic Models of
Segregation'', which proposes a simple model of racial segregation:

\index{segregation}
\index{Detroit}

Here is my description of the model, from Chapter~\ref{agent-based}:

\begin{quote}
The Schelling model of the city is an array of cells where each cell
represents a house.  The houses are occupied by two kinds of
``agents'', labeled red and blue, in roughly equal numbers.  About
10\% of the houses are empty.

\index{Schelling, Thomas}
\index{agent}

At any point in time, an agent might be happy or unhappy, depending on
the other agents in the neighborhood.
In one version of the model,
agents are happy if they have at least two neighbors like themselves,
and unhappy if they have one or zero.

\index{agent-based model}

The simulation proceeds by choosing an agent at random and checking
to see whether it is happy.  If so, nothing happens; if not,
the agent chooses one of the unoccupied cells at
random and moves.
\end{quote}

If you start with a simulated city that is entirely unsegregated and
run the model for a short time, clusters of similar agents appear.  As
time passes, the clusters grow and coalesce until there are a small
number of large clusters and most agents live in homogeneous
neighborhoods.

\index{segregation}

The degree of segregation in the model is surprising, and it suggests
an explanation of segregation in real cities.  Maybe Detroit is
segregated because people prefer not to be greatly outnumbered and
will move if the composition of their neighborhoods makes them
unhappy.

\index{racism}
\index{xenophobia}

Is this explanation satisfying in the same way as the explanation of
planetary motion?  Many people would say not, but why?

Most obviously, the Schelling model is highly abstract, which is to
say not realistic.  So you might be tempted to say that people are more complicated than planets.  But that can't be right.  After all, some planets have people on them, so they have to be more complicated than people.

\index{abstract model}

Both systems are complicated, and both models are based on
simplifications.  For example, in the model of planetary motion we
include forces between the planet and its sun, and ignore interactions
between planets.
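To see just how little the Schelling model includes, here is a minimal sketch of the dynamics described in the quote above.  This is my own toy illustration, not the implementation the book develops in Chapter~\ref{agent-based}; to keep it short, it puts the agents on a hypothetical one-dimensional ring where each agent considers its four nearest cells.

```python
import random

def make_city(n=100, empty_frac=0.1, seed=1):
    """Return a ring of cells, each 'R', 'B', or None (empty)."""
    random.seed(seed)
    return [None if random.random() < empty_frac else random.choice('RB')
            for _ in range(n)]

def neighbors(cells, i):
    """The four nearest cells on the ring (two on each side)."""
    n = len(cells)
    return [cells[(i + d) % n] for d in (-2, -1, 1, 2)]

def is_happy(cells, i):
    """Happy means at least two of the nearest cells hold a like agent."""
    return neighbors(cells, i).count(cells[i]) >= 2

def step(cells):
    """Choose a random agent; if it is unhappy, move it to a random empty cell."""
    agents = [i for i, c in enumerate(cells) if c is not None]
    i = random.choice(agents)
    if not is_happy(cells, i):
        empties = [j for j, c in enumerate(cells) if c is None]
        if empties:
            j = random.choice(empties)
            cells[j], cells[i] = cells[i], None

def segregation(cells):
    """Mean fraction of occupied neighboring cells that match each agent."""
    fracs = []
    for i, c in enumerate(cells):
        if c is None:
            continue
        occupied = [x for x in neighbors(cells, i) if x is not None]
        if occupied:
            fracs.append(occupied.count(c) / len(occupied))
    return sum(fracs) / len(fracs)

city = make_city()
for _ in range(10000):
    step(city)
```

Running \py{step} many times typically raises the mean fraction of like neighbors, which is the clustering described above.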
In Schelling's model, we include individual decisions
based on local information, and ignore every other aspect of human behavior.

\index{simplification}

But there are differences of degree.
For planetary motion, we can defend
the model by showing that the forces we ignore are smaller than the
ones we include.  And we can extend the model to include other
interactions and show that the effect is small.  For Schelling's model
it is harder to justify the simplifications.

\index{justification}

Another difference is that Schelling's model doesn't appeal to any
physical laws, and it uses only simple computation, not mathematical
derivation.  Models like Schelling's don't look like classical
science, and many people find them less compelling, at least at first.
But as I will try to demonstrate, these models do useful work,
including prediction, explanation, and design.  One of the goals of
this book is to explain how.

\index{modeling}


\section{The changing criteria of science}

Complexity science is not just a different set of models; it is also a
gradual shift in the criteria models are judged by, and in the kinds
of models that are considered acceptable.

\index{complexity science}

For example, classical models tend to be law-based, expressed in the
form of equations, and solved by mathematical derivation.  Models that
fall under the umbrella of complexity are often rule-based,
expressed as computations, and simulated rather than analyzed.

Not everyone finds these models satisfactory.  For example, in
{\em Sync}, Steven Strogatz writes about his model of spontaneous
synchronization in some species of fireflies.
He presents a
simulation that demonstrates the phenomenon, but then writes:

\index{Strogatz, Steven}
\index{Sync@{\it Sync}}
\index{fireflies}
\index{synchronization}

\begin{quote}
I repeated the simulation dozens of times, for other random
initial conditions and for other numbers of oscillators.  Sync
every time.  [...] The challenge now was to prove it.  Only an
ironclad proof would demonstrate, in a way that no computer ever
could, that sync was inevitable; and the best kind of proof would
clarify {\em why} it was inevitable.
\end{quote}

Strogatz is a mathematician, so his enthusiasm for proofs is
understandable, but his proof doesn't address what is, to me, the most
interesting part of the phenomenon.  In order to prove that ``sync was
inevitable'', Strogatz makes several simplifying assumptions, in
particular that each firefly can see all the others.

\index{proof}

In my opinion, it is more interesting to explain how an entire valley
of fireflies can synchronize {\em despite the fact that they cannot
all see each other}.  How this kind of global behavior emerges from
local interactions is the subject of Chapter~\ref{agent-based}.
Explanations of these phenomena often use agent-based models, which
explore (in ways that would be difficult or impossible with
mathematical analysis) the conditions that allow or prevent
synchronization.

I am a computer scientist, so my enthusiasm for computational models
is probably no surprise.  I don't mean to say that Strogatz is wrong,
but rather that people have different opinions about what questions to
ask and what tools to use to answer them.
These opinions are based
on value judgments, so there is no reason to expect agreement.

\index{computational model}

Nevertheless, there is rough consensus among scientists
about which models are considered good science, and which others
are fringe science, pseudoscience, or not science at all.

\index{fringe science}
\index{pseudoscience}

A central thesis of this book is that the
criteria this consensus is based on change over time, and that
the emergence of complexity science reflects a gradual shift in
these criteria.


\section{The axes of scientific models}

I have described classical models as based on physical laws, expressed
in the form of equations, and solved by mathematical analysis;
conversely, models of complex systems are often based on simple
rules and implemented as computations.

\index{criteria for models}

We can think of this trend as a shift over time along two axes:

\begin{description}

\item[Equation-based \myrightarrow~simulation-based] \quad

\item[Analysis \myrightarrow~computation] \quad

\end{description}

Complexity science is different in several other ways.
I present them
here so you know what's coming, but some of them might not make sense
until you have seen the examples later in the book.

\begin{description}

\item[Continuous \myrightarrow~discrete] Classical models tend to be
based on continuous mathematics, like calculus; models of complex
systems are often based on discrete mathematics, including graphs and
cellular automatons.

\index{continuous}
\index{discrete}

\item[Linear \myrightarrow~nonlinear] Classical models are often
linear, or use linear approximations to nonlinear systems;
complexity science is more friendly to nonlinear models.

\index{linear}
\index{nonlinear}

\item[Deterministic \myrightarrow~stochastic] Classical models are
usually deterministic, which may reflect underlying philosophical
determinism, discussed in Chapter~\ref{automatons}; complex models
often include randomness.

\index{deterministic}
\index{stochastic}

\item[Abstract \myrightarrow~detailed] In classical models, planets are
point masses, planes are frictionless, and cows are
spherical (see \url{https://thinkcomplex.com/cow}).
Simplifications like these are often necessary for analysis,
but computational models can be more realistic.

\index{spherical cow}
\index{cow, spherical}

\item[One, two \myrightarrow~many] Classical models are often limited to
small numbers of components.  For example, in celestial mechanics the
two-body problem can be solved analytically; the three-body problem
cannot.
Complexity science often works with large numbers of components and larger numbers of interactions.

\index{one, two, many}

\item[Homogeneous \myrightarrow~heterogeneous] In classical models, the
components and interactions tend to be identical; complex models more often
include heterogeneity.

\index{homogeneous}
\index{composite}

\end{description}

These are generalizations, so we should not take them too seriously.
And I don't mean to deprecate classical science.  A more complicated
model is not necessarily better; in fact, it is usually worse.

And I don't mean to say that these changes are abrupt or complete.
Rather, there is a gradual migration in the frontier of what is
considered acceptable, respectable work.  Some tools that used to be
regarded with suspicion are now common, and some models that were
widely accepted are now subject to scrutiny.

For example, when Appel and Haken proved the four-color theorem in
1976, they used a computer to enumerate 1,936 special cases that were,
in some sense, lemmas of their proof.  At the time, many
mathematicians did not consider the theorem truly proved.  Now
computer-assisted proofs are common and generally (but not
universally) accepted.

\index{Appel, Kenneth}
\index{Haken, Wolfgang}
\index{four-color theorem}

Conversely, a substantial body of economic analysis is based on a
model of human behavior called ``Economic man'', or, with tongue in
cheek, {\it Homo economicus}.  Research based on this model was
highly regarded for several decades, especially if it involved
mathematical virtuosity.
More recently, this model has been treated with
skepticism, and models that include imperfect information and
bounded rationality are hot topics.

\index{economic man}
\index{Homo economicus}
\index{economics}


\section{Different models for different purposes}

Complex models are often appropriate for different purposes and
interpretations:

\index{complex model}

\begin{description}

\item[Predictive \myrightarrow~explanatory] Schelling's model
of segregation might shed light on a complex social phenomenon, but
it is not useful for prediction.  On the other hand, a simple model
of celestial mechanics can predict solar eclipses, down to the second,
years in the future.

\index{predictive model}
\index{explanatory model}

\item[Realism \myrightarrow~instrumentalism] Classical models lend
themselves to a realist interpretation; for example, most people
accept that electrons are real things that exist.  Instrumentalism
is the view that models can be useful even if the entities they
postulate don't exist.  George Box wrote what might be the motto of
instrumentalism: ``All models are wrong, but some are useful.''

\index{realism}
\index{instrumentalism}

\item[Reductionism \myrightarrow~holism] Reductionism is the view that
the behavior of a system can be explained by understanding its
components.  For example, the periodic table of the elements is a
triumph of reductionism, because it explains the chemical behavior
of elements with a model of electrons in atoms.
Holism
is the view that some phenomena that appear at the system level do
not exist at the level of components, and cannot be explained in
component-level terms.

\index{reductionism}
\index{holism}

\end{description}

We get back to explanatory models in Chapter~\ref{scale-free},
instrumentalism in Chapter~\ref{lifechap}, and holism in Chapter~\ref{soc}.


\section{Complexity engineering}

I have been talking about complex systems in the context of science,
but complexity is also a cause, and effect, of changes in engineering
and the design of social systems:

\index{engineering}

\begin{description}

\item[Centralized \myrightarrow~decentralized] Centralized systems are
conceptually simple and easier to analyze, but decentralized systems
can be more robust.  For example, in the World Wide Web clients send
requests to centralized servers; if the servers are down, the
service is unavailable.  In peer-to-peer networks, every node is
both a client and a server.  To take down the service, you have to
take down {\em every} node.

\index{centralized}
\index{decentralized}
\index{client-server architecture}
\index{peer-to-peer architecture}

\item[One-to-many \myrightarrow~many-to-many] In many communication
systems, broadcast services are being augmented, and sometimes
replaced, by services that allow users to communicate with each
other and create, share, and modify content.

\index{broadcast service}

\item[Top-down \myrightarrow~bottom-up] In social, political and
economic systems, many activities that would normally be centrally
organized now operate as grassroots movements.
Even armies, which
are the canonical example of hierarchical structure, are moving
toward devolved command and control.

\index{top-down}
\index{bottom-up}
\index{grassroots}

\item[Analysis \myrightarrow~computation] In classical engineering,
the space of feasible designs is limited by our capability for
analysis.  For example, designing the Eiffel Tower was possible
because Gustave Eiffel developed novel analytic techniques, in
particular for dealing with wind load.  Now tools for computer-aided
design and analysis make it possible to build almost anything that
can be imagined.  Frank Gehry's Guggenheim Museum Bilbao is my
favorite example.

\index{analysis}
\index{computation}
\index{Eiffel Tower}
\index{Eiffel, Gustave}
\index{Gehry, Frank}
\index{Guggenheim Museum Bilbao}

\item[Isolation \myrightarrow~interaction] In classical engineering,
the complexity of large systems is managed by isolating components
and minimizing interactions.  This is still an important engineering
principle; nevertheless, the availability of computation makes
it increasingly feasible to design systems with complex interactions
between components.

\index{isolation}
\index{interaction}

\item[Design \myrightarrow~search] Engineering is sometimes described
as a search for solutions in a landscape of possible designs.
Increasingly, the search process can be automated.  For example,
genetic algorithms explore large design spaces and discover
solutions human engineers would not imagine (or like).
The ultimate951genetic algorithm, evolution, notoriously generates designs that952violate the rules of human engineering.953954\index{design}955\index{search}956957\end{description}958959960\section{Complexity thinking}961962We are getting farther afield now, but the shifts I am postulating963in the criteria of scientific modeling are related to 20th century964developments in logic and epistemology.965966\index{logic}967\index{epistemology}968969\begin{description}970971\item[Aristotelian logic \myrightarrow~many-valued logic] In972traditional logic, any proposition is either true or false. This973system lends itself to math-like proofs, but fails (in dramatic974ways) for many real-world applications. Alternatives include975many-valued logic, fuzzy logic, and other systems designed to handle976indeterminacy, vagueness, and uncertainty. Bart977Kosko discusses some of these systems in {\em Fuzzy Thinking}.978979\index{Aristotelian logic}980\index{many-valued logic}981\index{Kosko, Bart}982\index{Fuzzy Thinking@{\it Fuzzy Thinking}}983\index{uncertainty}984985\item[Frequentist probability \myrightarrow~Bayesianism] Bayesian986probability has been around for centuries, but was not widely used987until recently, facilitated by the availability of cheap computation988and the reluctant acceptance of subjectivity989in probabilistic claims. Sharon Bertsch McGrayne presents this990history in {\em The Theory That Would Not Die}.991992\index{frequentist}993\index{Bayesian}994\index{McGrayne, Sharon Bertsch}995\index{Theory That Would Not Die, The@{\it The Theory That Would Not Die}}996997\item[Objective \myrightarrow~subjective] The Enlightenment, and998philosophic modernism, are based on belief in objective truth, that999is, truths that are independent of the people that hold them. 
20th1000century developments including quantum mechanics, G\"{o}del's1001Incompleteness Theorem, and Kuhn's study of the history of science1002called attention to seemingly unavoidable subjectivity in1003even ``hard sciences'' and mathematics. Rebecca Goldstein presents1004the historical context of G\"{o}del's proof in {\it Incompleteness}.10051006\index{objective}1007\index{subjective}1008\index{Kuhn, Thomas}1009\index{Godel's Incompleteness Theorem@G\"{o}del's Incompleteness Theorem}1010\index{incompleteness}1011\index{Goldstein, Rebecca}1012\index{Incompleteness@{\it Incompleteness}}10131014\item[Physical law \myrightarrow~theory \myrightarrow~model] Some1015people distinguish between laws, theories, and models. Calling1016something a ``law'' implies that it is objectively true and1017immutable; ``theory'' suggests that it is subject to revision; and1018``model'' concedes that it is a subjective choice based on1019simplifications and approximations.10201021\index{physical law}1022\index{theory}1023\index{model}10241025I think they are all the same thing. Some concepts that are called1026laws are really definitions; others are, in effect, the1027assertion that a certain model predicts or explains the behavior of a system1028particularly well. We come back to the nature of physical laws in1029Section~\ref{model1}, Section~\ref{model3} and Section~\ref{model2}.10301031%TODO: Check how these refs look in the O'Reilly version.10321033\item[Determinism \myrightarrow~indeterminism] Determinism is the view1034that all events are caused, inevitably, by prior events. Forms of1035indeterminism include randomness, probabilistic causation, and1036fundamental uncertainty. We come back to this1037topic in Section~\ref{determinism} and Section~\ref{freewill}10381039\index{determinism}1040\index{indeterminism}1041\index{free will}10421043\end{description}10441045These trends are not universal or complete, but the center of1046opinion is shifting along these axes. 
As evidence, consider the
reaction to Thomas Kuhn's {\em The Structure of Scientific
Revolutions}, which was reviled when it was published and is
now considered almost uncontroversial.

\index{Kuhn, Thomas}
\index{Structure of Scientific Revolutions@{\it The Structure of Scientific Revolutions}}

These trends are both cause and effect of complexity science.  For
example, highly abstracted models are more acceptable now because of
the diminished expectation that there should be a unique, correct model
for every system.  Conversely, developments in complex systems
challenge determinism and the related concept of physical law.

This chapter is an overview of the themes coming up in the book, but
not all of it will make sense before you see the examples.  When you
get to the end of the book, you might find it helpful to read this
chapter again.


\chapter{Graphs}
\label{graphs}

\newcommand{\Erdos}{Erd\H{o}s}
\newcommand{\Renyi}{R\'{e}nyi}

The next three chapters are about systems made up of components and
connections between components.  For example, in a social network, the
components are people and connections represent friendships, business
relationships, etc.  In an ecological food web, the components are
species and the connections represent predator-prey relationships.

In this chapter, I introduce NetworkX, a Python package for building
models of these systems.  We start with the \Erdos-\Renyi~model,
which has interesting mathematical properties.  In the next
chapter we move on to models that are more useful for explaining
real-world systems.

The code for this chapter is in {\tt chap02.ipynb} in the repository
for this book.
More information about working with the code is
in Section~\ref{code}.


\section{What is a graph?}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap02-1.pdf}}
\caption{A directed graph that represents a social network.}
\label{chap02-1}
\end{figure}

To most people a ``graph'' is a visual representation of data, like
a bar chart or a plot of stock prices over time.  That's not what this
chapter is about.

\index{graph}

In this chapter, a {\bf graph} is a representation of
a system that contains discrete, interconnected elements.  The
elements are represented by {\bf nodes} --- also called {\bf vertices} ---
and the interconnections are represented by {\bf edges}.

\index{node}
\index{edge}
\index{vertex}

For example, you could represent a road map with a node for each
city and an edge for each road between cities.  Or you could
represent a social network using a node for each person, with an
edge between two people if they are friends.

\index{road network}
\index{social network}

In some graphs, edges have attributes like length, cost, or weight.
For example, in a road map, the length of an edge might represent
distance between cities or travel time.  In a
social network there might be different kinds of edges to represent
different kinds of relationships: friends, business associates, etc.

\index{edge weight}
\index{weight}

Edges may be {\bf directed} or {\bf undirected}, depending on whether
the relationships they represent are asymmetric or symmetric.  In a
road map, you might represent a one-way street with a directed edge
and a two-way street with an undirected edge.  In some social
networks, like Facebook, friendship is symmetric: if $A$ is friends
with $B$ then $B$ is friends with $A$.
But on Twitter, for example,
the ``follows'' relationship is not symmetric; if $A$ follows $B$,
that doesn't imply that $B$ follows $A$.  So you might use undirected
edges to represent a Facebook network and directed edges for Twitter.

\index{directed graph}
\index{undirected graph}

Graphs have interesting mathematical properties, and
there is a branch of mathematics called {\bf graph theory}
that studies them.

\index{graph theory}

Graphs are also useful, because there are many real world
problems that can be solved using {\bf graph algorithms}.
For example, Dijkstra's shortest path algorithm is an efficient
way to find the shortest path from a node to all
other nodes in a graph.  A {\bf path} is a sequence of nodes
with an edge between each consecutive pair.

\index{graph algorithm}
\index{path}

Graphs are usually drawn with squares or circles for nodes and lines
for edges.  For example, the directed graph in Figure~\ref{chap02-1}
might represent three people who follow each other on Twitter.
The arrows indicate the direction of the relationships.
In this example, Alice and Bob follow each other, both follow
Chuck, and Chuck follows no one.

\index{representing graphs}

The undirected graph in Figure~\ref{chap02-2} shows four cities
in the northeast United States; the labels on the edges
indicate driving time in hours.
In this example the placement of the nodes corresponds
roughly to the geography of the cities, but in general the layout
of a graph is arbitrary.

\index{graph layout}


\section{NetworkX}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap02-2.pdf}}
\caption{An undirected graph that represents driving time between cities.}
\label{chap02-2}
\end{figure}

To represent graphs, we'll use a package called NetworkX,
which is the most commonly used network library in Python.
You can read more about it at \url{https://thinkcomplex.com/netx},
but I'll explain it as we go along.

\index{NetworkX}

We can create a directed graph by importing NetworkX (usually
imported as \py{nx}) and instantiating \py{nx.DiGraph}:

\begin{code}
import networkx as nx
G = nx.DiGraph()
\end{code}

At this point, \py{G} is a \py{DiGraph} object that contains no nodes
and no edges.
We can add nodes using the \py{add_node} method:

\begin{code}
G.add_node('Alice')
G.add_node('Bob')
G.add_node('Chuck')
\end{code}

Now we can use the \py{nodes} method to get a list of nodes:

\begin{code}
>>> list(G.nodes())
['Alice', 'Bob', 'Chuck']
\end{code}

The \py{nodes} method returns a \py{NodeView}, which can be used in a
for loop or, as in this example, used to make a list.

Adding edges works pretty much the same way:

\begin{code}
G.add_edge('Alice', 'Bob')
G.add_edge('Alice', 'Chuck')
G.add_edge('Bob', 'Alice')
G.add_edge('Bob', 'Chuck')
\end{code}

And we can use \py{edges} to get the list of edges:

\begin{code}
>>> list(G.edges())
[('Alice', 'Bob'), ('Alice', 'Chuck'),
 ('Bob', 'Alice'), ('Bob', 'Chuck')]
\end{code}

NetworkX provides several functions for drawing graphs;
\py{draw_circular} arranges the nodes in a circle and connects them
with edges:

\begin{code}
nx.draw_circular(G,
                 node_color=COLORS[0],
                 node_size=2000,
                 with_labels=True)
\end{code}

That's the code I use to generate Figure~\ref{chap02-1}.
The option \py{with_labels} causes the nodes to be labeled;
in the next example we'll see how to label the edges.

\index{Graph}
\index{node}
\index{edge}

To generate Figure~\ref{chap02-2}, I start with a dictionary
that maps from each city name to its approximate longitude
and latitude:

\begin{code}
positions = dict(Albany=(-74, 43),
                 Boston=(-71, 42),
                 NYC=(-74, 41),
                 Philly=(-75, 40))
\end{code}

Since this is an undirected graph, I instantiate \py{nx.Graph}:

\begin{code}
G = nx.Graph()
\end{code}

Then I can use \py{add_nodes_from} to iterate the keys of
\py{positions} and add them as nodes:

\begin{code}
G.add_nodes_from(positions)
\end{code}

Next I'll make a dictionary that maps from each edge to the
corresponding driving time:

\begin{code}
drive_times = {('Albany', 'Boston'): 3,
               ('Albany', 'NYC'): 4,
               ('Boston', 'NYC'): 4,
               ('NYC', 'Philly'): 2}
\end{code}

Now I can use \py{add_edges_from}, which iterates the keys of
\py{drive_times} and adds them as edges:

\begin{code}
G.add_edges_from(drive_times)
\end{code}

Instead of \py{draw_circular}, which arranges the nodes in
a circle, I'll use \py{draw}, which takes the position dictionary as
the second parameter:

\begin{code}
nx.draw(G, positions,
        node_color=COLORS[1],
        node_shape='s',
        node_size=2500,
        with_labels=True)
\end{code}

\py{draw} uses \py{positions} to determine the locations of the nodes.

To add the edge labels, we use \py{draw_networkx_edge_labels}:

\begin{code}
nx.draw_networkx_edge_labels(G, positions,
                             edge_labels=drive_times)
\end{code}

The \py{edge_labels} parameter expects a dictionary that maps from
each pair of nodes to a label; in this case, the labels are driving
times between cities.  And that's how I generated
Figure~\ref{chap02-2}.

In both of these examples, the nodes are strings, but in general they
can be any hashable type.

\index{hashable}


\section{Random graphs}
\label{randomgraphs}

A random graph is just what it sounds like: a graph with nodes and
edges generated at random.
Of course, there are many random processes that
can generate graphs, so there are many kinds of random graphs.

\index{random graph}

One of the more interesting kinds is the \Erdos-\Renyi~model, studied
by Paul \Erdos~and Alfr\'{e}d \Renyi~in the 1960s.

\index{Renyi, Alfred@\Renyi, Alfr\'{e}d}
\index{Erdos, Paul@\Erdos, Paul}

An \Erdos-\Renyi~graph (ER graph) is characterized by two parameters:
$n$ is the number of nodes and $p$ is the probability that there
is an edge between any two nodes.
See \url{https://thinkcomplex.com/er}.

\index{Erdos-Renyi model@\Erdos-\Renyi~model}

\Erdos~and \Renyi~studied the properties of these random graphs;
one of their surprising results is the existence of
abrupt changes in the properties of random graphs as
random edges are added.

\index{random edge}

One of the properties that displays this kind of transition is
connectivity.  An undirected graph is {\bf connected} if there is a
path from every node to every other node.

\index{connected graph}

In an ER graph, the probability that the graph is connected is very
low when $p$ is small and nearly 1 when $p$ is large.
Between these
two regimes, there is a rapid transition at a particular value of
$p$, denoted $p^*$.

%TODO: check these formulas in the O'Reilly version

\Erdos~and \Renyi~showed that this critical value is
$p^* = (\ln n) / n$, where $n$ is the number of nodes.
A random graph, $G(n, p)$, is unlikely to be connected
if $p < p^*$ and very likely to be connected if $p > p^*$.

\index{critical value}

To test this claim, we'll develop algorithms to generate random
graphs and check whether they are connected.


\section{Generating graphs}
\label{generating}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap02-3.pdf}}
\caption{A complete graph with 10 nodes.}
\label{chap02-3}
\end{figure}

I'll start by generating a {\bf complete} graph, which is a graph
where every node is connected to every other.

\index{complete graph}
\index{generator function}

Here's a generator function that takes a list of nodes and enumerates
all distinct pairs.
If you are not familiar with generator functions,
you can read about them at \url{https://thinkcomplex.com/gen}.

\begin{code}
def all_pairs(nodes):
    for i, u in enumerate(nodes):
        for j, v in enumerate(nodes):
            if i > j:
                yield u, v
\end{code}

We can use \py{all_pairs} to construct a complete graph:

\begin{code}
def make_complete_graph(n):
    G = nx.Graph()
    nodes = range(n)
    G.add_nodes_from(nodes)
    G.add_edges_from(all_pairs(nodes))
    return G
\end{code}

\py{make_complete_graph} takes the number of nodes, \py{n}, and
returns a new \py{Graph} with \py{n} nodes and edges between all
pairs of nodes.

The following code makes a complete graph with 10 nodes and draws it:

\begin{code}
complete = make_complete_graph(10)
nx.draw_circular(complete,
                 node_color=COLORS[2],
                 node_size=1000,
                 with_labels=True)
\end{code}

Figure~\ref{chap02-3} shows the result.
Soon we will modify this code to generate ER graphs, but first
we'll develop functions to check whether a graph is connected.


\section{Connected graphs}
\label{connected}

A graph is {\bf connected} if there is a path from every node to every
other node (see \url{https://thinkcomplex.com/conn}).
\index{connected graph}
\index{path}

For many applications involving graphs, it is useful to check whether
a graph is connected.  Fortunately, there is a simple algorithm that
does it.

You can start at any node and check whether you can reach all
other nodes.  If you can reach a node, $v$, you can reach any
of the {\bf neighbors} of $v$, which are the nodes connected to
$v$ by an edge.

\index{neighbor node}

The \py{Graph} class provides a method called \py{neighbors}
that returns the neighbors of a given node.
For
example, in the complete graph we generated in the previous section:

\begin{code}
>>> list(complete.neighbors(0))
[1, 2, 3, 4, 5, 6, 7, 8, 9]
\end{code}

Suppose we start at node $s$.  We can mark $s$ as ``seen'' and mark
its neighbors.  Then we mark the neighbors' neighbors, and their
neighbors, and so on, until we can't reach any more nodes.  If all
nodes are seen, the graph is connected.

\index{reachable node}

Here's what that looks like in Python:

\begin{code}
def reachable_nodes(G, start):
    seen = set()
    stack = [start]
    while stack:
        node = stack.pop()
        if node not in seen:
            seen.add(node)
            stack.extend(G.neighbors(node))
    return seen
\end{code}

\py{reachable_nodes} takes a \py{Graph} and a starting node,
\py{start}, and returns the set of nodes that can be reached from
\py{start}.

\index{set}
\index{stack}

Initially the set, \py{seen}, is empty, and we create a
list called \py{stack} that keeps track of nodes we have
discovered but not yet processed.
Initially the stack contains
a single node, \py{start}.

\index{set}

Now, each time through the loop, we:

\begin{enumerate}

\item Remove one node from the stack.

\item If the node is already in \py{seen}, we go back
to Step 1.

\item Otherwise, we add the node to \py{seen} and add its
neighbors to the stack.

\end{enumerate}

When the stack is empty, we can't reach any more nodes, so we
break out of the loop and return \py{seen}.

\index{stack}

As an example, we can find all nodes in the complete graph that
are reachable from node 0:

\begin{code}
>>> reachable_nodes(complete, 0)
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
\end{code}

Initially, the stack contains node 0 and \py{seen} is empty.
The first time through the loop, node 0 is added to \py{seen}
and all the other nodes are added to the stack (since they are all
neighbors of node 0).

The next time through the loop, \py{pop} returns the last element
in the stack, which is node 9.  So node 9 gets added to \py{seen}
and its neighbors get added to the stack.

Notice that the same node can appear more than once in the stack;
in fact, a node with $k$ neighbors will be added to the stack
$k$ times.  Later, we will look for ways to make this algorithm
more efficient.

We can use \py{reachable_nodes} to write \py{is_connected}:

\begin{code}
def is_connected(G):
    start = next(iter(G))
    reachable = reachable_nodes(G, start)
    return len(reachable) == len(G)
\end{code}

\py{is_connected} chooses a starting node by making a node iterator
and choosing the first element.
Then it uses \py{reachable_nodes} to get the set of nodes that can be
reached from \py{start}.
If the size of this set is the same as the size
of the graph, that means we can reach all nodes, which means the
graph is connected.

A complete graph is, not surprisingly, connected:

\begin{code}
>>> is_connected(complete)
True
\end{code}

In the next section we will generate ER graphs and check whether they
are connected.


\section{Generating ER graphs}
\label{flip}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap02-4.pdf}}
\caption{An ER graph with \py{n=10} and \py{p=0.3}.}
\label{chap02-4}
\end{figure}

The ER graph $G(n, p)$ contains $n$ nodes, and each pair of nodes is
connected by an edge with probability $p$.  Generating an ER graph is
similar to generating a complete graph.

\index{flip}
\index{generator function}

The following generator function enumerates all possible edges and
chooses which ones should be added to the graph:

\begin{code}
def random_pairs(nodes, p):
    for edge in all_pairs(nodes):
        if flip(p):
            yield edge
\end{code}

\py{random_pairs} uses \py{flip}:

\begin{code}
def flip(p):
    return np.random.random() < p
\end{code}

This is the first example we've seen that uses NumPy.  Following
convention, I import \py{numpy} as \py{np}.
NumPy provides a module named \py{random}, which provides a function
named \py{random}, which returns a number between 0 and 1, uniformly
distributed.

So \py{flip} returns \py{True} with the
given probability, \py{p}, and \py{False} with the complementary
probability, \py{1-p}.

\index{NumPy}

Finally, \py{make_random_graph} generates and returns the ER graph
$G(n, p)$:

\begin{code}
def make_random_graph(n, p):
    G = nx.Graph()
    nodes = range(n)
    G.add_nodes_from(nodes)
    G.add_edges_from(random_pairs(nodes, p))
    return G
\end{code}

\py{make_random_graph} is almost identical to
\py{make_complete_graph}; the only difference is that it uses
\py{random_pairs} instead of \py{all_pairs}.

Here's an example with \py{p=0.3}:

\begin{code}
random_graph = make_random_graph(10, 0.3)
\end{code}

Figure~\ref{chap02-4} shows the result.  This graph turns out to be
connected; in fact, most ER graphs with $n=10$ and
$p=0.3$ are connected.  In the next section, we'll see how many.


\section{Probability of connectivity}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap02-5.pdf}}
\caption{Probability of connectivity with $n=10$ and a range of $p$.
The vertical line shows the predicted critical value.}
\label{chap02-5}
\end{figure}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap02-6.pdf}}
\caption{Probability of connectivity for several values of $n$ and a range of $p$.}
\label{chap02-6}
\end{figure}

For given values of $n$ and $p$, we would like to know the probability
that $G(n, p)$ is connected.
We can estimate it by generating
a large number of random graphs and counting how many are connected.
Here's how:

\begin{code}
def prob_connected(n, p, iters=100):
    tf = [is_connected(make_random_graph(n, p))
          for i in range(iters)]
    return np.mean(tf)
\end{code}

The parameters \py{n} and \py{p} are passed along to
\py{make_random_graph}; \py{iters} is the number of random graphs we
generate.

This function uses a list comprehension; if you are not familiar with
this feature, you can read about it at
\url{https://thinkcomplex.com/comp}.

\index{list comprehension}

The result, \py{tf}, is a list of boolean values: \py{True} for each
graph that's connected and \py{False} for each one that's not.

\index{boolean}

\py{np.mean} is a NumPy function that computes the mean of this list,
treating \py{True} as 1 and \py{False} as 0.  The result is the
fraction of random graphs that are connected.

\index{NumPy}
\index{mean}
\index{probability of connectivity}

\begin{code}
>>> prob_connected(10, 0.23, iters=10000)
0.33
\end{code}

I chose $0.23$ because it is close to the critical value where the
probability of connectivity goes from near 0 to near 1.
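For $n=10$, we can compute this critical value directly from the
formula $p^* = (\ln n)/n$ introduced earlier in the chapter.  Here is
a quick sketch (mine, not code from the book):

```python
import numpy as np

# Critical value for ER connectivity: p* = ln(n) / n
n = 10
p_star = np.log(n) / n
print(round(p_star, 2))  # 0.23
```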
According to \Erdos~and \Renyi, $p^* = \ln n / n = 0.23$.

\index{critical value}

We can get a clearer view of the transition by estimating the
probability of connectivity for a range of values of $p$:

\begin{code}
n = 10
ps = np.logspace(-2.5, 0, 11)
ys = [prob_connected(n, p) for p in ps]
\end{code}

The NumPy function \py{logspace} returns an
array of 11 values from $10^{-2.5}$ to $10^0 = 1$, equally spaced
on a logarithmic scale.

\index{NumPy}
\index{logarithm}
\index{logspace}

For each value of \py{p} in the array, we compute the probability
that a graph with parameter \py{p} is connected and store the
results in \py{ys}.

Figure~\ref{chap02-5} shows the results, with
a vertical line at the computed critical value, $p^* = 0.23$.
As expected, the transition from 0 to 1
occurs near the critical value.

Figure~\ref{chap02-6} shows
similar results for larger values of $n$.  As $n$ increases, the
critical value gets smaller and the transition gets more abrupt.

These experimental results are consistent with the analytic results
\Erdos~and \Renyi~presented in their papers.


\section{Analysis of graph algorithms}
\label{graphanalysis}

Earlier in this chapter I presented an algorithm for checking whether
a graph is connected; in the next few chapters, we will see
other graph algorithms.
Along the way, we will analyze the
performance of those algorithms, figuring out how their run times
grow as the size of the graphs increases.

\index{analysis of algorithms}

If you are not already familiar with analysis of algorithms,
you might want to read Appendix B of {\it Think Python, 2nd Edition},
at \url{https://thinkcomplex.com/tp2}.

\newcommand{\V}{n}
\newcommand{\E}{m}

The order of growth for graph algorithms is usually expressed
as a function of $\V$, the number of vertices (nodes), and $\E$, the
number of edges.

\index{graph algorithm}

As an example, let's analyze \py{reachable_nodes} from
Section~\ref{connected}:

\begin{code}
def reachable_nodes(G, start):
    seen = set()
    stack = [start]
    while stack:
        node = stack.pop()
        if node not in seen:
            seen.add(node)
            stack.extend(G.neighbors(node))
    return seen
\end{code}

Each time through the loop, we pop a node off the stack; by default,
\py{pop} removes and returns the last element of a list, which is
a constant time operation.

\index{constant time}

Next we check whether the node is in \py{seen}, which is a set,
so checking membership is constant time.

If the node is not already in \py{seen}, we add it, which is
constant time, and then add the neighbors to the stack, which is
linear in the number of neighbors.

\index{linear time}

To express the run time in terms of $\V$ and $\E$, we can add up
the total number of times each node is added to \py{seen}
and \py{stack}.

Each node is only added to \py{seen} once, so the total number
of additions is $\V$.

But nodes might be added to \py{stack} many times, depending on
how many neighbors they have.  If a node has $k$ neighbors, it
is added to \py{stack} $k$ times.
Of course, if it has $k$ neighbors,
that means it is connected to $k$ edges.

So the total number of additions to \py{stack} is twice the number of
edges, $2\E$, because each edge contributes one addition at each of
its two endpoints.

\index{order of growth}

Therefore, the order of growth for this function is $O(\V + \E)$,
which is a convenient way to say that the run time grows in proportion
to either $\V$ or $\E$, whichever is bigger.

\index{breadth-first search}
\index{BFS}

If we know the relationship between $\V$ and $\E$, we can simplify
this expression.  For example, in a complete graph the number of edges
is $n(n-1)/2$, which is in $O(\V^2)$.  So for a complete graph,
\py{reachable_nodes} is quadratic in $\V$.
\index{quadratic}


\section{Exercises}

The code for this chapter is in \py{chap02.ipynb}, which is a
Jupyter notebook in the repository for this book.  For more
information about working with this code, see Section~\ref{code}.

\begin{exercise}
Launch \py{chap02.ipynb} and run the code.  There are a few short
exercises embedded in the notebook that you might want to try.
\end{exercise}

\begin{exercise}
In Section~\ref{graphanalysis} we analyzed the performance of
\py{reachable_nodes} and classified it in $O(n + m)$, where $n$ is the
number of nodes and $m$ is the number of edges.  Continuing the
analysis, what is the order of growth for \py{is_connected}?

\begin{code}
def is_connected(G):
    start = list(G)[0]
    reachable = reachable_nodes(G, start)
    return len(reachable) == len(G)
\end{code}

\end{exercise}

\begin{exercise}
In my implementation of \py{reachable_nodes}, you might be bothered by
the apparent inefficiency of adding {\em all} neighbors to the stack
without checking whether they are already in \py{seen}.  Write a
version of this function that checks the neighbors before adding them
to the stack.
Does this ``optimization'' change the order of growth?
Does it make the function faster?
\end{exercise}


\begin{exercise}

There are actually two kinds of ER graphs.  The one we generated in
this chapter, $G(n, p)$, is characterized by two parameters, the
number of nodes and the probability of an edge between nodes.

\index{Erdos-Renyi model@\Erdos-\Renyi~model}

An alternative definition, denoted $G(n, m)$, is also characterized by
two parameters: the number of nodes, $n$, and the number of edges,
$m$.  Under this definition, the number of edges is fixed, but their
location is random.

Repeat the experiments we did in this chapter using this alternative
definition.  Here are a few suggestions for how to proceed:

\begin{enumerate}

\item Write a function called \py{m_pairs} that takes a list of nodes
and the number of edges, $m$, and returns a random selection of $m$
edges.  A simple way to do that is to generate a list of all possible
edges and use \py{random.sample}.

\item Write a function called \py{make_m_graph} that takes $n$ and
$m$ and returns a random graph with $n$ nodes and $m$ edges.

\item Make a version of \py{prob_connected} that uses
\py{make_m_graph} instead of \py{make_random_graph}.

\item Compute the probability of connectivity for a range of values
of $m$.

\end{enumerate}

How do the results of this experiment compare to the results using the
first type of ER graph?

\end{exercise}


\chapter{Small World Graphs}

Many networks in the real world, including social networks, have
the ``small world property'', which is that the average distance
between nodes, measured in number of edges on the shortest path,
is much smaller than expected.

In this chapter, I present Stanley Milgram's famous Small World
Experiment, which was the first demonstration of
the small world property in a real social network.
Then we'll1879consider Watts-Strogatz graphs, which are intended as a model of1880small world graphs. I'll replicate the experiment Watts and Strogatz performed and explain what it is intended to show.18811882Along the way, we'll see two new graph algorithms: breadth-first1883search (BFS) and Dijkstra's algorithm for computing the shortest1884path between nodes in a graph.18851886The code for this chapter is in {\tt chap03.ipynb} in the repository1887for this book. More information about working with the code is1888in Section~\ref{code}.18891890\section{Stanley Milgram}18911892Stanley Milgram was an American social psychologist who conducted1893two of the most famous experiments in social science, the1894Milgram experiment, which studied people's obedience to authority1895(\url{https://thinkcomplex.com/milgram})1896and the Small World Experiment, which studied1897the structure of social networks1898(\url{https://thinkcomplex.com/small}).18991900\index{Milgram, Stanley}1901\index{small world experiment}19021903In the Small World Experiment, Milgram sent a package to several1904randomly-chosen people in Wichita, Kansas, with instructions asking1905them to forward an enclosed letter to a target person, identified by1906name and occupation, in Sharon, Massachusetts (which happens to be the town near1907Boston where I grew up). The subjects were told that they could mail1908the letter directly to the target person only if they knew him1909personally; otherwise they were instructed to send it, and the same1910instructions, to a relative or friend they thought would be more1911likely to know the target person.19121913\index{Kansas}1914\index{Wichita, Kansas}1915\index{Massachusetts}1916\index{Sharon, Massachusetts}19171918Many of the letters were never delivered, but for the ones that1919were the average path length --- the number of1920times the letters were forwarded --- was about six. 
This result
was taken to confirm previous observations (and speculations) that
the typical distance between any two people in a social network
is about ``six degrees of separation''.

\index{six degrees}

This conclusion is surprising because most people expect social
networks to be localized --- people tend to live near their
friends --- and in a graph with local connections, path lengths tend to
increase in proportion to geographical distance. For example, most of
my friends live nearby, so I would guess that the average distance
between nodes in a social network is about 50 miles. Wichita is about
1600 miles from Boston, so if Milgram's letters traversed typical
links in the social network, they should have taken 32 hops, not 6.

\index{hop}
\index{social network}
\index{local connection}


\section{Watts and Strogatz}
\label{watts}

In 1998 Duncan Watts and Steven Strogatz published a paper in {\em
Nature}, ``Collective dynamics of `small-world' networks'', that
proposed an explanation for the small world phenomenon. You can
download it from
\url{https://thinkcomplex.com/watts}.

\index{Watts, Duncan}
\index{Strogatz, Steven}
\index{small world network}

Watts and Strogatz start with two kinds of graph that were well
understood: random graphs and regular graphs. In a random graph, nodes
are connected at random. In a regular graph, every node has the
same number of neighbors.
They consider two
properties of these graphs, clustering and path length:

\index{random graph}
\index{regular graph}
\index{clustering}
\index{path length}

\begin{itemize}

\item Clustering is a measure of the ``cliquishness'' of the graph.
In a graph, a {\bf clique} is a subset of nodes that are
all connected to each other; in a social network, a clique is
a set of people who are all friends with each other.
Watts and Strogatz
defined a clustering coefficient that quantifies the likelihood
that two nodes that are connected to the same node are also
connected to each other.

\index{clique}

\item Path length is a measure of the average distance between
two nodes, which corresponds to the degrees of separation in
a social network.

\end{itemize}

Watts and Strogatz show that regular graphs
have high clustering and high path lengths, whereas
random graphs of the same size usually have low clustering
and low path lengths. So neither of these is a good model of
social networks, which combine high clustering with
short path lengths.

Their goal was to create a {\bf generative model} of a social
network. A generative model tries to explain a phenomenon by
modeling the process that builds or leads to the phenomenon.
Watts and Strogatz proposed this process for building
small-world graphs:
\index{generative model}

\begin{enumerate}

\item Start with a regular graph with $n$ nodes and each node
connected to $k$ neighbors.

\item Choose a subset of the edges and ``rewire'' them by
replacing them with random edges.
\index{rewire}

\end{enumerate}

The probability that an edge is rewired is a parameter, $p$,
that controls how random the graph is.
With $p=0$, the graph
is regular; with $p=1$ it is completely random.
\index{parameter}

Watts and Strogatz found that small values of $p$ yield graphs
with high clustering, like a regular graph, and low path
lengths, like a random graph.

In this chapter I replicate the Watts and Strogatz experiment
in the following steps:

\begin{enumerate}

\item We'll start by constructing a ring lattice, which is a kind
of regular graph.

\item Then we'll rewire it as Watts and Strogatz did.

\item We'll write a function to measure the degree of clustering
and use a NetworkX function to compute path lengths.

\item Then we'll compute the degree of clustering and path length for
a range of values of $p$.

\item Finally, I'll present Dijkstra's algorithm, which computes shortest paths efficiently.

\end{enumerate}


\section{Ring lattice}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap03-1.pdf}}
\caption{A ring lattice with $n=10$ and $k=4$.}
\label{chap03-1}
\end{figure}

A {\bf regular} graph is a graph where each node has the same number
of neighbors; the number of neighbors is also called the {\bf degree}
of the node.

\index{regular graph}
\index{ring lattice}

A ring lattice is a kind of regular graph, which Watts and Strogatz
use as the basis of their model.
In a ring lattice with $n$ nodes, the nodes can be arranged in a circle
with each node connected to the $k$ nearest neighbors.

For example, a ring lattice with $n=3$
and $k=2$ would contain the following edges: $(0, 1)$, $(1, 2)$, and
$(2, 0)$.
Notice that the edges ``wrap around'' from the
highest-numbered node back to 0.

More generally, we can enumerate the edges like this:

\begin{code}
def adjacent_edges(nodes, halfk):
    n = len(nodes)
    for i, u in enumerate(nodes):
        for j in range(i+1, i+halfk+1):
            v = nodes[j % n]
            yield u, v
\end{code}

\py{adjacent_edges} takes a list of nodes and a parameter,
\py{halfk}, which is half of $k$. It is a generator function that
yields one edge at a time. It uses the modulus operator, \verb"%",
to wrap around from the highest-numbered node to the lowest.

\index{generator function}

We can test it like this:

\begin{code}
>>> nodes = range(3)
>>> for edge in adjacent_edges(nodes, 1):
...     print(edge)
(0, 1)
(1, 2)
(2, 0)
\end{code}

Now we can use \py{adjacent_edges} to make a ring lattice:

\begin{code}
def make_ring_lattice(n, k):
    G = nx.Graph()
    nodes = range(n)
    G.add_nodes_from(nodes)
    G.add_edges_from(adjacent_edges(nodes, k//2))
    return G
\end{code}

Notice that \py{make_ring_lattice} uses floor division to compute
\py{halfk}, so it is only correct if \py{k} is even. If
\py{k} is odd, floor division rounds down, so the result is
a ring lattice with degree \py{k-1}.
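When \py{k} is even, we can check that every node really ends up with
degree $k$ by replaying the loop in \py{adjacent_edges} and counting
how often each node appears as an endpoint. Here is a quick check with
$n=10$ and $k=4$ (a standalone sketch; \py{Counter} is from the
standard library):

\begin{code}
from collections import Counter

n, halfk = 10, 2           # n=10, k=4
degrees = Counter()
for i in range(n):         # same loop structure as adjacent_edges
    for j in range(i+1, i+halfk+1):
        degrees[i] += 1
        degrees[j % n] += 1

# every one of the 10 nodes ends up with degree 4
\end{code}

Each node appears twice as the first endpoint and twice as the second,
for a total degree of 4.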
As one of the exercises at the
end of the chapter, you will generate regular graphs with odd
values of \py{k}.

\index{floor division}
\index{degree}

We can test \py{make_ring_lattice} like this:

\begin{code}
lattice = make_ring_lattice(10, 4)
\end{code}

Figure~\ref{chap03-1} shows the result.


\section{WS graphs}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap03-2.pdf}}
\caption{WS graphs with $n=20$, $k=4$, and $p=0$ (left), $p=0.2$ (middle),
and $p=1$ (right).}
\label{chap03-2}
\end{figure}

To make a Watts-Strogatz (WS) graph, we start with a ring lattice and
``rewire'' some of the edges. In their paper, Watts and Strogatz
consider the edges in a particular order and rewire each one with
probability $p$. If an edge is rewired, they leave the first node
unchanged and choose the second node at random. They don't allow self
loops or multiple edges; that is, you can't have an edge from a node to
itself, and you can't have more than one edge between the same two
nodes.

\index{Watts-Strogatz graph}
\index{rewire}

Here is my implementation of this process.

\begin{code}
def rewire(G, p):
    nodes = set(G)
    for u, v in G.edges():
        if flip(p):
            choices = nodes - {u} - set(G[u])
            new_v = np.random.choice(list(choices))
            G.remove_edge(u, v)
            G.add_edge(u, new_v)
\end{code}

The parameter \py{p} is the probability of rewiring an edge.
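Given \py{rewire}, building a WS graph takes just two steps: make a
ring lattice, then rewire it. Here is a sketch of \py{make_ws_graph},
the function the experiment later in this chapter uses:

\begin{code}
def make_ws_graph(n, k, p):
    """Makes a Watts-Strogatz graph: a ring lattice with n nodes
    and degree k, with each edge rewired with probability p."""
    ws = make_ring_lattice(n, k)
    rewire(ws, p)
    return ws
\end{code}

Because each rewiring removes one edge and adds one, the total number
of edges, $nk/2$, is unchanged. Now let's look more closely at how
\py{rewire} chooses the replacement edges.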
The
\py{for} loop enumerates the edges and uses \py{flip} (defined in Section~\ref{flip}) to choose which ones get rewired.

If we are rewiring an edge from node \py{u} to node \py{v}, we
have to choose a replacement for \py{v}, called \py{new_v}.

\begin{enumerate}

\item To compute the possible choices,
we start with \py{nodes}, which is a set,
and subtract off \py{u} and its neighbors, which avoids self
loops and multiple edges.

\item To choose \py{new_v}, we use the NumPy
function \py{choice}, which is in the module \py{random}.

\index{NumPy}
\index{random}
\index{choice}

\item Then we remove the existing edge from \py{u} to \py{v}, and

\item Add a new edge from \py{u} to \py{new_v}.

\end{enumerate}

As an aside, the expression \py{G[u]} returns a dictionary that contains the neighbors of \py{u} as keys. It is usually faster than using \py{G.neighbors}
(see \url{https://thinkcomplex.com/neigh}).

This function does not consider the edges in the order specified
by Watts and Strogatz, but that doesn't seem to affect
the results.

Figure~\ref{chap03-2} shows WS graphs with $n=20$, $k=4$, and
a range of values of $p$. When $p=0$, the graph is a ring lattice.
When $p=1$, it is completely random. As we'll see, the interesting
things happen in between.


\section{Clustering}
\label{clustering}

The next step is to compute the clustering coefficient, which
quantifies the tendency for the nodes to form cliques.
A {\bf clique} is a set of nodes that are completely connected;
that is, there are edges between all pairs of nodes in the set.

\index{clustering coefficient}
\index{network average clustering coefficient}

Suppose a particular node, $u$, has $k$ neighbors. If all of the
neighbors were connected to each other, there would be $k(k-1)/2$
edges among them.
The fraction of those edges that actually exist
is the local clustering coefficient for $u$, denoted $C_u$.

If we compute the average of $C_u$ over all nodes, we get the
``network average clustering coefficient'', denoted $\bar{C}$.

Here is a function that computes it.

\begin{code}
def node_clustering(G, u):
    neighbors = G[u]
    k = len(neighbors)
    if k < 2:
        return np.nan

    possible = k * (k-1) / 2
    exist = 0
    for v, w in all_pairs(neighbors):
        if G.has_edge(v, w):
            exist += 1
    return exist / possible
\end{code}

Again I use \py{G[u]}, which returns a dictionary with the neighbors of \py{u} as keys.

If a node has fewer than 2 neighbors, the clustering coefficient is undefined, so we return \py{np.nan}, which is a special value that indicates ``Not a Number''.

\index{NaN}
\index{Not a Number}

Otherwise we compute the number of possible edges among the neighbors, count the number of those edges that actually exist, and return the fraction that exist.

We can test the function like this:

\begin{code}
>>> lattice = make_ring_lattice(10, 4)
>>> node_clustering(lattice, 1)
0.5
\end{code}

In a ring lattice with $k=4$, the clustering coefficient for each node
is 0.5 (if you are not convinced, take another look at
Figure~\ref{chap03-1}).

Now we can compute the network average clustering coefficient like this:

\begin{code}
def clustering_coefficient(G):
    cu = [node_clustering(G, node) for node in G]
    return np.nanmean(cu)
\end{code}

The NumPy function \py{nanmean} computes the mean of the local clustering coefficients, ignoring any values that are \py{NaN}.

\index{NumPy}
\index{nanmean}

We can test \py{clustering_coefficient} like this:

\begin{code}
>>> clustering_coefficient(lattice)
0.5
\end{code}

In this graph, the local clustering coefficient for all nodes is 0.5,
so the average
across nodes is 0.5. Of course, we expect this value
to be different for WS graphs.


\section{Shortest path lengths}
\label{pathlength}

The next step is to compute the characteristic path length, $L$, which
is the average length of the shortest path between each pair of nodes.
To compute it, I'll start with a function provided by NetworkX,
\py{shortest_path_length}. I'll use it to replicate the Watts and
Strogatz experiment, then I'll explain how it works.

\index{characteristic path length}
\index{path length}
\index{shortest path}

Here's a function that takes a graph and returns a list of shortest
path lengths, one for each pair of nodes.

\begin{code}
def path_lengths(G):
    length_map = nx.shortest_path_length(G)
    lengths = [length_map[u][v] for u, v in all_pairs(G)]
    return lengths
\end{code}

The return value from \py{nx.shortest_path_length} is a dictionary
of dictionaries. The outer dictionary maps from each node, \py{u},
to a dictionary that maps from each node, \py{v}, to the length of
the shortest path from \py{u} to \py{v}.

With the list of lengths from \py{path_lengths}, we can compute $L$
like this:

\begin{code}
def characteristic_path_length(G):
    return np.mean(path_lengths(G))
\end{code}

And we can test it with a small ring lattice:

\begin{code}
>>> lattice = make_ring_lattice(3, 2)
>>> characteristic_path_length(lattice)
1.0
\end{code}

In this example, all 3 nodes are connected to each other, so the
mean path length is 1.


\section{The WS experiment}

\begin{figure}
\centerline{\includegraphics[width=3.5in]{figs/chap03-3.pdf}}
\caption{Clustering coefficient (C) and characteristic path length (L) for
WS graphs with $n=1000$, $k=10$, and a range of $p$.}
\label{chap03-3}
\end{figure}

Now we are ready to replicate the WS experiment, which shows that for
a range of
values of $p$, a WS graph has high clustering like a
regular graph and short path lengths like a random graph.

I'll start with \py{run_one_graph}, which takes \py{n}, \py{k}, and \py{p}; it
generates a WS graph with the given parameters and computes the
mean path length, \py{mpl}, and clustering coefficient, \py{cc}:

\begin{code}
def run_one_graph(n, k, p):
    ws = make_ws_graph(n, k, p)
    mpl = characteristic_path_length(ws)
    cc = clustering_coefficient(ws)
    return mpl, cc
\end{code}

Watts and Strogatz ran their experiment with \py{n=1000} and \py{k=10}.
With these parameters, \py{run_one_graph} takes a few seconds on
my computer; most of that time is spent computing the mean path length.

\index{NumPy}
\index{logspace}

Now we need to compute these values for a range of \py{p}. I'll use
the NumPy function \py{logspace} again to compute \py{ps}:

\begin{code}
ps = np.logspace(-4, 0, 9)
\end{code}

Here's the function that runs the experiment:

\begin{code}
def run_experiment(ps, n=1000, k=10, iters=20):
    res = []
    for p in ps:
        t = [run_one_graph(n, k, p) for _ in range(iters)]
        means = np.array(t).mean(axis=0)
        res.append(means)
    return np.array(res)
\end{code}

For each value of \py{p}, we generate 20 random graphs and average the
results. Since the return value from \py{run_one_graph} is a pair, \py{t} is a list of pairs. When we convert it to an array, we get one row for each iteration and columns for \py{L} and \py{C}.
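To see what this conversion does, here is the same computation with
made-up numbers standing in for two iterations:

\begin{code}
import numpy as np

t = [(3.2, 0.5), (3.4, 0.4)]   # two (mpl, cc) pairs; the values are made up
a = np.array(t)                # shape (2, 2): one row per iteration
a.mean(axis=0)                 # array([3.3 , 0.45])
\end{code}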
Calling \py{mean} with the option \py{axis=0} computes the mean of each column; the result is an array with two elements: the mean of \py{L} and the mean of \py{C}.

\index{NumPy}
\index{mean}

When the loop exits, \py{res} is a list of pairs, which we convert to a NumPy array with one row for each value of \py{p} and columns for \py{L} and \py{C}.

We can extract the columns like this:

\begin{code}
L, C = np.transpose(res)
\end{code}

In order to plot \py{L} and \py{C} on the same axes, we standardize them
by dividing through by the first element:

\begin{code}
L /= L[0]
C /= C[0]
\end{code}

Figure~\ref{chap03-3} shows the results. As $p$ increases, the mean
path length drops quickly, because even a small number of randomly
rewired edges provide shortcuts between regions of the graph that
are far apart in the lattice. On the other hand, removing local links
decreases the clustering coefficient much more slowly.

As a result, there is a wide range of $p$ where a WS graph has the
properties of a small world graph, high clustering and low path
lengths.

And that's why Watts and Strogatz propose WS graphs as a
model for real-world networks that exhibit the small world phenomenon.


\section{What kind of explanation is {\em that}?}

If you ask me why planetary orbits are elliptical,
I might start by modeling a planet and a star as point masses; I
would look up the law of universal gravitation at
\url{https://thinkcomplex.com/grav}
and use it to write a differential equation for the motion of
the planet. Then I would either derive the orbit equation or,
more likely, look it up at \url{https://thinkcomplex.com/orbit}.
With a little algebra, I could derive the conditions that
yield an elliptical orbit.
Then I would argue that the objects
we consider planets satisfy these conditions.

\index{orbit}
\index{ellipse}
\index{planetary motion}
\index{universal gravitation}

People, or at least scientists, are generally satisfied with
this kind of explanation. One of the reasons for its appeal
is that the assumptions and approximations in the model seem
reasonable. Planets and stars are not really point masses,
but the distances between them are so big that their actual
sizes are negligible. Planets in the same solar system can
affect each other's orbits, but the effect is usually small.
And we ignore relativistic effects, again on the assumption that
they are small.

\index{explanatory model}

This explanation is also appealing because it is equation-based.
We can express the orbit equation in a closed form, which means
that we can compute orbits efficiently. It also means that
we can derive general expressions for the orbital velocity,
orbital period, and other quantities.

\index{equation-based model}

Finally, I think this kind of explanation is appealing because
it has the form of a mathematical proof.
It is important to remember that the proof pertains to the
model and not the real world.
That is, we can prove that
an idealized model yields elliptical orbits, but
we can't prove that real orbits are ellipses (in
fact, they are not).
Nevertheless, the resemblance to a proof is appealing.

\index{mathematical proof}
\index{proof}

By comparison, Watts and Strogatz's explanation of the small
world phenomenon may seem less satisfying. First, the model
is more abstract, which is to say less realistic. Second,
the results are generated by simulation, not by mathematical
analysis.
Finally, the results seem less like a proof and
more like an example.

\index{abstract model}

Many of the models in this book are like the Watts and Strogatz model:
abstract, simulation-based and (at least superficially) less formal
than conventional mathematical models. One of the goals of this book
is to consider the questions these models raise:

\index{simulation-based model}

\begin{itemize}

\item What kind of work can these models do: are they predictive, or
explanatory, or both?

\item Are the explanations these models offer less satisfying than
explanations based on more traditional models? Why?

\item How should we characterize the differences between these and
more conventional models? Are they different in kind or only in
degree?

\end{itemize}

Over the course of the book I will offer my answers
to these questions, but they are tentative and sometimes
speculative. I encourage you to consider them skeptically
and reach your own conclusions.


\section{Breadth-First Search}
\label{bfs}

When we computed shortest paths, we used a function provided by
NetworkX, but I have not explained how it works. To do that, I'll
start with breadth-first search, which is the basis of Dijkstra's
algorithm for computing shortest paths.

\index{NetworkX}

In Section~\ref{connected} I presented \py{reachable_nodes}, which finds all
the nodes that can be reached from a given starting node:

\begin{code}
def reachable_nodes(G, start):
    seen = set()
    stack = [start]
    while stack:
        node = stack.pop()
        if node not in seen:
            seen.add(node)
            stack.extend(G.neighbors(node))
    return seen
\end{code}

I didn't say so at the time, but \py{reachable_nodes} performs a
depth-first search (DFS).
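To make the depth-first order visible, here is a variant of
\py{reachable_nodes} that records the order in which nodes are
visited. To keep the example self-contained, it uses a plain
dictionary of adjacency lists instead of a NetworkX graph; the graph
is a 4-node cycle:

\begin{code}
# adjacency lists for the cycle 0-1-3-2-0
G = {0: [1, 2], 1: [0, 3], 2: [0, 3], 3: [1, 2]}

def visit_order(G, start):
    """Like reachable_nodes, but returns nodes in the order visited."""
    order = []
    stack = [start]
    while stack:
        node = stack.pop()
        if node not in order:
            order.append(node)
            stack.extend(G[node])
    return order

visit_order(G, 0)   # [0, 2, 3, 1]: it goes deep along 0, 2, 3
                    # before backtracking to 1
\end{code}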
Now we'll modify it to perform breadth-first
search (BFS).

\index{depth-first search}
\index{breadth-first search}
\index{DFS}
\index{BFS}

To understand the difference, imagine you are exploring a castle.
You start in a room with three doors marked A, B, and C. You open
door C and discover another room, with doors marked D, E, and F.

Which door do you open next? If you are feeling adventurous, you might
want to go deeper into the castle and choose D, E, or F. That would be
a depth-first search.

But if you wanted to be more systematic, you might go back and explore
A and B before D, E, and F. That would be a breadth-first search.

In \py{reachable_nodes}, we use the list method \py{pop} to choose the next node to ``explore''. By default, \py{pop} returns the last element of the list, which is the last one we added. In the example, that would be door F.

\index{pop}

If we want to perform a BFS instead, the simplest solution is to
pop the first element of the list:

\begin{code}
node = stack.pop(0)
\end{code}

That works, but it is slow. In Python, popping the last element
of a list takes constant time, but popping the first element is linear
in the length of the list. In the worst case, the length of the
stack is $O(\V)$, which makes this implementation of BFS $O(\V\E)$,
which is much worse than what it should be, $O(\V + \E)$.

\index{stack}
\index{queue}
\index{deque}
\index{double-ended queue}

We can solve this problem with a double-ended queue, also known
as a {\bf deque}.
The important feature of a deque is that you
can add and remove elements from the beginning or end in constant time.
To see how it is implemented, see \url{https://thinkcomplex.com/deque}.

\index{collections module}

Python provides a \py{deque} in the \py{collections} module, so we can
import it like this:

\begin{code}
from collections import deque
\end{code}

And we can use it to write an efficient BFS:

\begin{code}
def reachable_nodes_bfs(G, start):
    seen = set()
    queue = deque([start])
    while queue:
        node = queue.popleft()
        if node not in seen:
            seen.add(node)
            queue.extend(G.neighbors(node))
    return seen
\end{code}

The differences are:

\begin{itemize}

\item I replaced the list called \py{stack} with a deque called \py{queue}.

\item I replaced \py{pop} with \py{popleft}, which removes and returns
the leftmost element of the queue.

\end{itemize}

This version is back to being $O(\V + \E)$. Now we're ready to
find shortest paths.


\section{Dijkstra's algorithm}
\label{dijkstra}

Edsger W. Dijkstra was a Dutch computer scientist who invented an
efficient shortest-path algorithm (see
\url{https://thinkcomplex.com/dijk}).
He also invented the semaphore, which is a data structure used to coordinate programs that
communicate with each other (see
\url{https://thinkcomplex.com/sem} and Downey, {\em The Little Book of Semaphores}).

\index{Dijkstra, Edsger}
\index{Little Book of Semaphores@{\em The Little Book of Semaphores}}

Dijkstra is famous (and notorious) as the author of a series
of essays on computer science.
Some, like ``A Case against the GO TO Statement'',
had a profound effect on programming practice.
Others, like
``On the Cruelty of Really Teaching Computing Science'', are
entertaining in their cantankerousness, but less effective.

{\bf Dijkstra's algorithm} solves the ``single source shortest path
problem'', which means that it finds the minimum distance from a given
``source'' node to every other node in the graph (or at least every
connected node).

\index{shortest path}
\index{single source shortest path}
\index{Dijkstra's algorithm}

I'll present a simplified version of the algorithm that
considers all edges the same length.
The more general version
works with any non-negative edge lengths.

The simplified version is similar to the breadth-first search
in the previous section except that we replace the set called
\py{seen} with a dictionary called \py{dist}, which maps from each
node to its distance from the source:

\begin{code}
def shortest_path_dijkstra(G, source):
    dist = {source: 0}
    queue = deque([source])
    while queue:
        node = queue.popleft()
        new_dist = dist[node] + 1

        neighbors = set(G[node]).difference(dist)
        for n in neighbors:
            dist[n] = new_dist

        queue.extend(neighbors)
    return dist
\end{code}

Here's how it works:

\begin{itemize}

\item Initially, \py{queue} contains a single element, \py{source}, and \py{dist} maps from \py{source} to distance 0 (which is the distance from \py{source} to itself).

\item Each time through the loop, we use \py{popleft} to select the next node in the queue.

\item Next we find all neighbors of \py{node} that are not already in
\py{dist}.

\item Since the distance from \py{source} to \py{node} is \py{dist[node]}, the
distance to any of the undiscovered neighbors is \py{dist[node]+1}.

\item For each neighbor, we add an entry to \py{dist}, then we add
the neighbors to the queue.

\end{itemize}

This algorithm only works if we use BFS, not DFS. To see why, consider this:

\index{BFS}
\index{DFS}

\begin{enumerate}

\item The first time through the loop \py{node} is \py{source}, and \py{new_dist}
is 1. So the neighbors of \py{source} get distance 1 and they
go in the queue.

\item When we process the neighbors of \py{source}, all of {\em their}
neighbors get distance 2.
We know that none of them can have
distance 1, because if they did, we would have discovered
them during the first iteration.

\item Similarly, when we process the nodes with distance 2, we give
their neighbors distance 3. We know that none of them can
have distance 1 or 2, because if they did, we would have
discovered them during a previous iteration.

\end{enumerate}

And so on. If you are familiar with proof by induction, you
can see where this is going.

\index{proof by induction}

But this argument only works if we process all nodes with distance
1 before we start processing nodes with distance 2, and so on.
And that's exactly what BFS does.

In the exercises at the end of this chapter, you'll write a version
of Dijkstra's algorithm using DFS, so you'll have a chance to see
what goes wrong.


\section{Exercises}

\begin{exercise}

In a ring lattice, every node has the same number of neighbors. The
number of neighbors is called the {\bf degree} of the node, and a
graph where all nodes have the same degree is called a {\bf regular
graph}.

\index{ring lattice}

All ring lattices are regular, but not all regular graphs are ring
lattices. In particular, if \py{k} is odd, we can't construct a ring
lattice, but we might be able to construct a regular graph.

Write a function called \py{make_regular_graph} that takes \py{n} and \py{k}
and returns a regular graph that contains \py{n} nodes, where every node
has \py{k} neighbors. If it's not possible to make a regular graph with
the given values of \py{n} and \py{k}, the function should raise a
\py{ValueError}.

\end{exercise}


\begin{exercise}

My implementation of \py{reachable_nodes_bfs} is efficient in the sense
that it is in $O(\V + \E)$, but it incurs a lot of overhead adding nodes
to the queue and removing them.
NetworkX provides a simple, fast
implementation of BFS, available from the NetworkX repository on
GitHub at \url{https://thinkcomplex.com/connx}.

Here is a version I modified to return a set of nodes:

\begin{code}
def plain_bfs(G, start):
    seen = set()
    nextlevel = {start}
    while nextlevel:
        thislevel = nextlevel
        nextlevel = set()
        for v in thislevel:
            if v not in seen:
                seen.add(v)
                nextlevel.update(G[v])
    return seen
\end{code}

Compare this function to \py{reachable_nodes_bfs} and see which is
faster. Then see if you can modify this function to implement a
faster version of \py{shortest_path_dijkstra}.

\end{exercise}


\begin{exercise}

The following implementation of BFS
contains two performance errors. What are
they? What is the actual order of growth for this algorithm?

\index{BFS}
\index{order of growth}
\index{performance error}

\begin{code}
def bfs(G, start):
    visited = set()
    queue = [start]
    while len(queue):
        curr_node = queue.pop(0)    # Dequeue
        visited.add(curr_node)

        # Enqueue non-visited and non-enqueued children
        queue.extend(c for c in G[curr_node]
                     if c not in visited and c not in queue)
    return visited
\end{code}

\end{exercise}


\begin{exercise}
In Section~\ref{dijkstra}, I claimed that Dijkstra's algorithm does
not work unless it uses BFS.
Write a version of
\py{shortest_path_dijkstra} that uses DFS and test it on a few examples
to see what goes wrong.
\end{exercise}


\begin{exercise}

%TODO: Do this exercise (project idea)

A natural question about the Watts and Strogatz paper is
whether the small world phenomenon is specific to their
generative model or whether other similar models yield
the same qualitative result (high clustering and low path lengths).

\index{small world phenomenon}

To answer this question, choose a variation of the
Watts and Strogatz model and repeat the experiment.
There are two kinds of variation you might consider:

\begin{itemize}

\item Instead of starting with a regular graph, start with
another graph with high clustering. For example, you could
put nodes at random locations in a 2-D space
and connect each node to its nearest $k$ neighbors.

\item Experiment with different kinds of rewiring.

\end{itemize}

If a range of similar models yield similar behavior, we
say that the results of the paper are {\bf robust}.

\index{robust}

\end{exercise}


\begin{exercise}

Dijkstra's algorithm solves the ``single source shortest path''
problem, but to compute the characteristic path length of a graph,
we actually want to solve the ``all pairs shortest path'' problem.

\index{all pairs shortest path}

Of course, one option is to run Dijkstra's algorithm $n$ times,
once for each starting node. And for some applications, that's
probably good enough. But there are more efficient alternatives.

Find an algorithm for the all-pairs shortest path problem and
implement it. See
\url{https://thinkcomplex.com/short}.

Compare the run time of your implementation with running
Dijkstra's algorithm $n$ times. Which algorithm is better in
theory? Which is better in practice?
Which one does NetworkX
use?

% https://github.com/networkx/networkx/blob/master/networkx/algorithms/shortest_paths/unweighted.py

\end{exercise}



\chapter{Scale-free networks}
\label{scale-free}

\newcommand{\Barabasi}{Barab\'{a}si}

In this chapter, we'll work with data from an online social network, and use a
Watts-Strogatz graph to model it.  The WS model has characteristics of
a small world network, like the data, but it has low
variability in the number of neighbors from node to node,
unlike the data.

This discrepancy is the motivation for a network model developed
by \Barabasi~and Albert.  The BA model captures the observed variability
in the number of neighbors, and it has one of the small world
properties, short path lengths, but it does not have the high
clustering of a small world network.

The chapter ends with a discussion of WS and BA graphs as explanatory
models for small world networks.

The code for this chapter is in {\tt chap04.ipynb} in the repository
for this book.  More information about working with the code is
in Section~\ref{code}.


\section{Social network data}

Watts-Strogatz graphs are intended to model networks in the natural
and social sciences.  In their original paper, Watts and Strogatz
looked at the network of film actors (connected if they have appeared
in a movie together); the electrical power grid in the western United
States; and the network of neurons in the brain of the roundworm
{\it C. elegans}.  They found that all of these networks had the
high connectivity and low path lengths characteristic of small world
graphs.

\index{Watts-Strogatz graph}

In this section we'll perform the same analysis with a different
dataset, a set of Facebook users and their friends.
If you are not
familiar with Facebook, users who are connected to each other are
called ``friends'', regardless of the nature of their relationship in
the real world.

\index{Facebook data}
\index{SNAP}

I'll use data from the Stanford Network Analysis Project (SNAP), which
shares large datasets from online social networks and other sources.
Specifically, I'll use their Facebook data\footnote{J. McAuley and
J. Leskovec.  Learning to Discover Social Circles in Ego
Networks.  NIPS, 2012.}, which includes 4039 users and 88,234
friend relationships among them.  This dataset is in the repository
for this book, but it is also available from the SNAP website at
\url{https://thinkcomplex.com/snap}.

The data file contains one line per edge, with users identified by
integers from 0 to 4038.  Here's the code that reads the file:

\begin{code}
def read_graph(filename):
    G = nx.Graph()
    array = np.loadtxt(filename, dtype=int)
    G.add_edges_from(array)
    return G
\end{code}

NumPy provides a function called \py{loadtxt} that reads the
given file and returns the contents as a NumPy array.  The
parameter \py{dtype} indicates that the ``data type'' of the array
is \py{int}.

\index{NumPy}
\index{loadtxt}
\index{dtype}

Then we use \py{add_edges_from} to iterate the rows of the array
and make edges.  Here are the results:

\begin{code}
>>> fb = read_graph('facebook_combined.txt.gz')
>>> n = len(fb)
>>> m = len(fb.edges())
>>> n, m
(4039, 88234)
\end{code}

The node and edge counts are consistent with the documentation
of the dataset.

Now we can check whether this dataset has the characteristics of
a small world graph: high clustering and low path lengths.

In Section~\ref{clustering} we wrote a function to compute
the network average clustering coefficient.
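That computation can be sketched in a few lines using NetworkX's per-node \py{nx.clustering} function; this is a minimal illustration, not the book's implementation, and the helper name \py{network_average_clustering} is mine:

```python
import networkx as nx

def network_average_clustering(G):
    # Average the per-node clustering coefficients.
    # nx.clustering returns a dict mapping node -> coefficient.
    coeffs = nx.clustering(G)
    return sum(coeffs.values()) / len(coeffs)
```

For example, on the complete graph with four nodes, every node's clustering coefficient is 1, so this returns 1.0.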
NetworkX provides
a function called \py{average_clustering}, which does the same
thing a little faster.

\index{NetworkX}
\index{average clustering}

But for larger graphs, they are both too slow, taking time
proportional to $n k^2$, where $n$ is the number of nodes and
$k$ is the number of neighbors each node is connected to.

Fortunately, NetworkX provides a function that estimates the
clustering coefficient by random sampling.  You can invoke it like
this:

\begin{code}
from networkx.algorithms.approximation import average_clustering
average_clustering(G, trials=1000)
\end{code}

The following function does something similar for path lengths.

\begin{code}
def sample_path_lengths(G, nodes=None, trials=1000):
    if nodes is None:
        nodes = list(G)
    else:
        nodes = list(nodes)

    pairs = np.random.choice(nodes, (trials, 2))
    lengths = [nx.shortest_path_length(G, *pair)
               for pair in pairs]
    return lengths
\end{code}

\py{G} is a graph, \py{nodes} is the list of nodes to sample
from, and \py{trials} is the number of random paths to sample.
If \py{nodes} is \py{None}, we sample from the entire graph.

\index{NumPy}
\index{random}
\index{choice}

\py{pairs} is a NumPy array of randomly chosen nodes with
one row for each trial and two columns.

\index{list comprehension}

The list comprehension enumerates the rows in the array and
computes the shortest distance between each pair of nodes.
The result is a list of path lengths.

\py{estimate_path_length} generates a list of random path lengths and
returns their mean:

\begin{code}
def estimate_path_length(G, nodes=None, trials=1000):
    return np.mean(sample_path_lengths(G, nodes, trials))
\end{code}

I'll use \py{average_clustering} to compute $C$:

\begin{code}
C = average_clustering(fb)
\end{code}

And \py{estimate_path_length} to compute
$L$:

\begin{code}
L = estimate_path_length(fb)
\end{code}

The clustering coefficient is about 0.61, which is high,
as we expect if this network has the small world property.

And the average path length is 3.7, which is
quite short in a network of more than 4000 users.  It's a small
world after all.

Now let's see if we can construct a WS graph that has the same
characteristics as this network.


\section{WS Model}

\index{Watts-Strogatz graph}
\index{Facebook data}

In the Facebook dataset, the average number of edges per node is about
22.  Since each edge is connected to two nodes, the average degree
is twice the number of edges per node:

\begin{code}
>>> k = int(round(2*m/n))
>>> k
44
\end{code}

We can make a WS graph with \py{n=4039} and \py{k=44}.  When \py{p=0}, we
get a ring lattice.

\begin{code}
lattice = nx.watts_strogatz_graph(n, k, 0)
\end{code}

In this graph, clustering is high: \py{C} is 0.73, compared to 0.61
in the dataset.
But \py{L} is 46, much higher
than in the dataset!

With \py{p=1} we get a random graph:

\begin{code}
random_graph = nx.watts_strogatz_graph(n, k, 1)
\end{code}

In the random graph, \py{L} is 2.6, even shorter than
in the dataset (3.7), but \py{C} is only 0.011, so that's no good.

By trial and error, we find that when \py{p=0.05} we get a WS graph with
high clustering and low path length:

\begin{code}
ws = nx.watts_strogatz_graph(n, k, 0.05, seed=15)
\end{code}

In this graph \py{C} is 0.63, a bit higher than
in the dataset, and \py{L} is 3.2, a bit lower than in the dataset.
So this graph models the small world characteristics of the dataset
well.

So far, so good.


\section{Degree}
\label{degree}

\begin{figure}
\centerline{\includegraphics[width=5.5in]{figs/chap04-1.pdf}}
\caption{PMF of degree in the Facebook dataset and in the WS model.}
\label{chap04-1}
\end{figure}

If the WS graph is a good model for the Facebook network,
it should have the same average degree across nodes, and ideally the
same variance in degree.

\index{degree}

This function returns a list of degrees in a graph, one for each node:

\begin{code}
def degrees(G):
    return [G.degree(u) for u in G]
\end{code}

The mean degree in the model is 44, which is close to the mean degree
in the dataset, 43.7.

However, the standard deviation of degree in the model is 1.5, which
is not close to the standard deviation in the dataset, 52.4.  Oops.

What's the problem?
To get a better view, we have to look at the
{\bf distribution} of degrees, not just the mean and standard deviation.

\index{degree distribution}
\index{PMF object}
\index{probability mass function}

I'll represent the distribution of degrees with a \py{Pmf} object,
which is defined in the \py{thinkstats2} module.
\py{Pmf} stands for ``probability mass function'';
if you are not familiar with this concept,
you might want to read Chapter 3 of {\it Think Stats, 2nd edition}
at \url{https://thinkcomplex.com/ts2}.

Briefly, a \py{Pmf} maps from values to their probabilities.
A \py{Pmf} of degrees is a mapping from each possible degree, $d$, to the
fraction of nodes with degree $d$.

As an example, I'll construct a graph with nodes 1, 2, and 3 connected
to a central node, 0:

\begin{code}
G = nx.Graph()
G.add_edge(1, 0)
G.add_edge(2, 0)
G.add_edge(3, 0)
nx.draw(G)
\end{code}

Here's the list of degrees in this graph:

\begin{code}
>>> degrees(G)
[3, 1, 1, 1]
\end{code}

Node 0 has degree 3, the others have degree 1.  Now I can make
a \py{Pmf} that represents this degree distribution:

\begin{code}
>>> from thinkstats2 import Pmf
>>> Pmf(degrees(G))
Pmf({1: 0.75, 3: 0.25})
\end{code}

The result is a \py{Pmf} object that maps from each degree to a
fraction or probability.
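If \py{thinkstats2} is not at hand, the same mapping can be sketched with a normalized \py{Counter}; the helper name \py{degree_pmf} is mine, not part of \py{thinkstats2}:

```python
from collections import Counter

def degree_pmf(degrees):
    # Map each degree to the fraction of nodes with that degree.
    counts = Counter(degrees)
    n = len(degrees)
    return {d: count / n for d, count in counts.items()}
```

With the example graph above, \py{degree_pmf([3, 1, 1, 1])} returns the same mapping, \py{{3: 0.25, 1: 0.75}}.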
In this example, 75\% of the nodes have
degree 1 and 25\% have degree 3.

Now we can make a \py{Pmf} that contains node degrees from the
dataset, and compute the mean and standard deviation:

\begin{code}
>>> from thinkstats2 import Pmf
>>> pmf_fb = Pmf(degrees(fb))
>>> pmf_fb.Mean(), pmf_fb.Std()
(43.691, 52.414)
\end{code}

And the same for the WS model:

\begin{code}
>>> pmf_ws = Pmf(degrees(ws))
>>> pmf_ws.Mean(), pmf_ws.Std()
(44.000, 1.465)
\end{code}

We can use the \py{thinkplot} module to plot the results:

\begin{code}
thinkplot.Pdf(pmf_fb, label='Facebook')
thinkplot.Pdf(pmf_ws, label='WS graph')
\end{code}

Figure~\ref{chap04-1} shows the two distributions.  They are
very different.

\index{thinkplot module}

In the WS model, most users have about 44 friends; the minimum is 38
and the maximum is 50.  That's not much variation.
In the dataset, there are many users with only 1 or 2 friends,
but one has more than 1000!

\index{heavy-tailed distribution}

Distributions like this, with many small values and a few very large
values, are called {\bf heavy-tailed}.


\section{Heavy-tailed distributions}
\label{heavytail}

\begin{figure}
\centerline{\includegraphics[width=5.5in]{figs/chap04-2.pdf}}
\caption{PMF of degree in the Facebook dataset and in the WS model,
on a log-log scale.}
\label{chap04-2}
\end{figure}

Heavy-tailed distributions are a
common feature in many areas of complexity science and they will be a
recurring theme of this book.

We can get a clearer picture of a heavy-tailed distribution by
plotting it on a log-log axis, as shown in Figure~\ref{chap04-2}.
This transformation emphasizes the tail of the distribution; that
is, the probabilities of large values.

\index{logarithm}
\index{log-log axis}
\index{power law}

\newcommand{\PMF}{\mathrm{PMF}}
\newcommand{\CDF}{\mathrm{CDF}}
\newcommand{\CCDF}{\mathrm{CCDF}}

Under this transformation, the data fall approximately on a
straight line, which suggests that there is a {\bf power law}
relationship between the largest values in the distribution and their
probabilities.  Mathematically, a distribution obeys a power law if
%
\[ \PMF(k) \sim k^{-\alpha} \]
%
where $\PMF(k)$ is the fraction of nodes with degree $k$, $\alpha$
is a parameter, and the symbol $\sim$ indicates that the PMF is
asymptotic to $k^{-\alpha}$ as $k$ increases.

If we take the log of both sides, we get
%
\[ \log \PMF(k) \sim -\alpha \log k \]
%
So if a distribution follows a power law and we plot $\PMF(k)$ versus
$k$ on a log-log scale, we expect a straight line with slope
$-\alpha$, at least for large values of $k$.

All power law distributions are heavy-tailed, but there are other
heavy-tailed distributions that don't follow a power law.  We will
see more examples soon.

But first, we have a problem: the WS model has the high clustering
and low path length we see in the data, but the degree distribution
doesn't resemble the data at all.  This discrepancy is the motivation
for our next topic, the \Barabasi-Albert model.


\section{\Barabasi-Albert model}
\label{scale.free}

In 1999 \Barabasi~and Albert published a paper,
``Emergence of Scaling in Random Networks'', that characterizes the
structure of several real-world networks,
including graphs that represent the interconnectivity of movie actors,
web pages, and elements in the electrical power grid
in the western United States.
You can download the paper from
\url{https://thinkcomplex.com/barabasi}.

\index{Barabasi-Albert graph@\Barabasi-Albert graph}
\index{movie actor data}
\index{world-wide web data}
\index{electrical power grid data}

They measure the degree of each node and compute $\PMF(k)$, the
probability that a vertex has degree $k$.  Then they plot $\PMF(k)$
versus $k$ on a log-log scale.  The plots fit a
straight line, at least for large values of $k$,
so \Barabasi~and Albert conclude that these
distributions are heavy-tailed.

They also propose a model that generates graphs with the same
property.  The essential features of the model, which distinguish it
from the WS model, are:

\index{generative model}
\index{growth}

\begin{description}

\item[Growth:] Instead of starting with a fixed number of vertices,
the BA model starts with a small graph and adds vertices one at a time.

\item[Preferential attachment:] When a new edge is created, it is
more likely to connect to a vertex that already has a large number
of edges.  This ``rich get richer'' effect is characteristic of
the growth patterns of some real-world networks.

\index{preferential attachment}
\index{rich get richer}

\end{description}

Finally, they show that graphs generated by the \Barabasi-Albert (BA)
model have a degree distribution that obeys a power law.

\index{BA graph}

Graphs with this property are sometimes called {\bf scale-free networks},
for reasons I won't explain; if you are curious, you can read more
at \url{https://thinkcomplex.com/scale}.

\index{scale-free network}
\index{NetworkX}

NetworkX provides a function that generates BA graphs.
We will use
it first; then I'll show you how it works.

\begin{code}
ba = nx.barabasi_albert_graph(n=4039, k=22)
\end{code}

The parameters are \py{n}, the number of nodes to generate, and
\py{k}, the number of edges each node starts with when it is added to
the graph.  I chose \py{k=22} because that is the average number
of edges per node in the dataset.

\begin{figure}
\centerline{\includegraphics[width=5.5in]{figs/chap04-3.pdf}}
\caption{PMF of degree in the Facebook dataset and in the BA model,
on a log-log scale.}
\label{chap04-3}
\end{figure}

The resulting graph has 4039 nodes and 21.9 edges per node.
Since every edge is connected to two nodes, the average degree
is 43.8, very close to the average degree in the dataset,
43.7.

And the standard deviation of degree is 40.9, which is a bit
less than in the dataset, 52.4, but it is much better
than what we got from the WS graph, 1.5.

Figure~\ref{chap04-3} shows the degree distributions for the
Facebook dataset and the BA model on a log-log scale.  The model
is not perfect; in particular, it deviates from the data when
\py{k} is less than 10.  But the tail looks like a straight line,
which suggests that this process generates degree distributions
that follow a power law.

\index{degree distribution}

So the BA model is better than the WS model at reproducing the degree
distribution.  But does it have the small world property?

\index{small world property}

In this example, the average path length, $L$, is $2.5$, which
is even more ``small world'' than the actual network, which has
$L=3.69$.  So that's good, although maybe too good.

\index{clustering coefficient}

On the other hand, the clustering coefficient, $C$, is $0.037$,
not even close to the value in the dataset, $0.61$.
So that's a problem.

Table~\ref{table04-1} summarizes these results.
The WS model captures
the small world characteristics, but not the degree distribution.  The
BA model captures the degree distribution, at least approximately,
and the average path length, but not the clustering coefficient.

In the exercises at the end of this chapter, you can explore other
models intended to capture all of these characteristics.

\begin{table}[]
\centering
\begin{tabular}{lrrr}
\hline
            & \textbf{Facebook} & \textbf{WS model} & \textbf{BA model} \\
\hline
C           & 0.61  & 0.63 & 0.037 \\
L           & 3.69  & 3.23 & 2.51  \\
Mean degree & 43.7  & 44   & 43.7  \\
Std degree  & 52.4  & 1.5  & 40.1  \\
Power law?  & maybe & no   & yes   \\
\hline
\end{tabular}
\caption{Characteristics of the Facebook dataset compared to two models.}
\label{table04-1}
\end{table}


\section{Generating BA graphs}

In the previous sections we used a NetworkX function to generate BA
graphs; now let's see how it works.  Here is a version
of \py{barabasi_albert_graph}, with some changes I made to
make it easier to read:

\begin{code}
def barabasi_albert_graph(n, k):

    G = nx.empty_graph(k)
    targets = list(range(k))
    repeated_nodes = []

    for source in range(k, n):
        G.add_edges_from(zip([source]*k, targets))

        repeated_nodes.extend(targets)
        repeated_nodes.extend([source] * k)

        targets = _random_subset(repeated_nodes, k)

    return G
\end{code}

The parameters are \py{n}, the number of nodes we want, and \py{k}, the
number of edges each new node gets (which will turn out to be
the average number of edges per node).

\index{NetworkX}
\index{zip}

We start with a graph that has \py{k} nodes and no edges.  Then we
initialize two variables:

\begin{description}

\item[\py{targets}:] The list of \py{k} nodes that will be connected
to the next node.
Initially \py{targets} contains the original
\py{k} nodes; later it will contain a random subset of nodes.

\item[\py{repeated_nodes}:] A list of existing nodes where each
node appears once for every edge it is connected to.  When we
select from \py{repeated_nodes}, the probability of selecting any
node is proportional to the number of edges it has.

\end{description}

Each time through the loop, we add edges from the source to
each node in \py{targets}.  Then we update \py{repeated_nodes} by
adding each target once and the new node \py{k} times.

Finally, we choose a subset of the nodes to be targets for the
next iteration.  Here's the definition of \py{_random_subset}:

\begin{code}
def _random_subset(repeated_nodes, k):
    targets = set()
    while len(targets) < k:
        x = random.choice(repeated_nodes)
        targets.add(x)
    return targets
\end{code}

Each time through the loop, \py{_random_subset} chooses from
\py{repeated_nodes} and adds the chosen node to \py{targets}.  Because
\py{targets} is a set, it automatically discards duplicates, so
the loop only exits when we have selected \py{k} different nodes.


\section{Cumulative distributions}
\label{cdf}

\begin{figure}
\centerline{\includegraphics[width=5.5in]{figs/chap04-4.pdf}}
\caption{CDF of degree in the Facebook dataset along with the WS model (left) and the BA model (right), on a log-x scale.}
\label{chap04-4}
\end{figure}

Figure~\ref{chap04-3} represents the degree distribution by plotting
the probability mass function (PMF) on a log-log scale.  That's how
\Barabasi~and Albert present their results and it is the representation
used most often in articles about power law distributions.
But it
is not the best way to look at data like this.

A better alternative is a {\bf cumulative distribution function}
(CDF), which maps from a value, $x$, to the fraction of values less
than or equal to $x$.

\index{cumulative distribution function}
\index{CDF}

Given a \py{Pmf}, the simplest way to compute a cumulative probability
is to add up the probabilities for values up to and including $x$:

\begin{code}
def cumulative_prob(pmf, x):
    ps = [pmf[value] for value in pmf if value<=x]
    return np.sum(ps)
\end{code}

For example, given the degree distribution in the dataset,
\py{pmf_fb}, we can compute the fraction of users with 25 or fewer
friends:

\begin{code}
>>> cumulative_prob(pmf_fb, 25)
0.506
\end{code}

The result is close to 0.5, which means that the median number
of friends is about 25.

CDFs are better for visualization because they are less noisy than
PMFs.  Once you get used to interpreting CDFs, they provide
a clearer picture of the shape of a
distribution than PMFs.

The \py{thinkstats2} module provides a class called \py{Cdf} that
represents a cumulative distribution function.  We can use it
to compute the CDF of degree in the dataset.

\begin{code}
from thinkstats2 import Cdf
cdf_fb = Cdf(degrees(fb), label='Facebook')
\end{code}

And \py{thinkplot} provides a function called \py{Cdf} that plots
cumulative distribution functions.

\index{thinkplot module}

\begin{code}
thinkplot.Cdf(cdf_fb)
\end{code}

Figure~\ref{chap04-4} shows the degree CDF for the Facebook dataset
along with the WS model (left) and the BA model (right).
The x-axis
is on a log scale.

\begin{figure}
\centerline{\includegraphics[width=5.5in]{figs/chap04-5.pdf}}
\caption{Complementary CDF of degree in the Facebook dataset along with the WS model (left) and the BA model (right), on a log-log scale.}
\label{chap04-5}
\end{figure}

Clearly the CDF for the WS model is very different from the CDF
from the data.  The BA model is better, but still not very good,
especially for small values.

\index{WS model}
\index{BA model}

In the tail of the distribution (values greater than 100) it looks
like the BA model matches the dataset well enough, but it is
hard to see.  We can get a clearer view with one other view of the
data: plotting the complementary CDF on a log-log scale.

The {\bf complementary CDF} (CCDF) is defined
%
\[ \CCDF(x) \equiv 1 - \CDF(x) \]
%
This definition is useful because if the PMF follows a power law, the CCDF
also follows a power law:
%
\[ \CCDF(x) \sim \left( \frac{x}{x_m} \right)^{-\alpha} \]
%
where $x_m$ is the minimum possible value and $\alpha$ is a parameter
that determines the shape of the distribution.

Taking the log of both sides yields:
%
\[ \log \CCDF(x) \sim -\alpha (\log x - \log x_m) \]
%
So if the distribution obeys a power law, we expect the CCDF on
a log-log scale to be a straight line with slope $-\alpha$.

\index{logarithm}

Figure~\ref{chap04-5} shows the CCDF of degree for the Facebook data,
along with the WS model (left) and the BA model (right), on a log-log
scale.

With this way of looking at the data, we can see that the BA model
matches the tail of the distribution (values above 20) reasonably well.
The WS model does not.


\section{Explanatory models}
\label{model1}

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/model.pdf}}
\caption{The logical structure of an explanatory model.\label{fig.model}}
\end{figure}

We started the discussion of networks with Milgram's Small World
Experiment, which shows that path lengths in social
networks are surprisingly small; hence, ``six degrees of separation''.

\index{six degrees}
\index{explanatory model}
\index{system}
\index{observable}
\index{model}
\index{behavior}

When we see something surprising, it is natural to ask ``Why?'' but
sometimes it's not clear what kind of answer we are looking for.  One
kind of answer is an {\bf explanatory model} (see
Figure~\ref{fig.model}).  The logical structure of an explanatory
model is:

\begin{enumerate}

\item In a system, S, we see something observable, O, that warrants
explanation.

\item We construct a model, M, that is analogous to the system; that
is, there is a correspondence between the elements of the model and
the elements of the system.

\item By simulation or mathematical derivation, we show that the model
exhibits a behavior, B, that is analogous to O.

\item We conclude that S exhibits O {\em because} S is similar to M, M
exhibits B, and B is similar to O.

\end{enumerate}

At its core, this is an argument by analogy, which says that if two
things are similar in some ways, they are likely to be similar in
other ways.

\index{analogy}
\index{argument by analogy}

Argument by analogy can be useful, and explanatory models can be
satisfying, but they do not constitute a proof in the mathematical
sense of the word.

\index{proof}
\index{mathematical proof}

Remember that all models leave out, or ``abstract away'',
details that we think are unimportant.
For any system there
are many possible models that include or ignore different features.
And there might be models that exhibit different behaviors that are
similar to O in different ways.
In that case, which model explains O?

\index{abstract model}

The small world phenomenon is an example: the Watts-Strogatz (WS)
model and the \Barabasi-Albert (BA) model both exhibit elements of
small world behavior, but they offer different explanations:

\begin{itemize}

\item The WS model suggests that social networks are ``small'' because
they include both strongly-connected clusters and ``weak ties'' that
connect clusters (see \url{https://thinkcomplex.com/weak}).

\index{weak tie}

\item The BA model suggests that social networks are small because
they include nodes with high degree that act as hubs, and that
hubs grow, over time, due to preferential attachment.

\index{preferential attachment}

\end{itemize}

As is often the case in young areas of science, the problem is
not that we have no explanations, but too many.


\section{Exercises}

\begin{exercise}

In Section~\ref{model1} we discussed two explanations for the
small world phenomenon, ``weak ties'' and ``hubs''.
Are these explanations compatible; that is, can they both be right?
Which do you find more satisfying as an explanation, and why?

\index{weak tie}
\index{hub}

Is there data you could collect, or experiments you could perform,
that would provide evidence in favor of one model over the other?

Choosing among competing models is the topic of Thomas Kuhn's
essay, ``Objectivity, Value Judgment, and Theory Choice'', which
you can read at \url{https://thinkcomplex.com/kuhn}.

\index{Kuhn, Thomas}
\index{theory choice}
\index{objectivity}

What criteria does Kuhn propose for choosing among competing models?
Do these criteria influence your opinion
about the WS and BA models?
Are there other criteria you think should be considered?

\end{exercise}


\begin{exercise}

NetworkX provides a function called \py{powerlaw_cluster_graph} that
implements the ``Holme and Kim algorithm for growing graphs with
powerlaw degree distribution and approximate average clustering''.
Read the documentation of this function
(\url{https://thinkcomplex.com/hk}) and see if you can use it to
generate a graph that has the same number of nodes as the Facebook
dataset, the same average degree, and the same clustering coefficient.
How does the degree distribution in the model compare to the actual
distribution?

\index{Holme-Kim graph}
\index{HK model}

\end{exercise}


\begin{exercise}

Data files from the \Barabasi~and Albert paper are available from
\url{https://thinkcomplex.com/netdata}.  Their actor collaboration
data is included in the repository for this book in a
file named \py{actor.dat.gz}.  The following function reads the file and
builds the graph.

\begin{code}
import gzip

def read_actor_network(filename, n=None):
    G = nx.Graph()
    with gzip.open(filename) as f:
        for i, line in enumerate(f):
            nodes = [int(x) for x in line.split()]
            G.add_edges_from(thinkcomplexity.all_pairs(nodes))
            if n and i >= n:
                break
    return G
\end{code}

Compute the number of actors in the graph and the average degree.
Plot the PMF of degree on a log-log scale.
Also plot the CDF of
degree on a log-x scale, to see the general shape of the distribution,
and on a log-log scale, to see whether the tail follows a power law.

Note: The actor network is not connected, so you might want to use
\py{nx.connected_component_subgraphs} to find connected subsets of the
nodes.

\end{exercise}


\chapter{Cellular Automatons}
\label{automatons}

A {\bf cellular automaton} (CA) is a model of a world with very simple
physics.  ``Cellular'' means that the world is divided into discrete
chunks, called cells.  An ``automaton'' is a machine that performs
computations --- it could be a real machine, but more often the
``machine'' is a mathematical abstraction or a computer simulation.

\index{cellular automaton}

This chapter presents experiments Stephen Wolfram performed
in the 1980s, showing that some cellular automatons display
surprisingly complicated behavior, including the ability to
perform arbitrary computations.

\index{Wolfram, Stephen}

I discuss implications of these results, and at the end of the
chapter I suggest methods for implementing CAs efficiently in Python.

The code for this chapter is in {\tt chap05.ipynb} in the repository
for this book.  More information about working with the code is
in Section~\ref{code}.


\section{A simple CA}

Cellular automatons\footnote{You might also see the plural
``automata''.} are governed by rules that determine how the state of
the cells changes over time.

\index{time step}

As a trivial example, consider a cellular automaton (CA) with
a single cell.  The state of the cell during time step $i$ is an
integer, $x_i$.  As an initial condition, suppose $x_0 = 0$.

\index{state}

Now all we need is a rule.  Arbitrarily, I'll pick $x_{i+1} = x_{i} + 1$,
which says that during each time step, the state of the CA gets
incremented by 1.
So this CA performs a simple calculation: it counts.

\index{rule}

But this CA is atypical; normally the number of possible states is
finite.  As an example, suppose a cell can only have one of two
states, 0 or 1.  For a 2-state CA, we could write a rule like
$x_{i+1} = (x_{i} + 1) \% 2$, where $\%$ is the remainder (or modulus)
operator.

The behavior of this CA is simple: it blinks.  That is, the state of
the cell switches between 0 and 1 during each time step.

Most CAs are {\bf deterministic}, which means that rules do not have
any random elements; given the same initial state, they always produce
the same result.  But some CAs are nondeterministic; we will see
examples later.

\index{deterministic}
\index{nondeterministic}

The CA in this section has only one cell, so we can think of it as
zero-dimensional.  In the rest of this chapter, we explore
one-dimensional (1-D) CAs; in the next chapter we explore
two-dimensional CAs.

\index{1-D cellular automaton}


\section{Wolfram's experiment}
\label{onedim}

In the early 1980s Stephen Wolfram published a series of papers
presenting a systematic study of 1-D CAs.  He identified four
categories of behavior, each more interesting than the last.  You can
read one of these papers, ``Statistical mechanics of cellular
automata'', at \url{https://thinkcomplex.com/ca}.

In Wolfram's experiments, the cells are arranged in a lattice (which
you might remember from Section~\ref{watts}) where each cell is
connected to two neighbors.
The lattice can be finite, infinite, or arranged in a ring.

The rules that determine how the system evolves in time are based on
the notion of a ``neighborhood'', which is the set of cells that
determines the next state of a given cell.  Wolfram's experiments use
a 3-cell neighborhood: the cell itself and its two neighbors.

\index{neighborhood}

In these experiments, the cells have two states, denoted 0 and 1 or
``off'' and ``on''.  A rule can be summarized by a table that maps
from the state of the neighborhood (a tuple of three states) to the
next state of the center cell.  The following table shows an example:

\index{rule table}
\index{state}

\centerline{
\begin{tabular}{|c|c|c|c|c|c|c|c|c|}
\hline
prev & 111 & 110 & 101 & 100 & 011 & 010 & 001 & 000 \\
\hline
next &  0  &  0  &  1  &  1  &  0  &  0  &  1  &  0  \\
\hline
\end{tabular}}

The first row shows the eight states a neighborhood can be in.  The
second row shows the state of the center cell during the next time
step.  As a concise encoding of this table, Wolfram suggested reading
the bottom row as a binary number; because 00110010 in binary is 50 in
decimal, Wolfram calls this CA ``Rule 50''.

\index{Rule 50}

\begin{figure}
\centerline{\includegraphics[height=1.5in]{figs/chap05-1.pdf}}
\caption{Rule 50 after 10 time steps.}
\label{chap05-1}
\end{figure}

Figure~\ref{chap05-1} shows the effect of Rule 50 over 10 time steps.
The first row shows the state of the system during the first time
step; it starts with one cell ``on'' and the rest ``off''.  The second
row shows the state of the system during the next time step, and so
on.

The triangular shape in the figure is typical of these CAs; it is a
consequence of the shape of the neighborhood.  In one time step, each
cell influences the state of one neighbor in either direction.
During the next time step, that influence can propagate one more cell
in each direction.  So each cell in the past has a ``triangle of
influence'' that includes all of the cells that can be affected by it.

\index{triangle of influence}


\section{Classifying CAs}

\begin{figure}
\centerline{\includegraphics[height=1.5in]{figs/chap05-3.pdf}}
\caption{Rule 18 after 64 steps.}
\label{chap05-3}
\end{figure}

How many of these CAs are there?

Since each cell is either on or off, we can specify the state of a
cell with a single bit.  In a neighborhood with three cells, there are
8 possible configurations, so there are 8 entries in the rule tables.
And since each entry contains a single bit, we can specify a table
using 8 bits.  With 8 bits, we can specify 256 different rules.

One of Wolfram's first experiments with CAs was to test all 256
possibilities and classify them.

Examining the results visually, he proposed that the behavior of CAs
can be grouped into four classes.  Class 1 contains the simplest (and
least interesting) CAs, the ones that evolve from almost any starting
condition to the same uniform pattern.  As a trivial example, Rule 0
always generates an empty pattern after one time step.

\index{classifying cellular automatons}

Rule 50 is an example of Class 2.  It generates a simple pattern with
nested structure, that is, a pattern that contains many smaller
versions of itself.
Rule 18 makes the nested structure even clearer;
Figure~\ref{chap05-3} shows what it looks like after 64 steps.

\index{Rule 18}

\newcommand{\Sierpinski}{Sierpi\'{n}ski}

This pattern resembles the \Sierpinski~triangle, which you can read
about at \url{https://thinkcomplex.com/sier}.

\index{Sierpi\'{n}ski triangle}

Some Class 2 CAs generate patterns that are intricate and pretty, but
compared to Classes 3 and 4, they are relatively simple.


\section{Randomness}

\begin{figure}
\centerline{\includegraphics[height=2.5in]{figs/chap05-4.pdf}}
\caption{Rule 30 after 100 time steps.}
\label{chap05-4}
\end{figure}

Class 3 contains CAs that generate randomness.  Rule 30 is an example;
Figure~\ref{chap05-4} shows what it looks like after 100 time steps.

\index{randomness}
\index{Class 3 behavior}
\index{Rule 30}

Along the left side there is an apparent pattern, and on the right
side there are triangles in various sizes, but the center seems quite
random.  In fact, if you take the center column and treat it as a
sequence of bits, it is hard to distinguish from a truly random
sequence.  It passes many of the statistical tests people use to test
whether a sequence of bits is random.

Programs that produce random-seeming numbers are called
{\bf pseudo-random number generators} (PRNGs).  They are not
considered truly random because:

\index{pseudo-random number generator}
\index{PRNG}

\begin{itemize}

\item Many of them produce sequences with regularities that can be
detected statistically.  For example, the original implementation of
\py{rand} in the C library used a linear congruential generator that
yielded sequences with easily detectable serial correlations.

\index{linear congruential generator}

\item Any PRNG that uses a finite amount of state (that is, storage)
will eventually repeat itself.
One of the characteristics of a generator is the {\bf period} of this
repetition.

\index{period}

\item The underlying process is fundamentally deterministic, unlike
some physical processes, like radioactive decay and thermal noise,
that are considered to be fundamentally random.

\index{deterministic}

\end{itemize}

Modern PRNGs produce sequences that are statistically
indistinguishable from random, and they can be implemented with
periods so long that the universe will collapse before they repeat.
The existence of these generators raises the question of whether there
is any real difference between a good quality pseudo-random sequence
and a sequence generated by a ``truly'' random process.  In {\em A New
Kind of Science}, Wolfram argues that there is not (pages 315--326).

\index{New Kind of Science@{\it A New Kind of Science}}


\section{Determinism}
\label{determinism}

The existence of Class 3 CAs is surprising.  To explain how
surprising, let me start with philosophical {\bf determinism} (see
\url{https://thinkcomplex.com/deter}).  Many philosophical stances are
hard to define precisely because they come in a variety of flavors.
I often find it useful to define them with a list of statements
ordered from weak to strong:

\index{determinism}

\begin{description}

\item[D1:] Deterministic models can make accurate predictions for
some physical systems.

\item[D2:] Many physical systems can be modeled by deterministic
processes, but some are intrinsically random.

\item[D3:] All events are caused by prior events, but many physical
systems are nevertheless fundamentally unpredictable.

\item[D4:] All events are caused by prior events, and can (at least in
principle) be predicted.

\index{causation}

\end{description}

My goal in constructing this range is to make D1 so weak that
virtually everyone would accept it, D4 so strong that almost no one
would accept it, with intermediate statements that some people accept.

The center of mass of world opinion swings along this range in
response to historical developments and scientific discoveries.  Prior
to the scientific revolution, many people regarded the working of the
universe as fundamentally unpredictable or controlled by supernatural
forces.  After the triumphs of Newtonian mechanics, some optimists
came to believe something like D4; for example, in 1814 Pierre-Simon
Laplace wrote:

\index{Newtonian mechanics}
\index{Laplace, Pierre-Simon}

\begin{quote}
We may regard the present state of the universe as the effect of its
past and the cause of its future.
An intellect which at a certain moment would know all forces that set
nature in motion, and all positions of all items of which nature is
composed, if this intellect were also vast enough to submit these data
to analysis, it would embrace in a single formula the movements of the
greatest bodies of the universe and those of the tiniest atom; for
such an intellect nothing would be uncertain and the future just like
the past would be present before its eyes.
\end{quote}

This ``intellect'' is now called ``Laplace's Demon''.  See
\url{https://thinkcomplex.com/demon}.  The word ``demon'' in this
context has the sense of ``spirit'', with no implication of evil.

\index{Laplace's Demon}

Discoveries in the 19th and 20th centuries gradually dismantled
Laplace's hope.  Thermodynamics, radioactivity, and quantum mechanics
posed successive challenges to strong forms of determinism.

\index{entropy}
\index{radioactive decay}
\index{quantum mechanics}

In the 1960s chaos theory showed that in some deterministic systems
prediction is only possible over short time scales, limited by
precision in the measurement of initial conditions.

\index{chaos}

Most of these systems are continuous in space (if not time) and
nonlinear, so the complexity of their behavior is not entirely
surprising.  Wolfram's demonstration of complex behavior in simple
cellular automatons is more surprising --- and disturbing, at least to
a deterministic world view.

\index{complex behavior}
\index{simple rules}

So far I have focused on scientific challenges to determinism, but the
longest-standing objection is the apparent conflict between
determinism and human free will.
Complexity science provides a possible resolution of this conflict;
I'll come back to this topic in Section~\ref{freewill}.

\index{free will}


\section{Spaceships}
\label{spaceships}

\begin{figure}
\centerline{\includegraphics[height=2.5in]{figs/chap05-5.pdf}}
\caption{Rule 110 after 100 time steps.}
\label{chap05-5}
\end{figure}

The behavior of Class 4 CAs is even more surprising.  Several 1-D CAs,
most notably Rule 110, are {\bf Turing complete}, which means that
they can compute any computable function.  This property, also called
{\bf universality}, was proved by Matthew Cook in 1998.  See
\url{https://thinkcomplex.com/r110}.

\index{Turing complete}
\index{universality}
\index{Cook, Matthew}

Figure~\ref{chap05-5} shows what Rule 110 looks like with an initial
condition of a single cell and 100 time steps.  At this time scale it
is not apparent that anything special is going on.  There are some
regular patterns but also some features that are hard to characterize.

\index{Rule 110}

Figure~\ref{chap05-6} shows a bigger picture, starting with a random
initial condition and 600 time steps:

\begin{figure}
\centerline{\includegraphics[width=5.5in,height=5.5in]{figs/chap05-6.pdf}}
\caption{Rule 110 with random initial conditions and 600 time steps.}
\label{chap05-6}
\end{figure}

After about 100 steps the background settles into a simple repeating
pattern, but there are a number of persistent structures that appear
as disturbances in the background.  Some of these structures are
stable, so they appear as vertical lines.  Others translate in space,
appearing as diagonals with different slopes, depending on how many
time steps they take to shift by one column.
These structures are called {\bf spaceships}.

\index{spaceship}

Collisions between spaceships yield different results depending on the
types of the spaceships and the phase they are in when they collide.
Some collisions annihilate both ships; others leave one ship
unchanged; still others yield one or more ships of different types.

These collisions are the basis of computation in a Rule 110 CA.  If
you think of spaceships as signals that propagate through space, and
collisions as gates that compute logical operations like AND and OR,
you can see what it means for a CA to perform a computation.


\section{Universality}

To understand universality, we have to understand computability
theory, which is about models of computation and what they compute.

\index{universality}

One of the most general models of computation is the Turing machine,
which is an abstract computer proposed by Alan Turing in 1936.  A
Turing machine is a 1-D CA, infinite in both directions, augmented
with a read-write head.  At any time, the head is positioned over a
single cell.  It can read the state of that cell (usually there are
only two states) and it can write a new value into the cell.

\index{Turing machine}
\index{Turing, Alan}

In addition, the machine has a register, which records the state of
the machine (one of a finite number of states), and a table of rules.
For each machine state and cell state, the table specifies an action.
Actions include modifying the cell the head is over and moving one
cell to the left or right.

\index{register}
\index{tape}
\index{read-write head}
\index{cell}

A Turing machine is not a practical design for a computer, but it
models common computer architectures.
For a given program running on a real computer, it is possible (at
least in principle) to construct a Turing machine that performs an
equivalent computation.

The Turing machine is useful because it is possible to characterize
the set of functions that can be computed by a Turing machine, which
is what Turing did.  Functions in this set are called ``Turing
computable''.

\index{computable function}

To say that a Turing machine can compute any Turing-computable
function is a tautology: it is true by definition.  But
Turing-computability is more interesting than that.

\index{tautology}

It turns out that just about every reasonable model of computation
anyone has come up with is ``Turing complete''; that is, it can
compute exactly the same set of functions as the Turing machine.  Some
of these models, like lambda calculus, are very different from a
Turing machine, so their equivalence is surprising.

\index{lambda calculus}
\index{Church-Turing thesis}

This observation led to the Church-Turing Thesis, which is the claim
that these definitions of computability capture something essential
that is independent of any particular model of computation.

The Rule 110 CA is yet another model of computation, and remarkable
for its simplicity.
That it, too, turns out to be Turing complete lends support to the
Church-Turing Thesis.

In {\em A New Kind of Science}, Wolfram states a variation of this
thesis, which he calls the ``principle of computational equivalence''
(see \url{https://thinkcomplex.com/equiv}):

\index{principle of computational equivalence}
\index{New Kind of Science@{\it A New Kind of Science}}

\begin{quote}
Almost all processes that are not obviously simple can be viewed as
computations of equivalent sophistication.

More specifically, the principle of computational equivalence says
that systems found in the natural world can perform computations up to
a maximal (``universal'') level of computational power, and that most
systems do in fact attain this maximal level of computational power.
Consequently, most systems are computationally equivalent.
\end{quote}

Applying these definitions to CAs, Classes 1 and 2 are ``obviously
simple''.  It may be less obvious that Class 3 is simple, but in a way
perfect randomness is as simple as perfect order; complexity happens
in between.  So Wolfram's claim is that Class 4 behavior is common in
the natural world, and that almost all systems that manifest it are
computationally equivalent.

\index{Class 4 cellular automaton}


\section{Falsifiability}

Wolfram holds that his principle is a stronger claim than the
Church-Turing thesis because it is about the natural world rather than
abstract models of computation.  But saying that natural processes
``can be viewed as computations'' strikes me as a statement about
theory choice more than a hypothesis about the natural world.

\index{falsifiability}

Also, with qualifications like ``almost'' and undefined terms like
``obviously simple'', his hypothesis may be {\bf unfalsifiable}.
Falsifiability is an idea from the philosophy of science, proposed by
Karl Popper as a demarcation between scientific hypotheses and
pseudoscience.  A hypothesis is falsifiable if there is an experiment,
at least in the realm of practicality, that would contradict the
hypothesis if it were false.

\index{Popper, Karl}

For example, the claim that all life on earth is descended from a
common ancestor is falsifiable because it makes specific predictions
about similarities in the genetics of modern species (among other
things).  If we discovered a new species whose DNA was almost entirely
different from ours, that would contradict (or at least bring into
question) the theory of universal common descent.

\index{universal common descent}

On the other hand, ``special creation'', the claim that all species
were created in their current form by a supernatural agent, is
unfalsifiable because there is nothing that we could observe about the
natural world that would contradict it.  Any outcome of any experiment
could be attributed to the will of the creator.

\index{special creation}

Unfalsifiable hypotheses can be appealing because they are impossible
to refute.  If your goal is never to be proved wrong, you should
choose hypotheses that are as unfalsifiable as possible.

But if your goal is to make reliable predictions about the world ---
and this is at least one of the goals of science --- unfalsifiable
hypotheses are useless.  The problem is that they have no consequences
(if they had consequences, they would be falsifiable).

\index{prediction}

For example, if the theory of special creation were true, what good
would it do me to know it?  It wouldn't tell me anything about the
creator except that he has an ``inordinate fondness for beetles''
(attributed to J.~B.~S.~Haldane).
And unlike the theory of common descent, which informs many areas of
science and bioengineering, it would be of no use for understanding
the world or acting in it.

\index{Haldane, J.~B.~S.}
\index{beetles}


\section{What is this a model of?}
\label{model3}

\begin{figure}
\centerline{\includegraphics[height=2.5in]{figs/model3.pdf}}
\caption{The logical structure of a simple physical model.}
\label{fig.model3}
\end{figure}

Some cellular automatons are primarily mathematical artifacts.  They
are interesting because they are surprising, or useful, or pretty, or
because they provide tools for creating new mathematics (like the
Church-Turing thesis).

\index{mathematics}

But it is not clear that they are models of physical systems.  And if
they are, they are highly abstracted, which is to say that they are
not very detailed or realistic.

\index{physical model}

For example, some species of cone snail produce a pattern on their
shells that resembles the patterns generated by cellular automatons
(see \url{https://thinkcomplex.com/cone}).  So it is natural to
suppose that a CA is a model of the mechanism that produces patterns
on shells as they grow.  But, at least initially, it is not clear how
the elements of the model (so-called cells, communication between
neighbors, rules) correspond to the elements of a growing snail (real
cells, chemical signals, protein interaction networks).

\index{cone snail}
\index{abstract model}

For conventional physical models, being realistic is a virtue.  If the
elements of a model correspond to the elements of a physical system,
there is an obvious analogy between the model and the system.  In
general, we expect a model that is more realistic to make better
predictions and to provide more believable explanations.

\index{realistic model}

Of course, this is only true up to a point.
Models that are more detailed are harder to work with, and usually
less amenable to analysis.  At some point, a model becomes so complex
that it is easier to experiment with the system.

At the other extreme, simple models can be compelling exactly because
they are simple.

Simple models offer a different kind of explanation than detailed
models.  With a detailed model, the argument goes something like this:
``We are interested in physical system S, so we construct a detailed
model, M, and show by analysis and simulation that M exhibits a
behavior, B, that is similar (qualitatively or quantitatively) to an
observation of the real system, O.  So why does O happen?  Because S
is similar to M, and B is similar to O, and we can prove that M leads
to B.''

\index{argument by analogy}

With simple models we can't claim that S is similar to M, because it
isn't.  Instead, the argument goes like this: ``There is a set of
models that share a common set of features.  Any model that has these
features exhibits behavior B.  If we make an observation, O, that
resembles B, one way to explain it is to show that the system, S, has
the set of features sufficient to produce B.''

For this kind of argument, adding more features doesn't help.  Making
the model more realistic doesn't make the model more reliable; it only
obscures the difference between the essential features that cause B
and the incidental features that are particular to S.

Figure~\ref{fig.model3} shows the logical structure of this kind of
model.  The features $x$ and $y$ are sufficient to produce the
behavior.
Adding more detail, like features $w$ and $z$, might make the model
more realistic, but that realism adds no explanatory power.


\section{Implementing CAs}

To generate the figures in this chapter, I wrote a Python class called
\py{Cell1D} that represents a 1-D cellular automaton, and a class
called \py{Cell1DViewer} that plots the results.  Both are defined in
\py{Cell1D.py} in the repository for this book.

\index{Cell1D}
\index{Cell1DViewer}

To store the state of the CA, I use a NumPy array with one column for
each cell and one row for each time step.

To explain how my implementation works, I'll start with a CA that
computes the parity of the cells in each neighborhood.  The ``parity''
of a number is 0 if the number is even and 1 if it is odd.

\index{parity}

I use the NumPy function \py{zeros} to create an array of zeros, then
put a 1 in the middle of the first row.

\begin{code}
rows = 5
cols = 11
array = np.zeros((rows, cols), dtype=np.uint8)
array[0, 5] = 1
print(array)

[[0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0]]
\end{code}

The data type \py{uint8} indicates that the elements of \py{array} are
unsigned 8-bit integers.

\index{NumPy}
\index{zeros}
\index{dtype}
\index{uint8}

\py{plot_ca} displays the elements of \py{array} graphically:

\begin{code}
import matplotlib.pyplot as plt

def plot_ca(array, rows, cols):
    cmap = plt.get_cmap('Blues')
    plt.imshow(array, cmap=cmap, interpolation='none')
\end{code}

I import \py{pyplot} with the abbreviated name \py{plt}, which is
conventional.  The function \py{get_cmap} returns a colormap, which
maps from the values in the array to colors.
The colormap \py{'Blues'} draws the ``on'' cells in dark blue and the
``off'' cells in light blue.

\index{imshow}
\index{colormap}
\index{pyplot module}

\py{imshow} displays the array as an ``image''; that is, it draws a
colored square for each element of the array.  Setting
\py{interpolation} to \py{none} indicates that \py{imshow} should not
interpolate between on and off cells.

To compute the state of the CA during time step \py{i}, we have to add
up consecutive elements of \py{array} and compute the parity of the
sum.  We can do that using a slice operator to select the elements and
the modulus operator to compute parity:

\index{slice operator}
\index{modulus operator}

\begin{code}
def step(array, i):
    rows, cols = array.shape
    row = array[i-1]
    for j in range(1, cols-1):
        elts = row[j-1:j+2]
        array[i, j] = sum(elts) % 2
\end{code}

\py{rows} and \py{cols} are the dimensions of the array.  \py{row} is
the previous row of the array.

Each time through the loop, we select three elements from \py{row},
add them up, compute the parity, and store the result in row \py{i}.

In this example, the lattice is finite, so the first and last cells
have only one neighbor.
To handle this special case, I don't update the first and last column;
they are always 0.


\section{Cross-correlation}
\label{cross-correlation}

The operation in the previous section --- selecting elements from an
array and adding them up --- is an example of an operation that is so
useful, in so many domains, that it has a name:
{\bf cross-correlation}.  And NumPy provides a function, called
\py{correlate}, that computes it.  In this section I'll show how we
can use NumPy to write a simpler, faster version of \py{step}.

\index{NumPy}
\index{correlate}
\index{cross-correlation}
\index{window}

The NumPy \py{correlate} function takes an array, $a$, and a
``window'', $w$, with length $N$ and computes a new array, $c$, where
element \py{k} is the following summation:
%
\[ c_k = \sum_{n=0}^{N-1} a_{n+k} \cdot w_n \]
%
We can write this operation in Python like this:

\begin{code}
def c_k(a, w, k):
    N = len(w)
    return sum(a[k:k+N] * w)
\end{code}

This function computes element \py{k} of the correlation between
\py{a} and \py{w}.
To show how it works, I'll create an array of integers:

\begin{code}
N = 10
row = np.arange(N, dtype=np.uint8)
print(row)

[0 1 2 3 4 5 6 7 8 9]
\end{code}

And a window:

\begin{code}
window = [1, 1, 1]
print(window)
\end{code}

With this window, each element, \py{c_k}, is the sum of consecutive
elements from \py{a}:

\begin{code}
c_k(row, window, 0)
3

c_k(row, window, 1)
6
\end{code}

We can use \py{c_k} to write \py{correlate}, which computes the
elements of \py{c} for all values of \py{k} where the window and the
array overlap.

\begin{code}
def correlate(row, window):
    cols = len(row)
    N = len(window)
    c = [c_k(row, window, k) for k in range(cols-N+1)]
    return np.array(c)
\end{code}

Here's the result:

\begin{code}
c = correlate(row, window)
print(c)

[ 3  6  9 12 15 18 21 24]
\end{code}

The NumPy function \py{correlate} does the same thing:

\begin{code}
c = np.correlate(row, window, mode='valid')
print(c)

[ 3  6  9 12 15 18 21 24]
\end{code}

The argument \py{mode='valid'} means that the result contains only the
elements where the window and array overlap, which are considered
valid.

The drawback of this mode is that the result is not the same size as
\py{row}.  We can fix that with \py{mode='same'}, which adds zeros to
the beginning and end of \py{row}:

\begin{code}
c = np.correlate(row, window, mode='same')
print(c)

[ 1  3  6  9 12 15 18 21 24 17]
\end{code}

Now the result is the same size as \py{row}.
As an exercise at the end of this chapter, you'll have a chance to
write a version of \py{correlate} that does the same thing.

We can use NumPy's implementation of \py{correlate} to write a
simpler, faster version of \py{step}:

\begin{code}
def step2(array, i, window=[1,1,1]):
    row = array[i-1]
    c = np.correlate(row, window, mode='same')
    array[i] = c % 2
\end{code}

In the notebook for this chapter, you'll see that \py{step2} yields
the same results as \py{step}.


\section{CA tables}
\label{tables}

The function we have so far works if the CA is ``totalistic'', which
means that the rules depend only on the sum of the neighbors.  But
most rules also depend on which neighbors are on and off.  For
example, \py{100} and \py{001} have the same sum, but for many CAs,
they would yield different results.

We can make \py{step2} more general using a window with elements
\py{[4, 2, 1]}, which interprets the neighborhood as a binary number.
For example, the neighborhood \py{100} yields 4; \py{010} yields 2,
and \py{001} yields 1.  Then we can take these results and look them
up in the rule table.

Here's the more general version of \py{step2}:

\begin{code}
def step3(array, i, window=[4,2,1]):
    row = array[i-1]
    c = np.correlate(row, window, mode='same')
    array[i] = table[c]
\end{code}

The first two lines are the same.  Then the last line looks up each
element from \py{c} in \py{table} and assigns the result to
\py{array[i]}.

Here's the function that computes the table:

\begin{code}
def make_table(rule):
    rule = np.array([rule], dtype=np.uint8)
    table = np.unpackbits(rule)[::-1]
    return table
\end{code}

The parameter, \py{rule}, is an integer between 0 and 255.  The first
line puts \py{rule} into an array with a single element so we can use
\py{unpackbits}, which converts the rule number to its binary
representation.  The \py{[::-1]} reverses the result, so the element
at index \py{k} is the next state for the neighborhood whose binary
value is \py{k}.
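The reversal matters because \py{unpackbits} yields the most
significant bit first.  As a quick check (this snippet is
illustrative, not part of the \py{Cell1D} code), we can build the
table for Rule 110 and look up a neighborhood:

\begin{code}
>>> table = make_table(110)
>>> print(table)
[0 1 1 1 0 1 1 0]
>>> table[0b110]
1
\end{code}

The neighborhood \py{110} has binary value 6, and element 6 of the
table is 1, which matches the rule; without the reversal, we would
read the table backwards.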
For example, here's the table for Rule 150:

\index{NumPy}
\index{unpackbits}

\begin{code}
>>> table = make_table(150)
>>> print(table)
[0 1 1 0 1 0 0 1]
\end{code}

The code in this section is encapsulated in the \py{Cell1D} class, defined in \py{Cell1D.py} in the repository for this book.


\section{Exercises}

The code for this chapter is in the Jupyter notebook {\tt chap05.ipynb}
in the repository for this book. Open this notebook, read the code,
and run the cells. You can use this notebook to work on the
exercises in this chapter. My solutions are in {\tt chap05soln.ipynb}.


\begin{exercise}
Write a version of \py{correlate} that returns the same result as \py{np.correlate} with \py{mode='same'}. Hint: use the NumPy function \py{pad}.
\end{exercise}

\index{NumPy}
\index{pad}
\index{correlate}

\begin{exercise}

This exercise asks you to experiment with Rule 110 and
some of its spaceships.

\index{Rule 110}
\index{spaceship}
\index{background pattern}

\begin{enumerate}

\item Read the Wikipedia page about Rule 110, which describes its
background pattern and spaceships:
\url{https://thinkcomplex.com/r110}.

\item Create a Rule 110 CA with an initial condition that yields the
stable background pattern.

Note that the \py{Cell1D} class provides
\py{start_string}, which allows you to initialize the state of
the array using a string of \py{1}s and \py{0}s.

\item Modify the initial condition by adding different patterns in the
center of the row and see which ones yield spaceships. You might
want to enumerate all possible patterns of $n$ bits, for some
reasonable value of $n$. For each spaceship, can you find the
period and rate of translation?
What is the biggest spaceship you can find?

\item What happens when spaceships collide?

\end{enumerate}

\end{exercise}


\begin{exercise}

The goal of this exercise is to implement a Turing machine.

\begin{enumerate}

\item Read about Turing machines at \url{https://thinkcomplex.com/tm}.

\item Write a class called \py{Turing} that implements a Turing machine.
For the action table, use the rules for a 3-state busy beaver.

\index{Turing machine}
\index{busy beaver}

\item Write a class named \py{TuringViewer} that generates an
image that represents the state of the tape and the position and
state of the head. For one example of what that might look like,
see \url{https://thinkcomplex.com/turing}.

\end{enumerate}

\end{exercise}


\begin{exercise}

This exercise asks you to implement and test several PRNGs.
For testing, you will need to install
\py{DieHarder}, which you can download from
\url{https://thinkcomplex.com/dh}, or it
might be available as a package for your operating system.

\index{DieHarder}

\begin{enumerate}

\item Write a program that implements one of the linear congruential
generators described at
\url{https://thinkcomplex.com/lcg}.
Test it using \py{DieHarder}.

\item Read the documentation of Python's \py{random} module.
What PRNG does it use?
Test it.
\index{random module@\py{random} module}

\item Implement a Rule 30 CA with a few hundred cells,
run it for as many time steps as you can in a reasonable amount
of time, and output the center column as a sequence of bits.
Test it.
\index{Rule 30}

%TODO: Do this exercise (project idea).

\end{enumerate}

\end{exercise}


\begin{exercise}

Falsifiability is an appealing and useful idea, but among philosophers of science it is not generally accepted as a solution to the demarcation problem, as Popper claimed.

Read \url{https://thinkcomplex.com/false} and answer the
following questions.

\begin{enumerate}

\item What is the demarcation problem?

\index{demarcation problem}
\index{Popper, Karl}
\index{falsifiability}

\item How, according to Popper, does falsifiability solve the
demarcation problem?

\item Give an example of two theories, one considered scientific
and one considered unscientific, that are successfully distinguished
by the criterion of falsifiability.

\item Can you summarize one or more of the objections that
philosophers and historians of science have raised to Popper's
claim?

\item Do you get the sense that practicing philosophers think
highly of Popper's work?

\end{enumerate}

\end{exercise}


\chapter{Game of Life}
\label{lifechap}

In this chapter we consider two-dimensional cellular automatons,
especially John Conway's Game of Life (GoL). Like some of
the 1-D CAs in the previous chapter, GoL follows simple rules and
produces surprisingly complicated behavior.
And like Wolfram's
Rule 110, GoL turns out to be universal; that is, it can compute
any computable function, at least in theory.

\index{computable function}
\index{universal}
\index{Turing complete}

Complex behavior in GoL raises issues in the philosophy of
science, particularly related to scientific realism and instrumentalism.
I discuss these issues and suggest additional reading.

At the end of the chapter, I demonstrate ways to implement
GoL efficiently in Python.

The code for this chapter is in {\tt chap06.ipynb} in the repository
for this book. More information about working with the code is
in Section~\ref{code}.


\section{Conway's GoL}
\label{life}

One of the first cellular automatons to be studied, and probably the
most popular of all time, is a 2-D CA called ``The Game of Life'', or GoL
for short. It was developed by John H. Conway and popularized in 1970
in Martin Gardner's column in {\em Scientific American}.
See \url{https://thinkcomplex.com/gol}.

\index{Game of Life}
\index{Conway, John H.}
\index{Gardner, Martin}
\index{grid}

The cells in GoL are arranged in a 2-D {\bf grid}, that is, an array of rows and columns. Usually the grid is considered to be infinite, but in practice it is often ``wrapped''; that is, the right edge is connected to the left, and the top edge to the bottom.

Each cell in the grid has two states --- live and dead --- and 8 neighbors --- north, south, east, west, and the four diagonals. This set of neighbors
is sometimes called a ``Moore neighborhood''.

\index{Moore neighborhood}
\index{neighborhood}

Like the 1-D CAs in the previous chapters, GoL evolves over time according to rules, which are like simple laws of physics.

In GoL, the next state of each cell depends on its current state and
its number of live neighbors.
If a cell is alive, it stays alive if it
has 2 or 3 neighbors, and dies otherwise. If a cell is
dead, it stays dead unless it has exactly 3 neighbors.

This behavior is loosely analogous to real cell growth: cells
that are isolated or overcrowded die; at moderate densities they
flourish.

GoL is popular because:

\begin{itemize}

\item There are simple initial conditions that yield
surprisingly complex behavior.

\index{complex behavior}

\item There are many interesting stable patterns: some
oscillate (with various periods) and some move like the
spaceships in Wolfram's Rule 110 CA.

\item And like Rule 110, GoL is Turing complete.

\index{Turing complete}
\index{universal}

\item Another factor that generated interest was Conway's conjecture --- that there is no initial condition that yields unbounded growth in the number
of live cells --- and the \$50 bounty he offered to anyone who could prove
or disprove it.

\index{unbounded}

\item Finally, the increasing availability of computers made it
possible to automate the computation and display the results
graphically.

\end{itemize}


\section{Life patterns}
\label{lifepatterns}

\begin{figure}
\centerline{\includegraphics[height=1.75in]{figs/chap06-1.pdf}}
\caption{A stable pattern called a beehive.}
\label{chap06-1}
\end{figure}

\begin{figure}
\centerline{\includegraphics[height=1.75in]{figs/chap06-2.pdf}}
\caption{An oscillator called a toad.}
\label{chap06-2}
\end{figure}

\begin{figure}
\centerline{\includegraphics[height=1.75in]{figs/chap06-3.pdf}}
\caption{A spaceship called a glider.}
\label{chap06-3}
\end{figure}

If you run GoL from a random starting state, a number of stable
patterns are likely to appear.
Over time, people have identified
these patterns and given them names.

\index{Game of Life patterns}
\index{beehive}

For example, Figure~\ref{chap06-1} shows a stable pattern called a
``beehive''. Every cell in the beehive
has 2 or 3 neighbors, so they all survive, and none of the dead
cells adjacent to the beehive has 3 neighbors, so no new cells
are born.

Other patterns ``oscillate''; that is, they change over time but
eventually return to their starting configuration (provided
they don't collide with another pattern). For example,
Figure~\ref{chap06-2} shows a pattern called a ``toad'', which
is an oscillator that alternates between two states. The
``period'' of this oscillator is 2.

\index{oscillator}
\index{toad}

Finally, some patterns oscillate and return to the starting
configuration, but shifted in space. Because these patterns
seem to move, they are called ``spaceships''.

Figure~\ref{chap06-3} shows a spaceship called a
``glider''. After a period of 4 steps, the glider is back in the
starting configuration, shifted one unit down and to the right.

\index{glider}
\index{spaceship}

Depending on the starting orientation, gliders can move along any
of the four diagonals. There are other spaceships that
move horizontally and vertically.

People have spent embarrassing
amounts of time finding and naming these patterns.
If you search
the web, you will find many collections.

\section{Conway's conjecture}

From most initial conditions, GoL quickly reaches a stable
state where the number of live cells is nearly constant
(possibly with some oscillation).

\begin{figure}
\centerline{\includegraphics[height=1.75in]{figs/chap06-4.pdf}}
\caption{Starting and final configurations of the r-pentomino.}
\label{chap06-4}
\end{figure}

But there are some simple starting conditions that
yield a surprising number of live cells, and take a
long time to settle down. Because these patterns are so long-lived, they
are called ``Methuselahs''.

\index{Methuselah}
\index{Conway's conjecture}

One of the simplest Methuselahs is the
r-pentomino, which has only five cells, roughly in the shape of the
letter ``r''. Figure~\ref{chap06-4} shows the initial configuration
of the r-pentomino and the final configuration after 1103 steps.

This configuration is ``final'' in the sense that all remaining
patterns are either stable, oscillators, or gliders that will never
collide with another pattern. In total, the r-pentomino yields 6
gliders, 8 blocks, 4 blinkers, 4 beehives, 1 boat, 1 ship, and 1 loaf.

\index{r-pentomino}


\begin{figure}
\centerline{\includegraphics[height=1.75in]{figs/chap06-5.pdf}}
\caption{Gosper's glider gun, which produces a stream of gliders.}
\label{chap06-5}
\end{figure}

The existence of long-lived patterns prompted Conway to wonder if
there are initial patterns that never stabilize. He
conjectured that there were not, but he described two kinds of pattern
that would prove him wrong, a ``gun'' and a ``puffer train''. A gun
is a stable pattern that periodically produces a spaceship --- as the
stream of spaceships moves out from the source, the number of live
cells grows indefinitely.
A puffer train is a translating pattern
that leaves live cells in its wake.

\index{glider gun}
\index{puffer train}

It turns out that both of these patterns exist. A team led
by Bill Gosper discovered the first, a glider gun now called
Gosper's Gun, which is shown in Figure~\ref{chap06-5}.
Gosper also discovered the first puffer train.

\index{Gosper, Bill}

There are many patterns of both types, but they are not easy to
design or find. That is not a coincidence. Conway chose the
rules of GoL so that his conjecture would not be obviously
true or false. Of all possible rules for a 2-D CA, most
yield simple behavior: most initial conditions stabilize quickly
or grow unboundedly. By avoiding uninteresting CAs, Conway
was also avoiding Wolfram's Class 1 and Class 2 behavior, and
probably Class 3 as well.

If we believe Wolfram's Principle of Computational Equivalence, we
expect GoL to be in Class 4, and it is. The Game of Life was proved
Turing complete in 1982 (and again, independently, in 1983).
Since then, several people have constructed GoL patterns that implement
a Turing machine or another machine known to be Turing complete.

\index{Class 4 behavior}
\index{Turing complete}
\index{universality}


\section{Realism}

Stable patterns in GoL are hard not to notice, especially the ones
that move. It is natural to think of them as persistent entities, but
remember that a CA is made of cells; there is no such thing as a toad
or a loaf. Gliders and other spaceships are even less real because
they are not even made up of the same cells over time. So these
patterns are like constellations of stars. We perceive them because
we are good at seeing patterns, or because we have active
imaginations, but they are not real.

\index{realism}

Right?

Well, not so fast.
Many entities that we consider ``real'' are also
persistent patterns of entities at a smaller scale. Hurricanes are
just patterns of air flow, but we give them personal names. And
people, like gliders, are not made up of the same cells over time.

\index{hurricane}

This is not a new observation --- about 2500 years ago Heraclitus
pointed out that you can't step in the same river twice --- but the
entities that appear in the Game of Life are a useful test case for
thinking about scientific realism.

\index{Heraclitus}
\index{scientific realism}

{\bf Scientific realism} pertains to scientific theories and the
entities they postulate.
A theory postulates an entity if it is
expressed in terms of the properties and behavior of the entity.
For example, theories about electromagnetism are expressed in
terms of electrical and magnetic fields. Some theories about economics
are expressed in terms of supply, demand, and market forces.
And theories about biology are expressed in terms of genes.

But are these entities real? That is, do they exist in the world
independent of us and our theories?

\index{gene}
\index{postulated entity}
\index{theory}

Again, I find it useful to state philosophical positions in a range of
strengths; here are four statements of scientific realism with increasing
strength:

\begin{description}

\item[SR1:] Scientific theories are true or false to the degree that
they approximate reality, but no theory is exactly true. Some
postulated entities may be real, but there is no principled way to
say which ones.

\item[SR2:] As science advances, our theories become better
approximations of reality. At least some postulated entities are
known to be real.

\item[SR3:] Some theories are exactly true; others are approximately
true.
Entities postulated by true theories, and some entities
in approximate theories, are real.

\item[SR4:] A theory is true if it describes reality correctly, and
false otherwise. The entities postulated by true theories are real;
others are not.

\end{description}

SR4 is so strong that it is probably untenable; by such a strict
criterion, almost all current theories are known to be false.
Most realists would accept something in the range
between SR1 and SR3.


\section{Instrumentalism}

But SR1 is so weak that it verges on {\bf instrumentalism}, which is
the view that theories are instruments that we use for our purposes: a theory is useful, or not, to the degree that it is fit for its purpose, but we can't say whether it is true or false.

\index{instrumentalism}

To see whether you are comfortable with instrumentalism, I made up the following test. Read the following statements and give yourself a point for each one you agree with. If you score 4 or more, you might be an instrumentalist!

\begin{quote}
``Entities in the Game of Life aren't real; they are just patterns of
cells that people have given cute names.''
\end{quote}

\begin{quote}
``A hurricane is just a pattern of air flow, but it is a useful
description because it allows us to make predictions and communicate
about the weather.''
\end{quote}

\index{hurricane}

\begin{quote}
``Freudian entities like the Id and the Superego aren't real, but they
are useful tools for thinking and communicating about psychology (or
at least some people think so).''
\end{quote}

\index{Id}
\index{Freud, Sigmund}
\index{Superego}

\begin{quote}
``Electric and magnetic fields are postulated entities in our best
theory of electromagnetism, but they aren't real.
We could
construct other theories, without postulating fields, that would be
just as useful.''
\end{quote}

\index{electron}

\begin{quote}
``Many of the things in the world that we identify as objects are
arbitrary collections like constellations. For example, a mushroom
is just the fruiting body of a fungus, most of which grows
underground as a barely-contiguous network of cells. We focus
on mushrooms for practical reasons like visibility and edibility.''
\end{quote}

\index{mushroom}

\begin{quote}
``Some objects have sharp boundaries, but many are fuzzy. For
example, which molecules are part of your body: Air in your lungs?
Food in your stomach? Nutrients in your blood? Nutrients in a
cell? Water in a cell? Structural parts of a cell? Hair? Dead
skin? Dirt? Bacteria on your skin? Bacteria in your gut?
Mitochondria? How many of those molecules do you include when you
weigh yourself? Conceiving the world in terms of discrete objects
is useful, but the entities we identify are not real.''
\end{quote}

If you are more comfortable with some of these statements than
others, ask yourself why. What are the differences in these
scenarios that influence your reaction? Can you make
a principled distinction between them?

For more on instrumentalism, see
\url{https://thinkcomplex.com/instr}.


\section{Implementing Life}
\label{implife}

The exercises at the end of this chapter ask you to experiment
with and modify the Game of Life, and implement other 2-D cellular
automatons. This section explains my implementation of GoL, which
you can use as a starting place for your experiments.

\index{cellular automaton}
\index{NumPy}

To represent the state of the cells, I use a NumPy array of 8-bit unsigned integers.
As an example, the
following line creates a 10 by 10 array initialized with random
values of 0 and 1.

\index{dtype}
\index{uint8}

\begin{code}
a = np.random.randint(2, size=(10, 10), dtype=np.uint8)
\end{code}

\index{NumPy}
\index{random}
\index{randint}

There are a few ways we can compute the GoL rules. The simplest
is to use \py{for} loops to iterate through the rows and columns of
the array:

\begin{code}
b = np.zeros_like(a)
rows, cols = a.shape
for i in range(1, rows-1):
    for j in range(1, cols-1):
        state = a[i, j]
        neighbors = a[i-1:i+2, j-1:j+2]
        k = np.sum(neighbors) - state
        if state:
            if k==2 or k==3:
                b[i, j] = 1
        else:
            if k == 3:
                b[i, j] = 1
\end{code}

Initially, \py{b} is an array of zeros with the same size as \py{a}.
Each time through the loop, \py{state} is the condition of the center
cell and \py{neighbors} is the 3x3 neighborhood. \py{k} is the number
of live neighbors (not including the center cell). The nested \py{if}
statements evaluate the GoL rules and turn on cells in \py{b}
accordingly.

\index{neighborhood}
\index{cross-correlation}
\index{SciPy}
\index{correlate2d}

This implementation is a straightforward translation of the rules, but
it is verbose and slow. We can do better using cross-correlation, as
we saw in Section~\ref{cross-correlation}. There, we used
\py{np.correlate} to compute a 1-D correlation.
Now, to perform 2-D
correlation, we'll use \py{correlate2d} from \py{scipy.signal}, a
SciPy module that provides functions related to signal processing:

\begin{code}
from scipy.signal import correlate2d

kernel = np.array([[1, 1, 1],
                   [1, 0, 1],
                   [1, 1, 1]])

c = correlate2d(a, kernel, mode='same')
\end{code}

What we called a ``window'' in the context of 1-D correlation is
called a ``kernel'' in the context of 2-D correlation, but the idea
is the same: \py{correlate2d} multiplies the kernel and the array to
select a neighborhood, then adds up the result. This kernel selects
the 8 neighbors that surround the center cell.

\index{window}
\index{kernel}

\py{correlate2d} applies the kernel to each location in the array. With
\py{mode='same'}, the result has the same size as \py{a}.

Now we can use logical operators to compute the rules:

\begin{code}
b = (c==3) | (c==2) & a
b = b.astype(np.uint8)
\end{code}

The first line computes a boolean array with \py{True} where there
should be a live cell and \py{False} elsewhere. Then \py{astype}
converts the boolean array to an array of integers.

\index{boolean array}

This version is faster, and probably good enough,
but we can simplify it slightly by modifying the
kernel:

\begin{code}
kernel = np.array([[1, 1, 1],
                   [1,10, 1],
                   [1, 1, 1]])

c = correlate2d(a, kernel, mode='same')
b = (c==3) | (c==12) | (c==13)
b = b.astype(np.uint8)
\end{code}

This version of the kernel includes the center cell and gives it a
weight of 10.
If the center cell is 0, the result is between 0 and 8;
if the center cell is 1, the result is between 10 and 18.
Using this kernel, we can simplify the logical operations, selecting
only cells with the values 3, 12, and 13.

That might not seem like a big improvement, but it allows one more
simplification: with this kernel, we can use a table to look up
cell values, as we did in Section~\ref{tables}.

\begin{code}
table = np.zeros(20, dtype=np.uint8)
table[[3, 12, 13]] = 1
c = correlate2d(a, kernel, mode='same')
b = table[c]
\end{code}

\py{table} has zeros everywhere except locations 3, 12, and 13. When
we use \py{c} as an index into \py{table}, NumPy performs element-wise
lookup; that is, it takes each value from \py{c}, looks it up in
\py{table}, and puts the result into \py{b}.

This version is faster and more concise than the others; the only
drawback is that it takes more explaining.

\py{Life.py}, which is included in the repository for this book,
provides a \py{Life} class that encapsulates this implementation of
the rules. If you run \py{Life.py}, you should see an animation
of a ``puffer train'', which is a spaceship that leaves a trail of
detritus in its wake.

\index{puffer train}


\section{Exercises}

The code for this chapter is in the Jupyter notebook {\tt chap06.ipynb}
in the repository for this book. Open this notebook, read the code,
and run the cells. You can use this notebook to work on the
following exercises.
My solutions are in {\tt chap06soln.ipynb}.


\begin{exercise}

Start GoL in a random state and run it until it stabilizes.
What stable patterns can you identify?

\end{exercise}


\begin{exercise}

Many named patterns are available in portable file formats.
Modify \py{Life.py} to parse one of these formats and initialize
the grid.

\end{exercise}


\begin{exercise}

One of the longest-lived small patterns is ``rabbits'', which starts with
9 live cells and takes 17,331 steps to stabilize. You can get the
initial configuration in various formats from
\url{https://thinkcomplex.com/rabbits}. Load this configuration
and run it.

\end{exercise}


\begin{exercise}

In my implementation, the \py{Life} class is based on a parent class
called \py{Cell2D}, and the \py{LifeViewer} class is based on \py{Cell2DViewer}. You can use these base classes to implement other 2-D cellular automatons.

\index{Cell2D class}
\index{Cell2DViewer class}
\index{LifeViewer class}
\index{Life class}

For example, one variation of GoL, called ``Highlife'', has the
same rules as GoL, plus one additional rule: a dead cell with 6
neighbors comes to life.

\index{Highlife}

Write a class named \py{Highlife} that inherits from \py{Cell2D} and implements
this version of the rules. Also write a class named \py{HighlifeViewer}
that inherits from \py{Cell2DViewer} and try different ways
to visualize the results.
As a simple example, use a different
colormap.

One of the more interesting patterns in Highlife is the replicator (see \url{https://thinkcomplex.com/repl}).
Use \py{add_cells} to initialize Highlife with a replicator and see what it
does.

\end{exercise}


\begin{exercise}

If you generalize the Turing machine to two dimensions, or
add a read-write head to a 2-D CA, the result is a
cellular automaton called a Turmite. It is named after a
termite because of the way the read-write head moves, but
spelled wrong as an homage to Alan Turing.

\index{turmite}
\index{Turing, Alan}
\index{Turing machine}

The most famous Turmite is Langton's Ant, discovered by Chris Langton
in 1986. See \url{https://thinkcomplex.com/langton}.

\index{Langton's Ant}
\index{Langton, Chris}

The ant is a read-write head with
four states, which you can think of as facing north, south,
east or west. The cells have two states, black and white.

\index{read-write head}

The rules are simple. During each time step, the ant checks the color
of the cell it is on. If black, the ant turns to the right,
changes the cell to white, and moves forward one space. If the cell
is white, the ant turns left, changes the cell to black, and moves
forward.

\index{simple rules}

Given a simple world, a simple set of rules, and only one moving part,
you might expect to see simple behavior --- but you should know
better by now. Starting with all white cells, Langton's ant
moves in a seemingly random pattern for more than 10,000 steps
before it enters a cycle with a period of 104 steps.
After
each cycle, the ant is translated diagonally, so it leaves
a trail called the ``highway''.

\index{complex behavior}
\index{period}

Write an implementation of Langton's Ant.

\end{exercise}


\chapter{Physical modeling}
\label{modeling}

The cellular automatons we have seen so far are not physical models;
that is, they are not intended to describe systems in the real world.
But some CAs are intended as physical models.

In this chapter we consider a CA that models chemicals that diffuse (spread
out) and react with each other, which is a process Alan Turing proposed
to explain how some animal patterns develop.

And we'll experiment with a CA that models percolation of liquid
through porous material, like water through coffee grounds. This
model is the first of several models that exhibit {\bf phase change}
behavior and {\bf fractal geometry}, and I'll explain what both of
those mean.

\index{percolation}

The code for this chapter is in {\tt chap07.ipynb} in the repository
for this book. More information about working with the code is
in Section~\ref{code}.


\section{Diffusion}

In 1952 Alan Turing published a paper called ``The chemical basis
of morphogenesis'', which describes the behavior of systems involving
two chemicals that diffuse in space and react with each other.
He
showed that these systems produce a wide range of patterns, depending
on the diffusion and reaction rates, and conjectured that systems
like this might be an important mechanism in biological growth processes,
particularly the development of animal coloration patterns.

\index{Turing, Alan}
\index{morphogenesis}

Turing's model is based on differential equations, but it can
be implemented using a cellular automaton.

Before we get to Turing's model, we'll start with something simpler:
a diffusion system with just one chemical. We'll use a 2-D CA where the
state of each cell is a continuous quantity (usually between 0 and 1)
that represents the concentration of the chemical.

\index{diffusion}

We'll model the diffusion process by comparing each cell with the
average of its neighbors. If the concentration of the center cell
exceeds the neighborhood average, the chemical flows from the center
to the neighbors. If the concentration of the center cell is lower,
the chemical flows the other way.

\index{correlate2d}
\index{kernel}

The following kernel computes the difference between each cell
and the average of its neighbors:

\begin{code}
kernel = np.array([[0, 1, 0],
                   [1,-4, 1],
                   [0, 1, 0]])
\end{code}

Using SciPy's \py{correlate2d}, we can apply this kernel to each cell
in an array:

\begin{code}
c = correlate2d(array, kernel, mode='same')
\end{code}

We'll use a diffusion constant, \py{r}, that relates the difference
in concentration to the rate of flow:

\begin{code}
array += r * c
\end{code}

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-1.pdf}}
\caption{A simple diffusion model after 0, 5, and 10 steps.}
\label{chap07-1}
\end{figure}

Figure~\ref{chap07-1} shows results for a CA with size \py{n=9}, diffusion constant \py{r=0.1}, and initial concentration 0 everywhere except
for an ``island'' in the
middle. The figure shows the starting configuration and the state of
the CA after 5 and 10 steps. The chemical spreads from the center
outward, continuing until the concentration is the same everywhere.


\section{Reaction-diffusion}

Now let's add a second chemical. I'll define a new object,
\py{ReactionDiffusion}, that contains two arrays, one for each
chemical:

\index{reaction-diffusion}

\begin{code}
class ReactionDiffusion(Cell2D):

    def __init__(self, n, m, params, noise=0.1):
        self.params = params
        self.array = np.ones((n, m), dtype=float)
        self.array2 = noise * np.random.random((n, m))
        add_island(self.array2)
\end{code}

\py{n} and \py{m} are the number of rows and columns in the array.
\py{params} is a tuple of parameters, which I explain below.

\index{NumPy}
\index{ones}
\index{random}

\py{array} represents the concentration of the first chemical, \py{A};
the NumPy function \py{ones} initializes it to 1 everywhere. The data type \py{float} indicates that the elements of \py{A} are floating-point values.

\index{dtype}
\index{float}

\py{array2} represents the concentration of \py{B}, which is initialized with random values between 0 and \py{noise}, which is 0.1 by default. Then \py{add_island} adds an island of higher concentration in the middle:

\begin{code}
def add_island(a, height=0.1):
    n, m = a.shape
    radius = min(n, m) // 20
    i = n//2
    j = m//2
    a[i-radius:i+radius, j-radius:j+radius] += height
\end{code}

\index{NumPy}
\index{slice}

The radius of the island is one twentieth of \py{n} or \py{m},
whichever is smaller.
The height of the island is \py{height}, with the default value 0.1.

\index{island}

Here is the \py{step} function that updates the arrays:

\begin{code}
def step(self):
    A = self.array
    B = self.array2
    ra, rb, f, k = self.params

    cA = correlate2d(A, self.kernel, **self.options)
    cB = correlate2d(B, self.kernel, **self.options)

    reaction = A * B**2
    self.array += ra * cA - reaction + f * (1-A)
    self.array2 += rb * cB + reaction - (f+k) * B
\end{code}

The parameters are

\begin{description}

\item[\py{ra}:] The diffusion rate of \py{A} (analogous to \py{r} in the previous section).

\item[\py{rb}:] The diffusion rate of \py{B}. In most versions of this model, \py{rb} is about half of \py{ra}.

\item[\py{f}:] The ``feed'' rate, which controls how quickly \py{A} is added to the system.

\item[\py{k}:] The ``kill'' rate, which controls how quickly \py{B} is removed from the system.

\end{description}

Now let's look more closely at the update statements:

\begin{code}
reaction = A * B**2
self.array += ra * cA - reaction + f * (1-A)
self.array2 += rb * cB + reaction - (f+k) * B
\end{code}

The arrays \py{cA} and \py{cB} are the result of applying a diffusion kernel to \py{A} and \py{B}. Multiplying by \py{ra} and \py{rb} yields the rate of diffusion into or out of each cell.

The term \py{A * B**2} represents the rate that \py{A} and \py{B} react with each other. Assuming that the reaction consumes \py{A} and produces \py{B}, we subtract this term in the first equation and add it in the second.

The term \py{f * (1-A)} determines the rate that \py{A} is added to the system. Where \py{A} is near 0, the maximum feed rate is \py{f}. Where \py{A} approaches 1, the feed rate drops off to zero.

Finally, the term \py{(f+k) * B} determines the rate that \py{B} is removed from the system.
As \py{B} approaches 0, this rate goes to zero.

As long as the rate parameters are not too high, the values of \py{A} and \py{B} usually stay between 0 and 1.

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-2.pdf}}
\caption{Reaction-diffusion model with parameters \py{f=0.035} and \py{k=0.057} after 1000, 2000, and 4000 steps.}
\label{chap07-2}
\end{figure}

With different parameters, this model can produce patterns similar to the stripes and spots on a variety of animals. In some cases, the similarity is striking, especially when the feed and kill parameters vary in space.

\index{animal pattern}

For all simulations in this section, \py{ra=0.5} and \py{rb=0.25}.

Figure~\ref{chap07-2} shows results with \py{f=0.035} and \py{k=0.057}, with the concentration of \py{B} shown in darker colors. With these parameters, the system evolves toward a stable configuration with light spots of \py{A} on a dark background of \py{B}.

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-3.pdf}}
\caption{Reaction-diffusion model with parameters \py{f=0.055} and \py{k=0.062} after 1000, 2000, and 4000 steps.}
\label{chap07-3}
\end{figure}

Figure~\ref{chap07-3} shows results with \py{f=0.055} and \py{k=0.062}, which yields a coral-like pattern of \py{B} on a background of \py{A}.

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-4.pdf}}
\caption{A reaction-diffusion model with parameters \py{f=0.039} and \py{k=0.065} after 1000, 2000, and 4000 steps.}
\label{chap07-4}
\end{figure}

Figure~\ref{chap07-4} shows results with \py{f=0.039} and \py{k=0.065}.
These parameters produce spots of \py{B} that grow and divide in a process that resembles mitosis, ending with a stable pattern of equally-spaced spots.

Since 1952, observations and experiments have provided some support for Turing's conjecture. At this point it seems likely, but not yet proven, that many animal patterns are actually formed by reaction-diffusion processes of some kind.


\section{Percolation}

Percolation is a process in which a fluid flows through a semi-porous material. Examples include oil in rock formations, water in paper, and hydrogen gas in micropores. Percolation models are also used to study systems that are not literally percolation, including epidemics and networks of electrical resistors. See \url{https://thinkcomplex.com/perc}.

\index{percolation}

Percolation models are often represented using random graphs like the ones we saw in Chapter~\ref{graphs}, but they can also be represented using cellular automatons. In the next few sections we'll explore a 2-D CA that simulates percolation.

In this model:

\begin{itemize}

\item Initially, each cell is either ``porous'' with probability \py{q} or ``non-porous'' with probability \py{1-q}.

\item When the simulation begins, all cells are considered ``dry'' except the top row, which is ``wet''.

\item During each time step, if a porous cell has at least one wet neighbor, it becomes wet.
Non-porous cells stay dry.

\item The simulation runs until it reaches a ``fixed point'' where no more cells change state.

\end{itemize}

If there is a path of wet cells from the top to the bottom row, we say that the CA has a ``percolating cluster''.

\index{cluster}
\index{percolating cluster}

Two questions of interest regarding percolation are (1) the probability that a random array contains a percolating cluster, and (2) how that probability depends on \py{q}. These questions might remind you of Section~\ref{randomgraphs}, where we considered the probability that a random \Erdos-\Renyi~graph is connected. We will see several connections between that model and this one.

I define a new class to represent a percolation model:

\begin{code}
class Percolation(Cell2D):

    def __init__(self, n, q):
        self.q = q
        self.array = np.random.choice([1, 0], (n, n), p=[q, 1-q])
        self.array[0] = 5
\end{code}

\index{NumPy}
\index{random}
\index{choice}

\py{n} is the number of rows and columns in the CA.

The state of the CA is stored in \py{array}, which is initialized using \py{np.random.choice} to choose 1 (porous) with probability \py{q}, and 0 (non-porous) with probability \py{1-q}.

The state of the top row is set to 5, which represents a wet cell. Using 5, rather than the more obvious 2, makes it possible to use \py{correlate2d} to check whether any porous cell has a wet neighbor. Here is the kernel:

\begin{code}
kernel = np.array([[0, 1, 0],
                   [1, 0, 1],
                   [0, 1, 0]])
\end{code}

This kernel defines a 4-cell ``von Neumann'' neighborhood; unlike the Moore neighborhood we saw in Section~\ref{life}, it does not include the diagonals.

\index{neighborhood}
\index{kernel}

This kernel adds up the states of the neighbors. If any of them are wet, the result will be 5 or more.
Otherwise the maximum result is 4 (if all neighbors happen to be porous).

We can use this logic to write a simple, fast \py{step} function:

\begin{code}
def step(self):
    a = self.array
    c = correlate2d(a, self.kernel, mode='same')
    self.array[(a==1) & (c>=5)] = 5
\end{code}

This function identifies porous cells, where \py{a==1}, that have at least one wet neighbor, where \py{c>=5}, and sets their state to 5, which indicates that they are wet.

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-5.pdf}}
\caption{The first three steps of a percolation model with \py{n=10} and \py{q=0.7}.}
\label{chap07-5}
\end{figure}

Figure~\ref{chap07-5} shows the first few steps of a percolation model with \py{n=10} and \py{q=0.7}. Non-porous cells are white, porous cells are lightly shaded, and wet cells are dark.


\section{Phase change}

Now let's test whether a random array contains a percolating cluster:

\begin{code}
def test_perc(perc):
    num_wet = perc.num_wet()

    while True:
        perc.step()

        if perc.bottom_row_wet():
            return True

        new_num_wet = perc.num_wet()
        if new_num_wet == num_wet:
            return False

        num_wet = new_num_wet
\end{code}

\py{test_perc} takes a \py{Percolation} object as a parameter. Each time through the loop, it advances the CA one time step. It checks the bottom row to see if any cells are wet; if so, it returns \py{True}, to indicate that there is a percolating cluster.

During each time step, it also computes the number of wet cells and checks whether the number increased since the last step.
If not, we have reached a fixed point without finding a percolating cluster, so \py{test_perc} returns \py{False}.

To estimate the probability of a percolating cluster, we generate many random arrays and test them:

\begin{code}
def estimate_prob_percolating(n=100, q=0.5, iters=100):
    t = [test_perc(Percolation(n, q)) for i in range(iters)]
    return np.mean(t)
\end{code}

\py{estimate_prob_percolating} makes 100 \py{Percolation} objects with the given values of \py{n} and \py{q} and calls \py{test_perc} to see how many of them have a percolating cluster. The return value is the fraction that do.

\index{NumPy}
\index{mean}
\index{list comprehension}

When \py{q=0.55}, the probability of a percolating cluster is near 0. At \py{q=0.60}, it is about 70\%, and at \py{q=0.65} it is near 1. This rapid transition suggests that there is a critical value of \py{q} near 0.6.

\index{random walk}
\index{critical value}

We can estimate the critical value more precisely using a {\bf random walk}. Starting from an initial value of \py{q}, we construct a \py{Percolation} object and check whether it has a percolating cluster. If so, \py{q} is probably too high, so we decrease it. If not, \py{q} is probably too low, so we increase it.

Here's the code:

\begin{code}
def find_critical(n=100, q=0.6, iters=100):
    qs = [q]
    for i in range(iters):
        perc = Percolation(n, q)
        if test_perc(perc):
            q -= 0.005
        else:
            q += 0.005
        qs.append(q)
    return qs
\end{code}

The result is a list of values for \py{q}. We can estimate the critical value, \py{q_crit}, by computing the mean of this list.
With \py{n=100} the mean of \py{qs} is about 0.59; this value does not seem to depend on \py{n}.

\index{phase change}

The rapid change in behavior near the critical value is called a {\bf phase change} by analogy with phase changes in physical systems, like the way water changes from liquid to solid at its freezing point.

\index{critical phenomena}
\index{criticality}

A wide variety of systems display a common set of behaviors and characteristics when they are at or near a critical point. These behaviors are known collectively as {\bf critical phenomena}. In the next section, we explore one of them: fractal geometry.


\section{Fractals}
\label{fractals}

To understand fractals, we have to start with dimensions.

\index{fractal}
\index{dimension}

For simple geometric objects, dimension is defined in terms of scaling behavior. For example, if the side of a square has length $l$, its area is $l^2$. The exponent, 2, indicates that a square is two-dimensional. Similarly, if the side of a cube has length $l$, its volume is $l^3$, which indicates that a cube is three-dimensional.

%TODO: These exponents don't look good in the O'Reilly edition.

More generally, we can estimate the dimension of an object by measuring some kind of size (like area or volume) as a function of some kind of linear measure (like the length of a side).

As an example, I'll estimate the dimension of a 1-D cellular automaton by measuring its area (total number of ``on'' cells) as a function of the number of rows.


\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-7.pdf}}
\caption{One-dimensional CAs with rules 20, 50, and 18, after 32 time steps.}
\label{chap07-7}
\end{figure}

Figure~\ref{chap07-7} shows three 1-D CAs like the ones we saw in Section~\ref{onedim}.
Rule 20 (left) generates a set of cells that seems like a line, so we expect it to be one-dimensional. Rule 50 (center) produces something like a triangle, so we expect it to be 2-D. Rule 18 (right) also produces something like a triangle, but the density is not uniform, so its scaling behavior is not obvious.

\index{scaling}

I'll estimate the dimension of these CAs with the following function, which counts the number of on cells after each time step. It returns a list of tuples, where each tuple contains $i$, $i^2$, and the total number of cells.

\begin{code}
def count_cells(rule, n=500):
    ca = Cell1D(rule, n)
    ca.start_single()

    res = []
    for i in range(1, n):
        cells = np.sum(ca.array)
        res.append((i, i**2, cells))
        ca.step()

    return res
\end{code}

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-8.pdf}}
\caption{Number of ``on'' cells versus number of time steps for rules 20, 50, and 18.}
\label{chap07-8}
\end{figure}

Figure~\ref{chap07-8} shows the results plotted on a log-log scale.

In each figure, the top dashed line shows $y = i^2$. Taking the log of both sides, we have $\log y = 2 \log i$. Since the figure is on a log-log scale, the slope of this line is 2.

Similarly, the bottom dashed line shows $y = i$. On a log-log scale, the slope of this line is 1.

%TODO: The math in the following paragraph looks bad.

Rule 20 (left) produces 3 cells every 2 time steps, so the total number of cells after $i$ steps is $y = 1.5 i$. Taking the log of both sides, we have $\log y = \log 1.5 + \log i$, so on a log-log scale, we expect a line with slope 1. In fact, the estimated slope of the line is 1.01.

Rule 50 (center) produces $i+1$ new cells during the $i$th time step, so the total number of cells after $i$ steps is $y = i^2 + i$.
If we ignore the second term and take the log of both sides, we have $\log y \sim 2 \log i$, so as $i$ gets large, we expect to see a line with slope 2. In fact, the estimated slope is 1.97.

\index{Rule 20}
\index{Rule 50}
\index{Rule 18}
\index{fractal dimension}

Finally, for Rule 18 (right), the estimated slope is about 1.57, which is clearly not 1, 2, or any other integer. This suggests that the pattern generated by Rule 18 has a ``fractional dimension''; that is, it is a fractal.

This way of estimating a fractal dimension is called {\bf box-counting}. For more about it, see \url{https://thinkcomplex.com/box}.


\section{Fractals and Percolation Models}
\label{fracperc}

%TODO: The typesetting in this caption is broken.

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-6.pdf}}
\caption{Percolation models with \py{q=0.6} and \py{n=100}, \py{200}, and \py{300}.}
\label{chap07-6}
\end{figure}

Now let's get back to percolation models. Figure~\ref{chap07-6} shows clusters of wet cells in percolation simulations with \py{q=0.6} and \py{n=100}, \py{200}, and \py{300}.
Informally, they resemble fractal patterns seen in nature and in mathematical models.

\index{percolation}

To estimate their fractal dimension, we can run CAs with a range of sizes, count the number of wet cells in each percolating cluster, and then see how the cell counts scale as we increase the size of the array.

The following loop runs the simulations:

\begin{code}
res = []
for size in sizes:
    perc = Percolation(size, q)
    if test_perc(perc):
        num_filled = perc.num_wet() - size
        res.append((size, size**2, num_filled))
\end{code}

The result is a list of tuples where each tuple contains \py{size}, \py{size**2}, and the number of cells in the percolating cluster (not including the initial wet cells in the top row).

\begin{figure}
\centerline{\includegraphics[height=2in]{figs/chap07-9.pdf}}
\caption{Number of cells in the percolating cluster versus CA size.}
\label{chap07-9}
\end{figure}

Figure~\ref{chap07-9} shows the results for a range of sizes from 10 to 100. The dots show the number of cells in each percolating cluster.
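To fit a line to these counts, one option is to regress the log of the cluster size on the log of the array size. Here is a sketch using \py{scipy.stats.linregress}, with synthetic counts standing in for \py{res} so the snippet runs without the simulations above (the exponent 1.85 comes from the text, not from a new computation):

```python
import numpy as np
from scipy.stats import linregress

# synthetic cluster sizes that scale like size**1.85, standing in
# for the (size, size**2, num_filled) tuples collected above
sizes = np.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
num_filled = 0.7 * sizes**1.85

# fit a line in log-log space; its slope estimates the dimension
slope, intercept, *_ = linregress(np.log(sizes), np.log(num_filled))
```

On real simulation output the points are noisier, but the same fit applies; the slope is the estimated fractal dimension.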
The slope of a line fitted to these dots is often near 1.85, which suggests that the percolating cluster is, in fact, fractal when \py{q} is near the critical value.

\index{critical value}

When \py{q} is larger than the critical value, nearly every porous cell gets filled, so the number of wet cells is close to \py{q * size^2}, which has dimension 2.

When \py{q} is substantially smaller than the critical value, the number of wet cells is proportional to the linear size of the array, so it has dimension 1.

% TODO: Check this chapter for leftover p's that should be q's

\section{Exercises}


\begin{exercise}

In Section~\ref{fracperc} we showed that the Rule 18 CA produces a fractal. Can you find other 1-D CAs that produce fractals?

\index{Cell1D}

Note: the \py{Cell1D} object does not wrap around from the left edge to the right, which creates artifacts at the boundaries for some rules. You might want to use \py{Wrap1D}, which is a child class of \py{Cell1D} that wraps around. It is defined in \py{Cell1D.py} in the repository for this book.

\end{exercise}


\begin{exercise}

In 1990 Bak, Chen and Tang proposed a cellular automaton that is an abstract model of a forest fire. Each cell is in one of three states: empty, occupied by a tree, or on fire.

\index{Bak, Per}
\index{forest fire model}

The rules of the CA are:

\begin{enumerate}

\item An empty cell becomes occupied with probability $p$.

\item A cell with a tree burns if any of its neighbors is on fire.

\item A cell with a tree spontaneously burns, with probability $f$, even if none of its neighbors is on fire.

\item A cell with a burning tree becomes an empty cell in the next time step.

\end{enumerate}

Write a program that implements this model. You might want to inherit from \py{Cell2D}.
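If you want a starting point, here is one possible sketch; it does not use \py{Cell2D}, and the state encoding (0 for empty, 1 for tree, 2 for fire) is my choice, not the book's:

```python
import numpy as np
from scipy.signal import correlate2d

EMPTY, TREE, FIRE = 0, 1, 2

class ForestFire:
    """Stand-alone sketch of the forest fire CA."""
    kernel = np.array([[0, 1, 0],
                       [1, 0, 1],
                       [0, 1, 0]])

    def __init__(self, n, p=0.01, f=0.001):
        self.p = p
        self.f = f
        self.array = np.random.choice([EMPTY, TREE], (n, n))

    def step(self):
        a = self.array
        # count burning neighbors (cast to int for correlate2d)
        c = correlate2d((a == FIRE).astype(int), self.kernel, mode='same')
        grow = (a == EMPTY) & (np.random.random(a.shape) < self.p)
        spark = (a == TREE) & (np.random.random(a.shape) < self.f)
        spread = (a == TREE) & (c >= 1)
        a[a == FIRE] = EMPTY         # rule 4: burning trees burn down
        a[grow] = TREE               # rule 1: empty cells sprout trees
        a[spread | spark] = FIRE     # rules 2 and 3: trees catch fire

fire = ForestFire(n=50)
for _ in range(100):
    fire.step()
```

Note that all four masks are computed from the state at the beginning of the step, so the rules are applied simultaneously rather than sequentially.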
Typical values for the parameters are $p=0.01$ and $f=0.001$, but you might want to experiment with other values.

Starting from a random initial condition, run the model until it reaches a steady state where the number of trees no longer increases or decreases consistently.

In steady state, is the geometry of the forest fractal? What is its fractal dimension?

\end{exercise}



\chapter{Self-organized criticality}
\label{soc}

In the previous chapter we saw an example of a system with a critical point, and we explored one of the common properties of critical systems, fractal geometry.

\index{critical system}
\index{heavy-tailed distribution}
\index{pink noise}

%TODO: Check whether we really want to use Section titles, rather than
% numbers, in cross references.

In this chapter, we explore two other properties of critical systems: heavy-tailed distributions, which we saw in Chapter~\ref{heavytail}, and pink noise, which I'll explain in this chapter.

These properties are interesting in part because they appear frequently in nature; that is, many natural systems produce fractal-like geometry, heavy-tailed distributions, and pink noise.

This observation raises a natural question: why do so many natural systems have properties of critical systems? A possible answer is {\bf self-organized criticality} (SOC), which is the tendency of some systems to evolve toward, and stay in, a critical state.

\index{self-organized criticality}
\index{SOC}

In this chapter I'll present a {\bf sand pile model} that was the first system shown to exhibit SOC.

The code for this chapter is in {\tt chap08.ipynb} in the repository for this book.
More information about working with the code is in Section~\ref{code}.


\section{Critical Systems}

Many critical systems demonstrate common behaviors:

\begin{itemize}

\item Fractal geometry: For example, freezing water tends to form fractal patterns, including snowflakes and other crystal structures. Fractals are characterized by self-similarity; that is, parts of the pattern are similar to scaled copies of the whole.

\index{fractal geometry}
\index{self-similarity}

\item Heavy-tailed distributions of some physical quantities: For example, in freezing water the distribution of crystal sizes is characterized by a power law.

\index{long tail}
\index{power law}

\item Variations in time that exhibit {\bf pink noise}: Complex signals can be decomposed into their frequency components. In pink noise, low-frequency components have more power than high-frequency components. Specifically, the power at frequency $f$ is proportional to $1/f$.

\index{pink noise}
\index{1/f noise@$1/f$ noise}

\end{itemize}

Critical systems are usually unstable. For example, to keep water in a partially frozen state requires active control of the temperature. If the system is near the critical temperature, a small deviation tends to move the system into one phase or the other.

\index{unstable}

Many natural systems exhibit characteristic behaviors of criticality, but if critical points are unstable, they should not be common in nature. This is the puzzle Bak, Tang and Wiesenfeld address.
Their solution is called self-organized criticality (SOC), where ``self-organized'' means that from any initial condition, the system moves toward a critical state, and stays there, without external control.

\index{SOC}

\section{Sand Piles}

The sand pile model was proposed by Bak, Tang and Wiesenfeld in 1987. It is not meant to be a realistic model of a sand pile, but rather an abstraction that models physical systems with a large number of elements that interact with their neighbors.

\index{sand pile model}
\index{abstract model}
\index{grid}

The sand pile model is a 2-D cellular automaton where the state of each cell represents the slope of a part of a sand pile. During each time step, each cell is checked to see whether it exceeds a critical value, $K$, which is usually 3. If so, it ``topples'' and transfers sand to four neighboring cells; that is, the slope of the cell is decreased by 4, and each of the neighbors is increased by 1. At the perimeter of the grid, all cells are kept at slope 0, so the excess spills over the edge.

\index{cellular automaton}
\index{state}

Bak, Tang and Wiesenfeld initialize all cells at a level greater than $K$ and run the model until it stabilizes. Then they observe the effect of small perturbations: they choose a cell at random, increment its value by 1, and run the model again until it stabilizes.

For each perturbation, they measure \py{T}, the number of time steps the pile takes to stabilize, and \py{S}, the total number of cells that topple\footnote{The original paper uses a different definition of \py{S}, but most later work uses this definition.}.

%TODO: The typesetting of this footnote is broken.

\index{perturbation}

Most of the time, dropping a single grain causes no cells to topple, so \py{T=1} and \py{S=0}.
But occasionally a single grain can cause an {\bf avalanche} that affects a substantial fraction of the grid. The distributions of \py{T} and \py{S} turn out to be heavy-tailed, which supports the claim that the system is in a critical state.

\index{avalanche}

They conclude that the sand pile model exhibits ``self-organized criticality'', which means that it evolves toward a critical state without the need for external control or what they call ``fine tuning'' of any parameters. And the model stays in a critical state as more grains are added.

In the next few sections I replicate their experiments and interpret the results.


\section{Implementing the Sand Pile}

To implement the sand pile model, I define a class called \py{SandPile} that inherits from \py{Cell2D}, which is defined in \py{Cell2D.py} in the repository for this book.

\begin{code}
class SandPile(Cell2D):

    def __init__(self, n, m=None, level=9):
        m = n if m is None else m
        self.array = np.ones((n, m)) * level
\end{code}

All values in the array are initialized to \py{level}, which is generally greater than the toppling threshold, \py{K}. If \py{m} is not provided, the array is square.

\index{Cell2D}
\index{SandPile}

Here's the \py{step} method that finds all cells above \py{K} and topples them:

\begin{code}
kernel = np.array([[0, 1, 0],
                   [1,-4, 1],
                   [0, 1, 0]])

def step(self, K=3):
    toppling = self.array > K
    num_toppled = np.sum(toppling)
    c = correlate2d(toppling, self.kernel, mode='same')
    self.array += c
    return num_toppled
\end{code}

To show how \py{step} works, I'll start with a small pile that has two cells ready to topple:

\begin{code}
pile = SandPile(n=3, m=5, level=0)
pile.array[1, 1] = 4
pile.array[1, 3] = 4
\end{code}

Initially, \py{pile.array} looks like this:

\begin{code}
[[0 0 0 0 0]
 [0 4 0 4 0]
 [0 0 0 0 0]]
\end{code}

Now we can select the cells that are above
the toppling threshold:

\index{toppling threshold}

\begin{code}
toppling = pile.array > K
\end{code}

The result is a boolean array, but we can use it as if it were an array of integers like this:

\begin{code}
[[0 0 0 0 0]
 [0 1 0 1 0]
 [0 0 0 0 0]]
\end{code}

If we correlate this array with the kernel, it makes copies of the kernel at each location where \py{toppling} is 1.

\index{boolean array}

\begin{code}
c = correlate2d(toppling, kernel, mode='same')
\end{code}

And here's the result:

\begin{code}
[[ 0  1  0  1  0]
 [ 1 -4  2 -4  1]
 [ 0  1  0  1  0]]
\end{code}

Notice that where the copies of the kernel overlap, they add up.

This array contains the change for each cell, which we use to update the original array:

\begin{code}
pile.array += c
\end{code}

And here's the result.

\begin{code}
[[0 1 0 1 0]
 [1 0 2 0 1]
 [0 1 0 1 0]]
\end{code}

So that's how \py{step} works.

With \py{mode='same'}, \py{correlate2d} considers the boundary of the array to be fixed at zero, so any grains of sand that go over the edge disappear.

\py{SandPile} also provides \py{run}, which calls \py{step} until no more cells topple:

\begin{code}
def run(self):
    total = 0
    for i in itertools.count(1):
        num_toppled = self.step()
        total += num_toppled
        if num_toppled == 0:
            return i, total
\end{code}

The return value is a tuple that contains the number of time steps and the total number of cells that toppled.

\index{itertools module}

If you are not familiar with \py{itertools.count}, it is an infinite generator that counts up from the given initial value, so the \py{for} loop runs until \py{step} returns 0. You can read about the \py{itertools} module at \url{https://thinkcomplex.com/iter}.

Finally, the \py{drop} method chooses a random cell and adds
a grain of sand:

\begin{code}
def drop(self):
    a = self.array
    n, m = a.shape
    index = np.random.randint(n), np.random.randint(m)
    a[index] += 1
\end{code}

\index{NumPy}
\index{random}
\index{randint}

Let's look at a bigger example, with \py{n=20}:

\begin{code}
pile = SandPile(n=20, level=10)
pile.run()
\end{code}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap08-1.pdf}}
\caption{Sand pile model initial state (left), after 200 steps (middle), and 400 steps (right).}
\label{chap08-1}
\end{figure}

With an initial level of \py{10}, this sand pile takes 332 time steps to reach equilibrium, with a total of 53,336 topplings. Figure~\ref{chap08-1} (left) shows the configuration after this initial run. Notice that it has the repeating elements that are characteristic of fractals. We'll come back to that soon.

Figure~\ref{chap08-1} (middle) shows the configuration of the sand pile after dropping 200 grains onto random cells, each time running until the pile reaches equilibrium. The symmetry of the initial configuration has been broken; the configuration looks random.

Finally, Figure~\ref{chap08-1} (right) shows the configuration after 400 drops. It looks similar to the configuration after 200 drops. In fact, the pile is now in a steady state where its statistical properties don't change over time. I'll explain some of those statistical properties in the next section.


\section{Heavy-tailed distributions}
\label{heavysand}

If the sand pile model is in a critical state, we expect to find heavy-tailed distributions for quantities like the duration and size of avalanches.
So let's take a look.

\index{heavy-tailed distribution}

I'll make a larger sand pile, with \py{n=50} and an initial level of \py{30}, and run until equilibrium:

\begin{code}
pile2 = SandPile(n=50, level=30)
pile2.run()
\end{code}

Next, I'll run 100,000 random drops:

\begin{code}
iters = 100000
res = [pile2.drop_and_run() for _ in range(iters)]
\end{code}

As the name suggests, \py{drop_and_run} calls \py{drop} and \py{run} and returns the duration of the avalanche and the total number of cells that toppled.

\index{NumPy}
\index{transpose}

So \py{res} is a list of \py{(T, S)} tuples, where \py{T} is duration, in time steps, and \py{S} is cells toppled. We can use \py{np.transpose} to unpack \py{res} into two NumPy arrays:

\begin{code}
T, S = np.transpose(res)
\end{code}

A large majority of drops have duration 1 and no toppled cells; if we filter them out before plotting, we get a clearer view of the rest of the distribution.

\begin{code}
T = T[T>1]
S = S[S>0]
\end{code}

The distributions of \py{T} and \py{S} have many small values and a few very large ones.
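For reference, the pieces of \py{SandPile} shown in the previous sections can be assembled into a stand-alone sketch like the one below. Here \py{drop_and_run} is written from its description above, the boolean toppling array is cast to int before calling \py{correlate2d} as a precaution, and the drawing methods the book's class inherits from \py{Cell2D} are omitted:

```python
import itertools
import numpy as np
from scipy.signal import correlate2d

class SandPile:
    """Condensed, stand-alone version of the sand pile model."""

    kernel = np.array([[0, 1, 0],
                       [1, -4, 1],
                       [0, 1, 0]])

    def __init__(self, n, m=None, level=9):
        m = n if m is None else m
        self.array = np.ones((n, m)) * level

    def step(self, K=3):
        # find cells above the threshold and topple them all at once
        toppling = self.array > K
        num_toppled = np.sum(toppling)
        c = correlate2d(toppling.astype(int), self.kernel, mode='same')
        self.array += c
        return num_toppled

    def run(self):
        # step until no cells topple; return (time steps, total topplings)
        total = 0
        for i in itertools.count(1):
            num_toppled = self.step()
            total += num_toppled
            if num_toppled == 0:
                return i, total

    def drop(self):
        # add a grain of sand to a random cell
        a = self.array
        n, m = a.shape
        index = np.random.randint(n), np.random.randint(m)
        a[index] += 1

    def drop_and_run(self):
        self.drop()
        return self.run()

pile = SandPile(n=20, level=10)
steps, total = pile.run()     # run to equilibrium
T, S = pile.drop_and_run()    # one perturbation
```

After \py{run} returns, every cell should be at or below the threshold, which is an easy property to check when experimenting with variations.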
I'll use the \py{Pmf} class from \py{thinkstats2} to make a PMF of the values, that is, a map from each value to its probability of occurring (see Section~\ref{degree}).

\index{thinkstats2 module}
\index{Pmf}

\begin{code}
pmfT = Pmf(T)
pmfS = Pmf(S)
\end{code}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap08-2.pdf}}
\caption{Distribution of avalanche duration (left) and size (right), linear scale.}
\label{chap08-2}
\end{figure}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap08-3.pdf}}
\caption{Distribution of avalanche duration (left) and size (right), log-log scale.}
\label{chap08-3}
\end{figure}

Figure~\ref{chap08-2} shows the results for values less than 50.

As we saw in Section~\ref{heavytail}, we can get a clearer picture of these distributions by plotting them on a log-log scale, as shown in Figure~\ref{chap08-3}.

For values between 1 and 100, the distributions are nearly straight on a log-log scale, which is characteristic of a heavy tail. The gray lines in the figure have slopes near -1, which suggests that these distributions follow a power law with parameters near $\alpha=1$.

\index{power law}

For values greater than 100, the distributions fall away more quickly than the power law model, which means there are fewer very large values than the model predicts. One possibility is that this effect is due to the finite size of the sand pile; if so, we might expect larger piles to fit the power law better.

Another possibility, which you can explore in one of the exercises at the end of this chapter, is that these distributions do not strictly obey a power law. But even if they are not power-law distributions, they might still be heavy-tailed.

%TODO: Explain more?


\section{Fractals}
\label{sandfrac}

Another property of critical systems is fractal geometry.
The
initial configuration in Figure~\ref{chap08-1} (left) resembles a fractal,
but you can't always tell by looking.
A more reliable way to identify a fractal is to estimate its
fractal dimension, as we saw in Section~\ref{fractals}
and Section~\ref{fracperc}.

\index{fractal}

I'll start by making a bigger sand pile, with \py{n=131} and initial
level \py{22}.

\begin{code}
pile3 = SandPile(n=131, level=22)
pile3.run()
\end{code}

It takes 28,379 steps for this pile to reach equilibrium,
with more than 200 million cells toppled.

To see the resulting pattern more clearly, I select cells with
levels 0, 1, 2, and 3, and plot them separately:

\begin{code}
def draw_four(viewer, levels=range(4)):
    thinkplot.preplot(rows=2, cols=2)
    a = viewer.viewee.array

    for i, level in enumerate(levels):
        thinkplot.subplot(i+1)
        viewer.draw_array(a==level, vmax=1)
\end{code}

\py{draw_four} takes a \py{SandPileViewer} object, which is defined
in \py{Sand.py} in the repository for this book. The parameter
\py{levels} is the list of levels we want to plot; the default is
the range 0 through 3. If the sand pile has run until equilibrium,
these are the only levels that should exist.

Inside the loop, it uses \py{a==level} to make a boolean array that's
\py{True} where the array is \py{level} and \py{False} otherwise.
\py{draw_array} treats these booleans as 1s and 0s.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap08-4.pdf}}
\caption{Sand pile model in equilibrium, selecting cells with levels 0, 1, 2, and 3, left to right, top to bottom.}
\label{chap08-4}
\end{figure}

\index{box-counting algorithm}

Figure~\ref{chap08-4} shows the results for \py{pile3}. Visually,
these patterns resemble fractals, but looks can be deceiving.
To be more confident, we can estimate the fractal dimension for each
pattern using {\bf box-counting}, as we saw in Section~\ref{fractals}.

We'll count the number of
cells in a small box at the center of the pile, then see
how the number of cells increases as the box gets bigger.
Here's my implementation:

\begin{code}
def count_cells(a):
    n, m = a.shape
    end = min(n, m)

    res = []
    for i in range(1, end, 2):
        top = (n-i) // 2
        left = (m-i) // 2
        box = a[top:top+i, left:left+i]
        total = np.sum(box)
        res.append((i, i**2, total))

    return np.transpose(res)
\end{code}

The parameter, \py{a}, is a boolean array.
The size of the box is initially 1. Each time through the
loop, it increases by 2 until it reaches \py{end}, which is the
smaller of \py{n} and \py{m}.

\index{boolean array}

Each time through the loop, \py{box} is a set of cells with width
and height \py{i}, centered in the array. \py{total} is the number
of ``on'' cells in the box.

\index{NumPy}
\index{transpose}

The result is a list of tuples, where each tuple contains
\py{i}, \py{i**2}, and the number of cells in the box.
When we pass this result to \py{transpose}, NumPy converts it to
an array with three columns, and then {\bf transposes} it; that is,
it makes the columns into rows and the rows into columns.
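As a sanity check on \py{count_cells} (a sketch assuming only NumPy), we can apply it to an array that is entirely ``on'': every centered box is full, so the count in each box should equal $i^2$ exactly, the signature of dimension 2.

```python
import numpy as np

def count_cells(a):
    """Count 'on' cells in centered boxes of increasing size."""
    n, m = a.shape
    end = min(n, m)

    res = []
    for i in range(1, end, 2):
        top = (n-i) // 2
        left = (m-i) // 2
        box = a[top:top+i, left:left+i]
        total = np.sum(box)
        res.append((i, i**2, total))

    return np.transpose(res)

# For a fully "on" array, every box is full, so total == i**2.
a = np.ones((11, 11), dtype=bool)
steps, steps2, cells = count_cells(a)
print(np.array_equal(cells, steps2))  # True
```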
The result6600is an array with 3 rows: \py{i}, \py{i**2}, and \py{total}.66016602\index{fractal dimension}66036604Here's how we use \py{count_cells}:66056606\begin{code}6607res = count_cells(pile.array==level)6608steps, steps2, cells = res6609\end{code}66106611The first line creates a boolean array that contains \py{True}6612where the array equals \py{level}, calls6613\py{count_cells}, and gets an array with three rows.66146615The second line unpacks the rows and assigns them to \py{steps},6616\py{steps2}, and \py{cells}, which we can plot like this:66176618\begin{code}6619thinkplot.plot(steps, steps2, linestyle='dashed')6620thinkplot.plot(steps, cells)6621thinkplot.plot(steps, steps, linestyle='dashed')6622\end{code}66236624\begin{figure}6625\centerline{\includegraphics[height=4in]{figs/chap08-5.pdf}}6626\caption{Box counts for cells with levels 0, 1, 2, and 3, compared to dashed lines with slopes 1 and 2.}6627\label{chap08-5}6628\end{figure}66296630Figure~\ref{chap08-5} shows the results.6631On a log-log scale, the cell counts form nearly straight lines,6632which indicates that we are measuring fractal dimension over6633a valid range of box sizes.66346635\index{log-log scale}66366637To estimate the slopes of these lines, we can use the SciPy function6638\py{linregress}, which fits a line to the data by linear regression6639(see \url{https://thinkcomplex.com/regress}).66406641\index{SciPy}6642\index{linregress}66436644\begin{code}6645from scipy.stats import linregress66466647params = linregress(np.log(steps), np.log(cells))6648slope = params[0]6649\end{code}66506651The estimated fractal dimensions are:66526653\begin{code}66540 1.87166551 3.50266562 1.78166573 2.0846658\end{code}66596660The fractal dimension for levels 0, 1, and 2 seems to be clearly6661non-integer, which indicates that the image is fractal.66626663The estimate for level 3 is indistinguishable from66642, but given the results for the other values, the apparent curvature6665of the line, and the 
appearance of the pattern, it seems likely that
it is also fractal.

\index{fractal dimension}

One of the exercises in the notebook for this chapter asks you to run
this analysis again with different values of \py{n} and the initial
\py{level} to see if the estimated dimensions are consistent.


\section{Pink noise}

The title of the original paper that presented the sand pile model
is ``Self-Organized Criticality: An Explanation of $1/f$ Noise''.
You can read it at \url{https://thinkcomplex.com/bak}.

As the subtitle suggests, Bak, Tang and Wiesenfeld were trying to
explain why many natural and engineered systems exhibit $1/f$
noise, which is also known as ``flicker noise'' and ``pink noise''.

\index{pink noise}
\index{flicker noise}
\index{$1/f$ noise}
\index{signal}
\index{power spectrum}
\index{noise}

To understand pink noise, we have to take a detour to understand
signals, power spectrums, and noise.

\begin{description}

\item[Signal:] A {\bf signal} is any quantity that varies in time.
One example is sound, which is variation in air density. In the sand
pile model, the signals we'll consider are avalanche durations and
sizes as they vary over time.

\item[Power spectrum:] Any signal can be decomposed into a set of
frequency components with different levels of {\bf power}, which is
related to amplitude or volume. The {\bf power spectrum} of a signal
is a function that shows the power of each frequency component.

\item[Noise:] In common use, {\bf noise} is usually an unwanted sound,
but in the context of signal processing, it is a signal that
contains many frequency components.

\end{description}

There are many kinds of noise.
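Before looking at specific kinds, it may help to make ``power spectrum'' concrete. Here is a minimal sketch (plain NumPy, not part of the book's code; the sampling rate and frequency are made up) showing that the spectrum of a pure sine wave is concentrated at the sine's frequency:

```python
import numpy as np

fs = 1000                       # samples per second (arbitrary)
t = np.arange(0, 1.0, 1/fs)     # one second of samples
f0 = 50                         # frequency of the sine, in Hz
signal = np.sin(2 * np.pi * f0 * t)

# The power of each frequency component is the squared magnitude
# of the corresponding Fourier coefficient.
spectrum = np.abs(np.fft.rfft(signal))**2
freqs = np.fft.rfftfreq(len(signal), d=1/fs)

# The spectrum peaks at the bin corresponding to 50 Hz.
peak_freq = freqs[np.argmax(spectrum)]
print(peak_freq)
```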
For example, ``white noise'' is a
signal that has components with equal power over a wide range of
frequencies.

\index{white noise}
\index{frequency}
\index{power}
\index{red noise}

Other kinds of noise have different relationships between frequency
and power. In ``red noise'', the power at frequency $f$ is
$1/f^2$, which we can write like this:
%
\[ P(f) = 1/f^2 \]
%
We can generalize this equation by replacing the exponent $2$ with
a parameter $\beta$:
%
\[ P(f) = 1/f^\beta \]
%
When $\beta=0$, this equation describes white noise; when $\beta=2$ it
describes red noise. When the parameter is near 1, the result is called
$1/f$ noise. More generally, noise with any value between 0 and 2
is called ``pink'', because it's between white and red.

\index{logarithm}

We can use this relationship to derive a test for pink noise.
Taking the log of both sides yields
%
\[ \log P(f) = -\beta \log f \]
%
So if we plot $P(f)$ versus $f$ on a log-log scale, we
expect a straight line with slope $-\beta$.

What does this have to do with the sand pile model? Suppose that every
time a cell topples, it makes a sound. If we record a sand pile model
while it's running, what would it sound like? In the next section,
we'll simulate the sound of the sand pile model and see if it is pink
noise.

\index{sand pile model}

\section{The sound of sand}

As my implementation of \py{SandPile} runs, it records the number of
cells that topple during each time step, accumulating the results in
a list called \py{toppled_seq}.
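Before analyzing the sand pile's signal, we can try the slope test on synthetic data. As a sketch (not part of the book's code), Brownian or ``red'' noise can be generated by cumulatively summing white noise; a line fit to its power spectrum on a log-log scale should have a slope near $-2$:

```python
import numpy as np
from scipy.signal import welch
from scipy.stats import linregress

rng = np.random.default_rng(17)
white = rng.normal(size=2**16)
red = np.cumsum(white)        # Brownian (red) noise: P(f) ~ 1/f^2

freqs, powers = welch(red, nperseg=1024)

# Fit log P(f) vs log f at low frequencies; the slope estimates -beta.
mask = (freqs > 0) & (freqs < 0.1)
slope = linregress(np.log(freqs[mask]), np.log(powers[mask]))[0]
print(slope)                  # expected to be near -2
```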
After running the model
in Section~\ref{heavysand}, we can extract the resulting signal:

\begin{code}
signal = pile2.toppled_seq
\end{code}

To compute the power spectrum of this signal, we can use the SciPy
function \py{welch}:

\begin{code}
from scipy.signal import welch

nperseg = 2048
freqs, spectrum = welch(signal, nperseg=nperseg, fs=nperseg)
\end{code}

\index{SciPy}
\index{Welch's method}

This function uses Welch's method, which splits the signal into
segments and computes the power spectrum of each segment. The result
is typically noisy, so Welch's method averages across segments to
estimate the average power at each frequency. For more about Welch's
method, see \url{https://thinkcomplex.com/welch}.

The parameter \py{nperseg} specifies the number of time steps per
segment. With longer segments, we can estimate the power for more
frequencies. With shorter segments, we get better estimates for each
frequency. The value I chose, 2048, balances these tradeoffs.

\index{sampling frequency}

The parameter \py{fs} is the ``sampling frequency'', which is the
number of data points in the signal per unit of time. By setting
\py{fs=nperseg}, we get a range of frequencies from 0 to
\py{nperseg/2}. This range is convenient, but because the units of
time in the model are arbitrary, it doesn't mean much.

The return values, \py{freqs} and \py{spectrum}, are NumPy arrays
containing the frequencies of the components and their corresponding
powers, which we can plot.
Figure~\ref{chap08-6} shows the result.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap08-6.pdf}}
\caption{Power spectrum of the number of toppled cells over time, log-log scale.}
\label{chap08-6}
\end{figure}

For frequencies between
10 and 1000 (in arbitrary units), the spectrum falls on a straight
line, which is what we expect for pink or red noise.

The gray line in the figure has slope $-1.58$, which indicates that
%
\[ \log P(f) \sim -\beta \log f \]
%
with parameter $\beta=1.58$, which is the same parameter reported by Bak,
Tang, and Wiesenfeld. This result confirms that the sand pile model
generates pink noise.


\section{Reductionism and Holism}
\label{model2}

The original paper by Bak, Tang and Wiesenfeld is one of
the most frequently cited papers of the last few decades.
Some subsequent papers have reported other systems that are apparently
self-organized critical (SOC). Others have studied the sand pile model
in more detail.

\index{sand pile model}

As it turns out, the sand pile model is not a good model of a
sand pile. Sand is dense and not very sticky, so momentum has a
non-negligible effect on the behavior of avalanches. As a result,
there are fewer very large and very small avalanches than the model
predicts, and the distribution might not be heavy-tailed.

Bak has suggested that this observation misses the point.
The sand pile model is not meant to be a realistic model of a sand
pile; it is meant to be a simple example of a broad category of
models.

To understand this point, it is useful to think about two
kinds of models, {\bf reductionist} and {\bf holistic}. A
reductionist model describes a system by describing its parts
and their interactions.
When a reductionist model is used
as an explanation, it depends on an analogy between the
components of the model and the components of the system.

\index{reductionism}
\index{holism}
\index{ideal gas law}

For example, to explain why the ideal gas law holds, we can model the
molecules that make up a gas as point masses and model their
interactions as elastic collisions. If you simulate or analyze this
model, you find that it obeys the ideal gas law. This model is
satisfactory to the degree that molecules in a gas behave like
molecules in the model. The analogy is between the parts of the
system and the parts of the model.

\index{analogy}

\begin{figure}
\centerline{\includegraphics[width=5in]{figs/model2.pdf}}
\caption{The logical structure of a holistic model.\label{fig.model2}}
\end{figure}

Holistic models are more focused on similarities between systems and
less interested in analogous parts. A holistic approach to modeling
consists of these steps:

\index{holistic model}

\begin{itemize}

\item Observe a behavior that appears in a variety of systems.

\item Find a simple model that demonstrates that behavior.

\item Identify the elements of the model that are necessary and
sufficient to produce the behavior.

\end{itemize}

For example, in {\em The Selfish Gene}, Richard Dawkins suggests that
genetic evolution is just one example of an evolutionary system.
He
identifies the essential elements of the category --- discrete
replicators, variability, and differential reproduction --- and proposes
that any system with these elements will show evidence of evolution.

\index{Selfish Gene@{\em The Selfish Gene}}
\index{Dawkins, Richard}
\index{evolution}

As another example of an evolutionary system, he proposes ``memes'',
which are thoughts or behaviors that are replicated by transmission
from person to person\footnote{This use of ``meme'' is original to
Dawkins, and predates the distantly-related use of the word on the
Internet by about 20 years.}. As memes compete for the resource of
human attention, they evolve in ways that are similar to genetic
evolution.

\index{meme}
\index{replicator}

Critics of the meme model have pointed out that
memes are a poor analogy for genes; they differ from genes in many
obvious ways. Dawkins has argued that these differences are
beside the point because memes are not {\em supposed} to be analogous
to genes. Rather, memes and genes are examples of the same
category: evolutionary systems. The differences between them
emphasize the real point, which is that evolution is a general model
that applies to many seemingly disparate systems. The logical
structure of this argument is shown in Figure~\ref{fig.model2}.

\index{gene}
\index{genetics}
\index{self-organized criticality}

Bak has made a similar argument that self-organized criticality is a
general model for a broad category of systems:

\begin{quote}
Since these phenomena appear everywhere, they cannot depend on any
specific detail whatsoever...
If the physics of a large class of
problems is the same, this gives [the theorist] the option of selecting
the {\em simplest} possible [model] belonging to that class for detailed
study.\footnote{Bak, {\em How Nature Works}, Springer-Verlag 1996, page 43.}
\end{quote}

\index{Bak, Per}

Many natural systems demonstrate behaviors characteristic of critical
systems. Bak's explanation for this prevalence is that these systems
are examples of the broad category of self-organized criticality.
There are two ways to support this argument. One is to build
a realistic model of a particular system and show that the model
exhibits SOC. The second is to show that SOC is a feature of many
diverse models, and to identify the essential characteristics
those models have in common.

\index{SOC}

The first approach, which I characterize as reductionist, can explain
the behavior of a particular system. The second approach, which I am
calling holistic, can explain the prevalence of criticality in
natural systems. They are different models with different purposes.

\index{prevalence}

For reductionist models, realism is the primary virtue, and simplicity
is secondary. For holistic models, it is the other way around.


\section{SOC, causation, and prediction}

If a stock market index drops by a fraction of a percent in a
day, there is no need for an explanation. But if it drops 10\%,
people want to know why.
Pundits
on television are willing to offer explanations, but the real
answer may be that there is no explanation.

\index{stock market}

Day-to-day variability in the stock market shows evidence of
criticality: the distribution of value changes is heavy-tailed
and the time series exhibits pink noise.
If the stock market is a critical system, we
should expect occasional large changes as part of the ordinary
behavior of the market.

The distribution of earthquake sizes is also heavy-tailed,
and there are simple models of the dynamics of geological faults
that might explain this behavior. If these models are right,
they imply that large earthquakes are not exceptional; that is,
they do not require explanation any more than small earthquakes do.

\index{earthquake}
\index{prediction}
\index{causation}

Similarly, Charles Perrow has suggested that failures in large
engineered systems, like nuclear power plants, are like avalanches
in the sand pile model. Most failures are small, isolated, and
harmless, but occasionally a coincidence of bad fortune yields a
catastrophe. When big accidents occur, investigators go looking for
the cause, but if Perrow's ``normal accident theory'' is correct,
there may be no special cause of large failures.

\index{normal accident theory}
\index{Perrow, Charles}

These conclusions are not comforting. Among other things, they
imply that large earthquakes and some kinds of accidents are
fundamentally unpredictable. It is impossible to look at the
state of a critical system and say whether a large avalanche
is ``due''. If the system is in a critical state, then a large
avalanche is always possible.
It just depends on the
next grain of sand.

In a sand pile model, what is the cause of a large avalanche?
Philosophers sometimes distinguish the {\bf proximate} cause, which is
most immediately responsible, from the {\bf ultimate} cause, which is
considered a deeper kind of explanation (see
\url{https://thinkcomplex.com/cause}).

\index{proximate cause}
\index{ultimate cause}

In the sand pile model, the proximate cause of an avalanche is
a grain of sand, but the grain that causes a large avalanche
is identical to every other grain, so it offers no special explanation.
The ultimate cause of a large avalanche is the structure and
dynamics of the system as a whole: large avalanches occur because
they are a property of the system.

\index{avalanche}

Many social phenomena, including wars, revolutions, epidemics,
inventions, and terrorist attacks, are characterized by heavy-tailed
distributions. If these distributions are prevalent because social
systems are SOC, major historical events may be fundamentally
unpredictable and unexplainable.

\index{heavy-tailed distribution}


\section{Exercises}

The code for this chapter is in the Jupyter notebook {\tt chap08.ipynb}
in the repository for this book. Open this notebook, read the code,
and run the cells. You can use this notebook to work on the
following exercises. My solutions are in {\tt chap08soln.ipynb}.

\begin{exercise}

To test whether the distributions of \py{T} and \py{S} are
heavy-tailed, we plotted their PMFs on a log-log scale, which is
what Bak, Tang and Wiesenfeld show in their paper. But as we saw in
Section~\ref{cdf}, this visualization can obscure the shape of the
distribution. Using the same data, make a plot that shows the
cumulative distributions (CDFs) of \py{S} and \py{T}. What can you
say about their shape? Do they follow a power law?
Are they heavy-tailed?

\index{CDF}
\index{cumulative distribution function}
\index{log-log scale}

You might find it helpful to plot the CDFs on a log-x scale and on a
log-log scale.

\end{exercise}

\begin{exercise}

In Section~\ref{sandfrac} we showed that the initial configuration of
the sand pile model produces fractal patterns. But after we drop a
large number of random grains, the patterns look more random.

\index{fractal}

Starting with the example in Section~\ref{sandfrac}, run the sand pile
model for a while and then compute fractal dimensions for each of the
4 levels. Is the sand pile model fractal in steady state?

\end{exercise}

\begin{exercise}

Another version of the sand pile model, called the ``single source''
model, starts from a different initial condition: instead of all cells
at the same level, all cells are set to 0 except the center cell,
which is set to a large value. Write a function that creates a
\py{SandPile} object, sets up the single source initial condition, and
runs until the pile reaches equilibrium. Does the result appear to be
fractal?

\index{single source sand pile}

You can read more about this version of the sand pile model at
\url{https://thinkcomplex.com/sand}.

\end{exercise}


\begin{exercise}

In their 1989 paper, Bak, Chen and Creutz suggest that the Game of
Life is a self-organized critical system (see
\url{https://thinkcomplex.com/bak89}).

\index{Game of Life}
\index{self-organized criticality}
\index{SOC}

To replicate their tests, start with a random configuration and run
the GoL CA until it stabilizes. Then choose a random cell and flip it.
Run the CA until
it stabilizes again, keeping track of \py{T}, the number
of time steps it takes, and \py{S}, the number of cells affected.
Repeat for a large number of trials and plot the distributions
of \py{T} and \py{S}. Also, estimate the power spectrums of \py{T}
and \py{S} as signals in time, and see if they are consistent with
pink noise.

%TODO: Do this exercise (project idea).

\end{exercise}


\begin{exercise}

In {\it The Fractal Geometry of Nature}, Benoit Mandelbrot proposes
what he calls a ``heretical'' explanation for the prevalence of
heavy-tailed distributions in natural systems. It may not
be, as Bak suggests, that many systems can generate this behavior in
isolation. Instead there may be only a few, but interactions between
systems might cause the behavior to propagate.

\index{Mandelbrot, Benoit}
\index{Fractal Geometry of Nature@{\it The Fractal Geometry of Nature}}
\index{heavy-tailed distribution}

To support this argument, Mandelbrot points out:

\begin{itemize}

\item The distribution of observed data is often ``the joint
effect of a fixed underlying {\em true distribution} and a highly
variable {\em filter}''.

\item Heavy-tailed distributions are robust to filtering; that is,
``a wide variety of filters leave their asymptotic behavior
unchanged''.

\end{itemize}

What do you think of this argument? Would you characterize
it as reductionist or holist?

\index{reductionism}
\index{holism}

\end{exercise}


\begin{exercise}

Read about the ``Great Man'' theory of history at
\url{https://thinkcomplex.com/great}.
What implication
does self-organized criticality have for this theory?

\index{Great Man theory}
\end{exercise}



\chapter{Agent-based models}
\label{agent-based}

The models we have seen so far might be characterized as ``rule-based''
in the sense that they involve systems governed by simple rules. In this
and the following chapters, we explore {\bf agent-based models}.

\index{agent-based model}

Agent-based models include {\bf agents} that are
intended to model people and other entities that gather
information about the world, make decisions, and take actions.

The agents are usually situated in space or in a network, and
interact with each other locally. They usually have imperfect or
incomplete information about the world.

Often there are differences among agents, unlike previous models where
all components are identical. And agent-based models often include
randomness, either among the agents or in the world.

Since the 1970s, agent-based modeling has become an important tool in
economics, other social sciences, and some natural sciences.

Agent-based models are useful for modeling the dynamics of systems
that are not in equilibrium (although they are also used to study
equilibrium). And they are particularly useful for understanding
relationships between individual decisions and system behavior.

The code for this chapter is in \py{chap09.ipynb}, which is a
Jupyter notebook in the repository for this book. For more information
about working with this code, see Section~\ref{code}.


\section{Schelling's Model}

In 1969 Thomas Schelling published ``Models of Segregation'',
which proposed a simple model of racial segregation. You can read it
at \url{https://thinkcomplex.com/schell}.

The Schelling model of the world is a grid where each cell represents
a house.
The houses are occupied by two kinds of agents,
labeled red and blue, in roughly equal numbers. About 10\% of the
houses are empty.

\index{grid}
\index{Schelling, Thomas}

At any point in time, an agent might be happy or unhappy, depending
on the other agents in the neighborhood, where the
``neighborhood'' of each house is the set of eight adjacent cells.
In one version of the model, agents are happy if they have at least
two neighbors like themselves, and unhappy if they have one or zero.

\index{agent-based model}
\index{neighborhood}

The simulation proceeds by choosing an agent at random and checking
to see whether they are happy. If so, nothing happens; if not,
the agent chooses one of the unoccupied cells at random and moves.

You will not be surprised to hear that this model leads to some
segregation, but you might be surprised by the degree. From a random
starting point, clusters of similar agents form almost immediately.
The clusters grow and coalesce over time until there are a small
number of large clusters and most agents live in homogeneous
neighborhoods.

\index{cluster}
\index{segregation}

If you did not know the process and only saw the result, you might
assume that the agents were racist, but in fact all of them
would be perfectly happy in a mixed neighborhood. Since they prefer
not to be greatly outnumbered, they might be considered mildly
xenophobic. Of course, these agents are a wild simplification of
real people, so it may not be appropriate to apply these descriptions
at all.

\index{racism}
\index{xenophobia}

Racism is a complex human problem; it is hard to imagine that such a
simple model could shed light on it.
But in fact it provides a strong
argument about the relationship between a system and its parts: if you
observe segregation in a real city, you cannot conclude that
individual racism is the immediate cause, or even that the people in
the city are racists.

\index{causation}

Of course, we have to keep in mind the limitations of this argument:
Schelling's model demonstrates a possible cause of segregation, but
says nothing about actual causes.


\section{Implementation of Schelling's model}

To implement Schelling's model, I wrote yet another class that
inherits from \py{Cell2D}:

\begin{code}
class Schelling(Cell2D):

    def __init__(self, n, p):
        self.p = p
        choices = [0, 1, 2]
        probs = [0.1, 0.45, 0.45]
        self.array = np.random.choice(choices, (n, n), p=probs)
\end{code}

\py{n} is the size of the grid,
and \py{p} is the threshold on the fraction of similar neighbors.
For example, if \py{p=0.3}, an agent will be unhappy if fewer
than 30\% of their neighbors are the same color.

\index{NumPy}
\index{random}
\index{choice}

\py{array} is a NumPy array where each cell is 0 if empty, 1 if
occupied by a red agent, and 2 if occupied by a blue agent.
Initially 10\% of the cells are empty, 45\% red, and 45\% blue.

The \py{step} function for Schelling's model is substantially more
complicated than previous examples.
If you are not
interested in the details, you can skip to the next section.
But if you stick around, you might pick up some NumPy tips.

\index{boolean array}

First, I make boolean arrays that indicate which cells are red, blue,
and empty:

\begin{code}
a = self.array
red = a==1
blue = a==2
empty = a==0
\end{code}

Then I use \py{correlate2d} to count, for each location,
the number of neighboring cells that are red, blue, and non-empty.
We saw \py{correlate2d} in Section~\ref{implife}.

\index{SciPy}
\index{correlate2d}

\begin{code}
options = dict(mode='same', boundary='wrap')

kernel = np.array([[1, 1, 1],
                   [1, 0, 1],
                   [1, 1, 1]], dtype=np.int8)

num_red = correlate2d(red, kernel, **options)
num_blue = correlate2d(blue, kernel, **options)
num_neighbors = num_red + num_blue
\end{code}

\py{options} is a dictionary that contains the options we pass to
\py{correlate2d}. With \py{mode='same'}, the result is the same size
as the input. With \py{boundary='wrap'}, the top edge is wrapped to
meet the bottom, and the left edge is wrapped to meet the right.

\py{kernel} indicates that we want to consider the eight neighbors
that surround each cell.

After computing \py{num_red} and \py{num_blue}, we can compute the
fraction of neighbors, for each location, that are red and blue:

\begin{code}
frac_red = num_red / num_neighbors
frac_blue = num_blue / num_neighbors
\end{code}

Then, we can compute the fraction of neighbors, for each agent, that
are the same color as the agent. I use \py{np.where}, which is like
an element-wise \py{if} expression.
The first parameter is a condition that selects elements from
the second or third parameter.

\index{NumPy}
\index{where}

\begin{code}
frac_same = np.where(red, frac_red, frac_blue)
frac_same[empty] = np.nan
\end{code}

In this case, wherever \py{red} is \py{True}, \py{frac_same} gets
the corresponding element of \py{frac_red}. Where \py{red} is
\py{False}, \py{frac_same} gets the corresponding element of
\py{frac_blue}.
Finally, where \py{empty} indicates that a cell is empty,
\py{frac_same} is set to \py{np.nan}, which is a special value that
indicates ``Not a Number''.

\index{NaN}
\index{Not a Number}

Now we can identify the locations of the unhappy agents:

\begin{code}
unhappy = frac_same < self.p
unhappy_locs = locs_where(unhappy)
\end{code}

\py{locs_where} is a wrapper function for \py{np.nonzero}:

\begin{code}
def locs_where(condition):
    return list(zip(*np.nonzero(condition)))
\end{code}

\py{np.nonzero} takes an array and returns the coordinates of
all non-zero cells; the result is a tuple of arrays, one for
each dimension. Then \py{locs_where} uses \py{list} and \py{zip} to
convert this result to a list of coordinate pairs.

\index{nonzero}
\index{zip}

Similarly, \py{empty_locs} is a list that contains the coordinates
of the empty cells:

\begin{code}
empty_locs = locs_where(empty)
\end{code}

Now we get to the core of the simulation.
We loop through the unhappy agents and move them:

\begin{code}
num_empty = np.sum(empty)
for source in unhappy_locs:
    i = np.random.randint(num_empty)
    dest = empty_locs[i]

    a[dest] = a[source]
    a[source] = 0
    empty_locs[i] = source
\end{code}

\index{NumPy}
\index{random}
\index{randint}

\py{i} is the index of a random empty cell; \py{dest} is a tuple containing the coordinates of the empty cell.

\index{tuple}

In order to move an agent, we copy its value (1 or 2) from \py{source} to \py{dest}, and then set the value of \py{source} to 0 (since it is now empty).

Finally, we replace the entry in \py{empty_locs} with \py{source}, so the cell that just became empty can be chosen by the next agent.


\section{Segregation}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap09-1.pdf}}
\caption{Schelling's segregation model with \py{n=100}, initial
condition (left), after 2 steps (middle), and after 10 steps (right).}
\label{chap09-1}
\end{figure}

%TODO: Another caption with \py in it.

Now let's see what happens when we run the model.
I'll start with \py{n=100} and \py{p=0.3}, and run for 10 steps.

\begin{code}
grid = Schelling(n=100, p=0.3)
for i in range(10):
    grid.step()
\end{code}

Figure~\ref{chap09-1} shows the initial configuration (left), the state of the simulation after 2 steps (middle), and the state after 10 steps (right).

\index{segregation}
\index{cluster}

Clusters form almost immediately and grow quickly, until most agents live in highly segregated neighborhoods.

As the simulation runs, we can compute the degree of segregation, which is the average, across agents, of the fraction of neighbors who are the same color as the agent:

\begin{code}
np.nanmean(frac_same)
\end{code}

\index{NumPy}
\index{nanmean}

In Figure~\ref{chap09-1}, the average fraction of similar neighbors is 50\% in the initial configuration, 65\% after two steps, and 76\% after 10 steps!

Remember that when \py{p=0.3} the agents would be happy if 3 of 8 neighbors were their own color, but they end up living in neighborhoods where 6 or 7 of their neighbors are their own color, typically.


\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap09-2.pdf}}
\caption{Degree of segregation in Schelling's model, over time,
for a range of \py{p}.}
\label{chap09-2}
\end{figure}

Figure~\ref{chap09-2} shows how the degree of segregation increases and where it levels off for several values of \py{p}.
When \py{p=0.4}, the degree of segregation in steady state is about 82\%, and a majority of agents have no neighbors with a different color.

These results are surprising to many people, and they make a striking example of the unpredictable relationship between individual decisions and system behavior.


\section{Sugarscape}

In 1996 Joshua Epstein and Robert Axtell proposed Sugarscape, an agent-based model of an ``artificial society'' intended to support experiments related to economics and other social sciences.

\index{Epstein, Joshua}
\index{Axtell, Robert}
\index{Sugarscape}
\index{artificial society}

Sugarscape is a versatile model that has been adapted for a wide variety of topics. As examples, I will replicate the first few experiments from Epstein and Axtell's book, {\it Growing Artificial Societies}.

\index{grid}

In its simplest form, Sugarscape is a model of a simple economy where agents move around on a 2-D grid, harvesting and accumulating ``sugar'', which represents economic wealth.  Some parts of the grid produce more sugar than others, and some agents are better at finding it than others.

\index{wealth}
\index{inequality}

This version of Sugarscape is often used to explore and explain the distribution of wealth, in particular the tendency toward inequality.

In the Sugarscape grid, each cell has a capacity, which is the maximum amount of sugar it can hold.
In the original configuration, there are two high-sugar regions, with capacity 4, surrounded by concentric rings with capacities 3, 2, and 1.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap09-3.pdf}}
\caption{Replication of the original Sugarscape model: initial
configuration (left), after 2 steps (middle) and after 100 steps (right).}
\label{chap09-3}
\end{figure}

Figure~\ref{chap09-3} (left) shows the initial configuration, with the darker areas indicating cells with higher capacity, and small dots representing the agents.

\index{agent}

Initially there are 400 agents placed at random locations.  Each agent has three randomly-chosen attributes:

\begin{description}

\item[Sugar:] Each agent starts with an endowment of sugar chosen from a uniform distribution between 5 and 25 units.

\item[Metabolism:] Each agent has some amount of sugar they must consume per time step, chosen uniformly between 1 and 4.

\item[Vision:] Each agent can ``see'' the amount of sugar in nearby cells and move to the cell with the most, but some agents can see and move farther than others.  The distance agents see is chosen uniformly between 1 and 6.

\end{description}

During each time step, agents move one at a time in a random order. Each agent follows these rules:

\begin{itemize}

\item The agent surveys \py{k} cells in each of the 4 compass directions, where \py{k} is the range of the agent's vision.

\item It chooses the unoccupied cell with the most sugar.  In case of a tie, it chooses the closer cell; among cells at the same distance, it chooses randomly.

\item The agent moves to the selected cell and harvests the sugar, adding the harvest to its accumulated wealth and leaving the cell empty.

\item The agent consumes some part of its wealth, depending on its metabolism.
If the resulting total is negative, the agent ``starves'' and is removed.

\end{itemize}

After all agents have executed these steps, the cells grow back some sugar, typically 1 unit, but the total sugar in each cell is bounded by its capacity.

Figure~\ref{chap09-3} (middle) shows the state of the model after two steps.  Most agents are moving toward the areas with the most sugar. Agents with high vision move the fastest; agents with low vision tend to get stuck on the plateaus, wandering randomly until they get close enough to see the next level.

Agents born in the areas with the least sugar are likely to starve unless they have a high initial endowment and high vision.

Within the high-sugar areas, agents compete with each other to find and harvest sugar as it grows back.  Agents with high metabolism or low vision are the most likely to starve.

When sugar grows back at 1 unit per time step, there is not enough sugar to sustain the 400 agents we started with.  The population drops quickly at first, then more slowly, and levels off around 250.

Figure~\ref{chap09-3} (right) shows the state of the model after 100 time steps, with about 250 agents.  The agents who survive tend to be the lucky ones, born with high vision and/or low metabolism. Having survived to this point, they are likely to survive forever, accumulating unbounded stockpiles of sugar.


\section{Wealth inequality}

In its current form, Sugarscape models a simple ecology, and could be used to explore the relationship between the parameters of the model, like the growth rate and the attributes of the agents, and the carrying capacity of the system (the number of agents that survive in steady state).
And it models a form of natural selection, where agents with higher ``fitness'' are more likely to survive.

\index{fitness}

The model also demonstrates a kind of wealth inequality, with some agents accumulating sugar faster than others.  But it would be hard to say anything specific about the distribution of wealth because it is not ``stationary''; that is, the distribution changes over time and does not reach a steady state.

\index{stationary}

However, if we give the agents finite lifespans, the model produces a stationary distribution of wealth.  Then we can run experiments to see what effect the parameters and rules have on this distribution.

\index{distribution}

In this version of the model, agents have an age that gets incremented each time step, and a random lifespan chosen from a uniform distribution between 60 and 100.  If an agent's age exceeds its lifespan, it dies.

When an agent dies, from starvation or old age, it is replaced by a new agent with random attributes, so the number of agents is constant.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap09-4.pdf}}
\caption{Distribution of sugar (wealth) after 100, 200, 300, and
400 steps (gray lines) and 500 steps (dark line).  Linear scale (left)
and log-x scale (right).}
\label{chap09-4}
\end{figure}

Starting with 250 agents (which is close to carrying capacity) I run the model for 500 steps.  After each 100 steps, I plot the cumulative distribution function (CDF) of sugar accumulated by the agents.  We saw CDFs in Section~\ref{cdf}.  Figure~\ref{chap09-4} shows the results on a linear scale (left) and a log-x scale (right).

\index{CDF}
\index{cumulative distribution function}

After about 200 steps (which is twice the longest lifespan) the distribution doesn't change much.
And it is skewed to the right.

Most agents have little accumulated wealth: the 25th percentile is about 10 and the median is about 20.  But a few agents have accumulated much more: the 75th percentile is about 40, and the highest value is more than 150.

On a log scale the shape of the distribution resembles a Gaussian or normal distribution, although the right tail is truncated.  If it were actually normal on a log scale, the distribution would be lognormal, which is a heavy-tailed distribution.  And in fact, the distribution of wealth in practically every country, and in the world, is a heavy-tailed distribution.

\index{Gaussian distribution}
\index{normal distribution}
\index{lognormal distribution}
\index{heavy-tailed distribution}

It would be too much to claim that Sugarscape explains why wealth distributions are heavy-tailed, but the prevalence of inequality in variations of Sugarscape suggests that inequality is characteristic of many economies, even very simple ones.  And experiments with rules that model taxation and other income transfers suggest that it is not easy to avoid or mitigate.


\section{Implementing Sugarscape}

Sugarscape is more complicated than the previous models, so I won't present the entire implementation here.  I will outline the structure of the code and you can see the details in the Jupyter notebook for this chapter, {\tt chap09.ipynb}, which is in the repository for this book.
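As a taste of those details, here is one way the randomly-chosen attributes from the previous section might be initialized (a sketch under my own assumptions; the notebook's actual code may differ, for example in whether draws are integer or continuous):

\begin{code}
import numpy as np

def random_attrs(rng=np.random):
    """Draw one agent's attributes from the ranges described above."""
    sugar = rng.uniform(5, 25)        # initial endowment
    metabolism = rng.uniform(1, 4)    # sugar consumed per time step
    vision = rng.randint(1, 7)        # how far the agent can see (1..6)
    return sugar, metabolism, vision

attrs = [random_attrs() for _ in range(400)]
\end{code}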
If you are not interested in the details, you can skip this section.

During each step, the agent moves, harvests sugar, and ages. Here is the \py{Agent} class and its \py{step} method:

\begin{code}
class Agent:

    def step(self, env):
        self.loc = env.look_and_move(self.loc, self.vision)
        self.sugar += env.harvest(self.loc) - self.metabolism
        self.age += 1
\end{code}

\index{Sugarscape}

The parameter \py{env} is a reference to the environment, which is a \py{Sugarscape} object.  It provides methods \py{look_and_move} and \py{harvest}:

\begin{itemize}

\item \py{look_and_move} takes the location of the agent, which is a tuple of coordinates, and the range of the agent's vision, which is an integer.  It returns the agent's new location, which is the visible cell with the most sugar.

\item \py{harvest} takes the (new) location of the agent, and removes and returns the sugar at that location.

\end{itemize}

\py{Sugarscape} inherits from \py{Cell2D}, so it is similar to the other grid-based models we've seen.

The attributes include \py{agents}, which is a list of \py{Agent} objects, and \py{occupied}, which is a set of tuples, where each tuple contains the coordinates of a cell occupied by an agent.

\index{Cell2D}

Here is the \py{Sugarscape} class and its \py{step} method:

\begin{code}
class Sugarscape(Cell2D):

    def step(self):

        # loop through the agents in random order
        random_order = np.random.permutation(self.agents)
        for agent in random_order:

            # mark the current cell unoccupied
            self.occupied.remove(agent.loc)

            # execute one step
            agent.step(self)

            # if the agent is dead, remove from the list
            if agent.is_starving():
                self.agents.remove(agent)
            else:
                # otherwise mark its cell occupied
                self.occupied.add(agent.loc)

        # grow back some sugar
        self.grow()
        return len(self.agents)
\end{code}

\index{NumPy}
\index{random}
\index{permutation}

During each step, the \py{Sugarscape} uses the NumPy function \py{permutation} to loop through the agents in random order.  It invokes \py{step} on each agent and then checks whether it is dead.  After all agents have moved, some of the sugar grows back. The return value is the number of agents still alive.

I won't show more details here; you can see them in the notebook for this chapter.  If you want to learn more about NumPy, you might want to look at these functions in particular:

\begin{itemize}

\item \py{make_visible_locs}, which builds the array of locations an agent can see, depending on its vision.  The locations are sorted by distance, with locations at the same distance appearing in random order.  This function uses \py{np.random.shuffle} and \py{np.vstack}.

\index{NumPy}
\index{random}
\index{shuffle}

\item \py{make_capacity}, which initializes the capacity of the cells using the NumPy functions \py{indices}, \py{hypot}, \py{minimum}, and \py{digitize}.

\item \py{look_and_move}, which uses \py{argmax}.

\end{itemize}

\index{shuffle}
\index{vstack}
\index{indices}
\index{hypot}
\index{digitize}
\index{minimum}
\index{argmax}


\section{Migration and Wave Behavior}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap09-5.pdf}}
\caption{Wave behavior in Sugarscape: initial configuration (left),
after 6 steps (middle) and after 12 steps (right).}
\label{chap09-5}
\end{figure}

Although the purpose of Sugarscape is not primarily to explore the movement of agents in space, Epstein and Axtell observed some interesting patterns when agents migrate.

\index{migration}

If we start with all agents in the lower-left corner, they quickly move toward the closest ``peak'' of high-capacity cells.
But if there are more agents than a single peak can support, they quickly exhaust the sugar and agents are forced to move into lower-capacity areas.

The ones with the longest vision cross the valley between the peaks and propagate toward the northeast in a pattern that resembles a wave front.  Because they leave a stripe of empty cells behind them, other agents don't follow until the sugar grows back.

\index{wave}
\index{spaceship}
\index{Rule 110}

The result is a series of discrete waves of migration, where each wave resembles a coherent object, like the spaceships we saw in the Rule 110 CA and Game of Life (see Section~\ref{spaceships} and Section~\ref{lifepatterns}).

Figure~\ref{chap09-5} shows the initial condition (left) and the state of the model after 6 steps (middle) and 12 steps (right). You can see the first two waves reaching and moving through the second peak, leaving a stripe of empty cells behind.  You can see an animated version of this model, where the wave patterns are more clearly visible, in the notebook for this chapter.

These waves move diagonally, which is surprising because the agents themselves only move north or east, never northeast.  Outcomes like this --- groups or ``aggregates'' with properties and behaviors that the agents don't have --- are common in agent-based models. We will see more examples in the next chapter.


\section{Emergence}

The examples in this chapter demonstrate one of the most important ideas in complexity science: emergence.  An {\bf emergent property} is a characteristic of a system that results from the interaction of its components, not from their properties.

\index{emergence}
\index{emergent property}

To clarify what emergence is, it helps to consider what it isn't.  For example, a brick wall is hard because bricks and mortar are hard, so that's not an emergent property.
As another example, some rigid structures are built from flexible components, so that seems like a kind of emergence.  But it is at best a weak kind, because structural properties follow from well-understood laws of mechanics.

\index{brick wall}

In contrast, the segregation we see in Schelling's model is an emergent property because it is not caused by racist agents.  Even when the agents are only mildly xenophobic, the outcome of the system is substantially different from the intent of the agents' decisions.

\index{Schelling's model}

The distribution of wealth in Sugarscape might be an emergent property, but it is a weak example because we could reasonably predict it based on the distributions of vision, metabolism, and lifespan.  The wave behavior we saw in the last example might be a stronger example, since the wave displays a capability --- diagonal movement --- that the agents do not have.

Emergent properties are surprising: it is hard to predict the behavior of the system even if we know all the rules.  That difficulty is not an accident; in fact, it may be the defining characteristic of emergence.

As Wolfram discusses in {\em A New Kind of Science}, conventional science is based on the axiom that if you know the rules that govern a system, you can predict its behavior.  What we call ``laws'' are often computational shortcuts that allow us to predict the outcome of a system without building or observing it.

\index{New Kind of Science@{\it A New Kind of Science}}
\index{Wolfram, Stephen}
\index{natural law}

But many cellular automatons are {\bf computationally irreducible}, which means that there are no shortcuts.  The only way to get the outcome is to implement the system.

\index{computationally irreducible}
\index{shortcut}

The same may be true of complex systems in general.
For physical systems with more than a few components, there is usually no model that yields an analytic solution.  Numerical methods provide a kind of computational shortcut, but there is still a qualitative difference.

\index{constant-time algorithm}

Analytic solutions often provide a constant-time algorithm for prediction; that is, the run time of the computation does not depend on $t$, the time scale of prediction.  But numerical methods, simulation, analog computation, and similar methods take time proportional to $t$.  And for many systems, there is a bound on $t$ beyond which we can't compute reliable predictions at all.

These observations suggest that emergent properties are fundamentally unpredictable, and that for complex systems we should not expect to find natural laws in the form of computational shortcuts.

To some people, ``emergence'' is another name for ignorance; by this reckoning, a property is emergent if we don't have a reductionist explanation for it, but if we come to understand it better in the future, it would no longer be emergent.

The status of emergent properties is a topic of debate, so it is appropriate to be skeptical.  When we see an apparently emergent property, we should not assume that there can never be a reductionist explanation.  But neither should we assume that there has to be one.

The examples in this book and the principle of computational equivalence give good reasons to believe that at least some emergent properties can never be ``explained'' by a classical reductionist model.

You can read more about emergence at \url{https://thinkcomplex.com/emerge}.


\section{Exercises}

The code for this chapter is in the Jupyter notebook {\tt chap09.ipynb} in the repository for this book.  Open this notebook, read the code, and run the cells.  You can use this notebook to work on the following exercises.
My solutions are in {\tt chap09soln.ipynb}.

\begin{exercise}

Bill Bishop, author of {\em The Big Sort}, argues that American society is increasingly segregated by political opinion, as people choose to live among like-minded neighbors.

\index{Big Sort@{\em The Big Sort}}
\index{Bishop, Bill}

The mechanism Bishop hypothesizes is not that people, like the agents in Schelling's model, are more likely to move if they are isolated, but that when they move for any reason, they are likely to choose a neighborhood with people like themselves.

\index{segregation}

Modify your implementation of Schelling's model to simulate this kind of behavior and see if it yields similar degrees of segregation.

There are several ways you can model Bishop's hypothesis.  In my implementation, a random selection of agents moves during each step. Each agent considers \py{k} randomly-chosen empty locations and chooses the one with the highest fraction of similar neighbors. How does the degree of segregation depend on \py{k}?

\end{exercise}


\begin{exercise}

In the first version of Sugarscape, we never add agents, so once the population falls, it never recovers.  In the second version, we only replace agents when they die, so the population is constant.  Now let's see what happens if we add some ``population pressure''.

\index{population pressure}

Write a version of Sugarscape that adds a new agent at the end of every step.  Add code to compute the average vision and the average metabolism of the agents at the end of each step.
Run the model for a few hundred steps and plot the population over time, as well as the average vision and average metabolism.

You should be able to implement this model by inheriting from \py{Sugarscape} and overriding \py{__init__} and \py{step}.

\end{exercise}


\begin{exercise}

Among people who study philosophy of mind, ``Strong AI'' is the theory that an appropriately-programmed computer could have a mind in the same sense that humans have minds.

\index{strong AI}
\index{Searle, John}
\index{Chinese Room argument}
\index{system reply}

John Searle presented a thought experiment called ``The Chinese Room'', intended to show that Strong AI is false.  You can read about it at \url{https://thinkcomplex.com/searle}.

What is the {\bf system reply} to the Chinese Room argument? How does what you have learned about emergence influence your reaction to the system reply?

\end{exercise}


\chapter{Herds, Flocks, and Traffic Jams}

The agent-based models in the previous chapter are based on grids: the agents occupy discrete locations in two-dimensional space.  In this chapter we consider agents that move in continuous space, including simulated cars on a one-dimensional highway and simulated birds in three-dimensional space.

\index{grid}
\index{space}
\index{dimension}

The code for this chapter is in \py{chap10.ipynb}, which is a Jupyter notebook in the repository for this book.  For more information about working with this code, see Section~\ref{code}.


\section{Traffic jams}

What causes traffic jams?  Sometimes there is an obvious cause, like an accident, a speed trap, or something else that disturbs the flow of traffic.
But other times traffic jams appear for no apparent reason.

\index{traffic jam}

Agent-based models can help explain spontaneous traffic jams. As an example, I implement a highway simulation based on a model in Mitchell Resnick's book, {\em Turtles, Termites and Traffic Jams}.

\index{Turtles, Termites and Traffic Jams@{\em Turtles, Termites and Traffic Jams}}
\index{Resnick, Mitchell}
\index{highway}

Here's the class that represents the ``highway'':

\begin{code}
class Highway:

    def __init__(self, n=10, length=1000, eps=0):
        self.length = length
        self.eps = eps

        # create the drivers
        locs = np.linspace(0, length, n, endpoint=False)
        self.drivers = [Driver(loc) for loc in locs]

        # and link them up
        for i in range(n):
            j = (i+1) % n
            self.drivers[i].next = self.drivers[j]
\end{code}

\py{n} is the number of cars, \py{length} is the length of the highway, and \py{eps} is the amount of random noise we'll add to the system.

\index{Highway}
\index{Driver}

\index{NumPy}
\index{linspace}

\py{locs} contains the locations of the drivers; the NumPy function \py{linspace} creates an array of \py{n} locations equally spaced between \py{0} and \py{length}.

The \py{drivers} attribute is a list of \py{Driver} objects.  The \py{for} loop links them so each \py{Driver} contains a reference to the next.  The highway is circular, so the last \py{Driver} contains a reference to the first.

During each time step, the \py{Highway} moves each of the drivers:

\begin{code}
# Highway

def step(self):
    for driver in self.drivers:
        self.move(driver)
\end{code}

The \py{move} method lets the \py{Driver} choose its acceleration.  Then \py{move} computes the updated speed and position.
Here's the implementation:

\begin{code}
# Highway

def move(self, driver):
    dist = self.distance(driver)

    # let the driver choose acceleration
    acc = driver.choose_acceleration(dist)
    acc = min(acc, self.max_acc)
    acc = max(acc, self.min_acc)
    speed = driver.speed + acc

    # add random noise to speed
    speed *= np.random.uniform(1-self.eps, 1+self.eps)

    # keep it nonnegative and under the speed limit
    speed = max(speed, 0)
    speed = min(speed, self.speed_limit)

    # if current speed would collide, stop
    if speed > dist:
        speed = 0

    # update speed and loc
    driver.speed = speed
    driver.loc += speed
\end{code}

\index{NumPy}
\index{random}
\index{uniform}

\py{dist} is the distance between \py{driver} and the next driver ahead. This distance is passed to \py{choose_acceleration}, which specifies the behavior of the driver.  This is the only decision the driver gets to make; everything else is determined by the ``physics'' of the simulation.

\begin{itemize}

\item \py{acc} is acceleration, which is bounded by \py{min_acc} and \py{max_acc}.  In my implementation, cars can accelerate with \py{max_acc=1} and brake with \py{min_acc=-10}.

\item \py{speed} is the old speed plus the requested acceleration, but then we make some adjustments.  First, we add random noise to \py{speed}, because the world is not perfect.
\py{eps} determines the magnitude of the relative error; for example, if \py{eps} is 0.02, \py{speed} is multiplied by a random factor between 0.98 and 1.02.

\item Speed is bounded between 0 and \py{speed_limit}, which is 40 in my implementation, so cars are not allowed to go backward or speed.

\item If the requested speed would cause a collision with the next car, \py{speed} is set to 0.

\item Finally, we update the \py{speed} and \py{loc} attributes of \py{driver}.

\end{itemize}

\index{collision}

Here's the definition for the \py{Driver} class:

\begin{code}
class Driver:

    def __init__(self, loc, speed=0):
        self.loc = loc
        self.speed = speed

    def choose_acceleration(self, dist):
        return 1
\end{code}

The attributes \py{loc} and \py{speed} are the location and speed of the driver.

This implementation of \py{choose_acceleration} is simple: it always accelerates at the maximum rate.

Since the cars start out equally spaced, we expect them all to accelerate until they reach the speed limit, or until their speed exceeds the space between them.  At that point, at least one ``collision'' will occur, causing some cars to stop.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap10-1.pdf}}
\caption{Simulation of drivers on a circular highway at three points in time.  Squares indicate the positions of the drivers; triangles indicate places where one driver has to brake to avoid another.}
\label{chap10-1}
\end{figure}

Figure~\ref{chap10-1} shows a few steps in this process, starting with 30 cars and \py{eps=0.02}.
On the left is the configuration after 16 time steps, with the highway mapped to a circle. Because of random noise, some cars are going faster than others, and the spacing has become uneven.

During the next time step (middle) there are two collisions, indicated by the triangles.

During the next time step (right) two cars collide with the stopped cars, and we can see the initial formation of a traffic jam.  Once a jam forms, it tends to persist, with additional cars approaching from behind and colliding, and with cars in the front accelerating away.

Under some conditions, the jam itself propagates backwards, as you can see if you watch the animations in the notebook for this chapter.


\section{Random perturbation}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap10-2.pdf}}
\caption{Average speed as a function of the number of cars, for three magnitudes of added random noise.}
\label{chap10-2}
\end{figure}

As the number of cars increases, traffic jams become more severe. Figure~\ref{chap10-2} shows the average speed cars are able to achieve, as a function of the number of cars.

\index{noise}

The top line shows results with \py{eps=0}, that is, with no random variation in speed.  With 25 or fewer cars, the spacing between cars is at least 40, which allows cars to reach and maintain the maximum speed, which is 40.  With more than 25 cars, traffic jams form and the average speed drops quickly.

This effect is a direct result of the physics of the simulation, so it should not be surprising.  If the length of the road is 1000, the spacing between \py{n} cars is \py{1000/n}.  And since cars can't move faster than the space in front of them, the highest average speed we expect is \py{1000/n} or 40, whichever is less.

But that's the best case scenario.
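This best-case bound is easy to compute directly; here is a one-line sketch of the reasoning above (my own helper, not part of the book's code):

\begin{code}
def max_average_speed(n, length=1000, speed_limit=40):
    # Cars can't go faster than the spacing ahead of them (length/n)
    # or the speed limit, whichever is less.
    return min(length / n, speed_limit)
\end{code}

With 25 cars the bound is exactly the speed limit; with 50 cars it falls to 20.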
With just a small amount of randomness, things get much worse.

Figure~\ref{chap10-2} also shows results with \py{eps=0.001} and \py{eps=0.01}, which correspond to errors in speed of 0.1\% and 1\%.

With 0.1\% errors, the capacity of the highway drops from 25 to 20 (by ``capacity'' I mean the maximum number of cars that can reach and sustain the speed limit).  And with 1\% errors, the capacity drops to 10.  Ugh.

As one of the exercises at the end of this chapter, you'll have a chance to design a better driver; that is, you will experiment with different strategies in \py{choose_acceleration} and see if you can find driver behaviors that improve average speed.


\section{Boids}

In 1987 Craig Reynolds published ``Flocks, herds and schools: A distributed behavioral model'', which describes an agent-based model of herd behavior.  You can download his paper from \url{https://thinkcomplex.com/boid}.

\index{Reynolds, Craig}

Agents in this model are called ``Boids'', which is both a contraction of ``bird-oid'' and an accented pronunciation of ``bird'' (although Boids are also used to model fish and herding land animals).

\index{Boid}

Each agent simulates three behaviors:

\begin{description}

\item[Flock centering:] Move toward the center of the flock.

\item[Collision avoidance:] Avoid obstacles, including other Boids.

\item[Velocity matching:] Align velocity (speed and direction) with neighboring Boids.

\end{description}

Boids make decisions based on local information only; each Boid only sees (or pays attention to) other Boids in its field of vision.

\index{local information}

In the repository for this book, you will find {\tt Boids7.py}, which contains my implementation of Boids, based in part on the description in Gary William Flake's book, {\it The Computational Beauty of Nature}.

\index{Flake, Gary William}
\index{Computational Beauty of Nature@{\it The Computational Beauty of Nature}}
\index{VPython}
\index{Anaconda}

My implementation uses VPython, which is a library that provides 3-D graphics. VPython provides a \py{Vector} object, which I use to represent the position and velocity of Boids in three dimensions.  You can read about them at \url{https://thinkcomplex.com/vector}.

\index{Vector}

\section{The Boid algorithm}

\py{Boids7.py} defines two classes: \py{Boid}, which implements the Boid behaviors, and \py{World}, which contains a list of Boids and a ``carrot'' the Boids are attracted to.

\index{carrot}

The \py{Boid} class defines the following methods:

\begin{itemize}

\item \py{center}: Finds other Boids within range and computes a vector toward their centroid.

\index{centroid}

\item \py{avoid}: Finds objects, including other Boids, within a given range, and computes a vector that points away from their centroid.

\item \py{align}: Finds other Boids within range and computes the average of their headings.

\item \py{love}: Computes a vector that points toward the carrot.

\end{itemize}

Here's the implementation of \py{center}:

\begin{code}
def center(self, boids, radius=1, angle=1):
    neighbors = self.get_neighbors(boids, radius, angle)
    vecs = [boid.pos for boid in neighbors]
    return self.vector_toward_center(vecs)
\end{code}

The parameters \py{radius} and \py{angle} are the radius and angle of the field of view, which determine which other Boids are taken into consideration.  \py{radius} is in arbitrary units of length; \py{angle} is in radians.

\py{center} uses \py{get_neighbors} to get a list of \py{Boid} objects that are in the field of view.
\py{vecs} is a list of \py{Vector} objects that represent their positions.

Finally, \py{vector_toward_center} computes a \py{Vector} that points from \py{self} to the centroid of \py{neighbors}.

\index{centroid}

Here's how \py{get_neighbors} works:

\begin{code}
def get_neighbors(self, boids, radius, angle):
    neighbors = []
    for boid in boids:
        if boid is self:
            continue

        # if not in range, skip it
        offset = boid.pos - self.pos
        if offset.mag > radius:
            continue

        # if not within viewing angle, skip it
        if self.vel.diff_angle(offset) > angle:
            continue

        # otherwise add it to the list
        neighbors.append(boid)

    return neighbors
\end{code}

For each other Boid, \py{get_neighbors} uses vector subtraction to compute the
vector from \py{self} to \py{boid}. The magnitude of this vector is the distance between them; if this magnitude exceeds \py{radius}, we ignore \py{boid}.

\index{magnitude}
\index{angle}

\py{diff_angle} computes the angle between the velocity of \py{self}, which points in the direction the Boid is heading, and \py{offset}, which points from \py{self} toward \py{boid}.
If this angle exceeds \py{angle}, we ignore \py{boid}.

Otherwise \py{boid} is in view, so we add it to \py{neighbors}.

Now here's the implementation of \py{vector_toward_center}, which computes a vector from \py{self} to the centroid of its neighbors.

\begin{code}
def vector_toward_center(self, vecs):
    if vecs:
        center = np.mean(vecs)
        toward = vector(center - self.pos)
        return limit_vector(toward)
    else:
        return null_vector
\end{code}

VPython vectors work with NumPy, so \py{np.mean} computes the mean of \py{vecs}, which is a sequence of vectors.
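As a cross-check of the distance-and-angle test in \py{get_neighbors}, here is the same logic written with plain NumPy arrays standing in for VPython vectors; the angle is recovered from the dot product, and the function name and signature are illustrative, not from {\tt Boids7.py}:

```python
import numpy as np

def in_view(pos, vel, other_pos, radius, angle):
    # vector from self to the other Boid
    offset = other_pos - pos
    dist = np.linalg.norm(offset)
    if dist > radius:
        return False                  # too far away
    # angle between the heading (vel) and the offset
    cos_theta = np.dot(vel, offset) / (np.linalg.norm(vel) * dist)
    return np.arccos(np.clip(cos_theta, -1, 1)) <= angle
```

A Boid at exactly the same position would make \py{dist} zero; in \py{get_neighbors} that case never arises because \py{self} is skipped.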
\py{limit_vector} limits the magnitude of the result to 1; \py{null_vector} has magnitude 0.

\index{NumPy}
\index{mean}
\index{magnitude}

We use the same helper methods to implement \py{avoid}:

\begin{code}
def avoid(self, boids, carrot, radius=0.3, angle=np.pi):
    objects = boids + [carrot]
    neighbors = self.get_neighbors(objects, radius, angle)
    vecs = [boid.pos for boid in neighbors]
    return -self.vector_toward_center(vecs)
\end{code}

\py{avoid} is similar to \py{center}, but it takes into account the carrot as well as the other Boids. Also, the parameters are different: \py{radius} is smaller, so Boids only avoid objects that are too close, and \py{angle} is wider, so Boids avoid objects from all directions. Finally, the result from \py{vector_toward_center} is negated, so it points {\em away} from the centroid of any objects that are too close.

Here's the implementation of \py{align}:

\begin{code}
def align(self, boids, radius=0.5, angle=1):
    neighbors = self.get_neighbors(boids, radius, angle)
    vecs = [boid.vel for boid in neighbors]
    return self.vector_toward_center(vecs)
\end{code}

\py{align} is also similar to \py{center}; the big difference is that it computes the average of the neighbors' velocities, rather than their positions.
If the neighbors point in a particular direction, the Boid tends to steer toward that direction.

\index{velocity}

Finally, \py{love} computes a vector that points in the direction of the carrot.

\begin{code}
def love(self, carrot):
    toward = carrot.pos - self.pos
    return limit_vector(toward)
\end{code}

The results from \py{center}, \py{avoid}, \py{align}, and \py{love} are what Reynolds calls ``acceleration requests'', where each request is intended to achieve a different goal.

\index{acceleration}
\index{arbitration}

\section{Arbitration}

To arbitrate among these possibly conflicting goals, we compute a weighted sum of the four requests:

\index{weighted sum}

\begin{code}
def set_goal(self, boids, carrot):
    w_avoid = 10
    w_center = 3
    w_align = 1
    w_love = 10

    self.goal = (w_center * self.center(boids) +
                 w_avoid * self.avoid(boids, carrot) +
                 w_align * self.align(boids) +
                 w_love * self.love(carrot))
    self.goal.mag = 1
\end{code}

\py{w_center}, \py{w_avoid}, and the other weights determine the importance of the acceleration requests. Usually \py{w_avoid} is relatively high and \py{w_align} is relatively low.

After computing a goal for each Boid, we update their velocity and position:

\begin{code}
def move(self, mu=0.1, dt=0.1):
    self.vel = (1-mu) * self.vel + mu * self.goal
    self.vel.mag = 1
    self.pos += dt * self.vel
    self.axis = self.length * self.vel
\end{code}

The new velocity is the weighted sum of the old velocity
and the goal. The parameter \py{mu} determines how quickly
the birds can change speed and direction. Then we normalize velocity so its magnitude is 1, and orient the axis of the Boid to point in the direction it is moving.

\index{maneuverability}

To update position, we multiply velocity by the time step, \py{dt}, to get the change in position.
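The velocity update at the heart of \py{move} can be sketched in isolation, again with NumPy arrays standing in for VPython vectors (the helper name is illustrative):

```python
import numpy as np

def blend_velocity(vel, goal, mu=0.1):
    # weighted blend of the old velocity and the goal direction
    new_vel = (1 - mu) * vel + mu * goal
    # normalize so the magnitude is 1
    return new_vel / np.linalg.norm(new_vel)
```

With \py{mu=0.1}, a Boid heading along the x-axis with a goal along the y-axis turns only slightly per step; with \py{mu=1} it snaps to the goal direction immediately.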
Finally, we update \py{axis} so the orientation of the Boid when it is drawn is aligned with its velocity.

\index{flock behavior}

Many parameters influence flock behavior, including the radius, angle
and weight for each behavior, as well as maneuverability, \py{mu}.
These parameters determine the ability of the Boids to form and
maintain a flock, and the patterns of motion and organization within the
flock. For some settings, the Boids resemble a flock of birds; other
settings resemble a school of fish or a cloud of flying insects.

As one of the exercises at the end of this chapter, you can modify these parameters and see how they affect Boid behavior.


\section{Emergence and free will}
\label{freewill}

Many complex systems have properties, as a whole, that their components do not:

\begin{itemize}

\item The Rule 30 cellular automaton is deterministic, and the rules
that govern its evolution are completely known. Nevertheless, it
generates a sequence that is statistically indistinguishable from
random.

\item The agents in Schelling's model are not racist, but the outcome
of their interactions is a high degree of segregation.

\item Agents in Sugarscape form waves that move diagonally even
though the agents cannot.

\item Traffic jams move backward even though the cars in them are
moving forward.

\item Flocks and herds behave as if they are centrally organized even though the animals in them are making individual decisions based on local information.

\end{itemize}

These examples suggest an approach to several old and challenging questions, including the problems of consciousness and free will.

\index{emergence}
\index{free will}
\index{determinism}
\index{James, William}
\index{Hume, David}

Free will is the ability to make choices, but if our bodies and brains
are governed by deterministic physical laws, our
choices are completely determined.

Philosophers and scientists have proposed many possible resolutions to this apparent conflict; for example:

\begin{itemize}

\item William James proposed a two-stage model in which
possible actions are generated by a random process and then selected
by a deterministic process. In that case our actions are
fundamentally unpredictable because the process that generates them
includes a random element.

\item David Hume suggested that our perception of making choices
is an illusion; in that case, our actions are deterministic because
the system that produces them is deterministic.

\end{itemize}

These arguments reconcile the conflict in opposite ways, but they agree that there is a conflict: the system cannot have free will if the parts are deterministic.

The complex systems in this book suggest the alternative that free will, at the level of options and decisions, is compatible with determinism at the level of neurons (or some lower level). In the same way that a traffic jam moves backward while the cars move forward, a person can have free will even though neurons don't.


\section{Exercises}

The code for the traffic jam simulation is in the Jupyter notebook {\tt chap09.ipynb} in the repository for this book. Open this notebook, read the code, and run the cells. You can use this notebook to work on the following
exercise. My solutions are in {\tt chap09soln.ipynb}.

\begin{exercise}

In the traffic jam simulation, define a class, \py{BetterDriver}, that inherits from \py{Driver} and overrides \py{choose_acceleration}. See if you can define driving rules that do better than the basic implementation in \py{Driver}. You might try to achieve higher average speed, or a lower number of collisions.

% TODO: Add a crash counter.
% Maybe pass the Highway object to choose_acceleration

\end{exercise}


\begin{exercise}

The code for my Boid implementation is in {\tt Boids7.py} in the repository for this book. To run it, you will need VPython, a library for 3-D graphics and animation. If you use Anaconda (as I recommend in Section~\ref{code}), you can run the following in a terminal or Command Window:

\begin{code}
conda install -c vpython vpython
\end{code}

Then run \py{Boids7.py}. It should either launch a browser or create a window in a running browser, and create an animated display showing Boids, as white cones, circling a red sphere, which is the carrot. If you click and move the mouse, you can move the carrot and see how the Boids react.

Read the code to see how the parameters control Boid behaviors. Experiment with different parameters. What happens if you ``turn off'' one
of the behaviors by setting its weight to 0?

\index{parameter}

To generate more bird-like behavior, Flake suggests adding a behavior to maintain a clear line of sight; in other words, if there is another bird directly ahead, the Boid should move away laterally. What effect do you expect this rule to have on the behavior of the flock? Implement it and see.

\end{exercise}


\begin{exercise}

Read more about free will at \url{https://thinkcomplex.com/will}. The view that free will is compatible with determinism is called {\bf compatibilism}. One of the strongest challenges to compatibilism is the ``consequence
argument''. What is the consequence argument?
What response can you
give to the consequence argument based on what you have read in this
book?

\index{free will}
\index{determinism}
\index{compatibilism}
\index{consequence argument}

\end{exercise}




\chapter{Evolution}

The most important idea in biology, and possibly all of science, is the {\bf theory of evolution by natural selection}, which claims that {\em new species are created and existing species change due to natural selection}. Natural selection is a process in which inherited variations between individuals cause differences in survival and reproduction.

\index{evolution}
\index{survival}
\index{reproduction}
\index{theory of evolution}

Among people who know something about biology, the theory of evolution is widely regarded as a fact, which is to say that it is consistent with all current observations; it is highly unlikely to be contradicted by future observations; and, if it is revised in the future, the changes will almost certainly leave the central ideas substantially intact.

\index{Pew Research Center}

Nevertheless, many people do not believe in evolution. In a survey run by the Pew Research Center, survey respondents were asked which of the following claims is closer to their view:

\begin{enumerate}

\item Humans and other living things have evolved over time.

\item Humans and other living things have existed in their present form since the beginning of time.

\end{enumerate}

About 34\% of Americans chose the second (see \url{https://thinkcomplex.com/arda}).

Even among the ones who believe that living things have evolved, barely more than half believe that the cause of evolution is natural selection. In other words, only a third of Americans believe that the theory of evolution is true.

How is this possible?
In my opinion, contributing factors include:

\begin{itemize}

\item Some people think that there is a conflict between evolution and their religious beliefs. Feeling like they have to reject one, they reject evolution.

\item Others have been actively misinformed, often by members of the first group, so that much of what they know about evolution is misleading or false. For example, many people think that evolution means humans evolved from monkeys. It doesn't, and we didn't.

\item And many people simply don't know anything about evolution.

\end{itemize}

There's probably not much I can do about the first group, but I think I can help the others. Empirically, the theory of evolution is hard for people to understand. At the same time, it is profoundly simple: for many people, once they understand it, it seems both obvious and irrefutable.

To help people make this transition from confusion to clarity, the most powerful tool I have found is computation. Ideas that are hard to understand in theory can be easy to understand when we see them happening in simulation.
That is the goal of this chapter.

\index{simulation}

The code for this chapter is in \py{chap11.ipynb}, which is a
Jupyter notebook in the repository for this book. For more information
about working with this code, see Section~\ref{code}.


\section{Simulating evolution}

% manifest
% show evidence of
% evoke
% instantiate
% induce

I start with a simple model that demonstrates a basic form of evolution. According to the theory, the following features are sufficient to produce evolution:

\begin{itemize}

\item Replicators: We need a population of agents that can reproduce
in some way. We'll start with replicators that make perfect copies
of themselves.
Later we'll add imperfect copying, that is,
mutation.

\item Variation: We need variability in the population, that
is, differences between individuals.

\item Differential survival or reproduction: The differences between
individuals have to affect their ability to survive or reproduce.

\end{itemize}

To simulate these features, we'll define a population of agents
that represent individual organisms. Each agent has genetic
information, called its {\bf genotype}, which is the information that
gets copied when the agent replicates. In our model\footnote{This
model is a variant of the NK model developed primarily by Stuart
Kauffman (see \url{https://thinkcomplex.com/nk}).}, a
genotype is represented by a sequence of \py{N} binary digits (zeros
and ones), where \py{N} is a parameter we choose.

\index{genotype}
\index{Kauffman, Stuart}
\index{agent}
\index{survival}
\index{reproduction}
\index{fitness}

To generate variation, we create a population with a variety of genotypes; later we will explore mechanisms that create or increase variation.

Finally, to generate differential survival and reproduction, we define a function that maps from each genotype to a {\bf fitness}, where fitness is a quantity related to the ability of an agent to survive or reproduce.

\section{Fitness landscape}

The function that maps from genotype to fitness is called a {\bf fitness landscape}. In the landscape metaphor, each genotype corresponds to a location in an \py{N}-dimensional space, and fitness corresponds to the ``height'' of the landscape at that location.
For visualizations that might clarify this metaphor, see \url{https://thinkcomplex.com/fit}.

\index{fitness landscape}
\index{phenotype}

In biological terms, the fitness landscape represents information about how the genotype of an organism is related to its physical form and capabilities, called its {\bf phenotype}, and how the phenotype interacts with its {\bf environment}.

In the real world, fitness landscapes are complicated, but we don't need to build a realistic model. To induce evolution, we need {\em some} relationship between genotype and fitness, but it turns out that it can be {\em any} relationship. To demonstrate this point, we'll use a totally random fitness landscape.

Here is the definition for a class that represents a fitness landscape:

\begin{code}
class FitnessLandscape:

    def __init__(self, N):
        self.N = N
        self.one_values = np.random.random(N)
        self.zero_values = np.random.random(N)

    def fitness(self, loc):
        fs = np.where(loc, self.one_values,
                           self.zero_values)
        return fs.mean()
\end{code}

The genotype of an agent, which corresponds to its location in the fitness landscape, is represented by a NumPy array of zeros and ones called \py{loc}.
The fitness of a given genotype is the mean of \py{N} {\bf fitness contributions}, one for each element of \py{loc}.

\index{NumPy}
\index{random}
\index{where}

To compute the fitness of a genotype, \py{FitnessLandscape} uses two arrays:
\py{one_values}, which contains the fitness contributions of having a \py{1} in each element of \py{loc}, and \py{zero_values}, which contains the fitness contributions of having a \py{0}.

The \py{fitness} method uses \py{np.where} to select a value from \py{one_values} where \py{loc} has a \py{1}, and a value from \py{zero_values} where \py{loc} has a \py{0}.

As an example, suppose \py{N=3} and

\begin{code}
one_values =  [0.1, 0.2, 0.3]
zero_values =
[0.4, 0.7, 0.9]
\end{code}

In that case, the fitness of \py{loc = [0, 1, 0]} would be the mean of \py{[0.4, 0.2, 0.9]}, which is \py{0.5}.


\section{Agents}

Next we need agents. Here's the class definition:

\begin{code}
class Agent:

    def __init__(self, loc, fit_land):
        self.loc = loc
        self.fit_land = fit_land
        self.fitness = fit_land.fitness(self.loc)

    def copy(self):
        return Agent(self.loc, self.fit_land)
\end{code}

The attributes of an \py{Agent} are:

\begin{itemize}

\item \py{loc}: The location of the \py{Agent} in the fitness landscape.

\item \py{fit_land}: A reference to a \py{FitnessLandscape} object.

\item \py{fitness}: The fitness of this \py{Agent} in the \py{FitnessLandscape}, represented as a number between 0 and 1.

\end{itemize}

\py{Agent} provides \py{copy}, which copies the genotype exactly. Later, we will see a version that copies with mutation, but mutation is not necessary for evolution.


\section{Simulation}
\label{evosim}

Now that we have agents and a fitness landscape, I'll define a class called \py{Simulation} that simulates the creation, reproduction, and death of the agents.
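Before getting into \py{Simulation}, the fitness arithmetic from the example above can be checked directly; this sketch reuses the example values:

```python
import numpy as np

one_values = np.array([0.1, 0.2, 0.3])
zero_values = np.array([0.4, 0.7, 0.9])

loc = np.array([0, 1, 0])
# where loc is 1 take one_values, where it is 0 take zero_values
fs = np.where(loc, one_values, zero_values)
fitness = fs.mean()      # the mean of [0.4, 0.2, 0.9], which is 0.5
```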
To avoid getting bogged down, I'll present a simplified version of the code here; you can see the details in the notebook for this chapter.

\index{simulation}

Here's the definition of \py{Simulation}:

\begin{code}
class Simulation:

    def __init__(self, fit_land, agents):
        self.fit_land = fit_land
        self.agents = agents
\end{code}

The attributes of a \py{Simulation} are:

\begin{itemize}

\item \py{fit_land}: A reference to a \py{FitnessLandscape} object.

\item \py{agents}: An array of \py{Agent} objects.

\end{itemize}

The most important function in \py{Simulation} is \py{step}, which simulates one time step:

\begin{code}
# class Simulation:

    def step(self):
        n = len(self.agents)
        fits = self.get_fitnesses()

        # see who dies
        index_dead = self.choose_dead(fits)
        num_dead = len(index_dead)

        # replace the dead with copies of the living
        replacements = self.choose_replacements(num_dead, fits)
        self.agents[index_dead] = replacements
\end{code}

\py{step} uses three other methods:

\begin{itemize}

\item \py{get_fitnesses} returns an array containing the fitness of each agent.

\item \py{choose_dead} decides which agents die during this time step, and returns an array that contains the indices of the dead agents.

\item \py{choose_replacements} decides which agents reproduce during this time step, invokes \py{copy} on each one, and returns an array of new \py{Agent} objects.

\end{itemize}

In this version of the simulation, the number of new agents during each time step equals the number of dead agents, so the number of live agents is constant.


\section{No differentiation}

Before we run the simulation, we have to specify the behavior of \py{choose_dead} and \py{choose_replacements}.
We'll start with simple versions of these functions that don't depend on fitness:

\begin{code}
# class Simulation

    def choose_dead(self, fits):
        n = len(self.agents)
        is_dead = np.random.random(n) < 0.1
        index_dead = np.nonzero(is_dead)[0]
        return index_dead
\end{code}

\index{NumPy}
\index{random}
\index{choice}
\index{nonzero}
\index{boolean array}

In \py{choose_dead}, \py{n} is the number of agents and \py{is_dead} is a boolean array that contains \py{True} for the agents who die during this time step. In this version, every agent has the same probability of dying: 0.1.
\py{choose_dead} uses \py{np.nonzero} to find the indices of the non-zero elements of \py{is_dead} (\py{True} is considered non-zero).

\begin{code}
# class Simulation

    def choose_replacements(self, n, fits):
        agents = np.random.choice(self.agents, size=n, replace=True)
        replacements = [agent.copy() for agent in agents]
        return replacements
\end{code}

In \py{choose_replacements}, \py{n} is the number of agents who reproduce during this time step. It uses \py{np.random.choice} to choose \py{n} agents with replacement. Then it invokes \py{copy} on each one and returns a list of new \py{Agent} objects.

These methods don't depend on fitness, so this simulation does not have differential survival or reproduction. As a result, we should not expect to see evolution. But how can we tell?


\section{Evidence of evolution}
\label{instrument}

The most inclusive definition of evolution is a change in the distribution of genotypes in a population. Evolution is an aggregate effect: in other words, individuals don't evolve; populations do.

In this simulation, genotypes are locations in a high-dimensional space, so it is hard to visualize changes in their distribution. However, if the genotypes change, we expect their fitness to change as well.
So we will use {\em changes in the distribution of fitness} as evidence of evolution. In particular, we'll look at the mean and standard deviation of fitness over time.

Before we run the simulation, we have to add an \py{Instrument}, which is an object that gets updated after each time step, computes a statistic of interest, or ``metric'', and stores the result in a sequence we can plot later.

Here is the parent class for all instruments:

\begin{code}
class Instrument:
    def __init__(self):
        self.metrics = []
\end{code}

And here's the definition for \py{MeanFitness}, an instrument that computes the mean fitness of the population at each time step:

\begin{code}
class MeanFitness(Instrument):
    def update(self, sim):
        mean = np.nanmean(sim.get_fitnesses())
        self.metrics.append(mean)
\end{code}

\index{NumPy}
\index{nanmean}

Now we're ready to run the simulation. To avoid the effect of random changes in the starting population, we start every simulation with the same set of agents. And to make sure we explore the entire fitness landscape, we start with one agent at every location. Here's the code that creates the \py{Simulation}:

\begin{code}
N = 8
fit_land = FitnessLandscape(N)
agents = make_all_agents(fit_land, Agent)
sim = Simulation(fit_land, agents)
\end{code}

\py{make_all_agents} creates one \py{Agent} for every location; the implementation is in the notebook for this chapter.

Now we can create and add a \py{MeanFitness} instrument, run the simulation, and plot the results:

\begin{code}
instrument = MeanFitness()
sim.add_instrument(instrument)
sim.run()
\end{code}

\py{Simulation} keeps a list of \py{Instrument} objects.
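Based on this description, the instrument bookkeeping might look like the following sketch; this is a simplified \py{Simulation} showing only the instrument machinery, and \py{update_instruments} and \py{StepCounter} are illustrative names, not from the notebook:

```python
class Instrument:
    def __init__(self):
        self.metrics = []

class Simulation:
    def __init__(self):
        self.instruments = []

    def add_instrument(self, instrument):
        self.instruments.append(instrument)

    def update_instruments(self):
        # called once after each time step
        for instrument in self.instruments:
            instrument.update(self)

class StepCounter(Instrument):
    # a toy instrument: records one metric per time step
    def update(self, sim):
        self.metrics.append(1)
```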
After each time step it invokes \py{update} on each \py{Instrument} in the list.

\index{Instrument}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap11-1.pdf}}
\caption{Mean fitness over time for 10 simulations with no differential survival or reproduction.}
\label{chap11-1}
\end{figure}

Figure~\ref{chap11-1} shows the result of running this simulation 10 times. The mean fitness of the population drifts up or down at random. Since the distribution of fitness changes over time, we infer that the distribution of genotypes is also changing. By the most inclusive definition, this {\bf random walk} is a kind of evolution. But it is not a particularly interesting kind.

\index{adaptation}
\index{diversity}

In particular, this kind of evolution does not explain how biological species change over time, or how new species appear. The theory of evolution is powerful because it explains phenomena we see in the natural world that seem inexplicable:

\begin{itemize}

\item Adaptation: Species interact with their environments in ways that seem too complex, too intricate, and too clever to happen by chance. Many features of natural systems seem as if they were designed.

\item Increasing diversity: Over time the number of species on earth has generally increased (despite several periods of mass extinction).

\item Increasing complexity: The history of life on earth starts with relatively simple life forms, with more complex organisms appearing later in the geological record.

\end{itemize}

These are the phenomena we want to explain. So far, our model doesn't do the job.


\section{Differential survival}
\label{diffsurv}

Let's add one more ingredient, differential survival.
Here's a class that extends \py{Simulation} and overrides \py{choose_dead}:

\begin{code}
class SimWithDiffSurvival(Simulation):

    def choose_dead(self, fits):
        n = len(self.agents)
        is_dead = np.random.random(n) > fits
        index_dead = np.nonzero(is_dead)[0]
        return index_dead
\end{code}

\index{NumPy}
\index{random}

Now the probability of survival depends on fitness; in fact, in this version, the probability that an agent survives each time step {\em is} its fitness.

\index{differential survival}

Since agents with low fitness are more likely to die, agents with high fitness are more likely to survive long enough to reproduce. Over time we expect the number of low-fitness agents to decrease, and the number of high-fitness agents to increase.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap11-2.pdf}}
\caption{Mean fitness over time for 10 simulations with differential survival.}
\label{chap11-2}
\end{figure}

Figure~\ref{chap11-2} shows mean fitness over time for 10 simulations with differential survival. Mean fitness increases quickly at first, but then levels off.

You can probably figure out why it levels off: if there is only one agent at a particular location and it dies, it leaves that location unoccupied. Without mutation, there is no way for it to be occupied again.

With \py{N=8}, this simulation starts with 256 agents occupying all possible locations. Over time, the number of occupied locations decreases; if the simulation runs long enough, eventually all agents will occupy the same location.

So this simulation starts to explain adaptation: increasing fitness means that the species is getting better at surviving in its environment.
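The survival rule can be checked numerically: an agent with fitness $f$ should die with probability roughly $1-f$ on each time step. Here is a sketch; the population size and random seed are arbitrary:

```python
import numpy as np

rng = np.random.default_rng(42)
fits = np.full(100_000, 0.9)        # 100,000 agents, all with fitness 0.9
is_dead = rng.random(len(fits)) > fits
death_rate = is_dead.mean()         # should be close to 1 - 0.9 = 0.1
```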
But the number of occupied locations decreases over time, so this model does not explain increasing diversity at all.

\index{adaptation}

In the notebook for this chapter, you will see the effect of differential reproduction. As you might expect, differential reproduction also increases mean fitness. But without mutation, we still don't see increasing diversity.


\section{Mutation}

In the simulations so far, we start with the maximum possible diversity --- one agent at every location in the landscape --- and end with the minimum possible diversity, all agents at one location.

\index{mutation}
\index{diversity}

That's almost the opposite of what happened in the natural world, which apparently began with a single species that branched, over time, into the millions, or possibly billions, of species on Earth today (see \url{https://thinkcomplex.com/bio}).

With perfect copying in our model, we never see increasing diversity. But if we add mutation, along with differential survival and reproduction, we get a step closer to understanding evolution in nature.

Here is a class definition that extends \py{Agent} and overrides \py{copy}:

\begin{code}
class Mutant(Agent):

    def copy(self, prob_mutate=0.05):
        if np.random.random() > prob_mutate:
            loc = self.loc.copy()
        else:
            direction = np.random.randint(self.fit_land.N)
            loc = self.mutate(direction)
        return Mutant(loc, self.fit_land)
\end{code}

\index{NumPy}
\index{random}

In this model of mutation, every time we call \py{copy}, there is a 5\% chance of mutation. In case of mutation, we choose a random direction from the current location --- that is, a random bit in the genotype --- and flip it.
Here's \py{mutate}:

\begin{code}
def mutate(self, direction):
    new_loc = self.loc.copy()
    new_loc[direction] ^= 1
    return new_loc
\end{code}

The operator \py{^=} computes ``exclusive OR''; with the operand 1, it
has the effect of flipping a bit (see
\url{https://thinkcomplex.com/xor}).

\index{exclusive OR}
\index{XOR}

Now that we have mutation, we don't have to start with an agent at every location. Instead, we can start with the minimum variability: all agents at the same location.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap11-3.pdf}}
\caption{Mean fitness over time for 10 simulations with mutation and differential survival and reproduction.}
\label{chap11-3}
\end{figure}

Figure~\ref{chap11-3} shows the results of 10 simulations with mutation and differential survival and reproduction. In every case, the population evolves toward the location with maximum fitness.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap11-4.pdf}}
\caption{Number of occupied locations over time for 10 simulations with mutation and differential survival and reproduction.}
\label{chap11-4}
\end{figure}

To measure diversity in the population, we can plot the number of occupied locations after each time step. Figure~\ref{chap11-4} shows the results. We start with 100 agents at the same location. As mutations occur, the number of occupied locations increases quickly.

When an agent discovers a high-fitness location, it is more likely to survive and reproduce. Agents at lower-fitness locations eventually die out.
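The occupied-location count is simple to compute; here's a sketch of one way to do it (a hypothetical helper, not the book's exact code; it assumes each agent stores its location as an array of bits in \py{loc}, as in this chapter's classes):

```python
import numpy as np

def count_occupied(agents):
    # Each distinct location (bit array) counts once; tuples are
    # hashable, so we can collect them in a set.
    return len({tuple(agent.loc) for agent in agents})
```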
Over time, the population migrates through the landscape until most agents are at the location with the highest fitness.

\index{equilibrium}

At that point, the system reaches an equilibrium where mutation occupies new locations at the same rate that differential survival causes lower-fitness locations to be left empty.

The number of occupied locations in equilibrium depends on the mutation rate and the degree of differential survival. In these simulations the number of unique occupied locations at any point is typically 5--15.

It is important to remember that the agents in this model don't move, just as the genotype of an organism doesn't change. When an agent dies, it can leave a location unoccupied. And when a mutation occurs, it can occupy a new location. As agents disappear from some locations and appear in others, the population migrates across the landscape, like a glider in Game of Life. But organisms don't evolve; populations do.

\index{glider}
\index{Game of Life}

\section{Speciation}
\label{speciation}

The theory of evolution says that natural selection changes existing species and creates new ones. In our model, we have seen changes, but we have not seen a new species. It's not even clear, in the model, what a new species would look like.

\index{speciation}

Among species that reproduce sexually, two organisms are considered the same species if they can breed and produce fertile offspring. But the agents in the model don't reproduce sexually, so this definition doesn't apply.

Among organisms that reproduce asexually, like bacteria, the definition of species is not as clear-cut.
Generally, a population is considered a species if its genotypes form a cluster, that is, if the genetic differences within the population are small compared to the differences between populations.

Before we can model new species, we need the ability to identify clusters of agents in the landscape, which means we need a definition of {\bf distance} between locations. Since locations are represented with arrays of bits, we'll define distance as the number of bits that differ between locations. \py{FitnessLandscape} provides a \py{distance} method:

% Q: Why didn't I use the ^ operator in distance()?
% A: Because ^ is bitwise XOR. In this case the result would be
% the same, but logical_xor is more precisely what is called for.

\begin{code}
# class FitnessLandscape

def distance(self, loc1, loc2):
    return np.sum(np.logical_xor(loc1, loc2))
\end{code}

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap11-5.pdf}}
\caption{Mean distance between agents over time.}
\label{chap11-5}
\end{figure}

The \py{logical_xor} function computes ``exclusive OR'', which is \py{True} for bits that differ, and \py{False} for the bits that are the same.

\index{exclusive OR}
\index{XOR}

To quantify the dispersion of a population, we can compute the mean of the distances between pairs of agents. In the notebook for this chapter, you'll see the \py{MeanDistance} instrument, which computes this metric after each time step.

\index{distance}

Figure~\ref{chap11-5} shows mean distance between agents over time. Because we start with identical mutants, the initial distances are 0.
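The metric that \py{MeanDistance} computes can be sketched in a few lines (a standalone version, not the book's exact code; locations are bit arrays, as above):

```python
import numpy as np
from itertools import combinations

def mean_distance(locs):
    # Hamming distance, averaged over all pairs of locations.
    dists = [np.sum(np.logical_xor(a, b))
             for a, b in combinations(locs, 2)]
    return np.mean(dists)
```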
As mutations occur, mean distance increases, reaching a maximum while the population migrates across the landscape.

Once the agents discover the optimal location, mean distance decreases until the population reaches an equilibrium where increasing distance due to mutation is balanced by decreasing distance as agents far from the optimal location disappear. In these simulations, the mean distance in equilibrium is near 1; that is, most agents are only one mutation away from optimal.

Now we are ready to look for new species. To model a simple kind of speciation, suppose a population evolves in an unchanging environment until it reaches steady state (like some species we find in nature that seem to have changed very little over long periods of time).

\index{speciation}

Now suppose we either change the environment or transport the population to a new environment. Some features that increased fitness in the old environment might decrease it in the new environment, and vice versa.

We can model these scenarios by running a simulation until the population reaches steady state, then changing the fitness landscape, and then resuming the simulation until the population reaches steady state again.

\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap11-6.pdf}}
\caption{Mean fitness over time. After 500 time steps, we change the fitness landscape.}
\label{chap11-6}
\end{figure}

Figure~\ref{chap11-6} shows results from a simulation like that. We start with 100 identical mutants at a random location, and run the simulation for 500 time steps. At that point, many agents are at the optimal location, which has fitness near 0.65 in this example. The genotypes of the agents form a cluster, with the mean distance between agents near 1.

After 500 steps, we run \py{FitnessLandscape.set_values}, which changes the fitness landscape; then we resume the simulation.
Mean fitness is lower, as we expect, because the optimal location in the old landscape is no better than a random location in the new landscape.

After the change, mean fitness increases again as the population migrates across the new landscape, eventually finding the new optimum, which has fitness near 0.75 (which happens to be higher in this example, but needn't be).

Once the population reaches steady state, it forms a new cluster, with mean distance between agents near 1 again.

Now if we compute the distance between the agents' locations before and after the change, they differ by more than 6, on average. The distances between clusters are much bigger than the distances between agents in each cluster, so we can interpret these clusters as distinct species.


\section{Summary}

We have seen that mutation, along with differential survival and reproduction, is sufficient to cause increasing fitness, increasing diversity, and a simple form of speciation. This model is not meant to be realistic; evolution in natural systems is much more complicated than this. Rather, it is meant to be a ``sufficiency theorem''; that is, a demonstration that the features of the model are sufficient to produce the behavior we are trying to explain (see \url{https://thinkcomplex.com/suff}).

\index{sufficiency theorem}

Logically, this ``theorem'' doesn't prove that evolution in nature is caused by these mechanisms alone. But since these mechanisms do appear, in many forms, in biological systems, it is reasonable to think that they at least contribute to natural evolution.

Likewise, the model does not prove that these mechanisms always cause evolution.
But the results we see here turn out to be robust: in almost any model that includes these features --- imperfect replicators, variability, and differential reproduction --- evolution happens.

\index{replicator}
\index{variability}
\index{differential reproduction}

I hope this observation helps to demystify evolution. When we look at natural systems, evolution seems complicated. And because we primarily see the results of evolution, with only glimpses of the process, it can be hard to imagine and hard to believe.

But in simulation, we see the whole process, not just the results. And by including the minimal set of features to produce evolution --- temporarily ignoring the vast complexity of biological life --- we can see evolution as the surprisingly simple, inevitable idea that it is.

%TODO: A reviewer suggests that I have made the case for "simple" but
% not "inevitable".


\section{Exercises}

The code for this chapter is in the Jupyter notebook {\tt chap11.ipynb}
in the repository for this book. Open the notebook, read the code,
and run the cells. You can use the notebook to work on the following
exercises. My solutions are in {\tt chap11soln.ipynb}.

\begin{exercise}

The notebook shows the effects of differential reproduction and survival separately. What if you have both? Write a class called \py{SimWithBoth} that uses the version of \py{choose_dead} from \py{SimWithDiffSurvival}
and the version of \py{choose_replacements} from \py{SimWithDiffReproduction}. Does mean fitness increase more quickly?

As a Python challenge, can you write this class without copying code?

\end{exercise}


\begin{exercise}

When we change the landscape as in Section~\ref{speciation}, the number of occupied locations and the mean distance usually increase, but the effect is not always big enough to be obvious.
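Making a run repeatable is a one-liner; a sketch, assuming NumPy's legacy global random state, which this book's code uses:

```python
import numpy as np

# Fixing the seed makes a run reproducible; changing it gives a
# different (but still repeatable) run of the same simulation.
np.random.seed(17)
```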
Try out some different random seeds to see how general the effect is.

\end{exercise}



\chapter{Evolution of cooperation}

In this final chapter, I take on two questions, one from biology and one from philosophy:

\begin{itemize}

\item In biology, the ``problem of altruism'' is the apparent conflict between natural selection, which suggests that animals live in a state of constant competition, and altruism, which is the tendency of many animals to help other animals, even to their own detriment. See \url{https://thinkcomplex.com/altruism}.

\item In moral philosophy, the question of human nature asks whether humans are fundamentally good, or evil, or blank slates shaped by their environment. See \url{https://thinkcomplex.com/nature}.

\end{itemize}

The tools I use to address these questions are agent-based simulation (again) and game theory, which is a set of abstract models meant to describe ways agents interact. Specifically, the game we will consider is the Prisoner's Dilemma.

\index{altruism}
\index{human nature}

The code for this chapter is in \py{chap12.ipynb}, which is a
Jupyter notebook in the repository for this book. For more information
about working with this code, see Section~\ref{code}.


\section{Prisoner's Dilemma}
\label{prisoners}

The Prisoner's Dilemma is a topic in game theory, but it's not the fun kind of game. Instead, it is the kind of game that sheds light on human motivation and behavior. Here is the presentation of the dilemma from Wikipedia (\url{https://thinkcomplex.com/pd}):

\index{Prisoner's Dilemma}
\index{game theory}

\begin{quote}
Two members of a criminal gang are arrested and imprisoned. Each prisoner is in solitary confinement with no means of communicating with the other. The prosecutors lack sufficient evidence to convict the pair on the principal charge, but they have enough to convict both on a lesser charge.
Simultaneously, the prosecutors offer each prisoner a bargain. Each prisoner is given the opportunity to either: (1) betray the other by testifying that the other committed the crime, or (2) cooperate with the other by remaining silent. The offer is:

\begin{itemize}

\item If A and B each betray the other, each of them serves 2 years in prison.

\item If A betrays B but B remains silent, A will be set free and B will serve 3 years in prison (and vice versa).

\item If A and B both remain silent, both of them will only serve 1 year in prison (on the lesser charge).

\end{itemize}

\end{quote}

Obviously, this scenario is contrived, but it is meant to represent a variety of interactions where agents have to choose whether to ``cooperate'' with each other or ``defect'', and where the reward (or punishment) for each agent depends on what the other chooses.

\index{cooperate}
\index{defect}

With this set of punishments, it is tempting to say that the players should cooperate, that is, that both should remain silent. But neither agent knows what the other will do, so each has to consider two possible outcomes. First, looking at it from A's point of view:

\begin{itemize}

\item If B remains silent, A is better off defecting; she would go free rather than serve 1 year.

\item If B defects, A is still better off defecting; she would serve only 2 years rather than 3.

\end{itemize}

No matter what B does, A is better off defecting.
And because the game is symmetric, this analysis is the same from B's point of view: no matter what A does, B is better off defecting.

In the simplest version of this game, we assume that A and B have no other considerations to take into account. They can't communicate with each other, so they can't negotiate, make promises, or threaten each other.
And they consider only the immediate goal of minimizing their sentences; they don't take into account any other factors.

\index{rational}

Under those assumptions, the rational choice for both agents is to defect. That might be a good thing, at least for purposes of criminal justice. But for the prisoners, it is frustrating because there is, apparently, nothing they can do to achieve the outcome they both want. And this model applies to other scenarios in real life where cooperation would be better for the greater good as well as for the players.

Studying these scenarios, and ways to escape from the dilemma, is the focus of people who study game theory, but it is not the focus of this chapter. We are headed in a different direction.


\section{The problem of nice}

Since the Prisoner's Dilemma was first discussed in the 1950s, it has been a popular topic of study in social psychology. Based on the analysis in the previous section, we can say what a perfectly rational agent {\em should} do; it is harder to predict what real people actually do. Fortunately, the experiment has been done\footnote{Here's a recent report with references to previous experiments:
Barreda-Tarrazona, Jaramillo-Guti\'{e}rrez, Pavan, and Sabater-Grande,
``Individual Characteristics vs. Experience: An Experimental Study on Cooperation in Prisoner's Dilemma'', Frontiers in Psychology, 2017; 8: 596.
\url{https://thinkcomplex.com/pdexp}.}.

\index{social psychology}

If we assume that people are smart enough to do the analysis (or understand it when explained), and that they generally act in their own interest, we would expect them to defect pretty much all the time. But they don't.
In most experiments, subjects cooperate much more than the rational agent model predicts\footnote{For an excellent video summarizing what we have discussed so far, see \url{https://thinkcomplex.com/pdvid1}.}.

\index{rational agent model}

The most obvious explanation of this result is that people are not rational agents, which should not be a surprise to anyone. But why not? Is it because they are not smart enough to understand the scenario or because they are knowingly acting contrary to their own interest?

Based on experimental results, it seems that at least part of the explanation is plain altruism: many people are willing to incur a cost to themselves in order to benefit another person. Now, before you nominate that conclusion for publication in the {\it Journal of Obvious Results}, let's keep asking why:

\index{altruism}

\begin{itemize}

\item Why do people help other people, even at a cost to themselves? At least part of the reason is that they want to; it makes them feel good about themselves and the world.

\item And why does being nice make people feel good? It might be tempting to say that they were raised right, or more generally trained by society to want to do good things. But there is little doubt that some part of altruism is innate; a proclivity for altruism is the result of normal brain development.

\item Well, why is that? The innate parts of brain development, and the personal characteristics that follow, are the result of genetic information. Of course, the relationship between genes and altruism is complicated; there are probably many genes that interact with each other and with environmental factors to cause people to be more or less altruistic in different circumstances. Nevertheless, there are almost certainly genes that tend to make people altruistic.

\item Finally, why is that?
If, under natural selection, animals are in constant competition with each other to survive and reproduce, it seems obvious that altruism would be counterproductive. In a population where some people help others, even to their own detriment, and others are purely selfish, it seems like the selfish ones would benefit, the altruistic ones would suffer, and the genes for altruism would be driven to extinction.

\end{itemize}

This apparent contradiction is the ``problem of altruism'': {\em why haven't the genes for altruism died out}?

\index{problem of altruism}

Among biologists, there are many possible explanations, including reciprocal altruism, sexual selection, kin selection, and group selection. Among non-scientists, there are even more explanations. I leave it to you to explore the alternatives; for now I want to focus on just one explanation, arguably the simplest one: maybe altruism is adaptive. In other words, maybe genes for altruism make people more likely to survive and reproduce.

It turns out that the Prisoner's Dilemma, which raises the problem of altruism, might also help resolve it.


\section{Prisoner's dilemma tournaments}

In the late 1970s Robert Axelrod, a political scientist at the University of Michigan, organized a tournament to compare strategies for playing Prisoner's Dilemma (PD).

\index{Axelrod, Robert}
\index{tournament}

He invited participants to submit strategies in the form of computer programs, then played the programs against each other and kept score. Specifically, they played the iterated version of PD, in which the agents play multiple rounds against the same opponent, so their decisions can be based on history.

In Axelrod's tournaments, a simple strategy that did surprisingly well was called ``tit for tat'', or TFT. TFT always cooperates during the first round of an iterated match; after that, it copies whatever the opponent did during the previous round.
If the opponent keeps cooperating, TFT keeps cooperating. If the opponent defects at any point, TFT defects in the next round. But if the opponent goes back to cooperating, so does TFT.

\index{TFT}
\index{tit for tat}

For more information about these tournaments, and an explanation of why TFT does so well, see this video: \url{https://thinkcomplex.com/pdvid2}.

Looking at the strategies that did well in these tournaments, Axelrod identified the characteristics they tended to share:

\begin{itemize}

\item Nice: The strategies that do well cooperate during the first round, and generally cooperate as often as they defect in subsequent rounds.

\item Retaliating: Strategies that cooperate all the time did not do as well as strategies that retaliate if the opponent defects.

\item Forgiving: But strategies that were too vindictive tended to punish themselves as well as their opponents.

\item Non-envious: Some of the most successful strategies seldom outscore their opponents; they are successful because they do {\em well enough} against a wide variety of opponents.

\end{itemize}

TFT has all of these properties.

Axelrod's tournaments offer a partial, possible answer to the problem of altruism: maybe the genes for altruism are prevalent because they are adaptive. To the degree that many social interactions can be modeled as variations on the Prisoner's Dilemma, a brain that is wired to be nice, tempered by a balance of retaliation and forgiveness, will tend to do well in a wide variety of circumstances.

But the strategies in Axelrod's tournaments were designed by people; they didn't evolve.
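For reference, the behavior described above is only a few lines of code; here is a minimal TFT sketch (a hypothetical standalone function, not part of this book's classes):

```python
def tit_for_tat(opponent_history):
    # Cooperate in the first round; afterwards, copy the opponent's
    # most recent move ('C' or 'D').
    if not opponent_history:
        return 'C'
    return opponent_history[-1]
```

With this formulation it is easy to see why TFT is nice, retaliating, and forgiving: it opens with \py{'C'}, answers a defection with a defection, and returns to cooperating as soon as the opponent does.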
We need to consider whether it is credible that genes for niceness, retribution, and forgiveness could appear by mutation, successfully invade a population of other strategies, and resist being invaded by subsequent mutations.


\section{Simulating evolution of cooperation}

{\em Evolution of Cooperation} is the title of the first book where Axelrod presented results from Prisoner's Dilemma tournaments and discussed the implications for the problem of altruism. Since then, he and other researchers have explored the evolutionary dynamics of PD tournaments, that is, how the distribution of strategies changes over time in a population of PD contestants. In the rest of this chapter, I run a version of those experiments and present the results.

\index{Evolution of Cooperation@{\it Evolution of Cooperation}}

First, we'll need a way to encode a PD strategy as a genotype. For this experiment, I consider strategies where the agent's choice in each round depends only on the opponent's choice in the previous two rounds.
I represent a strategy using a dictionary that maps from the opponent's previous two choices to the agent's next choice.

\index{strategy}

Here is the class definition for these agents:

\begin{code}
class Agent:

    keys = [(None, None),
            (None, 'C'),
            (None, 'D'),
            ('C', 'C'),
            ('C', 'D'),
            ('D', 'C'),
            ('D', 'D')]

    def __init__(self, values, fitness=np.nan):
        self.values = values
        self.responses = dict(zip(self.keys, values))
        self.fitness = fitness
\end{code}

\py{keys} is the sequence of keys in each agent's dictionary, where the tuple \py{('C', 'C')} means that the opponent cooperated in the previous two rounds; \py{(None, 'C')} means that only one round has been played and the opponent cooperated; and \py{(None, None)} means that no rounds have been played.

\index{zip}

In the \py{__init__} method, \py{values} is a sequence of choices, either \py{'C'} or \py{'D'}, that correspond to \py{keys}. So if the first element of \py{values} is \py{'C'}, that means that this agent will cooperate in the first round.
If the last element of \py{values} is \py{'D'}, this agent will defect if the opponent defected in the previous two rounds.

In this implementation, the genotype of an agent who always defects is \py{'DDDDDDD'}; the genotype of an agent who always cooperates is \py{'CCCCCCC'}, and the genotype for TFT is \py{'CCDCDCD'}.

\index{TFT}

The \py{Agent} class provides \py{copy}, which makes another agent with the same genotype, but with some probability of mutation:

\begin{code}
def copy(self, prob_mutate=0.05):
    if np.random.random() > prob_mutate:
        values = self.values
    else:
        values = self.mutate()
    return Agent(values, self.fitness)
\end{code}

\index{NumPy}
\index{random}
\index{choice}

Mutation works by choosing a random value in the genotype and flipping from \py{'C'} to \py{'D'}, or vice versa:

\begin{code}
def mutate(self):
    values = list(self.values)
    index = np.random.choice(len(values))
    values[index] = 'C' if values[index] == 'D' else 'D'
    return values
\end{code}

Now that we have agents, we need a tournament.


\section{The Tournament}

The \py{Tournament} class encapsulates the details of the PD competition:

\begin{code}
payoffs = {('C', 'C'): (3, 3),
           ('C', 'D'): (0, 5),
           ('D', 'C'): (5, 0),
           ('D', 'D'): (1, 1)}

num_rounds = 6

def play(self, agent1, agent2):
    agent1.reset()
    agent2.reset()

    for i in range(self.num_rounds):
        resp1 = agent1.respond(agent2)
        resp2 = agent2.respond(agent1)

        pay1, pay2 = self.payoffs[resp1, resp2]

        agent1.append(resp1, pay1)
        agent2.append(resp2, pay2)

    return agent1.score, agent2.score
\end{code}

\py{payoffs} is a dictionary that maps from the agents' choices to their rewards. For example, if both agents cooperate, they each get 3 points. If one defects and the other cooperates, the defector gets 5 and the cooperator gets 0.
If they both defect, each gets 1. These are the payoffs Axelrod used in his tournaments.

\index{tournament}

The \py{play} method runs several rounds of the PD game. It uses the following methods from the \py{Agent} class:

\begin{itemize}

\item \py{reset}: Initializes the agents before the first round, resetting their scores and the history of their responses.

\item \py{respond}: Asks each agent for their response, given the opponent's previous responses.

\item \py{append}: Updates each agent by storing the choices and adding up the scores from successive rounds.

\end{itemize}

After the given number of rounds, \py{play} returns the total score for each agent. I chose \py{num_rounds=6} so that each element of the genotype is accessed with roughly the same frequency. The first element is only accessed during the first round, or one sixth of the time. The next two elements are only accessed during the second round, or one twelfth each.
The last four elements are accessed four of six times, or one sixth each, on average.

\index{melee}

\py{Tournament} provides a second method, \py{melee}, that determines which agents compete against each other:

\begin{code}
def melee(self, agents, randomize=True):
    if randomize:
        agents = np.random.permutation(agents)

    n = len(agents)
    i_row = np.arange(n)
    j_row = (i_row + 1) % n

    totals = np.zeros(n)

    for i, j in zip(i_row, j_row):
        agent1, agent2 = agents[i], agents[j]
        score1, score2 = self.play(agent1, agent2)
        totals[i] += score1
        totals[j] += score2

    for i in i_row:
        agents[i].fitness = totals[i] / self.num_rounds / 2
\end{code}

\py{melee} takes a list of agents and a boolean, \py{randomize}, that determines whether each agent fights the same neighbors every time, or whether the pairings are randomized.

\index{NumPy}
\index{arange}

\py{i_row} and \py{j_row} contain the indices of the pairings. \py{totals} contains the total score of each agent.

Inside the loop, we select two agents, invoke \py{play}, and update \py{totals}.
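The pairing scheme defined by \py{i_row} and \py{j_row} arranges the agents in a ring, which is easy to check in a small standalone example:

```python
import numpy as np

n = 5
i_row = np.arange(n)
j_row = (i_row + 1) % n   # each agent plays its right-hand neighbor

# The modulus wraps the last pairing around to the start of the array,
# so every agent appears exactly twice: once as agent1, once as agent2.
pairs = [(int(i), int(j)) for i, j in zip(i_row, j_row)]
print(pairs)   # -> [(0, 1), (1, 2), (2, 3), (3, 4), (4, 0)]
```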
At the end, we compute the average number of points each agent got, per round and per opponent, and store the results in the \py{fitness} attribute of each agent.


\section{The Simulation}

The \py{Simulation} class for this chapter is based on the one in Section~\ref{evosim}; the only differences are in \py{__init__} and \py{step}.

\index{simulation}

Here's the \py{__init__} method:

\begin{code}
class PDSimulation(Simulation):

    def __init__(self, tournament, agents):
        self.tournament = tournament
        self.agents = np.asarray(agents)
        self.instruments = []
\end{code}

A \py{Simulation} object contains a \py{Tournament} object, a sequence of agents, and a sequence of \py{Instrument} objects (as in Section~\ref{instrument}).

Here's the \py{step} method:

\begin{code}
def step(self):
    self.tournament.melee(self.agents)
    Simulation.step(self)
\end{code}

This version of \py{step} uses \py{Tournament.melee}, which sets the \py{fitness} attribute for each agent; then it calls the \py{step} method from the \py{Simulation} class, reproduced here:

\begin{code}
# class Simulation

def step(self):
    n = len(self.agents)
    fits = self.get_fitnesses()

    # see who dies
    index_dead = self.choose_dead(fits)
    num_dead = len(index_dead)

    # replace the dead with copies of the living
    replacements = self.choose_replacements(num_dead, fits)
    self.agents[index_dead] = replacements

    # update any instruments
    self.update_instruments()
\end{code}

\py{Simulation.step} collects the agents' fitnesses in an array; then it calls \py{choose_dead} to decide which agents die, and \py{choose_replacements} to decide which agents reproduce.

\index{differential survival}

My simulation includes differential survival, as in Section~\ref{diffsurv}, but not differential reproduction. You can see the details in the notebook for this chapter.
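For comparison, differential reproduction means choosing replacements with probability proportional to fitness; here is a minimal sketch of that idea (not the book's exact code; it assumes each agent provides the \py{copy} method shown earlier):

```python
import numpy as np

def choose_replacements(agents, num_dead, fits):
    # Pick parents with probability proportional to fitness,
    # then copy each one (copies may mutate).
    ps = fits / np.sum(fits)
    parents = np.random.choice(agents, size=num_dead, p=ps)
    return [parent.copy() for parent in parents]
```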
As one of the exercises, you will have a chance to explore the effect of differential reproduction.


\section{Results}

Suppose we start with a population of three agents: one always cooperates, one always defects, and one plays the TFT strategy. If we run \py{Tournament.melee} with this population, the cooperator gets 1.5 points per round, the TFT agent gets 1.9, and the defector gets 3.33. This result suggests that ``always defect'' should quickly become the dominant strategy.

But ``always defect'' contains the seeds of its own destruction. If nicer strategies are driven to extinction, the defectors have no one to take advantage of. Their fitness drops, and they become vulnerable to invasion by cooperators.

\index{equilibrium}

Based on this analysis, it is not easy to predict how the system will behave: will it find a stable equilibrium, or oscillate between various points in the genotype landscape? Let's run the simulation and find out!

I start with 100 identical agents who always defect, and run the simulation for 5000 steps:

\begin{code}
tour = Tournament()
agents = make_identical_agents(100, list('DDDDDDD'))
sim = PDSimulation(tour, agents)
\end{code}


\begin{figure}
\centerline{\includegraphics[height=3in]{figs/chap12-1.pdf}}
\caption{Average fitness (points scored per round of Prisoner's Dilemma). }
\label{chap12-1}
\end{figure}

Figure~\ref{chap12-1} shows mean fitness over time (using the \py{MeanFitness} instrument from Section~\ref{instrument}). Initially mean fitness is 1, because when defectors face each other, they get only 1 point each per round.

After about 500 time steps, mean fitness increases to nearly 3, which
is what cooperators get when they face each other. However, as we
suspected, this situation is unstable.
Over the next 500 steps, mean fitness drops below 2, climbs back toward 3, and continues to oscillate.

The rest of the simulation is highly variable, but with the exception of one big drop, mean fitness is usually between 2 and 3, with the long-term mean close to 2.5.

And that's not bad! It's not quite a utopia of cooperation, which would average 3 points per round, but it's a long way from the dystopia of perpetual defection. And it's a lot better than what we might expect from the natural selection of self-interested agents.

\index{cooperation}

To get some insight into this level of fitness, let's look at a few more instruments. \py{Niceness} measures the fraction of cooperation in the genotypes of the agents after each time step:

\begin{code}
class Niceness(Instrument):

    def update(self, sim):
        responses = np.array([agent.values
                              for agent in sim.agents])
        metric = np.mean(responses == 'C')
        self.metrics.append(metric)
\end{code}

\py{responses} is an array with one row for each agent and one column for each element of the genome. \py{metric} is the fraction of elements that are \py{'C'}, averaged across agents.

\index{NumPy}
\index{mean}

\begin{figure}
\centerline{\includegraphics[height=3in,width=7in]{figs/chap12-2.pdf}}
\caption{Average niceness across all genomes in the population (left), and fraction of population that cooperates in the first round (right).}
\label{chap12-2}
\end{figure}

Figure~\ref{chap12-2} (left) shows the results: starting from 0, average niceness increases quickly to 0.75, then oscillates between 0.4 and 0.85, with a long-term mean near 0.65. Again, that's a lot of niceness!

Looking specifically at the opening move, we can track the fraction of agents that cooperate in the first round.
Here's the instrument:

\begin{code}
class Opening(Instrument):

    def update(self, sim):
        responses = np.array([agent.values[0]
                              for agent in sim.agents])
        metric = np.mean(responses == 'C')
        self.metrics.append(metric)
\end{code}

Figure~\ref{chap12-2} (right) shows the results, which are highly variable. The fraction of agents who cooperate in the first round is often near 1, and occasionally near 0. The long-term average is close to 0.65, similar to overall niceness. These results are consistent with Axelrod's tournaments; in general, nice strategies do well.

The other characteristics Axelrod identifies in successful strategies are retaliation and forgiveness. To measure retaliation, I define this instrument:

\begin{code}
class Retaliating(Instrument):

    def update(self, sim):
        after_d = np.array([agent.values[2::2]
                            for agent in sim.agents])
        after_c = np.array([agent.values[1::2]
                            for agent in sim.agents])
        metric = np.mean(after_d == 'D') - np.mean(after_c == 'D')
        self.metrics.append(metric)
\end{code}

\py{Retaliating} compares the number of elements in all genomes where an agent defects after the opponent defects (elements 2, 4, and 6) with the number of places where an agent defects after the opponent cooperates. As you might expect by now, the results vary substantially (you can see the graph in the notebook). On average the difference between these fractions is less than 0.1, so if agents defect 30\% of the time after the opponent cooperates, they might defect 40\% of the time after a defection.

This result provides weak support for the claim that successful strategies retaliate.
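As a sanity check on this metric, here is a standalone sketch (hypothetical genomes, not the book's \py{Agent} class) using the seven-gene layout in which genes 2, 4, and 6 are the responses to an opponent's defection: a population of pure TFT genomes scores 1, the maximum, and a population of unconditional defectors scores 0, because its defection is not contingent on history.

```python
import numpy as np

def retaliation(genomes):
    # Fraction of 'D' responses after an opponent defection (genes
    # 2, 4, 6) minus the fraction of 'D' responses after an opponent
    # cooperation (genes 1, 3, 5), averaged over the population.
    a = np.array(genomes)
    return np.mean(a[:, 2::2] == 'D') - np.mean(a[:, 1::2] == 'D')

tft_pop = [list('CCDCDCD')] * 100   # pure tit-for-tat genomes
all_d = [list('DDDDDDD')] * 100     # unconditional defectors
print(retaliation(tft_pop))  # 1.0: always and only retaliates
print(retaliation(all_d))    # 0.0: defects regardless of history
```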
But maybe it's not necessary for all agents, or even many, to be retaliatory; if there is at least some tendency toward retaliation in the population as a whole, that might be enough to prevent high-defection strategies from gaining ground.

\index{retaliation}
\index{forgiveness}

To measure forgiveness, I define one more instrument to see whether agents are more likely to cooperate after D-C in the previous two rounds than after C-D. In my simulations, there is no evidence for this particular kind of forgiveness. On the other hand, the strategies in these simulations are necessarily forgiving because they consider only the previous two rounds of history. In this context, forgetting is a kind of forgiving.

\section{Conclusions}

Axelrod's tournaments suggest a possible resolution to the problem of altruism: maybe being nice, but not {\em too} nice, is adaptive. But the strategies in the original tournaments were designed by people, not evolution, and the distribution of strategies did not change over the course of the tournaments.

That raises a question: strategies like TFT might do well in a fixed population of human-designed strategies, but can they evolve?
In other words, can they appear in a population through mutation, compete successfully with their ancestors, and resist invasion by their descendants?

The simulations in this chapter suggest:

\begin{itemize}

\item Populations of defectors are vulnerable to invasion by nicer strategies.

\item Populations that are too nice are vulnerable to invasion by defectors.

\item As a result, the average level of niceness oscillates, but it is generally high, and the average level of fitness is generally closer to a utopia of cooperation than to a dystopia of defection.

\item TFT, which was a successful strategy in Axelrod's tournaments, does not seem to be an especially optimal strategy in an evolving population. In fact, there is probably no stable optimal strategy.

\item Some degree of retaliation may be adaptive, but it might not be necessary for all agents to retaliate. If there is enough retaliation in the population as a whole, that might be enough to prevent invasion by defectors\footnote{And that introduces a whole new topic in game theory, the free-rider problem; see \url{https://thinkcomplex.com/rider}.}.

\end{itemize}

Obviously, the agents in these simulations are simple, and the Prisoner's Dilemma is a highly abstract model of a limited range of social interactions. Nevertheless, the results in this chapter provide some insight into human nature. Maybe our inclinations toward cooperation, retaliation, and forgiveness are innate, at least in part. These characteristics are a result of how our brains are wired, which is controlled by our genes, at least in part.
And maybe our genes build our brains that way because over the history of human evolution, genes for less altruistic brains were less likely to propagate.

\index{altruism}

Maybe that's why selfish genes build altruistic brains.

\section{Exercises}

The code for this chapter is in the Jupyter notebook {\tt chap12.ipynb} in the repository for this book. Open the notebook, read the code, and run the cells. You can use this notebook to work on the following exercises. My solutions are in {\tt chap12soln.ipynb}.

\begin{exercise}

The simulations in this chapter depend on conditions and parameters I chose arbitrarily. As an exercise, I encourage you to explore other conditions to see what effect they have on the results. Here are some suggestions:

\begin{enumerate}

\item Vary the initial conditions: instead of starting with all defectors, see what happens if you start with all cooperators, all TFT, or random agents.

\item In \py{Tournament.melee}, I shuffle the agents at the beginning of each time step, so each agent plays against two randomly chosen agents. What happens if you don't shuffle? In that case, each agent plays against the same neighbors repeatedly, which might make it easier for a minority strategy to invade a majority by taking advantage of locality.

\item Since each agent only plays against two other agents, the outcome of each round is highly variable: an agent that would do well against most other agents might get unlucky during any given round, or the other way around. What happens if you increase the number of opponents each agent plays against during each round?
Or what if an agent's fitness at the end of each step is the average of its current score and its fitness at the end of the previous round?

\item The function I chose for \py{prob_survival} varies from 0.7 to 0.9, so the least fit agent, with \py{p=0.7}, lives for $1/(1-p) = 3.33$ time steps on average, and the most fit agent lives for 10 time steps. What happens if you make the degree of differential survival more or less ``aggressive''?

\item I chose \py{num_rounds=6} so that each element of the genome has roughly the same impact on the outcome of a match. But that is substantially shorter than what Axelrod used in his tournaments. What happens if you increase \py{num_rounds}? Note: if you explore the effect of this parameter, you might want to modify \py{Niceness} to measure the niceness of the last 4 elements of the genome, which will be under more selective pressure as \py{num_rounds} increases.

\item My implementation has differential survival but not differential reproduction. What happens if you add differential reproduction?

\end{enumerate}

\end{exercise}

\begin{exercise}

In my simulations, the population never converges to a state where a majority share the same, presumably optimal, genotype. There are two possible explanations for this outcome: one is that there is no optimal strategy, because whenever the population is dominated by a majority genotype, that condition creates an opportunity for a minority to invade; the other possibility is that the mutation rate is high enough to maintain a diversity of genotypes.

To distinguish between these explanations, try lowering the mutation rate to see what happens. Alternatively, start with a random population and run without mutation until only one genotype survives. Or run with mutation until the system reaches something like a steady state; then turn off mutation and run until there is only one surviving genotype.
What are the characteristics of the genotypes that prevail in these conditions?

\end{exercise}

\begin{exercise}

The agents in my experiment are ``reactive'' in the sense that their choice during each round depends only on what the opponent did during previous rounds. Explore strategies that also take into account the agent's past choices. These strategies can distinguish an opponent who retaliates from an opponent who defects without provocation.

\index{reactive}

\end{exercise}

\appendix

\chapter{Reading list}
\label{reading}

The following are selected books related to topics in this book. Most are written for a non-technical audience.

\begin{itemize}

\item Axelrod, Robert, {\it Complexity of Cooperation}, Princeton University Press, 1997.

\item Axelrod, Robert, {\it The Evolution of Cooperation}, Basic Books, 2006.

\item Bak, Per, {\it How Nature Works}, Copernicus (Springer), 1996.

\item Barab\'{a}si, Albert-L\'{a}szl\'{o}, {\it Linked}, Perseus Books Group, 2002.

\item Buchanan, Mark, {\it Nexus}, W.~W.~Norton \& Company, 2002.

\item Dawkins, Richard, {\it The Selfish Gene}, Oxford University Press, 2016.

\item Epstein, Joshua and Axtell, Robert, {\it Growing Artificial Societies}, Brookings Institution Press \& MIT Press, 1996.

\item Fisher, Len, {\it The Perfect Swarm}, Basic Books, 2009.

\item Flake, Gary William, {\it The Computational Beauty of Nature}, MIT Press, 2000.

\item Goldstein, Rebecca, {\it Incompleteness}, W.~W.~Norton \& Company, 2005.

\item Goodwin, Brian, {\it How the Leopard Changed Its Spots}, Princeton University Press, 2001.

\item Holland, John, {\it Hidden Order}, Basic Books, 1995.

\item Johnson, Steven, {\it Emergence}, Scribner, 2001.

\item Kelly, Kevin, {\it Out of Control}, Basic Books, 2002.

\item
Kosko, Bart, {\it Fuzzy Thinking}, Hyperion, 1993.

\item Levy, Steven, {\it Artificial Life}, Pantheon, 1992.

\item Mandelbrot, Benoit, {\it Fractal Geometry of Nature}, Times Books, 1982.

\item McGrayne, Sharon Bertsch, {\it The Theory That Would Not Die}, Yale University Press, 2011.

\item Mitchell, Melanie, {\it Complexity: A Guided Tour}, Oxford University Press, 2009.

\item Resnick, Mitchell, {\it Turtles, Termites, and Traffic Jams}, Bradford, 1997.

\item Rucker, Rudy, {\it The Lifebox, the Seashell, and the Soul}, Thunder's Mouth Press, 2005.

\item Sawyer, R. Keith, {\it Social Emergence: Societies as Complex Systems}, Cambridge University Press, 2005.

\item Schelling, Thomas, {\it Micromotives and Macrobehavior}, W.~W.~Norton \& Company, 2006.

\item Strogatz, Steven, {\it Sync}, Hachette Books, 2003.

\item Waldrop, M. Mitchell, {\it Complexity: The Emerging Science at the Edge of Order and Chaos}, Simon \& Schuster, 1992.

\item Watts, Duncan, {\it Six Degrees}, W.~W.~Norton \& Company, 2003.

\item Wolfram, Stephen, {\it A New Kind of Science}, Wolfram Media, 2002.

\end{itemize}

\backmatter
\printindex

\afterpage{\blankpage}

\end{document}

\section{Pareto distributions}

The Pareto distribution is named after the economist Vilfredo Pareto, who used it to describe the distribution of wealth; see \url{https://thinkcomplex.com/pareto}.
Since then, people have used it to describe phenomena in the natural and social sciences including sizes of cities and towns, sand particles and meteorites, forest fires and earthquakes.
\index{Pareto distribution}
\index{Pareto, Vilfredo}

The Pareto distribution is characterized by a CDF with the following form:
%
\[ CDF(x) = 1 - \left( \frac{x}{x_m} \right) ^{-\alpha} \]
%
The parameters $x_m$ and $\alpha$ determine the location and shape of the distribution. $x_m$ is the minimum possible quantity.
\index{parameter}

Values from a Pareto distribution often have these properties:

\begin{description}

\item[Long tail:] Pareto distributions contain many small values and a few very large ones.
\index{long tail}

\item[80/20 rule:] The large values in a Pareto distribution are so large that they make up a disproportionate share of the total. In the context of wealth, the 80/20 rule says that 20\% of the people own 80\% of the wealth.
\index{80/20 rule}

\item[Scale free:] Short-tailed distributions are centered around a typical size, which is called a ``scale''. For example, the great majority of adult humans are between 100 and 200 cm in height, so we could say that the scale of human height is a few hundred centimeters. But for heavy-tailed distributions, there is no similar range (bounded by a factor of two) that contains the bulk of the distribution.
So we say that these distributions are ``scale-free''.
\index{scale-free}

\end{description}

To get a sense of the difference between the Pareto and Gaussian distributions, imagine what the world would be like if the distribution of human height were Pareto.

In Pareto World, the shortest person is 100 cm, and the median is 150 cm, so that part of the distribution is not very different from ours.

\index{Pareto World}

But if you generate 6 billion values from this distribution, the tallest person might be 100 km --- that's what it means to be scale-free!

There is a simple visual test that indicates whether an empirical distribution is well characterized by a Pareto distribution: on a log-log scale, the CCDF looks like a straight line. The derivation is similar to what we saw in the previous section.

The equation for the CCDF is:
%
\[ y = 1 - CDF(x) \sim \left( \frac{x}{x_m} \right) ^{-\alpha} \]
%
Taking the log of both sides yields:
%
\[ \log y \sim -\alpha (\log x - \log x_m ) \]
%
So if you plot $\log y$ versus $\log x$, it should look like a straight line with slope $-\alpha$ and intercept $\alpha \log x_m$.
\index{log-log plot}

\begin{exercise}

Write a version of \py{plot_ccdf} that plots the CCDF on a log-log scale.

To test your function, use \py{paretovariate} from the \py{random} module to generate 100 values from a Pareto distribution.
Plot the CCDF on a log-log scale and see if it falls on a straight line. What happens to the curve as you increase the number of values?
\index{random module@\py{random} module}

\end{exercise}

\section{Pareto and the power law}

Starting with a power-law distribution, we have:
%
\[ P(k) \sim k^{- \gamma} \]
%
If we choose a random node in a scale-free network, $P(k)$ is the probability that its degree equals $k$.
\index{degree}

The cumulative distribution function, $CDF(k)$, is the probability that the degree is less than or equal to $k$, so we can get that by summation:
%
\[ CDF(k) = \sum_{i=0}^k P(i) \]
%
For large values of $k$ we can approximate the summation with an integral:
%
\[ \sum_{i=1}^k i^{- \gamma} \sim \int_{1}^k i^{- \gamma} \,di =
\frac{1}{\gamma -1} (1 - k^{-\gamma + 1}) \]
%
To make this a proper CDF we could normalize it so that it goes to 1 as $k$ goes to infinity, but that's not necessary, because all we need to know is:
%
\[ CDF(k) \sim 1 - k^{-\gamma + 1} \]
%
which shows that the distribution of $k$ is asymptotic to a Pareto distribution with $\alpha = \gamma - 1$.

\section{Continuous distributions}

The distributions we have seen so far are sometimes called {\bf empirical distributions} because they are based on a dataset that comes from some kind of empirical observation.
\index{empirical distribution}

An alternative is a {\bf continuous distribution}, which is characterized by a CDF that is a continuous function. Some of these distributions, like the Gaussian or normal distribution, are well known, at least to people who have studied statistics.
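Before moving on, the integral approximation from the previous section can be checked numerically. This is a standalone sketch ($\gamma = 2.5$ is arbitrary, and the infinite sum is truncated at 200{,}000 terms): the tail sum $\sum_{i>k} i^{-\gamma}$, which corresponds to $1 - CDF(k)$ up to normalization, should behave like $k^{-\gamma+1}/(\gamma-1)$.

```python
# Numerical check (gamma = 2.5 chosen arbitrarily): the tail sum
# beyond k should approach k**(-gamma + 1) / (gamma - 1).
gamma = 2.5
terms = [i ** -gamma for i in range(1, 200001)]
total = sum(terms)

def tail(k):
    """Approximate sum of i**-gamma for i > k (truncated at 200000)."""
    return total - sum(terms[:k])

for k in (10, 100, 1000):
    # Ratio should approach 1 / (gamma - 1) ~= 0.667 as k grows.
    print(k, round(tail(k) * k ** (gamma - 1), 3))
```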
Many real-world phenomena can be approximated by continuous distributions, which is why they are useful.
\index{continuous distribution}
\index{normal distribution}
\index{Gaussian distribution}

For example, if you observe a mass of radioactive material with an instrument that can detect decay events, the distribution of times between events will most likely fit an exponential distribution. The same is true for any series of events where an event is equally likely at any time.
\index{exponential distribution}

The CDF of the exponential distribution is:

\[ CDF(x) = 1 - e^{-\lambda x} \]

The parameter, $\lambda$, determines the mean and variance of the distribution. This equation can be used to derive a simple visual test for whether a dataset can be well approximated by an exponential distribution. All you have to do is plot the {\bf complementary distribution} on a log-$y$ scale.
\index{parameter}
\index{complementary distribution}

The complementary distribution (CCDF) is just $1 - CDF(x)$; if you plot the complementary distribution of a dataset that you think is exponential, you expect to see a function like:

\[ y = 1 - CDF(x) \sim e^{-\lambda x} \]

If you take the log of both sides of this equation, you get:
%
\[ \log y \sim -\lambda x \]
%
So on a log-$y$ scale the CCDF should look like a straight line with slope $-\lambda$.

\begin{exercise}

Write a function called \py{plot_ccdf} that takes a list of values and the corresponding list of probabilities and plots the CCDF on a log-$y$ scale.

To test your function, use \py{expovariate} from the \py{random} module to generate 100 values from an exponential distribution.
Plot the CCDF on a log-$y$ scale and see if it falls on a straight line.
\index{random module@\py{random} module}

\end{exercise}

Leftover exercises:

Connecting grids and networks:

\begin{exercise}
Implement percolation on a network.
\end{exercise}

\begin{exercise}
Check whether the clusters in Schelling's model are connected.
\end{exercise}

%\begin{figure}
%\centerline{\includegraphics[height=1.75in]{figs/array.pdf}}
%\caption{A list of lists (left) and a NumPy array (right).\label{fig.array}}
%\end{figure}

Figure~\ref{fig.array} shows why. The diagram on the left shows a list of lists of integers; each dot represents a reference, which takes up 4--8 bytes. To access one of the integers, you have to follow two references.

\index{NumPy}
\index{array}
\index{nested list}
\index{reference}

The diagram on the right shows an array of the same integers. Because the elements are all the same size, they can be stored contiguously in memory. This arrangement saves space because it doesn't use references, and it saves time because the location of an element can be computed directly from the indices; there is no need to follow a series of references.
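The space difference is easy to observe. Here is a small standalone sketch (not from the book) that compares the memory footprint of a nested list with the equivalent NumPy array; the comparison understates the gap, because it counts only the lists' reference storage and ignores the integer objects the references point to:

```python
import sys
import numpy as np

nested = [[1, 2, 3, 4] for _ in range(1000)]  # list of lists
arr = np.array(nested)                        # contiguous array

# Bytes for the outer list plus each row's reference storage; the
# int objects themselves would add even more.
list_bytes = sys.getsizeof(nested) + sum(sys.getsizeof(row) for row in nested)

print(list_bytes, arr.nbytes)  # the array buffer is several times smaller
```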