diff --git a/docs/2023-8-4-162/redaction/todo.html b/docs/2023-8-4-162/redaction/todo.html new file mode 100644 index 0000000..ec26678 --- /dev/null +++ b/docs/2023-8-4-162/redaction/todo.html @@ -0,0 +1,131 @@ + + + + + + works in public + + + +
+ cover +
+
+

todo

+ +

format

+ + + +

conclusion

+ + + +

chap 4 - programming

+ + + +

programming languages

+ +

styles

+ + + +

functions

+ + + +

chap 3 - beauty

+ +

aesthetics

+ + + +

literature

+ + + +

architecture

+ + + +

mathematics

+ + + +

chap 2 - understanding

+ + + +

chap 1 - ideals

+ + + +

introduction

+ + + +
+ + diff --git a/docs/index.html b/docs/index.html index 3eaf8c2..88f7c86 100644 --- a/docs/index.html +++ b/docs/index.html @@ -31,6 +31,14 @@

the role of aesthetics in the understandings of source code

+

2023 - 8 - 4

+ +

162

+ +
  • redaction/todo.html
  • + + +

    2023 - 8 - 2

    2056

    diff --git a/redaction/corpus/error_handling.go b/redaction/corpus/error_handling.go deleted file mode 100644 index 994e18a..0000000 --- a/redaction/corpus/error_handling.go +++ /dev/null @@ -1,11 +0,0 @@ -package main - -import ( - "fmt" - "os/exec" -) - -func main() { - d, _ := exec.LookPath("date") - fmt.Printf("Today is %s", d) -} diff --git a/redaction/corpus/iterating.c b/redaction/corpus/iterating.c new file mode 100644 index 0000000..3a82b58 --- /dev/null +++ b/redaction/corpus/iterating.c @@ -0,0 +1,12 @@ +#include + +int main() +{ + int max_count = 5; + struct int my_list[max_count] = {2046, 2047, 2048, 2049, 2050}; + + for (int i = 0; i < max_count; i++) + { + printf("%d", my_list[i]); + } +} \ No newline at end of file diff --git a/redaction/corpus/iterating.py b/redaction/corpus/iterating.py new file mode 100644 index 0000000..2eb33ce --- /dev/null +++ b/redaction/corpus/iterating.py @@ -0,0 +1,4 @@ +my_list = [2046, 2047, 2048, 2049, 2050] + +for item in my_list: + print(item) \ No newline at end of file diff --git a/redaction/corpus/multiple_returns.go b/redaction/corpus/multiple_returns.go new file mode 100644 index 0000000..ab6bfee --- /dev/null +++ b/redaction/corpus/multiple_returns.go @@ -0,0 +1,9 @@ +package main + +func getNumbers() (int, float64, int) { + return 1, 2.0, 3 +} + +func main() { + first, _, third := getNumbers() +} diff --git a/redaction/corpus/multiple_returns.js b/redaction/corpus/multiple_returns.js new file mode 100644 index 0000000..99adabe --- /dev/null +++ b/redaction/corpus/multiple_returns.js @@ -0,0 +1,8 @@ +let getNumbers = () => { + return [1, 2.0, 3] +} + + +numbers = getNumbers() +first = numbers[0] +second = numbers[2] \ No newline at end of file diff --git a/redaction/corpus/non-thread.go b/redaction/corpus/non-thread.go new file mode 100644 index 0000000..48a05d5 --- /dev/null +++ b/redaction/corpus/non-thread.go @@ -0,0 +1,20 @@ +package main + +import ( + "fmt" + "math/rand" + "time" +) + +func recall(date 
int) { + random_delay := (rand.Int() % 5) + 1 + time.Sleep(time.Second * time.Duration(random_delay)) + fmt.Println(date) +} + +func main() { + recall(2045) + recall(2046) + + fmt.Println("We're done!") +} diff --git a/redaction/corpus/thread.c b/redaction/corpus/thread.c index b4610ff..a51851c 100644 --- a/redaction/corpus/thread.c +++ b/redaction/corpus/thread.c @@ -1,16 +1,23 @@ #include #include +#include +#include void recall(int date) { - std::cout << date << '\n'; + r = (rand() % 5) + 1 sleep(r) + std::cout << date << '\n'; } int main() { - std::thread thread(recall, 2046); + pthread_t thread1; + pthread_t thread2; + pthread_create(&thread1, NULL, recall, 2045); + pthread_create(&thread2, NULL, recall, 2046); - thread.join(); + pthread_join(thread1, NULL); + pthread_join(thread2, NULL); cout << "We're done!"; diff --git a/redaction/corpus/thread.go b/redaction/corpus/thread.go index fc87e1f..e6e9d7d 100644 --- a/redaction/corpus/thread.go +++ b/redaction/corpus/thread.go @@ -2,13 +2,19 @@ package main import ( "fmt" + "math/rand" + "time" ) func recall(date int) { + random_delay := (rand.Int() % 5) + 1 + time.Sleep(time.Second * time.Duration(random_delay)) fmt.Println(date) } + func main() { go recall(2046) + go recall(2047) fmt.Println("We're done!") } diff --git a/redaction/introduction.tex b/redaction/introduction.tex index e4a3b5c..a2c7149 100644 --- a/redaction/introduction.tex +++ b/redaction/introduction.tex @@ -67,7 +67,7 @@ \subsection{Beautiful code} Considered one of the most canonical textbooks in the field, \emph{The Art of Computer Programming} highlights two important aspects of programming for our purpose: that it can be an aesthetic experience and that it is the result of a craft, rather than of a highly-formalized systematic process, as we will see in \ref{subsubsec:crafting-software}. 
-Craftsmanship is an essentially fleeting phenomenon, a practice rather than a theory, in the vein of Michel De Certeau's \textit{tactics}, bottom-up actions informally designed and implemented by the users of a situation, product or technology as opposed to \textit{strategies} \citep{certeau_invention_1990}, in which ways of doing are deliberately prescribed in a top-down fashion. Craft is hard to formalize, and the development of expertise in the field happens more often through practice thanthrough formal education \citep{sennett_craftsman_2009}. It is also one in which function and beauty exist in an intricate, embodied and implicit relationship, based on subjective qualitative standards and functional purposes rather than strictly quantitative measurements \citep{pye_nature_2008}. Approaching programming as a craft has been a recurrent perspective \citep{levy_programmation_1992,dijkstra_craftsman_1982}, and connects to the multiple testimonies of encountering beautiful code, some of which have made their ways into edited volumes or monographs \citep{oram_beautiful_2007,chandra_geek_2014,gabriel_patterns_1998}. +Craftsmanship is an essentially fleeting phenomenon, a practice rather than a theory, in the vein of Michel De Certeau's \textit{tactics}, bottom-up actions informally designed and implemented by the users of a situation, product or technology as opposed to \textit{strategies} \citep{certeau_invention_1990}, in which ways of doing are deliberately prescribed in a top-down fashion. Craft is hard to formalize, and the development of expertise in the field happens more often through practice than through formal education \citep{sennett_craftsman_2009}. It is also one in which function and beauty exist in an intricate, embodied and implicit relationship, based on subjective qualitative standards and functional purposes rather than strictly quantitative measurements \citep{pye_nature_2008}. 
Approaching programming as a craft has been a recurrent perspective \citep{levy_programmation_1992,dijkstra_craftsman_1982}, and connects to the multiple testimonies of encountering beautiful code, some of which have made their ways into edited volumes or monographs \citep{oram_beautiful_2007,chandra_geek_2014,gabriel_patterns_1998}. Additionally, informal exchanges among programmers on forums, mailing lists, blog posts and code repositories often mention beautiful code, either as a central discussion point or simply in passing. These testimonies constitute the first part of our corpus, as sources in which programmers comment on the aesthetic dimension of their practice. The second part of the corpus is composed of selected program texts, which we will examine in order to identify and formalize which aspects of the textual manifestation of software can elicit an aesthetic experience. diff --git a/redaction/programming.tex b/redaction/programming.tex index 06dd1d6..b2c03cf 100644 --- a/redaction/programming.tex +++ b/redaction/programming.tex @@ -1,38 +1,38 @@ \chapter{Machine languages} \label{chap:programming} -After analyzing the discourses of programmers with regards to beautiful code, after highlighting the specific cognitive hurdles and bypasses inherent to software, and after having investigated how aesthetics enable various forms of understanding in associated fields, we now offer a framework for the aesthetics of source code. To do this, this chapter develops on the medium itself of source code beauty: the programming languages. Understanding what they are and how they are used will allow us to highlight two important aspects. First, that there is a tension between human-meaning and machine-meaning, a tension between syntax and semantics. Second, it will allow us to highlight yet another contextual aspect of source code aesthetics—just like natural languages, machine languages also act as linguistic communities. 
+After analyzing the discourses of programmers on beautiful code, after highlighting the specific cognitive complexities inherent to software and how they are dealt with, and after having investigated how aesthetics enable various forms of understanding in adjacent fields, we now offer a framework for the aesthetics of source code. -Once we laid this material groundwork, we propose two approaches to the aesthetic manifestations in program texts. First, we build on a close-reading approach to suggest a perspective of various scales. We will therefore see how aesthetic manifestations exist along a linear axis of vocabulary, syntax and structure. Finally, we will take a step back to the standards established in \ref{chap:ideals} (e.g. clean, elegant, etc.) and show how these create different contexts for aesthetic appreciation. +To do this, this chapter begins with the medium of source code: programming languages. Understanding what they are and how they are used will allow us to highlight two important aspects. First, that there is a tension between human-meaning and machine-meaning, a tension between different interpretations of the same syntax. Second, it will allow us to highlight another contextual aspect of source code aesthetics—just like natural languages, machine languages also act as linguistic communities. -Ultimately, this chapter continues on our investigation of how programmers establish mental spaces in order to understand a program text. Programming languages therefore act as an interface between a \emph{base space} (the text) and \emph{built space} (the imagination that the readers has of the text), resulting in a functional cognitive model. +Once we laid this material groundwork, we propose two approaches to the aesthetic manifestations in program texts. First, we build on a close-reading approach to suggest a framework composed of various scales. 
Focusing on the spatiality of program texts, we will show how programming languages act as an interface between a program text and a mental model. We then develop on how syntax and vocabulary make use of metaphors to enable the representation of positive values such as abstraction, openness and function. + +It is with the concept of function that we conclude the chapter, and with the essential role that function plays in aesthetic appreciation. That is, we will show that such role is dual: a functional source code is required for aesthetic judgment to take place, and the aesthetic experience has the function of enabling understanding. \section{Linguistic interfaces} \label{sec:linguistic-interfaces} -Software is an idea ultimately represented in specific hardware configurations. The immediate medium of this representation, from the programmer's perspective, is the programming language in which the idea is written down. Programming languages have so far been set aside when examining which sensual aspects of source code resulted in what could be deemed a "beautiful" program text. The relationship between semantics (deep-structure) and its syntactic representation (surface-structure) is framed by programming languages, as they define the legal organization of form. +Software is an idea ultimately represented in specific hardware configurations. The immediate medium of this representation, from the programmer's perspective, is the programming language in which the idea is written down. Programming languages have so far been set aside when examining which sensual aspects of source code resulted in what could be deemed a "beautiful" program text. And yet, the relationship between semantics (deep-structure) and its syntactic representation (surface-structure) is framed by programming languages, as they define the legal organization of form. -This section examines the influence of programming languages on the aesthetic manifestations of source code. 
To do so, we first go over a broad description of programming languages, ending on what makes a programming language expressive. Second, we touch upon the problem of semantics in programming languages, and how they might differ from a human understanding of semantics. We then we assess their fit as an artistic, expressive system by introducing notions to style and idiomaticity in programming language communities. Finally, we highlight a couple of computing-specific concepts which are made explicit by programming language research, and further define the kinds of concepts that are defined and manipulated when writing code. +This section examines the influence of programming languages on the aesthetic manifestations of source code. To do so, we first go over a broad description of programming languages, focusing on what makes a programming language expressive. Second, we touch upon the problem of semantics in programming languages, and how they might differ from a human understanding of semantics. We then we assess their fit as an artistic, expressive system by introducing notions to style and idiomaticity in programming language communities. In so doing, we highlight a couple of computing-specific concepts that are made accessible by programming languages, discussing how different linguistic interfaces propose different representations. \subsection{Programming languages} \label{subsec:programming-languages} -As an introduction to the section, we start by recalling the historical and technical developments of programming languages, relocating them as an interface between hardware and software, before investigating what makes them good from a general, language designer point of view. This will highlight generic qualities such as orthogonality, abstraction and simplicity, which we will then compare to specific implementations. 
+We start by recalling the historical and technical developments of programming languages, relocating them as an interface between hardware and software. With a better technical understanding, this will allow us to pinpoint the overlap and differences between human semantics and machine semantics. \subsubsection{History and developments} \label{subsubsec:history-developments} -A programming language is a strictly-defined set of syntactic rules and symbols for describing instructions to be executed by the processor. The history of programming languages is, in a sense, the history of decoupling the means of creating software from hardware. The earliest programming languages were embedded in hardware itself, such as piano rolls and punched cards for Jacquard looms \citep{sack_software_2019}. Operating on similar principles, the first electric computers—such as the ENIAC, the UNIVAC or the MUC—still required manual re-wiring in order to implement any change in the algorithm being computed. This process then gave way to programming through the stack of cards fed into the machine, a process which nonetheless retained a definite material aspect. It is with the shift to the stored-program model, at the dawn of the 1950s, that the programs could be written, stored, recalled and executed in their electro(-mecha)nical form, essentially freeing the software result from any immediately physical representation. - -This tendency to have software gradually separate from hardware saw a parallel in the development of programming languages themselves. Ultimately, any software instruction needs to execute one of the built-in, hardwired instructions of the processor. Also called \emph{machine language}, these instructions set describe the specific implementation of the most common operations executed by a computer (e.g. \lstinline{add, move, read, load}, etc.), and are the oldest and most direct semantic interface to the hardware. 
While these are represented as binary numbers to the processing unit, a first layer of a family of languages called Assembly, translate those machines instructions into another set of instructions, whose syntax is loosely based on English. Considered today as some of the most low-level code one can write, Assembly languages are machine-dependent, featuring a one-to-one translation from English keywords to the kind of instruction sets known to the processor they are expected to interface with. As such, a program written for a particular architecture of a computer (e.g. x86 or ARM) cannot be executed without any modifications on a another machine. +A programming language is a strictly-defined set of syntactic rules and symbols for describing instructions to be executed by the processor. The history of programming languages is, in a sense, the history of decoupling the means of creating software from hardware. The earliest programming languages were embedded in hardware itself, such as piano rolls and punched cards for Jacquard looms \citep{sack_software_2019}. Operating on similar principles, the first electric computers—such as the ENIAC, the UNIVAC or the MUC—still required manual re-wiring in order to implement any change in the algorithm being computed. This process then gave way to programming through the stack of cards fed into the machine, a more modular process which nonetheless retained a definite material aspect. It is with the shift to the stored-program model, at the dawn of the 1950s, that the programs could be written, stored, recalled and executed in their electro(-mecha)nical form, essentially freeing the software result from any immediately physical representation. -% add a part about assembly code poems? +This tendency to have software gradually separate from hardware saw a parallel in the development of programming languages themselves. 
Ultimately, any software instruction needs to execute one of the built-in, hardwired instructions of the processor. Also called \emph{machine language}, this instruction set describes the specific implementation of the most common operations executed by a computer (e.g. \lstinline{add, move, read, load}, etc.), and is part of the oldest and most direct semantic interface to the hardware. These operations are ultimately represented as binary numbers to the processing unit. To represent these binary combinations, a first layer, a family of languages called Assembly, provides a syntax which is loosely based on English. When read by the CPU, each of these Assembly mnemonics is converted into binary representation\footnote{For an example of Assembly language translated into machine code, see \ref{code:level_asm} and \ref{code:level_byte}.}. Considered today as some of the most low-level code one can write, Assembly languages are machine-dependent, featuring a one-to-one translation from English keywords to the kind of instruction sets known to the processor they are expected to interface with. As such, a program written for a particular architecture of a computer (e.g. x86 or ARM) cannot be executed without any modifications on another machine. -The first widely acknoweldged high-level language which allowed for a complete decoupling of hardware and software was FORTRAN\footnote{Even though programming languages such as Plankalkül, Short Code and Autocode were partial proposals of such decouppling before FORTRAN.}. At this point, programmers did not need to care about the specifics of the machine that they were running on anymore, and found more freedom in their exploration of what could be done in writing software, expanding beyond scientific and military applications into the commercial world (see \ref{sec:practice-programmers}). 
Moving away from hand-crafted and platform-specific Assembly code also implied a certain sense of looseness incompatible with the extension of its application domain. As such, FORTRAN\footnote{Literally meaning FORmula TRANslation, thus making clear its role as a mediator.}, and the subsquent COBOL, Lisp and ALGOL 58 also started being concerned with the specific definition of their syntax in a non-ambiguous manner to ensure reliability. Using Backus-Naur Form notation, it became possible to formalize their syntactic rules in order to prevent any unexpected behaviour and support rigorous reasoning for the implementation and research of current and subsequent languages. With such specifications, and with the decoupling from hardware, programming languages became, in a way, context-free. +The first widely acknowledged high-level language which allowed for a complete decoupling of hardware and software is FORTRAN\footnote{Even though programming languages such as Plankalkül, Short Code and Autocode were partial proposals of such decoupling before FORTRAN.}. At this point, programmers did not need to care about the specifics of the machine that they were running on anymore, and found more freedom in their exploration of what could be done in writing software, expanding beyond scientific and military applications into the commercial world (see \ref{sec:practice-programmers}). Moving away from hand-crafted and platform-specific Assembly code also implied a certain sense of looseness incompatible with the extension of its application domain: widening the problem domain demanded tightening the specification of such languages. As such, FORTRAN\footnote{An acronym for FORmula TRANslation, thus making clear its role as a mediator.}, and the subsequent COBOL, Lisp and ALGOL 58 also started being concerned with the specific definition of their syntax in a non-ambiguous manner to ensure reliability. 
Using Backus-Naur Form notation, it became possible to formalize their syntactic rules in order to prevent any unexpected behaviour and support rigorous reasoning for the implementation and research of current and subsequent languages. With such specifications, and with the decoupling from hardware, programming languages became, in a way, context-free. -The context-free grammatical basis for programming allowed for the further development of compilers and interpreters, binary programs which, given a syntactically-valid program text, output their machine code representation. Such a machine-code representation can then be executed by the processor\footnote{The main difference between a compiler and an interpreter is that the compiler parses the whole program text as once, resulting in a binary object, while interpreters parse only one line at a time, which is then immediately executed}. At this point, a defining aspect of programming languages is their theoretical lack of ambiguity. This need for disambiguation was reflected both in the engineering roots of computation\footnote{Punch cards and electrical circuits are ultimately discreete—hole or no hole, voltage or no voltage.} and in their formal mathematic roots notation\footnote{For instance, Plankalkül was based on Frege's \emph{Begriffschrift}, a lineage we've seen in \ref{subsec:knowing-what-how}}, and was a requirement of the further development of functional software engineering. +The context-free grammatical basis for programming allowed for the further development of compilers and interpreters, binary programs which, given a syntactically-valid program text, output their machine code representation. 
Such a machine-code representation can then be executed by the processor\footnote{The main difference between a compiler and an interpreter is that the compiler parses the whole program text at once, resulting in a binary object, while interpreters parse only one line at a time, which is then immediately executed.}. At this point, a defining aspect of programming languages is their theoretical lack of ambiguity. This need for disambiguation was reflected both in the engineering roots of computation\footnote{Punch cards and electrical circuits are ultimately discrete—hole or no hole, voltage or no voltage.} and in the roots of their formal mathematical notation\footnote{For instance, Plankalkül was based on Frege's \emph{Begriffsschrift}, a lineage we've seen in \ref{subsec:knowing-what-how}.}, and was thus a requirement of the further development of functional software engineering. -Nowadays, most programming languages are Turing-complete: that is, their design allows for the implementation of a Turing machine and therefore for the simulation of any possible aspect of computation. This means that any programming language that is Turing-complete is equivalent to any other Turing-complete programming language, creating essentially a chain of equivalency between all programming languages. And yet, programming language history is full of rise and fall of languages, of hypes and dissapointments, of self-claimed beautiful ones and criticized ugly ones, from COBOL to Ada, Delphi and C. This is because, given such a wide, quasi-universal problem set, the decision space requires creative constraints: individual programmers resort to different approaches of writing computational procedures, echoing what Gilles Gaston-Granger undestands as \emph{style}, as a formal way to approach the production and communication of aesthetic, linguistic and scientific works \citep{granger_essai_1988}. We have already seen one difference in approaching the domain of computation: compilation vs. 
interpretation. While the input and outputs are the same \footnote{a program text goes in, and machine code comes out}, there are pros and cons\footnote{For instance, a compiled binary does not need an extra runtime to be executed on a machine, but cannot be immediately used on a different architecture than the one it was compiled for.} to each approach, which in turn allows programmers to bestow value judgments on which on they consider better than the other. Ultimately all programming languages need to address these basic components of computation, but they can do it in the way they want. +Nowadays, most programming languages are Turing-complete: that is, their design allows for the implementation of a Turing machine and therefore for the simulation of any possible aspect of computation. This means that any programming language that is Turing-complete is \emph{functionally} equivalent to any other Turing-complete programming language, creating essentially a chain of equivalency between all programming languages. And yet, programming language history is full of rise and fall of languages, of hypes and dissapointments, of self-claimed beautiful ones and criticized ugly ones, from COBOL to Ada, Delphi and C. This is because, given such a wide, quasi-universal problem set, the decision space requires creative constraints: individual programmers resort to different approaches of writing computational procedures, echoing what Gilles Gaston-Granger undestands as \emph{style}, as a formal way to approach the production and communication of aesthetic, linguistic and scientific works \citep{granger_essai_1988}. We have already seen one example of such difference in approaching the domain of computation: compilation vs. interpretation. 
While the input and outputs are the same\footnote{A program text goes in, and machine code comes out.}, there are pros and cons\footnote{For instance, a compiled binary does not need an extra runtime to be executed on a machine, but cannot be immediately used on a different architecture than the one it was compiled for.} to each approach, which in turn allows programmers to bestow value judgments on which one they consider better than the other. Ultimately all programming languages need to address these basic components of computation, but they can do it in the way they want. Such basic components are, according to Milner \citep{milner_semantic_1996}: \begin{itemize} \item{\emph{data}: what kinds of basic datatypes are built-in the language, e.g. signed integers, classes} @@ -43,68 +43,71 @@ \subsubsection{History and developments} \item{\emph{operating environment}: how the program can run, e.g. virtual machine or not} \end{itemize} -This decision to change the way of doing something while retaining the same goal is particularly salient in the emergence of programming paradigms. A programming paradigm is an approach to programming based on a coherent set of principles, sometimes involving mathematical theory, and grouped into families. Some of these concepts include encapsulation and interfaces (in object-oriented programming), pure function and lacks of side effects (in functional programming), or mathematical logic (in declarative programming). Each paradigm supports a set of concepts that makes it the best for a certain kind of problem \citep{vanroy_programming_2012}, these concepts in turn act as stances which influence how to approach, represent and prioritize the computational concepts mentioned above. +This decision to change the way of doing something while retaining the same goal is particularly salient in the emergence of programming paradigms. 
A programming paradigm is an approach to programming based on a coherent set of principles, sometimes involving mathematical theory or a specific domain of application. Some of these concepts include encapsulation and interfaces (in object-oriented programming), pure function and lacks of side effects (in functional programming), or mathematical logic (in declarative programming). Each paradigm supports a set of concepts that makes it the best for a certain kind of problem \citep{vanroy_programming_2012}, these concepts in turn act as stances which influence how to approach, represent and prioritize the computational concepts mentioned above, and as tools to operate on their problem domain. -Along with programming paradigms, programming languages also present syntactic affordances for engaging with computational concepts. In order to assess how these formal differences relate to similar concepts, and how a value judgment can be made between languages, we first turn to the question of semantics—before the question of how do computers understand, we look at what they understand. +Along with programming paradigms, programming languages also present syntactic affordances for engaging with computational concepts. Nonetheless, this is only one part of the picture: the interpretation of syntax necessarily involves semantics. Machine semantics, as we will see, operate a delicate balance between computational operations and human assumptions. \subsubsection{Machine semantics and human semantics} \label{subsubsec:machine-semantics-human-semantics} -One of the reasonings behind the formal approach to programming languages, besides the very material machine requirements of a circuit design based on discreete distinctions, is, according to the designers of ALGOL 58, the dissatisfaction with the fact that subtle semantic questions remained unanswered due to a lack of clear description \citep{sethi_programming_1996}. 
If the goal of a program text is to be syntactically and semantically clear, and if programming languages are syntactically unambiguous, we examine here under what form do semantics exist as computer representations, and what kind of specific semantic issues are at stake when writing program texts. The very requirement for semantic representation in program language design is first and foremost due to the fact that: +One of the reasonings behind the formal approach to programming languages is, according to the designers of ALGOL 58, the dissatisfaction with the fact that subtle semantic questions remained unanswered due to a lack of clear description \citep{sethi_programming_1996}. If the goal of a program text is to produce a functional and deterministic execution, then programming languages must be syntactically unambiguous, and the compiler must be given a framework to interpret this syntax. The very requirement for semantic representation in program language design is first and foremost due to the fact that: \begin{quote} The first and most obvious point is that whenever someone writes a program, it is a program about something. \citep{winograd_language_1982} \end{quote} -A statement which is itself followed by the tension between semantics and syntax. Semantics have the properties of aboutness and directedness (they point towards something external to them), and syntax has the property of (local) consistency and combination (they function as a mostly closed system). Looking at programing languages as applied mathematics, in the sense that it is the art and science of constituting complex systems through the manipulation formal tokens, tokens which in turn represent elements in the world of some kind, we arrive at the issue of defining semantics in strictly computer-understandable terms. +The issue that he points out in the rest of his work is that humans and computers do not have the same understanding of what a program text is about. 
In general, semantics have the properties of aboutness and directedness (they point towards something external to them), and syntax has the property of (local) consistency and combination (they function as a mostly closed system). Looking at programming languages as applied mathematics, in the sense that it is the art and science of constituting complex systems through the manipulation of formal tokens, tokens which in turn represent elements in the world of some kind, we arrive at the issue of defining semantics in strictly computer-understandable terms. + +In attempting to develop early forms of artificial intelligence in the 1970s, Terry Winograd and Fernando Flores develop a framework for machine cognition as related to human cognition, through the analysis of language-based meaning-making \citep{winograd_understanding_1986}. In short, they consider meaning as created by a process of active reading, in which the linguistic form enables interpretation, rather than exclusively conveying information. They further state that interpretation happens through \emph{grounding}, essentially contextualizing information in order to interpret it and extract meaning. They identify three different kinds of grounding: experiential, formal, and social. The \emph{experiential} grounding, in which verification is made by direct observation, relates to the role of the senses in the constitution of the conceptual structures that enable our understanding of the world—also known as the material implementation of knowledge. The \emph{formal} grounding relies on logical statements to deduce meaning from previous, given statements that are known, which we can see at play in mathematical reasoning. Finally, \emph{social} grounding relies on a community of individuals sharing similar conceptual structures in order to qualify for meaning to be confirmed. Of these three groundings, programming languages rely on the second.
+ +The reason for the bypassing of experiential and social grounding can be found in one of the foundations of computer science, as well as information science: Claude Shannon's mathematical theory of communication. In it, he postulates the separation of meaning from information, making only the distinction between signal and noise. Only formal manipulation of signal can then reconstitute meaning\footnote{An affordance that is shared in distinguishing literature from gibberish, according to Peter Suber\citep{suber_what_1988}}. We think of computers as digital machines but they can also be seen as only the digital implementation of the phenomenon of computation. Indeed, according to Brian Cantwell Smith, computing is \emph{meaning mechanically realized}, due to the fact that the discipline has both mechanical and non-mechanical lineages\citep{smith_aos_2016}. It is therefore through formal logic that one can recreate meaning through the exclusive use of the computer. -In order to compare human-meaning with machine-meaning, we turn to the work of Terry Winograd and Fernando Flores. In attempting to develop early forms of artificial intelligence in the 1970s, they develop a framework for machine cognition as related to human cognition \citep{winograd_understanding_1986}. In short, they consider meaning as created by a process of active reading, in which the linguistic form enables interpretation, rather than exclusively conveying information. They further state that interpretation happens through \emph{grounding}, essentially contextualizing information in order to interpret it and extract meaning. He identifies three different kinds of grounding: experiential, formal, and social. The \emph{experiential} grounding, in which verification is made by direct observation, related to the role of the senses in the constitution of the conceptual structures that enable our understanding of the world—also known as the material implementation of knowledge. 
The \emph{formal} grounding relies on logical and logical statements to deduce meaning from previous, given statements that are known. Finally, \emph{social} grounding relies on a community of individuals sharing similar conceptual structures in order to qualify for meaning to be confirmed. Of these three groundings, computers, through programming languages rely on the second. +This machine meaning is also represented through several layers. A computer is a collection of layers, each defining different levels of machines, with different semantic capabilities. First, it is a physical machine, dealing with voltage differences. These voltage differences are then quantized into binary symbols, in order to become manipulable by a logical machine. From this logical machine is built an abstract machine, which uses logical grounding in order to execute specific, pre-determined commands. The interpretation of which commands to execute, however, leaves no room for the kind of semantic room for error that humans exhibit (particularly in hermeneutics). It is a strictly defined mapping of an input to an output, whose first manifestation can be found in the symbols table in Turing's seminal paper \citep{turing_computable_1936}. The abstract machine, in turn, allows for high-level machines (or, more precisely, high-level languages which can implement any other abstract machine). These languages themselves have linguistic constructs which allow the development of representational schemes for data (i.e. data structures such as \lstinline{structs, lists, tuples, objects}, etc.). Finally, the last frontier, so to speak, is the problem domain: the thing(s) that the programmer is talking about and intends to act upon. 
Going back down the ladder of abstractions, these entities in the problem domain are then represented in data structures, manipulated through high-level languages, processed by an abstract machine and executed by a logical machine which turns these pre-established commands into voltage variations. -The reason for the bypassing of experiential and social grounding can be found in one of the foundations of computer science, as well as information science: Claude Shannon's mathematical theory of communication. In it, he postulates the separation of meaning from information, making only the distinction between signal and noise. Only formal manipulation of signal can then reconstitute meaning\footnote{An affordance that is shared with literature, according to Peter Suber\citep{suber_what_1988}}. Indeed, according to Brian Cantwell-Smith, computing is \emph{meaning mechanically realized}, due to the fact that the machine comes from non-mechanical origins\citep{smith_aos_2016}. We think of computers as digital but they can be seen as only the digital implementation of the phenomenon of computation, with its roots in formal logic. It is therefore through formal logic that one can recreate meaning through the exclusive use of the computer. +The problem domain is akin to a semantic domain, a set of related meaningful entities, operating within a specific context, and which a particular syntax refers to. Yet, there is only one context which the computer provides: itself. Within this unique context, semantics still hold a place in any programming language textbook, and is addressed regularly in programming language research. Concretely, \emph{semantics in computer programming focuses on how variables and functions should behave in relation to one another} \citep{sethi_programming_1996}. 
Given the statement \lstinline{l := j + p}, the goal of programming language semantics is to deduce what is the correct way to process such a statement; there will be different ways to do so depending on the value and the type of the \lstinline{j} and \lstinline{p} variables. If they are strings, then the value of \lstinline{l} will be their concatenation, putting one next to the other. If they are numbers, it will be their addition, and so on. -A computer is a collection of layers, each defining different levels of machines, with different semantic capabilities. First, it is a physical machine, dealing with voltage differences. These voltage differences are then quantized into binary symbols, in order to become manipulable by a logical machine. From this logical machine is built an abstract machine, which uses logical grounding in order to execute specific, pre-determined commands. The interpretation of which commands to execute, however, leaves no room for the kind of semantic room for error that humans exhibits (particularly in hermeneutics). It is a strictly defined mapping of an input to an output, whose first manifestation can be found in the symbols table in Turing's seminal paper. The abstract machine, in turn, allows for high-level machines (or, more precisely, high-level languages which can implement any other abstract machine). These languages themselves have linguistic constructs which allow the development of representational schemes for data (i.e. data structures such as \lstinline{structs, lists, tuples, objects}, etc.). Finally, the last frontier, so to speak, is the subject domain: the things that the programmer is talking about and intends to act upon. These are then represented in data structures, manipulated through high-level languages, processed by an abstract machine and executed by a logical machine which turns these representations into voltage variations.
+This problem of determining which operation should take place given a particular type of variables is called the \emph{use-mention} problem. It requires the reconciliation of the name of entities, tokens in source code, with the entities themselves, composed of a value and a type. The way this is achieved is actually quite similar to how syntax is dealt with. The compiler (or interpreter), after lexical analysis, constructs an abstract syntax tree (AST) representation of the statement, separating it, in the above case, in the tokens: \lstinline{l}, \lstinline{:=}, \lstinline{j}, \lstinline{+} and \lstinline{p}. Among these, \lstinline{:=} and \lstinline{+} are considered terminal nodes, or leaves, while the other values still need to be determined. The second pass represents a second abstract syntax tree through a so-called semantic analysis, which then \emph{decorates} the first tree, assigning specific values (attributes) and types to the non-terminal nodes, given the working environment (e.g. production, development, test). This process is called \emph{binding}, as it associates (binds) the name of a variable with its value and its type. -The problem domain is akin to a semantic domain, a specific conceptual place that shares a set of meanings, or a language that holds its meaning, within the given context of this place. And there is only one context which the computer provides: itself. Within this unique context, semantics still hold a place in any programming language textbook, and is addressed regularly in programming language research. Concretely, \emph{semantics in computer programming focuses on how variables and functions should behave} \citep{sethi_programming_1996}. Given the statement \lstinline{l := j + p}, the goal of programming language semantics is to deduce what is the correct way to process such a statement; there will be different ways to do so depending on the value and the type of the \lstinline{j} and \lstinline{p} variables. 
If they are strings, then the value of \lstinline{j} will be their concatenation. If they are numbers, it will be their addition, and so on. +Semantics is thus the decoration of parsed ASTs, evaluating attribute—which can be either synthesized or inherited. Since decoration is the addition of a new layer (a semantic layer) on top of a base layer (a syntactic one), but of a similar tree form, this leads to the use of what can be described as a \emph{meta-syntax tree}. -This problem is called the \emph{use-mention} problem, which requires the reconciliation of the name of entities, tokens in source code, with the entities themselves, composed of a value and a type. The way this is achieved is actually quite similar to how syntax is dealt with. The compiler (or interpreter), after lexical analysis, constructs an abstract syntax tree representation of the statement, separating it, in the above case, in the tokens: \lstinline{l}, \lstinline{:=}, \lstinline{j}, \lstinline{+} and \lstinline{p}. Among these, \lstinline{:=} and \lstinline{+} are considered terminal nodes, or leaves, while the other values still need to be determined. The second pass represents a second abstract syntax tree through a so-called semantic analysis, which then \emph{decorates} the first tree, assigning specific values (attributes) and types to the non-terminal nodes, given the working environment (e.g. production, development, test). This process is called \emph{binding}, as it associates (binds) the name of a variable with its value and its type. Semantics is thus the decoration of parsed ASTs, evaluating attribute—which can be either synthesized or inherited. Since decoration is the addition of a new layer (a semantic layer) on top of a base layer (a syntactic one), but of a similar tree form, this leads to the use of what can be described as a \emph{meta-syntax tree}. 
+Regarding when the values are being bound, there are multiple different binding times, such as language-design time (when the meaning of \lstinline{+} is defined), compile time, linker time, and program-writing time. It is only during the last one of these times, that the programmer inserts their own interpretation of a particular meaning (e.g. \lstinline{j := "jouer"}, meaning one of the four possible actions to be taken from the start screen of a hypothetical video game). Such a specific meaning is then shadowed by its literal representation (the five consecutive characters which form the string) and its pre-defined type (here, it would be the \lstinline{string} type, although different languages have different terms to refer to the same consecutive list of alphanumeric characters). -In terms when the values are being bound, there are multiple different binding times, such as language-design time (when the meaning of \lstinline{+} is defined), compile time, linker time, and program-writing time. It is only during the last one of these times, that the programmer inserts their interpretation of a particular meaning (e.g. \lstinline{j := "jouer"}, meaning one of the four possible actions to be taken from the start screen of a hypothetical video game). Such a specific meaning is then shadowed by its literal representation (the five consecutive characters which form the string) and its pre-defined type (\lstinline{strings}, here in Go). This process does show that the meaning of a formal expression can, with significant difficulty and clumsiness, nonetheless be explained; but the conceptual content still eludes the computer, varying from the mundane (e.g. a simple counter) to the almost-esoteric (e.g. a playful activity). Even the most human-beautiful code cannot force the computer to deal with new environments in which meaning has, imperceptibly, changed. 
Indeed, +Ultimately, this process shows that the meaning of a formal expression can, with significant difficulty and clumsiness, nonetheless be explained; but the conceptual content still eludes the computer, varying from the mundane (e.g. a simple counter) to the almost-esoteric (e.g. a playful activity). Even the most human-beautiful code cannot force the computer to deal with new environments in which meaning has, imperceptibly, changed. Indeed, \begin{quote} In programming languages, variables are truly variable, whereas variables in mathematics are actually constant \citep{wirth_essence_2003}. \end{quote} -From this perspective, the only thing that the computer does know that the programmer doesn't, and which would "make its life easier", the same way that the programmer's life can be made easier through beautiful code, is how the code is represented in an AST, and where in physical memory is located the data required to give meaning to that tree\citep{stansifer_study_1994}. We might hypothesize that beautiful code, from the computer's perspective, is code which is tailored to its physical architecture, a feat which might only be realistically available when writing in Assembly. Before we turn to how such a code is written by the particular group of humans referred to as hackers, there are nevertheless some concepts in programming which do not have simple meaning for humans, re-iterating the need of aesthetics to make these concepts graspable. +This implies that the content of the variables, when set during program-writing time, might throw off the whole interpretative process of the computer. In turn, this would transform a functional program into a buggy one, defeating the very purpose of the program. While programming languages are rigorously specified, they are nonetheless designed in a way that leaves space for the programmer's expressivity. 
% include a quote from iverson_notation_as_tool_for_thought -As we've seen with software patterns, what also matters to programming languages is not just their design, but their \emph{situated} use: +At this point, the only thing that the computer does know that the programmer does not is how the code is represented in an AST, and where in physical memory is located the data required to give meaning to that tree\citep{stansifer_study_1994}. We might hypothesize that beautiful code, from the computer's perspective, is code which is tailored to its physical architecture, a feat which might only be realistically available when writing in Assembly, with deep knowledge of the hardware architecture being worked on. + +Just as some human concepts are complicated to make the computer grasp on its own terms, there are also computer concepts that are hard to grasp for humans. As we've seen with software patterns, what also matters to programming languages is not just their design, but their \emph{situated} use: \begin{quote} It must be a pleasure and a joy to work with a language, at least for the orderly mind. The language is the primary, daily tool. If the programmer cannot love his tool, he cannot love his work, and he cannot identify himself with it. \citep{wirth_essence_2003} \end{quote} -Indeed, if there is one version of how the computer interprets instructions, it is through programming languages that both form and content, syntax and semantics are made accessible to the programmer. Within computation as a single whole, exist multiple programming languages, designed by humans for humans. Particularly, programming languages differentiate themselves by how they guide the programmer in their reading and writing. +While there is only one version of how the computer interprets instructions, it is through programming languages that both form and content, syntax and semantics are made accessible to the programmer.
Within computation as a whole, a plethora of programming languages exist, designed by humans for humans, differentiating themselves by how the representations they afford guide the programmer in reading and writing source code. \subsection{Qualities of programming languages} \label{subsec:qualities-programming-languages} -While programming languages all stem from and relate to a single commonality—Turing-completeness and data processing—, these linguistic interfaces nonetheless offer many approaches to performing computation, including a diversity and reliability of functions characteristic of tools. Since diversity within equivalence supports qualified preference, we can now examine what it is that makes a programming language good—i.e. receive a positive value judgment—before turning to the question of the extent to which a good programming language enables the writing of good program texts. - -\subsubsection{Abstraction, simplicity and orthogonality} -\label{subsubsec:abstraction-simplicity-orthogonality} +All programming languages stem from and relate to a single commonality—Turing-completeness and data processing—, and yet these linguistic interfaces nonetheless offer many approaches to performing computation, including a diversity and reliability of functional affordances and stylistic phrasing. Since diversity within equivalence supports qualified preference, we can now examine what makes a programming language good—i.e. receive a positive value judgment—before turning to the question of the extent to which a good programming language enables the writing of good program texts. -Every programming language of practical use takes a particular approach to those basic components, sometimes backed by an extended rationale (e.g. ALGOL 68), or sometimes not (e.g. JavaScript). In the case in which one is circumscribed to context-free grammars, it would be possible to optimize a particular language for an objective standard (e.g. compile time, time use, cycles used). 
And still, as computers exist to solve problems beyond their own technical specifications, such problems are diverse in nature and therefore necessitate different approaches\footnote{Patterns, addressed in \ref{subsec:patterns-structures} are one way that diverse approaches can be applied to diverse problems}. These different approaches to the problem domain are in turn influenced the development of those different paradigms, since a problem domain might have different data representations (e.g. objects, text strings, formal rules, dynamic models, etc.) or data flows (e.g. sequential, parallel, non-deterministic). For instance, two of the early programming languages, FORTRAN and Lisp, addressed to very different problem domains: the accounting needs of businesses and the development of formal rules for artificial intelligence, respectively. One might therefore consider that there are better programming languages than others. Under which conditions could this be true? +Every programming language of practical use takes a particular approach to those basic components, sometimes backed by an extended rationale (e.g. ALGOL 68), or sometimes not (e.g. JavaScript). In the case in which one is circumscribed to context-free grammars, it would be possible to optimize a particular language for a quantifiable standard (e.g. compile time, time use, cycles used). And still, as computers exist to solve problems beyond their own technical specifications, such problems are diverse in nature and therefore necessitate different approaches\footnote{Patterns, addressed in \ref{subsec:patterns-structures}, are one way that diverse approaches can be applied to diverse problems}. These different approaches to the problem domain have in turn influenced the development of different programming languages and paradigms, since a problem domain might have different data representations (e.g. objects, text strings, formal rules, dynamic models, etc.) or data flows (e.g.
sequential, parallel, non-deterministic). For instance, two of the early programming languages, FORTRAN and Lisp, addressed two very different problem domains: the accounting needs of businesses and the development of formal rules for artificial intelligence, respectively. Within programming languages, there is room to distinguish better ones and worse ones, based on particular qualities, and given standards. -What makes a good programming language is a matter which has been discussed amongst computer scientists, at least since the \lstinline{GOTO} statement has been publicly considered harmful. Some of these discussions include both subjective arguments over preferred languages, as well as objective arguments related to performance and ease-of-use. According to Pratt and Zelkowitz: +What makes a good programming language is a matter which has been discussed amongst computer scientists, at least since the \lstinline{GOTO} statement has been publicly considered harmful \citep{dijkstra_letters_1968}, or since the BASIC language has been deemed damaging to one's cognitive abilities\footnote{"\emph{It is practically impossible to teach good programming to students that have had a prior exposure to BASIC: as potential programmers they are mentally mutilated beyond hope of regeneration.}" \citep{dijkstra_how_1975}}. Some of these discussions include both subjective arguments over preferred languages, as well as objective arguments related to performance and ease-of-use \citep{gannon_impact_1975}. According to Pratt and Zelkowitz: \begin{quote} The differences among programming languages are not quantitative differences in what can be done, but only qualitative differences in how elegantly, easily and effectively things can be done. \citep{pratt_programming_2000} \end{quote} -As a concrete example, one can turn to Brian Kernighan's discussion of his preferences between the language PASCAL and C \citep{kernighan_why_1981}.
Going through the generic features of a programming languages, he comments on the approaches taken by the programming languages on each of these. Declaring his preference for strong typing, which is "telling the truth about data", explicit control flow, cosmetic annoyances and his dislike for an environment in which "considerable pains must be taken to simulate sensible input", he professes his preference for the C language \citep{kernighan_why_1981}, all the while acknowledging that PASCAL can nonetheless be a toy language suitable for teaching\footnote{Thus pointing again the context-dependence of value judgments in programming.}. +As a concrete example, one can turn to Brian Kernighan's discussion of his preferences between the languages PASCAL and C \citep{kernighan_why_1981}. Going through the generic features of programming languages, he comments on the approaches taken by the programming languages on each of these. He professes his preference for the C language, based on their shared inclination for strong typing\footnote{Something that is, according to him, "\emph{telling the truth about data}" \citep{kernighan_why_1981}.}, explicit control flow, cosmetic annoyances and his dislike for an environment in which "\emph{considerable pains must be taken to simulate sensible input}" \citep{kernighan_why_1981}. Nonetheless, he acknowledges that PASCAL can be a toy language suitable for teaching, thus pointing again to the context-dependence of value judgments in programming. -While this example reveals that individual preferences for programming languages can be based on objective criteria when compared to what a prototypal language should be able to achieve, Turing-completeness offers an interesting challenge to the Sapir-Whorf hypothesis—if natural languages might only weakly affect the kinds of cognitive structures speakers of those languages can construct, programming languages are claimed to do so to large extents.
For instance, Alan Perlis's famous \emph{Epigrams on Programming} mentions that "A language that doesn't affect the way you think about programming, is not worth knowing." \citep{perlis_special_1982}. These differences in the ways of doing illustrates how, in reality, different programming languages are applicable to different domains, and do so through different kinds of notations—different aesthetic features when it comes to realizing the same task. +While this example reveals that individual preferences for programming languages can be based on objective criteria when compared to what an ideal language should be able to achieve, Turing-completeness offers an interesting challenge to the Sapir-Whorf hypothesis—if natural languages might only weakly affect the kinds of cognitive structures speakers of those languages can construct, programming languages are claimed to do so to a large extent. For instance, Alan Perlis's famous \emph{Epigrams on Programming} mentions that "\emph{A language that doesn't affect the way you think about programming, is not worth knowing.}" \citep{perlis_special_1982}. These differences in the ways of doing things illustrate how different programming languages are applicable to different domains and different styles of approaching those domains. They do so through different kinds of notations—different aesthetic features—when it comes to realizing the same task. -Of the two programs presented below, the output result is exactly the same, but the aesthetic differences are obvious. +Of the two programs presented in \ref{code:hello-ruby} and in \ref{code:hello-java}, the function is exactly the same, but the aesthetic differences are obvious.
\begin{listing} \inputminted{ruby}{./corpus/hello.rb} @@ -118,41 +121,52 @@ \subsubsection{Abstraction, simplicity and orthogonality} \label{code:hello-java} \end{listing} -The code in \ref{code:hello-ruby} is written in Ruby, a language designed by Yukihiro Matsumoto, while the code in \ref{code:hello-java} is written in Java, designed by James Gosling, both in the mid-1990s. While Ruby is dynamically-typed, interpreted, Java is a statically-typed and compiled language, and both include garbage collection and object-orientation. These two snippets are obviously quite dissimilar at first glance\footnote{Indeed, \ref{code:hello-ruby} is also a valid program in Python and Perl, both scripting languages.}. +The code in \ref{code:hello-ruby} is written in Ruby, a language designed by Yukihiro Matsumoto, while the code in \ref{code:hello-java} is written in Java, designed by James Gosling, both in the mid-1990s. While Ruby is dynamically-typed, interpreted, Java is a statically-typed and compiled language, and both include garbage collection and object-orientation. These two snippets are obviously quite dissimilar at first glance, as the Ruby listing only includes one reserved keyword\footnote{Indeed, \ref{code:hello-ruby} is also a valid program in Python and Perl, both scripting languages.}, \lstinline{puts}, while the Java listing involves a lot more lexical scaffolding, including class and function declaration. From a language design perspective, Robert Sebesta suggests three main features of programming languages in order to be considered good: \emph{abstraction}, \emph{simplicity} and \emph{orthogonality} \citep{sebesta_concepts_2018}. From the two snippets, we now explore some of the most important criteria in programming language design, and how they could underpin the writing of good programs. 
-Abstraction is the ability of the language to allow for the essential idea of a statement to be expressed without being encumbered by specifics which do not relate directly to the matter at hand, or to no matter at all. Abstract programming languages can lead to more succint code, and tend to hide complexity (of the machine, and of the language), from the programmer. For instance, the Java snippet above explicitly states the usage of the \lstinline{System} object, in order to access its \lstinline{out} attribute, and then call its \lstinline{println()} method. While a lot of code here might seem verbose, or superfluous, it is in part due to it being based on an object-oriented paradigm. However, \lstinline{out} object itself might seem to go particularly contrary to the requirement of programming languages to abstract out unnecessary details: \lstinline{println()} is definitely a system call dealing with I/O, and therefore already implicitly relates to the output; one shouldn't have to specify it explicitly. In contrast, Ruby entierly abstracts away the system component of the print call, by taking advantadge of its status as an interpreted language: the runtime already provides such standard features of the language. Printing, in Java, does not abstract the machine, while printing, in Ruby, abstracts it away in order to focus on the actual appearance of the message. Another abstraction is that of the language name itself from the import statements. When we write in Java, we (hopefully) know that we write in Java, and therefore probably assume that the default imports come from the Java ecosystem—there shouldn't be any need to explicitly redeclare it. For instance, \lstinline{System.out.println()} isn't written \lstinline{java.io.System.out.println()}. 
Meanwhile, the Ruby snippet hides the implicit \lstinline{require ".../lib/ruby/3.1.0"}, allow the programmer to focus, through visual clarity, on the real problem at hand, which the logic of the program being written is supposed to address. In this direction, languages which provide more abstraction (such as Ruby), or which handle errors in an abstract way (such as Perl) tend to allow for greater readability by focusing on the most import tokens, rather than aggregating system-related and operational visual clutter—also called verbosity. +\subsubsection{Abstraction} +\label{subsubsec:programming-abstraction} + +Abstraction is the ability of the language to allow for the essential idea of a statement to be expressed without being encumbered by specifics which do not relate directly to the matter at hand, or to any matter at all. Programming languages which facilitate abstraction can lead to more succinct code, and tend to hide complexity (of the machine, and of the language), from the programmer, allowing her to move between different levels of reasoning. For instance, the Java snippet in \ref{code:hello-java} explicitly states the usage of the \lstinline{System} object, in order to access its \lstinline{out} attribute, and then call its \lstinline{println()} method. While a lot of code here might seem verbose, or superfluous, it is in part due to it being based on an object-oriented paradigm. However, the \lstinline{out} object itself might seem to go particularly contrary to the requirement of programming languages to abstract out unnecessary details: \lstinline{println()} is a system call whose purpose is to write something on the screen, and therefore already implicitly relates to the output; one shouldn't have to specify it explicitly. -Related to abstraction is the approach to \emph{typing}, the process of specifiying the type of a variable or of a return value (such as integer, string, vector, etc.). 
A strictly-typed language such as C++ might end up being harder to read because of its verbosity, while a type-free language might be simpler to read and write (at a small-scale), but might not provide guarantees of reliability when executed. The tradeoff here is again between being explicit and safe (because a word cannot usually and intuitively be operated on in a similar way as a floating-point number, or as a list), and being implicit, subtle, and dangerous (such as JavaScript's very liberal understanding of typing). In some instances, typing can usually be inferred by purely aesthetic means: Python's boolean values are capitalized (\lstinline{True, False}) and its difference between string and byte is represented by the use of double-quotes for the former and single-quotes for the latter. In the case above, explicitly having to mention that \lstinline{greeting} is of type \lstinline{String} is again redundant, since it is already hinted at by the double-quotes. Ruby does not force programmers to explicitly declare variable types (they can, if they want to), but in this case they let the computers do the heavy lifting of specifying something that is already obvious to the programmer, through a process called dynamic typing. +In contrast, Ruby entierly abstracts away the system component of the print call, by taking advantadge of its status as an interpreted language: the runtime already provides such standard features of the language. Printing, in Java, does not abstract away the machine, while printing, in Ruby, hides it in order to focus on the actual appearance of the message. Another abstraction is that of the language name itself from the import statements. When we write in Java, we (hopefully) know that we write in Java, and therefore probably assume that the default imports come from the Java ecosystem—there shouldn't be any need to explicitly redeclare it. 
For instance, \lstinline{System.out.println()} isn't written \lstinline{java.io.System.out.println()}. Meanwhile, the Ruby listing makes implicit the necessary declaration of \lstinline{require ".../lib/ruby/3.1.0"}, allowing the programmer to focus, through visual clarity, on the real problem at hand, which the logic of the program being written is supposed to address. In this direction, languages which provide more abstraction (such as Ruby), or which handle errors in an abstract way (such as Perl) tend to allow for greater readability by focusing on the most important tokens, rather than aggregating system-related and operational visual clutter—also called verbosity. -A particularly note-worthy example of an elegant solution to the tradeoff between guarantee of functionality (safety) and readability can be found in the Go programming language's handling of error values returned by functions: +Related to abstraction is the approach to \emph{typing}, the process of specifying the type of a variable or of a return value (such as integer, string, vector, etc.). A strictly-typed language such as C++ might end up being harder to read because of its verbosity, while a type-free language might be simpler to read and write, but might not provide guarantees of reliability when executed. The tradeoff here is again between being explicit and reliable, and being implicit, subtle, and dangerous (such as JavaScript's very liberal understanding of typing). In some instances, typing can usually be inferred by purely aesthetic means: Python's boolean values are capitalized (\lstinline{True, False}) and its difference between string and byte is represented by the use of double-quotes for the former and single-quotes for the latter. In the case above, explicitly having to mention that \lstinline{greeting} is of type \lstinline{String} is again redundant, since it is already hinted at by the double-quotes. 
Ruby does not force programmers to explicitly declare variable types (they can, if they want to), but in this case they let the computers do the heavy lifting of specifying something that is already obvious to the programmer, through a process called dynamic typing. + +A particularly note-worthy example of an elegant solution to the tradeoff between guarantee of functionality (safety) and readability can be found in some programming languages handling of values returned by functions, such as in the Go listing in \ref{code:multiple-returns-go}: \begin{listing} - \inputminted{go}{./corpus/error_handling.go} - \caption{Go proposes an elegant way of ignoring errors, with the use of the underscore token.} - \label{code:error-handling} + \inputminted{go}{./corpus/multiple_returns.go} + \caption{Go proposes an elegant way of ignoring certain variables, with the use of the underscore token.} + \label{code:multiple-returns-go} \end{listing} -The \lstinline{_} character which we see on the first line is the choice made by Go's designers to force the user to both acknowledge and ignore the potential error value that is returned by executing the external command. This particular character, acting as an empty line, \emph{represents absence}, not cluttering the layout of the source, while reminding subtly of the \emph{potential} of this particular statement to go wrong and crash the program. Abstraction is therefore a tradeoff between explicitly highlighting the computer concern (how to operate practically on some data or statement), and hiding anything but the human concern (whether or not that operation is of immediate concern to the problem at hand at all). As such, languages who offer powerful abstractions tend not to stand in the way of the thinking process of the programmer. This particular example of the way in which Go deals with error-handling is a great example of the designer's explicit stylistic choice. 
In the words of Niklaus Wirth: +The \lstinline{_} character which we see on the first line is the choice made by Go's designers to force the user to both acknowledge and ignore the value that is returned by calling the function \lstinline{getNumbers()}. This particular character, acting as an empty line, \emph{represents absence}, not cluttering the layout of the source, while reminding subtly of the \emph{potential} of this particular statement to go wrong and crash the program. Conversely, the functionally equivalent code written in JavaScript and shown in \ref{code:multiple-returns-js} does not have this semantic feature (a variable named \lstinline{_} is still a valid name), and thus requires additional steps to reach the same result. -\begin{quote} - Stylistic arguments may appear to many as irrelevant in a technical environment, because they seem to be merely a matter of taste. I oppose this view, and on the contrary claim that stylistic elements are the most visible parts of a language. They mirror the mind and spirit of the designer very directly, and they are reflected in every program written. \citep{wirth_essence_2003} -\end{quote} +\begin{listing} + \inputminted{js}{./corpus/multiple_returns.js} + \caption{JavaScript does not have any built-in syntax to ignore certain variables, resulting in more cumbersome code.} + \label{code:multiple-returns-js} +\end{listing} -An error is, in a programming language, a unique concept with one or more syntactical incarnations. Orthogonality is the affordance for a language to offer a small set of simple syntactics constructs which can be recombined in order to achieve greater complexity, while remaining independent from each other. 
A direct consequence of such a feature is the ease with which the programmer can familiarize themselves with the number of constructs in the language, and therefore their ease in using them without resorting to the language's reference, or external program texts under the form of packages, libraries, etc. The orthogonality of a language offers a simple but powerful solution to the complexity of understanding software. Because an orthogonal programming language ensures that there is no unintended side-effect at that each program token's action is independent from each other. The functionality of a statement thus comes not just from the individual keywords, but also from their combination. +Abstraction in programming languages is therefore a tradeoff between explicitly highlighting the computer concern (how to operate practically on some data or statement), and hiding anything but the human concern (whether or not that operation is of immediate concern to the problem at hand at all). As such, languages which offer powerful abstractions tend not to stand in the way of the thinking process of the programmer. This particular example of the way in which Go deals with non-needed values is a good example of the designer's explicit stylistic choice. -For instance, the example of Lisp treats both data and functions in a similar way, essentially allowing the same construct to be recombined in powerful ways, while the Ruby language, and its foundational design choice which makes every type (themselves abstracted away) an object allows for greater creativity, through familiarity, in writing code, making the language itself more habitable, if more uncertain\footnote{The infamous monkey-patching technique of Ruby allows the programmer to even modify standard library functions.}. 
Orthogonality implies both independence, since all constructs operate distinctly from each other, while remaining related, and cooperation with each other, because their functional restrictions requires that they be used in conjunction with one another. This offers a solution to the cognitive burden of the \emph{non-atomicity} of computer programs, in which data can end up being tangled in a non-linear program execution, and become ungraspable. This unreadability is triggered, not by verbosity, but because of the uncertainty of, and confusion about, the potential side-effects caused by any statement. Such independence in programming constructs in turn presents a kind of \emph{symmetry}—a well-accepted aesthetic feature of any artefact—, in that the use of each of the constructs is similar. This similarity eases the cognitive friction in writing and reading code since an orthogonal language allows the programmer to rely on the fact that everything behaves as stated, without having to keep track of a collection of quirks and arbitrary decisions. For example, the below C code is illegal: +\subsubsection{Orthogonality} +\label{subsubsec:programming-orthogonality} -\begin{listing} - \inputminted{go}{./corpus/illegal_return.c} - \caption{C syntax sometimes behaves arbitrarily.} - \label{code:illegal-return} -\end{listing} +Orthogonality is the affordance for a language to offer a small set of simple syntactic constructs which can be recombined in order to achieve greater complexity, while remaining independent from each other\footnote{An analogy of such affordance is that of the building blocks: for instance, the original LEGO brick set offers very high orthogonality.}. 
A direct consequence of such a feature is the ease with which the programmer can familiarize themselves with the number of constructs in the language, and therefore their ease in using them without resorting to the language's reference, or external program texts under the form of packages, libraries, etc. The orthogonality of a language offers a simple but powerful solution to the complexity of understanding software. Importantly, an orthogonal programming language must make sure that there are no unintended side-effects, such that each program token's action is independent from each other. The functionality of a statement thus comes not just from the individual keywords, but also from their combination. + +For instance, the language Lisp treats both data and functions in a similar way, essentially allowing the same construct to be recombined in powerful and elegant ways. To the beginner, however, it might prove confusing to express whole problem domains exclusively with lists. Conversely, the Ruby language makes every data type (themselves abstracted away) an object, therefore making each building block a slightly different version of each other, providing less orthogonality. The silver lining from Ruby's design choice is that it allows for greater creativity in writing code, since everything is an object, which elicits a feeling of familiarity. In turn, this makes the language more habitable, if more uncertain\footnote{The infamous monkey-patching technique of Ruby allows the programmer to even modify standard library functions.}. -The above code is a specific instance of one of those quirks: the fact that C cannot return arrays from functions requires both a deep knowledge of the language implementation and a willingness to accept that this is how things are, even though other languages allow for such a feature. 
In this case, the language exhibits an un-orthogonal property since the two constructs (\lstinline{return} and \lstinline{int[]}) interact with each other in non-independent ways. +Orthogonality implies both independence, since all constructs operate distinctly from each other, while remaining related, and cooperation with each other, because their functional restrictions require that they be used in conjunction with one another. This offers a solution to the cognitive burden of programs, in which data can end up being tangled in a non-linear execution, and become ungraspable. This unreadability is triggered, not by verbosity, but because of the uncertainty of, and confusion about, the potential side-effects caused by any statement. Doing one thing, and doing it well, is a generally-accepted measure of quality in software development practices. -Finally, one of the consequences of such a feature is the shift from computer semantic interpretation (usually connected to strongly-typed languages) to human interpretation (and weakly-typed languages). Non-orthogonality implies that the compiler (as a procedural representation of the language) has the final say in what can be expressed, reifing seemingly arbitrary design choices, and requiring cognitive effort from the programmer to identify these unwanted interactions, while orthogonal languages leave more leeway to the writer in focusing on the interaction of all programming constructs used, rather than on a subset of those interactions which does not relate to the program's intent. +Such independence in programming constructs also presents a kind of \emph{symmetry}—a well-accepted aesthetic feature of any artefact—, in that each construct is similar, not in their functionality, but in the fact that they are self-contained parts of an orthogonal system, and therefore share the same quality. 
This similarity eases the cognitive friction in writing and reading code since an orthogonal language allows the programmer to rely on the fact that everything behaves as stated, without having to keep track of a collection of quirks and arbitrary decisions\footnote{For example, returning an array in C is considered illegal syntax, while it is a perfectly common feature of more contemporary programming languages. In this case, the language exhibits an un-orthogonal property since the two constructs (\lstinline{return} and \lstinline{int[]}) interact with each other in non-independent ways.}. + +Finally, one of the consequences of different amounts of orthogonality is the shift from computer semantic interpretation to human interpretation. Non-orthogonality implies that the compiler (as a procedural representation of the language) has the final say in what can be expressed, reifing seemingly arbitrary design choices, and requiring cognitive effort from the programmer to identify these unwanted interactions, while orthogonal languages leave more leeway to the writer in focusing on the interaction of all programming constructs used, rather than on a subset of those interactions which does not relate to the program's intent. + +\subsubsection{Simplicity} +\label{subsubsec:programming-simplicity} Both of these features, abstraction and orthogonality, ultimately relate to simplicity. As Ryan Stansifer puts it: @@ -160,9 +174,9 @@ \subsubsection{Abstraction, simplicity and orthogonality} Simplicity enters in four guises: uniformity (rules are few and simple), generality (a small number of general functions provide as special cases a host of more specialized functions, orthogonality), familiarity (familiar symbols and usages are adopted whenever possible), and brevity (economy of expression is sought). 
\citep{stansifer_study_1994} \end{quote} -The point of a simple programming language is to not stand in the way of the program being written, or of the problem being addressed. From a language design perspective, simplicity is achieved by letting the programmer do more (or as much) with less. This means that the set of syntactical tokens exposed to the writer and reader combine in sufficient ways to enable desired expressiveness, and thus relating back to orthogonality\footnote{James Rumbaugh describes his conception of simplicity in designing the UML language as such: "\emph{If you constantly are faced with four or five alternate ways to model a straightforward situation, it isn't simple}" \citep{biancuzzi_masterminds_2009}}. +The point of a simple programming language is to not stand in the way of the program being written, or of the problem being addressed. From a language design perspective, simplicity is achieved by letting the programmer do more (or as much) with less, recalling definitions of elegance. This means that the set of syntactical tokens exposed to the writer and reader combine in sufficient ways to enable desired expressiveness, and thus relating back to orthogonality\footnote{James Rumbaugh describes his conception of simplicity in designing the UML language as such: "\emph{If you constantly are faced with four or five alternate ways to model a straightforward situation, it isn't simple}" \citep{biancuzzi_masterminds_2009}}. -Moving away from broad language design, and more specific applications, the goal of simplicity is also achieved by having accurate conceptual mappings between computer expression semantics and human semantics (refer to \ref{subsec:psychology-programming} for a discussion of mappings). 
If one is to write a program related to an interactive fiction in which sentences are being input and output in C, then the apparently simple data structure \lstinline{char} of the language reveals itself to be cumbersone and complex when each word and the sentence that the programmer wants to deal with must be present not as setences nor words, but as series of \lstinline{char}\footnote{Hence the origin of the name of the data type \lstinline{string}, as a continuous series of \lstinline{char}, or characters stringed together.}. A simple language does not mean that it is easy \footnote{Perhaps the simplest language of all being lambda-calculus, is far from an easy construct to grasp, just like the game of Go of which it is said that it is simple to learning, but difficult to master}. By making things simple, but not too simple \citep{biancuzzi_masterminds_2009}, it remains a means to an end, akin to any other tool or instrument\footnote{For a further parallel on musical instruments, see Rich Hickey's keynote address at RailsConf 2012 \citep{confreaks_rails_2012}}. +Moving away from broad language design, and towards more specific applications, the goal of simplicity is also achieved by having accurate conceptual mappings between computer expression semantics and human semantics (refer to \ref{subsec:psychology-programming} for a discussion of mappings). If one is to write a program related to an interactive fiction in which sentences are being input and output in C, then the apparently simple data structure \lstinline{char} of the language reveals itself to be cumbersome and complex when each word and the sentence that the programmer wants to deal with must be present not as sentences nor words, but as series of \lstinline{char}\footnote{Hence the origin of the name of the data type \lstinline{string}, as a continuous series of \lstinline{char}, or characters stringed together.}. 
A simple language does not mean that it is easy\footnote{Perhaps the simplest language of all, lambda-calculus, is far from an easy construct to grasp, just like the game of Go of which it is said that it is simple to learn, but difficult to master}. By making things simple, but not too simple \citep{biancuzzi_masterminds_2009}, it remains a means to an end, akin to any other tool or instrument\footnote{For a further parallel on musical instruments, see Rich Hickey's keynote address at RailsConf 2012 \citep{confreaks_rails_2012}}. A proper combination of orthogonality, abstraction and simplicity results, once more, in elegance. Mobilizing the architectural domain, the language designer Bruce McLennan further presses the point: @@ -170,20 +184,38 @@ \subsubsection{Abstraction, simplicity and orthogonality} There are other reasons that elegance is relevant to a well-engineered programming language. The programming language is something the professional programmer will live with - even live in. It should feel comfortable and safe, like a well-designed home or office; in this way it can contribute to the quality of the activities that take place within it. Would you work better in an oriental garden or a sweatshop? \citep{mclennan_who_1997} \end{quote} -Programming languages are thus both tools and environments, but they are, more uniquely, \emph{symbolic}, manipulating and shaping \emph{symbolic} matter. Looking at these languages from a Goodmanian perspective provides a backdrop to examine their communicative and expressive power. From the perspective of the computer, programming languages are unambiguous insofar as any expression or statement will ultimately result in an unambiguous execution by the CPU (if any ambiguity remains, the program crashes). They are also syntactically disjointed (i.e. clearly distinguishable from one another), but not semantically: two programming tokens can have the same effect under different appearances. 
The use of formal specifications aimed at resolving any possible ambiguity in the syntax of the language in a very clear fashion, but fashionable equivalence can come back as a desire of the language designer. The semantics of programming languages, as we will see below, also aim at being thoroughly disjointed: a variable cannot be of multiple types at the same time. Finally, programming languages are also differentiated systems since no symbol can refer to two things at the same time. +Programming languages are thus both tools and environments, and moreover eminently \emph{symbolic}, manipulating and shaping \emph{symbolic} matter. Looking at these languages from a Goodmanian perspective provides a backdrop to examine their communicative and expressive power. From the perspective of the computer, programming languages are unambiguous insofar as any expression or statement will ultimately result in an unambiguous execution by the CPU (if any ambiguity remains, the program does not compile, the ambiguity gets resolved by the compiler, or the program crashes during execution). They are also syntactically disjointed (i.e. clearly distinguishable from one another), but not semantically: two programming tokens can have the same effect under different appearances. The use of formal specifications aims at resolving any possible ambiguity in the syntax of the language in a very clear fashion, but fashionable equivalence can come back as a desire of the language designer. The semantics of programming languages, as we will see below, also aim at being somewhat disjointed: a variable cannot be of multiple types at the exact same time, even though a function might have multiple signatures in some languages. Finally, programming languages are also differentiated systems since no symbol can refer to two things at the same time. -The tension arises when it comes to the criteria of unambiguity, from a human perspective. 
The most natural-language-like component of programs, the variable and function names, always have the potential of being ambiguous \footnote{For instance, does \lstinline{int numberOfFlowers} refer to the current number of flowers in memory? To the total number of potential of flowers? To a specific kind of number whose denomination is that of a flower?}. We consider this ambiguity a productive opportunity for creativity, and a hindrance for program effectiveness. So, given the qualification of programming languages as symbolic systems, we could expand our short analysis above by inspecting how programming languages allow for program texts which denote, label, represent, etc. in order to further argument how source code has the potential, and has examples, of being an artistic means of expression and comprehension, from a cognitive point of view. +The tension arises when it comes to the criterion of unambiguity, from a human perspective. The most natural-language-like component of programs, the variable and function names, always have the potential of being ambiguous\footnote{For instance, does \lstinline{int numberOfFlowers} refer to the current number of flowers in memory? To the total number of potential flowers? To a specific kind of number whose denomination is that of a flower?}. We consider this ambiguity both a productive opportunity for creativity, and a hindrance for program reliability. If programming languages are aesthetic symbol systems, then they can allow for expressiveness, first and foremost of computational concepts. It is in the handling of particularly complex concepts that programming languages also differentiate themselves in value. The differences in programming language design and use thus amount to differences in style. In the words of Niklaus Wirth: -If programming languages are aesthetic symbol systems, then they can allow for expressiveness, first and foremost of computational concepts. 
It is in the handling of particularly complex concepts that programming languages also differentiate themselves in value. +\begin{quote} + Stylistic arguments may appear to many as irrelevant in a technical environment, because they seem to be merely a matter of taste. I oppose this view, and on the contrary claim that stylistic elements are the most visible parts of a language. They mirror the mind and spirit of the designer very directly, and they are reflected in every program written. \citep{wirth_essence_2003} +\end{quote} \subsubsection{Idiosyncratic implementations} \label{subsubsec:idiosyncracies-implementations} -Software, as an abstract artifact, can be understood at the physical, design and intentional levels\citep{moor_three_1978}. With modern programming languages allowing us to safely ignore the hardware level, it is at the interaction of the design (programming) and intentional (human) level that things get complicated; the question "what does a Turing machine do?" has \lstinline{n+1} answers, \lstinline{1} syntactic answer, and \lstinline{n} semantic ones, based on however many interpretations. +Software, as an abstract artifact, can be understood at the physical, design and intentional levels\citep{moor_three_1978}. With modern programming languages allowing us to safely ignore the hardware level, it is at the interaction of the design (programming) and intentional (human) level that things get complicated; all programming languages can do the same thing, but they all do it in a slightly different way. In order to illustrate the expressivity of programming languages, we highlight three programming concepts which are innate to any modern computing environment, and yet relatively complex to deal with for humans: \emph{iterating}, \emph{referencing} and \emph{threading}. 
-In order to illustrate the expressivity of programming languages, we highlight three programming concepts which are innate to any modern computing environment, and yet relatively complex to deal with for humans: \emph{iterating}, \emph{referencing} and \emph{threading}. +The first and the most straightforward example is iteration, or the process of counting through the items of a list. Since, ultimately, all program text is organized as continuous series of binary encodings, going through such a list is a fundamental operation in programming. Different implementations of such an operation are shown in \ref{code:iterating-c} for the C language and in \ref{code:iterating-py} for Python. -Referencing is a surface-level consequence of the \emph{use-mention} problem referred to above, the separation between a name and its value, with the two being bound together by the address of the physical location in memory. As somewhat independent entities, it is possible to manipulate them separately, with consequences that are not intuitive to grasp. Some programming languages allow for this direct manipulation, through something called \emph{pointer arithmetic}\footnote{For better or worse, C is very liberal with what can be done with pointers.}. Indeed, the possibility to add and substract memory locations independent of the values held in these locations, as well as the ability to do arithmetic operations between an address and its value isn't a process whose meaning comes from a purely experiential or social perspective, but rather exists meaningfully for humans only through logical grounding, by understanding the theoretical architecture of the computer. What also transpires from these operations is another dimension of the non-linearity of programming languages, demanding complex mental models to be constructed and updated to anticipate what the program will ultimately result in when executed. 
Notation attempts at remediating those issues by offering symbols to represent these differences, such as in \ref{code:references-c}. +\begin{listing} + \inputminted{c}{./corpus/iterating.c} + \caption{Iterating in C involves keeping track of an iterating counter and knowing the length of the list beforehand.} + \label{code:iterating-c} +\end{listing} + +\begin{listing} + \inputminted{py}{./corpus/iterating.py} + \caption{Iterating in Python is done through a specific syntax which abstracts away the details of the process.} + \label{code:iterating-py} +\end{listing} + +This comparison shows how a similar function can be performed via different syntaxes. Particularly, we can see how the Python listing implies a more human-readable syntax, getting rid of machine-required punctuation, and thus facilitating the pronunciation out loud. In contrast, the C listing states the parts of the loop in an order that is not intuitive to human comprehension. Read out loud, the C listing would be equivalent to "\emph{For an index named i starting at 0, and while i is less than a value named max\_count, increase i by one on each iteration}", which focuses more on the index management than on the list itself; while the Python listing would read "\emph{for an item in my list}", much more concise and expressive. + +Referencing is a more complex problem than iterating. It is a surface-level consequence of the \emph{use-mention} problem referred to above, the separation between a name and its value, with the two being bound together by the address of the physical location in memory. As somewhat independent entities, it is possible to manipulate them separately, with consequences that are not intuitive to grasp. For instance, when one sees the name of a variable in a program text, is the name referencing the value of the variable, or the location at which this value is stored? Here, we need a mark which allows the programmer to tell the difference. 
+ +Notation attempts at remediating those issues by offering symbols to represent these differences, such as in \ref{code:references-c}, or to hide it completely as in \ref{code:references-ruby}. \begin{listing} \inputminted{c}{./corpus/references.c} @@ -201,50 +233,58 @@ \subsubsection{Idiosyncratic implementations} Meanwhile, in \ref{code:references-ruby}, we see that the two variables are actually referring to the same data. The design decision here is not to allow the programmer to make the difference between a reference and an actual value, and instead prefer that the programmer constructs programs which, on one side, might be less memory-efficient but are, on the other side, easier to read and write, since variable manipulation only ever occurs in one single way—through reference. -Threading is the ability to do multiple things at the same time, yet in parallel. The concept itself is simple, to the point that we take it for granted in modern computer applications since the advent of time-sharing systems (we can have a text editor take input and scan that input for typos at the same time). However, the proper handling of threading when writing and reading software is quite a complex task. This involves the ability to demultiply the behaviour of routines (whose execution is already non-linear) to keep track of what could be going on at any point in the execution of the program, including use and modification of shared resources, the scheduling of thread start and end, as well as synchronization of race conditions (e.g. if two things happen at the same time, which one happens first, such that the consistence of the global state is preserved?). As Edward A. Lee put it: +Notation does not exclusively operate at the surface level. 
Some programming languages signify, by their use of the above characters, that they allow for this direct manipulation, through something called \emph{pointer arithmetic}\footnote{For better or worse, C is very liberal with what can be done with pointers.}. Indeed, the possibility to add and substract memory locations independent of the values held in these locations, as well as the ability to do arithmetic operations between an address and its value isn't a process whose meaning comes from a purely experiential or social perspective, but rather exists meaningfully for humans only through logical grounding, by understanding the theoretical architecture of the computer. What also transpires from these operations is another dimension of the non-linearity of programming languages, demanding complex mental models to be constructed and updated to anticipate what the program will ultimately result in when executed. -\begin{quote} - Although threads seem to be a small step from sequential computation, in fact, they represent a huge step. They discard the most essential and appealing properties of sequential computation: understandability, predictability, and determinism. Threads, as a model of computation, are wildly non-deterministic, and the job of the programmer becomes one of pruning that nondeterminism \citep{lee_problem_2006}. -\end{quote} +Threading is the ability to do multiple things at the same time, in parallel. The concept itself is simple, to the point that we take it for granted in modern computer applications since the advent of time-sharing systems: we can have a text editor take input and scan that input for typos at the same time, as well as scanning for updates in a linked bibliography file. However, the proper handling of threading when writing and reading software is quite a complex task\footnote{As Edward A. Lee puts it: "\emph{Although threads seem to be a small step from sequential computation, in fact, they represent a huge step. 
They discard the most essential and appealing properties of sequential computation: understandability, predictability, and determinism. Threads, as a model of computation, are wildly non-deterministic, and the job of the programmer becomes one of pruning that nondeterminism.}" \citep{lee_problem_2006}.}. First, every program is executed as a process. Then, such a process can create child subprocesses for which it is responsible. This responsibility involves figuring out how the child processes communicate information back to the parent process, how they communicate with each other, and how the parent process makes sure all the child processes have exited before exiting itself. -Threading shows how the complexity of a deep-structure benefits to be adequately represented in the surface. Once again, aesthetically-satisfying (simple, concise, expressive) notation can help programmers in understanding what is going on in a multi-threaded program, by removing additional cognitive overload generated by verbosity. Different syntax can help allevaite some of this burden, as we can see in comparing \ref{code:threading-c}, written in C, and \ref{code:threading-go}, written in Go. +This involves the ability to demultiply the behaviour of routines (whose execution is already non-linear) to keep track of what could be going on at any point in the execution of the program, including use and modification of shared resources, the scheduling of thread start and end, as well as synchronization of race conditions (e.g. if two things happen at the same time, which one happens first, such that the consistency of the global state is preserved?). + +For instance, we can look at printing numbers at a random interval. As seen in the non-threaded example in \ref{code:non-threading-go}, it is somewhat deterministic since we know that \lstinline{2045} will always print \emph{before} \lstinline{2046}.
In the threaded equivalent in \ref{code:threading-go}, such a result is not guaranteed. \begin{listing} - \inputminted{python}{./corpus/thread.c} - \caption{Complex way to do threads in C.} - \label{code:threading-c} + \inputminted{go}{./corpus/non-thread.go} + \caption{Sequential, non-threaded way to print numbers in Go.} + \label{code:non-threading-go} \end{listing} +Nonetheless, the threading syntax in \ref{code:threading-go} allows the programmer to keep their mental model of a function execution, while the threading syntax in C, shown in \ref{code:threading-c}, creates a lot more cognitive overhead, by declaring specific types, calling a specific function with unknown arguments, and then manually closing the thread afterwards. + \begin{listing} \inputminted{python}{./corpus/thread.go} \caption{Nice way to do threads in Go.} \label{code:threading-go} \end{listing} -Here, we see how the abstraction provided by some language constructs in Go result in a simpler and more expressive program text. In this case, the non-essential properties of the thread are abstracted away from programmer concern. The \emph{double-meaning} embedded in the \lstinline{go} keyword even uses a sensual evokation of moving away (from the main thread) in order to stimulate implicit understanding of what is going on. Meanwhile, the version written in C includes the necessary headers at the top of the file, the explicit type declaration when starting the thread and the additional \lstinline{join()} method call in order to make sure that the parallel thread returns to the main thread, and does not create a memory leak in the program once the main thread exits. While both behaviours are the same, the syntax of Go allows for a cleaner and simpler representation.
+\begin{listing} + \inputminted{c}{./corpus/thread.c} + \caption{Complex way to do threads in C.} + \label{code:threading-c} +\end{listing} + +Threading shows how the complexity of a deep-structure benefits from being adequately represented at the surface. Once again, aesthetically-satisfying (simple, concise, expressive) notation can help programmers in understanding what is going on in a multi-threaded program, by removing additional cognitive overload generated by verbosity. -Programming languages tend to help programmers solve semantic issues in the problem domain through elegant syntactical means while reducing unnecessary interactions with the underlying technical system. Nonetheless, we have seen how languages differ in the ways in which they enable the programmer's access to and manipulation of computational actions. Beyond a language designer's perspective, there also exists a social influence on how a source code should be written according to its linguistic community. +Here, we see how the abstraction provided by some language constructs in Go results in a simpler and more expressive program text. In this case, the non-essential properties of the thread are abstracted away from programmer concern. The \emph{double-meaning} embedded in the \lstinline{go} keyword even uses a sensual evocation of moving away (from the main thread) in order to stimulate implicit understanding of what is going on. Meanwhile, the version written in C includes the necessary headers at the top of the file, the explicit type declaration when starting the thread, the call to \lstinline{pthread_create}, without a clear idea of what the \lstinline{p} stands for, as well as the final \lstinline{join()} method call in order to make sure that the parallel thread returns to the main process, and does not create a memory leak in the program once it exits. While both behaviours are the same, the syntax of Go allows for a cleaner and simpler representation.
+ +Programming languages aim at helping programmers solve semantic issues in the problem domain through elegant syntactical means while reducing unnecessary interactions with the underlying technical system. These styles also have a functional component, as we have seen how languages differ in the ways in which they enable the programmer's access to and manipulation of computational actions. Beyond a language designer's perspective, there also exists a social influence on how a source code should be written according to its linguistic community. \subsection{Styles and idioms in programming} \label{subsec:style-idioms-programming} -% The problem of style is a problem framed by Georg Simmel's statement regarding the observation that \emph{the practical existence of humanity is absorbed in the struggle between individuality and generality} \citep{simmel_problem_1991}. Simmel's investigation of the topic, which will serve as the foundation for our definition of style, focuses on the dichotomy between works of fine art and mass-produced works of applied arts. Indeed, Simmel draws a distinction between the former, as indiosyncratic objects displaying the subjectivity of its maker, and the latter, as industrially produced and replicated, in which the copy cannot be told apart from the original. The work of fine art, according to him, is \emph{a world unto itself, is its own end, symbolizing by its very frame that it refuses any participation in the movements of a practical life beyond itself}, while the work of applied arts only exists beyond this individuality, first and foremost as a practical object. As these two kinds of work exist at the opposite extremes of a single continuum, we can insert a third approach: that of the crafted object, as a repeated display of its maker's subjectivity, destined for active use rather than passive contemplation. 
So while style can be seen as a general principle which either mixes with, replaces or displaces individuality, style in programming doesn't stand neatly at either extreme. It involves individual preferences, but also transitions through the objective via such intermediary objects that are linters. - Concrete use of programming languages operate on a different level of formality: if programming paradigms are top-down strategies specified by the language designers, they are also complemented by the bottom-up tactics of softare developers. Such practices crystallize, for instance, in \emph{idiomatic writing}. Idiomaticity refers, in traditional linguistics, to the realized way in which a given language is used, in contrast with its possible, syntactically-correct and semantically-equivalent, alternatives. For instance, it is idiomatic to say "The hungry dog" in English, but not "The hungered dog" (a correct sentence, whose equivalent is idiomatic in French and German). It therefore refers to the way in which a language is a social, experiential construct, relying on intersubjective communication \citep{voloshinov_marxism_1986}. Idiomaticity is therefore not a purely theoretical feature, but first and foremost a social one. This social component in programming languages is therefore related to how one writes a language "properly". In this sense, programming language communities are akin to hobbyists clubs, with their names\footnote{Pythonistas for Python, Rubyists for Ruby, Rustaceans for Rust, Gophers for Go, etc.} meetups, mascots, conferences and inside-jokes. Writing in a particular language can be due to external requirements, but also to personal preference: \begin{quote} I think a programming language should have a philosophy of helping our thinking, and so Ruby's focus is on productivity and the joy of programming. Other programming languages, for example, focus instead on simplicity, performance, or something like that. 
Each programming language has a different philosophy and design. If you feel comfortable with Ruby's philosophy, that means Ruby is your language. \citep{matsumoto_yukihiro_2019} \end{quote} -So an idiom in a programming language depends on the social interpretation of the formal programming paradigms\footnote{This is even more present in contemporary programming languages, since paradigms in these languages are often blended and no language is purely single-paradigmatic; for instance, Ruby is a declarative language with functional properties \citep{kidd_why_2005}}. Such an interpretation is also manifested in community-created and community-owned documents, such as \emph{The Zen of Python} \citep{peters_code_1999}. +So an idiom in a programming language depends on the social interpretation of the formal programming paradigms\footnote{This is even more present in contemporary programming languages, since paradigms in these languages are often blended and no language is purely single-paradigmatic; for instance, Ruby is a declarative language with functional properties \citep{kidd_why_2005}}. Such an interpretation is also manifested in community-created and community-owned documents. -PEP 20, titled \emph{The Zen of Python}, shows how the philosophy of a programming language relates to the practice of programming in it\footnote{For equivalent guides in other languages see for instance \citep{spencer_ten_1994} or \citep{cheney_practical_2019}}. Without particular explicit directives, it nonetheless highlights \emph{attitudes} that one should keep in mind and exhibit when writing Python code. Such a document sets the mood and the priorities of the Python community at large (being included in its official guidelines in 2004), and highlights a very perspective on the priorities of theoretical language design. For instance, the first Zen is clearly states the priorities of idiomatic Python: +PEP 20, is one of such documents. 
Informally titled \emph{The Zen of Python}, it shows how the philosophy of a programming language relates to the practice of programming in it\footnote{For equivalent guides in other languages see for instance \citep{spencer_ten_1994} or \citep{cheney_practical_2019}} \citep{peters_code_1999}. Without particular explicit directives, it nonetheless highlights \emph{attitudes} that one should keep in mind and exhibit when writing Python code. Such a document sets the mood and the priorities of the Python community at large (being included in its official guidelines in 2004), and highlights a very specific perspective on the priorities of theoretical language design. For instance, the first Zen clearly states the priorities of idiomatic Python: \begin{quote} Beautiful is better than ugly. \citep{peters_pep_2004} \end{quote} -This epigram sets the focus on a specific feature of the code, rather than on a specific implementation. With such a broad statements, it also contributes to strengthening the community bonds by creating shared values as folk knowledge. In practice, writing idiomatic code requires not only the awareness of the community standards around such an idiomaticity, but also knowledge of the language construct themselves which differentiate it from different programming languages. In the case of PEP20 quoted about, one can even include it inside the program text with \lstinline{import this}, showing the tight coupling between abstract statements and concrete code. For instance, in \ref{code:range-operator}, distinct syntactical operators are semantically equivalent but only the second example is considered idiomatic Python, partly because it is \emph{specific} to Python, and because it is more performing than the first example, due to the desire of the developers of Python to encourage idiomaticity (i.e. what they consider good Python to be). +This epigram sets the focus on a specific feature of the code, rather than on a specific implementation.
With such broad statements, it also contributes to strengthening the community bonds by creating shared values as folk knowledge. In practice, writing idiomatic code requires not only the awareness of the community standards around such an idiomaticity, but also knowledge of the language constructs themselves, which differentiate it from other programming languages. In the case of PEP 20 quoted above, one can even include it inside the program text with \lstinline{import this}, showing the tight coupling between abstract statements and concrete code. For instance, in \ref{code:range-operator}, distinct syntactical operators are semantically equivalent but only the second example is considered idiomatic Python, partly because it is \emph{specific} to Python, and because it is more performant than the first example, due to the desire of the developers of Python to encourage idiomaticity; that is, what they consider good Python to be. \begin{listing} \inputminted{python}{./corpus/range.py} @@ -278,21 +318,19 @@ \subsection{Styles and idioms in programming} There are absolutely no functional differences in the statements above, and the question mark is just here to make the code seem more natural and intuitive to humans. Checking for a boolean (or non-nil value) in an if statement is, in the end, the equivalent of asking a question about that value. Here, Ruby makes that explicit, therefore making it easier to read with the most minimal amount of additional visual noise (i.e. one character). -We've seen how programming languages can be subjected to aesthetic judgment, but those aesthetic criteria are only there to ultimately support the writing of good (i.e. functional and beautiful) code. Such a support exists via design choices (abstraction, orthogonality, simplicity), but also through the practical uses of programming languages, notably in terms of idiomaticity and of syntactic sugar, allowing some languages more readability than others.
Like all tools, it is their (knowledgeable) use which matters, rather than their design, and it is the problems that they are used to deal with, and the way in which they are dealt with which ultimately informs whether or not a program text in that language will exhibit aesthetic features. - -This concept of appropriateness also relates to material honesty. As seen in \ref{subsubsec:crafting-software}, the fact that a programmer tends to identify their practice with craft implies that they work with tools and materials. Programming languages being their tools, and computation the material, one can extend to the concept of material honesty to the source code \citep{sennett_craftsman_2009}. In this case, working with, and in respect of, the material and tools at hand is a display of excellence in the community of practitioners, and results in an artefact which is in harmony and is well-adapted to the technical environment which allowed it to be. Source code written in accordance with the principles and the affordances of its programming language is therefore more prone to receive a positive aesthetic judgment. +We have seen how programming languages can be subjected to aesthetic judgment, but those aesthetic criteria are only there to ultimately support the writing of good (i.e. functional and beautiful) code. Such a support exists via design choices (abstraction, orthogonality, simplicity), but also through the practical uses of programming languages, notably in terms of idiomaticity and of syntactic sugar, allowing some languages more readability than others. Like all tools, it is their (knowledgeable) use which matters, rather than their design, and it is the problems that they are used to deal with, and the way in which they are dealt with which ultimately informs whether or not a program text in that language will exhibit aesthetic features. 
-Furthermore, idiomatic writing is accompanied by a language-independnent, but group-dependent feature: that of programming style. Fundamentally, the problem of style might be that "\emph{the practical existence of humanity is absorbed in the struggle between individuality and generality}" \citep{simmel_problem_1991}. Simmel's investigation of the topic focuses on the dichotomy between works of fine art and mass-produced works of applied arts, and he draws a distinction between unique objects displaying the subjectivity of its maker, and the industrially produced and replicated, and which are only meant to serve practical ends. +This concept of appropriateness also relates to material honesty. As seen in \ref{subsec:material-knoweldge}, the fact that a programmer tends to identify their practice with craft implies that they work with tools and materials. Programming languages being their tools, and computation the material, one can extend the concept of material honesty to source code \citep{sennett_craftsman_2009}. In this case, working with, and in respect of, the material and tools at hand is a display of excellence in the community of practitioners, and results in an artefact which is in harmony with, and well-adapted to, the technical environment which allowed it to be. Source code written in accordance with the principles and the affordances of its programming language is therefore more prone to receive a positive aesthetic judgment. Furthermore, idiomatic writing is accompanied by a language-independent, but group-dependent feature: that of programming style. -As these two kinds of work exist at the opposite extremes of a single continuum, we can insert a third approach: that of the crafted object. It exists in-between, as a repeated display of its maker's subjectivity, destined for active use rather than passive contemplation \citep{sennett_craftsman_2009}.
So while style can be seen as a general principle which either mixes with, replaces or displaces individuality, style in programming doesn't stand neatly at either extreme. The work of Gilles-Gaston Granger, and his focus on style as a structuring practice can help to better apprehend style as a relationship between individual taste and structural organization \citep{granger_essai_1988}. Granger posits style in scientific endeavours, which is a component of programming practice, as a mode of knowing at the scale of the group. By abiding by a particular style, the writer and reader can implicitly agree on the fundamental values underpinning a given text, and thus facilitate expectations in further readings of a given program text. +Fundamentally, the problem of style might be that "\emph{the practical existence of humanity is absorbed in the struggle between individuality and generality}" \citep{simmel_problem_1991}. Simmel's investigation of the topic originally focuses on the dichotomy between works of fine art and mass-produced works of applied arts. Indeed, Simmel draws a distinction between the former, as idiosyncratic objects displaying the subjectivity of their maker, and the latter, as industrially produced and replicated, in which the copy cannot be told apart from the original. The work of fine art, according to him, is \emph{a world unto itself, is its own end, symbolizing by its very frame that it refuses any participation in the movements of a practical life beyond itself}, while the work of applied arts only exists beyond this individuality, first and foremost as a practical object. -Concretely, programming style exist as dynamic documents,with both social and technical components.
On the social side, they are only useful if inconditionally adopted by all members working on a particular code-base, since "\emph{all code in any code-base should look like a single person typed it, no matter how many people contributed.}" \citep{waldron_idiomatic_2020}; in the strict sense, guidelines are therefore reference documents which should provide an answer to the question of what is the preferred way of writing a particular statement (e.g. var vs. let, or camelCase vs. snake\_case). Beyond aesthetic preferences aimed at optimizing the clarity of a given source code, style guides also include a technical component which aims at reducing programming errors by catching erroneous patterns in a given codebase (e.g. variable declaration before intialization, loose reference to the function-calling context). +As these two kinds of work exist at the opposite extremes of a single continuum, we can insert a third approach: that of the crafted object. It exists in-between, as a repeated display of its maker's subjectivity, destined for active use rather than passive contemplation \citep{sennett_craftsman_2009}. So while style can be seen as a general principle which either mixes with, replaces or displaces individuality, style in programming doesn't stand neatly at either extreme. The work of Gilles-Gaston Granger, and his focus on style as a structuring practice can help to better apprehend style as a relationship between individual taste and structural organization \citep{granger_essai_1988}. Granger posits style in scientific endeavours, which is a component of programming practice, as a mode of knowing at the scale of the group. Abiding by a particular style, the writer and reader can implicitly agree on the fundamental values underpinning a given text, and thus facilitate expectations in further readings of a given program text. 
-Programming style also exhibits the particular property that it is not just enforced by convention, but also by computational procedure: linters and formatters are particular software whose main function is to formally rearrange the appearance of lines of code according to some preset rules. This constitutes an additional socio-technical context which further enmeshes human writing and machine writing \citep{depaz_discursive_2022}. +Concretely, programming styles exist as dynamic documents, with both social and technical components. On the social side, they are only useful if unconditionally adopted by all members working on a particular code-base, since "\emph{all code in any code-base should look like a single person typed it, no matter how many people contributed.}" \citep{waldron_idiomatic_2020}; personal style is usually frowned upon by software developers as an indicator of individual preferences over group coordination\footnote{Angus Croll wrote a satirical book, \emph{What if Hemingway Wrote JavaScript}, about personal style in programming, in which he copies the style of fiction authors into different programming languages \citep{croll_if_2014}. This shows that, while personal style and expression are very much possible in programming languages, it is also somewhat ludicrous.}. -% language design \url{https://dl.acm.org/doi/10.1145/390016.808420}, \url{http://rigaux.org/language-study/syntax-across-languages/} +In the strict sense, guidelines are therefore reference documents which should provide an answer to the question of what is the preferred way of writing a particular statement (e.g. var vs. let, or camelCase vs. snake\_case). Beyond aesthetic preferences aimed at optimizing the clarity of a given source code, style guides also include a technical component which aims at reducing programming errors by catching erroneous patterns in a given codebase (e.g. variable declaration before initialization, loose reference to the function-calling context).
-% \url{https://pointersgonewild.com/2022/05/23/minimalism-in-programming-language-design/} +Programming style also exhibits the particular property that it is not just enforced by convention, but also by computational procedure: linters and formatters are particular software whose main function is to formally rearrange the appearance of lines of code according to some preset rules. This constitutes an additional socio-technical context which further enmeshes human writing and machine writing \citep{depaz_discursive_2022}. Essentially, this means that source code will be judged not just on how it functions technically, but also how it exists stylistically—that is, within a social contract which can be implemented through technical, automated means. \spacer @@ -448,7 +486,7 @@ \subsubsection{Openness and transparency} This abandoning of abstraction, in order to reveal what should be revealed to a reader-as-potential writer, builds on a community ethos of hacking, where the machine's workings are laid bare in order to support unexpected changes by unknown individuals. This textual hint at both multiple realities (i.e. how the playback is actually done, inside the \lstinline{VideoLooper} abstraction) and particular possibilities (i.e. using, or changing it), creates a particularly welcoming space for newcomers. -% state that the code poetry and the hacker code are limited, and stand in contrast to developer code +% state that the code poetry and the hacker code are limited, and stand in contrast to developer code -> argument for this \subsubsection{Emotions and functionality} \label{subsubsec:emotions-functionality} @@ -478,6 +516,8 @@ \subsubsection{Emotions and functionality} The added depth of meaning from this code poem goes beyond the syntactic and semantic interplay immediately visible when reading the source, as the execution provides a result whose meaning depends on the co-existence of both source and output. 
Beyond keywords, variable names and data structures, it is also the procedure itself which gains expressive power: a poem initially about \emph{you} results in a humanly infinite, but hardware-bounded, series of \emph{me}\footnote{Another productive comparison could be found in Stein's work, \emph{Rose is a rose is a rose...}}. +% "Implied in the design of the Source Code Poetry competition is the idea that the writing of code is an artistic enterprise. Indeed, "real" programmers are the ones whose code itself is poetry in motion. The emphasis on executable code reveals aesthetic possibilities of programming languages that blend form and function. Such poems are fascinating because they are variably accessible and inaccessible to readers, a function of their readers' knowledge of programming languages and facility with poetry. They also provide means of expression in multiple ways: the visual aesthetics of the code on the page, an aural dimension if read aloud, and the output rendered by the code when compiled. Their possibilities for interpretation, then, are fragmentary, requiring negotiation on these many fronts to appreciate and understand." \citep{risam_poetry_2015} + \subsection{Between humans and machines} \label{subsec:humans-machines} @@ -496,6 +536,8 @@ \subsection{Between humans and machines} \section{Contexts of functions} %25k \label{sec:contexts-of-functions} +% add this quote from The Embodied Aesthetics of Code, quoted in [Sy Brand](https://www.youtube.com/watch?v=CkGqINHZit0): "an object is 'functionally beautiful' to the extent that its aesthetic properties contribute to its overall performance---the functional beauty of an object enhances its fulfilling its primary function" and, in this case, the primary function is not to be executed, but to be understood. 
+ % The function of aesthetics is to display truth: From the Embodied Aesthetics of Source code: McAllister further argued that the doctrine, espoused by several prominent scientists such as Chandrasekhar and Heisenberg, that beauty is an attribute of truth implies an agreement between an entity’s perceptual aspects and its utilitarian qualities Despite the diversity of metaphors applied to source code aesthetics, from literature and metaphor, we nonetheless maintain a common ground of functionalism. That is, each program-text that we have examined in this work always implies a necessity of functioning in order to be properly judged at an aesthetic level. This section argues that function both acts as a base for value judgment and depends on the socio-technical contexts in which the program text is being presented. @@ -505,6 +547,8 @@ \section{Contexts of functions} %25k \subsection{Definitions of function} \label{subsec:definitions-function} +% functions need to be made clearer. there are three levels (1) syntactical/formal validity, (2) what the program does (operational semantics) and (3) what the program should do (intentional semantics). THIS SHOULD BE DISCUSSED AT THE BEGINNING RATHER THAN JUST THE LAME INTRO ON LAMBDA CALCULUS. then start from those to discuss the function/meaning of computer programs + From a computer science perspective, a function is a program unit which produces an output for each input. While there might not be some explicit value given as an output, a function in a computer program is nonetheless an action which has the ability to modify some internal state of the machine—that is, they are \emph{effective} \citep{abelson_structure_1979}, and they are commonly expected to provide a tangible change as a result of their execution. Here, function is therefore understood in the lineage of lambda calculus, the immediate mathematical predecessor to the Turing machine.
In lambda calculus, such input and output are expected to be a numerical value. For instance, the function square, given the number 2, provides the number 4. In fact, the technical environment in which source code exists—computers—is expressly designed in order to support such a definition of function. In order to fulfill this definition of function, a given program text must be correctly machine-readable. This means that its syntax must be correct, before its semantics can be executed by the machine. Here, we see a first bifurcation: between what the program text does (represented by its syntax), and what it is about (represented by its semantics). The first can be qualified as an effective function, while the second can be qualified as a useful function. All programs examined in this work have an effective function. @@ -589,7 +633,7 @@ \subsection{Function in aesthetics} A final specificity of program texts is that they are very often collaborative and open-ended, particularly in the open-source movement, which tends to make all program texts writerly texts, in the sense of Barthes \citep{barthes_bruissement_1984}. This double movement of readers being potential writers and writers being potential readers complicates an already complex matter. Since the audience can become the creator of modified functional technical systems, expressive devices also act as communicative devices, and thus take on a relational dimension. In turn, this relational dimension within a technical environment involves notions of skill and knowledge whenever the understanding of a program text is at stake, notions of pre-existing skill and knowledge which, in turn, vary from one individual to the next. The program text therefore acts as a bridge between the intent of the ideal version of the software, and the mental spaces constructed by a reader.
-All source code aesthetics relate to a certain conception of function: either technical function, or social function, and more or less essential to aesthetic judgment. While these two understanding of function further impact the manifestations of source code, these manifestations are not just expressive, but also communicative, aiming at the transmission of concepts from one individual through the use of machine syntax. Indeed, code that is neither functioning for the machine, nor meaningul for a human holds the least possible value amongst different combinations presented above.A program is always a program \emph{about something}, and therefore holds some intentionality, an intentionality which can, in turn, be made pleasantly graspable by a writer to a reader. +All source code aesthetics relate to a certain conception of function: either technical function, or social function, and more or less essential to aesthetic judgment. While these two understandings of function further impact the manifestations of source code, these manifestations are not just expressive, but also communicative, aiming at the transmission of concepts from one individual to another through the use of machine syntax, through the dual lens of human-machine semantics. Indeed, code that is neither functioning for the machine, nor meaningful for a human holds the least possible value amongst different combinations presented above. A program is always a program \emph{about something}, and therefore holds some intentionality, an intentionality which can, in turn, be made pleasantly graspable by a writer to a reader. In the overwhelming majority of cases of program texts, the expectation is to understand.
The recognition of the existence of the other as a reader and co-author, both a generalized other in the sense that anyone can theoretically read and modify code, but also as a specified other, in the sense that the other possesses a particular set of skills, knowledge, habits and practices stemming from the diversity of programming communities. This stance, between general and particular, is one that shows the ethical component of an aesthetic practice: recognizing both the similarity and the difference in the other, and communicating with a peer through specific symbol systems. diff --git a/redaction/thesis.bib b/redaction/thesis.bib index e3701c9..e2f8a05 100644 --- a/redaction/thesis.bib +++ b/redaction/thesis.bib @@ -1027,6 +1027,18 @@ @misc{cheney_practical_2019 file = {/home/pierre/Zotero/storage/4GLB654A/gophercon-singapore-2019.html} } +@misc{chevalier-boisvert_minimalism_2022, + title = {Minimalism in {{Programming Language Design}}}, + author = {{Chevalier-Boisvert}, Maxime}, + year = {2022}, + month = may, + journal = {Pointers Gone Wild}, + urldate = {2023-08-04}, + abstract = {Four years ago, I wrote a blog post titled Minimalism in Programming, in which I tried to formulate an argument as to why it's usually a good idea to try to minimize complexity in your progra\ldots}, + langid = {english}, + file = {/home/pierre/Zotero/storage/UILXRVV2/minimalism-in-programming-language-design.html} +} + @misc{chewxy_gorgonia_2022, title = {Gorgonia/Gorgonia}, author = {{Chewxy}}, @@ -1316,6 +1328,19 @@ @misc{craver_underhanded_2015 file = {/home/pierre/Zotero/storage/6FJ6HR8M/_page_id_2.html} } +@book{croll_if_2014, + title = {If {{Hemingway Wrote JavaScript}}}, + author = {Croll, Angus}, + year = {2014}, + month = mar, + publisher = {{No Starch Press}}, + urldate = {2023-08-04}, + abstract = {If Hemingway Wrote JavaScript playfully imagines JavaScript programs as written by famous authors.}, + isbn = {978-1-59327-585-3}, + langid = {english}, + file =
{/home/pierre/Zotero/storage/VP8KEDUM/hemingway.html} +} + @book{crowthers_colossal_1977, title = {Colossal {{Cave Adventure}}}, author = {Crowthers, Will}, @@ -1562,6 +1587,12 @@ @article{dijkstra_design_1963 file = {/home/pierre/Zotero/storage/BX9ELHC6/Dijkstra - 1963 - On the design of machine independent programming l.pdf} } +@misc{dijkstra_how_1975, + title = {How Do We Tell Truths That Might Hurt?}, + author = {Dijkstra, Edsger W.}, + year = {1975} +} + @incollection{dijkstra_humble_2007, title = {The Humble Programmer}, booktitle = {{{ACM Turing}} Award Lectures}, @@ -2177,6 +2208,23 @@ @book{gamma_design_1994 langid = {english} } +@article{gannon_impact_1975, + title = {The Impact of Language Design on the Production of Reliable Software}, + author = {Gannon, J. D. and Horning, J. J.}, + year = {1975}, + month = apr, + journal = {ACM SIGPLAN Notices}, + volume = {10}, + number = {6}, + pages = {10--22}, + issn = {0362-1340}, + doi = {10.1145/390016.808420}, + urldate = {2023-08-04}, + abstract = {The language in which programs are written can have a substantial effect on their reliability. This paper discusses the design of programming languages to enhance reliability. It presents several general design principles, and then applies them to particular language constructs. Since we can not logically prove the validity of such design principles, empirical evidence is needed to support or discredit them. Gannon has performed a major experiment to measure the effect of nine specific language design decisions in one context. 
Analysis of the frequency and persistence of errors shows that several decisions had a significant impact on reliability.}, + keywords = {Characteristic errors,Experimentation.,Language design,Redundancy,Reliability}, + file = {/home/pierre/Zotero/storage/HJ6N784V/Gannon and Horning - 1975 - The impact of language design on the production of.pdf} +} + @misc{garfinkel_biological_2000, title = {Biological {{Computing}}}, author = {Garfinkel, Simson}, @@ -6087,6 +6135,16 @@ @book{ricoeur_rule_2003 langid = {english} } +@misc{risam_poetry_2015, + title = {The Poetry of Executable Code | {{Jacket2}}}, + author = {Risam, Roopika}, + year = {2015}, + journal = {Jacket2}, + urldate = {2023-08-03}, + howpublished = {https://jacket2.org/commentary/poetry-executable-code}, + file = {/home/pierre/Zotero/storage/222W4F4Y/poetry-executable-code.html} +} + @article{ritchie_unix_1984, title = {The {{UNIX System}}: {{The Evolution}} of the {{UNIX Time-sharing System}}}, shorttitle = {The {{UNIX System}}}, diff --git a/redaction/todo.md b/redaction/todo.md index c43cbae..86c99f5 100644 --- a/redaction/todo.md +++ b/redaction/todo.md @@ -14,6 +14,8 @@ ## chap 4 - programming +- rewrite intro of chap once the whole thing is re-read + - __case studies__ - choose the case-studies in the way that is the most illustrative of my point. doesn't have to be huge. - i should definitely have a more comparative approach: multiple code-bases, with aesthetics which are tied to __LANGUAGE__, __COMMUNITY__ and __PROBLEM__ (question of the idiomatic). this is better than having one case study after another, completely discontinued.
@@ -23,47 +25,26 @@ ### programming languages -- in language design section, inckude iverson_notation_as_tool_for_thought, beardsley: cognitive gratification under ideal circumstances - ### styles -- [ ] add marielle macé to a bit of conclusion on the styles of programmers (maybe once the part about simmel is extracted from the craft) -- [ ] add if hemingway wrote javascript as an example of styles +- [ ] language design \url{http://rigaux.org/language-study/syntax-across-languages/} +- [ ] add marielle macé to a bit of conclusion on the styles of programmers -- [ ] include the concept of interface by matthew kirschenbaum -- [ ] proofread subsec between human and machines -- [ ] there is no example of iterating in 5.1.2 -- [ ] all figures (especially fig. 51, 52) need to be very much explicited: "we need to be lead into why that is even an issue" / define reasonable examples for each / show different return types in fig. 51 -- [ ] for the threads, first define what a `process` is so that we can understand what a `thread` is, also show the non-threaded version? explain how the sequential outcome is bouleversé? -- [ ] look into why we can't return `[]int` in c -- [ ] in golang, the `_` identifiers are not about the error, but about ignoring things. show that it differs in JS or Python -- for fig. 62, step through a computer interpretation of the process, before giving a literary interpretation of the process. +- for fig. 62 (self_inspect), step through a computer interpretation of the process, before giving a literary interpretation of the process. - for the code poems, I need to be able to articulate their relevance when looking at different domains. they're not just related to literature, but also architecture (follies, pavillions) or math (pure math), while other source code (linux kernel) might also be a sort of literature (legal code). 
but also make it explicit that i talk about the ones that can run, not the code poems that are not executable -- [ ] rather than having 5.3 as this total disconnect, maybe start by writing a monolith to avoid the pitfalls of structure. particularly because at this point i need to synthesize. rather work on _connections and disconnections_ between the social and the functional? - in the case of list comprehension in Python, it is both a technical and social environment -#### emotions and functionality - -- [ ] literature: include a section about poetry: : +### functions -> Implied in the design of the Source Code Poetry competition is the idea that the writing of code is an artistic enterprise. Indeed, "real" programmers are the ones whose code itself is poetry in motion. The emphasis on executable code reveals aesthetic possibilities of programming languages that blend form and function. Such poems are fascinating because they are variably accessible and inaccessible to readers, a function of their readers' knowledge of programming languages and facility with poetry. They also provide means of expression in multiple ways: the visual aesthetics of the code on the page, an aural dimension if read aloud, and the output rendered by the code when compiled. Their possibilities for interpretation, then, are fragmentary, requiring negotiation on these many fronts to appreciate and understand. - -in the last section, 5.3 - -- [ ] functions need to be made clearer. there are three levels (1) syntactical/formal validity, (2) what the program down (operational semantics) and (3) what the program should do (intentional semantics). THIS SHOULD BE DISCUSSED AT THE BEGINNING RATHER THAN JUST THE LAME INTRO ON LAMBDA CALCULUS. 
then start from those to discuss the function/meaning of computer programs -- [ ] i talk about "_syntactic meaning_" but this makes no sense, meaning is only semantic -- [ ] again, shorten the code snippets and __explain them__ +- [ ] rather than having 5.3 as this total disconnect, maybe start by writing a monolith to avoid the pitfalls of structure. particularly because at this point i need to synthesize. rather work on _connections and disconnections_ between the social and the functional? - [ ] re-quote hayles and her regime of computation (surface, depth, etc.) when i also talk about paloque berges et. al. -- [ ] add this quote from The Embodied Aesthetics of Code, quoted in [Sy Brand](https://www.youtube.com/watch?v=CkGqINHZit0): "an object is 'functionally beautiful' to the extent that its aesthetic properties contribute to its overall performance---the functional beauty of an object enhances its fulfilling its primary function" and, in this case, the primary function is not to be executed, but to be understood. -- [ ] from sy brand talk, gabrielle starr, feeling beauty: the neuroaesthetics of the experience: "Aesthetic response enables the comparison and integration of novel kinds of reward in a process that opens possibilities for new knowledge, or new ways of negotiating the world. The perceptions, images, and emotions we find through our experience of poetry, painting and music put ideas and events into relation with one another that would rarely, if ever, be possible outside the arts." 
## chap 3 - beauty -overall, I should keep in mind that I do not have a technical audience, and I should rework/remove a lot of the examples, and add extensive discussions and rationale as to why those examples are there - ### aesthetics - mention that knowledge influences how we perceive things (brandy, mathematical beauty) +- from sy brand talk, gabrielle starr, feeling beauty: the neuroaesthetics of the experience: "Aesthetic response enables the comparison and integration of novel kinds of reward in a process that opens possibilities for new knowledge, or new ways of negotiating the world. The perceptions, images, and emotions we find through our experience of poetry, painting and music put ideas and events into relation with one another that would rarely, if ever, be possible outside the arts." ### literature @@ -85,6 +66,7 @@ overall, I should keep in mind that I do not have a technical audience, and I sh - [ ] __levels of software__ ([as we may code](https://nshipster.com/as-we-may-code/)) highlights the need for such a thing (quoting: What if, instead of lowering source code down for the purpose of execution, we raised source code for the purpose of understanding?) - [ ] __tools__ add to means of understanding and IDEs deciding how we write: - [ ] __tools__ including a discussion of how does step in a debugger relate to code as terrain, or surface coverage for tests? e.g. how does build and architecture related to code as structure? +- [ ] include the concept of interface by matthew kirschenbaum - [ ] __programmer metaphors__ my approach to metaphors should be more systematic: that is, I should look into how metaphors can represent a SYSTEM (for instance, `symlink` is a limitation when it comes to the files and folder metaphor) - [ ] __programmer metaphors__ metaphor of the `macro` (implies scale), of `scope`, of `global`, implies scale as well. `libraries` is also a metaphor that is literary. 
- [ ] __programmer metaphors__ refer to master/slave as a problematic one