Programming/JavaScript

[EloquentJS] Ch12. Project: A Programming Language

dododoo 2020. 4. 21. 17:37

Code

function parseExpression(program) {
    program = skipSpace(program);

    let match, expr;
    if (match = /^"([^"]*)"/.exec(program)) {
        expr = {type: "value", value: match[1]};
    } else if (match = /^\d+\b/.exec(program)) {
        expr = {type: "value", value: Number(match[0])};
    } else if (match = /^[^\s(),#"]+/.exec(program)) {
        expr = {type: "word", name: match[0]};
    } else {
        throw new SyntaxError("Unexpected Syntax: " + program);
    }

    return parseApply(expr, program.slice(match[0].length));
}

function skipSpace(string) {
    let first = string.search(/\S/);
    if (first == -1) return "";
    else return string.slice(first);
}

function parseApply(expr, program) {
    program = skipSpace(program);
    if (program[0] != "(") {
        return {expr, rest: program};
    }

    program = skipSpace(program.slice(1));
    expr = {type: "apply", operator: expr, args: []};
    while (program[0] != ")") {
        let arg = parseExpression(program);
        expr.args.push(arg.expr);
        program = skipSpace(arg.rest);
        if (program[0] == ",") {
            program = skipSpace(program.slice(1));
        } else if (program[0] != ")") {
            throw new SyntaxError("Expected ',' or ')'");
        }
    }
    return parseApply(expr, program.slice(1));
}

function parse(program) {
    let {expr, rest} = parseExpression(program);
    if (skipSpace(rest).length > 0) {
        throw new SyntaxError("Unexpected text after program");
    }
    return expr;
}

let specialForms = Object.create(null);

function evaluate(expr, scope) {
    if (expr.type == "value") {
        return expr.value;
    } else if (expr.type == "word") {
        if (expr.name in scope) {
            return scope[expr.name];
        } else {
            throw new ReferenceError(
                `Undefined Binding: ${expr.name}`);
        }
    } else if (expr.type == "apply") {
        let {operator, args} = expr;
        if (operator.type == "word" &&          // ***
            operator.name in specialForms) {
            return specialForms[operator.name](args, scope);
        } else {
            let op = evaluate(operator, scope); // ***
            if (typeof op == "function") {
                return op(...args.map(arg => evaluate(arg, scope)));
            } else {
                throw new TypeError("Applying a non-function.");
            }
        }
    }
}

specialForms.if = function(args, scope) {
    if (args.length != 3) {
        throw new SyntaxError("Wrong number of args to if");
    } else if (evaluate(args[0], scope) !== false) {
        return evaluate(args[1], scope);
    } else {
        return evaluate(args[2], scope);
    }
};

specialForms.while = (args, scope) => {
    if (args.length != 2) {
        throw new SyntaxError("wrong number of args to while");
    }
    while (evaluate(args[0], scope) !== false) {
        evaluate(args[1], scope);
    }

    // Since undefined does not exist in Egg, we return false,
    // for lack of a meaningful result.
    return false;
};

specialForms.do = (args, scope) => {
    let value = false;  // 왜 초기화?: args.length가 0일 수도 있으므로 
    for (let arg of args) {
        value = evaluate(arg, scope);
    }
    return value;
};

specialForms.define = (args, scope) => {
    if (args.length != 2 || args[0].type != "word") {
        throw new SyntaxError("Incorrect use of define");
    }
    let value = evaluate(args[1], scope);
    scope[args[0].name] = value;
    return value;
};

const topScope = Object.create(null);

topScope.true = true;
topScope.false = false;

/*
let prog = parse(`if(true, false, true)`);
console.log(prog);
console.log(evaluate(prog, topScope));
// → false
*/

for (let op of ["+", "-", "*", "/", "==", "<", ">"]) {
    topScope[op] = Function("a, b", `return a ${op} b;`);
}

topScope.print = value => {
    console.log(value);
    return value;
};

function run(program) {
    return evaluate(parse(program), Object.create(topScope));
}

/*
run(`
do(define(total, 0),
   define(count, 1),
   while(<(count, 11),
         do(define(total, +(total, count)),
            define(count, +(count, 1)))),
   print(total))
`);
// → 55
*/

specialForms.fun = (args, scope) => {
    if (!args.length) {
        throw new SyntaxError("Functions need a body");
    }
    let body = args[args.length - 1];
    let params = args.slice(0, args.length - 1).map(expr => {
        if (expr.type != "word") {
            throw new SyntaxError("Parameter names must be words");   
        }
        return expr.name;
    });

    return function() {
        if (arguments.length != params.length) {
            throw new TypeError("Wrong number of arguments");
        }
        let localScope = Object.create(scope);
        for (let i = 0; i < arguments.length; ++i) {
            localScope[params[i]] = arguments[i];
        }
        return evaluate(body, localScope);
    };
};

/*
run(`
do(define(plusOne, fun(a, +(a, 1))),
   print(plusOne(10)))
`);
// → 11

run(`
do(define(pow, fun(base, exp,
     if(==(exp, 0),
        1,
        *(base, pow(base, -(exp, 1)))))),
   print(pow(2, 10)))
`);
// → 1024
*/

// Exercises
// 1. Array
topScope.array = (...values) => {
    return values;
};

topScope.length = array => {
    return array.length;
};

topScope.element = (array, n) => {
    return array[n];
};

// 3. Comment
function my_skipSpace(string) {
    let first = string.search(/\S/);
    if (first == -1) return "";

    string = string.slice(first);
    let match = /^#.*/.exec(string);
    if (match) return skipSpace(string.slice(match[0].length));
    else return string;
}

function skipSpace(string) {
    let skippable = string.match(/^(\s|#.*)*/);
    return string.slice(skippable[0].length);
}

// 4. Fixing scope
specialForms.my_set = (args, scope) => {
    if (args.length != 2 || args[0].type != "word") {
        throw new SyntaxError("Incorrect use of set");
    }

    let curScope = scope;
    while (curScope != null &&
           !Object.prototype.hasOwnProperty.call(curScope, args[0].name)) {
        curScope = Object.getPrototypeOf(curScope);
    }
    if (!curScope) {
        throw new ReferenceError(
            `Undefined Binding: ${args[0].name}`);
    }

    let value = evaluate(args[1], scope);
    curScope[args[0].name] = value;
    return value;
};

specialForms.set = (args, env) => {
    if (args.length != 2 || args[0].type != "word") {
      throw new SyntaxError("Bad use of set");
    }
    let varName = args[0].name;
    let value = evaluate(args[1], env);

    for (let scope = env; scope; scope = Object.getPrototypeOf(scope)) {
      if (Object.prototype.hasOwnProperty.call(scope, varName)) {
        scope[varName] = value;
        return value;
      }
    }
    throw new ReferenceError(`Setting undefined variable ${varName}`);
};
  • We will build a programming language called Egg. ... It will allow simple abstraction based on functions.

Parsing

  • Everything in Egg is an expression. An expression can be the name of a binding, a number, a string, or an application. Applications are used for function calls but also for constructs such as if or while.

  • The uniformity of the Egg language means that things that are operators in JavaScript (such as >) are normal bindings in this language, applied just like other functions. And since the syntax has no concept of a block, we need a do construct to represent doing multiple things in sequence.

  • Expressions are not separated into lines, and they have a recursive structure. Application expressions contain other expressions. Fortunately, this problem can be solved very well by writing a parser function that is recursive in a way that reflects the recursive nature of the language.

  • function parseExpression(program) {
        program = skipSpace(program);
    
        let match, expr;
        if (match = /^"([^"]*)"/.exec(program)) {
            expr = {type: "value", value: match[1]};
        } else if (match = /^\d+\b/.exec(program)) {
            expr = {type: "value", value: Number(match[0])};
        } else if (match = /^[^\s(),#"]+/.exec(program)) {
            expr = {type: "word", name: match[0]};
        } else {
            throw new SyntaxError("Unexpected Syntax: " + program);
        }
    
        return parseApply(expr, program.slice(match[0].length));
    }
    
    function skipSpace(string) {
        let first = string.search(/\S/);
        if (first == -1) return "";
        else return string.slice(first);
    }
  • parseExpression uses three regular expressions to spot the three atomic elements that Egg supports: strings, numbers, and words.

  • We use SyntaxError instead of Error as the exception constructor, which is another standard error type, because it is a little more specific—it is also the error type thrown when an attempt is made to run an invalid JavaScript program.

  • ... parseApply, which checks whether the expression is an application. If so, it parses a parenthesized list of arguments.

  • function parseApply(expr, program) {
        program = skipSpace(program);
        if (program[0] != "(") {
            return {expr, rest: program};
        }
    
        program = skipSpace(program.slice(1));
        expr = {type: "apply", operator: expr, args: []};
        while (program[0] != ")") {
            let arg = parseExpression(program);
            expr.args.push(arg.expr);
            program = skipSpace(arg.rest);
            if (program[0] == ",") {
                program = skipSpace(program.slice(1));
            } else if (program[0] != ")") {
                throw new SyntaxError("Expected ',' or ')'");
            }
        }
        return parseApply(expr, program.slice(1));
    }
  • Because an application expression can itself be applied (such as in multiplier(2)(1)), parseApply must, after it has parsed an application, call itself again to check whether another pair of parentheses follows.

  • This is all we need to parse Egg. We wrap it in a convenient parse function that verifies that it has reached the end of the input string after parsing the expression (an Egg program is a single expression), and that gives us the program’s data structure.

  • function parse(program) {
        let {expr, rest} = parseExpression(program);
        if (skipSpace(rest).length > 0) {
            throw new SyntaxError("Unexpected text after program");
        }
        return expr;
    }

The evaluator

  • What can we do with the syntax tree for a program? Run it, of course! And that is what the evaluator does. You give it a syntax tree and a scope object that associates names with values, and it will evaluate the expression that the tree represents and return the value that this produces.

  • let specialForms = Object.create(null);
    
    function evaluate(expr, scope) {
        if (expr.type == "value") {
            return expr.value;
        } else if (expr.type == "word") {
            if (expr.name in scope) {
                return scope[expr.name];
            } else {
                throw new ReferenceError(
                    `Undefined Binding: ${expr.name}`);
            }
        } else if (expr.type == "apply") {
            let {operator, args} = expr;
            if (operator.type == "word" &&          // ***
                operator.name in specialForms) {
                return specialForms[operator.name](args, scope);
            } else {
                let op = evaluate(operator, scope); // ***
                if (typeof op == "function") {
                    return op(...args.map(arg => evaluate(arg, scope)));
                } else {
                    throw new TypeError("Applying a non-function.");
                }
            }
        }
    }
  • For a binding, we must check whether it is actually defined in the scope and, if it is, fetch the binding’s value.

  • Applications are more involved. If they are a special form, like if, we do not evaluate anything and pass the argument expressions, along with the scope, to the function that handles this form. If it is a normal call, we evaluate the operator, verify that it is a function, and call it with the evaluated arguments.

  • We use plain JavaScript function values to represent Egg’s function values. We will come back to this later, when the special form called fun is defined.

Special forms

  • The specialForms object is used to define special syntax in Egg. It associates words with functions that evaluate such forms. It is currently empty.

  • specialForms.if = function(args, scope) {
        if (args.length != 3) {
            throw new SyntaxError("Wrong number of args to if");
        } else if (evaluate(args[0], scope) !== false) {
            return evaluate(args[1], scope);
        } else {
            return evaluate(args[2], scope);
        }
    };
  • This if form is more similar to JavaScript’s ternary ?: operator than to JavaScript’s if. It is an expression, not a statement, and it produces a value, namely, the result of the second or third argument.

  • Egg also differs from JavaScript in how it handles the condition value to if. It will not treat things like zero or the empty string as false, only the precise value false.

  • The reason we need to represent if as a special form, rather than a regular function, is that all arguments to functions are evaluated before the function is called, whereas if should evaluate only either its second or its third argument, depending on the value of the first.

  • specialForms.while = (args, scope) => {
        if (args.length != 2) {
            throw new SyntaxError("wrong number of args to while");
        }
        while (evaluate(args[0], scope) !== false) {
            evaluate(args[1], scope);
        }
    
        // Since undefined does not exist in Egg, we return false,
        // for lack of a meaningful result.
        return false;
    };
  • Another basic building block is do, which executes all its arguments from top to bottom. Its value is the value produced by the last argument.

  • specialForms.do = (args, scope) => {
        let value = false;  // 왜 초기화?: args.length가 0일 수도 있으므로 
        for (let arg of args) {
            value = evaluate(arg, scope);
        }
        return value;
    };
  • Since define, like everything, is an expression, it must return a value. We’ll make it return the value that was assigned (just like JavaScript’s = operator).

  • specialForms.define = (args, scope) => {
        if (args.length != 2 || args[0].type != "word") {
            throw new SyntaxError("Incorrect use of define");
        }
        let value = evaluate(args[1], scope);
        scope[args[0].name] = value;
        return value;
    };

The environment

  • The scope accepted by evaluate is an object with properties whose names correspond to binding names and whose values correspond to the values those bindings are bound to.
  • To be able to use the if construct we just defined, we must have access to Boolean values.
  • const topScope = Object.create(null);
    topScope.true = true;
    topScope.false = false;
  • To supply basic arithmetic and comparison operators, we will also add some function values to the scope. In the interest of keeping the code short, we’ll use Function to synthesize a bunch of operator functions in a loop, instead of defining them individually.
  • for (let op of ["+", "-", "*", "/", "==", "<", ">"]) {
        topScope[op] = Function("a, b", `return a ${op} b;`);
    }
  • topScope.print = value => {
        console.log(value);
        return value;
    };
  • The following function provides a convenient way to parse a program and run it in a fresh scope:
  • function run(program) {
        return evaluate(parse(program), Object.create(topScope));
    }
  • We’ll use object prototype chains to represent nested scopes so that the program can add bindings to its local scope without changing the top-level scope.

Functions

  • Fortunately, it isn’t hard to add a fun construct, which treats its last argument as the function’s body and uses all arguments before that as the names of the function’s parameters.

  • specialForms.fun = (args, scope) => {
        if (!args.length) {
            throw new SyntaxError("Functions need a body");
        }
        let body = args[args.length - 1];
        let params = args.slice(0, args.length - 1).map(expr => {
            if (expr.type != "word") {
                throw new SyntaxError("Parameter names must be words");   
            }
            return expr.name;
        });
    
        return function() {
            if (arguments.length != params.length) {
                throw new TypeError("Wrong number of arguments");
            }
            let localScope = Object.create(scope);
            for (let i = 0; i < arguments.length; ++i) {
                localScope[params[i]] = arguments[i];
            }
            return evaluate(body, localScope);
        };
    };
  • Functions in Egg get their own local scope. The function produced by the fun form creates this local scope and adds the argument bindings to it. It then evaluates the function body in this scope and returns the result.

Compilation

  • What we have built is an interpreter. During evaluation, it acts directly on the representation of the program produced by the parser.
  • Compilation is the process of adding another step between the parsing and the running of a program, which transforms the program into something that can be evaluated more efficiently by doing as much work as possible in advance. For example, in well-designed languages it is obvious, for each use of a binding, which binding is being referred to, without actually running the program. This can be used to avoid looking up the binding by name every time it is accessed, instead directly fetching it from some predetermined memory location.
  • ... But any process that converts a program to a different representation can be thought of as compilation.
  • It would be possible to write an alternative evaluation strategy for Egg, one that first converts the program to a JavaScript program, uses Function to invoke the JavaScript compiler on it, and then runs the result. When done right, this would make Egg run very fast while still being quite simple to implement.

Cheating

  • When we defined if and while, you probably noticed that they were more or less trivial wrappers around JavaScript’s own if and while. Similarly, the values in Egg are just regular old JavaScript values.
  • ... Regardless, this example ideally gave you an impression of the way programming languages work.
  • And when it comes to getting something done, cheating is more effective than doing everything yourself. Though the toy language in this chapter doesn’t do anything that couldn’t be done better in JavaScript, there are situations where writing small languages helps get real work done.
  • Such a language does not have to resemble a typical programming language. If JavaScript didn’t come equipped with regular expressions, for example, you could write your own parser and evaluator for regular expressions.
  • Or imagine you are building a giant robotic dinosaur and need to program its behavior. JavaScript might not be the most effective way to do this.
  • ... This is what is usually called a domain-specific language, a language tailored to express a narrow domain of knowledge.

Exercises

Arrays

  • https://developer.mozilla.org/ko/docs/Web/JavaScript/Reference/Functions/rest_parameters

  • https://developer.mozilla.org/ko/docs/Web/JavaScript/Reference/Operators/Spread_syntax

  • topScope.array = (...values) => {
        return values;
    };
    
    topScope.length = array => {
        return array.length;
    };
    
    topScope.element = (array, n) => {
        return array[n];
    };

    Closure

  • Again, we are riding along on a JavaScript mechanism to get the equivalent feature in Egg. Special forms are passed the local scope in which they are evaluated so that they can evaluate their subforms in that scope. The function returned by fun has access to the scope argument given to its enclosing function and uses that to create the function’s local scope when it is called.

  • This means that the prototype of the local scope will be the scope in which the function was created, which makes it possible to access bindings in that scope from the function. This is all there is to implementing closure (though to compile it in a way that is actually efficient, you’d need to do some more work).

Comments

  • function my_skipSpace(string) {
        let first = string.search(/\S/);
        if (first == -1) return "";
    
        string = string.slice(first);
        let match = /^#.*/.exec(string);
        if (match) return skipSpace(string.slice(match[0].length));
        else return string;
    }
    
    function skipSpace(string) {
        let skippable = string.match(/^(\s|#.*)*/);
        return string.slice(skippable[0].length);
    }

Fixing scope

in operator는 프로토타입을 포함해서 확인하고 [] 역시 프로토타입을 포함해서 참조 가능하지만, []을 사용한 대입은 현재 객체에 해당 속성을 추가함

  • specialForms.my_set = (args, scope) => {
        if (args.length != 2 || args[0].type != "word") {
            throw new SyntaxError("Incorrect use of set");
        }
    
        let curScope = scope;
        while (curScope != null &&
            !Object.prototype.hasOwnProperty.call(curScope, args[0].name)) {
            curScope = Object.getPrototypeOf(curScope);
        }
        if (!curScope) {
            throw new ReferenceError(
                `Undefined Binding: ${args[0].name}`);
        }
    
        let value = evaluate(args[1], scope);
        curScope[args[0].name] = value;
        return value;
    };
    
    specialForms.set = (args, env) => {
        if (args.length != 2 || args[0].type != "word") {
            throw new SyntaxError("Bad use of set");
        }
        let varName = args[0].name;
        let value = evaluate(args[1], env);
    
        for (let scope = env; scope; scope = Object.getPrototypeOf(scope)) {
            if (Object.prototype.hasOwnProperty.call(scope, varName)) {
                scope[varName] = value;
                return value;
            }
        }
        throw new ReferenceError(`Setting undefined variable ${varName}`);
    };