diff --git a/spec/assignments.sh b/spec/assignments.sh new file mode 100644 index 00000000..e02e76f0 --- /dev/null +++ b/spec/assignments.sh @@ -0,0 +1,49 @@ + +# Single assignment +var a = ""; +var a = "something"; +var a = "/usr/home/ken"; +var a = "C:\\Users\\Bill"; +var interests = ( + "plan9" + "go" + "c" + "asm" + "scheme" +); + +# MultipleAssign +var a, b = "1", "2"; +var a, b, c, d, e, ff, ggg, hhhh = "1", "2", "3", "4", "5", "6", "7", "8"; +var A, B = (), (); + +var aa, bb = (), ("a" "b"); + +# MultipleAssign2 +var (a="1"); + +var ( + a = () +); + +var ( + this = "", + is = "", + boring = "", +); + +var ( + localHost = "localhost", + targetHost = "victim.tld", +); + +# ExecAssign +var out <= boom; +var _, _ <= nuke deploy --location brasilia; + +# set assignments +a = "1"; +a = (); +a = ("a" "b"); + +out, sts <= boom --again; diff --git a/spec/cmd.sh b/spec/cmd.sh new file mode 100644 index 00000000..d724593c --- /dev/null +++ b/spec/cmd.sh @@ -0,0 +1,8 @@ +echo +echo "hello" +echo "hello" "world" +echo $name +ls $path +ls /etc/passwd +cat /non-existent-path + diff --git a/spec/comments.sh b/spec/comments.sh new file mode 100644 index 00000000..fcd6410d --- /dev/null +++ b/spec/comments.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env nash + +#!C:\Users\i4k\bin\nash.exe + +# + +#@#$%¨&*()_+ +#[[ç[çsçf]~sçaf~]asçf~]lasfasdf~kas~fkasdjfo34utn3y4y34nt78y4283y4t823 +# +#ç~~~~][´```{`´´´;;...maslk dlahs hhkHKHHGJKG GJ JKjgjk gjhkg JGJGkjhg13153//*}] \ No newline at end of file diff --git a/spec/nash.ohm b/spec/nash.ohm new file mode 100644 index 00000000..17e9c6b5 --- /dev/null +++ b/spec/nash.ohm @@ -0,0 +1,182 @@ +// Formal specification of nash language using ohm PEG +// For informations about ohm syntax visit the site below: +// https://github.com/harc/ohm + +// There's an interactive Nash's grammar visualizer at: +// https://ohmlang.github.io/editor/#fca725147857c09bb442be1ce03048e0 + +// Note: Nash has automatic semicolon insertion, but this isn't defined here +// to 
not complicate the grammar spec. + +// TODO: rfork, cmd redirections, pipes, import, string-concat + +Nash { + Block + = Statement* + + Body + = "{" Block "}" + + comment + = "#" everythingNoNL* + + Comment + = comment + + // Syntatic rules named *Internal are only created to code reuse + // They're not nash syntax per-se. + + VarStmt (a variable declaration) + = "var" AssignInternal stmtEnd + + AssignStmt (an assignment declaration) + = AssignInternal stmtEnd + + MultipleAssignStmt + = MultipleAssignInternal stmtEnd + + MultipleAssign2Stmt + = MultipleAssign2Internal stmtEnd + + AssignInternal + = SingleAssign | MultipleAssignInternal | MultipleAssign2Internal + + SingleAssign + = ident (ValueAssign | ExecAssign) + + ValueAssign + = "=" VRHS + + VRHS (a value right-hand-side) + = Literal | Variable + + ExecAssign + = "<=" ExecExpr + + // a, b, c = ... + // out, err, status <= + MultipleAssignInternal (a multiple variables assignment) + = MLHS (MultipleValueAssignInternal | ExecAssign) + + // a, b, c + MLHS (a multiple left hand side) + = NonemptyListOf + + MultipleAssign2Internal (a list of assignments) + = "(" NonemptyListOf ","? ")" + + MultipleValueAssignInternal + = "=" MVRHS + + // "1", "2", (), ("test") + MVRHS + = NonemptyListOf ","? + + // ls $path; + // ls /etc; + CmdStmt (a command statement) + = CmdExpr stmtEnd + + CmdExpr + = Program ~"(" (Arg|Variable)* + + // An executable expression is a command or function call + // echo "hello" + // deploy() + ExecExpr (an executable expression) + = CmdExpr | FuncallExpr + + FuncallStmt (a function call) + = FuncallExpr stmtEnd + + FuncallExpr + = ident "(" ListOf ")" + + // for {} + // for i in $lst {} + ForStmt (a for statement) + = "for" ForClause? Body + + ForClause + = ident "in" Variable + + IfStmt (an if statement) + = "if" Condition Body ElseStmt? 
// Validate nash's grammar: every example script (*.sh) that lives next to
// this file must be accepted by the grammar defined in nash.ohm.
//
// The process exit status is non-zero when at least one example is rejected,
// so the check can be used from CI.

'use strict';

const fs = require('fs');
const path = require('path');
const ohm = require('ohm-js');

// Resolve the grammar and the examples from this file's location so the
// check does not depend on the directory it is launched from.
const SPEC_DIR = __dirname;

/**
 * Lists the example scripts (file names ending in ".sh") found directly
 * inside `rootDir`. The scan is not recursive.
 *
 * If the directory cannot be listed, an Error is thrown from the readdir
 * callback (it surfaces as an uncaught exception and aborts the script)
 * instead of failing later on an undefined file list.
 *
 * @param {string} rootDir - Directory to scan.
 * @param {function(string[]): void} cb - Receives the script paths
 *     (`rootDir` joined with each file name), sorted by name.
 */
function getExamples(rootDir, cb) {
  fs.readdir(rootDir, function (err, files) {
    if (err) {
      throw new Error('cannot list examples in ' + rootDir + ': ' + err.message);
    }
    const scripts = files
      .filter(function (file) {
        return file.endsWith('.sh');
      })
      .sort() // readdir gives no ordering guarantee; keep the report stable
      .map(function (file) {
        return path.join(rootDir, file);
      });
    cb(scripts);
  });
}

// Read as UTF-8 text: without an encoding, readFileSync returns a Buffer.
const grammarSource = fs.readFileSync(path.join(SPEC_DIR, 'nash.ohm'), 'utf8');
const nashGrammar = ohm.grammar(grammarSource);

// Test the grammar against each example.
getExamples(SPEC_DIR, function (files) {
  files.forEach(function (file) {
    const scriptSource = fs.readFileSync(file, 'utf8');
    const m = nashGrammar.match(scriptSource);
    if (m.succeeded()) {
      console.log(file, ': ok');
    } else {
      console.error(file, ': fail');
      // Say where and why the match failed.
      console.error(m.message);
      // Report the failure through the exit status, but keep checking the
      // remaining examples.
      process.exitCode = 1;
    }
  });
});