mirror of
https://github.com/HeyPuter/puter.git
synced 2025-02-02 23:28:39 +08:00
feat(phoenix): Implement parsing of sed scripts
Sed is now finally able to actually run scripts, instead of ignoring all input and running a hard-coded test script!
This commit is contained in:
parent
e047b0bf30
commit
0d4f907b66
@ -56,6 +56,10 @@ export class AddressRange {
|
||||
this.leaveRangeNextLine = false;
|
||||
}
|
||||
|
||||
get addressCount() {
|
||||
return (this.start ? 1 : 0) + (this.end ? 1 : 0);
|
||||
}
|
||||
|
||||
updateMatchState(lineNumber, line) {
|
||||
// Only ranges have a state to update
|
||||
if (!(this.start && this.end)) {
|
||||
|
@ -16,47 +16,534 @@
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
import { AddressRange } from './address.js';
|
||||
import * as Commands from './command.js';
|
||||
import { Address, AddressRange } from './address.js';
|
||||
import {
|
||||
AppendTextCommand,
|
||||
BranchCommand,
|
||||
ConditionalBranchCommand,
|
||||
DebugPrintCommand,
|
||||
DeleteCommand,
|
||||
DeleteLineCommand,
|
||||
ExchangeCommand,
|
||||
GetAppendCommand,
|
||||
GetCommand,
|
||||
GroupEndCommand,
|
||||
GroupStartCommand,
|
||||
HoldAppendCommand,
|
||||
HoldCommand,
|
||||
InsertTextCommand,
|
||||
LabelCommand,
|
||||
LineNumberCommand,
|
||||
PrintCommand,
|
||||
PrintLineCommand,
|
||||
QuitCommand,
|
||||
ReplaceCommand,
|
||||
SubstituteCommand,
|
||||
SubstituteFlags,
|
||||
TransliterateCommand,
|
||||
ZapCommand,
|
||||
} from './command.js';
|
||||
import { Script } from './script.js';
|
||||
import { GrammarContext, standard_parsers } from '../../../../packages/parsely/exports.js';
|
||||
import { StringStream } from '../../../../packages/parsely/streams.js';
|
||||
import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../../../../packages/parsely/parser.js';
|
||||
|
||||
export const parseScript = (scriptString) => {
|
||||
const commands = [];
|
||||
/**
|
||||
* A slight hack: Parsely doesn't yet have an equivalent of backreferences.
|
||||
* So, while parsing /foo/bar/, where the `/` can be any character, we set the current_delimiter variable
|
||||
* to that delimiter character temporarily, so we can refer to it in the subsequent delimiters.
|
||||
*/
|
||||
class DelimiterParser extends Parser {
|
||||
// Delimiter recorded by the most recent `first` parse. Being static, it is shared by every instance.
static current_delimiter;
|
||||
|
||||
// NOTE(review): the `commands.push(...)` lines, the commented-out command examples and
// `return new Script(commands);` interleaved with this class look like the hard-coded script of the
// previous revision (diff residue) rather than class members — confirm against the real file.
// Generate a hard-coded script for now.
|
||||
// TODO: Actually parse input!
|
||||
/**
 * Store this parser's configuration.
 * @param {object} [options]
 * @param {boolean} [options.first=false] When true, this parser records the delimiter it reads;
 *     when false, it only accepts the delimiter that was last recorded.
 * @param {?string} [options.character=null] With `first`, the only character accepted as the
 *     delimiter; when null (or otherwise falsy) any character except newline and backslash is accepted.
 */
_create({ first = false, character = null } = {}) {
|
||||
this.character = character;
|
||||
this.first = first;
|
||||
}
|
||||
|
||||
commands.push(new Commands.SubstituteCommand(new AddressRange(), /Puter/, 'Frogger', new Commands.SubstituteFlags()));
|
||||
commands.push(new Commands.ConditionalBranchCommand(new AddressRange(), 'yay', true));
|
||||
commands.push(new Commands.ConditionalBranchCommand(new AddressRange(), 'nay', false));
|
||||
commands.push(new Commands.AppendTextCommand(new AddressRange(), 'HELLO!'));
|
||||
commands.push(new Commands.LabelCommand('yay'));
|
||||
commands.push(new Commands.PrintCommand(new AddressRange()));
|
||||
commands.push(new Commands.BranchCommand(new AddressRange(), 'end'));
|
||||
commands.push(new Commands.LabelCommand('nay'));
|
||||
commands.push(new Commands.AppendTextCommand(new AddressRange(), 'NADA!'));
|
||||
commands.push(new Commands.LabelCommand('end'));
|
||||
/**
 * Try to read one delimiter item from `stream`.
 * @param stream Parsely stream to read from.
 * @returns `UNRECOGNIZED` when the stream is exhausted or the item read is not an acceptable
 *     delimiter; otherwise `{ status: VALUE, $: 'delimiter', value }` holding the item read.
 */
_parse(stream) {
|
||||
// Read from a fork; the original stream is only touched by the join() below.
// NOTE(review): presumably this leaves `stream` unconsumed on the UNRECOGNIZED paths — confirm in parsely.
const sub_stream = stream.fork();
|
||||
|
||||
// commands.push(new TransliterateCommand(new AddressRange(), 'abcdefABCDEF', 'ABCDEFabcdef'));
|
||||
// commands.push(new ZapCommand(new AddressRange({start: new Address(1), end: new Address(10)})));
|
||||
// commands.push(new HoldAppendCommand(new AddressRange({start: new Address(1), end: new Address(10)})));
|
||||
// commands.push(new GetCommand(new AddressRange({start: new Address(11)})));
|
||||
// commands.push(new DebugPrintCommand(new AddressRange()));
|
||||
let { done, value } = sub_stream.next();
|
||||
if (done) return UNRECOGNIZED;
|
||||
|
||||
// commands.push(new ReplaceCommand(new AddressRange({start: new Address(3), end: new Address(30)}), "LOL"));
|
||||
// Opening delimiter: validate it and remember it. Later delimiters must equal the remembered one.
if (this.first) {
|
||||
if (this.character && this.character !== value)
|
||||
return UNRECOGNIZED;
|
||||
// Backslash and newline are disallowed as delimiters.
|
||||
if (value === '\n' || value === '\\')
|
||||
return UNRECOGNIZED;
|
||||
DelimiterParser.current_delimiter = value;
|
||||
} else if (DelimiterParser.current_delimiter !== value) {
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
|
||||
// commands.push(new GroupCommand(new AddressRange({ start: new Address(5), end: new Address(10) }), [
|
||||
// // new LineNumberCommand(),
|
||||
// // new TextCommand(new AddressRange({ start: new Address(8) }), "Well hello friends! :^)"),
|
||||
// new QuitCommand(new AddressRange({ start: new Address(8) })),
|
||||
// new NoopCommand(new AddressRange()),
|
||||
// new PrintCommand(new AddressRange({ start: new Address(2), end: new Address(14) })),
|
||||
// ]));
|
||||
|
||||
// commands.push(new LineNumberCommand(new AddressRange({ start: new Address(5), end: new Address(10) })));
|
||||
// commands.push(new PrintCommand());
|
||||
// commands.push(new NoopCommand());
|
||||
// commands.push(new PrintCommand());
|
||||
|
||||
return new Script(commands);
|
||||
// Accepted: fold the fork back into the original stream.
// NOTE(review): presumably this advances `stream` past the delimiter — confirm in parsely.
stream.join(sub_stream);
|
||||
return { status: VALUE, $: 'delimiter', value };
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parse the text of a sed script into a Script.
 *
 * The first argument to `define_parser` declares the grammar; the second holds
 * one handler per grammar symbol that turns the parsed nodes into Address,
 * AddressRange and command objects.
 *
 * @param {string} script_string Source text of the sed script.
 * @param {*} [options] Accepted for interface compatibility; not read here.
 * @returns {Script} `result.value` of the parse, i.e. what the `script` handler built.
 * @throws {Error} When a branch names a label that does not exist, groups are
 *     unbalanced, a command is given more addresses than it accepts, an `s`
 *     flag is repeated, or the two halves of a `y` command differ in length.
 */
export const parseScript = (script_string, options) => {

    const grammar_context = new GrammarContext({
        ...standard_parsers(),
        delimiter: DelimiterParser,
    });

    // Each '{' gets a fresh id and the '}' that closes it reuses that id, so
    // nested groups pair up innermost-first. (Two independent counters would
    // hand the inner '}' the outer '{' id.)
    // NOTE(review): assumes the runtime matches a group start to its end by
    // equal id — confirm in command.js / script.js.
    let next_group_id = 0;
    const open_group_ids = [];

    // POSIX defines labels as characters within "the portable filename character set".
    const is_portable_filename_char = c => /[A-Za-z0-9.\-_]/.test(c);

    // True for the delimiter recorded by the most recent opening `delimiter` parser.
    const is_current_delimiter = c => c === DelimiterParser.current_delimiter;

    // Grammar shared by `b`, `t` and `T`: the letter, then an optional label.
    const branch_rule = letter => a => a.sequence(
        a.literal(letter),
        a.optional(
            a.sequence(
                a.symbol('whitespace'),
                a.symbol('label'),
            ),
        ),
    );

    // Grammar shared by `a`, `c` and `i`: the letter, backslash-newline, then text.
    const text_rule = letter => a => a.sequence(
        a.literal(`${letter}\\\n`),
        a.symbol('text'),
    );

    // Handler shared by rules whose argument is the second parsed node.
    const second_value = it => it[1].value;

    // Handler shared by `b`, `t` and `T`: the label, or null when none was given.
    const optional_label = it => {
        if (it.length < 2) return null;
        return it[1].value[0].value;
    };

    // Every function symbol a command may be, in the order they are tried.
    const function_symbols = [
        '{', '}', ':', '=',
        'a', 'b', 'c', 'd', 'D', 'g', 'G', 'h', 'H', 'i',
        'l', 'p', 'P', 'q', 'Q', 's', 't', 'T', 'x', 'y', 'z',
    ];

    // Commands built from the address range alone: symbol -> [max addresses, constructor].
    const address_only_commands = new Map([
        ['=', [1, LineNumberCommand]],
        ['d', [2, DeleteCommand]],
        ['D', [2, DeleteLineCommand]],
        ['g', [2, GetCommand]],
        ['G', [2, GetAppendCommand]],
        ['h', [2, HoldCommand]],
        ['H', [2, HoldAppendCommand]],
        ['l', [2, DebugPrintCommand]],
        ['p', [2, PrintCommand]],
        ['P', [2, PrintLineCommand]],
        ['x', [2, ExchangeCommand]],
        ['z', [2, ZapCommand]],
    ]);

    const parser = grammar_context.define_parser({
        script: a => a.repeat(
            a.optional(a.symbol('command')),
            a.firstMatch(
                a.literal('\n'),
                a.literal(';'),
            ),
        ),
        command: a => a.sequence(
            a.symbol('whitespace'),
            a.optional(a.symbol('address_range')),
            a.symbol('whitespace'),
            a.firstMatch(
                a.discard(a.symbol('comment')),
                ...function_symbols.map(name => a.symbol(name)),
            ),
        ),
        address_range: a => a.sequence(
            a.optional(
                a.sequence(
                    a.symbol('address'),
                    a.optional(a.sequence(
                        a.literal(','),
                        a.symbol('address'),
                    )),
                ),
            ),
            a.optional(
                a.sequence(
                    a.symbol('whitespace'),
                    a.literal('!'),
                ),
            ),
        ),
        address: a => a.firstMatch(
            // TODO: A dollar sign, for "final line"
            a.symbol('decimal_number'),
            a.symbol('regex'),
        ),
        decimal_number: a => a.stringOf(c => /\d/.test(c)),
        regex: a => a.sequence(
            // Either `/re/`, or `\cREc` where `c` is any permitted delimiter.
            a.firstMatch(
                a.delimiter({ first: true, character: '/' }),
                a.sequence(
                    a.literal('\\'),
                    a.delimiter({ first: true }),
                ),
            ),
            a.stringUntil(is_current_delimiter),
            a.delimiter(),
        ),
        whitespace: a => a.discard(
            a.optional(
                a.stringOf(c => /[ \t]/.test(c)),
            ),
        ),
        label: a => a.stringOf(is_portable_filename_char),
        filename: a => a.stringOf(is_portable_filename_char),
        text: a => a.stringUntil('\n'),
        comment: a => a.sequence(
            a.literal('#'),
            a.stringOf(c => c !== '\n'),
        ),
        '{': a => a.literal('{'),
        '}': a => a.literal('}'),
        ':': a => a.sequence(
            a.literal(':'),
            a.symbol('label'),
        ),
        '=': a => a.literal('='),
        a: text_rule('a'),
        b: branch_rule('b'),
        c: text_rule('c'),
        d: a => a.literal('d'),
        D: a => a.literal('D'),
        g: a => a.literal('g'),
        G: a => a.literal('G'),
        h: a => a.literal('h'),
        H: a => a.literal('H'),
        i: text_rule('i'),
        l: a => a.literal('l'),
        p: a => a.literal('p'),
        P: a => a.literal('P'),
        q: a => a.literal('q'),
        Q: a => a.literal('Q'),
        s: a => a.sequence(
            a.literal('s'),
            a.delimiter({ first: true }),
            a.stringUntil(is_current_delimiter),
            a.delimiter(),
            a.stringUntil(is_current_delimiter),
            a.delimiter(),
            a.optional(
                a.repeat(
                    a.firstMatch(
                        a.literal('g'),
                        a.literal('p'),
                        a.symbol('decimal_number'),
                        a.sequence(
                            a.literal('w'),
                            a.symbol('whitespace'),
                            a.symbol('filename'),
                        ),
                    ),
                ),
            ),
        ),
        t: branch_rule('t'),
        T: branch_rule('T'),
        x: a => a.literal('x'),
        y: a => a.sequence(
            a.literal('y'),
            a.delimiter({ first: true }),
            a.stringUntil(is_current_delimiter),
            a.delimiter(),
            a.stringUntil(is_current_delimiter),
            a.delimiter(),
        ),
        z: a => a.literal('z'),
    }, {
        script: script => {
            // Comments yield `command` nodes with no value; drop those.
            const commands = script
                .filter(it => it.$ === 'command' && it.value)
                .map(it => it.value);

            // Record all labels that exist in the script, so we can validate branch commands.
            const labels = new Set();
            for (const command of commands) {
                if (command instanceof LabelCommand) {
                    labels.add(command.label);
                }
            }

            // Validate commands
            let group_depth = 0;
            for (const command of commands) {
                // Ensure branches all go to labels that exist
                if (command instanceof BranchCommand || command instanceof ConditionalBranchCommand) {
                    // Note: Branches to the end of the script don't have a label.
                    if (command.label && !labels.has(command.label))
                        throw new Error(`Label "${command.label}" does not exist in the script.`);
                }

                if (command instanceof GroupStartCommand) {
                    group_depth++;
                }

                if (command instanceof GroupEndCommand) {
                    if (group_depth < 1)
                        throw new Error('Unexpected "}": no open groups');
                    group_depth--;
                }
            }

            if (group_depth !== 0)
                throw new Error(`${group_depth} groups left open`);

            return new Script(commands);
        },
        command: command => {
            // Comments show up as empty commands. Just skip them.
            if (command.length === 0)
                return;

            // A single node is just the function; otherwise the address range comes first.
            const has_address_range = command.length !== 1;
            const address_range = has_address_range ? command[0].value : new AddressRange();
            const func = has_address_range ? command[1] : command[0];
            const addresses_provided = has_address_range ? address_range.addressCount : 0;

            const require_max_address_count = (count) => {
                if (addresses_provided > count)
                    throw new Error(`Too many addresses provided to '${func.$}' command, most is ${count}`);
            };

            // Commands that need nothing beyond their address range.
            const address_only = address_only_commands.get(func.$);
            if (address_only) {
                const [max_addresses, CommandType] = address_only;
                require_max_address_count(max_addresses);
                return new CommandType(address_range);
            }

            // Remaining functions carry arguments or need special handling.
            switch (func.$) {
                case '{': {
                    require_max_address_count(2);
                    const id = ++next_group_id;
                    open_group_ids.push(id);
                    return new GroupStartCommand(address_range, id);
                }
                case '}': {
                    require_max_address_count(0);
                    // A stray '}' has nothing to pair with; it gets id 0 and is
                    // rejected by the `script` handler's group validation.
                    return new GroupEndCommand(open_group_ids.pop() ?? 0);
                }
                case ':': {
                    require_max_address_count(0);
                    return new LabelCommand(func.value);
                }
                case 'a': {
                    require_max_address_count(1);
                    return new AppendTextCommand(address_range, func.value);
                }
                case 'b': {
                    require_max_address_count(2);
                    return new BranchCommand(address_range, func.value);
                }
                case 'c': {
                    require_max_address_count(2);
                    return new ReplaceCommand(address_range, func.value);
                }
                case 'i': {
                    require_max_address_count(1);
                    return new InsertTextCommand(address_range, func.value);
                }
                case 'q': {
                    require_max_address_count(1);
                    return new QuitCommand(address_range, false);
                }
                case 'Q': {
                    require_max_address_count(1);
                    return new QuitCommand(address_range, true);
                }
                case 's': {
                    require_max_address_count(2);
                    const { regex, replacement, flags } = func.value;
                    return new SubstituteCommand(address_range, regex, replacement, flags);
                }
                case 't':
                case 'T': {
                    require_max_address_count(2);
                    return new ConditionalBranchCommand(address_range, func.value, func.$ === 't');
                }
                case 'y': {
                    require_max_address_count(2);
                    const { input, replacement } = func.value;
                    return new TransliterateCommand(address_range, input, replacement);
                }
                default:
                    throw new Error(`Unimplemented command '${func.$}'`);
            }
        },
        address_range: address_range => {
            if (address_range.length === 0)
                return new AddressRange();

            // A lone node is either the address part or the '!' part.
            const only_inverted = address_range.length === 1
                && address_range[0].value[0].$ !== 'address';
            if (only_inverted)
                return new AddressRange({ inverted: true });

            // Either 1 or two addresses; the second sits inside the `, address` sequence.
            const [start_node, end_node] = address_range[0].value;
            const start = start_node.value;
            const end = end_node ? end_node.value[1].value : null;

            // A second node means the addresses were followed by '!'.
            if (address_range.length === 1)
                return new AddressRange({ start, end });
            return new AddressRange({ start, end, inverted: true });
        },
        address: address => {
            if (address instanceof RegExp)
                return new Address(address);
            return new Address(Number(address));
        },
        regex: regex => new RegExp(regex[1].value),

        // Functions with arguments
        ':': second_value,
        a: second_value,
        b: optional_label,
        c: second_value,
        i: second_value,
        s: it => {
            // Node layout: 's', delimiter, regex, delimiter, replacement, delimiter, [flags].
            const regex = it[2];
            const replacement = it[4];
            const flag_values = it[6];
            const flags = {
                global: false,
                nthOccurrence: null,
                print: false,
                writeToFile: null,
            };
            if (flag_values && flag_values.value.length) {
                for (const flag of flag_values.value) {
                    if (flag.value instanceof Array) {
                        // It's a 'w'
                        if (flags.writeToFile)
                            throw new Error(`Multiple 'w' flags given to s command`);
                        flags.writeToFile = flag.value[1].value;

                    } else if (flag.value === 'g') {
                        if (flags.global)
                            throw new Error(`Multiple 'g' flags given to s command`);
                        flags.global = true;

                    } else if (flag.value === 'p') {
                        if (flags.print)
                            throw new Error(`Multiple 'p' flags given to s command`);
                        flags.print = true;

                    } else {
                        // Should be a number
                        if (flags.nthOccurrence !== null)
                            throw new Error(`Multiple number flags given to s command`);
                        flags.nthOccurrence = Number.parseInt(flag.value, 10);
                    }
                }
            }
            return {
                regex: new RegExp(regex.value),
                replacement: replacement.value,
                flags: new SubstituteFlags(flags),
            };
        },
        t: optional_label,
        T: optional_label,
        y: it => {
            // Node layout: 'y', delimiter, input, delimiter, replacement, delimiter.
            const input = it[2].value;
            const replacement = it[4].value;
            if (input.length !== replacement.length)
                throw new Error('Input and replacement parts of y command must have the same length');

            return { input, replacement };
        },
    });

    const stream = new StringStream(script_string);
    const result = parser(stream, 'script', { must_consume_all_input: true });
    return result.value;
};
|
||||
|
Loading…
Reference in New Issue
Block a user