(* A simple combinator-style parsing library for F#. Inspired by the Hutton & Meijer paper as well as the FParsec combinator library. Other than being much smaller, this library trades away performance for simplicity. If you need a fast library, look at FParsec. Version: 1.10 (2023-11-01) *) module Combinator open System open System.Text.RegularExpressions ///

/// <summary>
/// A 3-tuple representing a "rich string" that the parser needs for
/// normal operation.
///   First element:  the input string.
///   Second element: the current position in the parse.
///   Third element:  a boolean which is true if debugging is enabled.
/// </summary>
type Input = string * int * bool

/// <summary>
/// Wraps a raw string as an Input for normal (non-debug) parsing:
/// the position starts at 0 and the debug flag is off.
/// </summary>
/// <param name="input">The string to be parsed.</param>
/// <returns>An Input positioned at the start of the string.</returns>
let prepare(input: string) : Input =
    let startPos, debugging = 0, false
    (input, startPos, debugging)

/// <summary>
/// Wraps a raw string as an Input for debug-mode parsing:
/// the position starts at 0 and the debug flag is on.
/// </summary>
/// <param name="input">The string to be parsed.</param>
/// <returns>An Input positioned at the start of the string.</returns>
let debug(input: string) : Input =
    let startPos, debugging = 0, true
    (input, startPos, debugging)

/// <summary>
/// Extracts the input string (the first component) from an Input tuple.
/// </summary>
/// <param name="i">An Input.</param>
/// <returns>The input string.</returns>
let input i =
    match i with
    | (text, _, _) -> text

/// <summary>
/// Extracts the current position (the second component) from an
/// Input tuple.
/// </summary>
/// <param name="i">An Input.</param>
/// <returns>The position, as an int.</returns>
let position i =
    match i with
    | (_, pos, _) -> pos

/// <summary>
/// Returns true if the Input's current position is at (or past) the
/// end of the input string ("end of file").
/// </summary>
/// <param name="i">An Input.</param>
/// <returns>true iff the position is EOF.</returns>
let isEOF i =
    String.length (input i) <= position i

/// <summary>
/// Returns true if the Input is running in debug mode, i.e. reads the
/// third component of the Input tuple.
/// </summary>
/// <param name="i">An Input.</param>
/// <returns>true iff debug mode is enabled.</returns>
let isDebug i =
    match i with
    | (_, _, debugging) -> debugging

/// <summary>
/// Represents the result of running a Parser<'a>: either a Success
/// carrying the parsed value and the remaining Input, or a Failure
/// carrying the failure position and the name of the rule that failed.
/// </summary>
type Outcome<'a> =
| Success of result: 'a * remaining: Input
| Failure of fail_pos: int * rule: string

/// <summary>
/// A Parser<'a> is a function from Input to Outcome<'a>.
/// </summary>
type Parser<'a> = Input -> Outcome<'a>

/// <summary>
/// recparser is used to declare a parser before it is defined. The
/// primary use case is when defining recursive parsers, e.g., parsers
/// of the form e ::= ... e ....
/// </summary>
/// <returns>
/// A tuple containing a simple parser that calls an implementation
/// stored in a mutable reference cell, and a mutable reference cell
/// initialized to hold a dummy implementation. The dummy implementation
/// raises (via failwith) if it is ever run, which signals that the
/// caller never stored a real parser in the cell.
/// </returns>
let recparser() =
    let dumbparser = fun (_: Input) -> failwith "You forgot to initialize your recursive parser."
    let r = ref dumbparser
    // Read the cell with .Value rather than the '!' operator: '!' triggers
    // an informational message in F# 6 (FS-1111). The cell is still an
    // ordinary ref, so the returned tuple has the same type as before.
    (fun (input: Input) -> r.Value input), r

// suggested refactoring in RFC FS-1111 due to ref cell deprecation
// https://github.com/fsharp/fslang-design/blob/main/FSharp-6.0/FS-1111-refcell-op-information-messages.md
// to be enabled CSCI 334, Spring 2024
// type 'a RefCell = { Value: 'a }
// let recparser() =
//   let dumbparser = fun (input: Input) -> failwith "You forgot to initialize your recursive parser."
//   let r = { Value = dumbparser }
//   (fun (input: Input) -> r.Value input), r

/// <summary>
/// Returns the hexadecimal character code for the given character,
/// prefixed with "0x" and padded to at least two lowercase hex digits.
/// </summary>
/// <param name="c">A char.</param>
/// <returns>A string representing a char code, in hex.</returns>
let cToHex(c: char) =
    // Convert through int rather than System.Convert.ToByte: ToByte throws
    // OverflowException for any char above U+00FF. Output for chars that
    // fit in a byte is unchanged.
    "0x" + (int c).ToString("x2")

/// <summary>
/// A debug parser. Prints debug information for the given parser
/// p as a side effect (to stderr), but only when the Input has its
/// debug flag set; otherwise it simply runs p.
/// </summary>
/// <param name="p">The parser to run and trace.</param>
/// <param name="label">A name for p, shown in every trace line.</param>
/// <param name="i">The Input to run p on.</param>
/// <returns>Exactly the Outcome produced by p on i.</returns>
let (<!>)(p: Parser<'a>)(label: string)(i: Input) : Outcome<'a> =
    // Describes the first character of some remaining text: its hex code,
    // or "EOF" when nothing remains.
    let nextChar (rem: string) =
        if rem.Length > 0 then cToHex rem.[0] else "EOF"

    // if debugging is enabled...
    if isDebug i then
        let text : string = input i
        let nextText = text.Substring(position i)
        // The next char is the first char of the text remaining at the
        // current position, not the first char of the whole input.
        eprintfn "[attempting: %s on \"%s\", next char: %s]" label nextText (nextChar nextText)
        let o = p i
        match o with
        | Success(_, i') ->
            let startPos = position i
            let endPos = position i'
            let consumed = text.Substring(startPos, endPos - startPos)
            let rem = text.[endPos..]
            eprintfn "[success: %s, consumed: \"%s\", remaining: \"%s\", next char: %s]" label consumed rem (nextChar rem)
        | Failure(pos, rule) ->
            let rem = text.[pos..]
            eprintfn "[failure at pos %d in rule [%s]: %s, remaining input: \"%s\", next char: %s]" pos rule label rem (nextChar rem)
        o
    // if debugging is disabled
    else
        p i

/// <summary>
/// Returns true if the given regular expression rgx matches s
/// anywhere in the string.
/// </summary>
/// <param name="s">A string.</param>
/// <param name="rgx">A string representing a C# regular expression.</param>
/// <returns>true iff rgx matches s.</returns>
let is_regexp(s: string)(rgx: string) =
    Regex.IsMatch(s, rgx)

/// <summary>
/// Returns true if the given character is whitespace, as defined by
/// the regular-expression class \s.
/// </summary>
/// <param name="c">A char.</param>
/// <returns>true iff c is whitespace.</returns>
let is_whitespace(c: char) =
    let asText = string c
    is_regexp asText @"\s"

/// <summary>
/// Returns true if the given character is whitespace, not including
/// newline characters. Only a tab or a space is accepted.
/// </summary>
/// <param name="c">A char.</param>
/// <returns>true iff c is a tab or a space.</returns>
let is_whitespace_no_nl(c: char) =
    // Direct comparison accepts exactly the characters the regex "\t| "
    // did, without allocating a string or invoking the regex engine.
    c = '\t' || c = ' '

/// <summary>
/// Returns true if the given character is an uppercase ASCII letter
/// (A through Z).
/// </summary>
/// <param name="c">A char.</param>
/// <returns>true iff c is uppercase.</returns>
let is_upper(c: char) =
    // Range test accepts exactly the characters the regex "[A-Z]" did,
    // without allocating a string or invoking the regex engine.
    'A' <= c && c <= 'Z'

/// <summary>
/// Returns true if the given character is a lowercase ASCII letter
/// (a through z).
/// </summary>
/// <param name="c">A char.</param>
/// <returns>true iff c is lowercase.</returns>
let is_lower(c: char) =
    // Range test accepts exactly the characters the regex "[a-z]" did,
    // without allocating a string or invoking the regex engine.
    'a' <= c && c <= 'z'

/// <summary>
/// Returns true if the given character is a letter, meaning that
/// either is_upper or is_lower accepts it.
/// </summary>
/// <param name="c">A char.</param>
/// <returns>true iff c is a letter.</returns>
let is_letter(c: char) =
    if is_upper c then true
    else is_lower c

/// <summary>
/// Returns true if the given character is a numeric digit
/// (ASCII 0 through 9).
/// </summary>
/// <param name="c">A char.</param>
/// <returns>true iff c is a numeric digit.</returns>
let is_digit(c: char) =
    // Range test accepts exactly the characters the regex "[0-9]" did,
    // without allocating a string or invoking the regex engine.
    '0' <= c && c <= '9'