using System;
using System.Collections.Generic;
using System.Globalization;
using UniLinq;

namespace Sprache
{
    /// <summary>
    /// Parsers and combinators.
    /// </summary>
    public static partial class Parse
    {
        /// <summary>
        /// Parse a single character matching <paramref name="predicate"/>.
        /// </summary>
        /// <param name="predicate">Test applied to the current input character.</param>
        /// <param name="description">Text reported as the expectation when the parser fails.</param>
        /// <returns>
        /// A parser that succeeds with the current character and the advanced input when the
        /// predicate holds, and otherwise fails at the unadvanced input, either because the
        /// character did not match or because the input was already at its end.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<char> Char(Predicate<char> predicate, string description)
        {
            if (predicate == null) throw new ArgumentNullException(nameof(predicate));
            if (description == null) throw new ArgumentNullException(nameof(description));

            return i =>
            {
                if (!i.AtEnd)
                {
                    if (predicate(i.Current))
                        return Result.Success(i.Current, i.Advance());

                    return Result.Failure<char>(i,
                        $"unexpected '{i.Current}'",
                        new[] { description });
                }

                return Result.Failure<char>(i,
                    "Unexpected end of input reached",
                    new[] { description });
            };
        }

        /// <summary>
        /// Parse a single character except those matching <paramref name="predicate"/>.
        /// </summary>
        /// <param name="predicate">Characters not to match.</param>
        /// <param name="description">Description of characters that don't match.</param>
        /// <returns>A parser for characters except those matching <paramref name="predicate"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="predicate"/> is null.</exception>
        public static Parser<char> CharExcept(Predicate<char> predicate, string description)
        {
            // Fail fast: without this guard a null predicate only surfaces as a
            // NullReferenceException once the returned parser is run.
            if (predicate == null) throw new ArgumentNullException(nameof(predicate));

            return Char(c => !predicate(c), "any character except " + description);
        }

        /// <summary>
        /// Parse a single character c.
        /// </summary>
        /// <param name="c">The exact character to match; also used as the expectation text.</param>
        /// <returns>A parser that matches only <paramref name="c"/>.</returns>
        public static Parser<char> Char(char c)
        {
            return Char(ch => c == ch, char.ToString(c));
        }

        /// <summary>
        /// Parse a single character of any in c.
        /// </summary>
        /// <param name="c">The accepted characters; the expectation text is these joined with "|".</param>
        /// <returns>A parser that matches one character contained in <paramref name="c"/>.</returns>
        public static Parser<char> Chars(params char[] c)
        {
            return Char(c.Contains, StringExtensions.Join("|", c));
        }

        /// <summary>
        /// Parse a single character of any in c.
        /// </summary>
        /// <param name="c">A string whose characters are the accepted characters.</param>
        /// <returns>A parser that matches one character contained in <paramref name="c"/>.</returns>
        public static Parser<char> Chars(string c)
        {
            return Char(c.ToEnumerable().Contains, StringExtensions.Join("|", c.ToEnumerable()));
        }

        /// <summary>
        /// Parse a single character except c.
        /// </summary>
        /// <param name="c">The one character that must not be matched.</param>
        /// <returns>A parser that matches any single character other than <paramref name="c"/>.</returns>
        public static Parser<char> CharExcept(char c)
        {
            return CharExcept(ch => c == ch, char.ToString(c));
        }

        /// <summary>
        /// Parses a single character except for those in the given parameters.
        /// </summary>
        /// <param name="c">The characters that must not be matched.</param>
        /// <returns>A parser that matches any single character not contained in <paramref name="c"/>.</returns>
        public static Parser<char> CharExcept(IEnumerable<char> c)
        {
            // Materialize once so the sequence is not re-enumerated on every parsed character.
            var chars = c as char[] ?? c.ToArray();
            return CharExcept(chars.Contains, StringExtensions.Join("|", chars));
        }

        /// <summary>
        /// Parses a single character except for those in c.
        /// </summary>
        /// <param name="c">A string whose characters must not be matched.</param>
        /// <returns>A parser that matches any single character not contained in <paramref name="c"/>.</returns>
        public static Parser<char> CharExcept(string c)
        {
            return CharExcept(c.ToEnumerable().Contains, StringExtensions.Join("|", c.ToEnumerable()));
        }

        /// <summary>
        /// Parse a single character in a case-insensitive fashion.
        /// </summary>
        /// <param name="c">The character to match; both sides are compared after <c>char.ToLower</c>.</param>
        /// <returns>A parser that yields the character as it appeared in the input.</returns>
        public static Parser<char> IgnoreCase(char c)
        {
            return Char(ch => char.ToLower(c) == char.ToLower(ch), char.ToString(c));
        }

        /// <summary>
        /// Parse a string in a case-insensitive fashion.
        /// </summary>
        /// <param name="s">The text to match, character by character.</param>
        /// <returns>A parser, named <paramref name="s"/>, that yields the matched characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public static Parser<IEnumerable<char>> IgnoreCase(string s)
        {
            if (s == null) throw new ArgumentNullException(nameof(s));

            // Fold the per-character parsers into one sequential parser, starting from
            // a parser that consumes nothing and returns an empty sequence.
            return s
                .ToEnumerable()
                .Select(IgnoreCase)
                .Aggregate(Return(Enumerable.Empty<char>()),
                    (a, p) => a.Concat(p.Once()))
                .Named(s);
        }

        /// <summary>
        /// Parse any character.
        /// </summary>
        public static readonly Parser<char> AnyChar = Char(c => true, "any character");

        /// <summary>
        /// Parse a whitespace.
        /// </summary>
        public static readonly Parser<char> WhiteSpace = Char(char.IsWhiteSpace, "whitespace");

        /// <summary>
        /// Parse a digit.
        /// </summary>
        public static readonly Parser<char> Digit = Char(char.IsDigit, "digit");

        /// <summary>
        /// Parse a letter.
        /// </summary>
        public static readonly Parser<char> Letter = Char(char.IsLetter, "letter");

        /// <summary>
        /// Parse a letter or digit.
        /// </summary>
        public static readonly Parser<char> LetterOrDigit = Char(char.IsLetterOrDigit, "letter or digit");

        /// <summary>
        /// Parse a lowercase letter.
        /// </summary>
        public static readonly Parser<char> Lower = Char(char.IsLower, "lowercase letter");

        /// <summary>
        /// Parse an uppercase letter.
        /// </summary>
        public static readonly Parser<char> Upper = Char(char.IsUpper, "uppercase letter");

        /// <summary>
        /// Parse a numeric character.
        /// </summary>
        public static readonly Parser<char> Numeric = Char(char.IsNumber, "numeric character");

        /// <summary>
        /// Parse a string of characters.
        /// </summary>
        /// <param name="s">The exact text to match, character by character.</param>
        /// <returns>A parser, named <paramref name="s"/>, that yields the matched characters.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
        public static Parser<IEnumerable<char>> String(string s)
        {
            if (s == null) throw new ArgumentNullException(nameof(s));

            return s
                .ToEnumerable()
                .Select(Char)
                .Aggregate(Return(Enumerable.Empty<char>()),
                    (a, p) => a.Concat(p.Once()))
                .Named(s);
        }

        /// <summary>
        /// Constructs a parser that will fail if the given parser succeeds,
        /// and will succeed if the given parser fails. In any case, it won't
        /// consume any input. It's like a negative look-ahead in regex.
        /// </summary>
        /// <typeparam name="T">The result type of the given parser</typeparam>
        /// <param name="parser">The parser to wrap</param>
        /// <returns>A parser that is the opposite of the given parser.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<object> Not<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return i =>
            {
                var result = parser(i);

                if (result.WasSuccessful)
                {
                    var msg = $"`{StringExtensions.Join(", ", result.Expectations)}' was not expected";
                    return Result.Failure<object>(i, msg, new string[0]);
                }

                // Both outcomes are reported at the original input 'i', so nothing is consumed.
                return Result.Success<object>(null, i);
            };
        }

        /// <summary>
        /// Parse first, and if successful, then parse second.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="first"/>.</typeparam>
        /// <typeparam name="U">Result type of the parser produced by <paramref name="second"/>.</typeparam>
        /// <param name="first">The parser to run first.</param>
        /// <param name="second">Builds the follow-up parser from the value produced by <paramref name="first"/>.</param>
        /// <returns>A parser that runs the follow-up parser on the remainder left by <paramref name="first"/>.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<U> Then<T, U>(this Parser<T> first, Func<T, Parser<U>> second)
        {
            if (first == null) throw new ArgumentNullException(nameof(first));
            if (second == null) throw new ArgumentNullException(nameof(second));

            return i => first(i).IfSuccess(s => second(s.Value)(s.Remainder));
        }

        /// <summary>
        /// Parse a stream of elements.
        /// </summary>
        /// <typeparam name="T">The type of element to parse.</typeparam>
        /// <param name="parser">A parser that matches a single element.</param>
        /// <returns>A parser that always succeeds, yielding the (possibly empty) list of matched elements.</returns>
        /// <remarks>Implemented imperatively to decrease stack usage.</remarks>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<IEnumerable<T>> Many<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return i =>
            {
                var remainder = i;
                var result = new List<T>();
                var r = parser(i);

                while (r.WasSuccessful)
                {
                    // A successful match that consumed nothing would loop forever; stop instead.
                    if (remainder.Equals(r.Remainder))
                        break;

                    result.Add(r.Value);
                    remainder = r.Remainder;
                    r = parser(remainder);
                }

                return Result.Success<IEnumerable<T>>(result, remainder);
            };
        }

        /// <summary>
        /// Parse a stream of elements, failing if any element is only partially parsed.
        /// </summary>
        /// <typeparam name="T">The type of element to parse.</typeparam>
        /// <param name="parser">A parser that matches a single element.</param>
        /// <returns>A <see cref="Parser{T}"/> that matches the sequence.</returns>
        /// <remarks>
        /// <para>
        /// Using <see cref="XMany{T}(Parser{T})"/> may be preferable to <see cref="Many{T}(Parser{T})"/>
        /// where the first character of each match identified by <paramref name="parser"/>
        /// is sufficient to determine whether the entire match should succeed. The X*
        /// methods typically give more helpful errors and are easier to debug than their
        /// unqualified counterparts.
        /// </para>
        /// </remarks>
        /// <seealso cref="XOr{T}(Parser{T}, Parser{T})"/>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<IEnumerable<T>> XMany<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            // After Many stops, try one more element through XOr: a failure that consumed
            // input is surfaced, otherwise the elements collected so far are returned.
            return parser.Many().Then(m => parser.Once().XOr(Return(m)));
        }

        /// <summary>
        /// Parse a stream of elements with at least one item.
        /// </summary>
        /// <typeparam name="T">The type of element to parse.</typeparam>
        /// <param name="parser">A parser that matches a single element.</param>
        /// <returns>A parser that requires one match and then collects further matches with Many.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<IEnumerable<T>> AtLeastOnce<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return parser.Once().Then(t1 => parser.Many().Select(ts => t1.Concat(ts)));
        }

        /// <summary>
        /// Parse a stream of elements with at least one item. Except the first
        /// item, all other items will be matched with the XMany operator.
        /// </summary>
        /// <typeparam name="T">The type of element to parse.</typeparam>
        /// <param name="parser">A parser that matches a single element.</param>
        /// <returns>A parser that requires one match and then collects further matches with XMany.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<IEnumerable<T>> XAtLeastOnce<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return parser.Once().Then(t1 => parser.XMany().Select(ts => t1.Concat(ts)));
        }

        /// <summary>
        /// Parse end-of-input.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <param name="parser">The parser whose match must reach the end of the input.</param>
        /// <returns>
        /// A parser that passes through the result of <paramref name="parser"/> when its remainder
        /// is at the end, and otherwise fails at that remainder expecting "end of input".
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<T> End<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return i => parser(i).IfSuccess(s =>
                s.Remainder.AtEnd
                    ? s
                    : Result.Failure<T>(
                        s.Remainder,
                        $"unexpected '{s.Remainder.Current}'",
                        new[] { "end of input" }));
        }

        /// <summary>
        /// Take the result of parsing, and project it onto a different domain.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <typeparam name="U">Result type after conversion.</typeparam>
        /// <param name="parser">The parser supplying the value.</param>
        /// <param name="convert">Maps the parsed value to the returned value.</param>
        /// <returns>A parser yielding the converted value.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<U> Select<T, U>(this Parser<T> parser, Func<T, U> convert)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));
            if (convert == null) throw new ArgumentNullException(nameof(convert));

            return parser.Then(t => Return(convert(t)));
        }

        /// <summary>
        /// Parse the token, embedded in any amount of whitespace characters.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <param name="parser">The parser for the token itself.</param>
        /// <returns>A parser that skips leading and trailing whitespace and yields only the token.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<T> Token<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return from leading in WhiteSpace.Many()
                   from item in parser
                   from trailing in WhiteSpace.Many()
                   select item;
        }

        /// <summary>
        /// Refer to another parser indirectly. This allows circular compile-time dependency between parsers.
        /// </summary>
        /// <typeparam name="T">Result type of the referenced parser.</typeparam>
        /// <param name="reference">Produces the referenced parser; invoked on first use and cached.</param>
        /// <returns>A parser that delegates to the referenced parser and records its result in the input's memo table.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="reference"/> is null.</exception>
        public static Parser<T> Ref<T>(Func<Parser<T>> reference)
        {
            if (reference == null) throw new ArgumentNullException(nameof(reference));

            Parser<T> p = null;

            return i =>
            {
                // Resolve lazily so the referenced parser may be defined after this call.
                if (p == null)
                    p = reference();

                if (i.Memos.ContainsKey(p))
                {
                    // Entries under this key are only written below, as IResult<T>.
                    var pResult = i.Memos[p] as IResult<T>;
                    if (pResult.WasSuccessful)
                        return pResult;
                    throw new ParseException(pResult.ToString());
                }

                // Seed the memo with a failure before running the parser: if the parser
                // re-enters at this same input, the branch above finds this entry and throws.
                i.Memos[p] = Result.Failure<T>(i,
                    "Left recursion in the grammar.",
                    new string[0]);
                var result = p(i);
                i.Memos[p] = result;
                return result;
            };
        }

        /// <summary>
        /// Convert a stream of characters to a string.
        /// </summary>
        /// <param name="characters">A parser yielding a sequence of characters.</param>
        /// <returns>A parser yielding those characters as a single string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="characters"/> is null.</exception>
        public static Parser<string> Text(this Parser<IEnumerable<char>> characters)
        {
            // Guard here so the exception names this method's own parameter.
            if (characters == null) throw new ArgumentNullException(nameof(characters));

            return characters.Select(chs => new string(chs.ToArray()));
        }

        /// <summary>
        /// Parse first, if it succeeds, return first, otherwise try second.
        /// </summary>
        /// <typeparam name="T">Result type of both parsers.</typeparam>
        /// <param name="first">The preferred parser.</param>
        /// <param name="second">The alternative parser, run from the same input as <paramref name="first"/>.</param>
        /// <returns>A parser that yields the first successful alternative, or the most informative failure.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<T> Or<T>(this Parser<T> first, Parser<T> second)
        {
            if (first == null) throw new ArgumentNullException(nameof(first));
            if (second == null) throw new ArgumentNullException(nameof(second));

            return i =>
            {
                var fr = first(i);
                if (!fr.WasSuccessful)
                {
                    return second(i).IfFailure(sf => DetermineBestError(fr, sf));
                }

                // A zero-length success of first still gives second a chance;
                // if second fails, fall back to first's result.
                if (fr.Remainder.Equals(i))
                    return second(i).IfFailure(sf => fr);

                return fr;
            };
        }

        /// <summary>
        /// Names part of the grammar for help with error messages.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <param name="parser">The parser to name.</param>
        /// <param name="name">The expectation reported when <paramref name="parser"/> fails without consuming input.</param>
        /// <returns>A parser identical to <paramref name="parser"/> except for the expectations of such failures.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<T> Named<T>(this Parser<T> parser, string name)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));
            if (name == null) throw new ArgumentNullException(nameof(name));

            // Failures located past the starting input keep their own, more specific expectations.
            return i => parser(i).IfFailure(f => f.Remainder.Equals(i) ?
                Result.Failure<T>(f.Remainder, f.Message, new[] { name }) :
                f);
        }

        /// <summary>
        /// Parse first, if it succeeds, return first, otherwise try second.
        /// Assumes that the first parsed character will determine the parser chosen (see Try).
        /// </summary>
        /// <typeparam name="T">Result type of both parsers.</typeparam>
        /// <param name="first">The preferred parser.</param>
        /// <param name="second">The alternative, tried only when <paramref name="first"/> fails without consuming input
        /// or succeeds without consuming input.</param>
        /// <returns>A parser that commits to <paramref name="first"/> once it has consumed input.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<T> XOr<T>(this Parser<T> first, Parser<T> second)
        {
            if (first == null) throw new ArgumentNullException(nameof(first));
            if (second == null) throw new ArgumentNullException(nameof(second));

            return i =>
            {
                var fr = first(i);
                if (!fr.WasSuccessful)
                {
                    // The 'X' part
                    if (!fr.Remainder.Equals(i))
                        return fr;

                    return second(i).IfFailure(sf => DetermineBestError(fr, sf));
                }

                // This handles a zero-length successful application of first.
                if (fr.Remainder.Equals(i))
                    return second(i).IfFailure(sf => fr);

                return fr;
            };
        }

        // Examines two results presumably obtained at an "Or" junction; returns the result with
        // the most information, or if they apply at the same input position, a union of the results.
        static IResult<T> DetermineBestError<T>(IResult<T> firstFailure, IResult<T> secondFailure)
        {
            if (secondFailure.Remainder.Position > firstFailure.Remainder.Position)
                return secondFailure;

            if (secondFailure.Remainder.Position == firstFailure.Remainder.Position)
                return Result.Failure<T>(
                    firstFailure.Remainder,
                    firstFailure.Message,
                    firstFailure.Expectations.Union(secondFailure.Expectations));

            return firstFailure;
        }

        /// <summary>
        /// Parse a stream of elements containing only one item.
        /// </summary>
        /// <typeparam name="T">The type of element to parse.</typeparam>
        /// <param name="parser">A parser that matches a single element.</param>
        /// <returns>A parser yielding a one-element sequence holding the parsed value.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<IEnumerable<T>> Once<T>(this Parser<T> parser)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return parser.Select(r => (IEnumerable<T>)new[] { r });
        }

        /// <summary>
        /// Concatenate two streams of elements.
        /// </summary>
        /// <typeparam name="T">The element type of both streams.</typeparam>
        /// <param name="first">Parser for the leading elements.</param>
        /// <param name="second">Parser for the trailing elements, run after <paramref name="first"/>.</param>
        /// <returns>A parser yielding the elements of <paramref name="first"/> followed by those of <paramref name="second"/>.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<IEnumerable<T>> Concat<T>(this Parser<IEnumerable<T>> first, Parser<IEnumerable<T>> second)
        {
            if (first == null) throw new ArgumentNullException(nameof(first));
            if (second == null) throw new ArgumentNullException(nameof(second));

            return first.Then(f => second.Select(f.Concat));
        }

        /// <summary>
        /// Succeed immediately and return value.
        /// </summary>
        /// <typeparam name="T">The type of <paramref name="value"/>.</typeparam>
        /// <param name="value">The value to yield.</param>
        /// <returns>A parser that yields <paramref name="value"/> and leaves the input where it was.</returns>
        public static Parser<T> Return<T>(T value)
        {
            return i => Result.Success(value, i);
        }

        /// <summary>
        /// Version of Return with simpler inline syntax.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>; its value is discarded.</typeparam>
        /// <typeparam name="U">The type of <paramref name="value"/>.</typeparam>
        /// <param name="parser">The parser that must succeed first.</param>
        /// <param name="value">The value to yield in place of the parsed one.</param>
        /// <returns>A parser that runs <paramref name="parser"/> and yields <paramref name="value"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
        public static Parser<U> Return<T, U>(this Parser<T> parser, U value)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));

            return parser.Select(t => value);
        }

        /// <summary>
        /// Attempt parsing only if the <paramref name="except"/> parser fails.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <typeparam name="U">Result type of <paramref name="except"/>; its value is discarded.</typeparam>
        /// <param name="parser">The parser to run when <paramref name="except"/> does not match.</param>
        /// <param name="except">The parser whose success vetoes <paramref name="parser"/>.</param>
        /// <returns>A parser that fails at the starting input when <paramref name="except"/> succeeds there.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<T> Except<T, U>(this Parser<T> parser, Parser<U> except)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));
            if (except == null) throw new ArgumentNullException(nameof(except));

            // Could be more like: except.Then(s => s.Fail("..")).XOr(parser)
            return i =>
            {
                var r = except(i);
                if (r.WasSuccessful)
                    return Result.Failure<T>(i, "Excepted parser succeeded.", new[] { "other than the excepted input" });
                return parser(i);
            };
        }

        /// <summary>
        /// Parse a sequence of items until a terminator is reached.
        /// Returns the sequence, discarding the terminator.
        /// </summary>
        /// <typeparam name="T">The type of item to parse.</typeparam>
        /// <typeparam name="U">Result type of the terminator; its value is discarded.</typeparam>
        /// <param name="parser">A parser that matches a single item.</param>
        /// <param name="until">A parser that matches the terminator.</param>
        /// <returns>A parser yielding the items found before the terminator; the terminator must be present.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<IEnumerable<T>> Until<T, U>(this Parser<T> parser, Parser<U> until)
        {
            // Guard here so the exception names this method's own parameters.
            if (parser == null) throw new ArgumentNullException(nameof(parser));
            if (until == null) throw new ArgumentNullException(nameof(until));

            return parser.Except(until).Many().Then(r => until.Return(r));
        }

        /// <summary>
        /// Succeed if the parsed value matches predicate.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <param name="parser">The parser supplying the value.</param>
        /// <param name="predicate">Test applied to the parsed value.</param>
        /// <returns>A parser that fails at the starting input when the value is rejected.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static Parser<T> Where<T>(this Parser<T> parser, Func<T, bool> predicate)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));
            if (predicate == null) throw new ArgumentNullException(nameof(predicate));

            return i => parser(i).IfSuccess(s =>
                predicate(s.Value)
                    ? s
                    : Result.Failure<T>(i, $"Unexpected {s.Value}.", new string[0]));
        }

        /// <summary>
        /// Monadic combinator Then, adapted for Linq comprehension syntax.
        /// </summary>
        /// <typeparam name="T">Result type of <paramref name="parser"/>.</typeparam>
        /// <typeparam name="U">Result type of the parser produced by <paramref name="selector"/>.</typeparam>
        /// <typeparam name="V">Result type after projection.</typeparam>
        /// <param name="parser">The parser to run first.</param>
        /// <param name="selector">Builds the second parser from the first value.</param>
        /// <param name="projector">Combines both parsed values into the result.</param>
        /// <returns>A parser yielding the projected value.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static Parser<V> SelectMany<T, U, V>(
            this Parser<T> parser,
            Func<T, Parser<U>> selector,
            Func<T, U, V> projector)
        {
            if (parser == null) throw new ArgumentNullException(nameof(parser));
            if (selector == null) throw new ArgumentNullException(nameof(selector));
            if (projector == null) throw new ArgumentNullException(nameof(projector));

            return parser.Then(t => selector(t).Select(u => projector(t, u)));
        }

        /// <summary>
        /// Chain a left-associative operator.
        /// </summary>
        /// <typeparam name="T">The operand and result type.</typeparam>
        /// <typeparam name="TOp">The operator type.</typeparam>
        /// <param name="op">Parser for the operator.</param>
        /// <param name="operand">Parser for a single operand.</param>
        /// <param name="apply">Combines an operator, the accumulated left value and the next operand.</param>
        /// <returns>A parser for one operand followed by any number of operator/operand pairs, joined with Or.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static Parser<T> ChainOperator<T, TOp>(
            Parser<TOp> op,
            Parser<T> operand,
            Func<TOp, T, T, T> apply)
        {
            if (op == null) throw new ArgumentNullException(nameof(op));
            if (operand == null) throw new ArgumentNullException(nameof(operand));
            if (apply == null) throw new ArgumentNullException(nameof(apply));

            return operand.Then(first => ChainOperatorRest(first, op, operand, apply, Or));
        }

        /// <summary>
        /// Chain a left-associative operator.
        /// </summary>
        /// <typeparam name="T">The operand and result type.</typeparam>
        /// <typeparam name="TOp">The operator type.</typeparam>
        /// <param name="op">Parser for the operator.</param>
        /// <param name="operand">Parser for a single operand.</param>
        /// <param name="apply">Combines an operator, the accumulated left value and the next operand.</param>
        /// <returns>A parser for one operand followed by any number of operator/operand pairs, joined with XOr.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static Parser<T> XChainOperator<T, TOp>(
            Parser<TOp> op,
            Parser<T> operand,
            Func<TOp, T, T, T> apply)
        {
            if (op == null) throw new ArgumentNullException(nameof(op));
            if (operand == null) throw new ArgumentNullException(nameof(operand));
            if (apply == null) throw new ArgumentNullException(nameof(apply));

            return operand.Then(first => ChainOperatorRest(first, op, operand, apply, XOr));
        }

        // Parses the tail of a left-associative chain. Each operator/operand pair is folded
        // into the accumulated value before recursing; when no operator follows, the
        // accumulated value is returned. 'or' chooses between Or and XOr semantics.
        static Parser<T> ChainOperatorRest<T, TOp>(
            T firstOperand,
            Parser<TOp> op,
            Parser<T> operand,
            Func<TOp, T, T, T> apply,
            Func<Parser<T>, Parser<T>, Parser<T>> or)
        {
            if (op == null) throw new ArgumentNullException(nameof(op));
            if (operand == null) throw new ArgumentNullException(nameof(operand));
            if (apply == null) throw new ArgumentNullException(nameof(apply));

            return or(op.Then(opvalue =>
                    operand.Then(operandValue =>
                        ChainOperatorRest(apply(opvalue, firstOperand, operandValue), op, operand, apply, or))),
                Return(firstOperand));
        }

        /// <summary>
        /// Chain a right-associative operator.
        /// </summary>
        /// <typeparam name="T">The operand and result type.</typeparam>
        /// <typeparam name="TOp">The operator type.</typeparam>
        /// <param name="op">Parser for the operator.</param>
        /// <param name="operand">Parser for a single operand.</param>
        /// <param name="apply">Combines an operator, the left operand and the already-combined right side.</param>
        /// <returns>A parser for one operand followed by any number of operator/operand pairs, joined with Or.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static Parser<T> ChainRightOperator<T, TOp>(
            Parser<TOp> op,
            Parser<T> operand,
            Func<TOp, T, T, T> apply)
        {
            if (op == null) throw new ArgumentNullException(nameof(op));
            if (operand == null) throw new ArgumentNullException(nameof(operand));
            if (apply == null) throw new ArgumentNullException(nameof(apply));

            return operand.Then(first => ChainRightOperatorRest(first, op, operand, apply, Or));
        }

        /// <summary>
        /// Chain a right-associative operator.
        /// </summary>
        /// <typeparam name="T">The operand and result type.</typeparam>
        /// <typeparam name="TOp">The operator type.</typeparam>
        /// <param name="op">Parser for the operator.</param>
        /// <param name="operand">Parser for a single operand.</param>
        /// <param name="apply">Combines an operator, the left operand and the already-combined right side.</param>
        /// <returns>A parser for one operand followed by any number of operator/operand pairs, joined with XOr.</returns>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        public static Parser<T> XChainRightOperator<T, TOp>(
            Parser<TOp> op,
            Parser<T> operand,
            Func<TOp, T, T, T> apply)
        {
            if (op == null) throw new ArgumentNullException(nameof(op));
            if (operand == null) throw new ArgumentNullException(nameof(operand));
            if (apply == null) throw new ArgumentNullException(nameof(apply));

            return operand.Then(first => ChainRightOperatorRest(first, op, operand, apply, XOr));
        }

        // Parses the tail of a right-associative chain. The right-hand side is parsed
        // completely (recursively) before 'apply' combines it with lastOperand; when no
        // operator follows, lastOperand is returned. 'or' chooses between Or and XOr semantics.
        static Parser<T> ChainRightOperatorRest<T, TOp>(
            T lastOperand,
            Parser<TOp> op,
            Parser<T> operand,
            Func<TOp, T, T, T> apply,
            Func<Parser<T>, Parser<T>, Parser<T>> or)
        {
            if (op == null) throw new ArgumentNullException(nameof(op));
            if (operand == null) throw new ArgumentNullException(nameof(operand));
            if (apply == null) throw new ArgumentNullException(nameof(apply));

            return or(op.Then(opvalue =>
                    operand.Then(operandValue =>
                        ChainRightOperatorRest(operandValue, op, operand, apply, or)).Then(r =>
                            Return(apply(opvalue, lastOperand, r)))),
                Return(lastOperand));
        }

        /// <summary>
        /// Parse a number.
        /// </summary>
        public static readonly Parser<string> Number = Numeric.AtLeastOnce().Text();

        // Parses the decimal separator of 'ci' (or of the current culture when 'ci' is null)
        // followed by a Number, and yields the two concatenated.
        static Parser<string> DecimalWithoutLeadingDigits(CultureInfo ci = null)
        {
            return from nothing in Return("")
                   // dummy so that CultureInfo.CurrentCulture is evaluated later
                   from dot in String((ci ?? CultureInfo.CurrentCulture).NumberFormat.NumberDecimalSeparator).Text()
                   from fraction in Number
                   select dot + fraction;
        }

        // Parses a Number optionally followed by a separator-and-fraction part.
        static Parser<string> DecimalWithLeadingDigits(CultureInfo ci = null)
        {
            return Number.Then(n => DecimalWithoutLeadingDigits(ci).XOr(Return("")).Select(f => n + f));
        }

        /// <summary>
        /// Parse a decimal number using the current culture's separator character.
        /// </summary>
        public static readonly Parser<string> Decimal = DecimalWithLeadingDigits().XOr(DecimalWithoutLeadingDigits());

        /// <summary>
        /// Parse a decimal number with separator '.'.
        /// </summary>
        public static readonly Parser<string> DecimalInvariant = DecimalWithLeadingDigits(CultureInfo.InvariantCulture)
                                                                    .XOr(DecimalWithoutLeadingDigits(CultureInfo.InvariantCulture));
    }
}