diff --git a/Cargo.lock b/Cargo.lock index 6ea9b5aa..0508782c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -579,6 +579,12 @@ dependencies = [ "secp256k1-sys", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.188" @@ -655,6 +661,7 @@ dependencies = [ "getrandom", "itertools", "miniscript", + "semver", "serde", "serde_json", "simplicity-lang", diff --git a/Cargo.toml b/Cargo.toml index cff2c274..773958e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ itertools = "0.13.0" arbitrary = { version = "1", optional = true, features = ["derive"] } clap = "4.5.37" chumsky = "0.11.2" +semver = "1.0.27" [target.wasm32-unknown-unknown.dependencies] getrandom = { version = "0.2", features = ["js"] } diff --git a/doc/versioning.md b/doc/versioning.md new file mode 100644 index 00000000..c79e3351 --- /dev/null +++ b/doc/versioning.md @@ -0,0 +1,42 @@ +# Compiler versioning + +A `.simf` file may begin with a compiler version directive: + +```text +simc ">=0.6.0"; +``` + +It is a **fail-fast compatibility check**: if the running compiler does not satisfy the range, compilation stops with a clear message instead of a confusing parser error. The directive is **optional** — a file without one still compiles, and `simc` prints a warning suggesting you add it. When present it must be the first non-comment item, at most once per file. + +A version *range* does not pin the output: several compiler versions can satisfy it and produce different Commitment Merkle Roots (CMRs), hence different addresses. See [Reproducibility](#reproducibility). + +The check runs on the raw text, before lexing, and the directive syntax is **frozen**: every compiler, past or future, can read it. 
A compiler that cannot even tokenize the rest of a newer file still reports the version mismatch — and reports *only* that, since other diagnostics are noise when the compiler itself is the wrong one. + +## Version ranges + +Ranges follow the Cargo-style requirement syntax implemented by the Rust [`semver`](https://docs.rs/semver) crate (the [SemVer](https://semver.org) specification defines version ordering but no range operators). The supported forms are: + +- `^0.6.0` / `0.6.0` — compatible updates (`0.6.1` yes, `0.7.0` no) +- `~0.6` — patch updates only +- `=0.6.0` — that exact version +- `>=0.6.0` — inequalities +- `0.x.x` — wildcards +- `>=0.6.0, <1.0.0` — comma-separated bounds + +A pre-release compiler (e.g. `0.6.0-rc.0`) is treated as its release version (`0.6.0`) unless the range itself names a pre-release. The range grammar is part of the frozen directive syntax: a range a given compiler cannot parse is reported as invalid syntax, not as a mismatch. + +## Multi-file projects + +The entry file and every reachable dependency are checked; if any is incompatible, compilation halts. A stable library cannot be silently built with an incompatible compiler. + +## Reproducibility + +A deployed contract's address is derived from its CMR, so pin an **exact** version (`=x.y.z`) for anything you deploy and verify the CMR that `simc` prints — a range alone is not reproducible. Selecting, pinning, and fetching compilers is the job of higher-level tooling, not `simc`. + +## Tooling + +The declared range is machine-readable without compiling — or even lexing — the program, so it stays readable across language versions. See the rustdoc on `version::SimcDirective::requirement_of`. + +## Flattened output + +A flattened multi-file project carries no `simc` directive; since directives are optional it still recompiles. Threading the merged range through flattening is future work. 
diff --git a/src/driver/mod.rs b/src/driver/mod.rs index 4273f8d4..1ccede1c 100644 --- a/src/driver/mod.rs +++ b/src/driver/mod.rs @@ -29,6 +29,9 @@ mod linearization; mod resolve_order; +#[cfg(test)] +mod version_tests; + use std::collections::{HashMap, HashSet, VecDeque}; use std::path::PathBuf; use std::sync::Arc; diff --git a/src/driver/version_tests.rs b/src/driver/version_tests.rs new file mode 100644 index 00000000..2de78307 --- /dev/null +++ b/src/driver/version_tests.rs @@ -0,0 +1,256 @@ +//! Multi-file enforcement of `simc "...";` directives: the entry file and every +//! reachable dependency are checked. Semver matching itself is covered by +//! `crate::version`'s unit tests. + +use crate::driver::tests::setup_graph_raw; + +/// Builds the given files (with `{v}` replaced by the current compiler version) +/// through the dependency-graph build, returning success and collected diagnostics. +fn build(files: &[(&str, &str)]) -> (bool, String) { + let v = env!("CARGO_PKG_VERSION"); + let owned: Vec<(&str, String)> = files + .iter() + .map(|(p, c)| (*p, c.replace("{v}", v))) + .collect(); + let refs: Vec<(&str, &str)> = owned.iter().map(|(p, c)| (*p, c.as_str())).collect(); + let (graph_opt, _, _ws, handler) = setup_graph_raw(refs); + let ok = graph_opt.is_some() && !handler.has_errors(); + (ok, handler.to_string()) +} + +fn assert_builds(files: &[(&str, &str)]) { + let (ok, errors) = build(files); + assert!(ok, "build failed unexpectedly. Errors:\n{errors}"); +} + +/// Like [`assert_builds`], but the build must fail with a diagnostic containing +/// `expected_err`. 
+fn assert_build_fails(expected_err: &str, files: &[(&str, &str)]) { + let (ok, errors) = build(files); + assert!(!ok, "build succeeded when it should have failed"); + assert!( + errors.contains(expected_err), + "Expected error containing '{expected_err}' but got:\n{errors}" + ); +} + +/// A multi-file program whose every file declares a compatible directive compiles: +/// each directive is checked and stripped, and the bodies still parse across `use`. +#[test] +fn mixed_valid_operators() { + assert_builds(&[ + ( + "main.simf", + r#"simc "^{v}"; +use lib::A::foo; +fn main() {}"#, + ), + ( + "libs/lib/A.simf", + r#"simc "={v}"; +use crate::B::foo; +pub fn foo() {}"#, + ), + ( + "libs/lib/B.simf", + r#"simc ">0.1.0"; +use crate::C::foo; +pub fn foo() {}"#, + ), + ( + "libs/lib/C.simf", + r#"simc "*"; +pub fn foo() {}"#, + ), + ]); +} + +/// The entry file's directive is checked. +#[test] +fn main_too_old_fails() { + assert_build_fails( + "Incompatible compiler version", + &[ + ( + "main.simf", + r#"simc ">99.0.0"; +use lib::A::foo; +fn main() {}"#, + ), + ( + "libs/lib/A.simf", + r#"simc "={v}"; +pub fn foo() {}"#, + ), + ], + ); +} + +/// Every reachable dependency's directive is checked, not just the entry file. +#[test] +fn lib_too_old_fails() { + assert_build_fails( + "Incompatible compiler version", + &[ + ( + "main.simf", + r#"simc "={v}"; +use lib::A::foo; +fn main() {}"#, + ), + ( + "libs/lib/A.simf", + r#"simc ">99.0.0"; +pub fn foo() {}"#, + ), + ], + ); +} + +/// A file that is never imported is not checked, even with an incompatible directive. 
+#[test] +fn unreferenced_file_with_invalid_version_ignored() { + assert_builds(&[ + ( + "main.simf", + r#"simc "={v}"; +use lib::A::foo; +fn main() {}"#, + ), + ( + "libs/lib/A.simf", + r#"simc "={v}"; +pub fn foo() {}"#, + ), + ( + "libs/lib/B.simf", + r#"simc ">99.0.0"; +pub fn unused() {}"#, + ), + ]); +} + +/// An omitted directive is allowed through the driver: only a present directive is +/// enforced, so a directive-less entry file builds successfully. +#[test] +fn directive_omitted() { + assert_builds(&[("main.simf", "fn main() {}")]); +} + +/// The compatibility check runs on the raw text, before lexing: an incompatible +/// compiler is reported even when the file's body cannot be tokenized (here a string +/// literal, which the language does not have), instead of a pile of lex errors. +#[test] +fn incompatible_reported_despite_unlexable_body() { + assert_build_fails( + "Incompatible compiler version", + &[( + "main.simf", + r#"simc ">99.0.0"; +fn main() { let s = "future syntax"; }"#, + )], + ); +} + +/// A stray directive is reported alone: its `"";` remnant and the rest of +/// the parse must not add noise on top of the reserved-keyword error. +#[test] +fn stray_directive_reports_single_error() { + let (ok, errors) = build(&[( + "main.simf", + r#"simc "={v}"; +simc "={v}"; +fn main() {}"#, + )]); + assert!(!ok, "duplicate directive must fail the build"); + assert!( + errors.contains("reserved"), + "expected the reserved-keyword error, got:\n{errors}" + ); + assert!( + !errors.contains("Cannot parse"), + "remnant noise must be suppressed, got:\n{errors}" + ); +} + +/// A file may declare at most one directive: a second one is misplaced and gets a +/// targeted diagnostic. 
+#[test] +fn multiple_directives_same_file_fails() { + assert_build_fails( + "must be the first item", + &[( + "main.simf", + r#"simc "={v}"; +simc "={v}"; +fn main() {}"#, + )], + ); +} + +/// A directive after another item is misplaced and gets a targeted diagnostic, +/// with no noise from its `"";` remnant. +#[test] +fn directive_after_item_fails() { + let (ok, errors) = build(&[( + "main.simf", + r#"fn main() {} +simc "={v}";"#, + )]); + assert!(!ok, "misplaced directive must fail the build"); + assert!( + errors.contains("must be the first item"), + "expected the reserved-keyword error, got:\n{errors}" + ); + assert!( + !errors.contains("Cannot parse"), + "remnant noise must be suppressed, got:\n{errors}" + ); +} + +/// A malformed version requirement surfaces through the pipeline. +#[test] +fn invalid_syntax_main() { + assert_build_fails( + "Invalid version requirement", + &[ + ( + "main.simf", + r#"simc "foo"; +use lib::A::foo; +fn main() {}"#, + ), + ( + "libs/lib/A.simf", + r#"simc "={v}"; +pub fn foo() {}"#, + ), + ], + ); +} + +/// A directive may follow a leading line comment; a commented-out directive does not +/// count. +#[test] +fn version_in_comment_ignored() { + assert_builds(&[( + "main.simf", + r#"// simc "=99.0.0"; +simc "={v}"; +fn main() {}"#, + )]); +} + +/// A directive may follow a leading block comment; one inside the comment does not +/// count. 
+#[test] +fn version_in_block_comment_ignored() { + assert_builds(&[( + "main.simf", + r#"/* +simc "=99.0.0"; +*/ +simc "={v}"; +fn main() {}"#, + )]); +} diff --git a/src/error.rs b/src/error.rs index 2bcb11d4..0a27b3f0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -485,6 +485,15 @@ pub enum Error { UnstableFeature { feature: UnstableFeature, }, + InvalidSimcVersionSyntax { + err: String, + }, + SimcVersionMismatch { + required: String, + current: String, + }, + MalformedSimcDirective, + ReservedSimcKeyword, DependencyPathNotFound { path: PathBuf, }, @@ -660,6 +669,21 @@ impl fmt::Display for Error { f, "The '{feature}' feature is not enabled.\nEnable it with: -Z {feature}" ), + Error::InvalidSimcVersionSyntax { err } => { + write!(f, "Invalid version requirement in `simc` directive: {err}") + } + Error::SimcVersionMismatch { required, current } => write!( + f, + "Incompatible compiler version: file requires `{required}`, but the compiler is `{current}`. Update the compiler or the `simc` directive." 
+ ), + Error::MalformedSimcDirective => write!( + f, + "Malformed compiler version directive: expected `simc \"\";`" + ), + Error::ReservedSimcKeyword => write!( + f, + "`simc` is reserved for the compiler version directive, which must be the first item in the file and may appear at most once" + ), Error::DependencyPathNotFound { path } => write!( f, "Path not found: {}", path.display() @@ -1110,6 +1134,71 @@ let x: u32 = Left( assert_eq!(&expected[1..], &error.to_string()); } + #[test] + fn display_compiler_version_invalid_syntax() { + let file = "simc \"abc\";\nfn main() {}"; + let error = Error::InvalidSimcVersionSyntax { + err: "unexpected character 'a'".to_string(), + } + .with_span(Span::new_in_default_file(0..11)) + .with_content(Arc::from(file)); + + let expected = r#" + | +1 | simc "abc"; + | ^^^^^^^^^^^ Invalid version requirement in `simc` directive: unexpected character 'a'"#; + + assert_eq!(&expected[1..], &error.to_string()); + } + + #[test] + fn display_compiler_version_mismatch() { + let file = "simc \">= 0.6.0\";\nfn main() {}"; + let error = Error::SimcVersionMismatch { + required: ">= 0.6.0".to_string(), + current: "0.5.0".to_string(), + } + .with_span(Span::new_in_default_file(0..16)) + .with_content(Arc::from(file)); + + let expected = r#" + | +1 | simc ">= 0.6.0"; + | ^^^^^^^^^^^^^^^^ Incompatible compiler version: file requires `>= 0.6.0`, but the compiler is `0.5.0`. 
Update the compiler or the `simc` directive."#; + + assert_eq!(&expected[1..], &error.to_string()); + } + + #[test] + fn display_malformed_directive() { + let file = "simc \"1.0\"\nfn main() {}"; + let error = Error::MalformedSimcDirective + .with_span(Span::new_in_default_file(0..10)) + .with_content(Arc::from(file)); + + let expected = r#" + | +1 | simc "1.0" + | ^^^^^^^^^^ Malformed compiler version directive: expected `simc "";`"#; + + assert_eq!(&expected[1..], &error.to_string()); + } + + #[test] + fn display_reserved_simc_keyword() { + let file = "fn main() {}\nsimc \"1.0\";"; + let error = Error::ReservedSimcKeyword + .with_span(Span::new_in_default_file(13..17)) + .with_content(Arc::from(file)); + + let expected = r#" + | +2 | simc "1.0"; + | ^^^^ `simc` is reserved for the compiler version directive, which must be the first item in the file and may appear at most once"#; + + assert_eq!(&expected[1..], &error.to_string()); + } + // --- Tests with filename --- #[test] fn display_single_line_with_file() { diff --git a/src/lexer.rs b/src/lexer.rs index b98ef3ea..a510ddd9 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -6,6 +6,7 @@ use chumsky::{error::Rich, extra, span::SimpleSpan, text, IterParser, Parser}; use crate::driver::CRATE_STR; use crate::error::{Error, RichError, Span}; use crate::str::{Binary, Decimal, Hexadecimal}; +use crate::version::SIMC_STR; pub type Spanned = (T, SimpleSpan); pub type Tokens<'src> = Vec<(Token<'src>, crate::error::Span)>; @@ -23,6 +24,9 @@ pub enum Token<'src> { Const, Match, Crate, + /// Reserved for the compiler version directive, which the preprocessor consumes + /// before lexing; [`lex`] reports any occurrence as an error and drops the token. 
+ Simc, // Control symbols Arrow, @@ -82,6 +86,7 @@ impl<'src> fmt::Display for Token<'src> { Token::Const => write!(f, "const"), Token::Match => write!(f, "match"), Token::Crate => write!(f, "{}", CRATE_STR), + Token::Simc => write!(f, "{}", SIMC_STR), Token::Arrow => write!(f, "->"), Token::DoubleColon => write!(f, "::"), @@ -119,6 +124,45 @@ impl<'src> fmt::Display for Token<'src> { } } +/// Recognizer for a `// ...` line comment. +fn line_comment<'src>( +) -> impl Parser<'src, &'src str, (), extra::Err>> + Clone { + just("//") + .then(any().and_is(just('\n').not()).repeated()) + .ignored() +} + +/// Recognizer for a (possibly nested) `/* ... */` block comment; an unterminated +/// comment is reported and swallows the rest of the input. +fn block_comment<'src>( +) -> impl Parser<'src, &'src str, (), extra::Err>> + Clone { + recursive(|block| { + just("/*") + .map_with(|_, e| e.span()) + .then(choice((block, any().and_is(just("*/").not()).ignored())).repeated()) + .then(just("*/").or_not()) + .validate(|((open_span, ()), close), _span, emit| { + if close.is_none() { + emit.emit(Rich::custom(open_span, "Unclosed block comment")); + } + }) + }) +} + +/// Trivia — whitespace and comments — shared with the version-directive scanner +/// (`version::SimcDirective::scan`) so the lexer and the scanner agree on comment +/// syntax. 
+pub(crate) fn trivia<'src>( +) -> impl Parser<'src, &'src str, (), extra::Err>> { + choice(( + line_comment(), + block_comment(), + any().filter(|c: &char| c.is_whitespace()).ignored(), + )) + .repeated() + .ignored() +} + pub fn lexer<'src>( ) -> impl Parser<'src, &'src str, Vec>>, extra::Err>> { @@ -158,6 +202,7 @@ pub fn lexer<'src>( "const" => Token::Const, "match" => Token::Match, CRATE_STR => Token::Crate, + SIMC_STR => Token::Simc, "true" => Token::Bool(true), "false" => Token::Bool(false), _ => Token::Ident(s), @@ -194,22 +239,8 @@ pub fn lexer<'src>( just(">").to(Token::RAngle), )); - let comment = just("//") - .ignore_then(any().and_is(just('\n').not()).repeated()) - .to(Token::Comment); - - let block_comment = recursive(|block| { - just("/*") - .map_with(|_, e| e.span()) - .then(choice((block.ignored(), any().and_is(just("*/").not()).ignored())).repeated()) - .then(just("*/").or_not()) - .validate(|((open_span, _content), close), _span, emit| { - if close.is_none() { - emit.emit(Rich::custom(open_span, "Unclosed block comment")); - } - Token::BlockComment - }) - }); + let comment = line_comment().to(Token::Comment); + let block_comment = block_comment().to(Token::BlockComment); let token = choice(( comment, block_comment, @@ -236,33 +267,45 @@ pub fn lexer<'src>( /// /// All comments in the input code are discarded. pub fn lex(file_id: usize, input: &str) -> (Option>, Vec) { - let (tokens, errors) = lexer().parse(input).into_output_errors(); + let (tokens, lex_errors) = lexer().parse(input).into_output_errors(); + // The reserved-keyword errors come first: a stray directive also produces + // follow-up errors for its `"";` remnant, and the sentinel is their cause. 
+ let mut errors: Vec = Vec::new(); let tokens = tokens.map(|vec| { vec.into_iter() - .filter(|(tok, _)| !matches!(tok, Token::Comment | Token::BlockComment)) - .map(|(tok, span)| (tok, Span::from_chumsky(file_id, span))) + .filter_map(|(tok, span)| match tok { + Token::Comment | Token::BlockComment => None, + // The reserved keyword is a sentinel: the preprocessor consumed the one + // legitimate directive before lexing, so any occurrence is misplaced. + Token::Simc => { + errors.push(RichError::new( + Error::ReservedSimcKeyword, + Span::from_chumsky(file_id, span), + )); + None + } + tok => Some((tok, Span::from_chumsky(file_id, span))), + }) .collect() }); - let errors = errors - .into_iter() - .map(|err| { - RichError::new( - Error::CannotParse { - msg: err.reason().to_string(), - }, - Span::from_chumsky(file_id, *err.span()), - ) - }) - .collect(); + errors.extend(lex_errors.into_iter().map(|err| { + RichError::new( + Error::CannotParse { + msg: err.reason().to_string(), + }, + Span::from_chumsky(file_id, *err.span()), + ) + })); (tokens, errors) } /// A list of all reserved keywords. pub const KEYWORDS: &[&str] = &[ - "pub", "use", "as", "fn", "let", "type", "mod", "const", "match", CRATE_STR, "true", "false", + "pub", "use", "as", "fn", "let", "type", "mod", "const", "match", CRATE_STR, SIMC_STR, "true", + "false", ]; /// Checks whether a given string is a keyword. @@ -365,6 +408,31 @@ mod tests { assert_eq!(tokens, Some(vec![Token::BlockComment])); } + #[test] + fn simc_is_reserved() { + // The preprocessor consumes the one legitimate leading directive before + // lexing, so `lex` reports any `simc` it sees and drops the sentinel token. 
+ for src in ["simc", "fn simc() {}", "fn f() {}\nsimc"] { + let (tokens, errors) = super::lex(0, src); + assert!( + errors.iter().any(|e| e.to_string().contains("reserved")), + "expected a reserved-keyword error for {src:?}, got: {errors:?}" + ); + assert!( + tokens + .expect("recovery keeps the stream") + .iter() + .all(|(tok, _)| !matches!(tok, Token::Simc)), + "the sentinel must not reach the token stream for {src:?}" + ); + } + + // Identifiers merely starting with `simc` are ordinary identifiers. + let (tokens, errors) = lex("simcfoo"); + assert!(errors.is_empty(), "unexpected: {errors:?}"); + assert_eq!(tokens, Some(vec![Token::Ident("simcfoo")])); + } + #[test] fn lexer_test() { use chumsky::prelude::*; diff --git a/src/lib.rs b/src/lib.rs index ee036498..7c158482 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,6 +27,7 @@ pub mod test_utils; pub mod tracker; pub mod types; pub mod value; +pub mod version; mod witness; use std::sync::Arc; @@ -1530,6 +1531,35 @@ fn main() { regression_test("transfer_with_timeout"); } } + + // Smoke tests that the version check is wired into `TemplateProgram::new`: one + // compatible directive compiles, one incompatible directive aborts. The semver + // matching and per-kind messages are covered exhaustively in `version`'s unit + // tests, so they are not re-asserted through the pipeline here. 
+ #[test] + fn compatible_directive_compiles() { + let compatible = format!( + "simc \"{}\";\nfn main() {{}}", + crate::version::SimcDirective::current_version() + ); + assert!( + TemplateProgram::new(compatible, Box::new(crate::ast::ElementsJetHinter::new())) + .is_ok() + ); + } + + #[test] + fn incompatible_directive_aborts() { + let too_old = "simc \">= 99.99.99\";\nfn main() {}"; + let err = TemplateProgram::new(too_old, Box::new(crate::ast::ElementsJetHinter::new())) + .unwrap_err() + .to_string(); + assert!( + err.contains("Incompatible compiler version"), + "Expected 'Incompatible compiler version', got: {}", + err + ); + } } #[cfg(test)] diff --git a/src/main.rs b/src/main.rs index 7a870034..a06e7475 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use base64::engine::general_purpose::STANDARD; use clap::{Arg, ArgAction, Command}; use simplicityhl::ast::ElementsJetHinter; +use simplicityhl::version::SimcDirective; use simplicityhl::{ resolution::DependencyMapBuilder, source::CanonPath, source::CanonSourceFile, AbiMeta, CompiledProgram, @@ -112,6 +113,10 @@ fn main() -> Result<(), Box> { let prog_file = matches.get_one::("prog_file").unwrap(); let main_path = CanonPath::canonicalize(Path::new(prog_file))?; let main_text = std::fs::read_to_string(main_path.as_path()).map_err(|e| e.to_string())?; + // Entry file only; deps are still version-checked in the driver, just not warned. 
+ if let Some(warning) = SimcDirective::missing_warning(&main_text) { + eprintln!("Warning: {warning}"); + } let include_debug_symbols = matches.get_flag("debug"); let output_json = matches.get_flag("json"); let abi_param = matches.get_flag("abi"); diff --git a/src/parse.rs b/src/parse.rs index 57fe3a60..4172339f 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -31,6 +31,7 @@ use crate::str::{ }; use crate::types::{AliasedType, BuiltinAlias, TypeConstructible, UIntType}; use crate::unstable::{impl_require_feature, UnstableFeature, UnstableFeatures}; +use crate::version::SimcDirective; /// A program is a sequence of items. #[derive(Clone, Debug)] @@ -1207,7 +1208,10 @@ type ParseError<'src> = extra::Err; /// This implementation only returns first encountered error. impl ParseFromStr for A { fn parse_from_str(s: &str) -> Result { - let (tokens, mut lex_errs) = crate::lexer::lex(MAIN_MODULE, s); + // A leading `simc "...";` directive is validated and blanked before lexing. + let content = SimcDirective::preprocess(s, MAIN_MODULE) + .map_err(|(err, span)| RichError::new(err, span))?; + let (tokens, mut lex_errs) = crate::lexer::lex(MAIN_MODULE, &content); let Some(tokens) = tokens else { return Err(lex_errs.pop().unwrap_or(RichError::parsing_error( @@ -1243,14 +1247,35 @@ impl ParseF handler: &mut ErrorCollector, ) -> Option { let source: SourceFile = source.into(); - let src = source.content().to_string(); + let content = source.content(); + + // Handle the `simc` directive before lexing: an incompatible or malformed + // directive is reported as the only diagnostic (the rest is noise), and a + // valid one is blanked so the lexer and grammar never see it. 
+ let content = match SimcDirective::preprocess(&content, file_id) { + Ok(content) => content, + Err((err, span)) => { + handler.push(RichError::new(err, span).with_source(source)); + return None; + } + }; - let (tokens, lex_errs) = crate::lexer::lex(file_id, &src); + let (tokens, mut lex_errs) = crate::lexer::lex(file_id, &content); + // A stray `simc` makes every other diagnostic noise — its `"";` remnant + // does not lex — so the reserved-keyword errors are reported alone. + if lex_errs + .iter() + .any(|e| matches!(e.error(), Error::ReservedSimcKeyword)) + { + lex_errs.retain(|e| matches!(e.error(), Error::ReservedSimcKeyword)); + handler.extend(source, lex_errs); + return None; + } let lex_ok = lex_errs.is_empty(); handler.extend(source.clone(), lex_errs); let tokens = tokens?; - let eoi = Span::eof(file_id, src.len()); + let eoi = Span::eof(file_id, content.len()); let (ast, parse_errs) = A::parser() .parse(tokens.as_slice().map(eoi, |(t, s)| (t, s))) .into_output_errors(); diff --git a/src/version.rs b/src/version.rs new file mode 100644 index 00000000..a3a195bc --- /dev/null +++ b/src/version.rs @@ -0,0 +1,385 @@ +//! Compiler-version directives: `simc "";`. +//! +//! A `.simf` file may declare, as its first non-comment item, the semver range of +//! compilers it is written for (see `doc/versioning.md`). The check runs once per +//! file, before lexing: `SimcDirective::preprocess` scans the raw text, validates +//! the range against the running compiler, and blanks the directive out of the +//! source. Working on raw text keeps the check alive for files whose bodies this +//! compiler cannot even tokenize; an incompatible compiler is reported as the only +//! diagnostic. +//! +//! The language has no directive token: `simc` is a reserved keyword, so the lexer +//! rejects any `simc` left after preprocessing (a duplicate or misplaced +//! directive). The syntax recognized by `SimcDirective::scan` is frozen — every +//! 
past and future compiler must be able to read it. +//! +//! External tooling reads a file's declared range without compiling it via +//! [`SimcDirective::requirement_of`]. + +use std::borrow::Cow; +use std::ops::Range; +use std::sync::OnceLock; + +use chumsky::Parser; +use semver::{Version, VersionReq}; + +use crate::error::{Error, Span}; + +/// The directive keyword, reserved by the lexer. +pub(crate) const SIMC_STR: &str = "simc"; + +/// Result of scanning a file's leading bytes for a `simc "";` directive. +#[derive(Clone, Debug, Eq, PartialEq)] +enum DirectiveScan<'a> { + /// A well-formed directive: the range between the quotes and its byte span. + Found { range: &'a str, span: Range }, + /// A directive missing its closing quote or semicolon; `span` covers the broken text. + Malformed { span: Range }, + /// No directive at the top of the file. + Absent, +} + +/// Operations on the `simc "";` compiler-version directive. +pub struct SimcDirective; + +impl SimcDirective { + /// The running compiler's version (`CARGO_PKG_VERSION`), which directives are + /// validated against. + pub fn current_version() -> &'static str { + env!("CARGO_PKG_VERSION") + } + + /// Scan the raw text for a leading directive, validate it against the running + /// compiler, and blank it out with equal-length spaces. + /// + /// Blanking keeps the pipeline directive-free — no token, no grammar special + /// cases, `simc` stays plainly reserved — while preserving every byte offset, + /// so downstream error spans need no translation. On error the caller should + /// not lex: any further diagnostic is noise. 
+ pub(crate) fn preprocess(content: &str, file_id: usize) -> Result, (Error, Span)> { + match Self::scan(content) { + DirectiveScan::Absent => Ok(Cow::Borrowed(content)), + DirectiveScan::Malformed { span } => { + Err((Error::MalformedSimcDirective, Span::new(file_id, span))) + } + DirectiveScan::Found { range, span } => { + Self::validate(range, Span::new(file_id, span.clone()))?; + Ok(Cow::Owned(Self::blank(content, &span))) + } + } + } + + /// Read a file's declared version requirement without compiling it, for external + /// tooling. Returns `Ok(None)` when the file declares no directive, and an error + /// when the directive is malformed or its range is not valid semver. + pub fn requirement_of(content: &str) -> Result, String> { + match Self::scan(content) { + DirectiveScan::Found { range, .. } => VersionRequirement::parse(range.trim()).map(Some), + DirectiveScan::Malformed { .. } => Err(Error::MalformedSimcDirective.to_string()), + DirectiveScan::Absent => Ok(None), + } + } + + /// The CLI advisory for a file with no `simc` directive, or `None` when one is + /// present (a malformed directive counts as present). + pub fn missing_warning(content: &str) -> Option { + matches!(Self::scan(content), DirectiveScan::Absent).then(|| { + format!( + "no compiler version directive at the top of the file; consider adding `{} \"{}\";`", + SIMC_STR, + Self::current_version() + ) + }) + } + + /// Scan the start of `content` for a directive, skipping whitespace and comments. + /// The single (frozen) definition of the directive syntax. + fn scan(content: &str) -> DirectiveScan<'_> { + // `rest` is always a suffix of `content`, so positions fall out of its length. + let offset = |rest: &str| content.len() - rest.len(); + let start = Self::skip_trivia(content); + + let Some(rest) = content[start..].strip_prefix(SIMC_STR) else { + return DirectiveScan::Absent; + }; + // A bare `simc` is not a directive; the lexer rejects it as reserved. 
+ let Some(rest) = rest.trim_start_matches([' ', '\t']).strip_prefix('"') else { + return DirectiveScan::Absent; + }; + // The range runs to the closing quote on the same line. + let Some(quote) = rest + .find(['"', '\n']) + .filter(|&i| rest[i..].starts_with('"')) + else { + let line_end = rest.find('\n').unwrap_or(rest.len()); + return DirectiveScan::Malformed { + span: start..offset(rest) + line_end, + }; + }; + let (range, rest) = (&rest[..quote], &rest[quote + 1..]); + let rest = rest.trim_start_matches([' ', '\t']); + let Some(rest) = rest.strip_prefix(';') else { + return DirectiveScan::Malformed { + span: start..offset(rest), + }; + }; + DirectiveScan::Found { + range, + span: start..offset(rest), + } + } + + /// Validate a directive's version-requirement string against the running compiler. + fn validate(required: &str, span: Span) -> Result<(), (Error, Span)> { + let required = required.trim(); + let req = VersionRequirement::parse(required) + .map_err(|e| (Error::InvalidSimcVersionSyntax { err: e }, span))?; + + if !req.matches(Self::current_semver()) { + let err = Error::SimcVersionMismatch { + required: required.to_string(), + current: Self::current_version().to_string(), + }; + return Err((err, span)); + } + Ok(()) + } + + /// The running compiler's version, parsed once. `CARGO_PKG_VERSION` is a compile-time + /// constant, so a multi-file build reuses this instead of re-parsing it per file. + fn current_semver() -> &'static Version { + static CURRENT: OnceLock = OnceLock::new(); + CURRENT.get_or_init(|| { + Version::parse(Self::current_version()).expect("CARGO_PKG_VERSION is valid semver") + }) + } + + /// Replace `span` with equal-length spaces, preserving all byte offsets. 
+ fn blank(content: &str, span: &Range) -> String { + let mut blanked = content.to_string(); + blanked.replace_range(span.clone(), &" ".repeat(span.len())); + blanked + } + + /// Byte offset of the first content after leading whitespace and comments, using + /// the lexer's shared [`trivia`](crate::lexer::trivia) recognizer so the scanner + /// and the lexer agree on comment syntax. + fn skip_trivia(content: &str) -> usize { + crate::lexer::trivia() + .to_slice() + .lazy() + .parse(content) + .into_output() + .map_or(0, str::len) + } +} + +/// A parsed directive requirement: the semver range written inside the quotes. Wraps +/// [`semver::VersionReq`] so [`Self::matches`] can apply compiler-aware pre-release +/// handling — a plain release range still accepts a pre-release build of that version. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct VersionRequirement { + req: VersionReq, +} + +impl VersionRequirement { + /// Parse a requirement string such as `>=0.6.0` or `=0.6.0`. + pub fn parse(s: &str) -> Result { + VersionReq::parse(s) + .map(|req| VersionRequirement { req }) + .map_err(|e| e.to_string()) + } + + /// The underlying requirement, for external tooling that + /// intersects ranges across a project's files. + pub fn req(&self) -> &VersionReq { + &self.req + } + + #[allow(rustdoc::private_intra_doc_links)] + /// Whether `version` satisfies the requirement, after pre-release + /// normalization (see [`Self::effective_version`]). + pub fn matches(&self, version: &Version) -> bool { + self.req.matches(&self.effective_version(version)) + } + + /// Strip the compiler's pre-release tag (`0.6.0-rc.0` → `0.6.0`) when the + /// requirement names no pre-release, so a release range still accepts a matching + /// pre-release compiler. Without this, semver would reject `0.6.0-rc.0` for a + /// plain `>=0.6.0`. 
+    fn effective_version(&self, version: &Version) -> Version {
+        // A requirement that names a pre-release is matched against the version as-is,
+        // and a release version has no tag to strip.
+        let names_pre_release = self.req.comparators.iter().any(|cmp| !cmp.pre.is_empty());
+        if names_pre_release || version.pre.is_empty() {
+            return version.clone();
+        }
+
+        let mut release = version.clone();
+        release.pre = semver::Prerelease::EMPTY;
+        release
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::driver::MAIN_MODULE;
+
+    /// Exercises `scan` on present, broken, and missing directives and checks the
+    /// exact text each reported span covers.
+    #[test]
+    fn scan_directive_cases() {
+        // Asserts that `content` scans as `Found` with the given range and directive text.
+        let expect_found = |content: &str, range: &str, directive: &str| {
+            match SimcDirective::scan(content) {
+                DirectiveScan::Found { range: found, span } => {
+                    assert_eq!(found, range, "wrong range in {content:?}");
+                    assert_eq!(&content[span], directive, "wrong span in {content:?}");
+                }
+                other => panic!("expected Found in {content:?}, got {other:?}"),
+            }
+        };
+        // Asserts that `content` scans as `Malformed` covering exactly `broken`.
+        let expect_malformed = |content: &str, broken: &str| {
+            match SimcDirective::scan(content) {
+                DirectiveScan::Malformed { span } => {
+                    assert_eq!(&content[span], broken, "wrong span in {content:?}");
+                }
+                other => panic!("expected Malformed in {content:?}, got {other:?}"),
+            }
+        };
+
+        let found_cases = [
+            (
+                "simc \">=0.6.0\";\nfn main() {}",
+                ">=0.6.0",
+                "simc \">=0.6.0\";",
+            ),
+            ("simc \"*\" ;rest", "*", "simc \"*\" ;"),
+            // Line comments, nested block comments, and blank lines are skipped.
+            (
+                "// note\n/* outer /* inner */ outer */\n\nsimc \"1.0\";",
+                "1.0",
+                "simc \"1.0\";",
+            ),
+        ];
+        for (content, range, directive) in found_cases {
+            expect_found(content, range, directive);
+        }
+
+        // Committed (`simc "`) but missing the closing quote or the semicolon.
+        let malformed_cases = [
+            ("simc \"1.0\"\nfn f() {}", "simc \"1.0\""),
+            ("simc \"1.0\nfn f() {}", "simc \"1.0"),
+            ("simc \"1.0\"", "simc \"1.0\""),
+        ];
+        for (content, broken) in malformed_cases {
+            expect_malformed(content, broken);
+        }
+
+        // Not a directive at all: bare identifier, other code, or a directive that is
+        // not the first item (the lexer rejects those as reserved `simc`).
+        let absent_cases = [
+            "simc",
+            "simcfoo \"1.0\";",
+            "fn main() {}",
+            "fn f() {}\nsimc \"1.0\";",
+            "/* unterminated\nsimc \"1.0\";",
+        ];
+        for content in absent_cases {
+            assert_eq!(SimcDirective::scan(content), DirectiveScan::Absent);
+        }
+    }
+
+    /// `preprocess` replaces a valid directive with spaces (same byte length), hands a
+    /// directive-less source back borrowed, and fails on a broken or unsatisfied one.
+    #[test]
+    fn preprocess_blanks_validates_and_passes_through() {
+        let src = "// c\nsimc \"*\";\nfn main() {}";
+        let span = match SimcDirective::scan(src) {
+            DirectiveScan::Found { span, .. } => span,
+            _ => panic!("expected a directive in the test source"),
+        };
+        let blanked = SimcDirective::preprocess(src, MAIN_MODULE).unwrap();
+        assert!(matches!(blanked, Cow::Owned(_)));
+        assert_eq!(blanked.len(), src.len(), "blanking must preserve byte offsets");
+        assert!(blanked[span.clone()].chars().all(|c| c == ' '));
+        assert_eq!(blanked[..span.start], src[..span.start]);
+        assert_eq!(blanked[span.end..], src[span.end..]);
+
+        let plain = "fn main() {}";
+        let untouched = SimcDirective::preprocess(plain, MAIN_MODULE).unwrap();
+        assert!(matches!(untouched, Cow::Borrowed(_)));
+
+        let missing_semicolon =
+            SimcDirective::preprocess("simc \"1.0\"\nfn main() {}", MAIN_MODULE).unwrap_err();
+        assert!(matches!(missing_semicolon.0, Error::MalformedSimcDirective));
+
+        let too_new = SimcDirective::preprocess("simc \">99.0.0\";", MAIN_MODULE).unwrap_err();
+        assert!(matches!(too_new.0, Error::SimcVersionMismatch { .. }));
+    }
+
+    /// `matches` respects semver operators and the pre-release normalization of
+    /// `effective_version`; `0.6.0-rc.0` stands in for the compiler.
+    #[test]
+    fn matches_respects_operators_and_prerelease() {
+        let cur = Version::parse("0.6.0-rc.0").expect("test version literal is valid semver");
+        let accepted = [
+            "*",
+            "0.6.0",
+            "^0.6.0",
+            "~0.6.0",
+            ">=0.6.0",
+            ">0.1.0",
+            "=0.6.0-rc.0",
+            "^0.6.0-rc.0",
+        ];
+        let rejected = [
+            "=0.5.0",
+            ">99.0.0",
+            "<0.0.1",
+            "<0.6.0",          // -rc tag stripped, so 0.6.0 is not < 0.6.0
+            ">=0.1.0-alpha.1", // pre-release gating: different base, so no match
+            ">=0.7.0, =0.6.0", // the `=0.6.0` must not rescue the failing `>=0.7.0`
+        ];
+        // Both loops report the requirement exactly as written, so a failure names the
+        // offending table entry rather than the parsed structure.
+        for req in accepted {
+            let parsed = VersionRequirement::parse(req)
+                .unwrap_or_else(|e| panic!("`{req}` should parse: {e}"));
+            assert!(parsed.matches(&cur), "`{req}` should match {cur}");
+        }
+        for req in rejected {
+            let parsed = VersionRequirement::parse(req)
+                .unwrap_or_else(|e| panic!("`{req}` should parse: {e}"));
+            assert!(!parsed.matches(&cur), "`{req}` should not match {cur}");
+        }
+    }
+
+    /// `validate` rejects a bad semver range and an incompatible compiler, and
+    /// accepts a satisfiable one.
+    #[test]
+    fn validate_reports_bad_and_incompatible() {
+        let span = Span::new(MAIN_MODULE, 0..1);
+        // Not a requirement at all: reported as a syntax error, not a mismatch.
+        assert!(matches!(
+            SimcDirective::validate("not-a-version", span)
+                .unwrap_err()
+                .0,
+            Error::InvalidSimcVersionSyntax { .. }
+        ));
+        // Parses, but the test expects the running compiler not to satisfy it.
+        assert!(matches!(
+            SimcDirective::validate(">=99.0.0", span).unwrap_err().0,
+            Error::SimcVersionMismatch { .. }
+        ));
+        assert!(SimcDirective::validate("*", span).is_ok());
+    }
+
+    /// Only a directive-less source triggers the advisory; `requirement_of`
+    /// distinguishes "no directive" from "broken directive".
+    #[test]
+    fn missing_warning_and_requirement_of() {
+        // A source with no directive at all produces the advisory ...
+        assert!(SimcDirective::missing_warning("fn main() {}").is_some());
+        // ... while a directive that is present, even a broken one, suppresses it.
+        let with_directive = [
+            "simc \"*\";\nfn main() {}",
+            "// note\nsimc \"*\";\nfn main() {}",
+            "simc \"*\"\nfn main() {}",
+        ];
+        for source in with_directive {
+            assert!(SimcDirective::missing_warning(source).is_none());
+        }
+
+        let declared = SimcDirective::requirement_of("simc \">=0.1.0\";\nfn main() {}")
+            .unwrap()
+            .map(|requirement| requirement.req().clone());
+        assert_eq!(declared, Some(VersionReq::parse(">=0.1.0").unwrap()));
+        assert_eq!(SimcDirective::requirement_of("fn main() {}"), Ok(None));
+        for broken in ["simc \"*\"\nfn main() {}", "simc \"not-a-version\";"] {
+            assert!(SimcDirective::requirement_of(broken).is_err());
+        }
+    }
+}
diff --git a/tests/cli.rs b/tests/cli.rs
index aa76eef3..ea736d9b 100644
--- a/tests/cli.rs
+++ b/tests/cli.rs
@@ -1,10 +1,34 @@
 use std::path::{Path, PathBuf};
-use std::process::Command;
+use std::process::{Command, Output};
 
 fn repo_path(path: &str) -> PathBuf {
     Path::new(env!("CARGO_MANIFEST_DIR")).join(path)
 }
 
+/// Run the real `simc` binary on a standalone source file: `content` is written to
+/// `<name>.simf` in the test temp dir and the finished process output is returned.
+/// Shared by the version-directive tests, which each pass a distinct `name`.
+fn run_simc_on_source(name: &str, content: &str) -> Output {
+    let file_name = format!("{name}.simf");
+    let source = Path::new(env!("CARGO_TARGET_TMPDIR")).join(file_name);
+    std::fs::write(&source, content).expect("failed to write source file");
+
+    let mut simc = Command::new(env!("CARGO_BIN_EXE_simc"));
+    simc.arg(source);
+    simc.output().expect("failed to run simc")
+}
+
+/// Write each `(relative path, content)` under a unique temp project root (creating
+/// parent directories) and return the root. Used to drive multi-file `--dep` builds.
+fn setup_project(name: &str, files: &[(&str, &str)]) -> PathBuf {
+    let project_root = Path::new(env!("CARGO_TARGET_TMPDIR")).join(name);
+    files.iter().for_each(|(relative, body)| {
+        let target = project_root.join(relative);
+        let parent = target.parent().unwrap();
+        std::fs::create_dir_all(parent).expect("failed to create project dirs");
+        std::fs::write(&target, body).expect("failed to write project file");
+    });
+    project_root
+}
+
 #[test]
 fn cli_dependency_can_use_crate_root() {
     let root = repo_path("functional-tests/valid-test-cases/external-library-uses-crate");
@@ -87,3 +111,154 @@ fn cli_reserved_crate_mapping_fails() {
         stderr
     );
 }
+
+/// Positive control for the rejection tests below: a file whose directive is `*`
+/// (satisfied by any compiler, so it survives version bumps) must compile from the
+/// command line without the missing-directive warning.
+#[test]
+fn cli_version_compatible_accepted() {
+    let Output { status, stderr, .. } =
+        run_simc_on_source("version_ok", "simc \"*\";\nfn main() {}\n");
+    let stderr = String::from_utf8_lossy(&stderr);
+
+    let compiled = status.success();
+    assert!(
+        compiled,
+        "simc should accept a compatible directive\nstderr:\n{stderr}",
+    );
+
+    let warned = stderr.contains("no compiler version directive");
+    assert!(
+        !warned,
+        "a present directive must not trigger the missing-directive warning, got:\n{stderr}"
+    );
+}
+
+/// A directive the running compiler cannot satisfy is rejected. `>99.0.0` is
+/// permanently too new, so the build aborts with a non-zero exit and a clear message.
+#[test]
+fn cli_version_incompatible_rejected() {
+    let Output { status, stderr, .. } =
+        run_simc_on_source("version_incompatible", "simc \">99.0.0\";\nfn main() {}\n");
+    let rejected = !status.success();
+    assert!(rejected, "simc must reject an incompatible directive");
+
+    let stderr = String::from_utf8_lossy(&stderr);
+    let reported = stderr.contains("Incompatible compiler version");
+    assert!(
+        reported,
+        "Expected 'Incompatible compiler version', got:\n{stderr}"
+    );
+}
+
+/// Leaving the directive out is allowed: the file must still compile, and the CLI
+/// must print the warning that suggests adding one.
+#[test]
+fn cli_version_missing_warns_but_compiles() {
+    let Output { status, stderr, .. } = run_simc_on_source("version_missing", "fn main() {}\n");
+    let compiled = status.success();
+    assert!(compiled, "simc must accept a file with no version directive");
+
+    let stderr = String::from_utf8_lossy(&stderr);
+    let warned = stderr.contains("no compiler version directive");
+    assert!(
+        warned,
+        "Expected a missing-directive warning, got:\n{stderr}"
+    );
+}
+
+/// A requirement that is not valid semver must fail as a syntax error rather than
+/// as a version mismatch.
+#[test]
+fn cli_version_invalid_syntax_rejected() {
+    let Output { status, stderr, .. } = run_simc_on_source(
+        "version_bad_syntax",
+        "simc \"not-a-version\";\nfn main() {}\n",
+    );
+    let rejected = !status.success();
+    assert!(rejected, "simc must reject a malformed version requirement");
+
+    let stderr = String::from_utf8_lossy(&stderr);
+    let reported = stderr.contains("Invalid version requirement in `simc` directive");
+    assert!(
+        reported,
+        "Expected 'Invalid version requirement in `simc` directive', got:\n{stderr}"
+    );
+}
+
+/// A directive that is structurally broken (here a missing semicolon) is rejected
+/// before the requirement is even parsed — a different path than an invalid semver
+/// string above.
+#[test]
+fn cli_version_malformed_directive_rejected() {
+    let Output { status, stderr, .. } =
+        run_simc_on_source("version_malformed", "simc \"1.0\"\nfn main() {}\n");
+    let rejected = !status.success();
+    assert!(
+        rejected,
+        "simc must reject a directive with a missing semicolon"
+    );
+
+    let stderr = String::from_utf8_lossy(&stderr);
+    let reported = stderr.contains("Malformed compiler version directive");
+    assert!(
+        reported,
+        "Expected 'Malformed compiler version directive', got:\n{stderr}"
+    );
+}
+
+/// Fail-fast check: the body contains a string literal, which this test treats as
+/// syntax the compiler cannot lex. The incompatible directive must still be reported,
+/// and no `Cannot parse` diagnostic may appear next to it.
+#[test]
+fn cli_version_incompatible_preempts_lex_errors() {
+    let Output { status, stderr, .. } = run_simc_on_source(
+        "version_future_syntax",
+        "simc \">99.0.0\";\nfn main() { let s = \"future string literal\"; }\n",
+    );
+    let rejected = !status.success();
+    assert!(
+        rejected,
+        "simc must reject an incompatible directive even when the body does not lex"
+    );
+
+    let stderr = String::from_utf8_lossy(&stderr);
+    let reported = stderr.contains("Incompatible compiler version");
+    assert!(
+        reported,
+        "Expected 'Incompatible compiler version', got:\n{stderr}"
+    );
+    let has_parse_noise = stderr.contains("Cannot parse");
+    assert!(
+        !has_parse_noise,
+        "the version error must preempt lex errors, got:\n{stderr}"
+    );
+}
+
+/// An incompatible directive in a *dependency* (reached via `--dep`), not the entry
+/// file, also aborts the build and the diagnostic points at the dependency.
+#[test]
+fn cli_dependency_version_mismatch_rejected() {
+    // The entry file accepts any compiler; only the dependency demands `>99.0.0`.
+    let entry = (
+        "main.simf",
+        "simc \"*\";\nuse lib::module::add;\nfn main() {}\n",
+    );
+    let dependency = ("lib/module.simf", "simc \">99.0.0\";\npub fn add() {}\n");
+    let root = setup_project("version_dep_mismatch", &[entry, dependency]);
+
+    // The `--dep` value built here has the form `<root>:lib=<root>/lib`.
+    let lib_dir = root.join("lib");
+    let dep_arg = format!("{}:lib={}", root.display(), lib_dir.display());
+
+    let mut simc = Command::new(env!("CARGO_BIN_EXE_simc"));
+    simc.arg(root.join("main.simf"))
+        .args(["-Z", "imports", "--dep"])
+        .arg(dep_arg);
+    let Output { status, stderr, .. } = simc.output().expect("failed to run simc");
+
+    assert!(
+        !status.success(),
+        "simc must reject an incompatible directive in a dependency"
+    );
+
+    let stderr = String::from_utf8_lossy(&stderr);
+    let names_dependency =
+        stderr.contains("Incompatible compiler version") && stderr.contains("module.simf");
+    assert!(
+        names_dependency,
+        "expected an incompatible-version error pointing at the dependency, got:\n{stderr}"
+    );
+}