diff --git a/Cargo.lock b/Cargo.lock
index 00dfbba3bbba77..bbd8fd15cdc2b2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -501,19 +501,6 @@ dependencies = [
  "itertools 0.10.5",
 ]
 
-[[package]]
-name = "crossbeam"
-version = "0.8.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8"
-dependencies = [
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-epoch",
- "crossbeam-queue",
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.12"
@@ -542,15 +529,6 @@ dependencies = [
  "crossbeam-utils",
 ]
 
-[[package]]
-name = "crossbeam-queue"
-version = "0.3.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35"
-dependencies = [
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.19"
@@ -563,6 +541,16 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
 
+[[package]]
+name = "ctrlc"
+version = "3.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345"
+dependencies = [
+ "nix",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "darling"
 version = "0.20.8"
@@ -1804,11 +1792,17 @@ name = "red_knot"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "bitflags 2.5.0",
+ "crossbeam-channel",
+ "ctrlc",
  "dashmap",
  "hashbrown 0.14.3",
  "log",
+ "notify",
  "parking_lot",
+ "rayon",
  "ruff_index",
+ "ruff_notebook",
  "ruff_python_ast",
  "ruff_python_parser",
  "ruff_python_trivia",
@@ -2335,7 +2329,7 @@ name = "ruff_server"
 version = "0.2.2"
 dependencies = [
  "anyhow",
- "crossbeam",
+ "crossbeam-channel",
  "insta",
  "jod-thread",
  "libc",
diff --git a/Cargo.toml b/Cargo.toml
index 0b07c2925c9131..b9f5bfeda2298b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,7 +30,7 @@ console_error_panic_hook = { version = "0.1.7" }
 console_log = { version = "1.0.0" }
 countme = { version = "3.0.1" }
 criterion = { version = "0.5.1", default-features = false }
-crossbeam = { version = "0.8.4" }
+crossbeam-channel = { version = "0.5.12" }
 dashmap = { version = "5.5.3" }
 dirs = { version = "5.0.0" }
 drop_bomb = { version = "0.1.5" }
diff --git a/crates/red_knot/Cargo.toml b/crates/red_knot/Cargo.toml
index d4cd89069439f1..10705f750dc57f 100644
--- a/crates/red_knot/Cargo.toml
+++ b/crates/red_knot/Cargo.toml
@@ -17,12 +17,18 @@ ruff_python_ast = { path = "../ruff_python_ast" }
 ruff_python_trivia = { path = "../ruff_python_trivia" }
 ruff_text_size = { path = "../ruff_text_size" }
 ruff_index = { path = "../ruff_index" }
+ruff_notebook = { path = "../ruff_notebook" }
 
 anyhow = { workspace = true }
+bitflags = { workspace = true }
+ctrlc = "3.4.4"
+crossbeam-channel = { workspace = true }
 dashmap = { workspace = true }
 hashbrown = { workspace = true }
 log = { workspace = true }
+notify = { workspace = true }
 parking_lot = { workspace = true }
+rayon = { workspace = true }
 rustc-hash = { workspace = true }
 smallvec = { workspace = true }
 smol_str = "0.2.1"
diff --git a/crates/red_knot/src/cancellation.rs b/crates/red_knot/src/cancellation.rs
new file mode 100644
index 00000000000000..9f5e57cf566a0b
--- /dev/null
+++ b/crates/red_knot/src/cancellation.rs
@@ -0,0 +1,65 @@
+use std::sync::{Arc, Condvar, Mutex};
+
+#[derive(Debug, Default)]
+pub struct CancellationSource {
+    signal: Arc<(Mutex<bool>, Condvar)>,
+}
+
+impl CancellationSource {
+    pub fn new() -> Self {
+        Self {
+            signal: Arc::new((Mutex::new(false), Condvar::default())),
+        }
+    }
+
+    pub fn cancel(&self) {
+        let (cancelled, condvar) = &*self.signal;
+
+        let mut cancelled = cancelled.lock().unwrap();
+
+        if *cancelled {
+            return;
+        }
+
+        *cancelled = true;
+        condvar.notify_all();
+    }
+
+    pub fn is_cancelled(&self) -> bool {
+        let (cancelled, _) = &*self.signal;
+
+        *cancelled.lock().unwrap()
+    }
+
+    pub fn token(&self) -> CancellationToken {
+        CancellationToken {
+            signal: self.signal.clone(),
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct CancellationToken {
+    signal: Arc<(Mutex<bool>, Condvar)>,
+}
+
+impl CancellationToken {
+    /// Returns `true` if cancellation has been requested.
+    pub fn is_cancelled(&self) -> bool {
+        let (cancelled, _) = &*self.signal;
+
+        *cancelled.lock().unwrap()
+    }
+
+    /// Blocks the current thread until cancellation has been requested.
+    pub fn wait(&self) {
+        let (cancelled, condvar) = &*self.signal;
+
+        let lock = condvar
+            .wait_while(cancelled.lock().unwrap(), |cancelled| !*cancelled)
+            .unwrap();
+
+        debug_assert!(*lock);
+
+        drop(lock);
+    }
+}
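The two new cancellation primitives are meant to be used as a pair: the orchestrator holds the `CancellationSource`, while workers hold `CancellationToken` clones. A standalone sketch of the intended flow (the worker loop and sleep interval here are invented for illustration):

```rust
use std::time::Duration;

use red_knot::cancellation::CancellationSource;

fn main() {
    let source = CancellationSource::new();
    let token = source.token();

    // Workers either poll `is_cancelled` between units of work (as the
    // analysis tasks in `main.rs` below do) or block on `wait`.
    let worker = std::thread::spawn(move || {
        while !token.is_cancelled() {
            // ...do a bounded chunk of work, then re-check...
            std::thread::sleep(Duration::from_millis(10));
        }
    });

    // The orchestrator cancels once; `notify_all` wakes any blocked waiters.
    source.cancel();
    worker.join().unwrap();
}
```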
diff --git a/crates/red_knot/src/db.rs b/crates/red_knot/src/db.rs
index 3cfbeb8c35be3a..77dc49f30a955c 100644
--- a/crates/red_knot/src/db.rs
+++ b/crates/red_knot/src/db.rs
@@ -2,6 +2,7 @@ use std::path::Path;
 use std::sync::Arc;
 
 use crate::files::FileId;
+use crate::lint::{Diagnostics, LintSyntaxStorage};
 use crate::module::{Module, ModuleData, ModuleName, ModuleResolver, ModuleSearchPath};
 use crate::parse::{Parsed, ParsedStorage};
 use crate::source::{Source, SourceStorage};
@@ -16,6 +17,8 @@ pub trait SourceDb {
     fn source(&self, file_id: FileId) -> Source;
 
     fn parse(&self, file_id: FileId) -> Parsed;
+
+    fn lint_syntax(&self, file_id: FileId) -> Diagnostics;
 }
 
 pub trait SemanticDb: SourceDb {
@@ -38,6 +41,7 @@ pub trait Db: SemanticDb {}
 pub struct SourceJar {
     pub sources: SourceStorage,
     pub parsed: ParsedStorage,
+    pub lint_syntax: LintSyntaxStorage,
 }
 
 #[derive(Debug, Default)]
@@ -70,6 +74,7 @@ pub trait HasJar<T> {
 pub(crate) mod tests {
     use crate::db::{HasJar, SourceDb, SourceJar};
     use crate::files::{FileId, Files};
+    use crate::lint::{lint_syntax, Diagnostics};
     use crate::module::{
         add_module, path_to_module, resolve_module, set_module_search_paths, Module, ModuleData,
         ModuleName, ModuleSearchPath,
@@ -127,6 +132,10 @@ pub(crate) mod tests {
         fn parse(&self, file_id: FileId) -> Parsed {
             parse(self, file_id)
         }
+
+        fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
+            lint_syntax(self, file_id)
+        }
     }
 
     impl SemanticDb for TestDb {
diff --git a/crates/red_knot/src/lib.rs b/crates/red_knot/src/lib.rs
index b9323151a86961..64fe66654e45a4 100644
--- a/crates/red_knot/src/lib.rs
+++ b/crates/red_knot/src/lib.rs
@@ -9,15 +9,18 @@ use crate::files::FileId;
 
 pub mod ast_ids;
 pub mod cache;
+pub mod cancellation;
 pub mod db;
 pub mod files;
 pub mod hir;
+pub mod lint;
 pub mod module;
 mod parse;
 pub mod program;
 pub mod source;
 mod symbols;
 mod types;
+pub mod watch;
 
 pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
 
 #[allow(unused)]
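The `lint_syntax` method added to `SourceDb` above follows the same query pattern as `source` and `parse`: the trait exposes the query, the jar owns a cache, and a free function implements the computation against any database that carries the jar. A simplified, self-contained sketch of that shape (all types here are stand-ins; the real `KeyValueCache` is concurrent rather than `RefCell`-based):

```rust
use std::cell::RefCell;
use std::collections::HashMap;

// Stand-ins for `FileId` and `Diagnostics`.
type FileId = u32;
type Diagnostics = Vec<String>;

trait SourceDb {
    fn lint_syntax(&self, file_id: FileId) -> Diagnostics;
}

#[derive(Default)]
struct SourceJar {
    // Stand-in for `LintSyntaxStorage`.
    lint_syntax: RefCell<HashMap<FileId, Diagnostics>>,
}

#[derive(Default)]
struct Program {
    source: SourceJar,
}

// The free function memoizes through the jar; the trait impl only forwards.
fn lint_syntax(db: &Program, file_id: FileId) -> Diagnostics {
    if let Some(cached) = db.source.lint_syntax.borrow().get(&file_id) {
        return cached.clone();
    }

    let computed = vec![format!("diagnostics for file {file_id}")];
    db.source
        .lint_syntax
        .borrow_mut()
        .insert(file_id, computed.clone());
    computed
}

impl SourceDb for Program {
    fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
        lint_syntax(self, file_id)
    }
}

fn main() {
    let db = Program::default();
    // The second call is served from the jar's cache.
    assert_eq!(db.lint_syntax(0), db.lint_syntax(0));
}
```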
diff --git a/crates/red_knot/src/lint.rs b/crates/red_knot/src/lint.rs
new file mode 100644
index 00000000000000..eec1f0caf74039
--- /dev/null
+++ b/crates/red_knot/src/lint.rs
@@ -0,0 +1,124 @@
+use std::ops::{Deref, DerefMut};
+use std::sync::Arc;
+
+use ruff_python_ast::visitor::Visitor;
+use ruff_python_ast::StringLiteral;
+
+use crate::cache::KeyValueCache;
+use crate::db::{HasJar, SourceDb, SourceJar};
+use crate::files::FileId;
+
+pub(crate) fn lint_syntax<Db>(db: &Db, file_id: FileId) -> Diagnostics
+where
+    Db: SourceDb + HasJar<SourceJar>,
+{
+    let storage = &db.jar().lint_syntax;
+
+    storage.get(&file_id, |file_id| {
+        let mut diagnostics = Vec::new();
+
+        let source = db.source(*file_id);
+        lint_lines(source.text(), &mut diagnostics);
+
+        let parsed = db.parse(*file_id);
+
+        if parsed.errors().is_empty() {
+            let ast = parsed.ast();
+
+            let mut visitor = SyntaxLintVisitor {
+                diagnostics,
+                source: source.text(),
+            };
+            visitor.visit_body(&ast.body);
+            diagnostics = visitor.diagnostics;
+        } else {
+            diagnostics.extend(parsed.errors().iter().map(|err| err.to_string()));
+        }
+
+        Diagnostics::from(diagnostics)
+    })
+}
+
+pub(crate) fn lint_lines(source: &str, diagnostics: &mut Vec<String>) {
+    for (line_number, line) in source.lines().enumerate() {
+        if line.len() < 88 {
+            continue;
+        }
+
+        let char_count = line.chars().count();
+        if char_count > 88 {
+            diagnostics.push(format!(
+                "Line {} is too long ({} characters)",
+                line_number + 1,
+                char_count
+            ));
+        }
+    }
+}
+
+#[derive(Debug)]
+struct SyntaxLintVisitor<'a> {
+    diagnostics: Vec<String>,
+    source: &'a str,
+}
+
+impl Visitor<'_> for SyntaxLintVisitor<'_> {
+    fn visit_string_literal(&mut self, string_literal: &'_ StringLiteral) {
+        // A very naive implementation of the "use double quotes" rule.
+        let text = &self.source[string_literal.range];
+
+        if text.starts_with('\'') {
+            self.diagnostics
+                .push("Use double quotes for strings".to_string());
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub enum Diagnostics {
+    Empty,
+    List(Arc<Vec<String>>),
+}
+
+impl Diagnostics {
+    pub fn as_slice(&self) -> &[String] {
+        match self {
+            Diagnostics::Empty => &[],
+            Diagnostics::List(list) => list.as_slice(),
+        }
+    }
+}
+
+impl Deref for Diagnostics {
+    type Target = [String];
+
+    fn deref(&self) -> &Self::Target {
+        self.as_slice()
+    }
+}
+
+impl From<Vec<String>> for Diagnostics {
+    fn from(value: Vec<String>) -> Self {
+        if value.is_empty() {
+            Diagnostics::Empty
+        } else {
+            Diagnostics::List(Arc::new(value))
+        }
+    }
+}
+
+#[derive(Default, Debug)]
+pub struct LintSyntaxStorage(KeyValueCache<FileId, Diagnostics>);
+
+impl Deref for LintSyntaxStorage {
+    type Target = KeyValueCache<FileId, Diagnostics>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+impl DerefMut for LintSyntaxStorage {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.0
+    }
+}
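One detail in `lint_lines` worth noting: `line.len()` is the byte length, which is always greater than or equal to the character count, so it serves as a cheap early-out before the O(n) `chars().count()` walk. A standalone check of that invariant:

```rust
fn main() {
    // A line's byte length is always >= its character count, so
    // `line.len() < 88` can never skip a line with more than 88 characters.
    for line in ["a".repeat(100), "🦀".repeat(30), "short".to_string()] {
        assert!(line.len() >= line.chars().count());
    }
}
```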
diff --git a/crates/red_knot/src/main.rs b/crates/red_knot/src/main.rs
index 59a599e54d37a3..9bfb27bd0bb68d 100644
--- a/crates/red_knot/src/main.rs
+++ b/crates/red_knot/src/main.rs
@@ -1,136 +1,259 @@
-use std::path::PathBuf;
-use tracing::level_filters::LevelFilter;
+use std::collections::hash_map::Entry;
+use std::num::NonZeroUsize;
+use std::path::Path;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::{Arc, Mutex};
+
+use rustc_hash::FxHashMap;
 use tracing::subscriber::Interest;
 use tracing::{Level, Metadata};
+use tracing_subscriber::filter::LevelFilter;
 use tracing_subscriber::layer::{Context, Filter, SubscriberExt};
 use tracing_subscriber::{Layer, Registry};
 use tracing_tree::time::Uptime;
 
-use red_knot::db::{HasJar, SemanticDb, SourceJar};
+use red_knot::cancellation::CancellationSource;
+use red_knot::db::{HasJar, SourceDb, SourceJar};
+use red_knot::files::FileId;
 use red_knot::module::{ModuleSearchPath, ModuleSearchPathKind};
-use red_knot::program::Program;
+use red_knot::program::{FileChange, FileChangeKind, Program};
+use red_knot::watch::FileWatcher;
 use red_knot::{files, Workspace};
 
-#[allow(clippy::dbg_macro, clippy::print_stdout, clippy::unnecessary_wraps)]
+#[allow(
+    clippy::dbg_macro,
+    clippy::print_stdout,
+    clippy::unnecessary_wraps,
+    clippy::print_stderr
+)]
 fn main() -> anyhow::Result<()> {
     setup_tracing();
 
-    let files = files::Files::default();
-    let mut workspace = Workspace::new(PathBuf::from("/home/micha/astral/test/"));
-
-    let file_id = files.intern(&workspace.root().join("test.py"));
-    workspace.open_file(file_id);
+    let arguments: Vec<_> = std::env::args().collect();
 
-    // For now, discover all python files in the root directory and mark them as open.
+    if arguments.len() < 2 {
+        eprintln!("Usage: red_knot <path>");
+        return Err(anyhow::anyhow!("Invalid arguments"));
+    }
 
-    // for entry in fs::read_dir(workspace.root())?
-    //     .filter_map(|entry| entry.ok())
-    //     .filter(|entry| entry.path().extension().map_or(false, |ext| ext == "py"))
-    // {
-    //     let file_id = files.intern(&entry.path());
-    //     dbg!(file_id, &entry.path());
-    //
+    let entry_point = Path::new(&arguments[1]);
 
-    //     workspace.open_file(file_id);
-    // }
+    if !entry_point.exists() {
+        eprintln!("The entry point does not exist.");
+        return Err(anyhow::anyhow!("Invalid arguments"));
+    }
 
-    // TODO: discover all python files and intern the file ids?
+    if !entry_point.is_file() {
+        eprintln!("The entry point is not a file.");
+        return Err(anyhow::anyhow!("Invalid arguments"));
+    }
 
-    tracing::debug!("start analysis for workspace");
+    let files = files::Files::default();
+    let workspace_folder = entry_point.parent().unwrap();
+    let mut workspace = Workspace::new(workspace_folder.to_path_buf());
 
     let workspace_search_path = ModuleSearchPath::new(
         workspace.root().to_path_buf(),
         ModuleSearchPathKind::FirstParty,
     );
 
-    let program = Program::new(vec![workspace_search_path], files);
-
-    let mut queue: Vec<_> = workspace.open_files().collect();
-    // let mut queued: FxHashSet<_> = queue.iter().copied().collect();
-    // Should we use an accumulator for this?
-
-    // TODO we could now consider spawning the analysis of the dependencies into their own threads.
-    while let Some(file) = queue.pop() {
-        // let source = program.source(file);
-        // let module_path = program.file_path(file_id);
-
-        // // TODO this looks weird: dependencies.files. Let's figure out a better naming and structure.
-        // let dependencies = dependencies(&db, content);
-        //
-        // // We know that we need to analyse all dependencies, but we don't need to check them.
-        // for dependency in &*dependencies {
-        //     let dependency_path = module_path.join(&dependency.path).canonicalize().unwrap();
-        //     let dependency_file_id = files.intern(&dependency_path);
-        //
-        //     if queued.insert(dependency_file_id) {
-        //         queue.push(dependency_file_id);
-        //     }
-        // }
-
-        let symbols = program.symbol_table(file);
-
-        dbg!(&symbols);
-
-        // If this is an open file
-        if workspace.is_file_open(file) {
-            // * run source text, logical line, and path based rules.
-            // * build the semantic model
-            // * run the semantic rules
-            // * run type checking
-            // Some of the steps could run together
-
-            // TODO check_tokens(&db, parsed.tokens(&db));
-
-            // I think we can run the syntax checks and the item tree construction in a single traversal?
-            // Probably not, because we actually want to visit the nodes in a different order (breath first vs depth first, at least for some nodes).
-            // diagnostics.extend(check_physical_lines(&db, content).diagnostics(&db));
-            // diagnostics.extend(check_syntax(&db, parsed).diagnostics(&db));
-        }
+    let entry_id = files.intern(entry_point);
 
-        // let ids = ast_ids(&db, content);
-
-        // dbg!(ids.root());
-        //
-        // dbg!(ids.ast_id_for_node_key(ids.root()));
-        //
-        // let ast = parsed.ast(&db);
-        //
-        // if let Some(function) = ast.body.iter().find_map(|stmt| stmt.as_function_def_stmt()) {
-        //     let id = ids.ast_id(function);
-        //     dbg!(&id);
-        //
-        //     let key = ids.key(id);
-        //
-        //     dbg!(key.resolve(ast.into()));
-        // }
-        //
-        // let definitions = definitions(&db, content);
-        //
-        // dbg!(&definitions);
-
-        // This is the HIR
-        // I forgot how rust-analyzer reference from the HIR to the AST.
-        // let item_tree = build_item_tree(&db, parsed.ast(&db)); // construct the item tree from the AST (the item tree is location agnostic)
-        // The bindings should only resolve internally. Imports should be resolved to full qualified paths
-        // but not resolved to bindings to ensure that result can be calculated on a per-file basis.
-        // let bindings = binder(&db, item_tree); // Run the item tree through the binder
-
-        // let types = type_inference(&db, bindings); // Run the type checker on the bindings
-
-        // We need to build the symbol table here. What rust-analyzer does is it first transforms
-        // the AST into a HIR that only contains the definitions. Each HIR node gets a unique where
-        // it first assigns IDs to the top-level elements before their children (to ensure that changes
-        // in the function body remain local). The idea of the HIR is to make the analysis location independent.
-
-        // Run the syntax only rules for the file and perform some binding?
-
-        // dbg!(parsed.module(&db));
-    }
+    let mut program = Program::new(vec![workspace_search_path], files.clone());
+
+    workspace.open_file(entry_id);
+
+    let (sender, receiver) = crossbeam_channel::bounded(
+        std::thread::available_parallelism()
+            .map(NonZeroUsize::get)
+            .unwrap_or(50)
+            .max(4), // TODO: Both these numbers are very arbitrary. Pick sensible defaults.
+    );
 
-    // TODO let's trigger a re-check down here. Not sure how to do this or how to model it but that's kind of what this
-    // is all about.
+    // Listen for Ctrl+C and abort the watch mode.
+    let abort_sender = Mutex::new(Some(sender.clone()));
+    ctrlc::set_handler(move || {
+        let mut lock = abort_sender.lock().unwrap();
 
-    // Oh dear, fitting this all into the fix loop will be fun.
+        if let Some(sender) = lock.take() {
+            sender.send(Message::Exit).unwrap();
+        }
+    })?;
+
+    // Watch for file changes and re-trigger the analysis.
+    let file_changes_sender = sender.clone();
+
+    let mut file_watcher = FileWatcher::new(
+        move |changes| {
+            file_changes_sender
+                .send(Message::FileChanges(changes))
+                .unwrap();
+        },
+        files.clone(),
+    )?;
+
+    file_watcher.watch_folder(workspace_folder)?;
+
+    let files_to_check = vec![entry_id];
+
+    // Main loop that runs until the user exits the program.
+    // Runs the analysis for each changed file and cancels the analysis if a new change is detected.
+    loop {
+        let changes = {
+            tracing::trace!("Main Loop: Tick");
+
+            // Token to cancel the analysis if a new change is detected.
+            let run_cancellation_token_source = CancellationSource::new();
+            let run_cancellation_token = run_cancellation_token_source.token();
+
+            // Tracks the number of pending analysis runs.
+            let pending_analysis = Arc::new(AtomicUsize::new(0));
+
+            // Take read-only references that are `Copy` and `Send`.
+            let program = &program;
+            let workspace = &workspace;
+
+            let receiver = receiver.clone();
+            let started_analysis = pending_analysis.clone();
+
+            // Orchestration task. Ideally, we would run this on the main thread, but we start it as early as
+            // possible so that we avoid scheduling tasks when we already know that we're about to exit or cancel
+            // the analysis because of a file change. This uses `std::thread::spawn` because we don't want it to
+            // run inside the thread pool, where this code would deadlock when using a thread pool of size 1.
+            let orchestration_handle = std::thread::spawn(move || {
+                fn consume_pending_messages(
+                    receiver: &crossbeam_channel::Receiver<Message>,
+                    mut aggregated_changes: AggregatedChanges,
+                ) -> NextTickCommand {
+                    loop {
+                        // Consume any incoming file change messages before starting a new analysis,
+                        // but don't wait longer than 100ms.
+                        crossbeam_channel::select! {
+                            recv(receiver) -> message => {
+                                match message {
+                                    Ok(Message::Exit) => {
+                                        return NextTickCommand::Exit;
+                                    }
+                                    Ok(Message::FileChanges(file_changes)) => {
+                                        aggregated_changes.extend(file_changes);
+                                    }
+
+                                    Ok(Message::AnalysisCancelled | Message::AnalysisCompleted(_)) => {
+                                        unreachable!(
+                                            "All analysis runs should have completed at this point"
+                                        );
+                                    },
+
+                                    Err(_) => {
+                                        // There are no more senders; no point in waiting for more messages.
+                                        break;
+                                    }
+                                }
+                            },
+                            default(std::time::Duration::from_millis(100)) => {
+                                break;
+                            }
+                        }
+                    }
+
+                    NextTickCommand::FileChanges(aggregated_changes)
+                }
+
+                let mut diagnostics = Vec::new();
+                let mut aggregated_changes = AggregatedChanges::default();
+
+                for message in &receiver {
+                    match message {
+                        Message::AnalysisCompleted(file_diagnostics) => {
+                            diagnostics.extend_from_slice(&file_diagnostics);
+
+                            if pending_analysis.fetch_sub(1, Ordering::SeqCst) == 1 {
+                                // Analysis completed; print the diagnostics.
+                                dbg!(&diagnostics);
+                            }
+                        }
+
+                        Message::AnalysisCancelled => {
+                            if pending_analysis.fetch_sub(1, Ordering::SeqCst) == 1 {
+                                return consume_pending_messages(&receiver, aggregated_changes);
+                            }
+                        }
+
+                        Message::Exit => {
+                            run_cancellation_token_source.cancel();
+
+                            // Don't consume any outstanding messages because we're exiting anyway.
+                            return NextTickCommand::Exit;
+                        }
+
+                        Message::FileChanges(changes) => {
+                            // Request cancellation, but wait until all analysis tasks have completed to
+                            // avoid stale messages in the next main loop.
+                            run_cancellation_token_source.cancel();
+
+                            aggregated_changes.extend(changes);
+
+                            if pending_analysis.load(Ordering::SeqCst) == 0 {
+                                return consume_pending_messages(&receiver, aggregated_changes);
+                            }
+                        }
+                    }
+                }
+
+                // This is reached when there's no Ctrl+C handler and no file watcher left.
+                // In that case, assume that we aren't running in watch mode and exit.
+                NextTickCommand::Exit
+            });
+
+            // Start the analysis tasks on the thread pool and wait until they complete.
+            rayon::scope(|scope| {
+                for file in &files_to_check {
+                    let cancellation_token = run_cancellation_token.clone();
+                    if cancellation_token.is_cancelled() {
+                        break;
+                    }
+
+                    let sender = sender.clone();
+
+                    started_analysis.fetch_add(1, Ordering::SeqCst);
+
+                    // TODO: How do we allow the host to control the number of threads used?
+                    //  Or should we just assume that each host implements its own main loop?
+                    //  I don't think that's entirely unreasonable, but we should avoid having
+                    //  different main loops per host AND command (e.g. format vs check vs lint).
+                    scope.spawn(move |_| {
+                        if cancellation_token.is_cancelled() {
+                            tracing::trace!("Exit analysis because cancellation was requested.");
+                            sender.send(Message::AnalysisCancelled).unwrap();
+                            return;
+                        }
+
+                        // TODO schedule the dependencies.
+                        let mut diagnostics = Vec::new();
+
+                        if workspace.is_file_open(*file) {
+                            diagnostics.extend_from_slice(&program.lint_syntax(*file));
+                        }
+
+                        sender
+                            .send(Message::AnalysisCompleted(diagnostics))
+                            .unwrap();
+                    });
+                }
+            });
+
+            // Wait for the orchestration task to complete. This either returns the file changes
+            // or instructs the main loop to exit.
+            match orchestration_handle.join().unwrap() {
+                NextTickCommand::FileChanges(changes) => changes,
+                NextTickCommand::Exit => {
+                    break;
+                }
+            }
+        };
+
+        // We have a mutable reference here and can perform all necessary invalidations.
+        program.apply_changes(changes.iter());
+    }
 
     let source_jar: &SourceJar = program.jar();
 
@@ -140,6 +263,90 @@ fn main() -> anyhow::Result<()> {
     Ok(())
 }
 
+enum Message {
+    AnalysisCompleted(Vec<String>),
+    AnalysisCancelled,
+    Exit,
+    FileChanges(Vec<FileChange>),
+}
+
+#[derive(Default, Debug)]
+struct AggregatedChanges {
+    changes: FxHashMap<FileId, FileChangeKind>,
+}
+
+impl AggregatedChanges {
+    fn add(&mut self, change: FileChange) {
+        match self.changes.entry(change.file_id()) {
+            Entry::Occupied(mut entry) => {
+                let merged = entry.get_mut();
+
+                match (merged, change.kind()) {
+                    (FileChangeKind::Created, FileChangeKind::Deleted) => {
+                        // Deletion after creation means that Ruff never saw the file.
+                        entry.remove();
+                    }
+                    (FileChangeKind::Created, FileChangeKind::Modified) => {
+                        // No-op: for Ruff, modifying a file that it doesn't yet know exists is still a creation.
+                    }
+
+                    (FileChangeKind::Modified, FileChangeKind::Created) => {
+                        // That should probably not happen. Continue considering it a modification.
+                    }
+
+                    (FileChangeKind::Modified, FileChangeKind::Deleted) => {
+                        *entry.get_mut() = FileChangeKind::Deleted;
+                    }
+
+                    (FileChangeKind::Deleted, FileChangeKind::Created) => {
+                        *entry.get_mut() = FileChangeKind::Modified;
+                    }
+
+                    (FileChangeKind::Deleted, FileChangeKind::Modified) => {
+                        // That's weird, but let's consider it a modification.
+                        *entry.get_mut() = FileChangeKind::Modified;
+                    }
+
+                    (FileChangeKind::Created, FileChangeKind::Created)
+                    | (FileChangeKind::Modified, FileChangeKind::Modified)
+                    | (FileChangeKind::Deleted, FileChangeKind::Deleted) => {
+                        // No-op transitions. Some of them should be impossible, but we handle them anyway.
+                    }
+                }
+            }
+            Entry::Vacant(entry) => {
+                entry.insert(change.kind());
+            }
+        }
+    }
+
+    fn extend<I>(&mut self, changes: I)
+    where
+        I: IntoIterator<Item = FileChange>,
+        I::IntoIter: ExactSizeIterator,
+    {
+        let iter = changes.into_iter();
+        self.changes.reserve(iter.len());
+
+        for change in iter {
+            self.add(change);
+        }
+    }
+
+    fn iter(&self) -> impl Iterator<Item = FileChange> + '_ {
+        self.changes
+            .iter()
+            .map(|(id, kind)| FileChange::new(*id, *kind))
+    }
+}
+
+enum NextTickCommand {
+    /// Exit the main loop in the next tick.
+    Exit,
+    /// Apply the given changes in the next main loop tick.
+    FileChanges(AggregatedChanges),
+}
+
 fn setup_tracing() {
     let subscriber = Registry::default().with(
         tracing_tree::HierarchicalLayer::default()
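The transition table in `AggregatedChanges::add` collapses bursts of file-watcher events into one net change per file. Restated as a standalone function (same rules, types inlined for brevity):

```rust
#[derive(Copy, Clone, Debug, PartialEq)]
enum Kind {
    Created,
    Modified,
    Deleted,
}

/// `None` means the entry is dropped entirely (created and deleted
/// within one batch: the tool never needs to see the file).
fn merge(old: Kind, new: Kind) -> Option<Kind> {
    use Kind::*;

    match (old, new) {
        (Created, Deleted) => None,
        (Created, Modified) => Some(Created),  // still a creation overall
        (Modified, Created) => Some(Modified), // odd transition; stay a modification
        (Modified, Deleted) => Some(Deleted),
        (Deleted, Created) => Some(Modified),  // delete + recreate == modify
        (Deleted, Modified) => Some(Modified), // odd transition; treat as modification
        (old, _) => Some(old),                 // Created+Created etc. are no-ops
    }
}

fn main() {
    assert_eq!(merge(Kind::Created, Kind::Deleted), None);
    assert_eq!(merge(Kind::Deleted, Kind::Created), Some(Kind::Modified));
}
```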
diff --git a/crates/red_knot/src/program/mod.rs b/crates/red_knot/src/program/mod.rs
index 9bd473e204f62d..38c94854814f8a 100644
--- a/crates/red_knot/src/program/mod.rs
+++ b/crates/red_knot/src/program/mod.rs
@@ -3,6 +3,7 @@ use std::sync::Arc;
 
 use crate::db::{Db, HasJar, SemanticDb, SemanticJar, SourceDb, SourceJar};
 use crate::files::{FileId, Files};
+use crate::lint::{lint_syntax, Diagnostics, LintSyntaxStorage};
 use crate::module::{
     add_module, path_to_module, resolve_module, set_module_search_paths, Module, ModuleData,
     ModuleName, ModuleResolver, ModuleSearchPath,
@@ -24,6 +25,7 @@ impl Program {
             source: SourceJar {
                 sources: SourceStorage::default(),
                 parsed: ParsedStorage::default(),
+                lint_syntax: LintSyntaxStorage::default(),
             },
             semantic: SemanticJar {
                 module_resolver: ModuleResolver::new(module_search_paths),
@@ -33,14 +35,19 @@ impl Program {
         }
     }
 
-    pub fn file_changed(&mut self, path: &Path) {
-        let Some(file_id) = self.files.try_get(path) else {
-            return;
-        };
-
-        self.semantic.module_resolver.remove_module(path);
-        self.source.sources.remove(&file_id);
-        self.source.parsed.remove(&file_id);
+    pub fn apply_changes<I>(&mut self, changes: I)
+    where
+        I: IntoIterator<Item = FileChange>,
+    {
+        for change in changes {
+            self.semantic
+                .module_resolver
+                .remove_module(&self.file_path(change.id));
+            self.semantic.symbol_tables.remove(&change.id);
+            self.source.sources.remove(&change.id);
+            self.source.parsed.remove(&change.id);
+            self.source.lint_syntax.remove(&change.id);
+        }
     }
 }
 
@@ -60,6 +67,10 @@ impl SourceDb for Program {
     fn parse(&self, file_id: FileId) -> Parsed {
         parse(self, file_id)
     }
+
+    fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
+        lint_syntax(self, file_id)
+    }
 }
 
 impl SemanticDb for Program {
@@ -106,3 +117,30 @@ impl HasJar<SemanticJar> for Program {
         &mut self.semantic
     }
 }
+
+#[derive(Copy, Clone, Debug)]
+pub struct FileChange {
+    id: FileId,
+    kind: FileChangeKind,
+}
+
+impl FileChange {
+    pub fn new(file_id: FileId, kind: FileChangeKind) -> Self {
+        Self { id: file_id, kind }
+    }
+
+    pub fn file_id(&self) -> FileId {
+        self.id
+    }
+
+    pub fn kind(&self) -> FileChangeKind {
+        self.kind
+    }
+}
+
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub enum FileChangeKind {
+    Created,
+    Modified,
+    Deleted,
+}
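For context, this is how a host is expected to drive the new invalidation API; a minimal sketch assuming the `Files` handle used elsewhere in this patch (the path is made up):

```rust
use std::path::Path;

use red_knot::files::Files;
use red_knot::program::{FileChange, FileChangeKind, Program};

fn invalidate_after_edit(program: &mut Program, files: &Files) {
    // Hypothetical change: `lib.py` was edited on disk.
    let file_id = files.intern(Path::new("/workspace/lib.py"));

    // Evicts the cached source text, parse result, lint diagnostics, and
    // module resolution for the file; the next query recomputes them lazily.
    program.apply_changes([FileChange::new(file_id, FileChangeKind::Modified)]);
}
```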
diff --git a/crates/red_knot/src/source.rs b/crates/red_knot/src/source.rs
index 57ce7972fbb9ef..7dd6ed92852a72 100644
--- a/crates/red_knot/src/source.rs
+++ b/crates/red_knot/src/source.rs
@@ -1,5 +1,7 @@
 use crate::cache::KeyValueCache;
 use crate::db::{HasJar, SourceDb, SourceJar};
+use ruff_notebook::Notebook;
+use ruff_python_ast::PySourceType;
 use std::ops::{Deref, DerefMut};
 use std::sync::Arc;
 
@@ -13,6 +15,8 @@ where
     let sources = &db.jar().sources;
 
     sources.get(&file_id, |file_id| {
+        tracing::trace!("Reading source text for file_id={:?}.", file_id);
+
         let path = db.file_path(*file_id);
 
         let source_text = std::fs::read_to_string(&path).unwrap_or_else(|err| {
@@ -20,23 +24,59 @@
             String::new()
         });
 
-        Source::new(source_text)
+        let python_ty = PySourceType::from(&path);
+
+        let kind = match python_ty {
+            PySourceType::Python => {
+                SourceKind::Python(Arc::from(source_text))
+            }
+            PySourceType::Stub => SourceKind::Stub(Arc::from(source_text)),
+            PySourceType::Ipynb => {
+                let notebook = Notebook::from_source_code(&source_text).unwrap_or_else(|err| {
+                    // TODO should this be changed to never fail?
+                    //  Or should we instead add a diagnostic somewhere? But what would we return in this case?
+                    tracing::error!(
+                        "Failed to parse notebook '{path:?}': {err}. Falling back to an empty notebook."
+                    );
+                    Notebook::from_source_code("").unwrap()
+                });
+
+                SourceKind::IpyNotebook(Arc::new(notebook))
+            }
+        };
+
+        Source { kind }
     })
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq)]
+pub enum SourceKind {
+    Python(Arc<str>),
+    Stub(Arc<str>),
+    IpyNotebook(Arc<Notebook>),
+}
+
+#[derive(Debug, Clone, PartialEq)]
 pub struct Source {
-    text: Arc<str>,
+    kind: SourceKind,
 }
 
 impl Source {
-    pub fn new<T: Into<Arc<str>>>(source: T) -> Self {
+    pub fn python<T: Into<Arc<str>>>(source: T) -> Self {
         Self {
-            text: source.into(),
+            kind: SourceKind::Python(source.into()),
         }
     }
 
+    pub fn kind(&self) -> &SourceKind {
+        &self.kind
+    }
+
     pub fn text(&self) -> &str {
-        &self.text
+        match &self.kind {
+            SourceKind::Python(text) => text,
+            SourceKind::Stub(text) => text,
+            SourceKind::IpyNotebook(notebook) => notebook.source_code(),
+        }
     }
 }
diff --git a/crates/red_knot/src/watch.rs b/crates/red_knot/src/watch.rs
new file mode 100644
index 00000000000000..d9681bad66904b
--- /dev/null
+++ b/crates/red_knot/src/watch.rs
@@ -0,0 +1,78 @@
+use anyhow::Context;
+use std::path::Path;
+
+use crate::files::Files;
+use crate::program::{FileChange, FileChangeKind};
+use notify::event::{CreateKind, RemoveKind};
+use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
+
+pub struct FileWatcher {
+    watcher: RecommendedWatcher,
+}
+
+pub trait EventHandler: Send + 'static {
+    fn handle(&self, changes: Vec<FileChange>);
+}
+
+impl<F> EventHandler for F
+where
+    F: Fn(Vec<FileChange>) + Send + 'static,
+{
+    fn handle(&self, changes: Vec<FileChange>) {
+        let f = self;
+        f(changes);
+    }
+}
+
+impl FileWatcher {
+    pub fn new<E>(handler: E, files: Files) -> anyhow::Result<Self>
+    where
+        E: EventHandler,
+    {
+        Self::from_handler(Box::new(handler), files)
+    }
+
+    fn from_handler(handler: Box<dyn EventHandler>, files: Files) -> anyhow::Result<Self> {
+        let watcher = recommended_watcher(move |changes: notify::Result<Event>| {
+            match changes {
+                Ok(event) => {
+                    // TODO verify that this handles all events correctly
+                    let change_kind = match event.kind {
+                        EventKind::Create(CreateKind::File) => FileChangeKind::Created,
+                        EventKind::Modify(_) => FileChangeKind::Modified,
+                        EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted,
+                        _ => {
+                            return;
+                        }
+                    };
+
+                    let mut changes = Vec::new();
+
+                    for path in event.paths {
+                        if path.is_file() {
+                            let id = files.intern(&path);
+                            changes.push(FileChange::new(id, change_kind));
+                        }
+                    }
+
+                    if !changes.is_empty() {
+                        handler.handle(changes);
+                    }
+                }
+                // TODO proper error handling
+                Err(_err) => {
+                    panic!("Error");
+                }
+            }
+        })
+        .context("Failed to create file watcher.")?;
+
+        Ok(Self { watcher })
+    }
+
+    pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> {
+        self.watcher.watch(path, RecursiveMode::Recursive)?;
+
+        Ok(())
+    }
+}
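And the watcher side, which `main.rs` wires to the main-loop channel; a sketch under the same assumptions (the workspace path is invented):

```rust
use std::path::Path;

use red_knot::files::Files;
use red_knot::program::FileChange;
use red_knot::watch::FileWatcher;

fn watch_workspace(
    files: Files,
) -> anyhow::Result<(FileWatcher, crossbeam_channel::Receiver<Vec<FileChange>>)> {
    let (sender, receiver) = crossbeam_channel::unbounded();

    // Any `Fn(Vec<FileChange>) + Send + 'static` closure implements `EventHandler`.
    let mut watcher = FileWatcher::new(
        move |changes: Vec<FileChange>| {
            sender.send(changes).unwrap();
        },
        files,
    )?;

    // Recursively watch the workspace root; the main loop drains `receiver`.
    watcher.watch_folder(Path::new("/workspace"))?;

    Ok((watcher, receiver))
}
```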
diff --git a/crates/ruff_server/Cargo.toml b/crates/ruff_server/Cargo.toml
index a93430d6eb0d31..591a37b3152d5d 100644
--- a/crates/ruff_server/Cargo.toml
+++ b/crates/ruff_server/Cargo.toml
@@ -26,7 +26,7 @@ ruff_text_size = { path = "../ruff_text_size" }
 ruff_workspace = { path = "../ruff_workspace" }
 
 anyhow = { workspace = true }
-crossbeam = { workspace = true }
+crossbeam-channel = { workspace = true }
 jod-thread = { workspace = true }
 libc = { workspace = true }
 lsp-server = { workspace = true }
diff --git a/crates/ruff_server/src/server/client.rs b/crates/ruff_server/src/server/client.rs
index d36c50ef665f8d..dae8ed269a24ce 100644
--- a/crates/ruff_server/src/server/client.rs
+++ b/crates/ruff_server/src/server/client.rs
@@ -6,7 +6,7 @@ use serde_json::Value;
 
 use super::schedule::Task;
 
-pub(crate) type ClientSender = crossbeam::channel::Sender<lsp_server::Message>;
+pub(crate) type ClientSender = crossbeam_channel::Sender<lsp_server::Message>;
 
 type ResponseBuilder<'s> = Box<dyn FnOnce(lsp_server::Response) -> Task<'s>>;
 
diff --git a/crates/ruff_server/src/server/schedule.rs b/crates/ruff_server/src/server/schedule.rs
index fe8cc5c18c4e0b..4ffd819e868af9 100644
--- a/crates/ruff_server/src/server/schedule.rs
+++ b/crates/ruff_server/src/server/schedule.rs
@@ -1,6 +1,6 @@
 use std::num::NonZeroUsize;
 
-use crossbeam::channel::Sender;
+use crossbeam_channel::Sender;
 
 use crate::session::Session;
 
diff --git a/crates/ruff_server/src/server/schedule/thread/pool.rs b/crates/ruff_server/src/server/schedule/thread/pool.rs
index 7d1f9a418fde4d..ea07db65d4a975 100644
--- a/crates/ruff_server/src/server/schedule/thread/pool.rs
+++ b/crates/ruff_server/src/server/schedule/thread/pool.rs
@@ -21,7 +21,7 @@ use std::{
     },
 };
 
-use crossbeam::channel::{Receiver, Sender};
+use crossbeam_channel::{Receiver, Sender};
 
 use super::{Builder, JoinHandle, ThreadPriority};
 
@@ -52,7 +52,7 @@ impl Pool {
         let threads = usize::from(threads);
 
         // Channel buffer capacity is between 2 and 4, depending on the pool size.
-        let (job_sender, job_receiver) = crossbeam::channel::bounded(std::cmp::min(threads * 2, 4));
+        let (job_sender, job_receiver) = crossbeam_channel::bounded(std::cmp::min(threads * 2, 4));
         let extant_tasks = Arc::new(AtomicUsize::new(0));
 
         let mut handles = Vec::with_capacity(threads);