perf(lsp): Cache semantic tokens for open documents (#23799)

VS Code will typically send a `textDocument/semanticTokens/full` request followed by `textDocument/semanticTokens/range`, and occasionally requests semantic tokens even when we know nothing has changed. Semantic tokens also get refreshed on each change. Computing semantic tokens is relatively heavy in TSC, so we should avoid it as much as possible.

This change caches the semantic tokens for open documents, to avoid making TSC do unnecessary work. It results in a noticeable improvement in local benchmarking.

Before:

```
Starting Deno benchmark
-> Start benchmarking lsp
   - Simple Startup/Shutdown (10 runs, mean: 383ms)
   - Big Document/Several Edits (5 runs, mean: 1079ms)
   - Find/Replace (10 runs, mean: 59ms)
   - Code Lens (10 runs, mean: 440ms)
   - deco-cx/apps Multiple Edits + Navigation (5 runs, mean: 9921ms)
<- End benchmarking lsp
```

After:

```
Starting Deno benchmark
-> Start benchmarking lsp
   - Simple Startup/Shutdown (10 runs, mean: 395ms)
   - Big Document/Several Edits (5 runs, mean: 1024ms)
   - Find/Replace (10 runs, mean: 56ms)
   - Code Lens (10 runs, mean: 438ms)
   - deco-cx/apps Multiple Edits + Navigation (5 runs, mean: 8927ms)
<- End benchmarking lsp
```
denoland · May 15, 2024 · 36d877b · 36d877b
1 parent 1a788b5
commit 36d877b
Show file tree

Hide file tree

Showing 5 changed files with 420 additions and 3 deletions.
diff --git a/cli/lsp/documents.rs b/cli/lsp/documents.rs
@@ -143,6 +143,16 @@ impl AssetOrDocument {
     }
   }
 
+  /// Returns a copy of the semantic tokens cached for this document, if any.
+  ///
+  /// Yields `None` for assets, for documents that have no `open_data`, and
+  /// for documents whose token cache is currently empty.
+  pub fn maybe_semantic_tokens(&self) -> Option<lsp::SemanticTokens> {
+    match self {
+      AssetOrDocument::Document(doc) => {
+        let open_data = doc.open_data.as_ref()?;
+        // the lock is only held long enough to clone the cached value
+        open_data.maybe_semantic_tokens.lock().clone()
+      }
+      AssetOrDocument::Asset(_) => None,
+    }
+  }
+
   pub fn text(&self) -> Arc<str> {
     match self {
       AssetOrDocument::Asset(a) => a.text(),
@@ -249,6 +259,7 @@ fn get_maybe_test_module_fut(
 pub struct DocumentOpenData {
   lsp_version: i32,
   maybe_parsed_source: Option<ParsedSourceResult>,
+  maybe_semantic_tokens: Arc<Mutex<Option<lsp::SemanticTokens>>>,
 }
 
 #[derive(Debug)]
@@ -330,6 +341,7 @@ impl Document {
       open_data: maybe_lsp_version.map(|v| DocumentOpenData {
         lsp_version: v,
         maybe_parsed_source,
+        maybe_semantic_tokens: Default::default(),
       }),
       resolver,
       specifier,
@@ -421,6 +433,8 @@ impl Document {
       open_data: self.open_data.as_ref().map(|d| DocumentOpenData {
         lsp_version: d.lsp_version,
         maybe_parsed_source,
+        // reset semantic tokens
+        maybe_semantic_tokens: Default::default(),
       }),
       resolver,
       specifier: self.specifier.clone(),
@@ -499,6 +513,7 @@ impl Document {
       open_data: self.open_data.is_some().then_some(DocumentOpenData {
         lsp_version: version,
         maybe_parsed_source,
+        maybe_semantic_tokens: Default::default(),
       }),
       resolver: self.resolver.clone(),
     }))
@@ -652,6 +667,15 @@ impl Document {
   ) {
     *self.maybe_navigation_tree.lock() = Some(navigation_tree);
   }
+
+  /// Stores `semantic_tokens` as the cached semantic tokens of this document,
+  /// replacing any previously cached value.
+  ///
+  /// Does nothing (and drops `semantic_tokens`) when the document has no
+  /// `open_data`.
+  pub fn cache_semantic_tokens_full(
+    &self,
+    semantic_tokens: lsp::SemanticTokens,
+  ) {
+    if let Some(open_data) = &self.open_data {
+      let mut cached = open_data.maybe_semantic_tokens.lock();
+      *cached = Some(semantic_tokens);
+    }
+  }
 }
 
 fn resolve_media_type(

diff --git a/cli/lsp/language_server.rs b/cli/lsp/language_server.rs
@@ -2529,6 +2529,16 @@ impl Inner {
       .performance
       .mark_with_args("lsp.semantic_tokens_full", &params);
     let asset_or_doc = self.get_asset_or_document(&specifier)?;
+    if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
+      let response = if !tokens.data.is_empty() {
+        Some(SemanticTokensResult::Tokens(tokens.clone()))
+      } else {
+        None
+      };
+      self.performance.measure(mark);
+      return Ok(response);
+    }
+
     let line_index = asset_or_doc.line_index();
 
     let semantic_classification = self
@@ -2542,6 +2552,11 @@ impl Inner {
 
     let semantic_tokens =
       semantic_classification.to_semantic_tokens(line_index)?;
+
+    if let Some(doc) = asset_or_doc.document() {
+      doc.cache_semantic_tokens_full(semantic_tokens.clone());
+    }
+
     let response = if !semantic_tokens.data.is_empty() {
       Some(SemanticTokensResult::Tokens(semantic_tokens))
     } else {
@@ -2566,6 +2581,18 @@ impl Inner {
       .performance
       .mark_with_args("lsp.semantic_tokens_range", &params);
     let asset_or_doc = self.get_asset_or_document(&specifier)?;
+    if let Some(tokens) = asset_or_doc.maybe_semantic_tokens() {
+      let tokens =
+        super::semantic_tokens::tokens_within_range(&tokens, params.range);
+      let response = if !tokens.data.is_empty() {
+        Some(SemanticTokensRangeResult::Tokens(tokens))
+      } else {
+        None
+      };
+      self.performance.measure(mark);
+      return Ok(response);
+    }
+
     let line_index = asset_or_doc.line_index();
 
     let semantic_classification = self

diff --git a/cli/lsp/semantic_tokens.rs b/cli/lsp/semantic_tokens.rs
@@ -7,6 +7,7 @@
 
 use std::ops::Index;
 use std::ops::IndexMut;
+use tower_lsp::lsp_types as lsp;
 use tower_lsp::lsp_types::SemanticToken;
 use tower_lsp::lsp_types::SemanticTokenModifier;
 use tower_lsp::lsp_types::SemanticTokenType;
@@ -247,6 +248,54 @@ impl SemanticTokensBuilder {
   }
 }
 
+/// Extracts from `tokens` the semantic tokens whose start position lies
+/// within `range`, re-encoded as a standalone token list.
+///
+/// Token positions in `tokens.data` are delta-encoded: `delta_line` is
+/// relative to the previous token's line, and `delta_start` is relative to
+/// the previous token's start character when both tokens are on the same
+/// line (otherwise it is relative to the start of the line).
+///
+/// A token is kept when its start position is at or after `range.start` and
+/// not after `range.end`. Only the start position is considered: a token
+/// that starts before `range.start` is dropped even if it extends into the
+/// range.
+///
+/// The returned tokens have `result_id: None`. The first returned token has
+/// its deltas rewritten to its absolute line/character, since there is no
+/// longer a preceding token for it to be relative to.
+///
+/// Returns an empty token list when no token starts inside the range,
+/// including when `range.start` is after `range.end`.
+pub fn tokens_within_range(
+  tokens: &SemanticTokens,
+  range: lsp::Range,
+) -> SemanticTokens {
+  // absolute position of the token currently being visited
+  let mut line = 0;
+  let mut character = 0;
+
+  // absolute position of the first kept token, used to re-base its deltas
+  let mut first_token_line = 0;
+  let mut first_token_char = 0;
+  // half-open index window `keep_start_idx..keep_end_idx` into `tokens.data`;
+  // both start at `len`, i.e. "nothing found yet" / "keep through the end"
+  let mut keep_start_idx = tokens.data.len();
+  let mut keep_end_idx = keep_start_idx;
+  for (i, token) in tokens.data.iter().enumerate() {
+    // `delta_start` is only relative to the previous token on the same line
+    if token.delta_line != 0 {
+      character = 0;
+    }
+    line += token.delta_line;
+    character += token.delta_start;
+    let token_start = lsp::Position::new(line, character);
+    if i < keep_start_idx && token_start >= range.start {
+      keep_start_idx = i;
+      first_token_line = line;
+      first_token_char = character;
+    }
+    if token_start > range.end {
+      keep_end_idx = i;
+      break;
+    }
+  }
+  // `<=` rather than `==`: with an inverted range (`start` after `end`) the
+  // end index can be found before any start index, and slicing with
+  // `start > end` below would panic
+  if keep_end_idx <= keep_start_idx {
+    return SemanticTokens {
+      result_id: None,
+      data: Vec::new(),
+    };
+  }
+
+  let mut data = tokens.data[keep_start_idx..keep_end_idx].to_vec();
+  // we need to adjust the delta_line and delta_start on the first token
+  // as it is relative to 0 now, not the previous token
+  let first_token = &mut data[0];
+  first_token.delta_line = first_token_line;
+  first_token.delta_start = first_token_char;
+
+  SemanticTokens {
+    result_id: None,
+    data,
+  }
+}
+
 #[cfg(test)]
 mod tests {
   use super::*;
@@ -352,4 +401,129 @@ mod tests {
       ]
     );
   }
+
+  // Tokens starting before the range start or after the range end are
+  // dropped, and the first kept token is re-based to absolute coordinates.
+  #[test]
+  fn test_tokens_within_range() {
+    let mut builder = SemanticTokensBuilder::new();
+    // (line, char, length, token type)
+    for (line, char, length, token_type) in [
+      (1, 0, 5, 0),
+      (2, 1, 1, 1),
+      (2, 2, 3, 2),
+      (2, 5, 5, 3),
+      (3, 0, 4, 4),
+      (5, 2, 3, 5),
+    ] {
+      builder.push(line, char, length, token_type, 0);
+    }
+    let tokens = builder.build(None);
+    let range =
+      lsp::Range::new(lsp::Position::new(2, 2), lsp::Position::new(4, 0));
+
+    let result = tokens_within_range(&tokens, range);
+
+    let token = |delta_line, delta_start, length, token_type| SemanticToken {
+      delta_line,
+      delta_start,
+      length,
+      token_type,
+      token_modifiers_bitset: 0,
+    };
+    assert_eq!(
+      result.data,
+      vec![
+        // line 2 char 2
+        token(2, 2, 3, 2),
+        // line 2 char 5
+        token(0, 3, 5, 3),
+        // line 3 char 0
+        token(1, 0, 4, 4),
+      ]
+    );
+  }
+
+  // A token that starts before the range end is kept through to the end of
+  // the token list; a token that starts before the range start is dropped
+  // even though it overlaps the range.
+  #[test]
+  fn test_tokens_within_range_include_end() {
+    let mut builder = SemanticTokensBuilder::new();
+    // (line, char, length, token type)
+    for (line, char, length, token_type) in
+      [(1, 0, 1, 0), (2, 1, 2, 1), (2, 3, 3, 2), (3, 0, 4, 3)]
+    {
+      builder.push(line, char, length, token_type, 0);
+    }
+    let tokens = builder.build(None);
+    let range =
+      lsp::Range::new(lsp::Position::new(2, 2), lsp::Position::new(3, 4));
+
+    let result = tokens_within_range(&tokens, range);
+
+    let token = |delta_line, delta_start, length, token_type| SemanticToken {
+      delta_line,
+      delta_start,
+      length,
+      token_type,
+      token_modifiers_bitset: 0,
+    };
+    assert_eq!(
+      result.data,
+      vec![
+        // line 2 char 3
+        token(2, 3, 3, 2),
+        // line 3 char 0
+        token(1, 0, 4, 3),
+      ]
+    );
+  }
+
+  // No token starts inside the range, and an empty input stays empty.
+  #[test]
+  fn test_tokens_within_range_empty() {
+    let mut builder = SemanticTokensBuilder::new();
+    // (line, char, length, token type)
+    for (line, char, length, token_type) in
+      [(1, 0, 1, 0), (2, 1, 2, 1), (2, 3, 3, 2), (3, 0, 4, 3)]
+    {
+      builder.push(line, char, length, token_type, 0);
+    }
+    let tokens = builder.build(None);
+    let range =
+      lsp::Range::new(lsp::Position::new(3, 2), lsp::Position::new(3, 4));
+
+    let from_populated = tokens_within_range(&tokens, range);
+    assert_eq!(from_populated.data, vec![]);
+
+    let from_empty = tokens_within_range(&SemanticTokens::default(), range);
+    assert_eq!(from_empty.data, vec![]);
+  }
 }
diff --git a/tests/integration/lsp_tests.rs b/tests/integration/lsp_tests.rs
@@ -12698,3 +12698,87 @@ fn lsp_ts_code_fix_any_param() {
 
   panic!("failed to find 'Infer parameter types from usage' fix in fixes: {fixes:#?}");
 }
+
+/// Checks that semantic tokens are cached by a `semanticTokens/full` request
+/// and reused by a later `semanticTokens/range` request.
+///
+/// The test counts `tsc.request.getEncodedSemanticClassifications` perf
+/// measurements after each request: a range request before anything is
+/// cached (count 1), a full request (count 2), and then the same range
+/// request again, which must leave the count at 2. It finally asserts that
+/// the cached range response equals the uncached one.
+#[test]
+fn lsp_semantic_token_caching() {
+  let context = TestContextBuilder::new().use_temp_cwd().build();
+  let temp_dir = context.temp_dir().path();
+
+  // NOTE(review): `collect_perf()` is presumably what makes `client.perf()`
+  // measurements available below — confirm against the test client builder
+  let mut client: LspClient = context
+    .new_lsp_command()
+    .collect_perf()
+    .set_root_dir(temp_dir.clone())
+    .build();
+  client.initialize_default();
+
+  let a = source_file(
+    temp_dir.join("a.ts"),
+    r#"
+    export const a = 1;
+    export const b = 2;
+    export const bar = () => "bar";
+    function foo(fun: (number, number, number) => number, c: number) {
+      const double = (x) => x * 2;
+      return fun(double(a), b, c);
+    }"#,
+  );
+
+  client.did_open_file(&a);
+
+  // requesting a range won't cache the tokens, so this will
+  // be computed
+  let res = client.write_request(
+    "textDocument/semanticTokens/range",
+    json!({
+      "textDocument": a.identifier(),
+      "range": {
+        "start": a.range_of("const bar").start,
+        "end": a.range_of("}").end,
+      }
+    }),
+  );
+
+  // first classification request: nothing was cached yet
+  assert_eq!(
+    client
+      .perf()
+      .measure_count("tsc.request.getEncodedSemanticClassifications"),
+    1,
+  );
+
+  // requesting for the full doc should compute and cache the tokens
+  let _full = client.write_request(
+    "textDocument/semanticTokens/full",
+    json!({
+      "textDocument": a.identifier(),
+    }),
+  );
+
+  // second classification request: the earlier range request cached nothing
+  assert_eq!(
+    client
+      .perf()
+      .measure_count("tsc.request.getEncodedSemanticClassifications"),
+    2,
+  );
+
+  // use the cached tokens
+  let res_cached = client.write_request(
+    "textDocument/semanticTokens/range",
+    json!({
+      "textDocument": a.identifier(),
+      "range": {
+        "start": a.range_of("const bar").start,
+        "end": a.range_of("}").end,
+      }
+    }),
+  );
+
+  // make sure we actually used the cache
+  assert_eq!(
+    client
+      .perf()
+      .measure_count("tsc.request.getEncodedSemanticClassifications"),
+    2,
+  );
+
+  // the range served from the cache must match the freshly computed one
+  assert_eq!(res, res_cached);
+}