prisma · key-moon · Apr 6, 2024 · Apr 6, 2024 · Apr 6, 2024 · Apr 8, 2024
@@ -3,6 +3,7 @@ mod multi_schema;
 mod relation_mode;
 mod relations;
 
+use crate::offsets::{offset_to_position, position_after_span, range_to_span};
 use lsp_types::{CodeActionOrCommand, CodeActionParams, Diagnostic, Range, TextEdit, WorkspaceEdit};
 use psl::{
     diagnostics::Span,
@@ -146,7 +147,7 @@ pub(super) fn diagnostics_for_span(
 ) -> Option<Vec<Diagnostic>> {
     let res: Vec<_> = diagnostics
         .iter()
-        .filter(|diag| span.overlaps(crate::range_to_span(diag.range, schema)))
+        .filter(|diag| span.overlaps(range_to_span(diag.range, schema)))
         .cloned()
         .collect();
 
@@ -180,7 +181,7 @@ fn create_missing_attribute<'a>(
         let new_text = format!(" @{attribute_name}");
 
         let field = fields.next().unwrap();
-        let position = crate::position_after_span(field.ast_field().span(), schema);
+        let position = position_after_span(field.ast_field().span(), schema);
 
         let range = Range {
             start: position,
@@ -208,15 +209,15 @@ fn create_missing_attribute<'a>(
 }
 
 fn range_after_span(schema: &str, span: Span) -> Range {
-    let start = crate::offset_to_position(span.end - 1, schema);
-    let end = crate::offset_to_position(span.end, schema);
+    let start = offset_to_position(span.end - 1, schema);
+    let end = offset_to_position(span.end, schema);
 
     Range { start, end }
 }
 
 fn span_to_range(schema: &str, span: Span) -> Range {
-    let start = crate::offset_to_position(span.start, schema);
-    let end = crate::offset_to_position(span.end, schema);
+    let start = offset_to_position(span.start, schema);
+    let end = offset_to_position(span.end, schema);
 
     Range { start, end }
 }

@@ -5,14 +5,14 @@ mod get_dmmf;
 mod lint;
 mod merge_schemas;
 mod native;
+mod offsets;
 mod preview;
 mod schema_file_input;
 mod text_document_completion;
 mod validate;
 
 use log::*;
-use lsp_types::{Position, Range};
-use psl::parser_database::ast;
+pub use offsets::offset_to_position;
 use schema_file_input::SchemaFileInput;
 
 /// The API is modelled on an LSP [completion
@@ -211,96 +211,3 @@ pub fn get_config(get_config_params: String) -> Result<String, String> {
 pub fn get_dmmf(get_dmmf_params: String) -> Result<String, String> {
     get_dmmf::get_dmmf(&get_dmmf_params)
 }
-
-/// The LSP position is expressed as a (line, col) tuple, but our pest-based parser works with byte
-/// offsets. This function converts from an LSP position to a pest byte offset. Returns `None` if
-/// the position has a line past the end of the document, or a character position past the end of
-/// the line.
-pub(crate) fn position_to_offset(position: &Position, document: &str) -> Option<usize> {
-    let mut offset = 0;
-    let mut line_offset = position.line;
-    let mut character_offset = position.character;
-    let mut chars = document.chars();
-
-    while line_offset > 0 {
-        loop {
-            match chars.next() {
-                Some('\n') => {
-                    offset += 1;
-                    break;
-                }
-                Some(_) => {
-                    offset += 1;
-                }
-                None => return Some(offset),
-            }
-        }
-
-        line_offset -= 1;
-    }
-
-    while character_offset > 0 {
-        match chars.next() {
-            Some('\n') | None => return Some(offset),
-            Some(_) => {
-                offset += 1;
-                character_offset -= 1;
-            }
-        }
-    }
-
-    Some(offset)
-}
-
-#[track_caller]
-/// Converts an LSP range to a span.
-pub(crate) fn range_to_span(range: Range, document: &str) -> ast::Span {
-    let start = position_to_offset(&range.start, document).unwrap();
-    let end = position_to_offset(&range.end, document).unwrap();
-
-    ast::Span::new(start, end, psl::parser_database::FileId::ZERO)
-}
-
-/// Gives the LSP position right after the given span.
-pub(crate) fn position_after_span(span: ast::Span, document: &str) -> Position {
-    offset_to_position(span.end - 1, document)
-}
-
-/// Converts a byte offset to an LSP position, if the given offset
-/// does not overflow the document.
-pub fn offset_to_position(offset: usize, document: &str) -> Position {
-    let mut position = Position::default();
-
-    for (i, chr) in document.chars().enumerate() {
-        match chr {
-            _ if i == offset => {
-                return position;
-            }
-            '\n' => {
-                position.character = 0;
-                position.line += 1;
-            }
-            _ => {
-                position.character += 1;
-            }
-        }
-    }
-
-    position
-}
-
-#[cfg(test)]
-mod tests {
-    use lsp_types::Position;
-
-    // On Windows, a newline is actually two characters.
-    #[test]
-    fn position_to_offset_with_crlf() {
-        let schema = "\r\nmodel Test {\r\n    id Int @id\r\n}";
-        // Let's put the cursor on the "i" in "id Int".
-        let expected_offset = schema.chars().position(|c| c == 'i').unwrap();
-        let found_offset = super::position_to_offset(&Position { line: 2, character: 4 }, schema).unwrap();
-
-        assert_eq!(found_offset, expected_offset);
-    }
-}
@@ -1,4 +1,8 @@
-use psl::diagnostics::{DatamodelError, DatamodelWarning};
+use crate::offsets::offset_to_lsp_offset;
+use psl::{
+    diagnostics::{DatamodelError, DatamodelWarning},
+    ValidatedSchema,
+};
 
 use crate::schema_file_input::SchemaFileInput;
 
@@ -11,18 +15,18 @@ pub struct MiniError {
 }
 
 pub(crate) fn run(schema: SchemaFileInput) -> String {
-    let schema = match schema {
+    let validated_schema = match schema {
         SchemaFileInput::Single(file) => psl::validate(file.into()),
         SchemaFileInput::Multiple(files) => psl::validate_multi_file(files),
     };
-    let diagnostics = &schema.diagnostics;
+    let ValidatedSchema { diagnostics, db, .. } = &validated_schema;
 
     let mut mini_errors: Vec<MiniError> = diagnostics
         .errors()
         .iter()
         .map(|err: &DatamodelError| MiniError {
-            start: err.span().start,
-            end: err.span().end,
+            start: offset_to_lsp_offset(err.span().start, db.source(err.span().file_id)),
+            end: offset_to_lsp_offset(err.span().end, db.source(err.span().file_id)),
             text: err.message().to_string(),
             is_warning: false,
         })
@@ -32,8 +36,8 @@ pub(crate) fn run(schema: SchemaFileInput) -> String {
         .warnings()
         .iter()
         .map(|warn: &DatamodelWarning| MiniError {
-            start: warn.span().start,
-            end: warn.span().end,
+            start: offset_to_lsp_offset(warn.span().start, db.source(warn.span().file_id)),
+            end: offset_to_lsp_offset(warn.span().end, db.source(warn.span().file_id)),
             text: warn.message().to_owned(),
             is_warning: true,
         })
@@ -61,6 +65,27 @@ mod tests {
         serde_json::to_string_pretty(&value).unwrap()
     }
 
+    #[test]
+    fn should_return_utf16_offset() {
+        let schema = indoc! {r#"
+            // 🌐 ｍｕｌｔｉｂｙｔｅ
+            😀
+        "#};
+        let datamodel = SchemaFileInput::Single(schema.to_string());
+
+        let expected = expect![[r#"
+            [
+              {
+                "start": 16,
+                "end": 19,
+                "text": "Error validating: This line is invalid. It does not start with any known Prisma schema keyword.",
+                "is_warning": false
+              }
+            ]"#]];
+
+        expected.assert_eq(&lint(datamodel));
+    }
+
     #[test]
     fn single_deprecated_preview_features_should_give_a_warning() {
         let schema = indoc! {r#"

@@ -1,6 +1,5 @@
 mod actions;
 mod format;
-// mod lint;
 mod native;
 mod preview;
 

@@ -0,0 +1,134 @@
+use lsp_types::{Position, Range};
+use psl::parser_database::ast::Span;
+
+/// The LSP position is expressed as a (line, UTF-16 column) tuple, but our pest-based parser works
+/// with byte offsets. This function converts from an LSP position to a pest byte offset. It always
+/// returns `Some`: a line past the end of the document clamps to the end of the document, and a
+/// character position past the end of the line clamps to the end of that line.
+pub(crate) fn position_to_offset(position: &Position, document: &str) -> Option<usize> {
+    let mut offset = 0;
+    let mut line_offset = position.line;
+    let mut character_offset = position.character as i64;
+    let mut chars = document.chars();
+
+    while line_offset > 0 {
+        loop {
+            match chars.next() {
+                Some('\n') => {
+                    offset += '\n'.len_utf8();
+                    break;
+                }
+                Some(chr) => {
+                    offset += chr.len_utf8();
+                }
+                None => return Some(offset),
+            }
+        }
+
+        line_offset -= 1;
+    }
+
+    while character_offset > 0 {
+        match chars.next() {
+            Some('\n') | None => return Some(offset),
+            Some(chr) => {
+                offset += chr.len_utf8();
+                character_offset -= chr.len_utf16() as i64;
+            }
+        }
+    }
+
+    Some(offset)
+}
+
+#[track_caller]
+/// Converts an LSP range to a byte-offset span; the span is always tagged with `FileId::ZERO`.
+pub(crate) fn range_to_span(range: Range, document: &str) -> Span {
+    let start = position_to_offset(&range.start, document).unwrap();
+    let end = position_to_offset(&range.end, document).unwrap();
+
+    Span::new(start, end, psl::parser_database::FileId::ZERO)
+}
+
+/// Gives the LSP position for byte offset `span.end - 1` (the span's last byte, not one past it).
+pub(crate) fn position_after_span(span: Span, document: &str) -> Position {
+    offset_to_position(span.end - 1, document)
+}
+
+/// Converts a byte offset to the offset used in the LSP, counted in UTF-16 code units from the start.
+pub(crate) fn offset_to_lsp_offset(offset: usize, document: &str) -> usize {
+    let mut current_offset = 0;
+    let mut current_lsp_offset = 0;
+
+    for chr in document.chars() {
+        if offset <= current_offset {
+            break;
+        }
+        current_offset += chr.len_utf8();
+        current_lsp_offset += chr.len_utf16();
+    }
+
+    current_lsp_offset
+}
+
+/// Converts a byte offset to an LSP position (line, UTF-16 column). An offset past the end of the
+/// document yields the position of the end of the document.
+pub fn offset_to_position(offset: usize, document: &str) -> Position {
+    let mut current_offset = 0;
+    let mut position = Position::default();
+
+    for chr in document.chars() {
+        match chr {
+            _ if offset <= current_offset => {
+                return position;
+            }
+            '\n' => {
+                position.character = 0;
+                position.line += 1;
+            }
+            _ => {
+                position.character += chr.len_utf16() as u32;
+            }
+        }
+        current_offset += chr.len_utf8();
+    }
+
+    position
+}
+
+#[cfg(test)]
+mod tests {
+    use lsp_types::Position;
+
+    // On Windows, a newline is actually two characters.
+    #[test]
+    fn position_to_offset_with_crlf() {
+        let schema = "\r\nmodel Test {\r\n    id Int @id\r\n}";
+        // Let's put the cursor on the "i" in "id Int".
+        let expected_offset = schema.bytes().position(|c| c == b'i').unwrap();
+        let found_offset = super::position_to_offset(&Position { line: 2, character: 4 }, schema).unwrap();
+
+        assert_eq!(found_offset, expected_offset);
+    }
+
+    // In the LSP protocol, character offsets are counted in UTF-16 code units, not in bytes.
+    #[test]
+    fn offset_to_position_with_multibyte() {
+        let schema = "// 🌐 ｍｕｌｔｉｂｙｔｅ\n😀@\n";
+
+        let cursor_offset = schema.bytes().position(|c| c == b'@').unwrap();
+        let expected_position = Position { line: 1, character: 2 };
+        let found_position = super::offset_to_position(cursor_offset, schema);
+
+        assert_eq!(expected_position, found_position);
+    }
+    #[test]
+    fn position_to_offset_with_multibyte() {
+        let schema = "// 🌐 ｍｕｌｔｉｂｙｔｅ\n😀@\n";
+
+        let expected_offset = schema.bytes().position(|c| c == b'@').unwrap();
+        let found_offset = super::position_to_offset(&Position { line: 1, character: 2 }, schema).unwrap();
+
+        assert_eq!(expected_offset, found_offset);
+    }
+}