Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(prisma-fmt): use UTF-16 offset in the response for the schema that contains multi-byte characters #4815

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
13 changes: 7 additions & 6 deletions prisma-fmt/src/code_actions.rs
Expand Up @@ -3,6 +3,7 @@ mod multi_schema;
mod relation_mode;
mod relations;

use crate::offsets::{offset_to_position, position_after_span, range_to_span};
use lsp_types::{CodeActionOrCommand, CodeActionParams, Diagnostic, Range, TextEdit, WorkspaceEdit};
use psl::{
diagnostics::Span,
Expand Down Expand Up @@ -146,7 +147,7 @@ pub(super) fn diagnostics_for_span(
) -> Option<Vec<Diagnostic>> {
let res: Vec<_> = diagnostics
.iter()
.filter(|diag| span.overlaps(crate::range_to_span(diag.range, schema)))
.filter(|diag| span.overlaps(range_to_span(diag.range, schema)))
.cloned()
.collect();

Expand Down Expand Up @@ -180,7 +181,7 @@ fn create_missing_attribute<'a>(
let new_text = format!(" @{attribute_name}");

let field = fields.next().unwrap();
let position = crate::position_after_span(field.ast_field().span(), schema);
let position = position_after_span(field.ast_field().span(), schema);

let range = Range {
start: position,
Expand Down Expand Up @@ -208,15 +209,15 @@ fn create_missing_attribute<'a>(
}

/// Builds the LSP range that starts at the position of `span.end - 1` and ends at the position
/// of `span.end`, both resolved against `schema`.
fn range_after_span(schema: &str, span: Span) -> Range {
// NOTE(review): the `crate::`-qualified pair and the unqualified pair below look like the
// removed and added sides of a diff hunk; as written, the later pair shadows the earlier one.
let start = crate::offset_to_position(span.end - 1, schema);
let end = crate::offset_to_position(span.end, schema);
let start = offset_to_position(span.end - 1, schema);
let end = offset_to_position(span.end, schema);

Range { start, end }
}

/// Converts `span` into an LSP range by resolving `span.start` and `span.end` against `schema`.
fn span_to_range(schema: &str, span: Span) -> Range {
// NOTE(review): the `crate::`-qualified pair and the unqualified pair below look like the
// removed and added sides of a diff hunk; as written, the later pair shadows the earlier one.
let start = crate::offset_to_position(span.start, schema);
let end = crate::offset_to_position(span.end, schema);
let start = offset_to_position(span.start, schema);
let end = offset_to_position(span.end, schema);

Range { start, end }
}
Expand Down
97 changes: 2 additions & 95 deletions prisma-fmt/src/lib.rs
Expand Up @@ -5,14 +5,14 @@ mod get_dmmf;
mod lint;
mod merge_schemas;
mod native;
mod offsets;
mod preview;
mod schema_file_input;
mod text_document_completion;
mod validate;

use log::*;
use lsp_types::{Position, Range};
use psl::parser_database::ast;
pub use offsets::offset_to_position;
use schema_file_input::SchemaFileInput;

/// The API is modelled on an LSP [completion
Expand Down Expand Up @@ -211,96 +211,3 @@ pub fn get_config(get_config_params: String) -> Result<String, String> {
pub fn get_dmmf(get_dmmf_params: String) -> Result<String, String> {
get_dmmf::get_dmmf(&get_dmmf_params)
}

/// The LSP position is expressed as a (line, col) tuple, but our pest-based parser works with byte
/// offsets. This function converts from an LSP position to an offset into `document`.
///
/// NOTE(review): every character advances the result by exactly one, so the returned value is a
/// count of `char`s; it only equals a byte offset when the document is pure ASCII.
///
/// As written, every return path yields `Some`: a line past the end of the document resolves to
/// the number of characters consumed so far, and a character position past the end of the line
/// resolves to the offset of that line's `\n` (or the end of the document).
pub(crate) fn position_to_offset(position: &Position, document: &str) -> Option<usize> {
let mut offset = 0;
let mut line_offset = position.line;
let mut character_offset = position.character;
let mut chars = document.chars();

// Skip `position.line` whole lines, counting every character including the terminating `\n`.
while line_offset > 0 {
loop {
match chars.next() {
Some('\n') => {
offset += 1;
break;
}
Some(_) => {
offset += 1;
}
// Ran out of input before reaching the requested line.
None => return Some(offset),
}
}

line_offset -= 1;
}

// Advance within the target line, one character per requested column.
while character_offset > 0 {
match chars.next() {
// The requested column lies past the end of the line (or of the document).
Some('\n') | None => return Some(offset),
Some(_) => {
offset += 1;
character_offset -= 1;
}
}
}

Some(offset)
}

#[track_caller]
/// Converts an LSP range to a span.
///
/// Both endpoints are converted with `position_to_offset` and unwrapped; the resulting span is
/// always tagged with `FileId::ZERO`.
pub(crate) fn range_to_span(range: Range, document: &str) -> ast::Span {
let start = position_to_offset(&range.start, document).unwrap();
let end = position_to_offset(&range.end, document).unwrap();

ast::Span::new(start, end, psl::parser_database::FileId::ZERO)
}

/// Gives the LSP position right after the given span.
///
/// The position is computed for offset `span.end - 1`.
/// NOTE(review): presumably the span's final offset unit is a trailing newline, which would make
/// this the end of the span's last line — confirm against callers.
pub(crate) fn position_after_span(span: ast::Span, document: &str) -> Position {
offset_to_position(span.end - 1, document)
}

/// Converts a byte offset to an LSP position, if the given offset
/// does not overflow the document.
///
/// NOTE(review): `offset` is compared against the index produced by `chars().enumerate()`, i.e. a
/// character index, and `character` grows by one per `char`; both only match byte offsets and
/// UTF-16 columns for ASCII documents.
///
/// If `offset` is never reached, the position accumulated over the whole document is returned.
pub fn offset_to_position(offset: usize, document: &str) -> Position {
let mut position = Position::default();

for (i, chr) in document.chars().enumerate() {
match chr {
// Reached the requested offset: report the position accumulated so far.
_ if i == offset => {
return position;
}
// A newline starts a new line and resets the column.
'\n' => {
position.character = 0;
position.line += 1;
}
_ => {
position.character += 1;
}
}
}

position
}

#[cfg(test)]
mod tests {
    use lsp_types::Position;

    // On Windows, a newline is actually two characters.
    #[test]
    fn position_to_offset_with_crlf() {
        // The field line is indented by four spaces so that column 4 is the "i" of "id".
        let schema = "\r\nmodel Test {\r\n    id Int @id\r\n}";
        // Let's put the cursor on the "i" in "id Int".
        let expected_offset = schema.chars().position(|c| c == 'i').unwrap();
        let found_offset = super::position_to_offset(&Position { line: 2, character: 4 }, schema).unwrap();

        assert_eq!(found_offset, expected_offset);
    }
}
39 changes: 32 additions & 7 deletions prisma-fmt/src/lint.rs
@@ -1,4 +1,8 @@
use psl::diagnostics::{DatamodelError, DatamodelWarning};
use crate::offsets::offset_to_lsp_offset;
use psl::{
diagnostics::{DatamodelError, DatamodelWarning},
ValidatedSchema,
};

use crate::schema_file_input::SchemaFileInput;

Expand All @@ -11,18 +15,18 @@ pub struct MiniError {
}

pub(crate) fn run(schema: SchemaFileInput) -> String {
let schema = match schema {
let validated_schema = match schema {
SchemaFileInput::Single(file) => psl::validate(file.into()),
SchemaFileInput::Multiple(files) => psl::validate_multi_file(files),
};
let diagnostics = &schema.diagnostics;
let ValidatedSchema { diagnostics, db, .. } = &validated_schema;

let mut mini_errors: Vec<MiniError> = diagnostics
.errors()
.iter()
.map(|err: &DatamodelError| MiniError {
start: err.span().start,
end: err.span().end,
start: offset_to_lsp_offset(err.span().start, db.source(err.span().file_id)),
end: offset_to_lsp_offset(err.span().end, db.source(err.span().file_id)),
text: err.message().to_string(),
is_warning: false,
})
Expand All @@ -32,8 +36,8 @@ pub(crate) fn run(schema: SchemaFileInput) -> String {
.warnings()
.iter()
.map(|warn: &DatamodelWarning| MiniError {
start: warn.span().start,
end: warn.span().end,
start: offset_to_lsp_offset(warn.span().start, db.source(warn.span().file_id)),
end: offset_to_lsp_offset(warn.span().end, db.source(warn.span().file_id)),
text: warn.message().to_owned(),
is_warning: true,
})
Expand Down Expand Up @@ -61,6 +65,27 @@ mod tests {
serde_json::to_string_pretty(&value).unwrap()
}

// Diagnostics must be reported in UTF-16 code units: both emoji below take four UTF-8 bytes but
// only two UTF-16 units, so the invalid second line starts at 16 rather than at byte 18.
#[test]
fn should_return_utf16_offset() {
    let schema = indoc! {r#"
        // 🌐 multibyte
        😀
    "#};
    let datamodel = SchemaFileInput::Single(schema.to_string());

    // The snapshot keeps the two-space nesting of the pretty-printed JSON.
    let expected = expect![[r#"
        [
          {
            "start": 16,
            "end": 19,
            "text": "Error validating: This line is invalid. It does not start with any known Prisma schema keyword.",
            "is_warning": false
          }
        ]"#]];

    expected.assert_eq(&lint(datamodel));
}

#[test]
fn single_deprecated_preview_features_should_give_a_warning() {
let schema = indoc! {r#"
Expand Down
1 change: 0 additions & 1 deletion prisma-fmt/src/main.rs
@@ -1,6 +1,5 @@
mod actions;
mod format;
// mod lint;
mod native;
mod preview;

Expand Down
134 changes: 134 additions & 0 deletions prisma-fmt/src/offsets.rs
@@ -0,0 +1,134 @@
use lsp_types::{Position, Range};
use psl::parser_database::ast::Span;

/// Converts an LSP position into a byte offset into `document`.
///
/// An LSP position is a (line, character) pair whose `character` counts UTF-16 code units, while
/// our pest-based parser works with UTF-8 byte offsets.
///
/// Out-of-range positions are clamped rather than rejected: a line past the end of the document
/// yields the document's byte length, and a character past the end of the line yields the offset
/// of that line's `\n` (or the end of the document). The result is therefore always `Some`.
pub(crate) fn position_to_offset(position: &Position, document: &str) -> Option<usize> {
    let mut chars = document.chars();
    let mut byte_offset = 0;

    // Consume `position.line` complete lines, terminators included.
    for _ in 0..position.line {
        loop {
            match chars.next() {
                Some(chr) => {
                    byte_offset += chr.len_utf8();
                    if chr == '\n' {
                        break;
                    }
                }
                // The document ends before the requested line starts.
                None => return Some(byte_offset),
            }
        }
    }

    // Signed on purpose: a column pointing into the middle of a surrogate pair makes the
    // remaining count drop below zero instead of underflowing.
    let mut remaining_units = position.character as i64;

    while remaining_units > 0 {
        match chars.next() {
            Some(chr) if chr != '\n' => {
                byte_offset += chr.len_utf8();
                remaining_units -= chr.len_utf16() as i64;
            }
            // End of line or end of document: stay where we are.
            _ => break,
        }
    }

    Some(byte_offset)
}

/// Converts an LSP range to a span over `document`.
///
/// Each endpoint goes through `position_to_offset` and is unwrapped; the span is always
/// attributed to `FileId::ZERO`.
#[track_caller]
pub(crate) fn range_to_span(range: Range, document: &str) -> Span {
    Span::new(
        position_to_offset(&range.start, document).unwrap(),
        position_to_offset(&range.end, document).unwrap(),
        psl::parser_database::FileId::ZERO,
    )
}

/// Gives the LSP position right after the given span.
///
/// The position is resolved for the byte offset `span.end - 1`.
/// NOTE(review): presumably the span's last byte is a trailing newline, which would make this
/// the end of the span's final line — confirm against callers.
pub(crate) fn position_after_span(span: Span, document: &str) -> Position {
    let last_byte = span.end - 1;

    offset_to_position(last_byte, document)
}

/// Converts a UTF-8 byte offset into the offset used by the LSP, i.e. the number of UTF-16 code
/// units that precede it in `document`.
///
/// Every character that starts before `offset` is counted in full, so an offset pointing into
/// the middle of a multi-byte character is rounded up to the end of that character. An offset
/// past the end of the document yields the document's total UTF-16 length.
pub(crate) fn offset_to_lsp_offset(offset: usize, document: &str) -> usize {
    document
        .char_indices()
        .take_while(|&(byte_index, _)| byte_index < offset)
        .map(|(_, chr)| chr.len_utf16())
        .sum()
}

/// Converts a UTF-8 byte offset into an LSP position (zero-based line, UTF-16 column).
///
/// Characters that start before `offset` are walked in order: `\n` starts a new line and resets
/// the column, any other character advances the column by its UTF-16 length. If `offset` lies
/// beyond the document, the position reached at the end of the document is returned.
pub fn offset_to_position(offset: usize, document: &str) -> Position {
    let mut position = Position::default();

    for (byte_index, chr) in document.char_indices() {
        if byte_index >= offset {
            break;
        }

        if chr == '\n' {
            position.line += 1;
            position.character = 0;
        } else {
            position.character += chr.len_utf16() as u32;
        }
    }

    position
}

#[cfg(test)]
mod tests {
    use lsp_types::Position;

    // On Windows, a newline is actually two characters.
    #[test]
    fn position_to_offset_with_crlf() {
        // The field line is indented by four spaces so that column 4 is the "i" of "id".
        let schema = "\r\nmodel Test {\r\n    id Int @id\r\n}";
        // Let's put the cursor on the "i" in "id Int".
        let expected_offset = schema.bytes().position(|c| c == b'i').unwrap();
        let found_offset = super::position_to_offset(&Position { line: 2, character: 4 }, schema).unwrap();

        assert_eq!(found_offset, expected_offset);
    }

    // In the LSP protocol, the number of the UTF-16 code unit should be used as the offset.
    #[test]
    fn offset_to_position_with_multibyte() {
        let schema = "// 🌐 multibyte\n😀@\n";

        // "😀" is four UTF-8 bytes but two UTF-16 code units, so "@" sits at column 2.
        let cursor_offset = schema.bytes().position(|c| c == b'@').unwrap();
        let expected_position = Position { line: 1, character: 2 };
        let found_position = super::offset_to_position(cursor_offset, schema);

        assert_eq!(expected_position, found_position);
    }

    // The inverse direction: a UTF-16 column must map back to the UTF-8 byte offset.
    #[test]
    fn position_to_offset_with_multibyte() {
        let schema = "// 🌐 multibyte\n😀@\n";

        let expected_offset = schema.bytes().position(|c| c == b'@').unwrap();
        let found_offset = super::position_to_offset(&Position { line: 1, character: 2 }, schema).unwrap();

        assert_eq!(expected_offset, found_offset);
    }
}