Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix BytePos -> CharPos calculations #6574

Merged
merged 4 commits into from
Dec 4, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UACnB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something';\nconsole.error(`❌ ${message}`);\nconst bbb = '';\n//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJ4eHgiLCJjb25zb2xlIiwiZXJyb3IiLCJtZXNzYWdlIiwiYmJiIl0sInNvdXJjZXMiOlsidW5rbm93biJdLCJzb3VyY2VzQ29udGVudCI6WyJjb25zdCB4eHggPSAnLCBzb21ldGhpbmcnXG5jb25zb2xlLmVycm9yKGDinYwgJHttZXNzYWdlfWApO1xuXG5jb25zdCBiYmIgPSAnJ1xuIl0sIm1hcHBpbmdzIjoiQUFBQSxNQUFNQSxHQUFHLEdBQUcsYUFBWjtBQUNBQyxPQUFPLENBQUNDLEtBQVIsQ0FBZSxLQUFJQyxPQUFRLEVBQTNCO0FBRUEsTUFBTUMsR0FBRyxHQUFHLEVBQVoifQ==\n"
],
"version": 3
}
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}
@@ -0,0 +1,4 @@
const xxx = ', something'
console.error(`❌ ${message}`);

const bbb = ''
@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAY,OAARC,UAEnB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something'\nconsole.error(`❌ ${message}`);\n\nconst bbb = ''\n"
],
"version": 3
}
110 changes: 92 additions & 18 deletions crates/swc_common/src/source_map.rs
Expand Up @@ -17,9 +17,7 @@
//! within the SourceMap, which upon request can be converted to line and column
//! information, source code snippets, etc.
use std::{
cmp,
cmp::{max, min},
env, fs,
cmp, env, fs,
hash::Hash,
io,
path::{Path, PathBuf},
Expand Down Expand Up @@ -295,8 +293,7 @@ impl SourceMap {
);

let linechpos = self.bytepos_to_file_charpos_with(&f, linebpos);

let col = max(chpos, linechpos) - min(chpos, linechpos);
let col = chpos - linechpos;

let col_display = {
let start_width_idx = f
Expand Down Expand Up @@ -954,7 +951,7 @@ impl SourceMap {
}

fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos {
let total_extra_bytes = self.calc_extra_bytes(map, &mut 0, &mut 0, bpos);
let total_extra_bytes = self.calc_utf16_offset(map, &mut 0, &mut 0, bpos);
assert!(
map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32(),
"map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}",
Expand All @@ -966,7 +963,7 @@ impl SourceMap {
}

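/// Returns the number of bytes to subtract from `bpos` when converting an
/// absolute BytePos to a CharPos relative to the source_file: the surplus
/// bytes of every multibyte char positioned before `bpos`.
///
/// `prev_total_extra_bytes` and `start` carry the running total and the scan
/// index into `map.multibyte_chars` across successive calls.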
fn calc_extra_bytes(
pub fn calc_utf16_offset(
&self,
map: &SourceFile,
prev_total_extra_bytes: &mut u32,
Expand All @@ -975,13 +972,18 @@ impl SourceMap {
) -> u32 {
// The number of extra bytes due to multibyte chars in the SourceFile
let mut total_extra_bytes = *prev_total_extra_bytes;
let mut i = *start;

for (i, &mbc) in map.multibyte_chars[*start..].iter().enumerate() {
for &mbc in map.multibyte_chars[i..].iter() {
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
// every character is at least one byte, so we only
// count the actual extra bytes.
total_extra_bytes += mbc.bytes as u32 - 1;
// 1, 2, and 3 UTF-8 bytes map to 1 UTF-16 code unit, but 4 UTF-8
// bytes map to 2, so a 4-byte char contributes 4 - 2 = 2 extra bytes.
total_extra_bytes += if mbc.bytes == 4 {
2
} else {
mbc.bytes as u32 - 1
};
// We should never see a byte position in the middle of a
// character
debug_assert!(
Expand All @@ -991,13 +993,14 @@ impl SourceMap {
mbc.pos,
mbc.bytes
);
i += 1;
} else {
*start += i;
break;
}
}

*prev_total_extra_bytes = total_extra_bytes;
*start = i;

total_extra_bytes
}
Expand Down Expand Up @@ -1197,6 +1200,9 @@ impl SourceMap {
let mut line_ch_start = 0;
let mut inline_sources_content = false;

let mut prev_bpos = BytePos(0);
let mut prev_linebpos = BytePos(0);

for (pos, lc) in mappings.iter() {
let pos = *pos;

Expand Down Expand Up @@ -1235,6 +1241,9 @@ impl SourceMap {
line_prev_extra_bytes = 0;
line_ch_start = 0;

prev_bpos = BytePos(0);
prev_linebpos = BytePos(0);

cur_file = Some(f.clone());
&f
}
Expand All @@ -1253,7 +1262,6 @@ impl SourceMap {
Some(line) => line as u32,
None => continue,
};
let mut name = config.name_for_bytepos(pos);

let linebpos = f.lines[line as usize];
debug_assert!(
Expand All @@ -1263,18 +1271,43 @@ impl SourceMap {
pos,
linebpos,
);
let chpos =
pos.to_u32() - self.calc_extra_bytes(f, &mut prev_extra_bytes, &mut ch_start, pos);
// TODO: mappings really should be ordered, but they are not.
// debug_assert!(line >= prev_line);
if linebpos < prev_linebpos {
line_prev_extra_bytes = 0;
line_ch_start = 0;
}
prev_linebpos = linebpos;

let linechpos = linebpos.to_u32()
- self.calc_extra_bytes(
- self.calc_utf16_offset(
f,
&mut line_prev_extra_bytes,
&mut line_ch_start,
linebpos,
);

let mut col = max(chpos, linechpos) - min(chpos, linechpos);
// TODO: mappings really should be ordered, but they are not.
// debug_assert(pos >= prev_bpos);
if pos < prev_bpos {
prev_extra_bytes = line_prev_extra_bytes;
ch_start = line_ch_start;
}
prev_bpos = pos;

let chpos =
pos.to_u32() - self.calc_utf16_offset(f, &mut prev_extra_bytes, &mut ch_start, pos);

debug_assert!(
chpos >= linechpos,
"{}: chpos = {:?}; linechpos = {:?};",
f.name,
chpos,
linechpos,
);

let mut col = chpos - linechpos;
let mut name = None;
if let Some(orig) = &orig {
if let Some(token) = orig
.lookup_token(line, col)
Expand All @@ -1298,7 +1331,9 @@ impl SourceMap {
}
}

let name_idx = name.map(|name| builder.add_name(name));
let name_idx = name
.or_else(|| config.name_for_bytepos(pos))
.map(|name| builder.add_name(name));

builder.add_raw(lc.line, lc.col, line, col, Some(src_id), name_idx);
prev_dst_line = lc.line;
Expand Down Expand Up @@ -1653,6 +1688,45 @@ mod tests {
assert!(sm.merge_spans(span1, span2).is_none());
}

/// Steps through a string mixing 1-, 2-, 3- and 4-byte UTF-8 characters and
/// checks, at the start of every character, that subtracting the value
/// returned by `SourceMap::calc_utf16_offset` from the byte position gives
/// the running total of UTF-16 code units.
///
/// The accumulator and scan index are shared across all calls, so the
/// incremental path of `calc_utf16_offset` is what gets exercised.
#[test]
fn calc_utf16_offset() {
    let input = "t¢e∆s💩t";
    let sm = SourceMap::new(FilePathMapping::empty());
    let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string());

    // State threaded through successive `calc_utf16_offset` calls.
    let mut extra_so_far = 0_u32;
    let mut scan_from = 0;

    // Both counters begin at the file's absolute start position, so the
    // comparison below is made in absolute terms.
    let mut byte_pos = file.start_pos;
    let mut expected = CharPos(byte_pos.to_usize());

    for (utf8_len, utf16_len) in input.chars().map(|ch| (ch.len_utf8(), ch.len_utf16())) {
        let offset = sm.calc_utf16_offset(&file, &mut extra_so_far, &mut scan_from, byte_pos);

        assert_eq!(byte_pos.to_u32() - offset, expected.to_u32());

        // Advance past the current character in both encodings.
        byte_pos = byte_pos + BytePos(utf8_len as u32);
        expected = expected + CharPos(utf16_len);
    }
}

/// Checks `SourceMap::bytepos_to_file_charpos_with` at the start of every
/// character of a string mixing 1-, 2-, 3- and 4-byte UTF-8 characters.
///
/// The expected `CharPos` starts at 0 and grows by each character's UTF-16
/// length, while the queried `BytePos` starts at the file's start position
/// and grows by each character's UTF-8 length.
#[test]
fn bytepos_to_charpos() {
    let input = "t¢e∆s💩t";
    let sm = SourceMap::new(FilePathMapping::empty());
    let file = sm.new_source_file(PathBuf::from("blork.rs").into(), input.to_string());

    let mut byte_cursor = file.start_pos;
    let mut want = CharPos(0);

    for ch in input.chars() {
        assert_eq!(sm.bytepos_to_file_charpos_with(&file, byte_cursor), want);

        // Step over `ch` in both the UTF-8 and the UTF-16 view of the text.
        let utf8_width = ch.len_utf8() as u32;
        let utf16_width = ch.len_utf16();
        byte_cursor = byte_cursor + BytePos(utf8_width);
        want = want + CharPos(utf16_width);
    }
}

/// Returns the span corresponding to the `n`th occurrence of
/// `substring` in `source_text`.
trait SourceMapExtension {
Expand Down
26 changes: 14 additions & 12 deletions crates/swc_estree_compat/src/babelify/mod.rs
Expand Up @@ -4,6 +4,7 @@ use rayon::prelude::*;
use serde::{de::DeserializeOwned, Serialize};
use swc_common::{
comments::{CommentKind, Comments},
source_map::Pos,
sync::Lrc,
BytePos, SourceFile, SourceMap, Span,
};
Expand Down Expand Up @@ -43,18 +44,19 @@ impl Context {
// We rename this to feel more comfortable while doing math.
let start_offset = self.fm.start_pos;

let mut start = span.lo.0 - start_offset.0;
let mut end = span.hi.0 - start_offset.0;

for mb in self.fm.multibyte_chars.iter() {
if mb.pos < span.lo {
start -= (mb.bytes - 1) as u32;
}

if mb.pos < span.hi {
end -= (mb.bytes - 1) as u32;
}
}
let mut prev_extra_bytes = 0;
let mut ch_start = 0;

let start = span.lo.to_u32()
- start_offset.to_u32()
- self
.cm
.calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.lo);
let end = span.hi.to_u32()
- start_offset.to_u32()
- self
.cm
.calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.hi);

(Some(start), Some(end))
}
Expand Down