Skip to content

Commit

Permalink
Fix UTF-8 -> UTF-16 calculation
Browse files Browse the repository at this point in the history
There were a few issues in the old code:

1. A character encoded as 1-3 UTF-8 bytes maps to 1 UTF-16 code unit, but a 4-byte character maps to 2 UTF-16 code units
2. The starting offset was not recorded when the `multibyte_chars` iteration ran to the end of the list
3. The `mappings` can be unordered, meaning we need to restart the UTF-16 offset calculation whenever a position goes backwards
  • Loading branch information
jridgewell committed Dec 3, 2022
1 parent 8bee06f commit a4b8c38
Show file tree
Hide file tree
Showing 10 changed files with 186 additions and 30 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAa,OAARC,UACpB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something';\nconsole.error(`❌ ${message}`);\nconst bbb = '';\n//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJ4eHgiLCJjb25zb2xlIiwiZXJyb3IiLCJtZXNzYWdlIiwiYmJiIl0sInNvdXJjZXMiOlsidW5rbm93biJdLCJzb3VyY2VzQ29udGVudCI6WyJjb25zdCB4eHggPSAnLCBzb21ldGhpbmcnXG5jb25zb2xlLmVycm9yKGDinYwgJHttZXNzYWdlfWApO1xuXG5jb25zdCBiYmIgPSAnJ1xuIl0sIm1hcHBpbmdzIjoiQUFBQSxNQUFNQSxHQUFHLEdBQUcsYUFBWjtBQUNBQyxPQUFPLENBQUNDLEtBQVIsQ0FBZSxLQUFJQyxPQUFRLEVBQTNCO0FBRUEsTUFBTUMsR0FBRyxHQUFHLEVBQVoifQ==\n"
],
"version": 3
}
20 changes: 20 additions & 0 deletions crates/swc/tests/fixture/sourcemap/issue-6552/no-map/input/.swcrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"sourceMaps": true,
"jsc": {
"parser": {
"syntax": "ecmascript",
"jsx": false
},
"target": "es5",
"loose": false,
"minify": {
"compress": false,
"mangle": false
}
},
"module": {
"type": "commonjs"
},
"minify": true,
"isModule": true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
const xxx = ', something'
console.error(`❌ ${message}`);

const bbb = ''
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"use strict";var xxx=", something";console.error("❌ ".concat(message));var bbb="";
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"mappings": "AAAA,aAAA,IAAMA,IAAM,cACZC,QAAQC,KAAK,CAAC,AAAC,KAAa,OAARC,UAEpB,IAAMC,IAAM",
"names": [
"xxx",
"console",
"error",
"message",
"bbb"
],
"sources": [
"../../input/index.js"
],
"sourcesContent": [
"const xxx = ', something'\nconsole.error(`❌ ${message}`);\n\nconst bbb = ''\n"
],
"version": 3
}
106 changes: 88 additions & 18 deletions crates/swc_common/src/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
//! within the SourceMap, which upon request can be converted to line and column
//! information, source code snippets, etc.
use std::{
cmp,
cmp::{max, min},
env, fs,
cmp, env, fs,
hash::Hash,
io,
path::{Path, PathBuf},
Expand Down Expand Up @@ -295,8 +293,7 @@ impl SourceMap {
);

let linechpos = self.bytepos_to_file_charpos_with(&f, linebpos);

let col = max(chpos, linechpos) - min(chpos, linechpos);
let col = chpos - linechpos;

let col_display = {
let start_width_idx = f
Expand Down Expand Up @@ -954,7 +951,7 @@ impl SourceMap {
}

fn bytepos_to_file_charpos_with(&self, map: &SourceFile, bpos: BytePos) -> CharPos {
let total_extra_bytes = self.calc_extra_bytes(map, &mut 0, &mut 0, bpos);
let total_extra_bytes = self.calc_utf16_offset(map, &mut 0, &mut 0, bpos);
assert!(
map.start_pos.to_u32() + total_extra_bytes <= bpos.to_u32(),
"map.start_pos = {:?}; total_extra_bytes = {}; bpos = {:?}",
Expand All @@ -966,7 +963,7 @@ impl SourceMap {
}

/// Calculates the offset (extra bytes contributed by the multibyte chars
/// located before `bpos`) that callers subtract from a BytePos to obtain a
/// CharPos.
fn calc_extra_bytes(
pub fn calc_utf16_offset(
&self,
map: &SourceFile,
prev_total_extra_bytes: &mut u32,
Expand All @@ -975,13 +972,14 @@ impl SourceMap {
) -> u32 {
// The number of extra bytes due to multibyte chars in the SourceFile
let mut total_extra_bytes = *prev_total_extra_bytes;
let mut i = *start;

for (i, &mbc) in map.multibyte_chars[*start..].iter().enumerate() {
for &mbc in map.multibyte_chars[i..].iter() {
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
// every character is at least one byte, so we only
// count the actual extra bytes.
total_extra_bytes += mbc.bytes as u32 - 1;
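// 1-, 2-, and 3-byte UTF-8 characters map to 1 UTF-16 code unit, but
// 4-byte characters map to 2 (a surrogate pair).
// NOTE(review): a 3-byte character therefore has 2 extra bytes (3 - 1),
// so the `else { 1 }` below looks like an undercount — confirm.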
total_extra_bytes += if mbc.bytes == 4 { 2 } else { 1 };
// We should never see a byte position in the middle of a
// character
debug_assert!(
Expand All @@ -991,13 +989,14 @@ impl SourceMap {
mbc.pos,
mbc.bytes
);
i += 1;
} else {
*start += i;
break;
}
}

*prev_total_extra_bytes = total_extra_bytes;
*start = i;

total_extra_bytes
}
Expand Down Expand Up @@ -1197,6 +1196,9 @@ impl SourceMap {
let mut line_ch_start = 0;
let mut inline_sources_content = false;

let mut prev_bpos = BytePos(0);
let mut prev_linebpos = BytePos(0);

for (pos, lc) in mappings.iter() {
let pos = *pos;

Expand Down Expand Up @@ -1235,6 +1237,9 @@ impl SourceMap {
line_prev_extra_bytes = 0;
line_ch_start = 0;

prev_bpos = BytePos(0);
prev_linebpos = BytePos(0);

cur_file = Some(f.clone());
&f
}
Expand All @@ -1253,7 +1258,6 @@ impl SourceMap {
Some(line) => line as u32,
None => continue,
};
let mut name = config.name_for_bytepos(pos);

let linebpos = f.lines[line as usize];
debug_assert!(
Expand All @@ -1263,18 +1267,43 @@ impl SourceMap {
pos,
linebpos,
);
let chpos =
pos.to_u32() - self.calc_extra_bytes(f, &mut prev_extra_bytes, &mut ch_start, pos);
// TODO: mappings really should be ordered, but they are not.
// debug_assert!(line >= prev_line);
if linebpos < prev_linebpos {
line_prev_extra_bytes = 0;
line_ch_start = 0;
}
prev_linebpos = linebpos;

let linechpos = linebpos.to_u32()
- self.calc_extra_bytes(
- self.calc_utf16_offset(
f,
&mut line_prev_extra_bytes,
&mut line_ch_start,
linebpos,
);

let mut col = max(chpos, linechpos) - min(chpos, linechpos);
// TODO: mappings really should be ordered, but they are not.
// debug_assert!(pos >= prev_bpos);
if pos < prev_bpos {
prev_extra_bytes = line_prev_extra_bytes;
ch_start = line_ch_start;
}
prev_bpos = pos;

let chpos =
pos.to_u32() - self.calc_utf16_offset(f, &mut prev_extra_bytes, &mut ch_start, pos);

debug_assert!(
chpos >= linechpos,
"{}: chpos = {:?}; linechpos = {:?};",
f.name,
chpos,
linechpos,
);

let mut col = chpos - linechpos;
let mut name = None;
if let Some(orig) = &orig {
if let Some(token) = orig
.lookup_token(line, col)
Expand All @@ -1298,7 +1327,9 @@ impl SourceMap {
}
}

let name_idx = name.map(|name| builder.add_name(name));
let name_idx = name
.or_else(|| config.name_for_bytepos(pos))
.map(|name| builder.add_name(name));

builder.add_raw(lc.line, lc.col, line, col, Some(src_id), name_idx);
prev_dst_line = lc.line;
Expand Down Expand Up @@ -1653,6 +1684,45 @@ mod tests {
assert!(sm.merge_spans(span1, span2).is_none());
}

/// Walks a string mixing 1-, 2-, 3- and 4-byte UTF-8 characters and checks,
/// at every character boundary, that subtracting the offset reported by
/// `SourceMap::calc_utf16_offset` from the byte position yields the expected
/// UTF-16 position. The running state is carried across calls.
#[test]
fn calc_utf16_offset() {
    let source = "t¢e∆s💩t";
    let cm = SourceMap::new(FilePathMapping::empty());
    let fm = cm.new_source_file(PathBuf::from("blork.rs").into(), source.to_string());

    // State threaded through successive calls so each lookup resumes where
    // the previous one stopped.
    let mut extra_bytes = 0_u32;
    let mut mbc_index = 0;

    let mut byte_pos = fm.start_pos;
    // Both positions start at the file's start offset.
    let mut char_pos = CharPos(byte_pos.to_usize());

    for ch in source.chars() {
        let offset = cm.calc_utf16_offset(&fm, &mut extra_bytes, &mut mbc_index, byte_pos);
        let computed = byte_pos.to_u32() - offset;

        assert_eq!(computed, char_pos.to_u32());

        // Advance by the character's width in each encoding.
        byte_pos = byte_pos + BytePos(ch.len_utf8() as u32);
        char_pos = char_pos + CharPos(ch.len_utf16());
    }
}

/// Checks `bytepos_to_file_charpos_with` at every character boundary of a
/// string mixing 1-, 2-, 3- and 4-byte UTF-8 characters: the returned
/// file-relative position must equal the running count of UTF-16 code units.
#[test]
fn bytepos_to_charpos() {
    let source = "t¢e∆s💩t";
    let cm = SourceMap::new(FilePathMapping::empty());
    let fm = cm.new_source_file(PathBuf::from("blork.rs").into(), source.to_string());

    let mut byte_pos = fm.start_pos;
    // Expected positions are relative to the file, so they start at zero.
    let mut expected = CharPos(0);

    for ch in source.chars() {
        let computed = cm.bytepos_to_file_charpos_with(&fm, byte_pos);

        assert_eq!(computed, expected);

        // Advance by the character's width in each encoding.
        byte_pos = byte_pos + BytePos(ch.len_utf8() as u32);
        expected = expected + CharPos(ch.len_utf16());
    }
}

/// Returns the span corresponding to the `n`th occurrence of
/// `substring` in `source_text`.
trait SourceMapExtension {
Expand Down
26 changes: 14 additions & 12 deletions crates/swc_estree_compat/src/babelify/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use rayon::prelude::*;
use serde::{de::DeserializeOwned, Serialize};
use swc_common::{
comments::{CommentKind, Comments},
source_map::Pos,
sync::Lrc,
BytePos, SourceFile, SourceMap, Span,
};
Expand Down Expand Up @@ -43,18 +44,19 @@ impl Context {
// We rename this to feel more comfortable while doing math.
let start_offset = self.fm.start_pos;

let mut start = span.lo.0 - start_offset.0;
let mut end = span.hi.0 - start_offset.0;

for mb in self.fm.multibyte_chars.iter() {
if mb.pos < span.lo {
start -= (mb.bytes - 1) as u32;
}

if mb.pos < span.hi {
end -= (mb.bytes - 1) as u32;
}
}
let mut prev_extra_bytes = 0;
let mut ch_start = 0;

let start = span.lo.to_u32()
- start_offset.to_u32()
- self
.cm
.calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.lo);
let end = span.hi.to_u32()
- start_offset.to_u32()
- self
.cm
.calc_utf16_offset(&self.fm, &mut prev_extra_bytes, &mut ch_start, span.hi);

(Some(start), Some(end))
}
Expand Down

0 comments on commit a4b8c38

Please sign in to comment.