Skip to content

Commit

Permalink
Add lossy parameter to fromUtf8
Browse files Browse the repository at this point in the history
  • Loading branch information
webmaster128 committed Oct 24, 2022
1 parent bba7780 commit 68f3dee
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 2 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ and this project adheres to

## [Unreleased]

### Added

- @cosmjs/encoding: Add `lossy` parameter to `fromUtf8` allowing the use of a
replacement character instead of throwing.

## [0.29.2] - 2022-10-13

### Added
Expand Down
12 changes: 12 additions & 0 deletions packages/encoding/src/utf8.spec.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { toAscii } from "./ascii";
import { fromUtf8, toUtf8 } from "./utf8";

describe("utf8", () => {
Expand Down Expand Up @@ -59,4 +60,15 @@ describe("utf8", () => {
// Broken UTF8 example from https://github.com/nodejs/node/issues/16894
expect(() => fromUtf8(new Uint8Array([0xf0, 0x80, 0x80]))).toThrow();
});

describe("fromUtf8", () => {
  it("replaces characters in lossy mode", () => {
    // Inputs without invalid sequences must come back unchanged in lossy mode
    const noBytes = new Uint8Array([]);
    expect(fromUtf8(noBytes, true)).toEqual("");

    const abcBytes = new Uint8Array([0x61, 0x62, 0x63]);
    expect(fromUtf8(abcBytes, true)).toEqual("abc");

    // Example from https://doc.rust-lang.org/stable/std/string/struct.String.html#method.from_utf8_lossy
    // The bytes f0 90 80 between the two ASCII parts are expected to decode to a
    // single replacement character.
    const brokenBytes = new Uint8Array([...toAscii("Hello "), 0xf0, 0x90, 0x80, ...toAscii("World")]);
    expect(fromUtf8(brokenBytes, true)).toEqual("Hello �World");
  });
});
});
11 changes: 9 additions & 2 deletions packages/encoding/src/utf8.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ export function toUtf8(str: string): Uint8Array {
return new TextEncoder().encode(str);
}

export function fromUtf8(data: Uint8Array): string {
return new TextDecoder("utf-8", { fatal: true }).decode(data);
/**
 * Decodes UTF-8 encoded bytes into a string.
 *
 * @param data the UTF-8 encoded bytes to decode
 * @param lossy when true, invalid encodings are substituted with the replacement
 * character � instead of raising an error. Lossy mode is off by default.
 * @returns the decoded string
 * @throws if lossy mode is off and `data` is not valid UTF-8
 */
export function fromUtf8(data: Uint8Array, lossy = false): string {
  // Strict (fatal) decoding is exactly the opposite of lossy decoding
  const decoder = new TextDecoder("utf-8", { fatal: !lossy });
  return decoder.decode(data);
}

0 comments on commit 68f3dee

Please sign in to comment.