Skip to content

Commit

Permalink
Add Tiktoken's missing model names (#7111)
Browse files Browse the repository at this point in the history
  • Loading branch information
tarekgh committed Mar 29, 2024
1 parent 01e6dd2 commit c96aac7
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/Microsoft.ML.Tokenizers/Model/Tiktoken.cs
Original file line number Diff line number Diff line change
Expand Up @@ -676,8 +676,9 @@ internal enum ModelEncoding
private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixToEncoding =
[
// chat
("gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k
("gpt-3.5-turbo-", ModelEncoding.Cl100kBase) // e.g., gpt-3.5-turbo-0301, -0401, etc.
( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k
( "gpt-3.5-turbo-", ModelEncoding.Cl100kBase), // e.g., gpt-3.5-turbo-0301, -0401, etc.
( "gpt-35-turbo-", ModelEncoding.Cl100kBase ) // Azure deployment name
];

private static readonly Dictionary<string, ModelEncoding> _modelToEncoding =
Expand All @@ -686,6 +687,9 @@ internal enum ModelEncoding
// chat
{ "gpt-4", ModelEncoding.Cl100kBase },
{ "gpt-3.5-turbo", ModelEncoding.Cl100kBase },
{ "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase },
{ "gpt-35-turbo", ModelEncoding.Cl100kBase }, // Azure deployment name
{ "gpt-35-turbo-16k", ModelEncoding.Cl100kBase }, // Azure deployment name

// text
{ "text-davinci-003", ModelEncoding.P50kBase },
Expand Down
4 changes: 4 additions & 0 deletions test/Microsoft.ML.Tokenizers.Tests/TitokenTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ public void TestEncodeR50kBase()
[InlineData("gpt-4-")]
[InlineData("gpt-3.5-turbo")]
[InlineData("gpt-3.5-turbo-")]
[InlineData("gpt-3.5-turbo-16k")]
[InlineData("gpt-35-turbo")]
[InlineData("gpt-35-turbo-16k")]
[InlineData("gpt-35-turbo-")]
[InlineData("text-davinci-003")]
[InlineData("text-davinci-002")]
[InlineData("text-davinci-001")]
Expand Down

0 comments on commit c96aac7

Please sign in to comment.