Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add extension method for generating Norwegian national identity numbers. #272

Merged
merged 2 commits into from Dec 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -483,6 +483,8 @@ public void Using_FakerT_Inheritance()
* **`using Bogus.Extensions.Italy;`**
* `Bogus.Person.CodiceFiscale()` - Codice Fiscale
* `Bogus.DataSets.Finance.CodiceFiscale()` - Codice Fiscale
* **`using Bogus.Extensions.Norway;`**
* `Bogus.Person.Fødselsnummer()` - Norwegian national identity number
* **`using Bogus.Extensions.Portugal;`**
* `Bogus.Person.Nif()` - Número de Identificação Fiscal (NIF)
* `Bogus.DataSets.Company.Nipc()` - Número de Identificação de Pessoa Colectiva (NIPC)
Expand Down
110 changes: 110 additions & 0 deletions Source/Bogus.Tests/ExtensionTests/NorwegianExtensionTest.cs
@@ -0,0 +1,110 @@
using Bogus.DataSets;
using Bogus.Extensions.Norway;
using FluentAssertions;
using Xunit;

namespace Bogus.Tests.ExtensionTests
{
/// <summary>
/// Tests for the Norwegian national identity number (fødselsnummer) extension.
/// </summary>
public class NorwegianExtensionTest : SeededTest
{
    /// <summary>
    /// Asserts that the two-digit birth year is allowed for the given individual
    /// number, and that the individual number's parity matches the person's gender.
    /// </summary>
    private void IsLegalIndividualNumber(int readIndNo, int birthYear, Person p)
    {
        // Work out the permitted two-digit year span for this individual number.
        // Individual numbers outside 0..999 have no year rule at all.
        bool hasYearRule = true;
        int earliestYear = 0;
        int latestYear = 0;

        if (readIndNo >= 0 && readIndNo <= 499)
        {
            latestYear = 99;
        }
        else if (readIndNo >= 750 && readIndNo <= 999)
        {
            latestYear = 39;
        }
        else if (readIndNo >= 500 && readIndNo <= 749)
        {
            // This band is shared by two spans: years 00-39 and years 54-99.
            bool isInLowSpan = birthYear >= 0 && birthYear <= 39;
            earliestYear = isInLowSpan ? 0 : 54;
            latestYear = isInLowSpan ? 39 : 99;
        }
        else
        {
            hasYearRule = false;
        }

        if (hasYearRule)
        {
            birthYear.Should().BeInRange(earliestYear, latestYear);
        }

        // Even individual number for women, odd for everyone else.
        bool expectEven = p.Gender == Name.Gender.Female;
        bool isEven = readIndNo % 2 == 0;
        isEven.Should().Be(expectEven);
    }

    /// <summary>
    /// Recomputes both control digits from the first nine digits and asserts
    /// that they equal the last two characters of the given number.
    /// </summary>
    private void IsLegalChecksum(string readFødselsnummer)
    {
        string expectedControlDigits = readFødselsnummer.Substring(9, 2);

        // Day, month, year and individual number: nine single digits.
        int[] digits = new int[9];
        for (int position = 0; position < digits.Length; position++)
        {
            digits[position] = int.Parse(readFødselsnummer.Substring(position, 1));
        }

        int[] firstWeights = { 3, 7, 6, 1, 8, 9, 4, 5, 2 };
        int[] secondWeights = { 5, 4, 3, 2, 7, 6, 5, 4, 3 };

        int firstSum = 0;
        int secondSum = 0;
        for (int position = 0; position < digits.Length; position++)
        {
            firstSum += firstWeights[position] * digits[position];
            secondSum += secondWeights[position] * digits[position];
        }

        int k1 = 11 - (firstSum % 11);

        // k1 is used here before 11 is mapped to 0; 2 * 11 is a multiple of 11,
        // so the remainder is the same either way.
        int k2 = 11 - ((secondSum + (2 * k1)) % 11);

        k1 = k1 == 11 ? 0 : k1;
        k2 = k2 == 11 ? 0 : k2;

        $"{k1}{k2}".Should().Be(expectedControlDigits);
    }

    /// <summary>
    /// Asserts length, individual number rules and checksum of a generated number.
    /// </summary>
    private void IsLegalFødselsnummer(string readFødselsnummer, Person p)
    {
        readFødselsnummer.Should().HaveLength(11);

        // Layout is DDMMYYXXXCC: YY at offset 4, XXX at offset 6.
        int twoDigitYear = int.Parse(readFødselsnummer.Substring(4, 2));
        int individualNumber = int.Parse(readFødselsnummer.Substring(6, 3));

        IsLegalIndividualNumber(individualNumber, twoDigitYear, p);
        IsLegalChecksum(readFødselsnummer);
    }

    [Fact]
    public void can_create_norwegian_fødselsnummer()
    {
        var faker = new Faker("nb_NO");
        var subject = faker.Person;

        string generated = subject.Fødselsnummer();

        IsLegalFødselsnummer(generated, subject);
    }

    [Fact]
    public void can_create_correct_checksum_1()
    {
        // Test fødselsnummer from DSF.
        IsLegalChecksum("31080700442");
    }

    [Fact]
    public void can_create_correct_checksum_2()
    {
        // Test fødselsnummer from DSF.
        IsLegalChecksum("10050050489");
    }
}
}
125 changes: 125 additions & 0 deletions Source/Bogus/Extensions/Norway/ExtensionsForNorway.cs
@@ -0,0 +1,125 @@
using System;

namespace Bogus.Extensions.Norway
{
/// <summary>
/// API extensions specific for a geographical location.
/// </summary>
public static class ExtensionsForNorway
{
/// <summary>
/// Norwegian national identity number (fødselsnummer)
/// </summary>
/// <param name="p">
/// The person to generate the number for. Its date of birth, gender and
/// randomizer are read, and the generated number is cached in its context
/// so repeated calls return the same value.
/// </param>
/// <returns>The number as birth date (ddMMyy), individual number and checksum.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The person's birth year is outside 1854-2039.
/// </exception>
public static string Fødselsnummer(this Person p)
{
    const string Key = nameof(ExtensionsForNorway) + "Fødselsnummer";
    if (p.context.ContainsKey(Key))
    {
        return p.context[Key] as string;
    }

    /*
        Layout: DDMMYYXXXCC

          DD  -> Day
          MM  -> Month
          YY  -> Year (last two digits)
          XXX -> Individual number
          CC  -> Checksum

        The individual number has to be even for women and odd for men.

        The checksum is calculated with a modulo checksum algorithm.
        If either of the checksum digits is 10, the fødselsnummer is
        rejected, and a new individual number has to be generated.

        https://www.skatteetaten.no/en/person/national-registry/birth-and-name-selection/children-born-in-norway/national-id-number/

        https://nn.wikipedia.org/wiki/F%C3%B8dselsnummer

        https://github.com/deegane/NINTool/blob/master/backend/src/main/java/com/nin/validation/NorwegianNinValidator.kt

        https://github.com/magnuswatn/fodselsnummer/blob/master/fodselsnummer.py
    */

    var r = p.Random;

    // Format with the invariant culture. The current culture may use a
    // non-Gregorian calendar, which would change the day/month/year digits
    // and disagree with the Gregorian year used for the individual number.
    string birthDate = string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0:ddMMyy}", p.DateOfBirth);

    string individualNumber;
    string checksum;
    bool isOkChecksum;

    // The birth date is fixed, so on a rejected checksum only the individual
    // number is drawn again. The draws come from the person's randomizer.
    do
    {
        individualNumber = GenerateIndividualNumber(r, p.Gender, p.DateOfBirth.Year);
        isOkChecksum = GenerateChecksum(birthDate, individualNumber, out checksum);
    } while (!isOkChecksum);
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @mika-s,

First off, great work! It's very well done and looks good!

However, I have a question about this do-while loop:

  • Why do we need to "check if the checksum is okay" and if the checksum is bad, loop again?
  • Is there some reason why we can't calculate the checksum directly from birthDate and individualNumber?

I guess I feel a little uneasy about this do-while loop because generating the check digits should be a deterministic result of the input arguments.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @bchavez,

The reason for the loop is because the checksums can end up getting 10 as calculated value , either cs1, cs2 or both. In that case the number is invalid, and a new one has to be generated. The only thing that can be regenerated is the individual number, because the birthdate is fixed. So in the loop I generate a new individual number and try calculating the checksum again.

I don't think it will break determinism, as long as the global Random object is used. Given a seed the loop will run the same number of iterations every time.

As for the second comment:

I am not sure why the weights are as they are in the Python library, but they should be correct in the PR. The offical spec is here (page 12-13):

In English:

The weights are:
3, 7, 6, 1, 8, 9, 4, 5 and 2
k1 = 11 - ((3D1 + 7D2 + 6M1 + 1M2 + 8Y1 + 9Y2 + 4I1 + 5I2 + 2I3) - 11Q1)

Q1 is the the integer quotient for the product sum divided by 11, meaning the highest number that when multiplied with 11 gives the product sum or less.

[Q1 is just modulo.]

There are no birth numbers assigned that gives k1 = 10.
If k1 = 11, set K1 equal to 0, otherwise K1 is equal k1.

The second control digits, K2, is calculated in the same way of the first 9 digits and the first
control digit.

The weights here are:
5, 4, 3, 2, 7, 6, 5, 4, 3 and 2
k2 = 11 - ((5D1 + 4D2 + 3M1 + 2M2 + 7Y1 + 6Y2 + 5I1 + 4I2 + 3I3 + 2K1) - 11*Q2)

Q2 is the the integer quotient for the product sum divided by 11, meaning the highest number that when multiplied with 11 gives the product sum or less.

There are no birth numbers assigned that gives k2 = 10.
If k2 = 11, set K2 equal to 0, otherwise K2 is equal k2.

I generated a couple of numbers and double checked their validity with a couple of other tools, e.g. this and this (first textbox), and they said the numbers were valid.

These are the numbers I checked (base64 encoded to make sure Google doesn't index numbers that belong to real people):

MjQwMzk3MDQxNDENCjEwMDE4MTI5MDIyDQowNDExOTkwMjQzMQ0KMTgwMTYyMDQxNjgNCjEzMDM4NzM1NzQy

The calculation of the checksum could of course be made in a different way. A functional approach would be to use map-fold. Another imperative approach is to use a loop that iterates over a split birthdate+indnum string and then multiplies each number with the weight and adds it to a total variable (like in the Python code).

As for the do-while loop, another approach there could be to increment or decrement the individual number if the control digits become 10, and then recalculate the control digits with the incremented/decremented values. The control digits should then become something else than 10, but I cannot guarantee it. The incremented/decremented individual number would become the new individual number of course, it's not just done in the checksum calculation.


    // Reuse the exact birth date string the checksum was computed from, so the
    // control digits always match the digits that precede them.
    string final = $"{birthDate}{individualNumber}{checksum}";

    p.context[Key] = final;
    return final;
}

/// <summary>
/// Picks a three-digit individual number from the band that belongs to the
/// birth year, using an even number for women and an odd number otherwise.
/// </summary>
/// <param name="r">Randomizer the number is drawn from.</param>
/// <param name="gender">Gender that decides between an even and an odd number.</param>
/// <param name="year">Four-digit birth year; must be between 1854 and 2039.</param>
/// <returns>The individual number, zero-padded to three digits.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="year"/> is outside 1854-2039.
/// </exception>
private static string GenerateIndividualNumber(Randomizer r, DataSets.Name.Gender gender, int year)
{
    // The three supported bands are contiguous, so one guard covers every unsupported year.
    if (year < 1854 || year > 2039)
    {
        throw new ArgumentOutOfRangeException(nameof(year), $"{nameof(year)} must be between 1854 and 2039.");
    }

    int lowest;
    int highest;

    if (year <= 1899)
    {
        // 1854-1899
        lowest = 500;
        highest = 749;
    }
    else if (year <= 1999)
    {
        // 1900-1999
        lowest = 0;
        highest = 499;
    }
    else
    {
        // 2000-2039
        lowest = 500;
        highest = 999;
    }

    bool wantsEven = gender == DataSets.Name.Gender.Female;
    int drawn = wantsEven ? r.Even(lowest, highest) : r.Odd(lowest, highest);

    return drawn.ToString("D3");
}

/// <summary>
/// Calculates the two control digits for a birth date and individual number.
/// </summary>
/// <param name="birthDate">Birth date as six digits; the first six characters are read as D1 D2 M1 M2 Y1 Y2.</param>
/// <param name="individualNumber">Individual number; the first three characters are read as I1 I2 I3.</param>
/// <param name="checksum">
/// Receives the first control digit followed by the second. It is assigned even
/// when the method returns false, in which case at least one part is the value 10.
/// </param>
/// <returns>True when both control digits are below 10; false when either one is 10.</returns>
private static bool GenerateChecksum(string birthDate, string individualNumber, out string checksum)
{
// Split the input into single digits: day, month, year, then individual number.
int d1 = int.Parse(birthDate.Substring(0, 1));
int d2 = int.Parse(birthDate.Substring(1, 1));
int m1 = int.Parse(birthDate.Substring(2, 1));
int m2 = int.Parse(birthDate.Substring(3, 1));
int y1 = int.Parse(birthDate.Substring(4, 1));
int y2 = int.Parse(birthDate.Substring(5, 1));
int i1 = int.Parse(individualNumber.Substring(0, 1));
int i2 = int.Parse(individualNumber.Substring(1, 1));
int i3 = int.Parse(individualNumber.Substring(2, 1));

// Each control digit is 11 minus (weighted digit sum modulo 11), so it lies in 1..11.
// The second control digit also includes the first one with weight 2. It uses the
// first one before 11 is mapped to 0, which is equivalent because 2 * 11 is a
// multiple of 11. Afterwards a value of 11 is replaced by 0 for both digits.
int cs1 = 11 - (((3 * d1) + (7 * d2) + (6 * m1) + (1 * m2) + (8 * y1) + (9 * y2) + (4 * i1) + (5 * i2) + (2 * i3)) % 11);
Copy link
Owner

@bchavez bchavez Dec 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps related to my previous do-while comment:
I'm trying to figure out why the method needs to return bool for isOkChecksum in the do-while loop.

I found some open-source python code shows the check digit weight calculations as:
3, 7, 6, 1, 8, 9, 4, 5, 2, 1
https://github.com/magnuswatn/fodselsnummer/blob/5452318bf99ffba5efd4b4dbaa4f3ae9bc17e319/fodselsnummer.py#L122-L123

Here, in the PR, the current C# code weights are:
3, 7, 6, 1, 8, 9, 4, 5, 2
Is it possible that the weights are a little off because the last 1 is missing from the calculation here in the PR?

Again, great work @mika-s! I apologize for the delay.

int cs2 = 11 - (((5 * d1) + (4 * d2) + (3 * m1) + (2 * m2) + (7 * y1) + (6 * y2) + (5 * i1) + (4 * i2) + (3 * i3) + (2 * cs1)) % 11);

if (cs1 == 11)
{
cs1 = 0;
}

if (cs2 == 11)
{
cs2 = 0;
}

checksum = $"{cs1}{cs2}";

return cs1 < 10 && cs2 < 10;
}
}
}