Skip to content

Commit

Permalink
* move methods GetNowTimestamp() & UnescapedJsonSerialize() & fie…
Browse files Browse the repository at this point in the history
…ld `JsonSerializerOptions` to `tbm.Shared.BaseHelper` @ Helper.cs

@ crawler

+ static class `BaseHelper`
* now logs at warn level and serializes the entity counts grouped by type to JSON @ `TbmDbContext.LogDbUpdateConcurrencyException()`
@ shared
@ c#
  • Loading branch information
n0099 committed May 11, 2024
1 parent 2201fbc commit a57af67
Show file tree
Hide file tree
Showing 16 changed files with 42 additions and 35 deletions.
2 changes: 1 addition & 1 deletion c#/crawler/src/Db/CrawlerDbContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class CrawlerDbContext(ILogger<CrawlerDbContext> logger, Fid fid = 0)
// https://www.entityframeworktutorial.net/faq/set-created-and-modified-date-in-efcore.aspx
ChangeTracker.Entries<TimestampedEntity>().ForEach(e =>
{
Helper.GetNowTimestamp(out var now);
BaseHelper.GetNowTimestamp(out var now);
var originalEntityState = e.State; // copy e.State since it might change after any prop value updated
var createdAtProp = e.Property(ie => ie.CreatedAt);
var updatedAtProp = e.Property(ie => ie.UpdatedAt);
Expand Down
16 changes: 1 addition & 15 deletions c#/crawler/src/Helper.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
using System.Text.Encodings.Web;
using System.Text.Unicode;

namespace tbm.Crawler;

#pragma warning disable AV1708 // Type name contains term that should be avoided
public abstract partial class Helper
public static class Helper
#pragma warning restore AV1708 // Type name contains term that should be avoided
{
[SuppressMessage("Member Design", "AV1130:Return type in method signature should be an interface to an unchangeable collection")]
Expand All @@ -23,15 +20,4 @@ public abstract partial class Helper

public static PostContentWrapper? WrapPostContent(IEnumerable<Content>? contents) =>
contents == null ? null : new() {Value = {contents}};

public static void GetNowTimestamp(out Time now) => now = GetNowTimestamp();
[SuppressMessage("Maintainability", "AV1551:Method overload should call another overload")]
public static Time GetNowTimestamp() => (Time)DateTimeOffset.Now.ToUnixTimeSeconds();
}
public abstract partial class Helper
{
private static readonly JsonSerializerOptions UnescapedSerializeOptions =
new() {Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)};
public static string UnescapedJsonSerialize<TValue>(TValue value) =>
JsonSerializer.Serialize(value, UnescapedSerializeOptions);
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/CrawlPost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ private Action<Exception> SaveThreadMissingFirstReply
Pid = thread.FirstReplyPid,
Excerpt = Helper.SerializedProtoBufWrapperOrNullIfEmpty(thread.FirstReplyExcerpt,
() => new ThreadAbstractWrapper {Value = {thread.FirstReplyExcerpt}}),
LastSeenAt = Helper.GetNowTimestamp()
LastSeenAt = BaseHelper.GetNowTimestamp()
};
if (newEntity.Pid == null && newEntity.Excerpt == null) return; // skip if all fields are empty
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Crawler/ThreadLateCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public class ThreadLateCrawler(
{
logger.LogWarning("TiebaException: {} {}",
string.Join(' ', e.GetInnerExceptions().Select(ex => ex.Message)),
Helper.UnescapedJsonSerialize(e.Data));
BaseHelper.UnescapedJsonSerialize(e.Data));
}
else
{
Expand Down
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/CrawlerLocks.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public IReadOnlySet<Page> AcquireRange(LockId lockId, IEnumerable<Page> pages)
var acquiredPages = pages.ToHashSet();
lock (_crawling)
{ // lock the entire ConcurrentDictionary since following bulk insert should be a single atomic operation
Helper.GetNowTimestamp(out var now);
BaseHelper.GetNowTimestamp(out var now);
if (!_crawling.ContainsKey(lockId))
{ // if no one is locking any page in lockId, just insert pages then return it as is
var pageTimeDict = acquiredPages.Select(page => KeyValuePair.Create(page, now));
Expand Down Expand Up @@ -114,9 +114,9 @@ protected override void LogTrace()
logger.LogTrace("Lock: type={} crawlingIdCount={} crawlingPageCount={} crawlingPageCountsKeyById={}"
+ " failedIdCount={} failedPageCount={} failures={}", LockType,
_crawling.Count, _crawling.Values.Sum(d => d.Count),
Helper.UnescapedJsonSerialize(_crawling.ToDictionary(pair => pair.Key.ToString(), pair => pair.Value.Count)),
BaseHelper.UnescapedJsonSerialize(_crawling.ToDictionary(pair => pair.Key.ToString(), pair => pair.Value.Count)),
_failed.Count, _failed.Values.Sum(d => d.Count),
Helper.UnescapedJsonSerialize(_failed.ToDictionary(pair => pair.Key.ToString(), pair => pair.Value)));
BaseHelper.UnescapedJsonSerialize(_failed.ToDictionary(pair => pair.Key.ToString(), pair => pair.Value)));
}
}

Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ private void ValidateThenParse(BaseCrawler<TResponse, TPostProtoBuf>.Response re
.Select(page => (Page)page)
.SequenceEqual(pages)
? $"within the range [{pages[0]}-{pages[^1]}]"
: JsonSerializer.Serialize(pages);
: BaseHelper.UnescapedJsonSerialize(pages);
Logger.LogInformation("Cannot crawl any page within {} for lock type {}, id {} since they've already been locked",
pagesText, locks.LockType, lockId);
}
Expand Down Expand Up @@ -198,7 +198,7 @@ private void ValidateThenParse(BaseCrawler<TResponse, TPostProtoBuf>.Response re
{
if (!te.ShouldSilent) Logger.LogWarning("TiebaException: {} {}",
string.Join(' ', e.GetInnerExceptions().Select(ex => ex.Message)),
Helper.UnescapedJsonSerialize(e.Data));
BaseHelper.UnescapedJsonSerialize(e.Data));
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ protected override void OnBeforeCommitSave(CrawlerDbContext db, UserSaver userSa
.IntersectBy(newlyLockedLatestRepliers, u => u.Uid)
.Select(u =>
{
u.CreatedAt = Helper.GetNowTimestamp();
u.CreatedAt = BaseHelper.GetNowTimestamp();
return u;
});
db.Users.AddRange(newLatestRepliersExceptLocked);
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ protected override ReplyPost Convert(Reply inPost)
{
logger.LogInformation("Detected an image in the content of reply with pid {} references to {}"
+ " instead of common domains of tieba image hosting service, content={}",
o.Pid, c.OriginSrc, Helper.UnescapedJsonSerialize(c));
o.Pid, c.OriginSrc, BaseHelper.UnescapedJsonSerialize(c));
}
}
o.Content = Helper.SerializedProtoBufWrapperOrNullIfEmpty(inPost.Content,
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Parser/UserParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public partial class UserParser(ConcurrentDictionary<Uid, User> users)
}
catch (Exception e)
{
e.Data["raw"] = Helper.UnescapedJsonSerialize(el);
e.Data["raw"] = BaseHelper.UnescapedJsonSerialize(el);
throw new InvalidDataException("User parse error.", e);
}
}).OfType<User>().ForEach(u => users[u.Uid] = u);
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public class AuthorRevisionSaver(PostType triggeredByPostType)
where TPost : BasePost
where TRevision : AuthorRevision
{
Helper.GetNowTimestamp(out var now);
BaseHelper.GetNowTimestamp(out var now);
var existingRevisionOfExistingUsers = dbSet.AsNoTracking()
.Where(e => e.Fid == db.Fid
&& posts.Select(p => p.AuthorUid).Distinct().Contains(e.Uid))
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Saver/BaseSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ bool IsTimestampingFieldName(string name) => name is nameof(BasePost.LastSeenAt)
logger.LogWarning("Updating field {} is not existing in revision table, " +
"newValue={}, oldValue={}, newObject={}, oldObject={}",
pName, ToHexWhenByteArray(p.CurrentValue), ToHexWhenByteArray(p.OriginalValue),
Helper.UnescapedJsonSerialize(newPostOrUser),
Helper.UnescapedJsonSerialize(entry.OriginalValues.ToObject()));
BaseHelper.UnescapedJsonSerialize(newPostOrUser),
BaseHelper.UnescapedJsonSerialize(entry.OriginalValues.ToObject()));
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public partial class ReplySaver

private Action SaveReplySignatures(CrawlerDbContext db, IEnumerable<ReplyPost> replies)
{
Helper.GetNowTimestamp(out var now);
BaseHelper.GetNowTimestamp(out var now);
var signatures = replies
.Where(r => r is {SignatureId: not null, Signature: not null})
.DistinctBy(r => r.SignatureId)
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Worker/ForumModeratorRevisionCrawlWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ where e.IsCrawling
await using var transaction = await db.Database.BeginTransactionAsync
(IsolationLevel.ReadCommitted, stoppingToken);

Helper.GetNowTimestamp(out var now);
BaseHelper.GetNowTimestamp(out var now);
var revisions = moderators
.GroupBy(t => t.Portrait)
.Select(g => new ForumModeratorRevision
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Worker/RetryCrawlWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ protected override async Task DoWork(CancellationToken stoppingToken)
var failureCountsKeyByTid = tidGroupByFid
.Cast<Tid>().ToDictionary(tid => tid, FailureCountSelector);
logger.LogTrace("Retrying previous failed thread late crawl with fid={}, threadsId={}",
fid, Helper.UnescapedJsonSerialize(tidGroupByFid));
fid, BaseHelper.UnescapedJsonSerialize(tidGroupByFid));
await threadLateFacade.Value(fid).CrawlThenSave(failureCountsKeyByTid, stoppingToken);
}
}
Expand Down
20 changes: 20 additions & 0 deletions c#/shared/BaseHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using System.Diagnostics.CodeAnalysis;
using System.Text.Encodings.Web;
using System.Text.Json;
using System.Text.Unicode;

namespace tbm.Shared;

#pragma warning disable AV1708 // Type name contains term that should be avoided
/// <summary>
/// Helpers shared across projects: current Unix timestamp retrieval and
/// JSON serialization that keeps non-ASCII text readable.
/// </summary>
public static class BaseHelper
#pragma warning restore AV1708 // Type name contains term that should be avoided
{
    // Declared ahead of the methods so that fields precede the members using them.
    // The encoder is built from UnicodeRanges.All so characters in those ranges are
    // written as-is instead of \uXXXX sequences (the encoder may still escape
    // characters it always treats as unsafe).
    // The options instance is cached because it is reused by every serialize call.
    private static readonly JsonSerializerOptions UnescapedSerializeOptions =
        new() {Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)};

    /// <summary>Writes the current Unix timestamp (seconds) into <paramref name="now"/>.</summary>
    /// <param name="now">Receives the same value that <see cref="GetNowTimestamp()"/> returns.</param>
    public static void GetNowTimestamp(out UInt32 now) => now = GetNowTimestamp();

    /// <summary>Gets the current time as Unix seconds cast to <see cref="UInt32"/>.</summary>
    /// <returns>Seconds elapsed since 1970-01-01T00:00:00Z.</returns>
    [SuppressMessage("Maintainability", "AV1551:Method overload should call another overload")]
    public static UInt32 GetNowTimestamp() =>

        // Unix time does not depend on the local offset, so UtcNow yields the same
        // value as Now without having to resolve the local time zone.
        (UInt32)DateTimeOffset.UtcNow.ToUnixTimeSeconds();

    /// <summary>
    /// Serializes <paramref name="value"/> to JSON using the shared options whose
    /// encoder covers all Unicode ranges.
    /// </summary>
    /// <typeparam name="TValue">Type of the value to serialize.</typeparam>
    /// <param name="value">The value to serialize.</param>
    /// <returns>The JSON text.</returns>
    public static string UnescapedJsonSerialize<TValue>(TValue value) =>
        JsonSerializer.Serialize(value, UnescapedSerializeOptions);
}
7 changes: 4 additions & 3 deletions c#/shared/src/Db/TbmDbContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ namespace tbm.Shared.Db;
public abstract class TbmDbContext(ILogger<TbmDbContext> logger) : DbContext
{
/// <summary>
/// Logs <paramref name="e"/> at warning level together with a JSON object that maps
/// each affected entity's type name to the number of entries of that type.
/// </summary>
/// <param name="e">The concurrency exception whose entries get summarized.</param>
public void LogDbUpdateConcurrencyException(DbUpdateConcurrencyException e) =>
    logger.LogWarning(e, "DbUpdateConcurrencyException: {}",
        BaseHelper.UnescapedJsonSerialize(e.Entries

            // Group by the type's name rather than the Type itself: System.Text.Json
            // rejects System.Type as a dictionary key, so serializing a
            // Dictionary<Type, int> would throw instead of producing the log message.
            .GroupBy(ee => ee.Entity.GetType().Name)
            .ToDictionary(g => g.Key, g => g.Count())));

public int SaveChangesForUpdate()
{
Expand Down

0 comments on commit a57af67

Please sign in to comment.