Skip to content

Commit a50f9c2

Browse files
committed
More modernizing
1 parent b79ede0 commit a50f9c2

File tree

6 files changed

+354
-375
lines changed

6 files changed

+354
-375
lines changed
Lines changed: 64 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,88 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Net;
1+
using System.Net;
52
using System.ServiceModel.Syndication;
63
using System.Text.RegularExpressions;
74
using System.Xml.Linq;
85

9-
namespace SimpleFeedReader
6+
namespace SimpleFeedReader;
7+
8+
/// <summary>
9+
/// The <see cref="DefaultFeedItemNormalizer"/> normalizes <see cref="FeedItem.Title"/>,
10+
/// <see cref="FeedItem.Content"/> and <see cref="FeedItem.Summary"/> of <see cref="FeedItem"/>s to the point where
11+
/// they no longer contain any HTML, redundant whitespace, un-normalized unicode chars and other control chars like
12+
/// tabs, newlines or backspaces. The <see cref="FeedItem"/>'s <see cref="FeedItem.Date"/> property will contain
13+
/// whichever date is latest; the <see cref="FeedItem.PublishDate"/> or <see cref="FeedItem.LastUpdatedDate"/>.
14+
/// </summary>
15+
/// <remarks>
16+
/// You can implement a normalizer yourself by implementing the <see cref="IFeedItemNormalizer"/> interface.
17+
/// </remarks>
18+
public class DefaultFeedItemNormalizer : IFeedItemNormalizer
1019
{
20+
private static readonly Regex _htmlregex = new(@"<[^>]*>", RegexOptions.Compiled); //@"<(.|\n)*?>"
21+
private static readonly Regex _controlcodesregex = new(@"[\x00-\x1F\x7f]", RegexOptions.Compiled);
22+
private static readonly Regex _whitespaceregex = new(@"\s{2,}", RegexOptions.Compiled);
23+
1124
/// <summary>
12-
/// The <see cref="DefaultFeedItemNormalizer"/> normalizes <see cref="FeedItem.Title"/>,
13-
/// <see cref="FeedItem.Content"/> and <see cref="FeedItem.Summary"/> of <see cref="FeedItem"/>s to the point where
14-
/// they no longer contain any HTML, redundant whitespace, un-normalized unicode chars and other control chars like
15-
/// tabs, newlines or backspaces. The <see cref="FeedItem"/>'s <see cref="FeedItem.Date"/> property will contain
16-
/// whichever date is latest; the <see cref="FeedItem.PublishDate"/> or <see cref="FeedItem.LastUpdatedDate"/>.
25+
/// Normalizes a SyndicationItem into a FeedItem.
1726
/// </summary>
18-
/// <remarks>
19-
/// You can implement a normalizer yourself by implementing the <see cref="IFeedItemNormalizer"/> interface.
20-
/// </remarks>
21-
public class DefaultFeedItemNormalizer : IFeedItemNormalizer
27+
/// <param name="feed">The <see cref="SyndicationFeed"/> on which the item was retrieved.</param>
28+
/// <param name="item">A <see cref="SyndicationItem"/> to normalize into a <see cref="FeedItem"/>.</param>
29+
/// <returns>Returns a normalized <see cref="FeedItem"/>.</returns>
30+
public virtual FeedItem Normalize(SyndicationFeed feed, SyndicationItem item)
2231
{
23-
private static readonly Regex _htmlregex = new Regex(@"<[^>]*>", RegexOptions.Compiled); //@"<(.|\n)*?>"
24-
private static readonly Regex _controlcodesregex = new Regex(@"[\x00-\x1F\x7f]", RegexOptions.Compiled);
25-
private static readonly Regex _whitespaceregex = new Regex(@"\s{2,}", RegexOptions.Compiled);
32+
var alternatelink = item.Links.FirstOrDefault(l => l.RelationshipType == null || l.RelationshipType.Equals("alternate", StringComparison.OrdinalIgnoreCase));
2633

27-
/// <summary>
28-
/// Normalizes a SyndicationItem into a FeedItem.
29-
/// </summary>
30-
/// <param name="feed">The <see cref="SyndicationFeed"/> on which the item was retrieved.</param>
31-
/// <param name="item">A <see cref="SyndicationItem"/> to normalize into a <see cref="FeedItem"/>.</param>
32-
/// <returns>Returns a normalized <see cref="FeedItem"/>.</returns>
33-
public virtual FeedItem Normalize(SyndicationFeed feed, SyndicationItem item)
34+
// Prefer the item's alternate link. Only when there is none, fall back to the item's Id,
// and only if that Id parses as an absolute URI; otherwise the item has no Uri (null).
// (The previous condition dereferenced a null alternatelink whenever the Id WAS a valid URI.)
var itemuri = alternatelink != null
    ? alternatelink.GetAbsoluteUri()
    : Uri.TryCreate(item.Id, UriKind.Absolute, out var parsed) ? parsed : null;
35+
return new FeedItem
3436
{
35-
var alternatelink = item.Links.FirstOrDefault(l => l.RelationshipType == null || l.RelationshipType.Equals("alternate", StringComparison.OrdinalIgnoreCase));
36-
37-
var itemuri = alternatelink == null && !Uri.TryCreate(item.Id, UriKind.Absolute, out var parsed) ? parsed : alternatelink.GetAbsoluteUri();
38-
return new FeedItem
39-
{
40-
Id = string.IsNullOrEmpty(item.Id) ? null : item.Id.Trim(),
41-
Title = item.Title == null ? null : Normalize(item.Title.Text),
42-
Content = item.Content == null ? null : Normalize(((TextSyndicationContent)item.Content).Text),
43-
Summary = item.Summary == null ? null : Normalize(item.Summary.Text),
44-
PublishDate = item.PublishDate,
45-
LastUpdatedDate = item.LastUpdatedTime == DateTimeOffset.MinValue ? item.PublishDate : item.LastUpdatedTime,
46-
Uri = itemuri,
47-
Images = GetFeedItemImages(item),
48-
Categories = item.Categories.Select(c => c.Name)
49-
};
50-
}
37+
Id = string.IsNullOrEmpty(item.Id) ? null : item.Id.Trim(),
38+
Title = item.Title == null ? null : Normalize(item.Title.Text),
39+
Content = item.Content == null ? null : Normalize(((TextSyndicationContent)item.Content).Text),
40+
Summary = item.Summary == null ? null : Normalize(item.Summary.Text),
41+
PublishDate = item.PublishDate,
42+
LastUpdatedDate = item.LastUpdatedTime == DateTimeOffset.MinValue ? item.PublishDate : item.LastUpdatedTime,
43+
Uri = itemuri,
44+
Images = GetFeedItemImages(item),
45+
Categories = item.Categories.Select(c => c.Name)
46+
};
47+
}
5148

52-
private static IEnumerable<Uri> GetFeedItemImages(SyndicationItem item) => item.ElementExtensions
53-
.Where(p => p.OuterName.Equals("image"))
54-
.Select(p => new Uri(p.GetObject<XElement>().Value));
49+
private static IEnumerable<Uri> GetFeedItemImages(SyndicationItem item) => item.ElementExtensions
50+
.Where(p => p.OuterName.Equals("image"))
51+
.Select(p => new Uri(p.GetObject<XElement>().Value));
5552

56-
private static string Normalize(string value)
53+
private static string Normalize(string value)
54+
{
55+
if (!string.IsNullOrEmpty(value))
5756
{
58-
if (!string.IsNullOrEmpty(value))
57+
value = HtmlDecode(value);
58+
if (string.IsNullOrEmpty(value))
5959
{
60-
value = HtmlDecode(value);
61-
if (string.IsNullOrEmpty(value))
62-
{
63-
return value;
64-
}
65-
66-
value = StripHTML(value);
67-
value = StripDoubleOrMoreWhiteSpace(RemoveControlChars(value));
68-
value = value.Normalize().Trim();
60+
return value;
6961
}
70-
return value;
62+
63+
value = StripHTML(value);
64+
value = StripDoubleOrMoreWhiteSpace(RemoveControlChars(value));
65+
value = value.Normalize().Trim();
7166
}
67+
return value;
68+
}
7269

73-
private static string RemoveControlChars(string value) => _controlcodesregex.Replace(value, " ");
70+
/// <summary>
/// Replaces every match of the control-code pattern in <paramref name="value"/> with a single space.
/// </summary>
/// <param name="value">The text to clean.</param>
/// <returns>The text with each matched control character replaced by a space.</returns>
private static string RemoveControlChars(string value)
{
    var cleaned = _controlcodesregex.Replace(value, " ");
    return cleaned;
}
7471

75-
private static string StripDoubleOrMoreWhiteSpace(string value) => _whitespaceregex.Replace(value, " ");
72+
/// <summary>
/// Replaces every match of the whitespace-run pattern in <paramref name="value"/> with a single space.
/// </summary>
/// <param name="value">The text to collapse.</param>
/// <returns>The text with each matched whitespace run replaced by one space.</returns>
private static string StripDoubleOrMoreWhiteSpace(string value)
{
    var collapsed = _whitespaceregex.Replace(value, " ");
    return collapsed;
}
7673

77-
private static string StripHTML(string value) => _htmlregex.Replace(value, " ");
74+
/// <summary>
/// Replaces every match of the HTML-tag pattern in <paramref name="value"/> with a single space.
/// </summary>
/// <param name="value">The text to strip.</param>
/// <returns>The text with each matched tag replaced by a space.</returns>
private static string StripHTML(string value)
{
    var stripped = _htmlregex.Replace(value, " ");
    return stripped;
}
7875

79-
private static string HtmlDecode(string value, int threshold = 5)
76+
private static string HtmlDecode(string value, int threshold = 5)
77+
{
78+
var c = 0;
79+
var newvalue = WebUtility.HtmlDecode(value);
80+
while (!newvalue.Equals(value) && c < threshold) //Keep decoding (if a string is double/triple/... encoded; we want the original)
8081
{
81-
var c = 0;
82-
var newvalue = WebUtility.HtmlDecode(value);
83-
while (!newvalue.Equals(value) && c < threshold) //Keep decoding (if a string is double/triple/... encoded; we want the original)
84-
{
85-
c++;
86-
value = newvalue;
87-
newvalue = WebUtility.HtmlDecode(value);
88-
}
89-
return c >= threshold ? null : newvalue;
82+
c++;
83+
value = newvalue;
84+
newvalue = WebUtility.HtmlDecode(value);
9085
}
86+
return c >= threshold ? null : newvalue;
9187
}
9288
}

SimpleFeedReader/FeedItem.cs

Lines changed: 83 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,101 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.ServiceModel.Syndication;
1+
using System.ServiceModel.Syndication;
52

6-
namespace SimpleFeedReader
3+
namespace SimpleFeedReader;
4+
5+
/// <summary>
6+
/// Represents an item from a <see cref="SyndicationFeed"/>.
7+
/// </summary>
8+
public class FeedItem
79
{
810
/// <summary>
9-
/// Represents an item from a <see cref="SyndicationFeed"/>.
11+
/// The Id of the <see cref="FeedItem"/>.
1012
/// </summary>
11-
public class FeedItem
12-
{
13-
/// <summary>
14-
/// The Id of the <see cref="FeedItem"/>.
15-
/// </summary>
16-
public string Id { get; set; }
13+
public string? Id { get; set; }
1714

18-
/// <summary>
19-
/// The Title of the <see cref="FeedItem"/>.
20-
/// </summary>
21-
public string Title { get; set; }
15+
/// <summary>
16+
/// The Title of the <see cref="FeedItem"/>.
17+
/// </summary>
18+
public string? Title { get; set; }
2219

23-
/// <summary>
24-
/// The Content of the <see cref="FeedItem"/>.
25-
/// </summary>
26-
public string Content { get; set; }
20+
/// <summary>
21+
/// The Content of the <see cref="FeedItem"/>.
22+
/// </summary>
23+
public string? Content { get; set; }
2724

28-
/// <summary>
29-
/// The Summary of the <see cref="FeedItem"/>.
30-
/// </summary>
31-
public string Summary { get; set; }
25+
/// <summary>
26+
/// The Summary of the <see cref="FeedItem"/>.
27+
/// </summary>
28+
public string? Summary { get; set; }
3229

33-
/// <summary>
34-
/// The Uri of the <see cref="FeedItem"/>.
35-
/// </summary>
36-
public Uri Uri { get; set; }
30+
/// <summary>
31+
/// The Uri of the <see cref="FeedItem"/>.
32+
/// </summary>
33+
public Uri? Uri { get; set; }
3734

38-
/// <summary>
39-
/// The images of the <see cref="FeedItem"/>.
40-
/// </summary>
41-
public IEnumerable<Uri> Images { get; set; }
35+
/// <summary>
36+
/// The images of the <see cref="FeedItem"/>.
37+
/// </summary>
38+
public IEnumerable<Uri>? Images { get; set; }
4239

43-
/// <summary>
44-
/// The vategories of the <see cref="FeedItem"/>.
45-
/// </summary>
46-
public IEnumerable<string> Categories { get; set; }
40+
/// <summary>
41+
/// The categories of the <see cref="FeedItem"/>.
42+
/// </summary>
43+
public IEnumerable<string>? Categories { get; set; }
4744

48-
/// <summary>
49-
/// The Date of the <see cref="FeedItem"/>.
50-
/// </summary>
51-
[Obsolete("Split into PublishDate and LastUpdatedDate")]
52-
public DateTimeOffset Date => new[] { PublishDate, LastUpdatedDate }.Max();
45+
/// <summary>
46+
/// The Date of the <see cref="FeedItem"/>.
47+
/// </summary>
48+
[Obsolete("Split into PublishDate and LastUpdatedDate")]
49+
public DateTimeOffset? Date => new[] { PublishDate, LastUpdatedDate }.Max();
5350

54-
/// <summary>
55-
/// The publication date of the <see cref="FeedItem"/>.
56-
/// </summary>
57-
public DateTimeOffset PublishDate { get; set; }
51+
/// <summary>
52+
/// The publication date of the <see cref="FeedItem"/>.
53+
/// </summary>
54+
public DateTimeOffset? PublishDate { get; set; }
5855

59-
/// <summary>
60-
/// The date when the feeditem was last updated <see cref="FeedItem"/>.
61-
/// </summary>
62-
public DateTimeOffset LastUpdatedDate { get; set; }
56+
/// <summary>
57+
/// The date when the <see cref="FeedItem"/> was last updated.
58+
/// </summary>
59+
public DateTimeOffset? LastUpdatedDate { get; set; }
6360

64-
/// <summary>
65-
/// Initializes a new <see cref="FeedItem"/>.
66-
/// </summary>
67-
public FeedItem()
68-
{
69-
Images = new List<Uri>();
70-
Categories = new List<string>();
71-
}
61+
/// <summary>
62+
/// Initializes a new <see cref="FeedItem"/>.
63+
/// </summary>
64+
public FeedItem()
65+
{
66+
Images = [];
67+
Categories = [];
68+
}
7269

73-
/// <summary>
74-
/// Initializes a new <see cref="FeedItem"/> by copying the passed item's properties into the new instance.
75-
/// </summary>
76-
/// <param name="item">The <see cref="FeedItem"/> to copy.</param>
77-
/// <remarks>This is a copy-constructor.</remarks>
78-
public FeedItem(FeedItem item)
79-
: this()
80-
{
81-
Title = item.Title;
82-
Content = item.Content;
83-
Summary = item.Summary;
84-
Uri = item.Uri;
85-
PublishDate = item.PublishDate;
86-
LastUpdatedDate = item.LastUpdatedDate;
87-
Images = item.Images;
88-
Categories = item.Categories;
89-
}
70+
/// <summary>
71+
/// Initializes a new <see cref="FeedItem"/> by copying the passed item's properties into the new instance.
72+
/// </summary>
73+
/// <param name="item">The <see cref="FeedItem"/> to copy.</param>
74+
/// <remarks>This is a copy-constructor.</remarks>
75+
public FeedItem(FeedItem item)
76+
: this()
77+
{
78+
Title = item.Title;
79+
Content = item.Content;
80+
Summary = item.Summary;
81+
Uri = item.Uri;
82+
PublishDate = item.PublishDate;
83+
LastUpdatedDate = item.LastUpdatedDate;
84+
Images = item.Images;
85+
Categories = item.Categories;
86+
}
9087

91-
/// <summary>
92-
/// Returns content, if any, otherwise returns the summary as content.
93-
/// </summary>
94-
/// <returns>Returns content, if any, otherwise returns the summary as content.</returns>
95-
/// <remarks>This method is intended as conveinience-method.</remarks>
96-
public string GetContent() => !string.IsNullOrEmpty(Content) ? Content : Summary;
88+
/// <summary>
89+
/// Returns content, if any, otherwise returns the summary as content.
90+
/// </summary>
91+
/// <returns>Returns content, if any, otherwise returns the summary as content.</returns>
92+
/// <remarks>This method is intended as a convenience method.</remarks>
93+
public string? GetContent()
{
    // No content (null or empty): hand back the summary instead.
    if (string.IsNullOrEmpty(Content))
    {
        return Summary;
    }

    return Content;
}
9794

98-
/// <summary>
99-
/// Returns the summary, if any, otherwise returns the content as the summary.
100-
/// </summary>
101-
/// <returns>Returns the summary, if any, otherwise returns the content as the summary.</returns>
102-
/// <remarks>This method is intended as conveinience-method.</remarks>
103-
public string GetSummary() => !string.IsNullOrEmpty(Summary) ? Summary : Content;
104-
}
95+
/// <summary>
96+
/// Returns the summary, if any, otherwise returns the content as the summary.
97+
/// </summary>
98+
/// <returns>Returns the summary, if any, otherwise returns the content as the summary.</returns>
99+
/// <remarks>This method is intended as a convenience method.</remarks>
100+
public string? GetSummary()
{
    // No summary (null or empty): hand back the content instead.
    if (string.IsNullOrEmpty(Summary))
    {
        return Content;
    }

    return Summary;
}
105101
}

0 commit comments

Comments
 (0)