@@ -10,6 +10,7 @@ import (
1010 "io"
1111 "log"
1212 "os"
13+ "regexp"
1314 str "strings"
1415 "time"
1516
@@ -439,16 +440,21 @@ const (
439440// Code -----------------------------------------------------------------------
440441
441442type TripleAggregateToWikiPageConverter struct {
442- InAggregate chan * TripleAggregate
443- InIndex chan * map [string ]* TripleAggregate
444- OutPage chan * WikiPage
443+ InAggregate chan * TripleAggregate
444+ InIndex chan * map [string ]* TripleAggregate
445+ OutPage chan * WikiPage
446+ cleanUpRegexes []* regexp.Regexp
445447}
446448
447449func NewTripleAggregateToWikiPageConverter () * TripleAggregateToWikiPageConverter {
448450 return & TripleAggregateToWikiPageConverter {
449451 InAggregate : make (chan * TripleAggregate , BUFSIZE ),
450452 InIndex : make (chan * map [string ]* TripleAggregate , BUFSIZE ),
451453 OutPage : make (chan * WikiPage , BUFSIZE ),
454+ cleanUpRegexes : []* regexp.Regexp {
455+ regexp .MustCompile (" [(][^)]*:[^)]*[)]" ),
456+ regexp .MustCompile (" [[][^]]*:[^]]*[]]" ),
457+ },
452458 }
453459}
454460
@@ -488,6 +494,11 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
488494 } else if tr .Obj .Type () == rdf .TermLiteral {
489495
490496 valueStr = tr .Obj .String ()
497+
498+ for _ , r := range p .cleanUpRegexes {
499+ valueStr = r .ReplaceAllString (valueStr , "" )
500+ }
501+
491502 dataTypeStr := tr .Obj .(rdf.Literal ).DataType .String ()
492503
493504 // Add type info on the current property's page
@@ -590,6 +601,11 @@ func (p *TripleAggregateToWikiPageConverter) convertUriToWikiTitle(uri string, u
590601 factTitle = str .Replace (factTitle , "]" , ")" , - 1 )
591602 factTitle = html .EscapeString (factTitle )
592603
604+ // Clean up according to regexes
605+ for _ , r := range p .cleanUpRegexes {
606+ factTitle = r .ReplaceAllString (factTitle , "" )
607+ }
608+
593609 // Limit to max 255 chars (due to MediaWiki limitation)
594610 titleIsShortened := false
595611 for len (factTitle ) >= 250 {
0 commit comments