@@ -22,7 +22,7 @@ const (
2222)
2323
2424func main () {
25- flowbase .InitLogInfo ()
25+ // flowbase.InitLogDebug ()
2626
2727 inFileName := flag .String ("in" , "" , "The input file name" )
2828 outFileName := flag .String ("out" , "" , "The output file name" )
@@ -46,47 +46,56 @@ func main() {
4646 // ------------------------------------------
4747
4848 // Create a pipeline runner
49- pipeRunner := flowbase .NewPipelineRunner ()
49+ net := flowbase .NewPipelineRunner ()
5050
5151 // Read in-file
5252 ttlFileRead := NewTurtleFileReader ()
53- pipeRunner .AddProcess (ttlFileRead )
53+ net .AddProcess (ttlFileRead )
5454
5555 // Aggregate per subject
5656 aggregator := NewAggregateTriplesPerSubject ()
57- pipeRunner .AddProcess (aggregator )
57+ net .AddProcess (aggregator )
5858
5959 // Create a subject-indexed "index" of all triples
6060 indexCreator := NewCreateResourceIndex ()
61- pipeRunner .AddProcess (indexCreator )
61+ net .AddProcess (indexCreator )
6262
6363 // Fan-out the triple index to the converter and serializer
6464 indexFanOut := NewResourceIndexFanOut ()
65- pipeRunner .AddProcess (indexFanOut )
65+ net .AddProcess (indexFanOut )
6666
6767 // Serialize the index back to individual subject-tripleaggregates
6868 indexToAggr := NewResourceIndexToTripleAggregates ()
69- pipeRunner .AddProcess (indexToAggr )
69+ net .AddProcess (indexToAggr )
7070
7171 // Convert TripleAggregate to WikiPage
7272 triplesToWikiConverter := NewTripleAggregateToWikiPageConverter ()
73- pipeRunner .AddProcess (triplesToWikiConverter )
73+ net .AddProcess (triplesToWikiConverter )
7474
7575 //categoryFilterer := NewCategoryFilterer([]string{"DataEntry"})
76- //pipeRunner .AddProcess(categoryFilterer)
76+ //net .AddProcess(categoryFilterer)
7777
7878 // Pretty-print wiki page data
7979 //wikiPagePrinter := NewWikiPagePrinter()
80- //pipeRunner .AddProcess(wikiPagePrinter)
80+ //net .AddProcess(wikiPagePrinter)
8181
8282 useTemplates := true
8383 xmlCreator := NewMWXMLCreator (useTemplates )
84- pipeRunner .AddProcess (xmlCreator )
84+ net .AddProcess (xmlCreator )
8585
8686 //printer := NewStringPrinter()
87- //pipeRunner.AddProcess(printer)
88- strFileWriter := NewStringFileWriter (* outFileName )
89- pipeRunner .AddProcess (strFileWriter )
87+ //net.AddProcess(printer)
88+ templateWriter := NewStringFileWriter (str .Replace (* outFileName , ".xml" , "_templates.xml" , 1 ))
89+ net .AddProcess (templateWriter )
90+
91+ propertyWriter := NewStringFileWriter (str .Replace (* outFileName , ".xml" , "_properties.xml" , 1 ))
92+ net .AddProcess (propertyWriter )
93+
94+ pageWriter := NewStringFileWriter (* outFileName )
95+ net .AddProcess (pageWriter )
96+
97+ snk := flowbase .NewSink ()
98+ net .AddProcess (snk )
9099
91100 // ------------------------------------------
92101 // Connect network
@@ -107,7 +116,13 @@ func main() {
107116
108117 triplesToWikiConverter .OutPage = xmlCreator .InWikiPage
109118
110- xmlCreator .Out = strFileWriter .In
119+ xmlCreator .OutTemplates = templateWriter .In
120+ xmlCreator .OutProperties = propertyWriter .In
121+ xmlCreator .OutPages = pageWriter .In
122+
123+ snk .Connect (templateWriter .OutDone )
124+ snk .Connect (propertyWriter .OutDone )
125+ snk .Connect (pageWriter .OutDone )
111126
112127 // ------------------------------------------
113128 // Send in-data and run
@@ -118,8 +133,7 @@ func main() {
118133 ttlFileRead .InFileName <- * inFileName
119134 }()
120135
121- pipeRunner .Run ()
122-
136+ net .Run ()
123137}
124138
125139// ================================================================================
@@ -419,6 +433,7 @@ const (
419433 URITypeUndefined
420434 URITypePredicate
421435 URITypeClass
436+ URITypeTemplate
422437)
423438
424439// Code -----------------------------------------------------------------------
@@ -489,21 +504,8 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
489504 }
490505
491506 if tr .Pred .String () == typePropertyURI || tr .Pred .String () == subClassPropertyURI {
492-
493- catExists := false
494- for _ , existingCat := range page .Categories {
495- if valueStr == existingCat {
496- catExists = true
497- break
498- }
499- }
500-
501- if ! catExists {
502- page .AddCategory (valueStr )
503- }
504-
507+ page .AddCategoryUnique (valueStr )
505508 } else {
506-
507509 page .AddFactUnique (NewFact (propertyStr , valueStr ))
508510 }
509511 }
@@ -521,7 +523,7 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
521523 predPageIndex [page .Title ].AddFactUnique (fact )
522524 }
523525 for _ , cat := range page .Categories {
524- predPageIndex [page .Title ].AddCategory (cat )
526+ predPageIndex [page .Title ].AddCategoryUnique (cat )
525527 }
526528 } else {
527529 // If page does not exist, use the newly created one
@@ -599,6 +601,8 @@ func (p *TripleAggregateToWikiPageConverter) convertUriToWikiTitle(uri string, u
599601 factTitle += " ..."
600602 }
601603
604+ factTitle = upperCaseFirst (factTitle )
605+
602606 if uriType == URITypePredicate {
603607 pageTitle = "Property:" + factTitle
604608 } else if uriType == URITypeClass {
@@ -656,16 +660,20 @@ func (p *CategoryFilterer) Run() {
656660// --------------------------------------------------------------------------------
657661
658662type MWXMLCreator struct {
659- InWikiPage chan * WikiPage
660- Out chan string
661- UseTemplates bool
663+ InWikiPage chan * WikiPage
664+ OutTemplates chan string
665+ OutProperties chan string
666+ OutPages chan string
667+ UseTemplates bool
662668}
663669
664670func NewMWXMLCreator (useTemplates bool ) * MWXMLCreator {
665671 return & MWXMLCreator {
666- InWikiPage : make (chan * WikiPage , BUFSIZE ),
667- Out : make (chan string , BUFSIZE ),
668- UseTemplates : useTemplates ,
672+ InWikiPage : make (chan * WikiPage , BUFSIZE ),
673+ OutTemplates : make (chan string , BUFSIZE ),
674+ OutProperties : make (chan string , BUFSIZE ),
675+ OutPages : make (chan string , BUFSIZE ),
676+ UseTemplates : useTemplates ,
669677 }
670678}
671679
@@ -689,36 +697,57 @@ const wikiXmlTpl = `
689697
690698var pageTypeToMWNamespace = map [int ]int {
691699 URITypeClass : 14 ,
700+ URITypeTemplate : 10 ,
692701 URITypePredicate : 102 ,
693702 URITypeUndefined : 0 ,
694703}
695704
696705func (p * MWXMLCreator ) Run () {
697- defer close ( p . Out )
706+ tplPropertyIdx := make ( map [ string ] map [ string ] int )
698707
699- p .Out <- "<mediawiki>\n "
708+ defer close (p .OutTemplates )
709+ defer close (p .OutProperties )
710+ defer close (p .OutPages )
711+
712+ p .OutPages <- "<mediawiki>\n "
713+ p .OutProperties <- "<mediawiki>\n "
700714
701715 for page := range p .InWikiPage {
702716
703717 wikiText := ""
704718
705719 if p .UseTemplates && len (page .Categories ) > 0 { // We need at least one category, as to name the (to-be) template
706720
707- wikiText += "{{" + page .Categories [0 ] + "\n " // TODO: What to do when we have multipel categories?
721+ templateName := page .Categories [0 ]
722+ templateTitle := "Template:" + templateName
723+
724+ // Make sure template page exists
725+ if tplPropertyIdx [templateTitle ] == nil {
726+ tplPropertyIdx [templateTitle ] = make (map [string ]int )
727+ }
708728
709- // Add facts as parameters to the template
729+ wikiText += "{{" + templateName + "\n " // TODO: What to do when we have multipel categories?
730+
731+ // Add facts as parameters to the template call
710732 var lastProperty string
711733 for _ , fact := range page .Facts {
734+ // Write facts to template call on current page
735+
736+ val := escapeWikiChars (fact .Value )
712737 if fact .Property == lastProperty {
713- wikiText += "," + fact . Value + "\n "
738+ wikiText += "," + val + "\n "
714739 } else {
715- wikiText += "|" + str . Replace (fact .Property , " " , "_" , - 1 ) + "=" + fact . Value + "\n "
740+ wikiText += "|" + spacesToUnderscores (fact .Property ) + "=" + val + "\n "
716741 }
742+
717743 lastProperty = fact .Property
744+
745+ // Add fact to the relevant template page
746+ tplPropertyIdx [templateTitle ][fact.Property ] = 1
718747 }
719748
720749 // Add categories as multi-valued call to the "categories" value of the template
721- wikiText += "|categories ="
750+ wikiText += "|Categories ="
722751 for i , cat := range page .Categories {
723752 if i == 0 {
724753 wikiText += cat
@@ -732,7 +761,7 @@ func (p *MWXMLCreator) Run() {
732761
733762 // Add fact statements
734763 for _ , fact := range page .Facts {
735- wikiText += fmtFact (fact .Property , fact .Value )
764+ wikiText += fmtFact (fact .Property , escapeWikiChars ( fact .Value ) )
736765 }
737766
738767 // Add category statements
@@ -745,10 +774,33 @@ func (p *MWXMLCreator) Run() {
745774 xmlData := fmt .Sprintf (wikiXmlTpl , page .Title , pageTypeToMWNamespace [page .Type ], time .Now ().Format ("2006-01-02T15:04:05Z" ), wikiText )
746775
747776 // Print out the generated XML one line at a time
748- p .Out <- xmlData
777+ if page .Type == URITypePredicate {
778+ p .OutProperties <- xmlData
779+ } else {
780+ p .OutPages <- xmlData
781+ }
749782 }
783+ p .OutPages <- "</mediawiki>\n "
784+ p .OutProperties <- "</mediawiki>\n "
785+
786+ p .OutTemplates <- "<mediawiki>\n "
787+ // Create template pages
788+ for tplName , tplProperties := range tplPropertyIdx {
789+ tplText := `{|class="wikitable smwtable"
790+ !colspan="2"|{{PAGENAMEE}}
791+ `
792+ for property , _ := range tplProperties {
793+ argName := spacesToUnderscores (property )
794+ tplText += fmt .Sprintf ("|-\n !%s\n |{{#arraymap:{{{%s|}}}|,|x|[[%s::x]]|,}}\n " , property , argName , property )
795+ }
796+ tplText += "|}\n \n "
797+ // Add categories
798+ tplText += "{{#arraymap:{{{Categories}}}|,|x|[[Category:x]]|}}\n "
750799
751- p .Out <- "</mediawiki>\n "
800+ xmlData := fmt .Sprintf (wikiXmlTpl , tplName , pageTypeToMWNamespace [URITypeTemplate ], time .Now ().Format ("2006-01-02T15:04:05Z" ), tplText )
801+ p .OutTemplates <- xmlData
802+ }
803+ p .OutTemplates <- "</mediawiki>\n "
752804}
753805
754806// --------------------------------------------------------------------------------
@@ -868,17 +920,21 @@ func (p *StringPrinter) Run() {
868920
// StringFileWriter is a process that writes every string received on In to
// a single file and reports completion on OutDone.
type StringFileWriter struct {
	// In delivers the strings to be written, in order.
	In chan string
	// OutDone receives one done signal once In has been drained, and is
	// closed when Run returns.
	OutDone chan interface{}

	// fileName is the path of the file that Run creates and writes to.
	fileName string
}
873926
874927func NewStringFileWriter (fileName string ) * StringFileWriter {
875928 return & StringFileWriter {
876929 In : make (chan string , BUFSIZE ),
930+ OutDone : make (chan interface {}, BUFSIZE ),
877931 fileName : fileName ,
878932 }
879933}
880934
881935func (p * StringFileWriter ) Run () {
936+ defer close (p .OutDone )
937+
882938 fh , err := os .Create (p .fileName )
883939 if err != nil {
884940 panic ("Could not create output file: " + err .Error ())
@@ -887,8 +943,13 @@ func (p *StringFileWriter) Run() {
887943 for s := range p .In {
888944 fh .WriteString (s )
889945 }
946+
947+ flowbase .Debug .Printf ("Sending done signal on chan %v now in StringFileWriter ...\n " , p .OutDone )
948+ p .OutDone <- & DoneSignal {}
890949}
891950
951+ type DoneSignal struct {}
952+
892953// --------------------------------------------------------------------------------
893954// IP: RDFTriple
894955// --------------------------------------------------------------------------------
@@ -962,6 +1023,19 @@ func (p *WikiPage) AddCategory(category string) {
9621023 p .Categories = append (p .Categories , category )
9631024}
9641025
1026+ func (p * WikiPage ) AddCategoryUnique (category string ) {
1027+ catExists := false
1028+ for _ , existingCat := range p .Categories {
1029+ if category == existingCat {
1030+ catExists = true
1031+ break
1032+ }
1033+ }
1034+ if ! catExists {
1035+ p .AddCategory (category )
1036+ }
1037+ }
1038+
9651039// Helper type: Fact
9661040
9671041type Fact struct {
@@ -1000,3 +1074,23 @@ func removeLastWord(inStr string) string {
10001074 outStr := str .Join (append (bits [:len (bits )- 1 ]), " " )
10011075 return outStr
10021076}
1077+
// spacesToUnderscores returns inStr with every space character replaced by
// an underscore. All other bytes are passed through untouched.
func spacesToUnderscores(inStr string) string {
	// Cutting the string at each space and re-joining the pieces with "_"
	// substitutes every space, including leading, trailing and repeated ones.
	pieces := str.Split(inStr, " ")
	return str.Join(pieces, "_")
}
1081+
// upperCaseFirst returns inStr with its first character converted to upper
// case; the remainder of the string is returned unchanged. An empty input
// yields an empty string.
//
// The first character is located by rune rather than by byte, so a leading
// multi-byte UTF-8 character (for example "é") is upper-cased as a whole
// instead of being cut in the middle, which would corrupt the string.
func upperCaseFirst(inStr string) string {
	if inStr == "" {
		return ""
	}

	// Ranging over a string yields the byte offset at which each rune starts,
	// so the offset seen on the second iteration is the byte width of the
	// first rune. A single-rune string never reaches a second iteration, in
	// which case the whole string is the first rune.
	firstLen := len(inStr)
	for i := range inStr {
		if i > 0 {
			firstLen = i
			break
		}
	}

	return str.ToUpper(inStr[:firstLen]) + inStr[firstLen:]
}
1089+
// wikiCharEscaper performs the substitutions used by escapeWikiChars in a
// single pass: "[" -> "(", "]" -> ")", "|" -> "," and "=" -> "-".
var wikiCharEscaper = str.NewReplacer(
	"[", "(",
	"]", ")",
	"|", ",",
	"=", "-",
)

// escapeWikiChars returns inStr with square brackets turned into parentheses,
// pipes turned into commas and equals signs turned into hyphens. Callers in
// this file apply it to fact values before placing them in template calls of
// the form "|name=value", where "|" and "=" act as delimiters.
func escapeWikiChars(inStr string) string {
	return wikiCharEscaper.Replace(inStr)
}
0 commit comments