Skip to content

Commit 32b7873

Browse files
committed
Collect and write template pages. Write templates and properties to separate files
1 parent e4d84ac commit 32b7873

File tree

1 file changed

+142
-48
lines changed

1 file changed

+142
-48
lines changed

main.go

Lines changed: 142 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const (
2222
)
2323

2424
func main() {
25-
flowbase.InitLogInfo()
25+
//flowbase.InitLogDebug()
2626

2727
inFileName := flag.String("in", "", "The input file name")
2828
outFileName := flag.String("out", "", "The output file name")
@@ -46,47 +46,56 @@ func main() {
4646
// ------------------------------------------
4747

4848
// Create a pipeline runner
49-
pipeRunner := flowbase.NewPipelineRunner()
49+
net := flowbase.NewPipelineRunner()
5050

5151
// Read in-file
5252
ttlFileRead := NewTurtleFileReader()
53-
pipeRunner.AddProcess(ttlFileRead)
53+
net.AddProcess(ttlFileRead)
5454

5555
// Aggregate per subject
5656
aggregator := NewAggregateTriplesPerSubject()
57-
pipeRunner.AddProcess(aggregator)
57+
net.AddProcess(aggregator)
5858

5959
// Create a subject-indexed "index" of all triples
6060
indexCreator := NewCreateResourceIndex()
61-
pipeRunner.AddProcess(indexCreator)
61+
net.AddProcess(indexCreator)
6262

6363
// Fan-out the triple index to the converter and serializer
6464
indexFanOut := NewResourceIndexFanOut()
65-
pipeRunner.AddProcess(indexFanOut)
65+
net.AddProcess(indexFanOut)
6666

6767
// Serialize the index back to individual subject-tripleaggregates
6868
indexToAggr := NewResourceIndexToTripleAggregates()
69-
pipeRunner.AddProcess(indexToAggr)
69+
net.AddProcess(indexToAggr)
7070

7171
// Convert TripleAggregate to WikiPage
7272
triplesToWikiConverter := NewTripleAggregateToWikiPageConverter()
73-
pipeRunner.AddProcess(triplesToWikiConverter)
73+
net.AddProcess(triplesToWikiConverter)
7474

7575
//categoryFilterer := NewCategoryFilterer([]string{"DataEntry"})
76-
//pipeRunner.AddProcess(categoryFilterer)
76+
//net.AddProcess(categoryFilterer)
7777

7878
// Pretty-print wiki page data
7979
//wikiPagePrinter := NewWikiPagePrinter()
80-
//pipeRunner.AddProcess(wikiPagePrinter)
80+
//net.AddProcess(wikiPagePrinter)
8181

8282
useTemplates := true
8383
xmlCreator := NewMWXMLCreator(useTemplates)
84-
pipeRunner.AddProcess(xmlCreator)
84+
net.AddProcess(xmlCreator)
8585

8686
//printer := NewStringPrinter()
87-
//pipeRunner.AddProcess(printer)
88-
strFileWriter := NewStringFileWriter(*outFileName)
89-
pipeRunner.AddProcess(strFileWriter)
87+
//net.AddProcess(printer)
88+
templateWriter := NewStringFileWriter(str.Replace(*outFileName, ".xml", "_templates.xml", 1))
89+
net.AddProcess(templateWriter)
90+
91+
propertyWriter := NewStringFileWriter(str.Replace(*outFileName, ".xml", "_properties.xml", 1))
92+
net.AddProcess(propertyWriter)
93+
94+
pageWriter := NewStringFileWriter(*outFileName)
95+
net.AddProcess(pageWriter)
96+
97+
snk := flowbase.NewSink()
98+
net.AddProcess(snk)
9099

91100
// ------------------------------------------
92101
// Connect network
@@ -107,7 +116,13 @@ func main() {
107116

108117
triplesToWikiConverter.OutPage = xmlCreator.InWikiPage
109118

110-
xmlCreator.Out = strFileWriter.In
119+
xmlCreator.OutTemplates = templateWriter.In
120+
xmlCreator.OutProperties = propertyWriter.In
121+
xmlCreator.OutPages = pageWriter.In
122+
123+
snk.Connect(templateWriter.OutDone)
124+
snk.Connect(propertyWriter.OutDone)
125+
snk.Connect(pageWriter.OutDone)
111126

112127
// ------------------------------------------
113128
// Send in-data and run
@@ -118,8 +133,7 @@ func main() {
118133
ttlFileRead.InFileName <- *inFileName
119134
}()
120135

121-
pipeRunner.Run()
122-
136+
net.Run()
123137
}
124138

125139
// ================================================================================
@@ -419,6 +433,7 @@ const (
419433
URITypeUndefined
420434
URITypePredicate
421435
URITypeClass
436+
URITypeTemplate
422437
)
423438

424439
// Code -----------------------------------------------------------------------
@@ -489,21 +504,8 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
489504
}
490505

491506
if tr.Pred.String() == typePropertyURI || tr.Pred.String() == subClassPropertyURI {
492-
493-
catExists := false
494-
for _, existingCat := range page.Categories {
495-
if valueStr == existingCat {
496-
catExists = true
497-
break
498-
}
499-
}
500-
501-
if !catExists {
502-
page.AddCategory(valueStr)
503-
}
504-
507+
page.AddCategoryUnique(valueStr)
505508
} else {
506-
507509
page.AddFactUnique(NewFact(propertyStr, valueStr))
508510
}
509511
}
@@ -521,7 +523,7 @@ func (p *TripleAggregateToWikiPageConverter) Run() {
521523
predPageIndex[page.Title].AddFactUnique(fact)
522524
}
523525
for _, cat := range page.Categories {
524-
predPageIndex[page.Title].AddCategory(cat)
526+
predPageIndex[page.Title].AddCategoryUnique(cat)
525527
}
526528
} else {
527529
// If page does not exist, use the newly created one
@@ -599,6 +601,8 @@ func (p *TripleAggregateToWikiPageConverter) convertUriToWikiTitle(uri string, u
599601
factTitle += " ..."
600602
}
601603

604+
factTitle = upperCaseFirst(factTitle)
605+
602606
if uriType == URITypePredicate {
603607
pageTitle = "Property:" + factTitle
604608
} else if uriType == URITypeClass {
@@ -656,16 +660,20 @@ func (p *CategoryFilterer) Run() {
656660
// --------------------------------------------------------------------------------
657661

658662
type MWXMLCreator struct {
659-
InWikiPage chan *WikiPage
660-
Out chan string
661-
UseTemplates bool
663+
InWikiPage chan *WikiPage
664+
OutTemplates chan string
665+
OutProperties chan string
666+
OutPages chan string
667+
UseTemplates bool
662668
}
663669

664670
func NewMWXMLCreator(useTemplates bool) *MWXMLCreator {
665671
return &MWXMLCreator{
666-
InWikiPage: make(chan *WikiPage, BUFSIZE),
667-
Out: make(chan string, BUFSIZE),
668-
UseTemplates: useTemplates,
672+
InWikiPage: make(chan *WikiPage, BUFSIZE),
673+
OutTemplates: make(chan string, BUFSIZE),
674+
OutProperties: make(chan string, BUFSIZE),
675+
OutPages: make(chan string, BUFSIZE),
676+
UseTemplates: useTemplates,
669677
}
670678
}
671679

@@ -689,36 +697,57 @@ const wikiXmlTpl = `
689697

690698
var pageTypeToMWNamespace = map[int]int{
691699
URITypeClass: 14,
700+
URITypeTemplate: 10,
692701
URITypePredicate: 102,
693702
URITypeUndefined: 0,
694703
}
695704

696705
func (p *MWXMLCreator) Run() {
697-
defer close(p.Out)
706+
tplPropertyIdx := make(map[string]map[string]int)
698707

699-
p.Out <- "<mediawiki>\n"
708+
defer close(p.OutTemplates)
709+
defer close(p.OutProperties)
710+
defer close(p.OutPages)
711+
712+
p.OutPages <- "<mediawiki>\n"
713+
p.OutProperties <- "<mediawiki>\n"
700714

701715
for page := range p.InWikiPage {
702716

703717
wikiText := ""
704718

705719
if p.UseTemplates && len(page.Categories) > 0 { // We need at least one category, as to name the (to-be) template
706720

707-
wikiText += "{{" + page.Categories[0] + "\n" // TODO: What to do when we have multipel categories?
721+
templateName := page.Categories[0]
722+
templateTitle := "Template:" + templateName
723+
724+
// Make sure template page exists
725+
if tplPropertyIdx[templateTitle] == nil {
726+
tplPropertyIdx[templateTitle] = make(map[string]int)
727+
}
708728

709-
// Add facts as parameters to the template
729+
wikiText += "{{" + templateName + "\n" // TODO: What to do when we have multipel categories?
730+
731+
// Add facts as parameters to the template call
710732
var lastProperty string
711733
for _, fact := range page.Facts {
734+
// Write facts to template call on current page
735+
736+
val := escapeWikiChars(fact.Value)
712737
if fact.Property == lastProperty {
713-
wikiText += "," + fact.Value + "\n"
738+
wikiText += "," + val + "\n"
714739
} else {
715-
wikiText += "|" + str.Replace(fact.Property, " ", "_", -1) + "=" + fact.Value + "\n"
740+
wikiText += "|" + spacesToUnderscores(fact.Property) + "=" + val + "\n"
716741
}
742+
717743
lastProperty = fact.Property
744+
745+
// Add fact to the relevant template page
746+
tplPropertyIdx[templateTitle][fact.Property] = 1
718747
}
719748

720749
// Add categories as multi-valued call to the "categories" value of the template
721-
wikiText += "|categories="
750+
wikiText += "|Categories="
722751
for i, cat := range page.Categories {
723752
if i == 0 {
724753
wikiText += cat
@@ -732,7 +761,7 @@ func (p *MWXMLCreator) Run() {
732761

733762
// Add fact statements
734763
for _, fact := range page.Facts {
735-
wikiText += fmtFact(fact.Property, fact.Value)
764+
wikiText += fmtFact(fact.Property, escapeWikiChars(fact.Value))
736765
}
737766

738767
// Add category statements
@@ -745,10 +774,33 @@ func (p *MWXMLCreator) Run() {
745774
xmlData := fmt.Sprintf(wikiXmlTpl, page.Title, pageTypeToMWNamespace[page.Type], time.Now().Format("2006-01-02T15:04:05Z"), wikiText)
746775

747776
// Print out the generated XML one line at a time
748-
p.Out <- xmlData
777+
if page.Type == URITypePredicate {
778+
p.OutProperties <- xmlData
779+
} else {
780+
p.OutPages <- xmlData
781+
}
749782
}
783+
p.OutPages <- "</mediawiki>\n"
784+
p.OutProperties <- "</mediawiki>\n"
785+
786+
p.OutTemplates <- "<mediawiki>\n"
787+
// Create template pages
788+
for tplName, tplProperties := range tplPropertyIdx {
789+
tplText := `{|class="wikitable smwtable"
790+
!colspan="2"|{{PAGENAMEE}}
791+
`
792+
for property, _ := range tplProperties {
793+
argName := spacesToUnderscores(property)
794+
tplText += fmt.Sprintf("|-\n!%s\n|{{#arraymap:{{{%s|}}}|,|x|[[%s::x]]|,}}\n", property, argName, property)
795+
}
796+
tplText += "|}\n\n"
797+
// Add categories
798+
tplText += "{{#arraymap:{{{Categories}}}|,|x|[[Category:x]]|}}\n"
750799

751-
p.Out <- "</mediawiki>\n"
800+
xmlData := fmt.Sprintf(wikiXmlTpl, tplName, pageTypeToMWNamespace[URITypeTemplate], time.Now().Format("2006-01-02T15:04:05Z"), tplText)
801+
p.OutTemplates <- xmlData
802+
}
803+
p.OutTemplates <- "</mediawiki>\n"
752804
}
753805

754806
// --------------------------------------------------------------------------------
@@ -868,17 +920,21 @@ func (p *StringPrinter) Run() {
868920

869921
// StringFileWriter writes every string it receives to a single file and
// signals on OutDone when it has finished.
type StringFileWriter struct {
	// In carries the strings to be written, in order.
	In chan string
	// OutDone receives one *DoneSignal after In has been drained, and is
	// closed when Run returns.
	OutDone chan interface{}
	// fileName is the path of the file that Run creates.
	fileName string
}
873926

874927
func NewStringFileWriter(fileName string) *StringFileWriter {
875928
return &StringFileWriter{
876929
In: make(chan string, BUFSIZE),
930+
OutDone: make(chan interface{}, BUFSIZE),
877931
fileName: fileName,
878932
}
879933
}
880934

881935
func (p *StringFileWriter) Run() {
936+
defer close(p.OutDone)
937+
882938
fh, err := os.Create(p.fileName)
883939
if err != nil {
884940
panic("Could not create output file: " + err.Error())
@@ -887,8 +943,13 @@ func (p *StringFileWriter) Run() {
887943
for s := range p.In {
888944
fh.WriteString(s)
889945
}
946+
947+
flowbase.Debug.Printf("Sending done signal on chan %v now in StringFileWriter ...\n", p.OutDone)
948+
p.OutDone <- &DoneSignal{}
890949
}
891950

951+
// DoneSignal is the empty marker value that StringFileWriter.Run sends on its
// OutDone channel once it has written everything received on In.
type DoneSignal struct{}
952+
892953
// --------------------------------------------------------------------------------
893954
// IP: RDFTriple
894955
// --------------------------------------------------------------------------------
@@ -962,6 +1023,19 @@ func (p *WikiPage) AddCategory(category string) {
9621023
p.Categories = append(p.Categories, category)
9631024
}
9641025

1026+
func (p *WikiPage) AddCategoryUnique(category string) {
1027+
catExists := false
1028+
for _, existingCat := range p.Categories {
1029+
if category == existingCat {
1030+
catExists = true
1031+
break
1032+
}
1033+
}
1034+
if !catExists {
1035+
p.AddCategory(category)
1036+
}
1037+
}
1038+
9651039
// Helper type: Fact
9661040

9671041
type Fact struct {
@@ -1000,3 +1074,23 @@ func removeLastWord(inStr string) string {
10001074
outStr := str.Join(append(bits[:len(bits)-1]), " ")
10011075
return outStr
10021076
}
1077+
1078+
// spacesToUnderscores returns inStr with every space character replaced by an
// underscore. All other characters are left as they are.
func spacesToUnderscores(inStr string) string {
	words := str.Split(inStr, " ")
	return str.Join(words, "_")
}
1081+
1082+
// upperCaseFirst returns inStr with its first character converted to upper
// case and the remainder left untouched. An empty string is returned as is.
//
// The first character is located by rune boundary rather than by byte, so a
// multi-byte leading character (e.g. "é") is upper-cased as a whole instead of
// being cut in half and turned into invalid UTF-8.
func upperCaseFirst(inStr string) string {
	// Ranging over a string yields the byte offset at which each rune starts;
	// the first non-zero offset is therefore where the first rune ends.
	for i := range inStr {
		if i > 0 {
			return str.ToUpper(inStr[:i]) + inStr[i:]
		}
	}
	// Empty or single-rune input: upper-casing the whole string is equivalent.
	return str.ToUpper(inStr)
}
1089+
1090+
// wikiCharEscaper performs all substitutions done by escapeWikiChars in a
// single pass over the input. It is built once and shared between calls.
var wikiCharEscaper = str.NewReplacer(
	"[", "(",
	"]", ")",
	"|", ",",
	"=", "-",
)

// escapeWikiChars returns inStr with the characters "[", "]", "|" and "="
// replaced by "(", ")", "," and "-" respectively. It is applied to fact values
// before they are embedded in generated wiki text.
func escapeWikiChars(inStr string) string {
	return wikiCharEscaper.Replace(inStr)
}

0 commit comments

Comments
 (0)