Skip to content

Commit aae369b

Browse files
committed
Merged from master version 2.1.1
1 parent 1c25273 commit aae369b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+47027
-43671
lines changed

.gitignore

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ profile_wosserver/
2020
metaknowledgeDocs.md
2121
j9Raws/
2222
/manualj9Abbreviations*
23-
metaknowledge/WOS/journalAbbreviations/j9Abbreviations.bak
24-
metaknowledge/WOS/journalAbbreviations/j9Abbreviations.dir
25-
metaknowledge/WOS/journalAbbreviations/j9Abbreviations.dat
23+
metaknowledge/journalAbbreviations/j9Abbreviations.bak
24+
metaknowledge/journalAbbreviations/j9Abbreviations.dir
25+
metaknowledge/journalAbbreviations/j9Abbreviations.dat
2626
!savedrecs.txt
2727
*.bib
2828

metaknowledge/WOS/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,6 @@
8585

8686
from .tagProcessing.tagFunctions import *
8787
from .tagProcessing.funcDicts import tagToFullDict, fullToTagDict, tagNameConverterDict, tagsAndNameSet, knownTagsList
88-
from .journalAbbreviations.backend import updatej9DB, getj9dict, abrevDBname, excludeFromDB, addToDB, manaulDBname
88+
89+
from .recordWOS import WOSRecord, recordParser
90+
from .wosHandlers import isWOSFile, wosParser

metaknowledge/WOS/tagProcessing/tagFunctions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
22
from .helpFuncs import getMonth
3-
from ..journalAbbreviations.wosCitations import WOSCitation
3+
from ...citation import Citation
44

55
import collections
66

@@ -523,7 +523,7 @@ def citations(val):
523523
"""
524524
retCites = []
525525
for c in val:
526-
retCites.append(WOSCitation(c))
526+
retCites.append(Citation(c))
527527
return retCites
528528

529529
def publisherCity(val):

metaknowledge/WOS/wosHandlers.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,23 @@
44
from ..mkExceptions import cacheError, BadWOSFile, BadWOSRecord
55

66
def isWOSFile(infile, checkedLines = 3):
7-
"""Checks if _infile_ has the right header in the first _checkedLines_ lines
7+
"""Determines if _infile_ is the path to a WOS file. A file is considered to be a WOS file if it has the correct encoding (`utf-8` with a BOM) and, within the first _checkedLines_ lines, a line starts with `"VR 1.0"`.
8+
9+
# Parameters
10+
11+
_infile_ : `str`
12+
13+
> The path to the target file
14+
15+
_checkedLines_ : `optional [int]`
16+
17+
> default 3, the number of lines to check for the header
18+
19+
# Returns
20+
21+
`bool`
22+
23+
> `True` if the file is a WOS file
824
"""
925
try:
1026
with open(infile, 'r', encoding='utf-8-sig') as openfile:

metaknowledge/__init__.py

Lines changed: 17 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
#Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
2-
"""metaknowledge is a Python3 package that simplifies bibliometric and computational analysis of Web of Science data.
1+
#Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2016
2+
"""_metaknowledge_ is a Python3 package that simplifies bibliometric and computational analysis of Web of Science data.
33
44
# Example
55
@@ -19,43 +19,33 @@
1919
2020
# Overview
2121
22-
This package can read the files downloaded from the [Thomson Reuters Web of Science](https://webofknowledge.com) (WOS) as plain text. These files contain metadata about scientific records, such as the authors, title, and citations. The records are exported in groups of up-to 500 individual records to a file.
22+
This package can read the files downloaded from Thomson Reuters' [Web of Science](https://webofknowledge.com) (_WOS_), Elsevier's [Scopus](https://www.scopus.com/), [ProQuest](https://www.proquest.com/) and Medline files from [PubMed](https://www.ncbi.nlm.nih.gov/pubmed). These files contain entries on the metadata of scientific records, such as authors, title, and citations. _metaknowledge_ can also read grants from various organizations including _NSF_ and _NSERC_ which are handled similarly to records.
2323
24-
The [metaknowledge.RecordCollection](#RecordCollection.RecordCollection) class can take a path to one or more of these files load and parse them. The object is the main way for work to be done on multiple records. For each individual record it creates an instance of the [metaknowledge.Record](#Record.Record) class that contains the results of the parsing of the record.
24+
The [metaknowledge.RecordCollection](#RecordCollection.RecordCollection) class can take a path to one or more of these files, then load and parse them. The object is the main way for work to be done on multiple records. For each individual record it creates an instance of the [metaknowledge.Record](#metaknowledge.Record) class that contains the results of the parsing of the record.
2525
26-
The files given by WOS are a flat database containing a series of 2 character tags, e.g. 'TI' is the title. Each WOS tag has one or more values and metaknowledge can read them to extract useful information. The approximate meanings of the tags are listed in the [tagProcessing](#tagProcessing.tagProcessing) package, along with the parsing functions for each tag. If you simply want the mapping [`tagToFull()`](#metaknowledge.tagToFull) is a function that maps tags to their full names it, as well as a few other similar functions are provided by the base metaknowledge import. Note, the long names can be used in place of the short 2 character codes within metaknowledge. There are no full official public listings of tag the meanings available. metaknowledge is not attempting to provide the definitive or authoritative meanings.
26+
The files read by _metaknowledge_ are databases containing a series of tags (implicitly or explicitly), e.g. `'TI'` is the title for WOS. Each tag has one or more values and _metaknowledge_ can read them and extract useful information. As the tags differ between providers, a small set of values can be accessed by special tags; these tags are listed in `specialRecordFields`. These special tags can act on the whole `Record` and as such may contain information provided by any number of other tags.
2727
28-
Citations are handled by a special [Citation](#Citation.Citation) class. This class can parse the citations given by WOS as well as extra details about the full name of their journal and allow simple comparisons.
28+
Citations are handled by a special [Citation](#Citation.Citation) class. This class can parse the citations given by _WOS_ and journals cited by _Scopus_ and allows for better comparisons when they are used in graphs.
2929
3030
Note for those reading the docstrings: metaknowledge's docs are written in markdown and are processed to produce the documentation found at [networkslab.org/metaknowledge/documentation]({{ site.baseurl }}/documentation/), but you should have no problem reading them from the help function.
3131
"""
3232

33-
from .mkRecord import Record, ExtendedRecord
34-
from .citation import Citation, filterNonJournals
35-
from .grants.baseGrant import Grant, DefaultGrant
36-
from .grants.medlineGrant import MedlineGrant
37-
from .grants.cihrGrant import CIHRGrant
38-
from .grants.nsercGrant import NSERCGrant
39-
33+
from .constants import VERBOSE_MODE, __version__, specialRecordFields, FAST_CITES
34+
from .mkExceptions import BadCitation, BadGrant, BadInputFile, BadProQuestFile, BadProQuestRecord, BadPubmedFile, BadPubmedRecord, BadRecord, BadWOSFile, BadWOSRecord, CollectionTypeError, GrantCollectionException, RCTypeError, RCValueError, RecordsNotCompatible, UnknownFile, cacheError, mkException, TagError, BadScopusRecord
4035

36+
from .graphHelpers import writeEdgeList, writeNodeAttributeFile, writeGraph, readGraph, dropEdges, dropNodesByDegree, dropNodesByCount, mergeGraphs, graphStats, writeTnetFile
37+
from .diffusion import diffusionGraph, diffusionCount, diffusionAddCountsFromSource
4138

42-
from .recordCollection import RecordCollection
43-
from .mkExceptions import BadCitation, BadGrant, BadInputFile, BadProQuestFile, BadProQuestRecord, BadPubmedFile, BadPubmedRecord, BadRecord, BadWOSFile, BadWOSRecord, CollectionTypeError, GrantCollectionException, RCTypeError, RCValueError, RecordsNotCompatible, UnknownFile, cacheError, mkException
44-
#from .progressBar import _ProgressBar
45-
from .grantCollection import GrantCollection
39+
from .citation import Citation, filterNonJournals
4640
from .mkCollection import Collection, CollectionWithIDs
41+
from .mkRecord import Record, ExtendedRecord
4742

48-
from .graphHelpers import writeEdgeList, writeNodeAttributeFile, writeGraph, readGraph, dropEdges, dropNodesByDegree, dropNodesByCount, mergeGraphs, graphStats
49-
from .constants import VERBOSE_MODE, __version__, specialRecordFields, FAST_CITES
50-
from .diffusion import diffusionGraph, diffusionCount, diffusionAddCountsFromSource
5143

52-
#from .WOS.tagProcessing.funcDicts import tagToFull, isTagOrName, normalizeToTag, normalizeToName
53-
#from .WOS.wosHandlers import wosParser
54-
from .WOS.recordWOS import WOSRecord#, recordParser
55-
from .WOS.journalAbbreviations.wosCitations import WOSCitation
44+
from .grantCollection import GrantCollection
45+
from .grants import NSERCGrant, CIHRGrant, MedlineGrant, NSFGrant, Grant, DefaultGrant
5646

47+
from .recordCollection import RecordCollection
48+
from .WOS import WOSRecord
5749
from .medline import MedlineRecord
58-
5950
from .proquest import ProQuestRecord
60-
61-
#from .blondel import blondel, modularity #Better implementations can be found on Pypi so this has been discontinued
51+
from .scopus import ScopusRecord

metaknowledge/bin/metaknowledgeCLI.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015
22
import metaknowledge
3-
import metaknowledge.WOS
3+
import metaknowledge.journalAbbreviations
44
import networkx as nx
55
import argparse
66
import os
@@ -180,10 +180,10 @@ def getWhatToDo(clargs, inRC):
180180
f.writelines(cites)
181181
return False
182182
else:
183-
dbName = input("The default manual databse file is called {}, press Enter to use it or type the name of the database you wish to use:\n".format(metaknowledge.WOS.manaulDBname))
183+
dbName = input("The default manual databse file is called {}, press Enter to use it or type the name of the database you wish to use:\n".format(metaknowledge.journalAbbreviations.manaulDBname))
184184
print("Starting to go over citations, to exit press ctr-C.")
185185
if dbName == '':
186-
dbName = metaknowledge.WOS.manaulDBname
186+
dbName = metaknowledge.journalAbbreviations.manaulDBname
187187
try:
188188
for R in inRC:
189189
for c in R.get('citations', []):

metaknowledge/bin/metaknowledgeDocsGen.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,20 @@
1010
import importlib
1111
import re
1212

13-
documentedModules = ['contour', 'WOS', 'medline', 'proquest']#, 'journalAbbreviations', 'tagProcessing']
13+
documentedModules = ['contour', 'WOS', 'medline', 'proquest', 'scopus', 'journalAbbreviations']
1414

1515
docsPrefix = time.strftime("%Y-%m-%d-")
1616

1717
funcCounter = 0
18+
undocumented = 0
1819

1920
blurbDict = {
2021
#modules
2122
'contour' : "A nicer matplotlib graph visualizer and contour plot",
2223
'WOS' : "The functions and classes associated with the Web of Science",
24+
'journalAbbreviations' : "Handles the abbreviated journal names used by WOS",
2325
'medline' : "The functions and classes associated with Medline, the format used by Pubmed",
26+
'scopus' : "The functions and classes associated with records from scopus",
2427
'proquest' : "The functions and classes associated with ProQuest",
2528

2629
#Classes
@@ -33,12 +36,14 @@
3336
'WOSRecord' : "The object for containing and processing WOS entries",
3437
'ProQuestRecord' : "The object for containing and processing ProQuest entries",
3538
'MedlineRecord' : "The object for containing and processing Medline entries",
39+
'ScopusRecord' : "The object for containing and processing Scopus entries",
3640

3741
'Grant' : "The base for all the other Grants",
3842
'DefaultGrant' : "The Grant used if a file was not identifiable",
3943
'CIHRGrant' : "The container for CIHR grant entries",
4044
'NSERCGrant' : "The container for NSERC grant entries",
4145
'MedlineGrant' : "The container for grants derived from Medline Records entries",
46+
'NSFGrant' : "The container for NSF grant entries",
4247

4348
'Collection' : "The base of all other Collections, basically a set",
4449
'CollectionWithIDs' : "A Collection that only holds <i>metaknowledge</i> objects",
@@ -47,12 +52,11 @@
4752

4853
#Deprecated
4954
'tagProcessing' : "All the tags and how they are handled",
50-
'journalAbbreviations' : "Look here to get your J9 database",
5155
}
5256

5357
singleFileYAML = """---
5458
layout: page
55-
title: Full Documentation
59+
title: Full Documentation {}
5660
author:
5761
- name: Reid McIlroy-Young
5862
department:
@@ -64,14 +68,14 @@
6468
shorttitle: metaknowledge
6569
search_omit: true
6670
---
67-
"""
71+
""".format(metaknowledge.__version__)
6872

6973
def makeBlurb(name):
7074
if name in blurbDict:
7175
return blurbDict[name]
7276
else:
7377
print("\033[94m{} had no blurb\033[0m".format(name))
74-
return 'BLURB NEEDED FOR: {}'.format(name)
78+
return 'BLURB NEEDED FOR {}'.format(name)
7579
#raise RuntimeError("{} needs a blurb".format(name))
7680

7781
def makeHeader(title, excerpt, tags = (), weight = 10, layout = "doc", singleFile = False):
@@ -209,6 +213,8 @@ def writeFunc(fn, f, prefix = '', level = 5, singleFile = False):
209213
f.write(cleanedDoc(fn[1], lvl = level, singleFile = singleFile))
210214
except AttributeError:
211215
f.write("# Needs to be written\n\n")
216+
global undocumented
217+
undocumented += 1
212218
print("\033[93m{0}{1} had no docs\033[0m".format(prefix, fn[0]))
213219

214220
def writeClass(cl, f, prefix = '', level = 4, singleFile = False, exceptMode = False):
@@ -219,6 +225,8 @@ def writeClass(cl, f, prefix = '', level = 4, singleFile = False, exceptMode = F
219225
f.write(cleanedDoc(cl[1], lvl = level, singleFile = singleFile))
220226
except AttributeError:
221227
f.write("# Needs to be written\n\n")
228+
global undocumented
229+
undocumented += 1
222230
print("\033[93m{0}{1} had no docs\033[0m".format(prefix, cl[0]))
223231

224232
def proccessClass(cls, f, singleFile = False, exceptMode = False):
@@ -335,7 +343,7 @@ def main(args):
335343

336344
if args.single:
337345
single = True
338-
f = open("metaknowledge2Draft.md",'w')
346+
f = open("metaknowledgeFull.md",'w')
339347
f.write(singleFileYAML)
340348

341349
f.write(makeTable(documentedModules, header = '<a name="objlist"></a>The modules of <i>metaknowledge</i> are:', withBlurbs = True))
@@ -383,7 +391,8 @@ def main(args):
383391
def mkDocs():
384392
args = argumentParser()
385393
main(args)
386-
print(funcCounter)
394+
print("{} total functions".format(funcCounter))
395+
print("{} undocumented".format(undocumented))
387396

388397
if __name__ == '__main__':
389398
mkDocs()

0 commit comments

Comments
 (0)