What does the code mean?

Pyzotero

# First step: to fetch the information contained in our bibliography with pyzotero (a wrapper of the Zotero API (v3))
from pyzotero import zotero 

# Connection details of the Zotero collection to fetch: library_id, library_type, api_key.
# NOTE(review): the API key is hard-coded here; prefer loading it from an
# environment variable or a configuration file before sharing this code.
bibliography = zotero.Zotero(4592469, 'group', 'fCmiRRXSChwNGKoiw8lYlTKe')

# Fetch every top-level item of the library. The Zotero API pages its results,
# so a single top() request only returns the first page; everything() keeps
# following the pagination until all the references have been retrieved.
items = bibliography.everything(bibliography.top())

Data Structure

# Second step: to structure data
# I define my Reference class (capitalized because class names are capitalized by convention)
class Reference:
    """Flat view of one Zotero item, exposing the fields used to build the graphs.

    Every field is read from the item's ``"data"`` mapping. A missing key
    falls back to a placeholder string ("No title", "No date", "No type") or
    to an empty list (authors, tags).
    """

    def __init__(self, entry):
        """Extract title, authors, date, type and tags from *entry*.

        Args:
            entry: An item dictionary; only its ``"data"`` sub-dictionary
                is read. The raw dictionary is kept as ``self.entry``.
        """
        self.entry = entry
        # Look the "data" mapping up once instead of once per attribute.
        data = entry.get("data", {})
        # ":" is replaced with "-" in titles (the original author notes that
        # colons "make noise"; presumably because graphviz reads ":" in a
        # node name as a node:port separator -- confirm).
        self.title = data.get("title", "No title").replace(":", "-")
        # One display name per creator, in the order given by the item.
        self.author = [self._creator_name(creator) for creator in data.get("creators", [])]
        self.date = data.get("date", "No date")
        self.type = data.get("itemType", "No type")
        # Each tag is stored as {"tag": <label>, ...}; keep only the label.
        self.tags = [tag.get("tag") for tag in data.get("tags", [])]

    @staticmethod
    def _creator_name(creator):
        """Return the display name of one creator dictionary."""
        # Single-field creators (e.g. institutions) carry a "name" key instead
        # of firstName/lastName; use it rather than the two placeholders.
        if "name" in creator:
            return creator["name"]
        first = creator.get('firstName', 'No firstName')
        last = creator.get('lastName', 'No lastName')
        return f"{first} {last}"

# I define my Tag class the same way
class Tag:
    """Same field extraction as the Reference class, under a separate name.

    Reads title, authors, date, item type and tags from the ``"data"``
    mapping of a Zotero item, with placeholder defaults for missing keys.
    """

    def __init__(self, entry):
        """Populate the attributes from *entry* (an item dictionary)."""
        self.entry = entry
        # Look the "data" mapping up once instead of once per attribute.
        data = entry.get("data", {})
        # ":" in titles is replaced with "-".
        self.title = data.get("title", "No title").replace(":", "-")

        authors = []
        for creator in data.get("creators", []):
            if "name" in creator:
                # Single-field creators (e.g. institutions) only have "name".
                authors.append(creator["name"])
            else:
                authors.append(
                    f"{creator.get('firstName', 'No firstName')} {creator.get('lastName', 'No lastName')}"
                )
        self.author = authors

        self.date = data.get("date", "No date")
        self.type = data.get("itemType", "No type")
        # Each tag is stored as {"tag": <label>, ...}; keep only the label.
        self.tags = [tag.get("tag") for tag in data.get("tags", [])]
        
# In my bibliography, for each entry:
# Build the list of references in a single pass over the items fetched
# earlier (`items`), instead of querying the Zotero API again for each loop.
list_reference = []
for entry in items:
    # I instantiate a Reference from the raw entry...
    reference = Reference(entry)
    # ...and I add it to my list of references
    list_reference.append(reference)

Graphviz

# Third step: to build the graph   
# I import the library for graphs
import graphviz 

# I define the visual charter of my graph
Margot Style | Antoine Style
# Node and edge styling: filled light-salmon double-circle nodes and
# deep-pink edges without arrowheads, both labelled in Arial.
# NOTE(review): 'margin' is not one of the documented values of the Graphviz
# `fixedsize` attribute (true / false / shape) -- confirm what was intended.
network = graphviz.Digraph(
    filename="MyTitle",
    node_attr=dict(
        color='lightsalmon',
        style='filled',
        shape='doublecircle',
        fontname='Arial',
        fontsize='13.0',
        margin='0.05',
        fixedsize='margin',
    ),
    edge_attr=dict(
        arrowhead='none',
        style='filled',
        color='deeppink',
        fontname='Arial',
        fontsize='10.0',
    ),
)
# Node and edge styling: filled medium-turquoise "signature"-shaped nodes of
# width 2.5 and tomato edges without arrowheads, both labelled in Courier New.
# NOTE(review): 'margin' is not one of the documented values of the Graphviz
# `fixedsize` attribute (true / false / shape) -- confirm what was intended.
network = graphviz.Digraph(
    filename="MyTitle",
    node_attr=dict(
        color='mediumturquoise',
        width='2.5',
        style='filled',
        shape='signature',
        margin='0.05',
        fixedsize='margin',
        fontname='Courier New',
        fontsize='12.0',
    ),
    edge_attr=dict(
        arrowhead='none',
        style='filled',
        color='tomato',
        fontname='Courier New',
        fontsize='10.0',
    ),
)
                            

Graph Architectures

# Finally : the graph architecture with our data
					# To avoid creating the same link several times
# I instantiate an empty set() that records every link already drawn;
# the loops below look a link up in it before adding a new edge
already_existing_links = set()
Tag by Title | Title by Tag | Tag by Author | Author by Tag | Author by Title | Title by Author
# I go through my reference list once with the reference1 loop variable
            # Link two different tags whenever they are attached to references
            # that carry the same title; the title becomes the edge label.
            for reference1 in list_reference:
                title1 = reference1.title
                # Second pass over the same list, to compare every pair of references
                for reference2 in list_reference:
                    # The title test does not depend on the tags, so it is done
                    # once per pair of references rather than once per pair of tags.
                    if title1 != reference2.title:
                        continue
                    for tag1 in reference1.tags:
                        for tag2 in reference2.tags:
                            if tag1 == tag2:
                                continue
                            # De-duplication key: the unordered pair of nodes plus the
                            # label. The label is kept apart from the node names so that
                            # a title which happens to equal a tag cannot make two
                            # distinct links look identical.
                            edge = (frozenset((tag1, tag2)), title1)
                            if edge in already_existing_links:
                                continue
                            # tag1 and tag2 are the two nodes, title1 labels the link
                            tags_by_title.edge(tag1, tag2, label=title1)
                            already_existing_links.add(edge)
                            # To inspect the links as they are created, print `edge` here.
            
# I run my reference list once with the reference1 loop variable
            # Link two references with different titles whenever they share a
            # tag; the shared tag becomes the edge label.
            for reference1 in list_reference:
                # Second pass over the same list, to compare every pair of references
                for reference2 in list_reference:
                    # Titles are compared once per pair of references, not once per tag.
                    if reference1.title == reference2.title:
                        continue
                    for tag1 in reference1.tags:
                        if tag1 not in reference2.tags:
                            continue
                        # De-duplication key: the unordered pair of nodes plus the
                        # label. The label is kept apart from the node names so that
                        # a tag which happens to equal a title cannot make two
                        # distinct links look identical.
                        edge = (frozenset((reference1.title, reference2.title)), tag1)
                        if edge in already_existing_links:
                            continue
                        # the two titles are the nodes, tag1 labels the link
                        title_by_tag.edge(reference1.title, reference2.title, label=tag1)
                        already_existing_links.add(edge)
                        # To inspect the links as they are created, print `edge` here.
							
# I run my reference list once with the reference1 loop variable
            # Link two different tags whenever the references carrying them have
            # an author in common; that author becomes the edge label.
            for reference1 in list_reference:
                # Second pass over the same list, to compare every pair of references
                for reference2 in list_reference:
                    # Authors credited on both references. This does not depend on
                    # the tags, so it is computed once per pair of references.
                    shared_authors = [a for a in reference1.author if a in reference2.author]
                    if not shared_authors:
                        continue
                    for tag1 in reference1.tags:
                        for tag2 in reference2.tags:
                            if tag1 == tag2:
                                continue
                            for author1 in shared_authors:
                                # De-duplication key: the unordered pair of nodes plus
                                # the label. The label is kept apart from the node names
                                # so that an author name which happens to equal a tag
                                # cannot make two distinct links look identical.
                                edge = (frozenset((tag1, tag2)), author1)
                                if edge in already_existing_links:
                                    continue
                                # tag1 and tag2 are the two nodes, author1 labels the link
                                tag_by_author.edge(tag1, tag2, label=author1)
                                already_existing_links.add(edge)
                                # To inspect the links as they are created, print `edge` here.
            # to visualize (uncomment the next line)
            #tag_by_author
            # to download
            # NOTE(review): the closing cell of this page calls `.view()` for the
            # "to download" step, whereas this line is only the bare name -- confirm
            # which of the two was intended here.
            tag_by_author
# I run my reference list once with the reference1 loop variable
            # Link two different authors whenever the references they appear on
            # share a tag; the shared tag becomes the edge label.
            for reference1 in list_reference:
                # Second pass over the same list, to compare every pair of references
                for reference2 in list_reference:
                    # Tags present on both references. This does not depend on the
                    # authors, so it is computed once per pair of references.
                    shared_tags = [t for t in reference1.tags if t in reference2.tags]
                    if not shared_tags:
                        continue
                    for author1 in reference1.author:
                        for author2 in reference2.author:
                            if author1 == author2:
                                continue
                            for tag1 in shared_tags:
                                # De-duplication key: the unordered pair of nodes plus
                                # the label. The label is kept apart from the node names
                                # so that a tag which happens to equal an author name
                                # cannot make two distinct links look identical.
                                edge = (frozenset((author1, author2)), tag1)
                                if edge in already_existing_links:
                                    continue
                                # author1 and author2 are the two nodes of the
                                # author_by_tag graph, tag1 labels the link
                                author_by_tag.edge(author1, author2, label=tag1)
                                already_existing_links.add(edge)
                                # To inspect the links as they are created, print `edge` here.
            
# I run my reference list once with the reference1 loop variable
            # Link two different authors whenever they appear on references that
            # carry the same title; the title becomes the edge label.
            for reference1 in list_reference:
                title1 = reference1.title
                # Second pass over the same list, to compare every pair of references
                for reference2 in list_reference:
                    # The title test does not depend on the authors, so it is done
                    # once per pair of references rather than once per pair of authors.
                    if title1 != reference2.title:
                        continue
                    for author1 in reference1.author:
                        for author2 in reference2.author:
                            if author1 == author2:
                                continue
                            # De-duplication key: the unordered pair of nodes plus the
                            # label. The label is kept apart from the node names so that
                            # a title which happens to equal an author name cannot make
                            # two distinct links look identical.
                            edge = (frozenset((author1, author2)), title1)
                            if edge in already_existing_links:
                                continue
                            # author1 and author2 are the two nodes, title1 labels the link
                            author_by_title.edge(author1, author2, label=title1)
                            already_existing_links.add(edge)
                            # To inspect the links as they are created, print `edge` here.
            
# I run my reference list once with the reference1 loop variable
            # Link two references with different titles whenever they share an
            # author; the shared author becomes the edge label.
            for reference1 in list_reference:
                # Second pass over the same list, to compare every pair of references
                for reference2 in list_reference:
                    # Titles are compared once per pair of references, not once per author.
                    if reference1.title == reference2.title:
                        continue
                    # I run the list of authors of reference1
                    for author1 in reference1.author:
                        if author1 not in reference2.author:
                            continue
                        # De-duplication key: the unordered pair of nodes plus the
                        # label. The label is kept apart from the node names so that
                        # an author name which happens to equal a title cannot make
                        # two distinct links look identical.
                        edge = (frozenset((reference1.title, reference2.title)), author1)
                        if edge in already_existing_links:
                            continue
                        # the two titles are the nodes, author1 labels the link
                        title_by_author.edge(reference1.title, reference2.title, label=author1)
                        already_existing_links.add(edge)
                        # To inspect the links as they are created, print `edge` here.
# to visualize
# NOTE(review): no variable named `MyTitle` is defined in this listing; it looks
# like a placeholder for the graph object built above (e.g. tags_by_title) -- confirm.
MyTitle
# to download
MyTitle.view()


You will find the complete Python code here (as a Jupyter Notebook).

CC BY-SA

CRCEN & CCLA

Repository GitLab