# First step: to fetch the information contained in our bibliography with pyzotero (a wrapper of the Zotero API (v3))
from pyzotero import zotero
# Connection settings of the collection to fetch: library_id, library_type, api_key.
# NOTE(review): the API key is written in clear text in the source; consider
# reading it from the environment or a private config file, and rotating it
# if this file has been shared.
bibliography = zotero.Zotero(4592469, 'group', 'fCmiRRXSChwNGKoiw8lYlTKe')
# `top()` alone answers with a single page of results. Wrapping the call in
# `everything()` keeps requesting the following pages, so that `items` really
# holds all the top-level references of the bibliography.
items = bibliography.everything(bibliography.top())
# Second step: to structure data
# I define my Reference class (capitalized because it's an object)
class Reference:
    """One bibliography entry, flattened into simple attributes.

    Parameters:
        entry: dictionary describing one item; the values are read from the
            dictionary stored under its "data" key. Every key is optional.

    Attributes:
        entry: the dictionary received, kept untouched.
        title: value of "title" with every ":" replaced by "-", or "No title".
        author: list of author names, one string per element of "creators".
        date: value of "date", or "No date".
        type: value of "itemType", or "No type".
        tags: list of the "tag" values found in "tags".
    """

    def __init__(self, entry):
        self.entry = entry
        # Every attribute is read from "data": look it up once
        data = entry.get("data", {})
        # the ":" in titles are replaced with "-" because the ":" make noise
        self.title = data.get("title", "No title").replace(":", "-")
        self.author = [
            self._creator_name(creator) for creator in data.get("creators", [])
        ]
        self.date = data.get("date", "No date")
        self.type = data.get("itemType", "No type")
        self.tags = [tag.get("tag") for tag in data.get("tags", [])]

    @staticmethod
    def _creator_name(creator):
        """Return the display name of one element of "creators".

        A creator is described either by a single "name" key (for instance an
        institution) or by "firstName" and "lastName". Without this test a
        single-field creator would become "No firstName No lastName".
        """
        single_name = creator.get("name")
        if single_name:
            return single_name
        first_name = creator.get("firstName", "No firstName")
        last_name = creator.get("lastName", "No lastName")
        return f"{first_name} {last_name}"
# I define my Tag class the same way
class Tag:
    """One bibliography entry, exposing the same attributes as Reference.

    Parameters:
        entry: dictionary describing one item; the values are read from the
            dictionary stored under its "data" key. Every key is optional.

    Attributes:
        entry: the dictionary received, kept untouched.
        title: value of "title" with every ":" replaced by "-", or "No title".
        author: list of author names, one string per element of "creators".
        date: value of "date", or "No date".
        type: value of "itemType", or "No type".
        tags: list of the "tag" values found in "tags".
    """

    def __init__(self, entry):
        self.entry = entry
        # Every attribute is read from "data": look it up once
        data = entry.get("data", {})
        self.title = data.get("title", "No title").replace(":", "-")
        # A creator described by a single "name" key (for instance an
        # institution) keeps that name; the others are rebuilt from
        # "firstName" and "lastName" with their default values.
        self.author = [
            creator.get("name")
            or f"{creator.get('firstName', 'No firstName')} {creator.get('lastName', 'No lastName')}"
            for creator in data.get("creators", [])
        ]
        self.date = data.get("date", "No date")
        self.type = data.get("itemType", "No type")
        self.tags = [tag.get("tag") for tag in data.get("tags", [])]
# I instantiate one Reference per entry of the bibliography and I gather them
# in my list of references.
# The entries were already fetched into `items`, so they are reused here
# instead of downloading the whole bibliography again; the former first loop,
# which built References without keeping them, is no longer needed.
list_reference = [Reference(entry) for entry in items]
# Third step: to build the graph
# I import the library for graphs
import graphviz
# Visual charter of the graph: default look of the nodes (node_attr) and of
# the links (edge_attr).
# NOTE(review): this first charter (light salmon double circles, deep pink
# links) is replaced right away by the assignment that follows, so only the
# second charter is in effect for `network`.
network = graphviz.Digraph(
    filename="MyTitle",
    node_attr=dict(
        color='lightsalmon',
        style='filled',
        shape='doublecircle',
        fontname='Arial',
        fontsize='13.0',
        margin='0.05',
        fixedsize='margin',
    ),
    edge_attr=dict(
        arrowhead='none',
        style='filled',
        color='deeppink',
        fontname='Arial',
        fontsize='10.0',
    ),
)
# Second charter (medium turquoise "signature" nodes, tomato links): this is
# the object that `network` designates from here on.
network = graphviz.Digraph(
    filename="MyTitle",
    node_attr=dict(
        color='mediumturquoise',
        width='2.5',
        style='filled',
        shape='signature',
        margin='0.05',
        fixedsize='margin',
        fontname='Courier New',
        fontsize='12.0',
    ),
    edge_attr=dict(
        arrowhead='none',
        style='filled',
        color='tomato',
        fontname='Courier New',
        fontsize='10.0',
    ),
)
# Finally : the graph architecture with our data
# Graph "tags_by_title": two different tags are linked when they belong to
# references carrying the same title; that title is the label of the link.
# `tags_by_title` is not created earlier in this script, so it is created
# here as a copy of the styled, still empty `network`.
tags_by_title = network.copy()
# To avoid creating the same link several times, the links already drawn are
# remembered in a set
already_existing_links = set()
# I go through my reference list with the reference1 loop variable
for reference1 in list_reference:
    title1 = reference1.title
    # I go through it again with the reference2 loop variable
    for reference2 in list_reference:
        # The title test does not depend on the tags: it is done once per
        # pair of references instead of once per pair of tags
        if title1 != reference2.title:
            continue
        for tag1 in reference1.tags:
            for tag2 in reference2.tags:
                if tag1 == tag2:
                    continue
                # Key of the link: the two tags in any order, plus the label.
                # The label is kept apart from the nodes so that a title is
                # never mistaken for a tag when two links are compared.
                edge = (frozenset((tag1, tag2)), title1)
                if edge not in already_existing_links:
                    # tag1 is node1, tag2 is node2, title1 labels the link
                    tags_by_title.edge(tag1, tag2, label=title1)
                    already_existing_links.add(edge)
# as a precaution, the links that make up the graph can be displayed
# Graph "title_by_tag": two different titles are linked when their references
# share a tag; that tag is the label of the link.
# `title_by_tag` is not created earlier in this script, so it is created here
# as a copy of the styled, still empty `network`.
title_by_tag = network.copy()
# A fresh set for this graph: links remembered while drawing another graph
# must not hide links of this one
already_existing_links = set()
# I go through my reference list with the reference1 loop variable
for reference1 in list_reference:
    # I go through it again with the reference2 loop variable
    for reference2 in list_reference:
        # Only references with different titles can be linked
        if reference1.title == reference2.title:
            continue
        for tag1 in reference1.tags:
            # the tag of reference1 must also be a tag of reference2
            if tag1 not in reference2.tags:
                continue
            # Key of the link: the two titles in any order, plus the label.
            # The label is kept apart from the nodes so that a tag is never
            # mistaken for a title when two links are compared.
            edge = (frozenset((reference1.title, reference2.title)), tag1)
            if edge not in already_existing_links:
                # the two titles are the nodes, tag1 labels the link
                title_by_tag.edge(reference1.title, reference2.title, label=tag1)
                already_existing_links.add(edge)
# as a precaution, the relationships that make up the graph can be displayed
# Graph "tag_by_author": two different tags are linked when their references
# share an author; that author is the label of the link.
# `tag_by_author` is not created earlier in this script, so it is created
# here as a copy of the styled, still empty `network`.
tag_by_author = network.copy()
# A fresh set for this graph: links remembered while drawing another graph
# must not hide links of this one
already_existing_links = set()
# I go through my reference list with the reference1 loop variable
for reference1 in list_reference:
    # I go through it again with the reference2 loop variable
    for reference2 in list_reference:
        # The authors of reference1 that are also authors of reference2 do
        # not depend on the tags: they are computed once per pair of
        # references, and the pair is skipped when there is none
        shared_authors = [
            author for author in reference1.author if author in reference2.author
        ]
        if not shared_authors:
            continue
        for tag1 in reference1.tags:
            for tag2 in reference2.tags:
                if tag1 == tag2:
                    continue
                for author1 in shared_authors:
                    # Key of the link: the two tags in any order, plus the
                    # label. The label is kept apart from the nodes so that
                    # an author is never mistaken for a tag.
                    edge = (frozenset((tag1, tag2)), author1)
                    if edge not in already_existing_links:
                        # tag1 is node1, tag2 is node2, author1 labels the link
                        tag_by_author.edge(tag1, tag2, label=author1)
                        already_existing_links.add(edge)
# as a precaution, the relationships that make up the graph can be displayed:
# in a notebook, a cell ending with the graph name draws it
tag_by_author
# Graph "author_by_tag": two different authors are linked when their
# references share a tag; that tag is the label of the link.
# `author_by_tag` is not created earlier in this script, so it is created
# here as a copy of the styled, still empty `network`.
author_by_tag = network.copy()
# A fresh set for this graph: links remembered while drawing another graph
# must not hide links of this one
already_existing_links = set()
# I go through my reference list with the reference1 loop variable
for reference1 in list_reference:
    # I go through it again with the reference2 loop variable
    for reference2 in list_reference:
        # The tags of reference1 that are also tags of reference2 do not
        # depend on the authors: they are computed once per pair of
        # references, and the pair is skipped when there is none
        shared_tags = [tag for tag in reference1.tags if tag in reference2.tags]
        if not shared_tags:
            continue
        for author1 in reference1.author:
            for author2 in reference2.author:
                if author1 == author2:
                    continue
                for tag1 in shared_tags:
                    # Key of the link: the two authors in any order, plus
                    # the label. The label is kept apart from the nodes so
                    # that a tag is never mistaken for an author.
                    edge = (frozenset((author1, author2)), tag1)
                    if edge not in already_existing_links:
                        # author1 is node1, author2 is node2, tag1 labels the link
                        author_by_tag.edge(author1, author2, label=tag1)
                        already_existing_links.add(edge)
# as a precaution, the relationships that make up the graph can be displayed
# Graph "author_by_title": two different authors are linked when they belong
# to references carrying the same title; that title is the label of the link.
# `author_by_title` is not created earlier in this script, so it is created
# here as a copy of the styled, still empty `network`.
author_by_title = network.copy()
# A fresh set for this graph: links remembered while drawing another graph
# must not hide links of this one
already_existing_links = set()
# I go through my reference list with the reference1 loop variable
for reference1 in list_reference:
    title1 = reference1.title
    # I go through it again with the reference2 loop variable
    for reference2 in list_reference:
        # The title test does not depend on the authors: it is done once per
        # pair of references instead of once per pair of authors
        if title1 != reference2.title:
            continue
        for author1 in reference1.author:
            for author2 in reference2.author:
                if author1 == author2:
                    continue
                # Key of the link: the two authors in any order, plus the
                # label. The label is kept apart from the nodes so that a
                # title is never mistaken for an author.
                edge = (frozenset((author1, author2)), title1)
                if edge not in already_existing_links:
                    # author1 is node1, author2 is node2, title1 labels the link
                    author_by_title.edge(author1, author2, label=title1)
                    already_existing_links.add(edge)
# as a precaution, the relationships that make up the graph can be displayed
# Graph "title_by_author": two different titles are linked when their
# references share an author; that author is the label of the link.
# `title_by_author` is not created earlier in this script, so it is created
# here as a copy of the styled, still empty `network`.
title_by_author = network.copy()
# A fresh set for this graph: links remembered while drawing another graph
# must not hide links of this one
already_existing_links = set()
# I go through my reference list with the reference1 loop variable
for reference1 in list_reference:
    # I go through it again with the reference2 loop variable
    for reference2 in list_reference:
        # Only references with different titles can be linked
        if reference1.title == reference2.title:
            continue
        for author1 in reference1.author:
            # the author of reference1 must also be an author of reference2
            if author1 not in reference2.author:
                continue
            # Key of the link: the two titles in any order, plus the label.
            # The label is kept apart from the nodes so that an author is
            # never mistaken for a title when two links are compared.
            edge = (frozenset((reference1.title, reference2.title)), author1)
            if edge not in already_existing_links:
                # the two titles are the nodes, author1 labels the link
                title_by_author.edge(reference1.title, reference2.title, label=author1)
                already_existing_links.add(edge)
# as a precaution, the relationships that make up the graph can be displayed
# NOTE(review): the two statements below used the name `MyTitle`, which is
# only the file name given to the graph, not a variable. They now use the
# graph built just above; put another graph name here to show a different one.
# to visualize: in a notebook, a cell ending with the graph name draws it
title_by_author
# to download: the graph is rendered to a file and opened in a viewer
title_by_author.view()