!pip install pyjanitor pyvis --quiet


import pandas as pd
import janitor
import datetime

from IPython.core.display import display, HTML
from pyvis import network as net
import networkx as nx


# Read the connections export into a DataFrame. The first two lines of the
# file are skipped before the header row is parsed (presumably a preamble in
# the export -- confirm against the actual CSV).
df_connection = pd.read_csv(filepath_or_buffer="Connections.csv", skiprows=2)

# Print column names, dtypes and non-null counts as a quick sanity check.
df_connection.info()


# Tidy the raw export one step at a time, rebinding `df` after each stage.
# `clean_names` and `to_datetime` are DataFrame methods provided by pyjanitor.

# Normalise the column labels (spacing and capitalisation).
df = df_connection.clean_names()

# Personal identifiers are removed for privacy.
df = df.drop(columns=['first_name', 'last_name', 'email_address'])

# Rows without a company or a position cannot be used in the aggregations below.
df = df.dropna(subset=['company', 'position'])

# Parse the connection date, e.g. "05 Jan 2021".
df = df.to_datetime('connected_on', format='%d %b %Y')

df.head()


# Discard rows whose company text matches either alternative below
# (case-insensitive regular-expression search anywhere in the string).
pattern = "retired|self-employed"
df = df.loc[~df['company'].str.contains(pattern, case=False)]


# Number of connections per company, as a two-column frame.
df_company = df['company'].value_counts().reset_index()

# Give the columns explicit labels instead of relying on what reset_index produced.
df_company.columns = ['company', 'count']

# Largest companies first.
df_company.sort_values(by="count", ascending=False, inplace=True)
df_company.head(10)


# Number of connections per position, as a two-column frame.
df_position = df['position'].value_counts().reset_index()

# Give the columns explicit labels instead of relying on what reset_index produced.
df_position.columns = ['position', 'count']

# Most common positions first.
df_position.sort_values(by="count", ascending=False, inplace=True)
df_position.head(10)


import pandas as pd

# Load the contacts export and show its first five rows (still containing
# any missing values at this point).
df_contacts = pd.read_csv(filepath_or_buffer='Contacts.csv')
print(df_contacts.head(5))

# Replace every missing value with an empty string.
df_contacts = df_contacts.fillna(value='')


import plotly.express as px

# Scatter plot of the contacts: "Title" on the x-axis against "CreatedAt" on the y-axis.
fig = px.scatter(
    data_frame=df_contacts,
    x="Title",
    y="CreatedAt",
)
fig.show()


!pip install --upgrade plotly


import numpy as np


# Turn blank titles back into NaN so they can be dropped below.
# The result is assigned back to the column rather than calling
# `.replace(..., inplace=True)` on the column selection: that chained in-place
# form operates on an intermediate object and, under pandas Copy-on-Write,
# leaves `df_contacts` unchanged (so the dropna below would remove nothing).
df_contacts['Title'] = df_contacts['Title'].replace('', np.nan)


# Remove every contact that has no title.
df_contacts.dropna(subset=['Title'], inplace=True)


# Treemap of the contacts with two hierarchy levels: "Title" first,
# then "Companies" nested inside it, drawn on an 800x800 canvas.
fig = px.treemap(
    data_frame=df_contacts,
    path=['Title', 'Companies'],
    width=800,
    height=800,
)
fig.show()


# Report the node and edge counts of the graph currently bound to `g`.
# NOTE(review): in this file `g` is first assigned further down (the
# `g = nx.Graph()` statements), so executed top to bottom these lines raise
# NameError. This cell presumably has to run after a graph has been built --
# confirm and relocate it.
print(f"number of nodes: {g.number_of_nodes()}")
print(f"number of edges: {g.number_of_edges()}")


# Print the first five companies as "<company>-<count>", one per line.
for name, n_connections in zip(df_company['company'].head(5), df_company['count'].head(5)):
    print(name + "-" + str(n_connections))


# Keep only companies with at least five connections; the shape is printed
# before and after so the effect of the threshold is visible.
print(df_company.shape)
df_company_reduced = df_company[df_company['count'] >= 5]
print(df_company_reduced.shape)


# Keep only positions held by at least five connections; the shape is printed
# before and after so the effect of the threshold is visible.
print(df_position.shape)
df_position_reduced = df_position[df_position['count'] >= 5]
print(df_position_reduced.shape)


# Build and render a star-shaped network: a central 'root' node (yourself)
# connected to one node per company in `df_company_reduced`. Each company
# node is sized by its connection count and carries an HTML tooltip listing
# the distinct positions held at that company.
import html  # stdlib; used only here to escape CSV text placed into tooltip markup

# initialize graph
g = nx.Graph()
g.add_node('root')  # initialize yourself as the central node

# use iterrows to iterate through the data frame
for _, row in df_company_reduced.iterrows():

    # store company name and count
    company = row['company']
    count = int(row['count'])  # plain int so the node size is a native number

    # Company and position strings come straight from the CSV; escape them so
    # characters such as '&' or '<' cannot break (or inject into) the tooltip HTML.
    title = f"<b>{html.escape(str(company))}</b> – {count}"

    # Distinct positions at this company, sorted so the tooltip is identical
    # from run to run (set iteration order is not stable across runs).
    positions = sorted({str(x) for x in df.loc[df['company'] == company, 'position']})
    positions = ''.join('<li>{}</li>'.format(html.escape(x)) for x in positions)

    position_list = f"<ul>{positions}</ul>"
    hover_info = title + position_list

    g.add_node(company, size=count * 2, title=hover_info, color='#3449eb')
    g.add_edge('root', company, color='grey')

# generate the graph
nt = net.Network(height='700px', width='700px', bgcolor="black", font_color='white')
nt.from_nx(g)
nt.hrepulsion()
# more customization https://tinyurl.com/yf5lvvdm
nt.show('company_graph.html')
display(HTML('company_graph.html'))


# Build and render a star-shaped network: a central 'root' node (yourself)
# connected to one node per position in `df_position_reduced`. Each position
# node is sized by its connection count, which is also shown as the tooltip.

# initialize graph
g = nx.Graph()
g.add_node('root')  # initialize yourself as the central node

# use iterrows to iterate through the data frame
for _, row in df_position_reduced.iterrows():

    # Keep the count numeric for the node size (the original passed the
    # formatted string as `size`); only the tooltip text is a string.
    count = int(row['count'])
    position = row['position']

    g.add_node(position, size=count, color='#3449eb', title=str(count))
    g.add_edge('root', position, color='grey')

# generate the graph
nt = net.Network(height='700px', width='700px', bgcolor="black", font_color='white')
nt.from_nx(g)
nt.hrepulsion()
# more customization https://tinyurl.com/yf5lvvdm
nt.show('position_graph.html')
display(HTML('position_graph.html'))

# Loading data

# Data Cleaning

# Aggregate sum of connections for companies

# Aggregate sum of connections for positions

# When Connected

# Treemap of title in a company

# Creating the network

# Creating a network diagram for companies

# Creating a network based on position in a company