Enrich Hubspot Contacts with Company Information from Mattermark

Introduction

Mattermark is a powerful tool for finding details about companies you’re interested in. In this guide, we will use the Company Details endpoint and Python to enrich your new Hubspot contacts with company information from Mattermark (address, annual revenue, industry, etc.). The script can be run every 15 minutes as a cron job to continuously enrich new contacts.

Setup

Before you get started, you’ll need a Mattermark API Key, a Hubspot API Key, and Python 2 with the `requests` and `python-dateutil` packages installed.

# config
# replace both placeholders with your own API keys before running
# sent as the `key` query parameter on Mattermark requests
MATTERMARK_API_KEY = "YOUR_MATTERMARK_API_KEY"
# sent as the `hapikey` query parameter on Hubspot requests
HUBSPOT_API_KEY = "YOUR_HUBSPOT_API_KEY"

Overview

  • Pull recently created contacts from Hubspot
  • Extract the company domain from each contact’s email addresses
  • Get company data from Mattermark
  • Find or create the company in your Hubspot account
  • Enrich the companies in Hubspot with Mattermark data
# ask hubspot for the contacts it created most recently
recent_contact_results = hubspot_get_recent_contacts()

# oldest creation time that still counts as "new";
# 15 minutes mirrors the suggested cronjob interval
# (the full source below uses this to skip older contacts)
cutoff = datetime.datetime.now() - datetime.timedelta(minutes=15)

for recent_contact in recent_contact_results['contacts']:
  # gather every email address hubspot stores for this contact
  emails = []
  for identity_profile in recent_contact['identity-profiles']:
    for identity in identity_profile['identities']:
      if identity['type'] == 'EMAIL':
        emails.append(identity['value'])

  # keep only the part after the "@" of each address
  domains = []
  for email in emails:
    domains.append(email.split("@")[1])

  # enrich the company behind every domain of this contact
  for domain in domains:
    # look the company up on mattermark
    mattermark_company = mattermark_get_company_by_domain(domain)

    # create the company on hubspot from the mattermark data
    hubspot_company = hubspot_add_company(mattermark_company)

    # attach the contact to that hubspot company
    hubspot_add_contact_to_company(recent_contact, hubspot_company)

Full Source

This includes the above code as well as additional functions and error handling.

import requests
import json
import csv
import datetime
from dateutil.parser import parse

# config
# replace both placeholders with your own API keys before running
# sent as the `key` query parameter on Mattermark requests
MATTERMARK_API_KEY = "YOUR_MATTERMARK_API_KEY"
# sent as the `hapikey` query parameter on Hubspot requests
HUBSPOT_API_KEY = "YOUR_HUBSPOT_API_KEY"

## Mattermark API

def mattermark_get_company_by_domain(domain):
  """Fetch the Mattermark company details for a domain.

  domain -- domain name inserted into the request path as-is

  Returns the parsed JSON body of the response.
  Raises requests.exceptions.HTTPError on a 4xx/5xx status.
  """
  query = {"key": MATTERMARK_API_KEY}
  endpoint = "https://api.mattermark.com/domains/%s/companies" % domain

  reply = requests.get(endpoint, params=query)
  # turn error statuses into exceptions so callers can inspect them
  reply.raise_for_status()
  return reply.json()

## Hubspot API

def hubspot_get_company(domain):
  """Look up Hubspot companies by domain.

  domain -- domain name inserted into the request path as-is

  Returns the parsed JSON body of the response (the run section
  below indexes it as a list of companies).
  Raises requests.exceptions.HTTPError on a 4xx/5xx status.
  """
  endpoint = "https://api.hubapi.com/companies/v2/companies/domain/%s?hapikey=%s" % (
    domain,
    HUBSPOT_API_KEY,
  )

  reply = requests.get(endpoint)
  # turn error statuses into exceptions so callers can inspect them
  reply.raise_for_status()
  return reply.json()


def hubspot_add_company(company_data):
  """Create a company on Hubspot from a Mattermark company dict.

  company_data -- mattermark company dict; converted to hubspot
                  properties before it is sent

  Returns the hubspot company dict from the response body.
  Raises requests.exceptions.HTTPError on a 4xx/5xx status.
  """
  endpoint = "https://api.hubapi.com/companies/v2/companies?hapikey=%s" % HUBSPOT_API_KEY
  json_headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json'
  }

  # serialize once and reuse the string for both the log line and the request
  payload = json.dumps(translate_mattermark_company_to_hubspot_properties(company_data))

  # echo the outgoing payload to stdout
  print(payload)

  reply = requests.post(endpoint, data=payload, headers=json_headers)
  reply.raise_for_status()
  return reply.json()


def hubspot_update_company(hubspot_company, company_data):
  """Update an existing Hubspot company with Mattermark data.

  hubspot_company -- hubspot company dict; its 'companyId' selects the
                     record and its 'properties' keys are passed on as
                     the already-set property names
  company_data    -- mattermark company dict to take new values from

  Returns the hubspot company dict from the response body.
  Raises requests.exceptions.HTTPError on a 4xx/5xx status.
  """
  # names of the properties hubspot already holds for this company
  known_props = hubspot_company['properties'].keys()
  payload = json.dumps(
    translate_mattermark_company_to_hubspot_properties(company_data, existing_props=known_props)
  )

  endpoint = "https://api.hubapi.com/companies/v2/companies/%s?hapikey=%s" % (
    hubspot_company['companyId'],
    HUBSPOT_API_KEY,
  )
  json_headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json'
  }

  reply = requests.put(endpoint, data=payload, headers=json_headers)
  reply.raise_for_status()
  return reply.json()

def hubspot_get_recent_contacts():
  """Fetch the most recently created Hubspot contacts (count=50).

  Returns the parsed JSON body of the response; the run section
  below reads its 'contacts' list.
  Raises requests.exceptions.HTTPError on a 4xx/5xx status.
  """
  endpoint = "https://api.hubapi.com/contacts/v1/lists/all/contacts/recent?hapikey=%s&count=50" % HUBSPOT_API_KEY

  reply = requests.get(endpoint)
  # turn error statuses into exceptions so callers can inspect them
  reply.raise_for_status()
  return reply.json()


## Data Transforms

def translate_mattermark_company_to_hubspot_properties(company, existing_props=None):
  """Convert a Mattermark company dict into a Hubspot properties payload.

  company        -- mattermark company dict; every key is optional
  existing_props -- iterable of hubspot property names that are already
                    set on the company; most of those are left alone so
                    mattermark data does not overwrite them

  Returns {"properties": [{"name": ..., "value": ...}, ...]}, the shape
  the hubspot company endpoints are sent in this file.
  """
  # None stands in for "nothing set yet" (avoids a shared mutable default);
  # a set keeps the repeated membership checks cheap
  already_set = set(existing_props) if existing_props else set()
  hubspot_props = {}

  # props that use the same key in mattermark and hubspot
  for prop in ('city', 'country', 'description', 'state'):
    if prop in company and prop not in already_set:
      hubspot_props[prop] = company[prop]

  # calculated props and props with different keys

  # guarded like every other prop: a record without a website is skipped
  # instead of raising KeyError
  if 'website' in company and 'domain' not in already_set:
    hubspot_props['domain'] = company['website']

  # 1 if mattermark has at least one ipo on record; always sent
  hubspot_props['is_public'] = 1 if company.get('ipos') else 0

  if 'facebook_handle' in company and 'facebook_company_page' not in already_set:
    hubspot_props['facebook_company_page'] = "https://www.facebook.com/%s" % company['facebook_handle']

  # sent whenever mattermark has a handle, even if hubspot already has one
  if 'twitter_handle' in company:
    hubspot_props['twitterhandle'] = company['twitter_handle']

  if 'est_founding_date' in company and 'founded_year' not in already_set:
    hubspot_props['founded_year'] = parse(company['est_founding_date']).year

  if 'linkedin_id' in company and 'linkedin_company_page' not in already_set:
    hubspot_props['linkedin_company_page'] = "https://www.linkedin.com/company-beta/%s/" % company['linkedin_id']

  # overwrite these props that mattermark will have updated data for

  if 'total_funding' in company:
    hubspot_props['total_money_raised'] = company['total_funding']

  # the count fields are lists of dicts; the first entry's 'score' is used.
  # .get() also tolerates a missing or null list
  twitter_counts = company.get('twitter_follower_count')
  if twitter_counts:
    hubspot_props['twitterfollowers'] = twitter_counts[0]['score']

  facebook_counts = company.get('facebook_like_count')
  if facebook_counts:
    hubspot_props['facebookfans'] = facebook_counts[0]['score']

  # format as hubspot api expects
  return {
    "properties": [
      {"name": name, "value": value} for name, value in hubspot_props.items()
    ]
  }


## List of domains that provide public email addresses

# Email domains that do not identify a company: public mailbox providers,
# ISPs and disposable/anonymous address services. The run section filters
# contacts' email domains against this list before any lookup.
public_domains = [
  # Default domains included
  "aol.com", "att.net", "comcast.net", "facebook.com", "gmail.com", "gmx.com", "googlemail.com",
  "google.com", "hotmail.com", "hotmail.co.uk", "mac.com", "me.com", "mail.com", "msn.com",
  "live.com", "sbcglobal.net", "verizon.net", "yahoo.com", "yahoo.co.uk",
  # Other global domains
  "email.com", "games.com", "gmx.net", "hush.com", "hushmail.com", "icloud.com", "inbox.com",
  "lavabit.com", "love.com", "outlook.com", "pobox.com", "rocketmail.com",
  "safe-mail.net", "wow.com", "ygm.com", "ymail.com", "zoho.com", "fastmail.fm",
  "yandex.com", "iname.com",
  # United States ISP domains
  "bellsouth.net", "charter.net", "cox.net", "earthlink.net", "juno.com",
  # British ISP domains
  "btinternet.com", "virginmedia.com", "blueyonder.co.uk", "freeserve.co.uk", "live.co.uk",
  "ntlworld.com", "o2.co.uk", "orange.net", "sky.com", "talktalk.co.uk", "tiscali.co.uk",
  "virgin.net", "wanadoo.co.uk", "bt.com",
  # Domains used in Asia
  "sina.com", "qq.com", "naver.com", "hanmail.net", "daum.net", "nate.com",
  "yahoo.co.jp", "yahoo.co.kr", "yahoo.co.id", "yahoo.co.in", "yahoo.com.sg", "yahoo.com.ph",
  # French ISP domains
  "hotmail.fr", "live.fr", "laposte.net", "yahoo.fr", "wanadoo.fr", "orange.fr",
  "gmx.fr", "sfr.fr", "neuf.fr", "free.fr",
  # German ISP domains
  "gmx.de", "hotmail.de", "live.de", "online.de", "t-online.de", "web.de", "yahoo.de",
  # Russian ISP domains
  "mail.ru", "rambler.ru", "yandex.ru", "ya.ru", "list.ru",
  # Belgian ISP domains
  "hotmail.be", "live.be", "skynet.be", "voo.be", "tvcablenet.be", "telenet.be",
  # Argentinian ISP domains
  "hotmail.com.ar", "live.com.ar", "yahoo.com.ar", "fibertel.com.ar", "speedy.com.ar", "arnet.com.ar",
  # Domains used in Mexico
  "yahoo.com.mx", "live.com.mx", "hotmail.es", "hotmail.com.mx", "prodigy.net.mx",
  # Domains used in Brazil
  "yahoo.com.br", "hotmail.com.br", "outlook.com.br", "uol.com.br", "bol.com.br",
  "terra.com.br", "ig.com.br", "itelefonica.com.br", "r7.com", "zipmail.com.br",
  "globo.com", "globomail.com", "oi.com.br",
  # Anonymous email domains
  "0815.ru", "0wnd.net", "0wnd.org", "10minutemail.co.za", "10minutemail.com",
  "123-m.com", "1fsdfdsfsdf.tk", "1pad.de", "20minutemail.com", "21cn.com",
  "2fdgdfgdfgdf.tk", "2prong.com", "30minutemail.com", "33mail.com", "3trtretgfrfe.tk",
  "4gfdsgfdgfd.tk", "4warding.com", "5ghgfhfghfgh.tk", "6hjgjhgkilkj.tk", "6paq.com",
  "7tags.com", "9ox.net", "a-bc.net", "agedmail.com", "ama-trade.de",
  "amilegit.com", "amiri.net", "amiriindustries.com", "anonmails.de", "anonymbox.com",
  "antichef.com", "antichef.net", "antireg.ru", "antispam.de", "antispammail.de",
  "armyspy.com", "artman-conception.com", "azmeil.tk", "baxomale.ht.cx", "beefmilk.com",
  "bigstring.com", "binkmail.com", "bio-muesli.net", "bobmail.info", "bodhi.lawlita.com",
  "bofthew.com", "bootybay.de", "boun.cr", "bouncr.com", "breakthru.com",
  "brefmail.com", "bsnow.net", "bspamfree.org", "bugmenot.com", "bund.us",
  "burstmail.info", "buymoreplays.com", "byom.de", "c2.hu", "card.zp.ua",
  "casualdx.com", "cek.pm", "centermail.com", "centermail.net", "chammy.info",
  "childsavetrust.org", "chogmail.com", "choicemail1.com", "clixser.com", "cmail.net",
  "cmail.org", "coldemail.info", "cool.fr.nf", "courriel.fr.nf", "courrieltemporaire.com",
  "crapmail.org", "cust.in", "cuvox.de", "d3p.dk", "dacoolest.com",
  "dandikmail.com", "dayrep.com", "dcemail.com", "deadaddress.com", "deadspam.com",
  "delikkt.de", "despam.it", "despammed.com", "devnullmail.com", "dfgh.net",
  "digitalsanctuary.com", "dingbone.com", "disposableaddress.com", "disposableemailaddresses.com",
  "disposableinbox.com", "dispose.it", "dispostable.com", "dodgeit.com", "dodgit.com",
  "donemail.ru", "dontreg.com", "dontsendmespam.de", "drdrb.net", "dump-email.info",
  "dumpandjunk.com", "dumpyemail.com", "e-mail.com", "e-mail.org", "e4ward.com",
  "easytrashmail.com", "einmalmail.de", "einrot.com", "eintagsmail.de", "emailgo.de",
  "emailias.com", "emaillime.com", "emailsensei.com", "emailtemporanea.com", "emailtemporanea.net",
  "emailtemporar.ro", "emailtemporario.com.br", "emailthe.net", "emailtmp.com", "emailwarden.com",
  "emailx.at.hm", "emailxfer.com", "emeil.in", "emeil.ir", "emz.net",
  "ero-tube.org", "evopo.com", "explodemail.com", "express.net.ua", "eyepaste.com",
  "fakeinbox.com", "fakeinformation.com", "fansworldwide.de", "fantasymail.de", "fightallspam.com",
  "filzmail.com", "fivemail.de", "fleckens.hu", "frapmail.com", "friendlymail.co.uk",
  "fuckingduh.com", "fudgerub.com", "fyii.de", "garliclife.com", "gehensiemirnichtaufdensack.de",
  "get2mail.fr", "getairmail.com", "getmails.eu", "getonemail.com", "giantmail.de",
  "girlsundertheinfluence.com", "gishpuppy.com", "gmial.com", "goemailgo.com", "gotmail.net",
  "gotmail.org", "gotti.otherinbox.com", "great-host.in", "greensloth.com", "grr.la",
  "gsrv.co.uk", "guerillamail.biz", "guerillamail.com", "guerrillamail.biz", "guerrillamail.com",
  "guerrillamail.de", "guerrillamail.info", "guerrillamail.net", "guerrillamail.org", "guerrillamailblock.com",
  "gustr.com", "harakirimail.com", "hat-geld.de", "hatespam.org", "herp.in",
  "hidemail.de", "hidzz.com", "hmamail.com", "hopemail.biz", "ieh-mail.de",
  "ikbenspamvrij.nl", "imails.info", "inbax.tk", "inbox.si", "inboxalias.com",
  "inboxclean.com", "inboxclean.org", "infocom.zp.ua", "instant-mail.de", "ip6.li",
  "irish2me.com", "iwi.net", "jetable.com", "jetable.fr.nf", "jetable.net",
  "jetable.org", "jnxjn.com", "jourrapide.com", "jsrsolutions.com", "kasmail.com",
  "kaspop.com", "killmail.com", "killmail.net", "klassmaster.com", "klzlk.com",
  "koszmail.pl", "kurzepost.de", "lawlita.com", "letthemeatspam.com", "lhsdv.com",
  "lifebyfood.com", "link2mail.net", "litedrop.com", "lol.ovpn.to", "lolfreak.net",
  "lookugly.com", "lortemail.dk", "lr78.com", "lroid.com", "lukop.dk",
  "m21.cc", "mail-filter.com", "mail-temporaire.fr", "mail.by", "mail.mezimages.net",
  "mail.zp.ua", "mail1a.de", "mail21.cc", "mail2rss.org", "mail333.com",
  "mailbidon.com", "mailbiz.biz", "mailblocks.com", "mailbucket.org", "mailcat.biz",
  "mailcatch.com", "mailde.de", "mailde.info", "maildrop.cc", "maileimer.de",
  "mailexpire.com", "mailfa.tk", "mailforspam.com", "mailfreeonline.com", "mailguard.me",
  "mailin8r.com", "mailinater.com", "mailinator.com", "mailinator.net", "mailinator.org",
  "mailinator2.com", "mailincubator.com", "mailismagic.com", "mailme.lv", "mailme24.com",
  "mailmetrash.com", "mailmoat.com", "mailms.com", "mailnesia.com", "mailnull.com",
  "mailorg.org", "mailpick.biz", "mailrock.biz", "mailscrap.com", "mailshell.com",
  "mailsiphon.com", "mailtemp.info", "mailtome.de", "mailtothis.com", "mailtrash.net",
  "mailtv.net", "mailtv.tv", "mailzilla.com", "makemetheking.com", "manybrain.com",
  "mbx.cc", "mega.zik.dj", "meinspamschutz.de", "meltmail.com", "messagebeamer.de",
  "mezimages.net", "ministry-of-silly-walks.de", "mintemail.com", "misterpinball.de", "moncourrier.fr.nf",
  "monemail.fr.nf", "monmail.fr.nf", "monumentmail.com", "mt2009.com", "mt2014.com",
  "mycard.net.ua", "mycleaninbox.net", "mymail-in.net", "mypacks.net", "mypartyclip.de",
  "myphantomemail.com", "mysamp.de", "mytempemail.com", "mytempmail.com", "mytrashmail.com",
  "nabuma.com", "neomailbox.com", "nepwk.com", "nervmich.net", "nervtmich.net",
  "netmails.com", "netmails.net", "neverbox.com", "nice-4u.com", "nincsmail.hu",
  "nnh.com", "no-spam.ws", "noblepioneer.com", "nomail.pw", "nomail.xl.cx",
  "nomail2me.com", "nomorespamemails.com", "nospam.ze.tc", "nospam4.us", "nospamfor.us",
  "nospammail.net", "notmailinator.com", "nowhere.org", "nowmymail.com", "nurfuerspam.de",
  "nus.edu.sg", "objectmail.com", "obobbo.com", "odnorazovoe.ru", "oneoffemail.com",
  "onewaymail.com", "onlatedotcom.info", "online.ms", "opayq.com", "ordinaryamerican.net",
  "otherinbox.com", "ovpn.to", "owlpic.com", "pancakemail.com", "pcusers.otherinbox.com",
  "pjjkp.com", "plexolan.de", "poczta.onet.pl", "politikerclub.de", "poofy.org",
  "pookmail.com", "privacy.net", "privatdemail.net", "proxymail.eu", "prtnx.com",
  "putthisinyourspamdatabase.com", "putthisinyourspamdatabase.com", "qq.com", "quickinbox.com", "rcpt.at",
  "reallymymail.com", "realtyalerts.ca", "recode.me", "recursor.net", "reliable-mail.com",
  "rhyta.com", "rmqkr.net", "royal.net", "rtrtr.com", "s0ny.net",
  "safe-mail.net", "safersignup.de", "safetymail.info", "safetypost.de", "saynotospams.com",
  "schafmail.de", "schrott-email.de", "secretemail.de", "secure-mail.biz", "senseless-entertainment.com",
  "services391.com", "sharklasers.com", "shieldemail.com", "shiftmail.com", "shitmail.me",
  "shitware.nl", "shmeriously.com", "shortmail.net", "sibmail.com", "sinnlos-mail.de",
  "slapsfromlastnight.com", "slaskpost.se", "smashmail.de", "smellfear.com", "snakemail.com",
  "sneakemail.com", "sneakmail.de", "snkmail.com", "sofimail.com", "solvemail.info",
  "sogetthis.com", "soodonims.com", "spam4.me", "spamail.de", "spamarrest.com",
  "spambob.net", "spambog.ru", "spambox.us", "spamcannon.com", "spamcannon.net",
  "spamcon.org", "spamcorptastic.com", "spamcowboy.com", "spamcowboy.net", "spamcowboy.org",
  "spamday.com", "spamex.com", "spamfree.eu", "spamfree24.com", "spamfree24.de",
  "spamfree24.org", "spamgoes.in", "spamgourmet.com", "spamgourmet.net", "spamgourmet.org",
  "spamherelots.com", "spamherelots.com", "spamhereplease.com", "spamhereplease.com", "spamhole.com",
  "spamify.com", "spaml.de", "spammotel.com", "spamobox.com", "spamslicer.com",
  "spamspot.com", "spamthis.co.uk", "spamtroll.net", "speed.1s.fr", "spoofmail.de",
  "stuffmail.de", "super-auswahl.de", "supergreatmail.com", "supermailer.jp", "superrito.com",
  "superstachel.de", "suremail.info", "talkinator.com", "teewars.org", "teleworm.com",
  "teleworm.us", "temp-mail.org", "temp-mail.ru", "tempe-mail.com", "tempemail.co.za",
  "tempemail.com", "tempemail.net", "tempemail.net", "tempinbox.co.uk", "tempinbox.com",
  "tempmail.eu", "tempmaildemo.com", "tempmailer.com", "tempmailer.de", "tempomail.fr",
  "temporaryemail.net", "temporaryforwarding.com", "temporaryinbox.com", "temporarymailaddress.com", "tempthe.net",
  "thankyou2010.com", "thc.st", "thelimestones.com", "thisisnotmyrealemail.com", "thismail.net",
  "throwawayemailaddress.com", "tilien.com", "tittbit.in", "tizi.com", "tmailinator.com",
  "toomail.biz", "topranklist.de", "tradermail.info", "trash-mail.at", "trash-mail.com",
  "trash-mail.de", "trash2009.com", "trashdevil.com", "trashemail.de", "trashmail.at",
  "trashmail.com", "trashmail.de", "trashmail.me", "trashmail.net", "trashmail.org",
  "trashymail.com", "trialmail.de", "trillianpro.com", "twinmail.de", "tyldd.com",
  "uggsrock.com", "umail.net", "uroid.com", "us.af", "venompen.com",
  "veryrealemail.com", "viditag.com", "viralplays.com", "vpn.st", "vsimcard.com",
  "vubby.com", "wasteland.rfc822.org", "webemail.me", "weg-werf-email.de", "wegwerf-emails.de",
  "wegwerfadresse.de", "wegwerfemail.com", "wegwerfemail.de", "wegwerfmail.de", "wegwerfmail.info",
  "wegwerfmail.net", "wegwerfmail.org", "wh4f.org", "whyspam.me", "willhackforfood.biz",
  "willselfdestruct.com", "winemaven.info", "wronghead.com", "www.e4ward.com", "www.mailinator.com",
  "wwwnew.eu", "x.ip6.li", "xagloo.com", "xemaps.com", "xents.com",
  "xmaily.com", "xoxy.net", "yep.it", "yogamaven.com", "yopmail.com",
  "yopmail.fr", "yopmail.net", "yourdomain.com", "yuurok.com", "z1p.biz",
  "za.com", "zehnminuten.de", "zehnminutenmail.de", "zippymail.info", "zoemail.net",
  "zomg.info",
]


## Run - use the functions and data defined above to enrich your hubspot contacts

# link a hubspot contact to a hubspot company
# (this function is called below but was not defined anywhere in this file)
def hubspot_add_contact_to_company(contact, company):
  # NOTE(review): endpoint taken from hubspot's legacy companies v2 API
  # ("add contact to company"); confirm it is still available for your account
  url = "https://api.hubapi.com/companies/v2/companies/%s/contacts/%s?hapikey=%s" % (company['companyId'], contact['vid'], HUBSPOT_API_KEY)

  response = requests.put(url)
  response.raise_for_status()


# collect the distinct, lower-cased email domains of a contact,
# leaving out every domain found in ignored_domains
def _contact_company_domains(contact, ignored_domains):
  domains = []

  for identity_profile in contact['identity-profiles']:
    for identity in identity_profile['identities']:
      if identity['type'] != 'EMAIL':
        continue

      # rpartition never raises on an address without "@";
      # at_sign is empty in that case and the address is skipped
      _, at_sign, domain = identity['value'].rpartition("@")
      domain = domain.strip().lower()
      if not at_sign or not domain:
        continue

      # skip public domains (gmail, mailinator, etc) and domains already seen,
      # so each company is only looked up once per contact
      if domain in ignored_domains or domain in domains:
        continue

      domains.append(domain)

  return domains


# build the lookup set once instead of scanning the list for every email
public_domain_set = set(public_domains)

# get recently created contacts from hubspot
recent_contact_results = hubspot_get_recent_contacts()

# define a cutoff time for how recently a contact needs to be created
# in this scenario we'd be running a cronjob every 15 minutes
cutoff = datetime.datetime.now() - datetime.timedelta(minutes=15)

for recent_contact in recent_contact_results['contacts']:
  # get the time this contact was created ('addedAt' is divided by 1000
  # to turn milliseconds into the seconds fromtimestamp expects)
  added_at = datetime.datetime.fromtimestamp(recent_contact['addedAt'] / 1000)

  # skip this contact if it wasn't added recently enough
  if added_at < cutoff:
    continue

  # enrich each private domain associated with this contact;
  # contacts without a usable email yield no domains and are skipped
  for domain in _contact_company_domains(recent_contact, public_domain_set):
    # get this company from mattermark
    try:
      mattermark_company = mattermark_get_company_by_domain(domain)
    except requests.exceptions.HTTPError as err:
      if err.response.status_code == 404:
        # assumes mattermark answers 404 for a domain it has no company
        # for (TODO confirm); move on instead of aborting the whole run
        continue
      if err.response.status_code == 429:
        # Quota and Rate Limiting details available at
        # https://docs.mattermark.com/rest_api/getting_started/index.html#quota-usage
        print("429: Too many requests. Try again later.")
      raise

    # check if company exists on hubspot; only the lookup is inside the try
    # so a 404 from a later update call is not mistaken for "not found"
    try:
      hubspot_company_results = hubspot_get_company(mattermark_company['website'])
    except requests.exceptions.HTTPError as err:
      if err.response.status_code != 404:
        # if it's not a 404 don't handle here
        raise
      # 404 is the expected error if a company doesn't exist
      hubspot_company_results = []

    if hubspot_company_results:
      # company exists, fill in properties
      hubspot_company = hubspot_update_company(hubspot_company_results[0], mattermark_company)
    else:
      # no match (404 or an empty result list): add it to hubspot instead
      hubspot_company = hubspot_add_company(mattermark_company)

    # link the contact with the company in hubspot
    hubspot_add_contact_to_company(recent_contact, hubspot_company)