Presidential Proclamations¶
Background¶
This notebook demonstrates how to scrape presidential proclamations from the Federal Register, and perform an exploratory data analysis on the data.
A proclamation is an official declaration issued by the President of the United States, used to announce a new policy. Proclamations often address matters of public policy or recognize events. However, proclamations do not carry the force of law unless authorized by Congress.
Scrape Data¶
Import libraries
import requests # to make requests
import pandas as pd # interact with dataframes
import os # make folder to save files to
Define urls
# Maps a label for each presidential term to the Federal Register API query
# that lists that term's proclamations. Every URL filters on the president,
# presidential_document_type=proclamation and a signing-date window, and
# requests the same list of fields ordered by proclamation number.
# The labels double as the "President" column value and the per-president CSV
# file name inside scrape(). "Donald J. Trump_1" / "Donald J. Trump_2" are the
# same president queried over two separate signing-date windows.
# Note: the Barack Obama query asks for per_page=1300, the others for 1000.
urls = {
"Donald J. Trump_2": "https://www.federalregister.gov/api/v1/documents.json?conditions%5Bcorrection%5D=0&conditions%5Bpresident%5D=donald-trump&conditions%5Bpresidential_document_type%5D=proclamation&conditions%5Bsigning_date%5D%5Bgte%5D=01%2F20%2F2025&conditions%5Bsigning_date%5D%5Blte%5D=05%2F03%2F2025&conditions%5Btype%5D%5B%5D=PRESDOCU&fields%5B%5D=citation&fields%5B%5D=document_number&fields%5B%5D=end_page&fields%5B%5D=html_url&fields%5B%5D=pdf_url&fields%5B%5D=type&fields%5B%5D=subtype&fields%5B%5D=publication_date&fields%5B%5D=signing_date&fields%5B%5D=start_page&fields%5B%5D=title&fields%5B%5D=disposition_notes&fields%5B%5D=proclamation_number&fields%5B%5D=full_text_xml_url&fields%5B%5D=body_html_url&fields%5B%5D=json_url&include_pre_1994_docs=true&maximum_per_page=1000&order=proclamation_number&per_page=1000",
"Joseph R. Biden, Jr.": "https://www.federalregister.gov/api/v1/documents.json?conditions%5Bcorrection%5D=0&conditions%5Bpresident%5D=joe-biden&conditions%5Bpresidential_document_type%5D=proclamation&conditions%5Bsigning_date%5D%5Bgte%5D=01%2F20%2F2021&conditions%5Bsigning_date%5D%5Blte%5D=01%2F20%2F2025&conditions%5Btype%5D%5B%5D=PRESDOCU&fields%5B%5D=citation&fields%5B%5D=document_number&fields%5B%5D=end_page&fields%5B%5D=html_url&fields%5B%5D=pdf_url&fields%5B%5D=type&fields%5B%5D=subtype&fields%5B%5D=publication_date&fields%5B%5D=signing_date&fields%5B%5D=start_page&fields%5B%5D=title&fields%5B%5D=disposition_notes&fields%5B%5D=proclamation_number&fields%5B%5D=full_text_xml_url&fields%5B%5D=body_html_url&fields%5B%5D=json_url&include_pre_1994_docs=true&maximum_per_page=1000&order=proclamation_number&per_page=1000",
"Donald J. Trump_1": "https://www.federalregister.gov/api/v1/documents.json?conditions%5Bcorrection%5D=0&conditions%5Bpresident%5D=donald-trump&conditions%5Bpresidential_document_type%5D=proclamation&conditions%5Bsigning_date%5D%5Bgte%5D=01%2F20%2F2017&conditions%5Bsigning_date%5D%5Blte%5D=01%2F20%2F2021&conditions%5Btype%5D%5B%5D=PRESDOCU&fields%5B%5D=citation&fields%5B%5D=document_number&fields%5B%5D=end_page&fields%5B%5D=html_url&fields%5B%5D=pdf_url&fields%5B%5D=type&fields%5B%5D=subtype&fields%5B%5D=publication_date&fields%5B%5D=signing_date&fields%5B%5D=start_page&fields%5B%5D=title&fields%5B%5D=disposition_notes&fields%5B%5D=proclamation_number&fields%5B%5D=full_text_xml_url&fields%5B%5D=body_html_url&fields%5B%5D=json_url&include_pre_1994_docs=true&maximum_per_page=1000&order=proclamation_number&per_page=1000",
"Barack Obama": "https://www.federalregister.gov/api/v1/documents.json?conditions%5Bcorrection%5D=0&conditions%5Bpresident%5D=barack-obama&conditions%5Bpresidential_document_type%5D=proclamation&conditions%5Bsigning_date%5D%5Bgte%5D=01%2F20%2F2009&conditions%5Bsigning_date%5D%5Blte%5D=01%2F20%2F2017&conditions%5Btype%5D%5B%5D=PRESDOCU&fields%5B%5D=citation&fields%5B%5D=document_number&fields%5B%5D=end_page&fields%5B%5D=html_url&fields%5B%5D=pdf_url&fields%5B%5D=type&fields%5B%5D=subtype&fields%5B%5D=publication_date&fields%5B%5D=signing_date&fields%5B%5D=start_page&fields%5B%5D=title&fields%5B%5D=disposition_notes&fields%5B%5D=proclamation_number&fields%5B%5D=full_text_xml_url&fields%5B%5D=body_html_url&fields%5B%5D=json_url&include_pre_1994_docs=true&maximum_per_page=1300&order=proclamation_number&per_page=1300",
"George W. Bush": "https://www.federalregister.gov/api/v1/documents.json?conditions%5Bcorrection%5D=0&conditions%5Bpresident%5D=george-w-bush&conditions%5Bpresidential_document_type%5D=proclamation&conditions%5Bsigning_date%5D%5Bgte%5D=01%2F20%2F2001&conditions%5Bsigning_date%5D%5Blte%5D=01%2F20%2F2009&conditions%5Btype%5D%5B%5D=PRESDOCU&fields%5B%5D=citation&fields%5B%5D=document_number&fields%5B%5D=end_page&fields%5B%5D=html_url&fields%5B%5D=pdf_url&fields%5B%5D=type&fields%5B%5D=subtype&fields%5B%5D=publication_date&fields%5B%5D=signing_date&fields%5B%5D=start_page&fields%5B%5D=title&fields%5B%5D=disposition_notes&fields%5B%5D=proclamation_number&fields%5B%5D=full_text_xml_url&fields%5B%5D=body_html_url&fields%5B%5D=json_url&include_pre_1994_docs=true&maximum_per_page=1000&order=proclamation_number&per_page=1000",
"William J. Clinton": "https://www.federalregister.gov/api/v1/documents.json?conditions%5Bcorrection%5D=0&conditions%5Bpresident%5D=william-j-clinton&conditions%5Bpresidential_document_type%5D=proclamation&conditions%5Bsigning_date%5D%5Bgte%5D=01%2F20%2F1993&conditions%5Bsigning_date%5D%5Blte%5D=01%2F20%2F2001&conditions%5Btype%5D%5B%5D=PRESDOCU&fields%5B%5D=citation&fields%5B%5D=document_number&fields%5B%5D=end_page&fields%5B%5D=html_url&fields%5B%5D=pdf_url&fields%5B%5D=type&fields%5B%5D=subtype&fields%5B%5D=publication_date&fields%5B%5D=signing_date&fields%5B%5D=start_page&fields%5B%5D=title&fields%5B%5D=disposition_notes&fields%5B%5D=proclamation_number&fields%5B%5D=full_text_xml_url&fields%5B%5D=body_html_url&fields%5B%5D=json_url&include_pre_1994_docs=true&maximum_per_page=1000&order=proclamation_number&per_page=1000"
}
Define scrape function
def _fetch_all_results(url, timeout=30):
    """Return every result row for one Federal Register API query.

    Follows the ``next_page_url`` link in each response so that queries
    whose result count exceeds a single page are not silently truncated.

    Args:
        url: Fully-formed API query URL.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list of result dicts (empty when the query matches nothing).

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    rows = []
    while url:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # surface HTTP errors instead of parsing an error body
        payload = response.json()
        # "results" may be missing when nothing matched; treat that as no rows.
        rows.extend(payload.get("results", []))
        # Absent/None on the last page, which ends the loop.
        url = payload.get("next_page_url")
    return rows


def scrape(urls=urls, drops=("disposition_notes", "full_text_xml_url", "body_html_url", "json_url")):
    """Download proclamation metadata for each president and save it as CSV.

    For every ``name -> url`` pair the query is fetched (all pages), the
    unwanted columns are removed, a ``President`` column holding ``name`` is
    added, and the rows are written to ``data/<name>.csv``. All presidents
    are then stacked into one frame and written to ``data/proclamations.csv``.

    Args:
        urls: Mapping of president label to Federal Register API query URL.
            Defaults to the module-level ``urls`` as it existed when this
            function was defined.
        drops: Column name, or iterable of column names, to discard. Names
            that are not present in the response are ignored.

    Returns:
        pandas.DataFrame with one row per proclamation across all presidents
        (an empty frame when ``urls`` is empty).

    Raises:
        requests.HTTPError: If any request returns an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    os.makedirs("data", exist_ok=True)  # make sure the output folder exists

    # A lone string means one column; list(str) would split it into characters.
    drop_columns = [drops] if isinstance(drops, str) else list(drops)

    frames = []
    for name, url in urls.items():
        data = pd.DataFrame(_fetch_all_results(url))
        # errors="ignore" keeps an empty result set (no columns) from raising.
        data = data.drop(columns=drop_columns, errors="ignore")
        data["President"] = name
        data.to_csv(f"data/{name}.csv", index=False)
        frames.append(data)

    # Concatenate once at the end instead of re-copying the frame every loop.
    combined = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    combined.to_csv("data/proclamations.csv", index=False)
    return combined
Run function
# Fetch every president's proclamations with the default column drops.
# scrape() also writes one CSV per president plus data/proclamations.csv,
# and returns the combined DataFrame that is kept here as df.
df = scrape(urls)
Explore Data¶
Import libraries
import pandas as pd # interact with dataframes
# graphing libraries
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer # count words
Load data
# Load the combined CSV that scrape() writes instead of querying all six API
# endpoints a second time; only scrape when that file does not exist yet.
try:
    df = pd.read_csv("data/proclamations.csv")
except FileNotFoundError:
    df = scrape(urls)
Data cleaning
# Parse both date columns into pandas datetimes so the .dt accessor works on them
for date_column in ('publication_date', 'signing_date'):
    df[date_column] = pd.to_datetime(df[date_column])
Feature engineering
published = df['publication_date']
signed = df['signing_date']

# Whole days elapsed between signing and publication
df['days_to_publish'] = (published - signed).dt.days

# Calendar parts of the publication date
df['year'] = published.dt.year
df['month'] = published.dt.month
df['weekday'] = published.dt.day_name()

# Calendar parts of the signing date
df['signing_year'] = signed.dt.year
df['signing_month'] = signed.dt.month

# Number of proclamations per publication year
yearly_counts = df.groupby('year').size()
# Bag-of-words over the proclamation titles, ignoring English stop words
v = CountVectorizer(stop_words='english')
X = v.fit_transform(df['title'])

# Vocabulary terms, in the same column order as X
words = v.get_feature_names_out()
# Column sums of X give each term's total count across all titles
word_counts = X.sum(axis=0).A1

# Term -> total count, most frequent first
word_freq = pd.Series(data=word_counts, index=words)
word_freq = word_freq.sort_values(ascending=False)
Graphs
Proclamations by President
# Bar chart: number of proclamations per president
fig, ax = plt.subplots(figsize=(10, 5))
# Rows are fed in reverse so the bars appear in the reverse of df's row order
sns.countplot(data=df.iloc[::-1], x='President', ax=ax)
ax.set_title("Number of Proclamations by President")
plt.setp(ax.get_xticklabels(), rotation=45)  # slant the president labels
plt.show()
Distribution of Days Between Signing and Publication
# Histogram (5 bins) with a KDE overlay of the signing-to-publication lag
fig, ax = plt.subplots(figsize=(10, 5))
sns.histplot(x=df['days_to_publish'], bins=5, kde=True, ax=ax)
ax.set_title("Distribution of Days Between Signing and Publication")
ax.set_xlabel("Days to Publish")
plt.show()
Proclamations by Year
# Bar chart: proclamations per publication year
fig, ax = plt.subplots(figsize=(12, 6))
yearly_counts.plot(kind='bar', ax=ax)
ax.set(title="Proclamations by Year", xlabel="Year", ylabel="Count")
plt.show()
Proclamations by Weekday
# Bar chart: proclamations per publication weekday, Monday through Sunday
weekday_order = [
    'Monday', 'Tuesday', 'Wednesday', 'Thursday',
    'Friday', 'Saturday', 'Sunday',
]
fig, ax = plt.subplots(figsize=(14, 6))
sns.countplot(data=df, x='weekday', order=weekday_order, ax=ax)
ax.set_title("Proclamations by Weekday")
ax.set_ylabel("Count")
plt.setp(ax.get_xticklabels(), rotation=45)  # slant the weekday labels
fig.tight_layout()
plt.show()
15 Most Common Words in Proclamation Titles
# Bar chart: the 15 highest-count title words (word_freq is sorted descending)
fig, ax = plt.subplots(figsize=(10, 6))
top_words = word_freq.iloc[:15]
top_words.plot(kind='bar', ax=ax)
ax.set_title("15 Most Common Words in Proclamation Titles")
ax.set_ylabel("Count")
plt.show()
Time Delay Between Signing and Publication Over Time
# Line chart: signing-to-publication lag plotted against publication date
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=df, x='publication_date', y='days_to_publish', ax=ax)
ax.set_title("Time Delay Between Signing and Publication Over Time")
ax.set_xlabel("Publication Date")
ax.set_ylabel("Days to Publish")
fig.tight_layout()
plt.show()