Connect WRDS in R

Connect from R to Wharton Research Data Services

# to set up connection from R to WRDS (https://wrds-www.wharton.upenn.edu/pages/support/programming-wrds/programming-r/r-from-your-computer/)
library(RPostgres)
library(dplyr)

# I've set up wrds connection before hand. Please use your username and password here.

# wrds <- dbConnect(Postgres(),
#                   host='wrds-pgdata.wharton.upenn.edu',
#                   port=9737,
#                   dbname='wrds',
#                   sslmode='require',
#                   user='')

# Check variables (column headers) in COMP ANNUAL FUNDAMENTAL
#uses the already-established wrds connection to prepare the SQL query string and save the query as the result res.
# check avaiable databases: https://wrds-web.wharton.upenn.edu/wrds/tools/variable.cfm?vendor_id=7
res <- dbSendQuery(wrds, "select column_name
                   from information_schema.columns
                   where table_schema='compa'
                   and table_name='funda'
                   order by column_name")
data <- dbFetch(res, n=-1) # fetches the data that results from running the query res against wrds and stores it as data
dbClearResult(res) # closes the connection
head(data)
##   column_name
## 1       acchg
## 2        acco
## 3       accrt
## 4     acctchg
## 5     acctstd
## 6        acdo
# select everything
res <- dbSendQuery(wrds, "select * from compa.funda")

# from compa.funda

# only select the following variables
res <- dbSendQuery(wrds, "select gvkey, datadate, fyear, indfmt, consol, popsrc, datafmt, tic, cusip, conm, curcd, fyr, act, at, bkvlps, ceq, ch, che, dltt, dlc, emp, np, exchg, cik, costat, naicsh,mkvalt from compa.funda") #check variables from (https://wrds-web.wharton.upenn.edu/wrds/ds/comp/funda/index.cfm?navId=80)
## Warning in result_create(conn@ptr, statement): Closing open result set,
## cancelling previous query
data <- dbFetch(res, n=-1)
dbClearResult(res)

head(data)
##    gvkey   datadate fyear indfmt consol popsrc datafmt  tic     cusip
## 1 001000 1961-12-31  1961   INDL      C      D     STD AE.2 000032102
## 2 001000 1962-12-31  1962   INDL      C      D     STD AE.2 000032102
## 3 001000 1963-12-31  1963   INDL      C      D     STD AE.2 000032102
## 4 001000 1964-12-31  1964   INDL      C      D     STD AE.2 000032102
## 5 001000 1965-12-31  1965   INDL      C      D     STD AE.2 000032102
## 6 001000 1966-12-31  1966   INDL      C      D     STD AE.2 000032102
##                    conm curcd fyr   act    at bkvlps   ceq ch   che  dltt   dlc
## 1 A & E PLASTIK PAK INC   USD  12    NA    NA 2.4342    NA NA    NA 0.100    NA
## 2 A & E PLASTIK PAK INC   USD  12    NA    NA 3.0497 0.552 NA    NA 0.000    NA
## 3 A & E PLASTIK PAK INC   USD  12 0.408    NA 2.9731 0.553 NA    NA 0.015    NA
## 4 A & E PLASTIK PAK INC   USD  12 0.718 1.416 3.0969 0.607 NA 0.269 0.522 0.088
## 5 A & E PLASTIK PAK INC   USD  12 0.725 2.310 2.3835 0.491 NA 0.031 1.154 0.300
## 6 A & E PLASTIK PAK INC   USD  12 1.015 2.430 3.8082 0.834 NA 0.063 1.109 0.124
##   emp    np exchg  cik costat naicsh mkvalt
## 1  NA    NA    12 <NA>      I     NA     NA
## 2  NA    NA    12 <NA>      I     NA     NA
## 3  NA    NA    12 <NA>      I     NA     NA
## 4  NA 0.000    12 <NA>      I     NA     NA
## 5  NA 0.225    12 <NA>      I     NA     NA
## 6  NA 0.050    12 <NA>      I     NA     NA
Mike Nguyen
Mike Nguyen
PhD

My research interests include marketing, and social science.

Next
Previous

Related