Reproduction: Cincera, M. (1997). Patents, R&D, and technological spillovers at the firm level.

Franz Mohr, Created: May 8, 2018, Last update: May 8, 2018

Get the data

The data set can be downloaded from the Journal of Applied Econometrics Data Archive.

# Fetch the replication archive and extract it into the working directory.
# mode = "wb" requests a binary transfer: the archive is a zip file, and a
# text-mode transfer can corrupt binary files on Windows.
zip_file <- "1997_cincera_data.zip"
download.file("http://qed.econ.queensu.ca/jae/1997-v12.3/cincera/mc-data.zip",
              destfile = zip_file, mode = "wb")
unzip(zip_file)

# Read the raw file (it has no header row) and name its 30 columns:
# fi, s and g, followed by nine yearly columns (1983-1991) each for the
# p*, lr* and ls* series.
data <- read.delim("data.mc", header = FALSE)

year_suffix <- 83:91
names(data) <- c("fi", "s", "g",
                 paste0("p", year_suffix),
                 paste0("lr", year_suffix),
                 paste0("ls", year_suffix))

# Stack the nine yearly columns of a series into a single one-column matrix:
# all rows for 83 first, then all rows for 84, and so on up to 91.
stack_years <- function(prefix) {
  matrix(as.matrix(data[, paste0(prefix, 83:91)]))
}

p <- stack_years("p")
k <- stack_years("lr")
spill <- stack_years("ls")

# Panel dimensions: 181 firms observed in each of the years 1983-1991.
n_firms <- 181L
panel_years <- 1983:1991

# Firm IDs: the sequence 1..181 repeated once per year, matching the
# year-by-year stacking of the series.
fi <- rep(seq_len(n_firms), times = length(panel_years))

# Years: each year repeated once per firm. Built in one vectorised call
# instead of growing the vector inside a loop.
year <- rep(panel_years, each = n_firms)
 
# Geographic dummies
# The firm-level code is repeated nine times (once per year) so that it lines
# up with the stacked series; g.1 ... g.4 are 0/1 indicators for codes 1 to 4.
g <- rep(data$g, 9)

for (level in 1:4) {
  assign(paste0("g.", level), as.numeric(g == level))
}

# Technological dummies
# Same construction for the sector code: s.1 ... s.15 flag codes 1 to 15.
s <- rep(data$s, 9)

for (level in 1:15) {
  assign(paste0("s.", level), as.numeric(s == level))
}
 
# Lag a stacked series by `n_years` years.
# The series are stacked year by year with 181 rows per year, so shifting the
# values down by n_years * 181 positions pairs every row with the value the
# same firm had n_years earlier. The first n_years blocks have no predecessor
# and are filled with NA.
lag_years <- function(x, n_years, n_firms = 181) {
  n_pad <- n_years * n_firms
  as.vector(c(rep(NA, n_pad), x[1:(length(x) - n_pad)]))
}

# Lags of R&D Spending
k.1 <- lag_years(k, 1)
k.2 <- lag_years(k, 2)
k.3 <- lag_years(k, 3)
k.4 <- lag_years(k, 4)

# Lags of spillovers
spill.1 <- lag_years(spill, 1)
spill.2 <- lag_years(spill, 2)
spill.3 <- lag_years(spill, 3)
spill.4 <- lag_years(spill, 4)
 
# Generate the final data frame
# Column order: panel index, patents, R&D and its lags, spillovers and their
# lags, geographic dummies, sector dummies.
data <- data.frame(
  year, fi, p,
  k, k.1, k.2, k.3, k.4,
  spill, spill.1, spill.2, spill.3, spill.4,
  g.1, g.2, g.3, g.4,
  s.1, s.2, s.3, s.4, s.5, s.6, s.7, s.8, s.9, s.10,
  s.11, s.12, s.13, s.14, s.15
)

# Give labels to the variables
# One label per column, in column order, stored in the "var.labels" attribute.
attr(data, "var.labels") <- c(
  "Year", "FirmID", "# of patents",
  "R&D spending", paste("Lag R&D", 1:4),
  "Spillover", paste("Lag spillover", 1:4),
  paste("Geographic dummy", 1:4),
  paste("Sector dummy", 1:15)
)

If you want, you can save the modified data to disk:

# Write the panel to the working directory twice: as a plain CSV file without
# row names, and as an .rda file holding the R object `data`.
write.csv(data, file = "1997_cincera_patents.csv", row.names = FALSE)
save(list = "data", file = "1997_cincera_patents.rda")

Tables

Summary statistics

# Summary statistics, rounded to two decimals, for patents (row 1),
# R&D spending (row 2) and spillovers (row 3).
# Note: the column named Standard.error holds the sample standard deviation.
describe <- function(x) {
  round(c(mean(x), sd(x), min(x), max(x)), 2)
}

stats_1a <- rbind(describe(data$p), describe(data$k), describe(data$spill))

table.1a <- data.frame(Mean = stats_1a[, 1],
                       Standard.error = stats_1a[, 2],
                       Minimum.value = stats_1a[, 3],
                       Maximum.value = stats_1a[, 4])

table.1a

##    Mean Standard.error Minimum.value Maximum.value
## 1 60.79         121.56          0.00        925.00
## 2  5.20           1.26          0.87          8.70
## 3  9.40           0.93          6.82         10.76

Correlations

# Lower-triangular correlation table.
# Rows are k, k.1, ..., k.4. Column P holds the correlation with the number
# of patents p; the remaining columns hold the correlation with k, k.1, k.2
# and k.3. Cells above the diagonal stay NA.
row_vars <- c("k", "k.1", "k.2", "k.3", "k.4")
col_vars <- c("p", "k", "k.1", "k.2", "k.3")

table.1b <- data.frame(P = NA, k = NA, k.1 = NA, k.2 = NA, k.3 = NA)

# use = "complete.obs" tells R to proceed with the evaluation even though
# there are missing values (the lagged series start with NAs). Only
# observations where both series contain a value enter the correlation.
# The option is passed by name so it cannot be mistaken for another argument,
# and it is applied to every cell so all of them treat missing values alike.
for (j in seq_along(col_vars)) {
  for (i in j:length(row_vars)) {
    table.1b[i, j] <- round(
      cor(data[[row_vars[i]]], data[[col_vars[j]]], use = "complete.obs"),
      2
    )
  }
}

table.1b

##      P    k  k.1  k.2  k.3
## 1 0.55   NA   NA   NA   NA
## 2 0.55 0.99   NA   NA   NA
## 3 0.55 0.98 0.99   NA   NA
## 4 0.55 0.97 0.98 0.99   NA
## 5 0.55 0.95 0.96 0.97 0.99

Panel estimates

Poisson

Cincera (1997) estimates a simple Poisson model for panel data as a benchmark. In R this can be done with the pglm package, which fits generalized linear models to panel data.

library(pglm)

# Table 2
# (3) Conditional Poisson
# The panel structure is stated explicitly through `index`: firm (fi) is the
# individual dimension and year the time dimension. Without `index` the first
# two columns of `data` are taken as individual and time index, and those
# columns are year and fi (in that order), so the "individual" effect would
# be a year effect rather than a firm effect.
# NOTE(review): output recorded from a run without `index` may no longer
# match; re-run this chunk to refresh it.
poisson <- pglm(p ~ k + k.1 + k.2 + k.3 + k.4 +
                  spill + spill.1 + spill.2 + spill.3 + spill.4,
                data = data,
                index = c("fi", "year"),
                model = "within",
                effect = "individual",
                family = poisson())

summary(poisson)

## --------------------------------------------
## Maximum Likelihood estimation
## Newton-Raphson maximisation, 2 iterations
## Return code 2: successive function values within tolerance limit
## Log-Likelihood: -34220.47 
## 10  free parameters
## Estimates:
##         Estimate Std. error t value  Pr(> t)    
## k        0.50275    0.02480  20.275  < 2e-16 ***
## k.1     -0.12928    0.02966  -4.359 1.31e-05 ***
## k.2      0.10297    0.02880   3.576 0.000349 ***
## k.3     -0.11690    0.03115  -3.753 0.000175 ***
## k.4      0.46476    0.02206  21.071  < 2e-16 ***
## spill    0.05886    0.09513   0.619 0.536118    
## spill.1 -1.48667    0.12580 -11.818  < 2e-16 ***
## spill.2  1.21871    0.11839  10.294  < 2e-16 ***
## spill.3 -1.11823    0.11748  -9.519  < 2e-16 ***
## spill.4  1.24173    0.07499  16.559  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## --------------------------------------------

Annex

A2

# Number of firms per sector: sum each sector dummy over the 1983 rows, so
# that every firm is counted once.
sector_cols <- paste0("s.", 1:15)
colSums(data[data$year == 1983, sector_cols])

##  s.1  s.2  s.3  s.4  s.5  s.6  s.7  s.8  s.9 s.10 s.11 s.12 s.13 s.14 s.15 
##   12   28   20   13   29    9   11    3   13   10    3    8    5    2   15