Quadratic Regression

Plotting the relationship between video length and funding outcome:

# Read the Kickstarter project data straight from the course site.
# The "UTF-8-BOM" encoding makes R discard a leading byte-order mark, if the
# file has one, instead of reading it into the first column name.
mydata <- read.csv(
  file = "https://ximarketing.github.io/class/Kickstarter-Project.csv",
  fileEncoding = "UTF-8-BOM"
)
# Keep only rows where IsVideoAvailable equals 1 (presumably projects that
# have a video); subset() also drops rows where the condition is NA.
subdata <- subset(mydata, subset = IsVideoAvailable == 1)
# ggplot2 supplies the plotting functions used for the chart that follows.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.1
# Bar chart of the mean of Outcome within 60 equal-width bins of VideoLength,
# fitted on the video-only subset. `fun` is the current name of the summary
# argument; the former `fun.y` spelling is deprecated and triggered a warning.
# xlim(0, 400) restricts the x scale, so observations outside that range are
# dropped before the bin means are computed (hence the "Removed ... rows"
# warnings that follow).
ggplot(subdata, mapping = aes(x = VideoLength, y = Outcome)) +
  stat_summary_bin(fun = "mean", geom = "bar", bins = 60) +
  xlim(0, 400)
## Warning: Removed 162 rows containing non-finite values (stat_summary_bin).
## Warning: Removed 2 rows containing missing values (geom_bar).

Including quadratic terms in your regression (a linear-only model is fitted first as a baseline for comparison):

# Baseline model: logistic regression of Outcome on VideoLength with a linear
# term only, fitted on the video-only subset.
logit <- glm(
  formula = Outcome ~ VideoLength,
  family = "binomial",
  data = subdata
)
summary(logit)
## 
## Call:
## glm(formula = Outcome ~ VideoLength, family = "binomial", data = subdata)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5934  -0.9742  -0.9593   1.3909   1.4258  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.5689252  0.0481630 -11.812   <2e-16 ***
## VideoLength  0.0004219  0.0002285   1.846   0.0649 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7017.7  on 5290  degrees of freedom
## Residual deviance: 7014.2  on 5289  degrees of freedom
## AIC: 7018.2
## 
## Number of Fisher Scoring iterations: 4
# Refit the logistic regression with an added squared term. I() makes `^`
# mean arithmetic squaring rather than the formula crossing operator.
# This overwrites the linear-only `logit` fitted above.
logit <- glm(
  formula = Outcome ~ VideoLength + I(VideoLength^2),
  family = "binomial",
  data = subdata
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(logit)
## 
## Call:
## glm(formula = Outcome ~ VideoLength + I(VideoLength^2), family = "binomial", 
##     data = subdata)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1024  -1.0060  -0.8702   1.3246   5.0005  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -1.192e+00  8.958e-02 -13.307  < 2e-16 ***
## VideoLength       6.541e-03  8.102e-04   8.074 6.81e-16 ***
## I(VideoLength^2) -1.056e-05  1.584e-06  -6.666 2.63e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7017.7  on 5290  degrees of freedom
## Residual deviance: 6929.1  on 5288  degrees of freedom
## AIC: 6935.1
## 
## Number of Fisher Scoring iterations: 7

Interactions

# Log-transform the funding amount; the +1 offset keeps zero values finite,
# since log(0) is -Inf.
mydata$LogFunding <- log(1 + mydata$FundingRaised)
# `Created * NumberOfProducts` expands to both main effects plus their
# interaction term (Created:NumberOfProducts).
result <- lm(formula = LogFunding ~ Created * NumberOfProducts, data = mydata)
summary(result)
## 
## Call:
## lm(formula = LogFunding ~ Created * NumberOfProducts, data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15.8916  -2.3080   0.3523   2.2554   8.6714 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               3.039097   0.071653  42.414  < 2e-16 ***
## Created                   0.240761   0.042393   5.679 1.41e-08 ***
## NumberOfProducts          0.443064   0.008182  54.148  < 2e-16 ***
## Created:NumberOfProducts -0.012090   0.005019  -2.409    0.016 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.126 on 6954 degrees of freedom
## Multiple R-squared:  0.3358, Adjusted R-squared:  0.3355 
## F-statistic:  1172 on 3 and 6954 DF,  p-value: < 2.2e-16
# Rebuild the video-only subset: the earlier `subdata` was created before the
# LogFunding column was added to `mydata`, so it does not contain it.
subdata <- subset(mydata, subset = IsVideoAvailable == 1)
# Interaction between a categorical predictor (Gender, dummy-coded by factor())
# and Human: main effects plus one interaction term per non-reference level.
result <- lm(formula = LogFunding ~ factor(Gender) * Human, data = subdata)
summary(result)
## 
## Call:
## lm(formula = LogFunding ~ factor(Gender) * Human, data = subdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.8754 -1.8513  0.5323  2.3194  7.8001 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             5.1768     0.3778  13.704  < 2e-16 ***
## factor(Gender)M         0.4710     0.4003   1.177  0.23939    
## factor(Gender)U         1.9207     0.4057   4.734 2.26e-06 ***
## Human                   2.3467     0.4137   5.672 1.48e-08 ***
## factor(Gender)M:Human  -1.1873     0.4411  -2.692  0.00713 ** 
## factor(Gender)U:Human  -0.5688     0.4453  -1.277  0.20153    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.228 on 5285 degrees of freedom
## Multiple R-squared:  0.115,  Adjusted R-squared:  0.1142 
## F-statistic: 137.4 on 5 and 5285 DF,  p-value: < 2.2e-16

Fixed Effects

# Log-transform the funding target, using the same +1 offset as LogFunding.
mydata$LogTarget <- log(1 + mydata$Target)
# Fixed effects: factor() turns Subtype and Location into one dummy variable
# per level (minus a reference level), so each category gets its own intercept
# shift.
result <- lm(
  formula = LogFunding ~ LogTarget + factor(Subtype) + factor(Location),
  data = mydata
)
summary(result)
## 
## Call:
## lm(formula = LogFunding ~ LogTarget + factor(Subtype) + factor(Location), 
##     data = mydata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0535  -2.3941   0.4245   2.4424  10.4402 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      6.66186    0.40466  16.463  < 2e-16 ***
## LogTarget                        0.20560    0.02403   8.556  < 2e-16 ***
## factor(Subtype)Apps             -4.19174    0.33504 -12.511  < 2e-16 ***
## factor(Subtype)CameraEquipment   1.30956    0.45434   2.882 0.003960 ** 
## factor(Subtype)DIYElectronics   -0.20024    0.40840  -0.490 0.623940    
## factor(Subtype)FabricationTools -2.53178    0.56363  -4.492 7.17e-06 ***
## factor(Subtype)Flight           -1.50850    0.47699  -3.163 0.001571 ** 
## factor(Subtype)Gadgets          -0.33102    0.34636  -0.956 0.339246    
## factor(Subtype)Hardware          0.18633    0.33815   0.551 0.581627    
## factor(Subtype)Makerspaces      -1.07444    0.56385  -1.906 0.056752 .  
## factor(Subtype)Robots           -0.10786    0.43235  -0.249 0.803008    
## factor(Subtype)Software         -3.07865    0.34560  -8.908  < 2e-16 ***
## factor(Subtype)Sound             0.05697    0.42930   0.133 0.894436    
## factor(Subtype)SpaceExploration -0.88965    0.50991  -1.745 0.081076 .  
## factor(Subtype)Technology       -1.70381    0.33201  -5.132 2.95e-07 ***
## factor(Subtype)Wearables         0.19453    0.38017   0.512 0.608882    
## factor(Subtype)Web              -4.43896    0.34505 -12.865  < 2e-16 ***
## factor(Location)IL              -0.96675    0.16151  -5.986 2.26e-09 ***
## factor(Location)MA               0.07324    0.15776   0.464 0.642465    
## factor(Location)NY              -0.36623    0.11099  -3.300 0.000973 ***
## factor(Location)TX              -1.31772    0.11932 -11.043  < 2e-16 ***
## factor(Location)WA              -0.63908    0.16752  -3.815 0.000137 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.306 on 6936 degrees of freedom
## Multiple R-squared:  0.2588, Adjusted R-squared:  0.2566 
## F-statistic: 115.3 on 21 and 6936 DF,  p-value: < 2.2e-16