## ----preliminaries, echo=FALSE, message=FALSE-----------------------------------------
library(knitr)
# library(magrittr)
library(xtable)
# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Console settings: 88-character lines, no significance stars, and a single space as
#   the continuation prompt.
#   (Disabled option: lattice.theme = function() canonical.theme("pdf", color = FALSE))
options(
  width = 88,
  show.signif.stars = FALSE,
  continue = " "
)

# Detach NlsyLinks if it is currently on the search path.
if ("package:NlsyLinks" %in% search()) {
  detach("package:NlsyLinks")
}

## -------------------------------------------------------------------------------------
# Confirm that NlsyLinks is installed in one of the libraries; this should print TRUE.
"NlsyLinks" %in% .packages(all.available = TRUE)
# Attach the package for the current session.
library(NlsyLinks)

## -------------------------------------------------------------------------------------
### Example: DF analysis with a simple outcome and Gen2 subjects
# Step 2: Attach the package that supplies the linking routines.
library(NlsyLinks)

# Step 3: Take the LINKING dataset and keep only the Gen2 sibling pairs.
dsLinking <- subset(x = Links79Pair, subset = RelationshipPath == "Gen2Siblings")
summary(dsLinking) # Notice there are 11,088 records (one for each unique pair).

# Step 4: Take the OUTCOMES dataset, and then examine its summary.
dsOutcomes <- ExtraOutcomes79 # 'ds' stands for 'Data Set'
summary(dsOutcomes)

# Step 5: Not strictly needed for this example, because Kelly Meredith already groomed
#   the values.  Should negative values (which represent NLSY missing or skip patterns)
#   still be present, this converts them to NA:
dsOutcomes$MathStandardized <- replace(
  dsOutcomes$MathStandardized,
  which(dsOutcomes$MathStandardized < 0),
  NA
)

# Step 6: Build the double-entered dataset.
dsDouble <- CreatePairLinksDoubleEntered(
  linksPairDataset = dsLinking,
  outcomeDataset   = dsOutcomes,
  outcomeNames     = "MathStandardized"
)
summary(dsDouble) # Notice there are 22176=(2*11088) records now (two for each unique pair).

# Step 7: Estimate the ACE components with a DF analysis.
ace <- DeFriesFulkerMethod3(
  oName_S1 = "MathStandardized_S1",
  oName_S2 = "MathStandardized_S2",
  dataSet  = dsDouble
)
ace

## -------------------------------------------------------------------------------------
### R Code for Example of a DF analysis with a simple outcome and Gen2 subjects
# Step 2: Load the package containing the linking routines.
library(NlsyLinks)

# Step 3: Load the linking dataset and filter for the Gen2 subjects.
dsLinking <- subset(Links79Pair, RelationshipPath == "Gen2Siblings")

# Step 4: Load the outcomes dataset from the hard drive and then examine the summary.
#   Your path might be: filePathOutcomes <- 'C:/BGResearch/NlsExtracts/gen2-birth.csv'
#   The relative path below resolves only when the working directory sits next to the
#   'inst' directory of the package source.  When the file is not found there, fall
#   back to the copy expected in the installed package's 'extdata' directory;
#   `mustWork = TRUE` stops with an error if that copy is missing too.
filePathOutcomes <- "../inst/extdata/gen2-birth.csv"
if (!file.exists(filePathOutcomes)) {
  filePathOutcomes <- system.file(
    "extdata", "gen2-birth.csv",
    package = "NlsyLinks",
    mustWork = TRUE
  )
}
dsOutcomes <- ReadCsvNlsy79Gen2(filePathOutcomes)
summary(dsOutcomes)

# Step 5: Verify and rename an existing column.
VerifyColumnExists(dsOutcomes, "C0328600") # Should return '10' in this example.
dsOutcomes <- RenameNlsyColumn(dsOutcomes, "C0328600", "BirthWeightInOunces")

# Step 6: For this item, a negative value indicates the parent refused, didn't know,
#   invalidly skipped, or was missing for some other reason.
#   For our present purposes, we'll treat these responses equivalently.
#   Then clip/Winsorize/truncate the weight at an upper bound of 200 ounces.
dsOutcomes$BirthWeightInOunces[dsOutcomes$BirthWeightInOunces < 0] <- NA
dsOutcomes$BirthWeightInOunces <- pmin(dsOutcomes$BirthWeightInOunces, 200)

# Step 7: Create the double entered dataset.
dsDouble <- CreatePairLinksDoubleEntered(
  outcomeDataset   = dsOutcomes,
  linksPairDataset = dsLinking,
  outcomeNames     = c("BirthWeightInOunces")
)

# Step 8: Estimate the ACE components with a DF Analysis.
ace <- AceUnivariate(
  method   = "DeFriesFulkerMethod3",
  dataSet  = dsDouble,
  oName_S1 = "BirthWeightInOunces_S1",
  oName_S2 = "BirthWeightInOunces_S2"
)
ace

## -------------------------------------------------------------------------------------
### Example: lavaan estimation with a simple outcome and Gen2 subjects
# Steps 1-5 are explained in the vignette's first example:
library(NlsyLinks)
dsLinking <- subset(x = Links79Pair, subset = RelationshipPath == "Gen2Siblings")
dsOutcomes <- ExtraOutcomes79
dsOutcomes$MathStandardized <- replace(
  dsOutcomes$MathStandardized,
  which(dsOutcomes$MathStandardized < 0),
  NA
)

# Step 6: Build the single-entered dataset.
dsSingle <- CreatePairLinksSingleEntered(
  linksPairDataset = dsLinking,
  outcomeDataset   = dsOutcomes,
  outcomeNames     = "MathStandardized"
)

# Step 7: Name the two outcome variables (one per member of the pair).
oName_S1 <- "MathStandardized_S1" # Stands for Outcome1
oName_S2 <- "MathStandardized_S2" # Stands for Outcome2

# Step 8: Summarize the R groups and determine which groups can be estimated.
dsGroupSummary <- RGroupSummary(dsSingle, oName_S1, oName_S2)
dsGroupSummary

# Step 9: Produce the cleaned dataset.
dsClean <- CleanSemAceDataset(
  dsDirty = dsSingle,
  dsGroupSummary,
  oName_S1,
  oName_S2
)

# Step 10: Fit the model.
ace <- AceLavaanGroup(dsClean)
ace
# Notice the 'CaseCount' is 8,390 instead of 17,440.
#  This is because (a) one pair with R=.75 was excluded, and
#  (b) the SEM uses a single-entered dataset instead of double-entered.
#
# Step 11: Look at the output in more depth.
library(lavaan) # Attach lavaan to access the methods of the lavaan class.
GetDetails(ace)
# Examine fit stats like Chi-Squared, RMSEA, CFI, etc.
fitMeasures(GetDetails(ace)) # 'fitMeasures' is defined in the lavaan package.

# To examine low-level details, like each group's individual parameter estimates and
#  standard errors, uncomment the next line (the entire output is roughly 4 pages).
# summary(GetDetails(ace))

## -------------------------------------------------------------------------------------
### Example: lavaan estimation with a simple outcome and Gen1 subjects
# Steps 1-5 are explained in the vignette's first example:
library(NlsyLinks)
dsLinking <- subset(x = Links79Pair, subset = RelationshipPath == "Gen1Housemates")
dsOutcomes <- ExtraOutcomes79
# No grooming is required: the HeightZGenderAge variable is already groomed.

# Step 6: Build the single-entered dataset.
dsSingle <- CreatePairLinksSingleEntered(
  linksPairDataset = dsLinking,
  outcomeDataset   = dsOutcomes,
  outcomeNames     = "HeightZGenderAge"
)

# Step 7: Name the two outcome variables (one per member of the pair).
oName_S1 <- "HeightZGenderAge_S1"
oName_S2 <- "HeightZGenderAge_S2"

# Step 8: Summarize the R groups and determine which groups can be estimated.
dsGroupSummary <- RGroupSummary(dsSingle, oName_S1, oName_S2)
dsGroupSummary

# Step 9: Produce the cleaned dataset.
dsClean <- CleanSemAceDataset(
  dsDirty = dsSingle,
  dsGroupSummary,
  oName_S1,
  oName_S2
)

# Step 10: Fit the model.
ace <- AceLavaanGroup(dsClean)
ace

# Step 11: Inspect the output further (see the final step in the previous example).

## -------------------------------------------------------------------------------------
# Step 8 (variation): Summarize the R groups, then manually exclude the group(s) whose
#   R value is listed in `rGroupsToDrop` by setting their 'Included' flag to FALSE.
dsGroupSummary <- RGroupSummary(dsSingle, oName_S1, oName_S2)
rGroupsToDrop <- 1
dsGroupSummary$Included[dsGroupSummary$R %in% rGroupsToDrop] <- FALSE
dsGroupSummary

## ----results='asis'-------------------------------------------------------------------
# Tabulate how many pairs fall under each relationship path and emit the result as an
#   HTML table (the chunk option results='asis' passes the markup through as-is).
xt <- xtable(
  table(Links79Pair$RelationshipPath, dnn = c("Relationship Frequency")),
  caption = "Number of NLSY79 relationships, by `RelationshipPath`. (Recall that 'AuntNiece' also contains uncles and nephews.)"
)
# Call the generic rather than the method directly; S3 dispatch selects the xtable
#   method from the object's class.
print(xt, format.args = list(big.mark = ","), type = "html")

## -------------------------------------------------------------------------------------
### Example: lavaan estimation with a simple outcome and all five relationship paths
# Steps 1-5 are explained in the vignette's first example:
library(NlsyLinks)
dsLinking <- subset(
  x      = Links79Pair,
  subset = RelationshipPath %in% c(
    "Gen1Housemates", "Gen2Siblings", "Gen2Cousins",
    "ParentChild", "AuntNiece"
  )
)
# Because all five paths are specified, the statement above is equivalent to:
# dsLinking <- Links79Pair

dsOutcomes <- ExtraOutcomes79
# No grooming is required: the HeightZGenderAge variable is already groomed.

# Step 6: Build the single-entered dataset.
dsSingle <- CreatePairLinksSingleEntered(
  linksPairDataset = dsLinking,
  outcomeDataset   = dsOutcomes,
  outcomeNames     = "HeightZGenderAge"
)

# Step 7: Name the two outcome variables (one per member of the pair).
oName_S1 <- "HeightZGenderAge_S1"
oName_S2 <- "HeightZGenderAge_S2"

# Step 8: Summarize the R groups and determine which groups can be estimated.
dsGroupSummary <- RGroupSummary(dsSingle, oName_S1, oName_S2)
dsGroupSummary

# Step 9: Produce the cleaned dataset.
dsClean <- CleanSemAceDataset(
  dsDirty = dsSingle,
  dsGroupSummary,
  oName_S1,
  oName_S2
)

# Step 10: Fit the model.
ace <- AceLavaanGroup(dsClean)
ace

# Step 11: Inspect the output further (see the final step two examples above).

## ----session-info-2, echo=FALSE-------------------------------------------------------
# Print the platform details of the current R session.
devtools::session_info()[["platform"]]
# Render the 'loadedversion' and 'date' columns of the package listing as an HTML table.
knitr::kable(
  x      = devtools::session_info()[["packages"]][, c("loadedversion", "date")],
  format = "html"
)

