Objective

In this vignette we will see how to modify your outputs. Basically you can:

  • Mix qualitative and quantitative descriptive statistics: regroup() function
  • Choose the quantitative statistics you want to display: report.quanti() function
  • Report row percentages instead of column percentages: report.quali() function
  • Add a specific statistic to an existing table: add.stat() function
  • Split an existing table in 2 parts: split() function
  • Drop one or several levels of a variable in a table
  • Transpose the statistics in columns

Load the usual R packages

Start by loading all usual libraries.

library(ClinReport)
library(officer)
library(flextable)
library(emmeans)

Load your data.

# Fake data are used throughout; preview the first six rows
data("datafake")
print(head(datafake, n = 6L))
#>    y_numeric y_logistic y_poisson   baseline   VAR GROUP TIMEPOINT SUBJID
#> 1 -0.4203490          1         5 -0.4203490 Cat 1     A        D0 Subj 1
#> 2 -0.1570941          1         5 -0.1570941 Cat 2     A        D0 Subj 1
#> 3         NA          0         3 -2.0853720 Cat 2     A        D0 Subj 1
#> 4 -0.4728527          0         5 -0.4728527 Cat 1     A        D0 Subj 1
#> 5 -0.8651713          1         4 -0.8651713 Cat 1     A        D0 Subj 1
#> 6 -1.5476907          1         3 -1.5476907 Cat 1     A        D0 Subj 1

Mixed Quantitative and Qualitative statistics

You can mix qualitative and quantitative outputs. This is possible only when there is a single explanatory variable, and it must be the same variable for both responses:

# Quantitative descriptive statistics of y_numeric by GROUP
tab1 <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  subjid = "SUBJID",
  y.label = "Y numeric"
)

# Qualitative descriptive statistics of y_logistic by the same GROUP variable
tab2 <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  y.label = "Y logistic"
)

# Combine both tables into one; rbind.label receives a free-text label
tab3 <- regroup(tab1, tab2, rbind.label = "The label of your choice")

report.doc(
  tab3,
  title = "Mixed Qualitative and Quantitative output",
  colspan.value = "Treatment group"
)

Table 1: Mixed Qualitative and Quantitative output

Treatment group

The label of your choice

Levels

Statistics

A (N=30)

B (N=21)

C (N=17)

Y numeric

N

180

120

96

Mean (SD)

1.46(1.50)

3.15(2.00)

3.87(2.52)

Median

1.59

3.75

4.73

[Q1;Q3]

[0.45;2.50]

[2.46;4.44]

[3.44;5.30]

[Min;Max]

[-2.34;4.36]

[-2.44;6.19]

[-2.99;7.96]

Missing

4

4

2

Y logistic

0

n (column %)

79(43.89%)

60(50.00%)

47(48.96%)

1

n (column %)

97(53.89%)

59(49.17%)

44(45.83%)

Missing n(%)

4(2.22%)

1(0.83%)

5(5.21%)

Report a specific quantitative statistic

You have to write a function that computes the statistic and pass it through the func.stat argument; the func.stat.name argument gives the label displayed for it:

# Effect size used in this example: mean of x divided by its standard
# deviation, both computed after removing missing values.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
es <- function(x) {
  mean(x, na.rm = TRUE) / sd(x, na.rm = TRUE)
}

# The custom statistic is supplied through func.stat and labelled through
# func.stat.name
tab <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  total = TRUE,
  subjid = "SUBJID",
  func.stat = es,
  func.stat.name = "Effect size"
)

report.doc(
  tab,
  title = "Example of a specific statistic reporting",
  colspan.value = "Treatment group"
)

Table 2: Example of a specific statistic reporting

Treatment group

Statistics

A (N=30)

B (N=21)

C (N=17)

Total (N=68)

Effect size

0.98

1.57

1.53

1.16

Add a specific statistic to an existing table

You can also add one or several specific statistics to an existing table.

Let’s say you want the default statistics (mean, median, SD, etc.) plus some specific statistics like the mode or the coefficient of variation.

In this case, you can use the add.stat() function with the pos argument to choose where you want to add this statistic:

# Table holding the default statistics
tab1 <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  total = TRUE,
  subjid = "SUBJID"
)

 # Define the function corresponding to the coefficient of variation for example
 
# Coefficient of variation: standard deviation of y divided by its mean,
# both computed after removing missing values.
cv <- function(y) {
  spread <- sd(y, na.rm = TRUE)
  centre <- mean(y, na.rm = TRUE)
  spread / centre
}
 
 # We use the add.stat function to add CV at the second row:
 
tab1.cv <- add.stat(
  tab1,
  datafake,
  func.stat = cv,
  func.stat.name = "Coef. Var",
  pos = 2
)

report.doc(
  tab1.cv,
  title = "Example of adding a coefficient of variation"
)

Table 3: Example of adding a coefficient of variation

Statistics

A (N=30)

B (N=21)

C (N=17)

Total (N=68)

N

180

120

96

396

Coef. Var

1.03

0.64

0.65

0.86

Mean (SD)

1.46(1.50)

3.15(2.00)

3.87(2.52)

2.56(2.20)

Median

1.59

3.75

4.73

2.71

[Q1;Q3]

[0.45;2.50]

[2.46;4.44]

[3.44;5.30]

[1.04;4.33]

[Min;Max]

[-2.34;4.36]

[-2.44;6.19]

[-2.99;7.96]

[-2.99;7.96]

Missing

4

4

2

10


 # Same with 2 explanatory variables
 
# Statistics of y_numeric by GROUP and TIMEPOINT
tab <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  x2 = "TIMEPOINT",
  total = TRUE,
  subjid = "SUBJID",
  at.row = "TIMEPOINT"
)

# Insert the coefficient of variation at position 2
tab <- add.stat(
  tab,
  datafake,
  func.stat = cv,
  func.stat.name = "Coef. Var",
  pos = 2
)

 
 # And at position 5, we can add, for example, the mode
 
 # Most frequent non-missing value of x; when several values are tied, the
 # one encountered first is returned.
 # When x holds no non-missing value, a single NA of x's type is returned so
 # that the result always has length one.
 # Note: defining this function masks base::mode() in the current environment.
 mode <- function(x) {
   x <- na.omit(x)
   ux <- unique(x)
   if (length(ux) == 0L) {
     # Nothing to tabulate: which.max() would give integer(0) and the
     # subsetting below would return a zero-length vector
     return(ux[NA_integer_])
   }
   ux[which.max(tabulate(match(x, ux)))]
 }
 
 
 tab <- add.stat(
   tab,
   datafake,
   func.stat = mode,
   func.stat.name = "Mode",
   pos = 5
 )

 report.doc(
   tab,
   title = "Example of adding 2 more statistics in an existing table",
   colspan.value = "Treatment Group"
 )

Table 4: Example of adding 2 more statistics in an existing table

Treatment Group

TIMEPOINT

Statistics

A (N=30)

B (N=21)

C (N=17)

Total (N=68)

D0

N

30

20

16

66

Coef. Var

-0.92

-1.62

-0.78

-1.04

Mean (SD)

-0.93(0.86)

-0.67(1.09)

-1.19(0.92)

-0.92(0.95)

Median

-0.82

-0.69

-1.26

-0.86

Mode

-0.42

-0.36

-0.75

-0.42

[Q1;Q3]

[-1.59;-0.16]

[-1.39;-0.06]

[-1.62;-0.83]

[-1.55;-0.16]

[Min;Max]

[-2.34;0.36]

[-2.44;2.10]

[-2.99;0.66]

[-2.99;2.10]

Missing

1

1

0

2

D1

N

30

20

16

66

Coef. Var

0.57

0.31

0.14

0.52

Mean (SD)

1.83(1.04)

4.17(1.28)

4.98(0.69)

3.33(1.73)

Median

1.78

4.19

5.08

3.57

Mode

3.88

5.71

5.02

3.88

[Q1;Q3]

[ 0.94; 2.54]

[ 3.23; 4.92]

[ 4.58; 5.46]

[ 1.78; 4.91]

[Min;Max]

[ 0.11;3.88]

[ 1.48;6.19]

[ 3.80;6.23]

[ 0.11;6.23]

Missing

1

0

0

1

D2

N

30

20

16

66

Coef. Var

0.60

0.22

0.28

0.51

Mean (SD)

1.97(1.17)

4.04(0.89)

4.90(1.36)

3.32(1.70)

Median

1.66

4.19

5.06

3.57

Mode

1.35

4.35

6.58

1.35

[Q1;Q3]

[ 1.23; 2.86]

[ 3.62; 4.36]

[ 4.34; 5.20]

[ 1.89; 4.44]

[Min;Max]

[-0.18;4.36]

[ 2.03;5.63]

[ 2.39;7.96]

[-0.18;7.96]

Missing

1

1

0

2

D3

N

30

20

16

66

Coef. Var

0.66

0.25

0.22

0.56

Mean (SD)

1.78(1.17)

3.81(0.94)

5.07(1.12)

3.15(1.75)

Median

1.78

3.63

5.22

3.15

Mode

2.54

4.44

5.66

2.54

[Q1;Q3]

[ 0.93; 2.42]

[ 3.13; 4.44]

[ 4.11; 5.66]

[ 1.80; 4.39]

[Min;Max]

[-0.16;3.90]

[ 2.46;6.01]

[ 3.16;7.37]

[-0.16;7.37]

Missing

0

1

1

2

D4

N

30

20

16

66

Coef. Var

0.46

0.25

0.20

0.52

Mean (SD)

1.83(0.85)

3.80(0.95)

5.17(1.03)

3.22(1.66)

Median

1.67

3.83

4.88

3.16

Mode

2.32

3.50

4.88

2.32

[Q1;Q3]

[ 1.26; 2.32]

[ 3.12; 4.42]

[ 4.69; 5.50]

[ 1.69; 4.48]

[Min;Max]

[ 0.38;3.97]

[ 2.31;5.41]

[ 3.24;6.96]

[ 0.38;6.96]

Missing

1

1

1

3

D5

N

30

20

16

66

Coef. Var

0.53

0.33

0.22

0.45

Mean (SD)

2.27(1.20)

3.64(1.19)

4.43(0.98)

3.21(1.45)

Median

2.50

3.86

4.57

3.28

Mode

1.87

3.89

2.95

1.87

[Q1;Q3]

[ 1.77; 3.21]

[ 2.59; 4.60]

[ 3.44; 4.97]

[ 2.42; 4.44]

[Min;Max]

[-1.19;4.31]

[ 0.91;5.12]

[ 2.95;6.54]

[-1.19;6.54]

Missing

0

0

0

0

Row or column percentages

If you want to display the row percentages instead of column percentages in report.quali() function, you just have to set the argument percent.col to FALSE:

# percent.col = FALSE requests row percentages instead of column percentages
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  total = TRUE,
  subjid = "SUBJID",
  percent.col = FALSE
)

report.doc(
  tab,
  title = "Example of row percentage reporting",
  colspan.value = "Treatment group"
)

Table 5: Example of row percentage reporting

Treatment group

Levels

Statistics

A (N=30)

B (N=21)

C (N=17)

Total (N=68)

0

n (row %)

79(42.47%)

60(32.26%)

47(25.27%)

186(46.97%)

1

n (row %)

97(48.50%)

59(29.50%)

44(22.00%)

200(50.51%)

Missing n(%)

4(40.00%)

1(10.00%)

5(50.00%)

10(2.53%)

Drop levels

If you want to drop some levels in the table, you can use drop.x1, drop.x2 or, if ‘y’ is a qualitative variable, drop.y.

For example, if we just want the statistics of group A:

# Levels B and C of GROUP are dropped, so only group A is reported
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  drop.x1 = c("B", "C")
)

report.doc(
  tab,
  title = "Example of dropping levels",
  colspan.value = "Treatment group"
)

Table 6: Example of dropping levels

Treatment group

Levels

Statistics

A (N=30)

0

n (column %)

79(43.89%)

1

n (column %)

97(53.89%)

Missing n(%)

4(2.22%)

Drop missing values

Sometimes it can be useful to remove the missing values so that percentages are computed only on non-missing observations. For that, just use the remove.missing argument in report.quali:

# remove.missing = TRUE removes the missing values from the table
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  remove.missing = TRUE
)

report.doc(
  tab,
  title = "Example of dropping missing values",
  colspan.value = "Treatment group"
)

Table 7: Example of dropping missing values

Treatment group

Levels

Statistics

A

B

C

0

n (column %)

79(44.89%)

60(50.42%)

47(51.65%)

1

n (column %)

97(55.11%)

59(49.58%)

44(48.35%)

Transpose Descriptive statistics (experimental)


# Qualitative table of y_logistic by GROUP, with missing values removed.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  remove.missing = TRUE
)

# The default output
report.doc(tab)

Table 8: Qualitative descriptive statistics of : y_logistic

Levels

Statistics

A (N=30)

B (N=21)

C (N=17)

0

n (column %)

79(44.89%)

60(50.42%)

47(51.65%)

1

n (column %)

97(55.11%)

59(49.58%)

44(48.35%)


# Same table, transposed before being reported
report.doc(
  transpose(tab)
)

Table 9: Qualitative descriptive statistics of : y_logistic

GROUP

Levels

n (GROUP%)

A (N=30)

0

79(44.89%)

1

97(55.11%)

B (N=21)

0

60(50.42%)

1

59(49.58%)

C (N=17)

0

47(51.65%)

1

44(48.35%)

Transpose LS-Means

# Linear model of y_numeric on GROUP, then pairwise comparisons of the
# GROUP estimated marginal means
mod <- lm(y_numeric ~ GROUP, data = datafake)
pairs <- pairs(emmeans(mod, ~ GROUP))

# transpose = TRUE requests the transposed layout
tab <- report.lsmeans(pairs, transpose = TRUE)

report.doc(
  tab,
  title = "Example of transposing LS-Means in column",
  colspan.value = "Treatment group"
)

Table 10: Example of transposing LS-Means in column

Treatment group

contrast

Estimate (SE)

95% CI

P-value

A - B

-1.68(0.23)

[-2.23;-1.13]

<0.001

A - C

-2.41(0.25)

[-2.99;-1.82]

<0.001

B - C

-0.72(0.27)

[-1.36;-0.09]

0.020