ClinReport Vignette 3: Modify the tables

Objective

In this vignette we will see how to modify your outputs. Basically you can:

Mix qualitative and quantitative descriptive statistics: regroup() function
Choose the quantitative statistics you want to display: report.quanti() function
Report row percentages instead of column percentages: report.quali() function
Add a specific statistic to an existing table: add.stat() function
Split an existing table in 2 parts: split() function
Drop one or several levels of a variable in a table
Transpose the statistics in columns

Load the usual R packages

Start by loading all usual libraries.

library(ClinReport)
library(officer)
library(flextable)
library(emmeans)

Load your data.

# Load the example (fake) dataset and preview its first rows
data("datafake")
print(head(datafake, n = 6L))
#>    y_numeric y_logistic y_poisson   baseline   VAR GROUP TIMEPOINT SUBJID
#> 1 -0.4203490          1         5 -0.4203490 Cat 1     A        D0 Subj 1
#> 2 -0.1570941          1         5 -0.1570941 Cat 2     A        D0 Subj 1
#> 3         NA          0         3 -2.0853720 Cat 2     A        D0 Subj 1
#> 4 -0.4728527          0         5 -0.4728527 Cat 1     A        D0 Subj 1
#> 5 -0.8651713          1         4 -0.8651713 Cat 1     A        D0 Subj 1
#> 6 -1.5476907          1         3 -1.5476907 Cat 1     A        D0 Subj 1

Mixed Quantitative and Qualitative statistics

You can mix qualitative and quantitative outputs. This is only possible when there is a single explanatory variable, and it must be the same variable for both responses:

# Quantitative statistics of y_numeric by GROUP
tab1 <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  subjid = "SUBJID",
  y.label = "Y numeric"
)

# Qualitative statistics of y_logistic by the same GROUP variable
tab2 <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  y.label = "Y logistic"
)

# Stack both tables into a single one; rbind.label names the first column
tab3 <- regroup(tab1, tab2, rbind.label = "The label of your choice")

report.doc(
  tab3,
  title = "Mixed Qualitative and Quantitative output",
  colspan.value = "Treatment group"
)

Table 1: Mixed Qualitative and Quantitative output
			Treatment group
The label of your choice	Levels	Statistics	A (N=30)	B (N=21)	C (N=17)
Y numeric		N	180	120	96
		Mean (SD)	1.46(1.50)	3.15(2.00)	3.87(2.52)
		Median	1.59	3.75	4.73
		[Q1;Q3]	[0.45;2.50]	[2.46;4.44]	[3.44;5.30]
		[Min;Max]	[-2.34;4.36]	[-2.44;6.19]	[-2.99;7.96]
		Missing	4	4	2

Y logistic	0	n (column %)	79(43.89%)	60(50.00%)	47(48.96%)
	1	n (column %)	97(53.89%)	59(49.17%)	44(45.83%)
		Missing n(%)	4(2.22%)	1(0.83%)	5(5.21%)

Report a specific quantitative statistic

You have to define the function that computes the statistic and pass it through the func.stat argument (func.stat.name sets the label displayed in the table):

# Effect size of a numeric vector: mean divided by standard deviation,
# both computed after removing missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
es <- function(x) {
  mean(x, na.rm = TRUE) / sd(x, na.rm = TRUE)
}

# Report only the custom statistic computed by es(), per group and in total
tab <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  total = TRUE,
  subjid = "SUBJID",
  func.stat = es,
  func.stat.name = "Effect size"
)

report.doc(
  tab,
  title = "Example of a specific statistic reporting",
  colspan.value = "Treatment group"
)

Table 2: Example of a specific statistic reporting
	Treatment group
Statistics	A (N=30)	B (N=21)	C (N=17)	Total (N=68)
Effect size	0.98	1.57	1.53	1.16

Add specific statistics to an existing table

You can also add one or several specific statistics to an existing table.

Let’s say you want the default statistics (mean, median, SD, etc.) plus some specific statistics like the mode or the coefficient of variation.

In this case, you can use the add.stat() function with the pos argument to choose where you want to add this statistic:

# Table holding the default statistics:

tab1 <- report.quanti(
  data = datafake,
  y = "y_numeric",
  x1 = "GROUP",
  total = TRUE,
  subjid = "SUBJID"
)

 # Coefficient of variation: standard deviation divided by the mean,
 # both computed after removing missing values

cv <- function(y) {
  spread <- sd(y, na.rm = TRUE)
  centre <- mean(y, na.rm = TRUE)
  spread / centre
}
 
 # Insert the coefficient of variation as the second row of the table:

tab1.cv <- add.stat(
  tab1,
  datafake,
  func.stat = cv,
  func.stat.name = "Coef. Var",
  pos = 2
)

report.doc(
  tab1.cv,
  title = "Example of adding a coefficient of variation"
)

Table 3: Example of adding a coefficient of variation
Statistics	A (N=30)	B (N=21)	C (N=17)	Total (N=68)
N	180	120	96	396
Coef. Var	1.03	0.64	0.65	0.86
Mean (SD)	1.46(1.50)	3.15(2.00)	3.87(2.52)	2.56(2.20)
Median	1.59	3.75	4.73	2.71
[Q1;Q3]	[0.45;2.50]	[2.46;4.44]	[3.44;5.30]	[1.04;4.33]
[Min;Max]	[-2.34;4.36]	[-2.44;6.19]	[-2.99;7.96]	[-2.99;7.96]
Missing	4	4	2	10


 # Same approach with 2 explanatory variables (GROUP and TIMEPOINT)

 tab <- report.quanti(
   data = datafake,
   y = "y_numeric",
   x1 = "GROUP",
   x2 = "TIMEPOINT",
   total = TRUE,
   subjid = "SUBJID",
   at.row = "TIMEPOINT"
 )

 # Add the coefficient of variation at position 2
 tab <- add.stat(
   tab,
   datafake,
   func.stat = cv,
   func.stat.name = "Coef. Var",
   pos = 2
 )

 
 # And on position 5, we can add for example the mode.
 #
 # mode(x) returns the most frequent non-missing value of x. When several
 # values share the highest count, the one that appears first in x is returned.
 # If x has no non-missing value, a single NA of the same type as x is
 # returned, so the function always yields exactly one value.
 # NOTE: this definition masks base::mode() in the calling environment; the
 # name is kept because it is passed as func.stat=mode right after.

 mode <- function(x) {
   x <- x[!is.na(x)]
   if (length(x) == 0L) {
     # Indexing with NA yields one missing value that keeps the type of x
     return(x[NA_integer_])
   }
   ux <- unique(x)
   # Count the occurrences of each distinct value and keep the most frequent
   ux[which.max(tabulate(match(x, ux)))]
 }
 
 
 # Add the mode at position 5 of the existing table, then render it
 tab <- add.stat(
   tab,
   datafake,
   func.stat = mode,
   func.stat.name = "Mode",
   pos = 5
 )

 report.doc(
   tab,
   title = "Example of adding 2 more statistics in an existing table",
   colspan.value = "Treatment Group"
 )

Table 4: Example of adding 2 more statistics in an existing table
		Treatment Group
TIMEPOINT	Statistics	A (N=30)	B (N=21)	C (N=17)	Total (N=68)
D0	N	30	20	16	66
	Coef. Var	-0.92	-1.62	-0.78	-1.04
	Mean (SD)	-0.93(0.86)	-0.67(1.09)	-1.19(0.92)	-0.92(0.95)
	Median	-0.82	-0.69	-1.26	-0.86
	Mode	-0.42	-0.36	-0.75	-0.42
	[Q1;Q3]	[-1.59;-0.16]	[-1.39;-0.06]	[-1.62;-0.83]	[-1.55;-0.16]
	[Min;Max]	[-2.34;0.36]	[-2.44;2.10]	[-2.99;0.66]	[-2.99;2.10]
	Missing	1	1	0	2

D1	N	30	20	16	66
	Coef. Var	0.57	0.31	0.14	0.52
	Mean (SD)	1.83(1.04)	4.17(1.28)	4.98(0.69)	3.33(1.73)
	Median	1.78	4.19	5.08	3.57
	Mode	3.88	5.71	5.02	3.88
	[Q1;Q3]	[ 0.94; 2.54]	[ 3.23; 4.92]	[ 4.58; 5.46]	[ 1.78; 4.91]
	[Min;Max]	[ 0.11;3.88]	[ 1.48;6.19]	[ 3.80;6.23]	[ 0.11;6.23]
	Missing	1	0	0	1

D2	N	30	20	16	66
	Coef. Var	0.60	0.22	0.28	0.51
	Mean (SD)	1.97(1.17)	4.04(0.89)	4.90(1.36)	3.32(1.70)
	Median	1.66	4.19	5.06	3.57
	Mode	1.35	4.35	6.58	1.35
	[Q1;Q3]	[ 1.23; 2.86]	[ 3.62; 4.36]	[ 4.34; 5.20]	[ 1.89; 4.44]
	[Min;Max]	[-0.18;4.36]	[ 2.03;5.63]	[ 2.39;7.96]	[-0.18;7.96]
	Missing	1	1	0	2

D3	N	30	20	16	66
	Coef. Var	0.66	0.25	0.22	0.56
	Mean (SD)	1.78(1.17)	3.81(0.94)	5.07(1.12)	3.15(1.75)
	Median	1.78	3.63	5.22	3.15
	Mode	2.54	4.44	5.66	2.54
	[Q1;Q3]	[ 0.93; 2.42]	[ 3.13; 4.44]	[ 4.11; 5.66]	[ 1.80; 4.39]
	[Min;Max]	[-0.16;3.90]	[ 2.46;6.01]	[ 3.16;7.37]	[-0.16;7.37]
	Missing	0	1	1	2

D4	N	30	20	16	66
	Coef. Var	0.46	0.25	0.20	0.52
	Mean (SD)	1.83(0.85)	3.80(0.95)	5.17(1.03)	3.22(1.66)
	Median	1.67	3.83	4.88	3.16
	Mode	2.32	3.50	4.88	2.32
	[Q1;Q3]	[ 1.26; 2.32]	[ 3.12; 4.42]	[ 4.69; 5.50]	[ 1.69; 4.48]
	[Min;Max]	[ 0.38;3.97]	[ 2.31;5.41]	[ 3.24;6.96]	[ 0.38;6.96]
	Missing	1	1	1	3

D5	N	30	20	16	66
	Coef. Var	0.53	0.33	0.22	0.45
	Mean (SD)	2.27(1.20)	3.64(1.19)	4.43(0.98)	3.21(1.45)
	Median	2.50	3.86	4.57	3.28
	Mode	1.87	3.89	2.95	1.87
	[Q1;Q3]	[ 1.77; 3.21]	[ 2.59; 4.60]	[ 3.44; 4.97]	[ 2.42; 4.44]
	[Min;Max]	[-1.19;4.31]	[ 0.91;5.12]	[ 2.95;6.54]	[-1.19;6.54]
	Missing	0	0	0	0

Row or column percentages

If you want to display row percentages instead of column percentages with the report.quali() function, you just have to set the argument percent.col to FALSE:

# percent.col = FALSE switches the table to row percentages
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  total = TRUE,
  subjid = "SUBJID",
  percent.col = FALSE
)

report.doc(
  tab,
  title = "Example of row percentage reporting",
  colspan.value = "Treatment group"
)

Table 5: Example of row percentage reporting
		Treatment group
Levels	Statistics	A (N=30)	B (N=21)	C (N=17)	Total (N=68)
0	n (row %)	79(42.47%)	60(32.26%)	47(25.27%)	186(46.97%)
1	n (row %)	97(48.50%)	59(29.50%)	44(22.00%)	200(50.51%)
	Missing n(%)	4(40.00%)	1(10.00%)	5(50.00%)	10(2.53%)

Drop levels

If you want to drop some levels from the table, you can use drop.x1, drop.x2, or drop.y (if ‘y’ is a qualitative variable).

For example, if we only want the statistics of group A:

# Keep only group A by dropping levels B and C of GROUP
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  drop.x1 = c("B", "C")
)

# The title describes this example (dropping levels), not row percentages
report.doc(
  tab,
  title = "Example of dropping levels",
  colspan.value = "Treatment group"
)

Table 6: Example of dropping levels
		Treatment group
Levels	Statistics	A (N=30)
0	n (column %)	79(43.89%)
1	n (column %)	97(53.89%)
	Missing n(%)	4(2.22%)

Drop missing values

Sometimes it can be useful to remove the missing values so that percentages are computed only on non-missing observations. To do that, just use the remove.missing argument in report.quali():

# remove.missing = TRUE leaves the missing values out of the table
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  remove.missing = TRUE
)

report.doc(
  tab,
  title = "Example of dropping missing values",
  colspan.value = "Treatment group"
)

Table 7: Example of dropping missing values
		Treatment group
Levels	Statistics	A	B	C
0	n (column %)	79(44.89%)	60(50.42%)	47(51.65%)
1	n (column %)	97(55.11%)	59(49.58%)	44(48.35%)

Transpose Descriptive statistics (experimental)


# Qualitative table used for the transposition example.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
tab <- report.quali(
  data = datafake,
  y = "y_logistic",
  x1 = "GROUP",
  subjid = "SUBJID",
  remove.missing = TRUE
)

# The default output
report.doc(tab)

Table 8: Qualitative descriptive statistics of : y_logistic
Levels	Statistics	A (N=30)	B (N=21)	C (N=17)
0	n (column %)	79(44.89%)	60(50.42%)	47(51.65%)
1	n (column %)	97(55.11%)	59(49.58%)	44(48.35%)


# The transposed output: transpose() is applied to the table before it is
# rendered; in the result shown below the GROUP levels appear as rows
report.doc(transpose(tab))

Table 9: Qualitative descriptive statistics of : y_logistic
GROUP	Levels	n (GROUP%)
A (N=30)	0	79(44.89%)
A (N=30)	1	97(55.11%)
B (N=21)	0	60(50.42%)
B (N=21)	1	59(49.58%)
C (N=17)	0	47(51.65%)
C (N=17)	1	44(48.35%)

Transpose LS-Means

# Linear model of y_numeric explained by GROUP
mod <- lm(y_numeric ~ GROUP, data = datafake)

# Pairwise contrasts between the GROUP LS-means. The result is stored under a
# name that does not shadow the pairs() generic it is computed with.
group_contrasts <- pairs(emmeans(mod, ~GROUP))

# transpose = TRUE requests the transposed layout of the LS-means report
tab <- report.lsmeans(group_contrasts, transpose = TRUE)

report.doc(
  tab,
  title = "Example of transposing LS-Means in column",
  colspan.value = "Treatment group"
)

Table 10: Example of transposing LS-Means in column
	Treatment group
contrast	Estimate (SE)	95% CI	P-value
A - B	-1.68(0.23)	[-2.23;-1.13]	<0.001
A - C	-2.41(0.25)	[-2.99;-1.82]	<0.001
B - C	-0.72(0.27)	[-1.36;-0.09]	0.020

Jean-Francois COLLIN

2019-08-14