*************************************************************;
* Survival times for two groups of chronic hepatitis patients,
*   each with 22 individuals.
* Lifetimes in months
* Censoring (status):  observed death=0  censored=1
*
* In the SAS `data step' below:
*  `input zz$ status @@' tells SAS to read two words at a time from
*   the datalines block without discarding the rest of the line.
*   Without the `trailing @@', SAS would read the first two words
*   from each line in the `datalines' block and ignore the rest of
*   the line. This would result in reading only 6 survival times
*   instead of 44. (The $ means that zz is a text variable instead
*   of a numerical variable.)
*
* The command `informat zz $10.' warns SAS that zz$ might be as long
*   as 10 characters. Unless warned, SAS truncates all text variables
*   at 8 characters. If this command were left out, all instances of
*   `Prednisone' in the output would be truncated to `Predniso'.
*
* In the datalines block below, if zz is a group heading (indicated
*   by status=-1), then SAS sets group=zz. The command `retain group'
*   tells SAS to remember the value of `group' from record to record.
*   Otherwise, SAS resets all variables to `unknown' after writing
*   each record.
*
* If `zz' and `status' are actual data values (indicated by status=0
*   for an observed death or status=1 for censored), then 
*   `months=input(zz,12.0)' converts the text string zz$ to a
*   number and stores it in `months'.
*
* The resulting dataset has 46 records with four variables:
*   group, zz (a scratch variable), status, and months. Two of the
*   46 records were written from group headings. These records have
*   months=`unknown' and will be ignored by SAS, leaving the 44
*   records that we want.
*
* See `ltcmf.sas' for more details about the SAS program.
*
* Data from E.T.Lee, ``Statistical methods for survival data
*   analysis'', 2nd edition.
*************************************************************;

* Page layout: 75-column lines, 60-line pages, numbering from 1, left-justified;
options linesize=75 pagesize=60 pageno=1 nocenter;

* First two title lines, in effect for all output that follows;
title  'Survival times in months for two groups - YOUR NAME';
title2 'Two groups of chronic hepatitis patients';

* See below for an even more compact way to enter Survival Data ;
*  (and create the same SAS dataset)  ;

* Build data set hepat from (zz, status) pairs in the datalines block;
* A pair with negative status is a group heading rather than a survival time;
data hepat;
  retain group;            * keep the current group name from record to record;
  informat zz $10.;        * zz may be up to 10 characters long;
  input zz $ status @@;    * trailing @@ holds the line so the next pair is read from it;
  if status >= 0 then months = input(zz, 12.0);  * data pair, so convert the text to a number;
  else group = zz;                               * heading pair, so start a new group;
datalines;
  Prednisone -1
   2 0     6 0    12 0    54 0    56 1    68 0    89 0    96 0
  96 0   125 1   128 1   131 1   140 1   141 1   143 0   145 1
 146 0   148 1   162 1   168 0   173 1   181 1
  Control -1
   2 0     3 0     4 0     7 0    10 0    22 0    28 0    29 0 
  32 0    37 0    40 0    41 0    54 0    61 0    63 0    71 0 
 127 1   140 1   146 1   158 1   167 1   182 1
   ;

* List every record of hepat, including the two group-heading records;
proc print data=hepat;
  title3 'The data as SAS sees it:';
run;

*************************************************************;
* Use SAS's `proc lifetest' to plot S(t) for the two groups and
*   also compare the two groups using three different
*   two-sample tests.
* The option `notable' means DON'T display detailed tables for
*   Kaplan-Meier estimates. (Default is to include these tables.)
*************************************************************;

* Plot the survival curves and run the two-sample tests, one stratum per group;
proc lifetest data=hepat plots=(s) notable lineprinter;
  title3 'Proc lifetest plots and tests';
  time months*status(1);   * status=1 marks a censored time;
  strata group;
run;


*************************************************************;
* A common convention in Survival Analysis is to use the fact that
*   survival times must be positive: If a survival time is entered
*   with a minus sign, this cannot be literally true, and one can
*   follow a convention that this means a censored time whose true
*   value is the absolute value.
*
* In the data step, a STATEMENT within parentheses (like (xx<0))
*   evaluates as 1 if the statement is true and as 0 if the
*   statement is false. Thus  status=(xx<0)  evaluates as
*   status=1 if xx<0 (if the value is censored) and status=0
*   for an observed death, which is what we want in this case.
*
* The `output' command tells SAS to write a record to the dataset
*   using the current value of variables. If a data step has no
*   `output' commands, then SAS, in effect, supplies one at the
*   end of the data step. If `output' occurs anywhere within a
*   data step, then SAS creates a data set record ONLY when you
*   say `output'. The result below is that the SAS data set only
*   has 44 records for the actual data values. The two scratch
*   records in `data hepat' caused by changing the group name
*   (these had missing `months' values and so were ignored anyway)
*   no longer appear. See `ltangina.sas' for another example of
*   the `output' command.
*************************************************************;

* Build data set hepat2 from single words in the datalines block;
* A group name starts a new group and any other word is a signed survival time;
data hepat2;
  retain group;          * keep the current group name from record to record;
  informat zz $10.;      * zz may be up to 10 characters long;
  input zz $ @@;         * trailing @@ holds the line so the next word is read from it;
  if zz in ('Prednisone', 'Control') then group = zz;
  else do;
    xx = input(zz, 12.0);    * signed value as entered;
    months = abs(xx);        * survival or censoring time;
    status = (xx < 0);       * =1 if xx<0, =0 if xx>0;
    output;                  * write records for data values only;
  end;
datalines;
  Prednisone
    2      6     12     54    -56     68     89     96
   96   -125   -128   -131   -140   -141    143   -145
  146   -148   -162    168   -173   -181
  Control
    2      3      4      7     10     22     28     29 
   32     37     40     41     54     61     63     71 
 -127   -140   -146   -158   -167   -182
   ;


* List every record of hepat2, which holds data values only;
proc print data=hepat2;
  title3 'Reading the data in an alternative way.';
  title4 'The data as SAS sees it:';
run;


* Repeat the two-sample tests on hepat2, one stratum per group;
* Title statements stay in effect until replaced, so title4 is reset here.
*   Otherwise this output would still carry the data-listing heading
*   set for the preceding proc print;
* The input data set is named explicitly rather than taken from the
*   most recently created data set;
proc lifetest data=hepat2 notable;
  title4 'Proc lifetest tests';
  strata group;
  time months*status(1);   * status=1 marks a censored time;
run;

