MergeDatasets

Merges datasets holding overlapping cases but different variables. The merge may be controlled by keys or grouping variables.

Properties

Name	Type		Description
MergeFiles	MergeFileDescription	2..n	Description of files to be merged.
MergeByVariables	VariableReferenceBase	0..1	A variable or list of variables that acts as the unique case identifier across datasets. If MergeByVariables is absent, MergeType must be “sequential” on all files.
FirstVariable	string	0..1	The name of a variable set to 1 for the first row of each group of cases with the same value for the MergeByVariables variables and set to 0 for all other rows.
LastVariable	string	0..1	The name of a variable set to 1 for the last row of each group of cases with the same value for the MergeByVariables variables and set to 0 for all other rows.

Properties Inherited from TransformBase

Name	Type		Description
ProducesDataframe	DataframeDescription	0..n	Signifies the dataframe that this transform produces.
ConsumesDataframe	DataframeDescription	0..n	Signifies the dataframe that this transform acts upon.

Properties Inherited from CommandBase

Name	Type		Description
Command	string	1..1	The type of command
SourceInformation	SourceInformation	0..n	Information about the source of the command.
MessageText	string	0..n	Adds a message that can be displayed with the command.

Item Type Hierarchy

CommandBase
- TransformBase
  
  MergeDatasets

Relationships

The following identified item types reference this type.

stub

Merge_options

Properties and Options of MergeFileDescription

SPSS_merge_examples

==================== EXAMPLE 1 ====================================

MATCH FILES
    /FILE='merge_1.sav'
   /file='merge_2.sav'
    .


{"command": "MergeDatasets",
     "$type": "MergeDatasets",
    "MergeFiles": [
    "mergeFileDescription":
        {"fileName": "merge_1.sav",
        "mergeType": "Sequential",
             "newRow": TRUE
        },
    "MergeFileDescription":
        {"fileName": "merge_2.sav",
        "mergeType": "Sequential",
             "newRow": TRUE
        }
        ]
    }

==================== EXAMPLE 2 ====================================

MATCH FILES
    /FILE='merge_1.sav'
   /in=from_f1
   /file='merge_3.sav'
   /in=from_f3
   /RENAME= (VAR3=VARx)
   /KEEP= id VAR2 VARx
   /by id
   /first=firstvar
   /last=lastvar
  .


{"command": "MergeDatasets",
     "$type": "MergeDatasets",
    "mergeByVariables": [ {"$type": "VariableSymbolExpression",
                            "VariableName":"id"}      ],
    "firstVariable": "firstvar",
    "lastVariable": "lastvar",
     "mergeFiles": [
        "mergeFileDescription":
                     {"fileName": "merge_1.sav",
                     "mergeType": "OneToOne",
                     "mergeFlagVariable":"from_f1",
                     "newRow": TRUE
                     },
             "mergeFileDescription":
                     {"fileName": "merge_3.sav",
                     "mergeType": "OneToOne",
                     "mergeFlagVariable":"from_f3",
                     "renameVariable":["RenamePair":
                             {"OldVariable":"VAR3","NewVariable":"VARx"}  ],
                     "newRow": FALSE
                     }
        ]
    },
{"command": "KeepVariables",
             "$type": "KeepVariables",
             "variables": {"$type": "VariableListExpression",
                             "variables":
                                     [ {"$type": "VariableSymbolExpression",
                                             "VariableName":"id"},
                                             {"$type": "VariableSymbolExpression",
                                             "VariableName":"VAR2"},
                                             {"$type": "VariableSymbolExpression",
                                             "VariableName":"VARx"}
                                             ]
                                     },
             "messageText": "NOTE: This KeepVariables command is after the MergeDatasets command, because it applies to the output dataframe."
             }

Stata_merge_examples

NOTE:  These Stata Merge options are not represented in SDTL:
     noreport
     nolabel
     nonotes
     sorted

==================== EXAMPLE 1 ====================================

use "mergedat1.dta", clear
merge 1:1 _n using "mergedat4.dta"
list _all

{"command": "MergeDatasets",
    "mergeFiles": [
    "mergeFileDescription":
        {"fileName": "Active file",
        "mergeType": "Sequential",
        "newRow": TRUE,
        "mergeFlagVariable":"_merge"},
    "mergeFileDescription":
        {"fileName": "mergedat4.dta",
        "mergeType": "Sequential",
        "newRow": TRUE}
        ]
    },
{"$type": "SetValueLabels",
     "command": "SetValueLabels",
     "variables": [
                     {"$type": "VariableSymbolExpression",
                             "variableName": "_merge"}
                     ],
     "labels": [
                     {"value": 1,  "label": "master"},
                     {"value": 2,  "label": "using"},
                     {"value": 3,  "label": "match"},
                     {"value": 4,  "label": "match_update"},
                     {"value": 5,  "label": "match_conflict"}
                     ]
     }

==================== EXAMPLE 2 ====================================

use "mergedat1.dta", clear
merge 1:1 id using "mergedat3b.dta" ,  update  gener(matchVar)
list _all

{"command": "MergeDatasets",
    "MergeFiles": [
    "MergeFileDescription":
        {"fileName": "Active Dataframe",
        "mergeType": "1:1",
        "update": "Master",
        "mergeFlagVariable":"matchVar",
        "newRow": TRUE},
    "MergeFileDescription":
        {"fileName": "mergedat3b.dta",
        "mergeType": "1:1",
        "update": "UpdateMissing",
        "newRow":TRUE}
        ],
    "MergeByVariables": {"$type": "VariableSymbolExpression",
                            "VariableName":"id"}
    },
{"$type": "SetValueLabels",
     "command": "SetValueLabels",
     "variables": [
                     {"$type": "VariableSymbolExpression",
                             "variableName": "matchVar"}
                     ],
     "labels": [
                     {"value": 1,  "label": "master"},
                     {"value": 2,  "label": "using"},
                     {"value": 3,  "label": "match"},
                     {"value": 4,  "label": "match_update"},
                     {"value": 5,  "label": "match_conflict"}
                     ]
     }

==================== EXAMPLE 3 ====================================

use "mergedat1.dta", clear
merge 1:1 id using "mergedat3b.dta" , update replace  keepusing(lastname)


{"command": "MergeDatasets",
     "$type": "MergeDatasets",
    "mergeByVariables":{"$type": "VariableSymbolExpression",
                            "variableName":"id"},
    "mergeFiles": [
    "mergeFileDescription":
        {"fileName": "Active file",
        "mergeType": "1:1",
        "update": "UpdateMissing",
        "mergeFlagVariable":"_merge",
        "newRow":"False"},
    "mergeFileDescription":
        {"fileName": "mergedat3b.dta",
        "mergeType": "1:1",
        "update": "Master",
        "newRow":"True",
        "keepVariables":{"$type": "VariableSymbolExpression",
                            "variableName":"lastname"}
        }
        ]
    },
{"$type": "SetValueLabels",
     "command": "SetValueLabels",
     "variables": [
                     {"$type": "VariableSymbolExpression",
                             "variableName": "_merge"}
                     ],
     "labels": [
                     {"value": 1,  "label": "master"},
                     {"value": 2,  "label": "using"},
                     {"value": 3,  "label": "match"},
                     {"value": 4,  "label": "match_update"},
                     {"value": 5,  "label": "match_conflict"}
                     ]
     }