Loading data from AWS external stage S3 only particular file (Stage contains multiple files) to Snowflake table using bulk data loading method.
Step1: Create empty table in Snowflake using below DDL command
create TABLE TRIPS_1
(TRIPDURATION NUMBER(38,0),
STARTTIME TIMESTAMP_NTZ(9),
STOPTIME TIMESTAMP_NTZ(9),
START_STATION_ID NUMBER(38,0),
START_STATION_NAME VARCHAR(16777216),
START_STATION_LATITUDE FLOAT,
START_STATION_LONGITUDE FLOAT,
END_STATION_ID NUMBER(38,0),
END_STATION_NAME VARCHAR(16777216),
END_STATION_LATITUDE FLOAT,
END_STATION_LONGITUDE FLOAT,
BIKEID NUMBER(38,0),
MEMBERSHIP_TYPE VARCHAR(16777216),
USERTYPE VARCHAR(16777216),
BIRTH_YEAR NUMBER(38,0),
GENDER NUMBER(38,0));
Step2: Creating AWS External stage in Snowflake Environment
CREATE STAGE TRIPS_CSV_SINGLEFILE
URL = 's3://snowflake-workshop-lab/citibike-trips-csv'
COMMENT = 'EXTERNAL STAGE FOR CITIBIKE LOADING PROCESS FOR SINGLE FILE';
Step3: Creating FILEFORMAT for loading complete data without missing any single record
CREATE FILE FORMAT CSV_FORMAT
TYPE = 'CSV' COMPRESSION = 'AUTO'
FIELD_DELIMITER = ','
RECORD_DELIMITER = '\n'
SKIP_HEADER = 0
FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
TRIM_SPACE = FALSE
ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE
ESCAPE = 'NONE'
ESCAPE_UNENCLOSED_FIELD = '\134'
DATE_FORMAT = 'AUTO'
TIMESTAMP_FORMAT = 'AUTO' NULL_IF = ('')
COMMENT = 'FILE FORMAT TO LOAD CSV - CITIBIKE DATA WITHOUT ANY ERROR RECORDS';
Step4: Loading Stage Data into Snowflake table using Bulk Data loading Method
copy into TRIPS_1
FROM @CITIBIKE.PUBLIC.CITIBIKE_CSV/trips_2018_7_3_0.csv.gz
file_format = csv_format;
Loading data from AWS external stage S3 from a particular file and only getting particular columns to Snowflake table using bulk data loading method.
Step1: Create empty table in Snowflake using below DDL command
create TABLE TRIPS_2
(TRIPDURATION NUMBER(38,0),
STARTTIME TIMESTAMP_NTZ(9),
STOPTIME TIMESTAMP_NTZ(9),
START_STATION_ID NUMBER(38,0),
START_STATION_NAME VARCHAR(16777216),
START_STATION_LATITUDE FLOAT,
START_STATION_LONGITUDE FLOAT,
END_STATION_ID NUMBER(38,0),
END_STATION_NAME VARCHAR(16777216),
END_STATION_LATITUDE FLOAT,
END_STATION_LONGITUDE FLOAT,
BIKEID NUMBER(38,0),
MEMBERSHIP_TYPE VARCHAR(16777216),
USERTYPE VARCHAR(16777216),
BIRTH_YEAR NUMBER(38,0),
GENDER NUMBER(38,0));
Step2: Creating AWS External stage in Snowflake Environment
CREATE STAGE TRIPS_CSV_SINGLEFILE
URL = 's3://snowflake-workshop-lab/citibike-trips-csv'
COMMENT = 'EXTERNAL STAGE FOR CITIBIKE LOADING PROCESS FOR SINGLE FILE';
Step3: Creating FILEFORMAT for loading data
CREATE FILE FORMAT CSV_FORMAT
TYPE = 'CSV' COMPRESSION = 'AUTO'
FIELD_DELIMITER = ','
RECORD_DELIMITER = '\n'
SKIP_HEADER = 0
FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
TRIM_SPACE = FALSE
ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE
ESCAPE = 'NONE'
ESCAPE_UNENCLOSED_FIELD = '\134'
DATE_FORMAT = 'AUTO'
TIMESTAMP_FORMAT = 'AUTO' NULL_IF = ('')
COMMENT = 'FILE FORMAT TO LOAD CSV - CITIBIKE DATA WITHOUT ANY ERROR RECORDS';
Step4: Loading Stage Data into Snowflake table using Bulk Data loading Method
copy into TRIPS_2 ( $1,$2,$3,$4,$5)
FROM @CITIBIKE.PUBLIC.TRIPS_CSV_SINGLEFILE /trips_2018_7_3_0.csv.gz
file_format = csv_format;
Loading data from AWS external stage S3 from a particular file contains 16 columns data but Snowflake table contains only first 5 columns using bulk data loading method.
Step1: Create empty table in Snowflake using below DDL command
create TABLE TRIPS_3 (
TRIPDURATION NUMBER(38,0),
STARTTIME TIMESTAMP_NTZ(9),
STOPTIME TIMESTAMP_NTZ(9),
START_STATION_ID NUMBER(38,0),
START_STATION_NAME VARCHAR(16777216));
Step2: Creating AWS External stage in Snowflake Environment
CREATE STAGE TRIPS_CSV_SINGLEFILE
URL = 's3://snowflake-workshop-lab/citibike-trips-csv'
COMMENT = 'EXTERNAL STAGE FOR CITIBIKE LOADING PROCESS FOR SINGLE FILE';
Step3: Creating FILEFORMAT for loading data
CREATE FILE FORMAT CSV_FORMAT
TYPE = 'CSV' COMPRESSION = 'AUTO'
FIELD_DELIMITER = ','
RECORD_DELIMITER = '\n'
SKIP_HEADER = 0
FIELD_OPTIONALLY_ENCLOSED_BY = '\042'
TRIM_SPACE = FALSE
ERROR_ON_COLUMN_COUNT_MISMATCH = FALSE
ESCAPE = 'NONE'
ESCAPE_UNENCLOSED_FIELD = '\134'
DATE_FORMAT = 'AUTO'
TIMESTAMP_FORMAT = 'AUTO' NULL_IF = ('')
COMMENT = 'FILE FORMAT TO LOAD CSV - CITIBIKE DATA WITHOUT ANY ERROR RECORDS';
Step4: Loading Stage Data into Snowflake table using Bulk Data loading Method
copy into TRIPS_3
select $1,$2,$3,$4,$5 FROM @CITIBIKE.PUBLIC.TRIPS_CSV_SINGLEFILE /trips_2018_7_3_0.csv.gz
file_format = csv_format;