44 use scale_prc, only: &
45 prc_domain_nlim, &
46 prc_comm_null, &
47 prc_abort, &
48 prc_mpistart, &
49 prc_mpifinish, &
50 prc_mpisplit_bulk, &
51 prc_mpisplit_nest, &
52 prc_universal_setup, &
53 prc_global_setup, &
54 prc_global_root, &
55 prc_errhandler_setup
56 use scale_fpm, only: &
57 fpm_init
62 implicit none
63
64 logical, intent(in) :: EXECUTE_PREPROCESS
65 logical, intent(in) :: EXECUTE_MODEL
66
67 integer :: NUM_BULKJOB = 1
68 integer :: NUM_BULKJOB_ONCE = 1
69 integer :: NUM_ITERATION_BULK = 1
70 integer :: BULKJOB_START_DIRNUM = 0
71 logical :: ADD_BULKJOB_PATH = .false.
72 integer :: NUM_DOMAIN = 1
73 integer :: NUM_FAIL_TOLERANCE = 1
74 integer :: FREQ_FAIL_CHECK = 5
75 integer :: PRC_DOMAINS(PRC_DOMAIN_nlim) = 0
76 character(len=H_LONG) :: CONF_FILES (PRC_DOMAIN_nlim) = ""
77 logical :: ABORT_ALL_JOBS = .false.
78 logical :: LOG_SPLIT = .false.
79 logical :: COLOR_REORDER = .true.
80 logical :: FAILURE_PRC_MANAGE = .false.
81
82 namelist / param_launcher / &
83
84
85 num_bulkjob, &
86 num_iteration_bulk, &
87 bulkjob_start_dirnum, &
88 add_bulkjob_path, &
89 num_domain, &
90 num_fail_tolerance, &
91 freq_fail_check, &
92 prc_domains, &
93 conf_files, &
94 abort_all_jobs, &
95 log_split, &
96 color_reorder, &
97 failure_prc_manage
98
99 integer :: universal_comm
100 integer :: universal_nprocs
101 integer :: universal_myrank
102 logical :: universal_master
103 character(len=H_LONG) :: universal_cnf_fname
104
105 integer :: global_comm
106 integer :: global_nprocs
107 integer :: PRC_BULKJOB(PRC_DOMAIN_nlim) = 0
108 integer :: ID_BULKJOB
109
110 logical :: use_fpm = .false.
111
112 integer :: local_comm
113 integer :: ID_DOMAIN
114 integer :: intercomm_parent
115 integer :: intercomm_child
116 character(len=5) :: path
117
118 integer :: itr
119 integer :: fid, ierr
120
121
122
123 call prc_mpistart( universal_comm )
124
125 call prc_universal_setup( universal_comm, &
126 universal_nprocs, &
127 universal_myrank, &
128 universal_master )
129
130 if( universal_master ) write(*,*) '*** Start Launch System for SCALE-DG'
131
132
133
134 universal_cnf_fname = io_arg_getfname( universal_master )
135
136 fid = io_cnf_open( universal_cnf_fname, &
137 universal_master )
138
139
140 conf_files(1) = universal_cnf_fname
141
142
143 rewind(fid)
144 read(fid,nml=param_launcher,iostat=ierr)
145 if ( ierr < 0 ) then
146
147 elseif( ierr > 0 ) then
148 if( universal_master ) write(*,*) 'xxx Not appropriate names in namelist PARAM_LAUNCHER. Check!'
149 call prc_abort
150 endif
151
152 close(fid)
153
154 if ( execute_preprocess &
155 .OR. execute_model ) then
156 if( universal_master ) write(*,*) "*** Execute preprocess? : ", execute_preprocess
157 if( universal_master ) write(*,*) "*** Execute model? : ", execute_model
158 else
159 if( universal_master ) write(*,*) 'xxx No execution. please check PARAM_LAUNCHER. STOP'
160 call prc_abort
161 endif
162
163
164
165 global_nprocs = universal_nprocs
166 if ( num_bulkjob > 1 ) then
167
168 if ( num_bulkjob == 1 ) num_iteration_bulk = 1
169 num_bulkjob_once = ceiling( real(num_bulkjob) / num_iteration_bulk )
170 if ( mod(universal_nprocs,num_bulkjob_once) /= 0 ) then
171 if( universal_master ) write(*,*) 'xxx Total Num of Processes must be divisible by NUM_BULKJOB/NUM_ITERATION_BULK. Check!'
172 if( universal_master ) write(*,*) 'xxx Total Num of Processes = ', universal_nprocs
173 if( universal_master ) write(*,*) 'xxx NUM_BULKJOB = ', num_bulkjob
174 if( universal_master ) write(*,*) 'xxx NUM_ITERATION_BULK = ', num_iteration_bulk
175 if( universal_master ) write(*,*) 'xxx NUM_BULKJOB / NUM_ITERATION_BULK = ', num_bulkjob_once
176 call prc_abort
177 endif
178
179 global_nprocs = universal_nprocs / num_bulkjob_once
180
181 if( universal_master ) write(*,'(1x,A,I5)') "*** TOTAL # of BULK JOBS = ", num_bulkjob
182 if( universal_master ) write(*,'(1x,A,I5)') "*** # of BULK JOB for each iteration = ", num_bulkjob_once
183 if( universal_master ) write(*,'(1x,A,I5)') "*** Total # of PROCESS = ", universal_nprocs
184 if( universal_master ) write(*,'(1x,A,I5)') "*** # of PROCESS of each JOB = ", global_nprocs
185
186 if ( bulkjob_start_dirnum < 0 ) then
187 if( universal_master ) write(*,*) 'xxx BULKJOB_START_DIRNUM must >=0'
188 call prc_abort
189 end if
190 if ( bulkjob_start_dirnum + num_bulkjob -1 > 9999 ) then
191 if( universal_master ) write(*,*) 'xxx BULKJOB_START_DIRNUM + NUM_BULKJOB must <= 9999'
192 call prc_abort
193 end if
194
195 if ( failure_prc_manage ) then
196 if( universal_master ) write(*,'(1x,A)') "*** Available: Failure Process Management"
197 use_fpm = .true.
198 if ( num_fail_tolerance <= 0 ) then
199 if( universal_master ) write(*,*) 'xxx Num of Failure Processes must be positive number. Check!'
200 if( universal_master ) write(*,*) 'xxx NUM_FAIL_TOLERANCE = ', num_fail_tolerance
201 call prc_abort
202 endif
203
204 if ( num_fail_tolerance > num_bulkjob ) then
205 write(*,*) 'xxx NUM_FAIL_TOLERANCE is bigger than # of NUM_BLUKJOB'
206 write(*,*) ' set to be: NUM_FAIL_TOLERANCE <= NUM_BLUKJOB'
207 call prc_abort
208 endif
209
210 if ( num_domain > 1 ) then
211 if ( freq_fail_check >= 1 .or. num_fail_tolerance /= num_bulkjob ) then
212 write(*,*) 'xxx Full function of FPM is not available with online nesting.'
213 write(*,*) ' You can use this only to avoid job stop until all members finish.'
214 write(*,*) ' for this purpose, set: FREQ_FAIL_CHECK = 0'
215 write(*,*) ' NUM_FAIL_TOLERANCE == NUM_BULKJOB'
216 call prc_abort
217 endif
218 endif
219 endif
220 endif
221 prc_bulkjob(1:num_bulkjob) = global_nprocs
222
223
224 call prc_mpisplit_bulk( universal_comm, &
225 num_bulkjob_once, &
226 prc_bulkjob(:), &
227 log_split, &
228 global_comm, &
229 id_bulkjob )
230
231 call prc_global_setup( abort_all_jobs, &
232 global_comm )
233
234
235
236 if ( num_domain > 1 ) then
237 if( universal_master ) write(*,'(1x,A,I5)') "*** TOTAL DOMAIN NUMBER = ", num_domain
238 if( universal_master ) write(*,'(1x,A,L5)') "*** Flag of ABORT ALL JOBS = ", abort_all_jobs
239 endif
240
241
242 call prc_mpisplit_nest( global_comm, &
243 num_domain, &
244 prc_domains(:), &
245 log_split, &
246 color_reorder, &
247 local_comm, &
248 id_domain )
249
250
251 call fpm_init( num_fail_tolerance, &
252 freq_fail_check, &
253 universal_comm, &
254 global_comm, &
255 local_comm, &
256 num_bulkjob, &
257 prc_global_root, &
258 use_fpm )
259
260 call prc_errhandler_setup( use_fpm, universal_master )
261
262
263
264 do itr = 1, num_iteration_bulk
265
266 if ( id_bulkjob > num_bulkjob ) exit
267
268 if( universal_master .and. num_iteration_bulk > 1 ) then
269 write(*,*)
270 write(*,*) '*** BULK ITERATION COUNT : ', itr, '/', num_iteration_bulk
271 end if
272
273
274 call io_set_universalrank( universal_myrank, &
275 id_bulkjob, &
276 id_domain )
277
278 if ( num_bulkjob > 1 ) then
279 write(path,'(I4.4,A)') id_bulkjob + bulkjob_start_dirnum, "/"
280 else
281 path = ""
282 endif
283
284 if ( execute_preprocess ) then
286 conf_files(id_domain), &
287 path, &
288 add_bulkjob_path )
289 endif
290
291 if ( execute_model ) then
293 conf_files(id_domain), &
294 path, &
295 add_bulkjob_path )
296 endif
297
298 id_bulkjob = id_bulkjob + num_bulkjob_once
299
300 end do
301
302
303 call prc_mpifinish
304
305 if( universal_master ) write(*,*) '*** End Launch System for SCALE-DG'
306
module SCALE-DG (a main routine of regional/global model)
subroutine, public dg_driver(comm_world, cnf_fname, path, add_path)
subroutine, public dg_prep(comm_world, cnf_fname, path, add_path)