DSDP
sdpnfac.c
1#include "numchol.h"
2
3static void UpdSnode(int m,
4 int n,
5 int s,
6 double diaga[],
7 double *a,
8 int fira[],
9 double diagb[],
10 double *b,
11 int firb[])
12{
13 int i,k,t,sze;
14 double rtemp1, rtemp2, rtemp3, rtemp4,
15 rtemp5, rtemp6, rtemp7, rtemp8,
16 rtemp9, rtemp10,rtemp11,rtemp12,
17 rtemp13,rtemp14,rtemp15,rtemp16,
18 *a1,*a2, *a3, *a4, *a5, *a6, *a7, *a8,
19 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
20 *b00,*b1,*b0;
21
22 for(i=0; i+1<s; i+=2) {
23 b00 = b+firb[i];
24 b0 = b00+1;
25 b1 = b+firb[i+1];
26 sze = m-i-2;
27 k = 0;
28
29 for(; k+3<n; k+=4) {
30 a1 = a+(fira[k+0 ]+i);
31 a2 = a+(fira[k+1 ]+i);
32 a3 = a+(fira[k+2 ]+i);
33 a4 = a+(fira[k+3 ]+i);
34
35 rtemp1 = a1[0]/diaga[k+0];
36 rtemp2 = a2[0]/diaga[k+1];
37 rtemp3 = a3[0]/diaga[k+2];
38 rtemp4 = a4[0]/diaga[k+3];
39
40 diagb[i] -= rtemp1 * a1[0]
41 + rtemp2 * a2[0]
42 + rtemp3 * a3[0]
43 + rtemp4 * a4[0];
44
45 ++ a1;
46 ++ a2;
47 ++ a3;
48 ++ a4;
49
50 rtemp5 = a1[0]/diaga[k+0];
51 rtemp6 = a2[0]/diaga[k+1];
52 rtemp7 = a3[0]/diaga[k+2];
53 rtemp8 = a4[0]/diaga[k+3];
54
55 b00[0] -= rtemp1 * a1[0]
56 + rtemp2 * a2[0]
57 + rtemp3 * a3[0]
58 + rtemp4 * a4[0];
59
60 diagb[i+1] -= rtemp5 * a1[0]
61 + rtemp6 * a2[0]
62 + rtemp7 * a3[0]
63 + rtemp8 * a4[0];
64
65 ++ a1;
66 ++ a2;
67 ++ a3;
68 ++ a4;
69
70 for(t=0; t<sze; ++t) {
71 b0[t] -= rtemp1 * a1[t]
72 + rtemp2 * a2[t]
73 + rtemp3 * a3[t]
74 + rtemp4 * a4[t];
75
76 b1[t] -= rtemp5 * a1[t]
77 + rtemp6 * a2[t]
78 + rtemp7 * a3[t]
79 + rtemp8 * a4[t];
80 }
81 }
82
83 for(; k+1<n; k+=2) {
84 a1 = a+(fira[k+0 ]+i);
85 a2 = a+(fira[k+1 ]+i);
86
87 rtemp1 = a1[0] / diaga[k+0];
88 rtemp2 = a2[0] / diaga[k+1];
89
90 diagb[i] -= a1[0] * rtemp1
91 + a2[0] * rtemp2;
92
93 ++ a1;
94 ++ a2;
95
96 rtemp5 = a1[0] / diaga[k+0];
97 rtemp6 = a2[0] / diaga[k+1];
98
99 b00[0] -= a1[0] * rtemp1
100 + a2[0] * rtemp2;
101
102 diagb[i+1] -= a1[0] * rtemp5
103 + a2[0] * rtemp6;
104
105
106 ++ a1;
107 ++ a2;
108
109 for(t=0; t<sze; ++t) {
110 b0[t] -= a1[t] * rtemp1
111 + a2[t] * rtemp2;
112
113 b1[t] -= a1[t] * rtemp5
114 + a2[t] * rtemp6;
115 }
116 }
117
118 for(; k<n; ++k) {
119 a1 = a+(fira[k+0 ]+i);
120
121 rtemp1 = a1[0] / diaga[k+0];
122
123 diagb[i] -= a1[0] * rtemp1;
124
125 ++ a1;
126
127 rtemp5 = a1[0] / diaga[k+0];
128
129 diagb[i+1] -= a1[0] * rtemp5;
130
131 b00[0] -= a1[0] * rtemp1;
132
133 ++ a1;
134
135 for(t=0; t<sze; ++t)
136 {
137 b0[t] -= rtemp1 * a1[t];
138 b1[t] -= rtemp5 * a1[t];
139 }
140 }
141 }
142
143 for(; i<s; ++i) {
144
145 b0 = b+firb[i];
146 sze = m-i-1;
147 k = 0;
148
149 for(; k+15<n; k+=16) {
150 a1 = a+(fira[k+0 ]+i);
151 a2 = a+(fira[k+1 ]+i);
152 a3 = a+(fira[k+2 ]+i);
153 a4 = a+(fira[k+3 ]+i);
154 a5 = a+(fira[k+4 ]+i);
155 a6 = a+(fira[k+5 ]+i);
156 a7 = a+(fira[k+6 ]+i);
157 a8 = a+(fira[k+7 ]+i);
158 a9 = a+(fira[k+8 ]+i);
159 a10 = a+(fira[k+9 ]+i);
160 a11 = a+(fira[k+10]+i);
161 a12 = a+(fira[k+11]+i);
162 a13 = a+(fira[k+12]+i);
163 a14 = a+(fira[k+13]+i);
164 a15 = a+(fira[k+14]+i);
165 a16 = a+(fira[k+15]+i);
166
167 rtemp1 = *a1/diaga[k+0];
168 rtemp2 = *a2/diaga[k+1];
169 rtemp3 = *a3/diaga[k+2];
170 rtemp4 = *a4/diaga[k+3];
171 rtemp5 = *a5/diaga[k+4];
172 rtemp6 = *a6/diaga[k+5];
173 rtemp7 = *a7/diaga[k+6];
174 rtemp8 = *a8/diaga[k+7];
175 rtemp9 = *a9/diaga[k+8];
176 rtemp10 = *a10/diaga[k+9];
177 rtemp11 = *a11/diaga[k+10];
178 rtemp12 = *a12/diaga[k+11];
179 rtemp13 = *a13/diaga[k+12];
180 rtemp14 = *a14/diaga[k+13];
181 rtemp15 = *a15/diaga[k+14];
182 rtemp16 = *a16/diaga[k+15];
183
184 diagb[i] -= rtemp1 * (*a1)
185 + rtemp2 * (*a2)
186 + rtemp3 * (*a3)
187 + rtemp4 * (*a4)
188 + rtemp5 * (*a5)
189 + rtemp6 * (*a6)
190 + rtemp7 * (*a7)
191 + rtemp8 * (*a8)
192 + rtemp9 * (*a9)
193 + rtemp10 * (*a10)
194 + rtemp11 * (*a11)
195 + rtemp12 * (*a12)
196 + rtemp13 * (*a13)
197 + rtemp14 * (*a14)
198 + rtemp15 * (*a15)
199 + rtemp16 * (*a16);
200
201
202 ++ a1;
203 ++ a2;
204 ++ a3;
205 ++ a4;
206 ++ a5;
207 ++ a6;
208 ++ a7;
209 ++ a8;
210 ++ a9;
211 ++ a10;
212 ++ a11;
213 ++ a12;
214 ++ a13;
215 ++ a14;
216 ++ a15;
217 ++ a16;
218
219 for(t=0; t<sze; ++t)
220 b0[t] -= rtemp1 * a1[t]
221 + rtemp2 * a2[t]
222 + rtemp3 * a3[t]
223 + rtemp4 * a4[t]
224 + rtemp5 * a5[t]
225 + rtemp6 * a6[t]
226 + rtemp7 * a7[t]
227 + rtemp8 * a8[t]
228 + rtemp9 * a9[t]
229 + rtemp10 * a10[t]
230 + rtemp11 * a11[t]
231 + rtemp12 * a12[t]
232 + rtemp13 * a13[t]
233 + rtemp14 * a14[t]
234 + rtemp15 * a15[t]
235 + rtemp16 * a16[t];
236
237 }
238
239 for(; k+11<n; k+=12) {
240 a1 = a+(fira[k+0 ]+i);
241 a2 = a+(fira[k+1 ]+i);
242 a3 = a+(fira[k+2 ]+i);
243 a4 = a+(fira[k+3 ]+i);
244 a5 = a+(fira[k+4 ]+i);
245 a6 = a+(fira[k+5 ]+i);
246 a7 = a+(fira[k+6 ]+i);
247 a8 = a+(fira[k+7 ]+i);
248 a9 = a+(fira[k+8 ]+i);
249 a10 = a+(fira[k+9 ]+i);
250 a11 = a+(fira[k+10]+i);
251 a12 = a+(fira[k+11]+i);
252
253 rtemp1 = *a1/diaga[k+0];
254 rtemp2 = *a2/diaga[k+1];
255 rtemp3 = *a3/diaga[k+2];
256 rtemp4 = *a4/diaga[k+3];
257 rtemp5 = *a5/diaga[k+4];
258 rtemp6 = *a6/diaga[k+5];
259 rtemp7 = *a7/diaga[k+6];
260 rtemp8 = *a8/diaga[k+7];
261 rtemp9 = *a9/diaga[k+8];
262 rtemp10 = *a10/diaga[k+9];
263 rtemp11 = *a11/diaga[k+10];
264 rtemp12 = *a12/diaga[k+11];
265
266 diagb[i] -= rtemp1 * (*a1)
267 + rtemp2 * (*a2)
268 + rtemp3 * (*a3)
269 + rtemp4 * (*a4)
270 + rtemp5 * (*a5)
271 + rtemp6 * (*a6)
272 + rtemp7 * (*a7)
273 + rtemp8 * (*a8)
274 + rtemp9 * (*a9)
275 + rtemp10 * (*a10)
276 + rtemp11 * (*a11)
277 + rtemp12 * (*a12);
278
279 ++ a1;
280 ++ a2;
281 ++ a3;
282 ++ a4;
283 ++ a5;
284 ++ a6;
285 ++ a7;
286 ++ a8;
287 ++ a9;
288 ++ a10;
289 ++ a11;
290 ++ a12;
291
292 for(t=0; t<sze; ++t)
293 b0[t] -= rtemp1 * a1[t]
294 + rtemp2 * a2[t]
295 + rtemp3 * a3[t]
296 + rtemp4 * a4[t]
297 + rtemp5 * a5[t]
298 + rtemp6 * a6[t]
299 + rtemp7 * a7[t]
300 + rtemp8 * a8[t]
301 + rtemp9 * a9[t]
302 + rtemp10 * a10[t]
303 + rtemp11 * a11[t]
304 + rtemp12 * a12[t];
305
306 }
307
308 for(; k+7<n; k+=8) {
309 a1 = a+(fira[k+0 ]+i);
310 a2 = a+(fira[k+1 ]+i);
311 a3 = a+(fira[k+2 ]+i);
312 a4 = a+(fira[k+3 ]+i);
313 a5 = a+(fira[k+4 ]+i);
314 a6 = a+(fira[k+5 ]+i);
315 a7 = a+(fira[k+6 ]+i);
316 a8 = a+(fira[k+7 ]+i);
317
318 rtemp1 = *a1/diaga[k+0];
319 rtemp2 = *a2/diaga[k+1];
320 rtemp3 = *a3/diaga[k+2];
321 rtemp4 = *a4/diaga[k+3];
322 rtemp5 = *a5/diaga[k+4];
323 rtemp6 = *a6/diaga[k+5];
324 rtemp7 = *a7/diaga[k+6];
325 rtemp8 = *a8/diaga[k+7];
326
327 diagb[i] -= rtemp1 * (*a1)
328 + rtemp2 * (*a2)
329 + rtemp3 * (*a3)
330 + rtemp4 * (*a4)
331 + rtemp5 * (*a5)
332 + rtemp6 * (*a6)
333 + rtemp7 * (*a7)
334 + rtemp8 * (*a8);
335
336 ++ a1;
337 ++ a2;
338 ++ a3;
339 ++ a4;
340 ++ a5;
341 ++ a6;
342 ++ a7;
343 ++ a8;
344
345 for(t=0; t<sze; ++t)
346 b0[t] -= rtemp1 * a1[t]
347 + rtemp2 * a2[t]
348 + rtemp3 * a3[t]
349 + rtemp4 * a4[t]
350 + rtemp5 * a5[t]
351 + rtemp6 * a6[t]
352 + rtemp7 * a7[t]
353 + rtemp8 * a8[t];
354
355 }
356
357 for(; k+3<n; k+=4) {
358 a1 = a+(fira[k+0 ]+i);
359 a2 = a+(fira[k+1 ]+i);
360 a3 = a+(fira[k+2 ]+i);
361 a4 = a+(fira[k+3 ]+i);
362
363 rtemp1 = *a1/diaga[k+0];
364 rtemp2 = *a2/diaga[k+1];
365 rtemp3 = *a3/diaga[k+2];
366 rtemp4 = *a4/diaga[k+3];
367
368 diagb[i] -= rtemp1 * (*a1)
369 + rtemp2 * (*a2)
370 + rtemp3 * (*a3)
371 + rtemp4 * (*a4);
372
373 ++ a1;
374 ++ a2;
375 ++ a3;
376 ++ a4;
377
378 for(t=0; t<sze; ++t)
379 b0[t] -= rtemp1 * a1[t]
380 + rtemp2 * a2[t]
381 + rtemp3 * a3[t]
382 + rtemp4 * a4[t];
383
384 }
385
386 for(; k+1<n; k+=2) {
387 a1 = a+(fira[k+0 ]+i);
388 a2 = a+(fira[k+1 ]+i);
389
390 rtemp1 = *a1/diaga[k+0];
391 rtemp2 = *a2/diaga[k+1];
392
393 diagb[i] -= rtemp1 * (*a1)
394 + rtemp2 * (*a2);
395
396 ++ a1;
397 ++ a2;
398
399 for(t=0; t<sze; ++t)
400 b0[t] -= rtemp1 * a1[t]
401 + rtemp2 * a2[t];
402 }
403
404 for(; k<n; ++k) {
405 a1 = a+(fira[k+0 ]+i);
406
407 rtemp1 = *a1/diaga[k+0];
408
409 diagb[i] -= rtemp1 * (*a1);
410
411 ++ a1;
412
413 for(t=0; t<sze; ++t)
414 b0[t] -= rtemp1 * a1[t];
415 }
416 }
417} /* UpdSnode */
418
419static void iUpdSnode(chfac *cf,
420 int snde,
421 int f,
422 int l,
423 int uf,
424 int ul,
425 int iw[])
426{
427 int k,
428 *ujsze=cf->ujsze,*uhead=cf->uhead,*subg=cf->subg;
429 double *diag=cf->diag,*uval=cf->uval;
430
431 if (f==l || uf==ul)
432 return;
433
434 f += subg[snde];
435 l += subg[snde];
436 uf += subg[snde];
437 ul += subg[snde];
438
439 for(k=f; k<l; ++k)
440 iw[k-f] = uhead[k]+uf-k-1;
441
442 UpdSnode(1+ujsze[uf],l-f,ul-uf,diag+f,uval,iw,diag+uf,uval,uhead+uf);
443} /* iUpdSnode */
444
445static int DiagUpdate(double *dii,
446 int mode)
447{
448 int id=TRUE;
449
450 if (mode) {
451 if (*dii<1.0e-13)
452 return FALSE;
453 }
454 else {
455 if (fabs(*dii)<1.0e-35) {
456 printf(" diagonal nearly zero: %5.1e.\n",(*dii));
457 return FALSE;
458 }
459 }
460 return id;
461} /* DiagUpdate */
462
463static int FacSnode(chfac *cf,
464 int snde,
465 int f,
466 int l,
467 int iw[],
468 int mode)
469{
470 int itemp,k;
471
472 if (f==l)
473 return (CfcOk);
474
475 itemp = cf->subg[snde]+f;
476
477 if (!DiagUpdate(cf->diag+itemp,
478 mode))
479 return (CfcIndef);
480
481 if (fabs(cf->diag[itemp])<=cf->tolpiv) {
482 printf("Singular d[%d]=%e\n",
483 cf->subg[snde]+f,cf->diag[cf->subg[snde]+f]);
484 return (CfcIndef);
485 }
486
487 for(k=f+1; k<l; ++k) {
488 iUpdSnode(cf,snde,f,k,k,k+1,iw);
489
490 itemp = cf->subg[snde]+k;
491
492 if (!DiagUpdate(&cf->diag[itemp],
493 mode))
494 return (CfcIndef);
495
496 if (fabs(cf->diag[itemp])<=cf->tolpiv) {
497 printf(" singular d[%d]=%e\n",
498 cf->subg[snde]+k,cf->diag[cf->subg[snde]+k]);
499
500 return (CfcIndef);
501 }
502 }
503
504 return CfcOk;
505} /* FacSnode */
506
507static void UpdSnodes(int m,
508 int n,
509 int s,
510 double diaga[],
511 double *a,
512 int fira[],
513 double diagb[],
514 double *b,
515 int firb[],
516 int subb[])
517{
518 int i,j,k,t,u,sze,delay,
519 *ls;
520 double rtemp1,rtemp2,rtemp3,rtemp4,
521 rtemp5,rtemp6,rtemp7,rtemp8,
522 rtemp9,rtemp10,rtemp11,rtemp12,
523 rtemp13,rtemp14,rtemp15,rtemp16,
524 *a1,*a2,*a3,*a4,*a5,*a6,*a7,*a8,
525 *a9,*a10,*a11,*a12,*a13,*a14,*a15,*a16,
526 *b0;
527
528 if (m<s)
529 exit(0);
530
531 if (m==0 || n==0)
532 return;
533
534 for(i=0; i<s; ++i) {
535 j = subb[i];
536 u = j-subb[0];
537
538 b0 = b+firb[u];
539
540 delay = j+1;
541 sze = m-i-1;
542 ls = subb+i+1;
543
544 k = 0;
545
546 for(; k+15<n; k+=16) {
547 a1 = a+(fira[k+0 ]+i);
548 a2 = a+(fira[k+1 ]+i);
549 a3 = a+(fira[k+2 ]+i);
550 a4 = a+(fira[k+3 ]+i);
551 a5 = a+(fira[k+4 ]+i);
552 a6 = a+(fira[k+5 ]+i);
553 a7 = a+(fira[k+6 ]+i);
554 a8 = a+(fira[k+7 ]+i);
555 a9 = a+(fira[k+8 ]+i);
556 a10 = a+(fira[k+9 ]+i);
557 a11 = a+(fira[k+10]+i);
558 a12 = a+(fira[k+11]+i);
559 a13 = a+(fira[k+12]+i);
560 a14 = a+(fira[k+13]+i);
561 a15 = a+(fira[k+14]+i);
562 a16 = a+(fira[k+15]+i);
563
564 rtemp1 = *a1/diaga[k+0];
565 rtemp2 = *a2/diaga[k+1];
566 rtemp3 = *a3/diaga[k+2];
567 rtemp4 = *a4/diaga[k+3];
568 rtemp5 = *a5/diaga[k+4];
569 rtemp6 = *a6/diaga[k+5];
570 rtemp7 = *a7/diaga[k+6];
571 rtemp8 = *a8/diaga[k+7];
572 rtemp9 = *a9/diaga[k+8];
573 rtemp10 = *a10/diaga[k+9];
574 rtemp11 = *a11/diaga[k+10];
575 rtemp12 = *a12/diaga[k+11];
576 rtemp13 = *a13/diaga[k+12];
577 rtemp14 = *a14/diaga[k+13];
578 rtemp15 = *a15/diaga[k+14];
579 rtemp16 = *a16/diaga[k+15];
580
581 diagb[u] -= rtemp1 * (*a1)
582 + rtemp2 * (*a2)
583 + rtemp3 * (*a3)
584 + rtemp4 * (*a4)
585 + rtemp5 * (*a5)
586 + rtemp6 * (*a6)
587 + rtemp7 * (*a7)
588 + rtemp8 * (*a8)
589 + rtemp9 * (*a9)
590 + rtemp10 * (*a10)
591 + rtemp11 * (*a11)
592 + rtemp12 * (*a12)
593 + rtemp13 * (*a13)
594 + rtemp14 * (*a14)
595 + rtemp15 * (*a15)
596 + rtemp16 * (*a16);
597
598
599 ++ a1;
600 ++ a2;
601 ++ a3;
602 ++ a4;
603 ++ a5;
604 ++ a6;
605 ++ a7;
606 ++ a8;
607 ++ a9;
608 ++ a10;
609 ++ a11;
610 ++ a12;
611 ++ a13;
612 ++ a14;
613 ++ a15;
614 ++ a16;
615
616 for(t=0; t<sze; ++t)
617 b0[ls[t]-delay] -= rtemp1 * a1[t]
618 + rtemp2 * a2[t]
619 + rtemp3 * a3[t]
620 + rtemp4 * a4[t]
621 + rtemp5 * a5[t]
622 + rtemp6 * a6[t]
623 + rtemp7 * a7[t]
624 + rtemp8 * a8[t]
625 + rtemp9 * a9[t]
626 + rtemp10 * a10[t]
627 + rtemp11 * a11[t]
628 + rtemp12 * a12[t]
629 + rtemp13 * a13[t]
630 + rtemp14 * a14[t]
631 + rtemp15 * a15[t]
632 + rtemp16 * a16[t];
633 }
634
635 for(; k+11<n; k+=12) {
636 a1 = a+(fira[k+0 ]+i);
637 a2 = a+(fira[k+1 ]+i);
638 a3 = a+(fira[k+2 ]+i);
639 a4 = a+(fira[k+3 ]+i);
640 a5 = a+(fira[k+4 ]+i);
641 a6 = a+(fira[k+5 ]+i);
642 a7 = a+(fira[k+6 ]+i);
643 a8 = a+(fira[k+7 ]+i);
644 a9 = a+(fira[k+8 ]+i);
645 a10 = a+(fira[k+9 ]+i);
646 a11 = a+(fira[k+10]+i);
647 a12 = a+(fira[k+11]+i);
648
649 rtemp1 = *a1/diaga[k+0];
650 rtemp2 = *a2/diaga[k+1];
651 rtemp3 = *a3/diaga[k+2];
652 rtemp4 = *a4/diaga[k+3];
653 rtemp5 = *a5/diaga[k+4];
654 rtemp6 = *a6/diaga[k+5];
655 rtemp7 = *a7/diaga[k+6];
656 rtemp8 = *a8/diaga[k+7];
657 rtemp9 = *a9/diaga[k+8];
658 rtemp10 = *a10/diaga[k+9];
659 rtemp11 = *a11/diaga[k+10];
660 rtemp12 = *a12/diaga[k+11];
661
662 diagb[u] -= rtemp1 * (*a1)
663 + rtemp2 * (*a2)
664 + rtemp3 * (*a3)
665 + rtemp4 * (*a4)
666 + rtemp5 * (*a5)
667 + rtemp6 * (*a6)
668 + rtemp7 * (*a7)
669 + rtemp8 * (*a8)
670 + rtemp9 * (*a9)
671 + rtemp10 * (*a10)
672 + rtemp11 * (*a11)
673 + rtemp12 * (*a12);
674
675 ++ a1;
676 ++ a2;
677 ++ a3;
678 ++ a4;
679 ++ a5;
680 ++ a6;
681 ++ a7;
682 ++ a8;
683 ++ a9;
684 ++ a10;
685 ++ a11;
686 ++ a12;
687
688 for(t=0; t<sze; ++t)
689 b0[ls[t]-delay] -= rtemp1 * a1[t]
690 + rtemp2 * a2[t]
691 + rtemp3 * a3[t]
692 + rtemp4 * a4[t]
693 + rtemp5 * a5[t]
694 + rtemp6 * a6[t]
695 + rtemp7 * a7[t]
696 + rtemp8 * a8[t]
697 + rtemp9 * a9[t]
698 + rtemp10 * a10[t]
699 + rtemp11 * a11[t]
700 + rtemp12 * a12[t];
701 }
702
703 for(; k+7<n; k+=8) {
704 a1 = a+(fira[k+0 ]+i);
705 a2 = a+(fira[k+1 ]+i);
706 a3 = a+(fira[k+2 ]+i);
707 a4 = a+(fira[k+3 ]+i);
708 a5 = a+(fira[k+4 ]+i);
709 a6 = a+(fira[k+5 ]+i);
710 a7 = a+(fira[k+6 ]+i);
711 a8 = a+(fira[k+7 ]+i);
712
713 rtemp1 = *a1/diaga[k+0];
714 rtemp2 = *a2/diaga[k+1];
715 rtemp3 = *a3/diaga[k+2];
716 rtemp4 = *a4/diaga[k+3];
717 rtemp5 = *a5/diaga[k+4];
718 rtemp6 = *a6/diaga[k+5];
719 rtemp7 = *a7/diaga[k+6];
720 rtemp8 = *a8/diaga[k+7];
721
722 diagb[u] -= rtemp1 * (*a1)
723 + rtemp2 * (*a2)
724 + rtemp3 * (*a3)
725 + rtemp4 * (*a4)
726 + rtemp5 * (*a5)
727 + rtemp6 * (*a6)
728 + rtemp7 * (*a7)
729 + rtemp8 * (*a8);
730
731 ++ a1;
732 ++ a2;
733 ++ a3;
734 ++ a4;
735 ++ a5;
736 ++ a6;
737 ++ a7;
738 ++ a8;
739
740 for(t=0; t<sze; ++t)
741 b0[ls[t]-delay] -= rtemp1 * a1[t]
742 + rtemp2 * a2[t]
743 + rtemp3 * a3[t]
744 + rtemp4 * a4[t]
745 + rtemp5 * a5[t]
746 + rtemp6 * a6[t]
747 + rtemp7 * a7[t]
748 + rtemp8 * a8[t];
749
750 }
751
752 for(; k+3<n; k+=4) {
753 a1 = a+(fira[k+0 ]+i);
754 a2 = a+(fira[k+1 ]+i);
755 a3 = a+(fira[k+2 ]+i);
756 a4 = a+(fira[k+3 ]+i);
757
758 rtemp1 = *a1/diaga[k+0];
759 rtemp2 = *a2/diaga[k+1];
760 rtemp3 = *a3/diaga[k+2];
761 rtemp4 = *a4/diaga[k+3];
762
763 diagb[u]-= rtemp1 * (*a1)
764 +rtemp2 * (*a2)
765 +rtemp3 * (*a3)
766 +rtemp4 * (*a4);
767
768 ++ a1;
769 ++ a2;
770 ++ a3;
771 ++ a4;
772
773 for(t=0; t<sze; ++t)
774 b0[ls[t]-delay] -= rtemp1 * a1[t]
775 + rtemp2 * a2[t]
776 + rtemp3 * a3[t]
777 + rtemp4 * a4[t];
778
779 }
780
781 for(; k+1<n; k+=2) {
782 a1 = a+(fira[k+0 ]+i);
783 a2 = a+(fira[k+1 ]+i);
784
785 rtemp1 = *a1/diaga[k+0];
786 rtemp2 = *a2/diaga[k+1];
787
788 diagb[u] -= rtemp1 * (*a1)
789 + rtemp2 * (*a2);
790
791 ++ a1;
792 ++ a2;
793
794 for(t=0; t<sze; ++t)
795 b0[ls[t]-delay] -= rtemp1 * a1[t]
796 + rtemp2 * a2[t];
797 }
798
799 for(; k<n; ++k) {
800 a1 = a+(fira[k+0 ]+i);
801
802 rtemp1 = *a1/diaga[k+0];
803
804 diagb[u] -= rtemp1 * (*a1);
805
806 ++ a1;
807
808 for(t=0; t<sze; ++t)
809 b0[ls[t]-delay] -= rtemp1 * a1[t];
810 }
811 }
812} /* UpdSnodes */
813
814static void ExtUpdSnode(chfac *cf,
815 int snde,
816 int usnde,
817 int f,
818 int l,
819 int start,
820 int iw[])
821{
822 int k,sze,
823 *ls,
824 *subg=cf->subg,
825 *ujsze=cf->ujsze,*usub=cf->usub,*ujbeg=cf->ujbeg,*uhead=cf->uhead;
826 double *diag=cf->diag,*uval=cf->uval;
827
828 f += subg[snde];
829 l += subg[snde];
830
831 if (usnde==cf->nsnds-1) {
832 if (usub[ujbeg[f]+start]<subg[usnde]) {
833 printf("\n Index error");
834 exit(0);
835 }
836
837 if (cf->sdens)
838 exit(0);
839
840 ls = usub+ujbeg[f]+start;
841 sze = ujsze[f]-start;
842
843 for(k=f; k<l; ++k)
844 iw[k-f] =uhead[k]+start-(k-f);
845
846 UpdSnodes(sze,l-f,sze,diag+f,uval,iw,diag+ls[0],uval,uhead+ls[0],ls);
847 }
848 else
849 exit(0);
850} /* ExtUpdSnode */
851
852static void PushFward(chfac *cf,
853 int snde,
854 int f,
855 int l,
856 int iw[])
857{
858 int j,s,t,u,k,stops,offset,sze,itemp,
859 *ls0,*ls1,
860 *map=iw,*subg=cf->subg,
861 *ujsze=cf->ujsze,*uhead=cf->uhead,*ujbeg=cf->ujbeg,*usub=cf->usub;
862 double rtemp1,*l0,*l1,
863 *diag=cf->diag,*uval=cf->uval;
864
865 if (f>subg[snde+1]-subg[snde]) {
866 printf("\n PushFward");
867 exit(0);
868 }
869
870 if (f==l)
871 return;
872
873 f += subg[snde];
874 l += subg[snde];
875
876 offset = subg[snde+1]-f-1;
877 sze = ujsze[f] - offset;
878 ls1 = usub+ujbeg[f]+offset;
879
880 if (f+1==l) {
881 l1 = uval+uhead[f]+offset;
882
883 stops = sze;
884 for(t=0; t<sze; ++t) {
885 j = ls1[0];
886
887 if (j>=subg[cf->nsnds-1])
888 break;
889
890 rtemp1 = l1[0]/diag[f];
891 diag[j] -= rtemp1*l1[0];
892 ++ l1;
893
894 l0 = uval+uhead[j];
895 ls0 = usub+ujbeg[j];
896
897 ++ ls1;
898 -- stops;
899
900 if (stops && ls1[stops-1]==ls0[stops-1]) {
901 for(s=0; s<stops; ++s)
902 l0[s] -= rtemp1 * l1[s];
903 }
904
905 else {
906 for(s=0, u=0; s<stops; ++u) {
907 if (ls0[u]==ls1[s]) {
908 l0[u] -= rtemp1 * l1[s];
909 ++ s;
910 }
911 }
912 }
913 }
914
915 if (t<sze && !cf->sdens)
916 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],l-subg[snde],t,iw);
917 }
918
919 else {
920 stops = sze;
921 for(t=0; t<sze; ++t, ++offset) {
922 j = ls1[0];
923
924 if (j>=subg[cf->nsnds-1]) {
925 if (!cf->sdens)
926 ExtUpdSnode(cf,snde,cf->nsnds-1,f-subg[snde],
927 l-subg[snde],offset,iw);
928 break;
929 }
930
931 ls0 = usub+ujbeg[j];
932 l0 = uval+uhead[j];
933
934 ++ ls1;
935 -- stops;
936
937 k = f;
938 itemp = offset+f;
939
940 if (stops && ls1[stops-1]==ls0[stops-1]) {
941 for(k=f; k<l; ++k)
942 map[k-f] = uhead[k]+itemp-k;
943
944 UpdSnode(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j);
945 }
946
947 else {
948 map[l] = 0;
949 for(s=0, u=0; s<stops; ++u) {
950 if (ls1[s]==ls0[u]) {
951 map[1+l+s] = 1+u;
952 ++ s;
953 }
954 }
955
956 for(k=f; k<l; ++k)
957 map[k-f] = uhead[k]+itemp-k;
958
959 UpdSnodes(1+stops,l-f,1,diag+f,uval,map,diag+j,uval,uhead+j,map+l);
960 }
961 }
962 }
963} /* PushFwardard */
964
965static int FacDenNode(chfac *cf,
966 int iw[],
967 double rw[],
968 int mode)
969{
970 int c,d,j,s,t,sncl,k,k0,m,cacsze,sze,offset,
971 *subg=cf->subg,*ujsze=cf->ujsze,
972 *ujbeg=cf->ujbeg,*uhead=cf->uhead,
973 *usub=cf->usub,*dhead=cf->dhead,
974 *dsub=cf->dsub,*dbeg=cf->dbeg,*ls;
975 double *diag=cf->diag,*uval=cf->uval;
976 int sresp;
977
978 cacsze = cf->cachesize*cf->cacheunit;
979
980 if (cf->sdens) {
981 for(d=0; d<cf->ndens; ++d) {
982 c = 0;
983 for(k=dhead[d]; k<dhead[d+1]; ++k) {
984 offset = dbeg[k];
985 s = dsub[k];
986 if (usub[ujbeg[subg[s]]+offset]<subg[cf->nsnds-1]) {
987 printf("\nindex error1");
988 exit(0);
989 }
990
991 for(j=subg[s]; j<subg[s+1]; ++c, ++j) {
992 rw[c] = diag[j];
993 iw[c] = uhead[j]+offset-(j-subg[s]);
994
995 if (usub[ujbeg[j]+offset-(j-subg[s])]<subg[cf->nsnds-1]) {
996 printf("\nindex error");
997 exit(0);
998 }
999 }
1000 }
1001
1002 if (c) {
1003 k = dhead[d];
1004 s = dsub[k];
1005 m = ujsze[subg[s]]-dbeg[k];
1006 ls = usub+ujbeg[subg[s]]+dbeg[k];
1007 if (m) {
1008 for(k=0; k<c;) {
1009 t = cacsze/(m*sizeof(double));
1010 t = max(t,1);
1011 t = min(c-k,t);
1012
1013 UpdSnodes(m,t,m,rw+k,uval,iw+k,
1014 diag+ls[0],uval,uhead+ls[0],ls);
1015 k+=t;
1016 }
1017 }
1018 }
1019 }
1020 }
1021
1022 s = cf->nsnds-1;
1023
1024 sncl = cf->subg[s+1]-cf->subg[s];
1025 for(k=0; k<sncl;) {
1026 k0 = k;
1027 for(sze=0; sze<cacsze && k<sncl; ++k)
1028 sze += ujsze[subg[s]+k] * sizeof(double);
1029
1030 if (k==k0)
1031 ++k;
1032 else if (k>=k0+2 && sze>cacsze)
1033 --k;
1034
1035 if (k>sncl)
1036 exit(0);
1037
1038 sresp = FacSnode(cf,s,k0,k,iw,mode);
1039 if (sresp!=CfcOk)
1040 return (sresp);
1041
1042 iUpdSnode(cf,s,k0,k,k,sncl,iw);
1043
1044 }
1045 return (CfcOk);
1046} /* FacDenNode */
1047
1048int ChlFact(chfac *cf,
1049 int *iw,
1050 double *rw,
1051 int mode)
1052{
1053 int s,sncl,k,k0,cacsze,sze,
1054 *subg=cf->subg,*ujsze=cf->ujsze;
1055 int cid;
1056
1057 cacsze=cf->cachesize*cf->cacheunit;
1058
1059 for(s=0; s+1<cf->nsnds; ++s) {
1060 sncl = cf->subg[s+1]-cf->subg[s];
1061
1062 for(k=0; k<sncl;) {
1063 k0 = k;
1064 for(sze=0; sze<=cacsze && k<sncl; ++k)
1065 sze += ujsze[subg[s]+k]*sizeof(double);
1066
1067 if (k==k0)
1068 ++k;
1069 else if (k>=k0+2 && sze>cacsze)
1070 --k;
1071
1072 if (k>sncl)
1073 exit(0);
1074
1075 cid=FacSnode(cf,s,k0,k,iw,mode);
1076 if (cid!=CfcOk)
1077 return (cid);
1078
1079 iUpdSnode(cf,s,k0,k,k,sncl,iw);
1080
1081 PushFward(cf,s,k0,k,iw);
1082 }
1083 }
1084
1085 cid=FacDenNode(cf,iw,rw,mode);
1086
1087 for (k=0;k<cf->nrow;k++){
1088 cf->sqrtdiag[k]=sqrt(fabs(cf->diag[k]));
1089 }
1090
1091 return cid;
1092} /* ChlFact */